Diffstat (limited to 'net')
-rw-r--r-- net/802/psnap.c | 9
-rw-r--r-- net/802/tr.c | 2
-rw-r--r-- net/8021q/vlan.c | 2
-rw-r--r-- net/8021q/vlan_core.c | 45
-rw-r--r-- net/9p/trans_fd.c | 2
-rw-r--r-- net/Kconfig | 3
-rw-r--r-- net/Makefile | 1
-rw-r--r-- net/appletalk/ddp.c | 6
-rw-r--r-- net/appletalk/dev.c | 10
-rw-r--r-- net/atm/br2684.c | 58
-rw-r--r-- net/atm/clip.c | 30
-rw-r--r-- net/atm/lec.c | 64
-rw-r--r-- net/atm/lec.h | 1
-rw-r--r-- net/ax25/af_ax25.c | 2
-rw-r--r-- net/ax25/ax25_iface.c | 13
-rw-r--r-- net/bluetooth/af_bluetooth.c | 17
-rw-r--r-- net/bluetooth/cmtp/core.c | 3
-rw-r--r-- net/bluetooth/hci_conn.c | 64
-rw-r--r-- net/bluetooth/hci_core.c | 3
-rw-r--r-- net/bluetooth/hci_event.c | 26
-rw-r--r-- net/bluetooth/l2cap.c | 602
-rw-r--r-- net/bluetooth/rfcomm/core.c | 179
-rw-r--r-- net/bluetooth/rfcomm/sock.c | 189
-rw-r--r-- net/bluetooth/sco.c | 57
-rw-r--r-- net/bridge/br_netfilter.c | 2
-rw-r--r-- net/bridge/br_netlink.c | 3
-rw-r--r-- net/can/af_can.c | 5
-rw-r--r-- net/can/raw.c | 3
-rw-r--r-- net/compat.c | 19
-rw-r--r-- net/core/dev.c | 263
-rw-r--r-- net/core/ethtool.c | 58
-rw-r--r-- net/core/fib_rules.c | 3
-rw-r--r-- net/core/neighbour.c | 15
-rw-r--r-- net/core/pktgen.c | 18
-rw-r--r-- net/core/rtnetlink.c | 9
-rw-r--r-- net/core/skbuff.c | 213
-rw-r--r-- net/core/sock.c | 95
-rw-r--r-- net/core/sysctl_net_core.c | 1
-rw-r--r-- net/dccp/ackvec.h | 3
-rw-r--r-- net/dccp/dccp.h | 26
-rw-r--r-- net/dccp/feat.c | 232
-rw-r--r-- net/dccp/feat.h | 21
-rw-r--r-- net/dccp/minisocks.c | 11
-rw-r--r-- net/dccp/options.c | 8
-rw-r--r-- net/dccp/output.c | 37
-rw-r--r-- net/dccp/proto.c | 2
-rw-r--r-- net/dccp/sysctl.c | 43
-rw-r--r-- net/decnet/af_decnet.c | 22
-rw-r--r-- net/decnet/dn_dev.c | 6
-rw-r--r-- net/decnet/dn_route.c | 2
-rw-r--r-- net/decnet/dn_table.c | 3
-rw-r--r-- net/decnet/sysctl_net_decnet.c | 2
-rw-r--r-- net/dsa/mv88e6123_61_65.c | 2
-rw-r--r-- net/dsa/mv88e6131.c | 2
-rw-r--r-- net/dsa/tag_dsa.c | 2
-rw-r--r-- net/dsa/tag_edsa.c | 2
-rw-r--r-- net/dsa/tag_trailer.c | 2
-rw-r--r-- net/econet/af_econet.c | 2
-rw-r--r-- net/ipv4/Kconfig | 52
-rw-r--r-- net/ipv4/af_inet.c | 26
-rw-r--r-- net/ipv4/arp.c | 2
-rw-r--r-- net/ipv4/devinet.c | 12
-rw-r--r-- net/ipv4/fib_frontend.c | 2
-rw-r--r-- net/ipv4/fib_semantics.c | 5
-rw-r--r-- net/ipv4/icmp.c | 2
-rw-r--r-- net/ipv4/inet_connection_sock.c | 42
-rw-r--r-- net/ipv4/inet_fragment.c | 1
-rw-r--r-- net/ipv4/inet_hashtables.c | 12
-rw-r--r-- net/ipv4/ip_gre.c | 136
-rw-r--r-- net/ipv4/ip_output.c | 6
-rw-r--r-- net/ipv4/ipconfig.c | 8
-rw-r--r-- net/ipv4/ipip.c | 7
-rw-r--r-- net/ipv4/ipmr.c | 464
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c | 4
-rw-r--r-- net/ipv4/proc.c | 4
-rw-r--r-- net/ipv4/raw.c | 1
-rw-r--r-- net/ipv4/route.c | 15
-rw-r--r-- net/ipv4/tcp.c | 31
-rw-r--r-- net/ipv4/tcp_bic.c | 11
-rw-r--r-- net/ipv4/tcp_cong.c | 21
-rw-r--r-- net/ipv4/tcp_cubic.c | 11
-rw-r--r-- net/ipv4/tcp_htcp.c | 3
-rw-r--r-- net/ipv4/tcp_input.c | 116
-rw-r--r-- net/ipv4/tcp_ipv4.c | 2
-rw-r--r-- net/ipv4/tcp_minisocks.c | 5
-rw-r--r-- net/ipv4/tcp_output.c | 52
-rw-r--r-- net/ipv4/tcp_scalable.c | 10
-rw-r--r-- net/ipv4/tcp_timer.c | 23
-rw-r--r-- net/ipv4/tcp_veno.c | 7
-rw-r--r-- net/ipv4/tcp_yeah.c | 9
-rw-r--r-- net/ipv4/udp.c | 4
-rw-r--r-- net/ipv4/xfrm4_policy.c | 2
-rw-r--r-- net/ipv6/addrconf.c | 54
-rw-r--r-- net/ipv6/af_inet6.c | 32
-rw-r--r-- net/ipv6/ipv6_sockglue.c | 3
-rw-r--r-- net/ipv6/ndisc.c | 22
-rw-r--r-- net/ipv6/route.c | 9
-rw-r--r-- net/ipv6/sit.c | 5
-rw-r--r-- net/ipv6/tcp_ipv6.c | 8
-rw-r--r-- net/ipv6/xfrm6_policy.c | 2
-rw-r--r-- net/ipv6/xfrm6_state.c | 2
-rw-r--r-- net/ipx/af_ipx.c | 4
-rw-r--r-- net/irda/irmod.c | 2
-rw-r--r-- net/iucv/af_iucv.c | 3
-rw-r--r-- net/key/af_key.c | 6
-rw-r--r-- net/llc/af_llc.c | 6
-rw-r--r-- net/llc/llc_conn.c | 3
-rw-r--r-- net/llc/llc_core.c | 4
-rw-r--r-- net/mac80211/Makefile | 6
-rw-r--r-- net/mac80211/aes_cmac.c | 135
-rw-r--r-- net/mac80211/aes_cmac.h | 19
-rw-r--r-- net/mac80211/agg-rx.c | 302
-rw-r--r-- net/mac80211/agg-tx.c | 701
-rw-r--r-- net/mac80211/cfg.c | 167
-rw-r--r-- net/mac80211/debugfs.c | 59
-rw-r--r-- net/mac80211/debugfs_key.c | 79
-rw-r--r-- net/mac80211/debugfs_key.h | 10
-rw-r--r-- net/mac80211/debugfs_netdev.c | 48
-rw-r--r-- net/mac80211/debugfs_sta.c | 5
-rw-r--r-- net/mac80211/ht.c | 903
-rw-r--r-- net/mac80211/ibss.c | 905
-rw-r--r-- net/mac80211/ieee80211_i.h | 277
-rw-r--r-- net/mac80211/iface.c | 154
-rw-r--r-- net/mac80211/key.c | 115
-rw-r--r-- net/mac80211/key.h | 16
-rw-r--r-- net/mac80211/main.c | 172
-rw-r--r-- net/mac80211/mesh.c | 15
-rw-r--r-- net/mac80211/mesh.h | 10
-rw-r--r-- net/mac80211/mesh_hwmp.c | 7
-rw-r--r-- net/mac80211/mesh_plink.c | 38
-rw-r--r-- net/mac80211/mlme.c | 1849
-rw-r--r-- net/mac80211/pm.c | 117
-rw-r--r-- net/mac80211/rate.h | 12
-rw-r--r-- net/mac80211/rx.c | 406
-rw-r--r-- net/mac80211/scan.c | 684
-rw-r--r-- net/mac80211/spectmgmt.c | 103
-rw-r--r-- net/mac80211/sta_info.c | 52
-rw-r--r-- net/mac80211/sta_info.h | 13
-rw-r--r-- net/mac80211/tx.c | 122
-rw-r--r-- net/mac80211/util.c | 267
-rw-r--r-- net/mac80211/wext.c | 441
-rw-r--r-- net/mac80211/wme.c | 161
-rw-r--r-- net/mac80211/wme.h | 6
-rw-r--r-- net/mac80211/wpa.c | 152
-rw-r--r-- net/mac80211/wpa.h | 5
-rw-r--r-- net/netfilter/ipvs/ip_vs_sync.c | 4
-rw-r--r-- net/netfilter/nf_conntrack_amanda.c | 4
-rw-r--r-- net/netfilter/nf_conntrack_h323_main.c | 8
-rw-r--r-- net/netfilter/nf_conntrack_netbios_ns.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_pptp.c | 2
-rw-r--r-- net/netfilter/nf_tproxy_core.c | 1
-rw-r--r-- net/netlink/af_netlink.c | 49
-rw-r--r-- net/netrom/af_netrom.c | 2
-rw-r--r-- net/netrom/nr_dev.c | 26
-rw-r--r-- net/packet/af_packet.c | 3
-rw-r--r-- net/phonet/af_phonet.c | 31
-rw-r--r-- net/phonet/pn_dev.c | 119
-rw-r--r-- net/phonet/pn_netlink.c | 29
-rw-r--r-- net/rds/Kconfig | 14
-rw-r--r-- net/rds/Makefile | 14
-rw-r--r-- net/rds/af_rds.c | 586
-rw-r--r-- net/rds/bind.c | 199
-rw-r--r-- net/rds/cong.c | 404
-rw-r--r-- net/rds/connection.c | 487
-rw-r--r-- net/rds/ib.c | 323
-rw-r--r-- net/rds/ib.h | 367
-rw-r--r-- net/rds/ib_cm.c | 726
-rw-r--r-- net/rds/ib_rdma.c | 641
-rw-r--r-- net/rds/ib_recv.c | 869
-rw-r--r-- net/rds/ib_ring.c | 168
-rw-r--r-- net/rds/ib_send.c | 874
-rw-r--r-- net/rds/ib_stats.c | 95
-rw-r--r-- net/rds/ib_sysctl.c | 137
-rw-r--r-- net/rds/info.c | 241
-rw-r--r-- net/rds/info.h | 30
-rw-r--r-- net/rds/iw.c | 333
-rw-r--r-- net/rds/iw.h | 395
-rw-r--r-- net/rds/iw_cm.c | 750
-rw-r--r-- net/rds/iw_rdma.c | 888
-rw-r--r-- net/rds/iw_recv.c | 869
-rw-r--r-- net/rds/iw_ring.c | 169
-rw-r--r-- net/rds/iw_send.c | 975
-rw-r--r-- net/rds/iw_stats.c | 95
-rw-r--r-- net/rds/iw_sysctl.c | 137
-rw-r--r-- net/rds/loop.c | 188
-rw-r--r-- net/rds/loop.h | 9
-rw-r--r-- net/rds/message.c | 402
-rw-r--r-- net/rds/page.c | 221
-rw-r--r-- net/rds/rdma.c | 679
-rw-r--r-- net/rds/rdma.h | 84
-rw-r--r-- net/rds/rdma_transport.c | 214
-rw-r--r-- net/rds/rdma_transport.h | 28
-rw-r--r-- net/rds/rds.h | 686
-rw-r--r-- net/rds/recv.c | 542
-rw-r--r-- net/rds/send.c | 1003
-rw-r--r-- net/rds/stats.c | 148
-rw-r--r-- net/rds/sysctl.c | 122
-rw-r--r-- net/rds/threads.c | 265
-rw-r--r-- net/rds/transport.c | 117
-rw-r--r-- net/rose/af_rose.c | 3
-rw-r--r-- net/rose/rose_dev.c | 22
-rw-r--r-- net/sched/sch_api.c | 11
-rw-r--r-- net/sched/sch_hfsc.c | 6
-rw-r--r-- net/sched/sch_htb.c | 42
-rw-r--r-- net/sched/sch_multiq.c | 2
-rw-r--r-- net/sctp/debug.c | 4
-rw-r--r-- net/sctp/input.c | 14
-rw-r--r-- net/sctp/ipv6.c | 36
-rw-r--r-- net/sctp/output.c | 16
-rw-r--r-- net/sctp/outqueue.c | 3
-rw-r--r-- net/sctp/protocol.c | 31
-rw-r--r-- net/sctp/sm_make_chunk.c | 4
-rw-r--r-- net/sctp/sm_sideeffect.c | 43
-rw-r--r-- net/sctp/sm_statefuns.c | 6
-rw-r--r-- net/sctp/socket.c | 64
-rw-r--r-- net/sctp/transport.c | 9
-rw-r--r-- net/socket.c | 84
-rw-r--r-- net/sunrpc/xprtsock.c | 53
-rw-r--r-- net/unix/af_unix.c | 3
-rw-r--r-- net/wanrouter/wanmain.c | 8
-rw-r--r-- net/wanrouter/wanproc.c | 2
-rw-r--r-- net/wimax/op-msg.c | 9
-rw-r--r-- net/wimax/stack.c | 12
-rw-r--r-- net/wireless/Makefile | 2
-rw-r--r-- net/wireless/core.c | 123
-rw-r--r-- net/wireless/core.h | 56
-rw-r--r-- net/wireless/nl80211.c | 540
-rw-r--r-- net/wireless/nl80211.h | 12
-rw-r--r-- net/wireless/reg.c | 1164
-rw-r--r-- net/wireless/reg.h | 45
-rw-r--r-- net/wireless/scan.c | 866
-rw-r--r-- net/wireless/sysfs.c | 39
-rw-r--r-- net/wireless/util.c | 2
-rw-r--r-- net/wireless/wext-compat.c | 97
-rw-r--r-- net/x25/af_x25.c | 2
235 files changed, 26059 insertions, 5481 deletions
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 70980baeb682..bdbffa3cb043 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -51,7 +51,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
 	int rc = 1;
 	struct datalink_proto *proto;
 	static struct packet_type snap_packet_type = {
-		.type = __constant_htons(ETH_P_SNAP),
+		.type = cpu_to_be16(ETH_P_SNAP),
 	};
 
 	if (unlikely(!pskb_may_pull(skb, 5)))
@@ -95,15 +95,16 @@ static int snap_request(struct datalink_proto *dl,
 EXPORT_SYMBOL(register_snap_client);
 EXPORT_SYMBOL(unregister_snap_client);
 
-static char snap_err_msg[] __initdata =
+static const char snap_err_msg[] __initconst =
 	KERN_CRIT "SNAP - unable to register with 802.2\n";
 
 static int __init snap_init(void)
 {
 	snap_sap = llc_sap_open(0xAA, snap_rcv);
-
-	if (!snap_sap)
+	if (!snap_sap) {
 		printk(snap_err_msg);
+		return -EBUSY;
+	}
 
 	return 0;
 }
diff --git a/net/802/tr.c b/net/802/tr.c
index f47ae289d83b..e7eb13084d71 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -486,6 +486,7 @@ static struct rif_cache *rif_get_idx(loff_t pos)
486} 486}
487 487
488static void *rif_seq_start(struct seq_file *seq, loff_t *pos) 488static void *rif_seq_start(struct seq_file *seq, loff_t *pos)
489 __acquires(&rif_lock)
489{ 490{
490 spin_lock_irq(&rif_lock); 491 spin_lock_irq(&rif_lock);
491 492
@@ -517,6 +518,7 @@ static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
517} 518}
518 519
519static void rif_seq_stop(struct seq_file *seq, void *v) 520static void rif_seq_stop(struct seq_file *seq, void *v)
521 __releases(&rif_lock)
520{ 522{
521 spin_unlock_irq(&rif_lock); 523 spin_unlock_irq(&rif_lock);
522} 524}
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 41e8f65bd3f0..4163ea65bf41 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -52,7 +52,7 @@ static const char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
 static const char vlan_buggyright[] = "David S. Miller <davem@redhat.com>";
 
 static struct packet_type vlan_packet_type = {
-	.type = __constant_htons(ETH_P_8021Q),
+	.type = cpu_to_be16(ETH_P_8021Q),
 	.func = vlan_skb_recv, /* VLAN receive method */
 };
 
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 2886d2fb9ab5..2d6e405fc498 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -89,7 +89,9 @@ static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
 		goto drop;
 
 	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow = p->dev == skb->dev;
+		NAPI_GRO_CB(p)->same_flow =
+			p->dev == skb->dev && !compare_ether_header(
+				skb_mac_header(p), skb_gro_mac_header(skb));
 		NAPI_GRO_CB(p)->flush = 0;
 	}
 
@@ -102,25 +104,12 @@ drop:
 int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 		     unsigned int vlan_tci, struct sk_buff *skb)
 {
-	int err = NET_RX_SUCCESS;
+	skb_gro_reset_offset(skb);
 
 	if (netpoll_receive_skb(skb))
 		return NET_RX_DROP;
 
-	switch (vlan_gro_common(napi, grp, vlan_tci, skb)) {
-	case -1:
-		return netif_receive_skb(skb);
-
-	case 2:
-		err = NET_RX_DROP;
-		/* fall through */
-
-	case 1:
-		kfree_skb(skb);
-		break;
-	}
-
-	return err;
+	return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb);
 }
 EXPORT_SYMBOL(vlan_gro_receive);
 
@@ -128,30 +117,14 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 		   unsigned int vlan_tci, struct napi_gro_fraginfo *info)
 {
 	struct sk_buff *skb = napi_fraginfo_skb(napi, info);
-	int err = NET_RX_DROP;
 
 	if (!skb)
-		goto out;
+		return NET_RX_DROP;
 
 	if (netpoll_receive_skb(skb))
-		goto out;
-
-	err = NET_RX_SUCCESS;
-
-	switch (vlan_gro_common(napi, grp, vlan_tci, skb)) {
-	case -1:
-		return netif_receive_skb(skb);
-
-	case 2:
-		err = NET_RX_DROP;
-		/* fall through */
-
-	case 1:
-		napi_reuse_skb(napi, skb);
-		break;
-	}
+		return NET_RX_DROP;
 
-out:
-	return err;
+	return napi_frags_finish(napi, skb,
+				 vlan_gro_common(napi, grp, vlan_tci, skb));
 }
 EXPORT_SYMBOL(vlan_gro_frags);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 1df0356f242b..c613ed08a5ee 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -417,7 +417,7 @@ static int p9_fd_write(struct p9_client *client, void *v, int len)
 	oldfs = get_fs();
 	set_fs(get_ds());
 	/* The cast to a user pointer is valid due to the set_fs() */
-	ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
+	ret = vfs_write(ts->wr, (__force void __user *)v, len, &ts->wr->f_pos);
 	set_fs(oldfs);
 
 	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
diff --git a/net/Kconfig b/net/Kconfig
index cdb8fdef6c4a..6b39ede3b1b1 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -171,6 +171,7 @@ endif
 
 source "net/dccp/Kconfig"
 source "net/sctp/Kconfig"
+source "net/rds/Kconfig"
 source "net/tipc/Kconfig"
 source "net/atm/Kconfig"
 source "net/802/Kconfig"
@@ -185,6 +186,7 @@ source "net/x25/Kconfig"
 source "net/lapb/Kconfig"
 source "net/econet/Kconfig"
 source "net/wanrouter/Kconfig"
+source "net/phonet/Kconfig"
 source "net/sched/Kconfig"
 source "net/dcb/Kconfig"
 
@@ -229,7 +231,6 @@ source "net/can/Kconfig"
 source "net/irda/Kconfig"
 source "net/bluetooth/Kconfig"
 source "net/rxrpc/Kconfig"
-source "net/phonet/Kconfig"
 
 config FIB_RULES
 	bool
diff --git a/net/Makefile b/net/Makefile
index 0fcce89d7169..9e00a55a901b 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -49,6 +49,7 @@ obj-y += 8021q/
 endif
 obj-$(CONFIG_IP_DCCP) += dccp/
 obj-$(CONFIG_IP_SCTP) += sctp/
+obj-$(CONFIG_RDS) += rds/
 obj-y += wireless/
 obj-$(CONFIG_MAC80211) += mac80211/
 obj-$(CONFIG_TIPC) += tipc/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 5abce07fb50a..cf05c43cba52 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1861,12 +1861,12 @@ static struct notifier_block ddp_notifier = {
 };
 
 static struct packet_type ltalk_packet_type = {
-	.type = __constant_htons(ETH_P_LOCALTALK),
+	.type = cpu_to_be16(ETH_P_LOCALTALK),
 	.func = ltalk_rcv,
 };
 
 static struct packet_type ppptalk_packet_type = {
-	.type = __constant_htons(ETH_P_PPPTALK),
+	.type = cpu_to_be16(ETH_P_PPPTALK),
 	.func = atalk_rcv,
 };
 
@@ -1877,7 +1877,7 @@ EXPORT_SYMBOL(aarp_send_ddp);
 EXPORT_SYMBOL(atrtr_get_dev);
 EXPORT_SYMBOL(atalk_find_dev_addr);
 
-static char atalk_err_snap[] __initdata =
+static const char atalk_err_snap[] __initconst =
 	KERN_CRIT "Unable to register DDP with SNAP.\n";
 
 /* Called by proto.c on kernel start up */
diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c
index d856a62ab50f..72277d70c980 100644
--- a/net/appletalk/dev.c
+++ b/net/appletalk/dev.c
@@ -9,22 +9,20 @@
 #include <linux/if_arp.h>
 #include <linux/if_ltalk.h>
 
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
 static int ltalk_change_mtu(struct net_device *dev, int mtu)
 {
 	return -EINVAL;
 }
-
-static int ltalk_mac_addr(struct net_device *dev, void *addr)
-{
-	return -EINVAL;
-}
+#endif
 
 static void ltalk_setup(struct net_device *dev)
 {
 	/* Fill in the fields of the device structure with localtalk-generic values. */
 
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
 	dev->change_mtu = ltalk_change_mtu;
-	dev->set_mac_address = ltalk_mac_addr;
+#endif
 
 	dev->type = ARPHRD_LOCALTLK;
 	dev->hard_header_len = LTALK_HLEN;
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index ea9438fc6855..334fcd4a4ea4 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -83,7 +83,6 @@ struct br2684_dev {
 	struct list_head br2684_devs;
 	int number;
 	struct list_head brvccs;	/* one device <=> one vcc (before xmas) */
-	struct net_device_stats stats;
 	int mac_was_set;
 	enum br2684_payload payload;
 };
@@ -148,9 +147,10 @@ static struct net_device *br2684_find_dev(const struct br2684_if_spec *s)
  * the way for multiple vcc's per itf. Returns true if we can send,
  * otherwise false
  */
-static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
+static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
 			   struct br2684_vcc *brvcc)
 {
+	struct br2684_dev *brdev = BRPRIV(dev);
 	struct atm_vcc *atmvcc;
 	int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2;
 
@@ -211,8 +211,8 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
 	}
 	atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
 	ATM_SKB(skb)->atm_options = atmvcc->atm_options;
-	brdev->stats.tx_packets++;
-	brdev->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
 	atmvcc->send(atmvcc, skb);
 	return 1;
 }
@@ -233,14 +233,14 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	brvcc = pick_outgoing_vcc(skb, brdev);
 	if (brvcc == NULL) {
 		pr_debug("no vcc attached to dev %s\n", dev->name);
-		brdev->stats.tx_errors++;
-		brdev->stats.tx_carrier_errors++;
+		dev->stats.tx_errors++;
+		dev->stats.tx_carrier_errors++;
 		/* netif_stop_queue(dev); */
 		dev_kfree_skb(skb);
 		read_unlock(&devs_lock);
 		return 0;
 	}
-	if (!br2684_xmit_vcc(skb, brdev, brvcc)) {
+	if (!br2684_xmit_vcc(skb, dev, brvcc)) {
 		/*
 		 * We should probably use netif_*_queue() here, but that
 		 * involves added complication. We need to walk before
@@ -248,27 +248,20 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		 *
 		 * Don't free here! this pointer might be no longer valid!
 		 */
-		brdev->stats.tx_errors++;
-		brdev->stats.tx_fifo_errors++;
+		dev->stats.tx_errors++;
+		dev->stats.tx_fifo_errors++;
 	}
 	read_unlock(&devs_lock);
 	return 0;
 }
 
-static struct net_device_stats *br2684_get_stats(struct net_device *dev)
-{
-	pr_debug("br2684_get_stats\n");
-	return &BRPRIV(dev)->stats;
-}
-
 /*
  * We remember when the MAC gets set, so we don't override it later with
  * the ESI of the ATM card of the first VC
 */
-static int (*my_eth_mac_addr) (struct net_device *, void *);
 static int br2684_mac_addr(struct net_device *dev, void *p)
 {
-	int err = my_eth_mac_addr(dev, p);
+	int err = eth_mac_addr(dev, p);
 	if (!err)
 		BRPRIV(dev)->mac_was_set = 1;
 	return err;
@@ -430,17 +423,17 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 	/* sigh, interface is down? */
 	if (unlikely(!(net_dev->flags & IFF_UP)))
 		goto dropped;
-	brdev->stats.rx_packets++;
-	brdev->stats.rx_bytes += skb->len;
+	net_dev->stats.rx_packets++;
+	net_dev->stats.rx_bytes += skb->len;
 	memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
 	netif_rx(skb);
 	return;
 
 dropped:
-	brdev->stats.rx_dropped++;
+	net_dev->stats.rx_dropped++;
 	goto free_skb;
 error:
-	brdev->stats.rx_errors++;
+	net_dev->stats.rx_errors++;
 free_skb:
 	dev_kfree_skb(skb);
 	return;
@@ -531,8 +524,8 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 
 		skb->next = skb->prev = NULL;
 		br2684_push(atmvcc, skb);
-		BRPRIV(skb->dev)->stats.rx_bytes -= skb->len;
-		BRPRIV(skb->dev)->stats.rx_packets--;
+		skb->dev->stats.rx_bytes -= skb->len;
+		skb->dev->stats.rx_packets--;
 
 		skb = next;
 	}
@@ -544,17 +537,20 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 	return err;
 }
 
+static const struct net_device_ops br2684_netdev_ops = {
+	.ndo_start_xmit = br2684_start_xmit,
+	.ndo_set_mac_address = br2684_mac_addr,
+	.ndo_change_mtu = eth_change_mtu,
+	.ndo_validate_addr = eth_validate_addr,
+};
+
 static void br2684_setup(struct net_device *netdev)
 {
 	struct br2684_dev *brdev = BRPRIV(netdev);
 
 	ether_setup(netdev);
-	brdev->net_dev = netdev;
 
-	my_eth_mac_addr = netdev->set_mac_address;
-	netdev->set_mac_address = br2684_mac_addr;
-	netdev->hard_start_xmit = br2684_start_xmit;
-	netdev->get_stats = br2684_get_stats;
+	netdev->netdev_ops = &br2684_netdev_ops;
 
 	INIT_LIST_HEAD(&brdev->brvccs);
 }
@@ -565,10 +561,8 @@ static void br2684_setup_routed(struct net_device *netdev)
 	brdev->net_dev = netdev;
 
 	netdev->hard_header_len = 0;
-	my_eth_mac_addr = netdev->set_mac_address;
-	netdev->set_mac_address = br2684_mac_addr;
-	netdev->hard_start_xmit = br2684_start_xmit;
-	netdev->get_stats = br2684_get_stats;
+
+	netdev->netdev_ops = &br2684_netdev_ops;
 	netdev->addr_len = 0;
 	netdev->mtu = 1500;
 	netdev->type = ARPHRD_PPP;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 2d33a83be799..da42fd06b61f 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -214,15 +214,15 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
 		skb->protocol = ((__be16 *) skb->data)[3];
 		skb_pull(skb, RFC1483LLC_LEN);
 		if (skb->protocol == htons(ETH_P_ARP)) {
-			PRIV(skb->dev)->stats.rx_packets++;
-			PRIV(skb->dev)->stats.rx_bytes += skb->len;
+			skb->dev->stats.rx_packets++;
+			skb->dev->stats.rx_bytes += skb->len;
 			clip_arp_rcv(skb);
 			return;
 		}
 	}
 	clip_vcc->last_use = jiffies;
-	PRIV(skb->dev)->stats.rx_packets++;
-	PRIV(skb->dev)->stats.rx_bytes += skb->len;
+	skb->dev->stats.rx_packets++;
+	skb->dev->stats.rx_bytes += skb->len;
 	memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
 	netif_rx(skb);
 }
@@ -372,7 +372,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (!skb->dst) {
 		printk(KERN_ERR "clip_start_xmit: skb->dst == NULL\n");
 		dev_kfree_skb(skb);
-		clip_priv->stats.tx_dropped++;
+		dev->stats.tx_dropped++;
 		return 0;
 	}
 	if (!skb->dst->neighbour) {
@@ -380,13 +380,13 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		skb->dst->neighbour = clip_find_neighbour(skb->dst, 1);
 		if (!skb->dst->neighbour) {
 			dev_kfree_skb(skb);	/* lost that one */
-			clip_priv->stats.tx_dropped++;
+			dev->stats.tx_dropped++;
 			return 0;
 		}
 #endif
 		printk(KERN_ERR "clip_start_xmit: NO NEIGHBOUR !\n");
 		dev_kfree_skb(skb);
-		clip_priv->stats.tx_dropped++;
+		dev->stats.tx_dropped++;
 		return 0;
 	}
 	entry = NEIGH2ENTRY(skb->dst->neighbour);
@@ -400,7 +400,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			skb_queue_tail(&entry->neigh->arp_queue, skb);
 		else {
 			dev_kfree_skb(skb);
-			clip_priv->stats.tx_dropped++;
+			dev->stats.tx_dropped++;
 		}
 		return 0;
 	}
@@ -423,8 +423,8 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		printk(KERN_WARNING "clip_start_xmit: XOFF->XOFF transition\n");
 		return 0;
 	}
-	clip_priv->stats.tx_packets++;
-	clip_priv->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
 	vcc->send(vcc, skb);
 	if (atm_may_send(vcc, 0)) {
 		entry->vccs->xoff = 0;
@@ -443,11 +443,6 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-static struct net_device_stats *clip_get_stats(struct net_device *dev)
-{
-	return &PRIV(dev)->stats;
-}
-
 static int clip_mkip(struct atm_vcc *vcc, int timeout)
 {
 	struct clip_vcc *clip_vcc;
@@ -501,8 +496,8 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout)
 
 		skb_get(skb);
 		clip_push(vcc, skb);
-		PRIV(skb->dev)->stats.rx_packets--;
-		PRIV(skb->dev)->stats.rx_bytes -= len;
+		skb->dev->stats.rx_packets--;
+		skb->dev->stats.rx_bytes -= len;
 		kfree_skb(skb);
 	}
 
@@ -561,7 +556,6 @@ static void clip_setup(struct net_device *dev)
 {
 	dev->hard_start_xmit = clip_start_xmit;
 	/* sg_xmit ... */
-	dev->get_stats = clip_get_stats;
 	dev->type = ARPHRD_ATM;
 	dev->hard_header_len = RFC1483LLC_LEN;
 	dev->mtu = RFC1626_MTU;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index e5e301550e8a..c0cba9a037e8 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -62,7 +62,6 @@ static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 };
 static int lec_open(struct net_device *dev);
 static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev);
 static int lec_close(struct net_device *dev);
-static struct net_device_stats *lec_get_stats(struct net_device *dev);
 static void lec_init(struct net_device *dev);
 static struct lec_arp_table *lec_arp_find(struct lec_priv *priv,
 					  const unsigned char *mac_addr);
@@ -218,28 +217,28 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
 
 static int lec_open(struct net_device *dev)
 {
-	struct lec_priv *priv = netdev_priv(dev);
-
 	netif_start_queue(dev);
-	memset(&priv->stats, 0, sizeof(struct net_device_stats));
+	memset(&dev->stats, 0, sizeof(struct net_device_stats));
 
 	return 0;
 }
 
-static __inline__ void
-lec_send(struct atm_vcc *vcc, struct sk_buff *skb, struct lec_priv *priv)
+static void
+lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
 {
+	struct net_device *dev = skb->dev;
+
 	ATM_SKB(skb)->vcc = vcc;
 	ATM_SKB(skb)->atm_options = vcc->atm_options;
 
 	atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
 	if (vcc->send(vcc, skb) < 0) {
-		priv->stats.tx_dropped++;
+		dev->stats.tx_dropped++;
 		return;
 	}
 
-	priv->stats.tx_packets++;
-	priv->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
 }
 
 static void lec_tx_timeout(struct net_device *dev)
@@ -270,7 +269,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	pr_debug("lec_start_xmit called\n");
 	if (!priv->lecd) {
 		printk("%s:No lecd attached\n", dev->name);
-		priv->stats.tx_errors++;
+		dev->stats.tx_errors++;
 		netif_stop_queue(dev);
 		return -EUNATCH;
 	}
@@ -345,7 +344,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
 					       GFP_ATOMIC);
 			dev_kfree_skb(skb);
 			if (skb2 == NULL) {
-				priv->stats.tx_dropped++;
+				dev->stats.tx_dropped++;
 				return 0;
 			}
 			skb = skb2;
@@ -380,7 +379,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			    ("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ",
 			     dev->name);
 			pr_debug("MAC address %pM\n", lec_h->h_dest);
-			priv->stats.tx_dropped++;
+			dev->stats.tx_dropped++;
 			dev_kfree_skb(skb);
 		}
 		goto out;
@@ -392,10 +391,10 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) {
 		pr_debug("lec.c: emptying tx queue, ");
 		pr_debug("MAC address %pM\n", lec_h->h_dest);
-		lec_send(vcc, skb2, priv);
+		lec_send(vcc, skb2);
 	}
 
-	lec_send(vcc, skb, priv);
+	lec_send(vcc, skb);
 
 	if (!atm_may_send(vcc, 0)) {
 		struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
@@ -427,15 +426,6 @@ static int lec_close(struct net_device *dev)
 	return 0;
 }
 
-/*
- * Get the current statistics.
- * This may be called with the card open or closed.
- */
-static struct net_device_stats *lec_get_stats(struct net_device *dev)
-{
-	return &((struct lec_priv *)netdev_priv(dev))->stats;
-}
-
 static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
 {
 	unsigned long flags;
@@ -677,17 +667,19 @@ static void lec_set_multicast_list(struct net_device *dev)
 	return;
 }
 
+static const struct net_device_ops lec_netdev_ops = {
+	.ndo_open = lec_open,
+	.ndo_stop = lec_close,
+	.ndo_start_xmit = lec_start_xmit,
+	.ndo_change_mtu = lec_change_mtu,
+	.ndo_tx_timeout = lec_tx_timeout,
+	.ndo_set_multicast_list = lec_set_multicast_list,
+};
+
+
 static void lec_init(struct net_device *dev)
 {
-	dev->change_mtu = lec_change_mtu;
-	dev->open = lec_open;
-	dev->stop = lec_close;
-	dev->hard_start_xmit = lec_start_xmit;
-	dev->tx_timeout = lec_tx_timeout;
-
-	dev->get_stats = lec_get_stats;
-	dev->set_multicast_list = lec_set_multicast_list;
-	dev->do_ioctl = NULL;
+	dev->netdev_ops = &lec_netdev_ops;
 	printk("%s: Initialized!\n", dev->name);
 }
 
@@ -810,8 +802,8 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
 		else
 #endif
 			skb->protocol = eth_type_trans(skb, dev);
-		priv->stats.rx_packets++;
-		priv->stats.rx_bytes += skb->len;
+		dev->stats.rx_packets++;
+		dev->stats.rx_bytes += skb->len;
 		memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
 		netif_rx(skb);
 	}
@@ -1887,7 +1879,7 @@ restart:
 			lec_arp_hold(entry);
 			spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
 			while ((skb = skb_dequeue(&entry->tx_wait)) != NULL)
-				lec_send(vcc, skb, entry->priv);
+				lec_send(vcc, skb);
 			entry->last_used = jiffies;
 			entry->status = ESI_FORWARD_DIRECT;
 			lec_arp_put(entry);
@@ -2305,7 +2297,7 @@ restart:
 			lec_arp_hold(entry);
 			spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
 			while ((skb = skb_dequeue(&entry->tx_wait)) != NULL)
-				lec_send(vcc, skb, entry->priv);
+				lec_send(vcc, skb);
 			entry->last_used = jiffies;
 			entry->status = ESI_FORWARD_DIRECT;
 			lec_arp_put(entry);
diff --git a/net/atm/lec.h b/net/atm/lec.h
index 0d376682c1a3..9d14d196cc1d 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -69,7 +69,6 @@ struct lane2_ops {
 #define LEC_ARP_TABLE_SIZE 16
 
 struct lec_priv {
-	struct net_device_stats stats;
 	unsigned short lecid;			/* Lecid of this client */
 	struct hlist_head lec_arp_empty_ones;
 	/* Used for storing VCC's that don't have a MAC address attached yet */
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 00d9e5e13158..d127fd3ba5c6 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1986,7 +1986,7 @@ static const struct proto_ops ax25_proto_ops = {
  *	Called by socket.c on kernel start up
  */
 static struct packet_type ax25_packet_type = {
-	.type = __constant_htons(ETH_P_AX25),
+	.type = cpu_to_be16(ETH_P_AX25),
 	.dev = NULL,			/* All devices */
 	.func = ax25_kiss_rcv,
 };
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
index 8443af57a374..71338f112108 100644
--- a/net/ax25/ax25_iface.c
+++ b/net/ax25/ax25_iface.c
@@ -61,27 +61,24 @@ void ax25_protocol_release(unsigned int pid)
 
 	write_lock_bh(&protocol_list_lock);
 	protocol = protocol_list;
-	if (protocol == NULL) {
-		write_unlock_bh(&protocol_list_lock);
-		return;
-	}
+	if (protocol == NULL)
+		goto out;
 
 	if (protocol->pid == pid) {
 		protocol_list = protocol->next;
-		write_unlock_bh(&protocol_list_lock);
-		return;
+		goto out;
 	}
 
 	while (protocol != NULL && protocol->next != NULL) {
 		if (protocol->next->pid == pid) {
 			s = protocol->next;
 			protocol->next = protocol->next->next;
-			write_unlock_bh(&protocol_list_lock);
-			return;
+			goto out;
 		}
 
 		protocol = protocol->next;
 	}
+out:
 	write_unlock_bh(&protocol_list_lock);
 }
 
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 744ed3f07ef3..02b9baa1930b 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -41,14 +41,13 @@
 
 #include <net/bluetooth/bluetooth.h>
 
-#define VERSION "2.14"
+#define VERSION "2.15"
 
 /* Bluetooth sockets */
 #define BT_MAX_PROTO	8
 static struct net_proto_family *bt_proto[BT_MAX_PROTO];
 static DEFINE_RWLOCK(bt_proto_lock);
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key bt_lock_key[BT_MAX_PROTO];
 static const char *bt_key_strings[BT_MAX_PROTO] = {
 	"sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP",
@@ -86,11 +85,6 @@ static inline void bt_sock_reclassify_lock(struct socket *sock, int proto)
 			bt_slock_key_strings[proto], &bt_slock_key[proto],
 				bt_key_strings[proto], &bt_lock_key[proto]);
 }
-#else
-static inline void bt_sock_reclassify_lock(struct socket *sock, int proto)
-{
-}
-#endif
 
 int bt_sock_register(int proto, struct net_proto_family *ops)
 {
@@ -217,7 +211,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
 			continue;
 		}
 
-		if (sk->sk_state == BT_CONNECTED || !newsock) {
+		if (sk->sk_state == BT_CONNECTED || !newsock ||
+					bt_sk(parent)->defer_setup) {
 			bt_accept_unlink(sk);
 			if (newsock)
 				sock_graft(sk, newsock);
@@ -232,7 +227,7 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
 EXPORT_SYMBOL(bt_accept_dequeue);
 
 int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-			struct msghdr *msg, size_t len, int flags)
+				struct msghdr *msg, size_t len, int flags)
 {
 	int noblock = flags & MSG_DONTWAIT;
 	struct sock *sk = sock->sk;
@@ -277,7 +272,9 @@ static inline unsigned int bt_accept_poll(struct sock *parent)
 
 	list_for_each_safe(p, n, &bt_sk(parent)->accept_q) {
 		sk = (struct sock *) list_entry(p, struct bt_sock, accept_q);
-		if (sk->sk_state == BT_CONNECTED)
+		if (sk->sk_state == BT_CONNECTED ||
+				(bt_sk(parent)->defer_setup &&
+					sk->sk_state == BT_CONNECT2))
 			return POLLIN | POLLRDNORM;
 	}
 
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index c9cac7719efe..0073ec8495da 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -126,8 +126,7 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const
 
 	session->reassembly[id] = nskb;
 
-	if (skb)
-		kfree_skb(skb);
+	kfree_skb(skb);
 }
 
 static inline int cmtp_recv_frame(struct cmtp_session *session, struct sk_buff *skb)
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index a4a789f24c8d..1181db08d9de 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -123,6 +123,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle)
 	conn->state = BT_CONNECT;
 	conn->out = 1;
 
+	conn->attempt++;
+
 	cp.handle = cpu_to_le16(handle);
 	cp.pkt_type = cpu_to_le16(conn->pkt_type);
 
@@ -139,6 +141,8 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle)
 	conn->state = BT_CONNECT;
 	conn->out = 1;
 
+	conn->attempt++;
+
 	cp.handle = cpu_to_le16(handle);
 	cp.pkt_type = cpu_to_le16(conn->pkt_type);
 
@@ -155,6 +159,7 @@ static void hci_conn_timeout(unsigned long arg)
 {
 	struct hci_conn *conn = (void *) arg;
 	struct hci_dev *hdev = conn->hdev;
+	__u8 reason;
 
 	BT_DBG("conn %p state %d", conn, conn->state);
 
@@ -173,7 +178,8 @@ static void hci_conn_timeout(unsigned long arg)
 		break;
 	case BT_CONFIG:
 	case BT_CONNECTED:
-		hci_acl_disconn(conn, 0x13);
+		reason = hci_proto_disconn_ind(conn);
+		hci_acl_disconn(conn, reason);
 		break;
 	default:
 		conn->state = BT_CLOSED;
@@ -216,12 +222,13 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
 		break;
 	case SCO_LINK:
 		if (lmp_esco_capable(hdev))
-			conn->pkt_type = hdev->esco_type & SCO_ESCO_MASK;
+			conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
+					(hdev->esco_type & EDR_ESCO_MASK);
 		else
 			conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK;
 		break;
 	case ESCO_LINK:
-		conn->pkt_type = hdev->esco_type;
+		conn->pkt_type = hdev->esco_type & ~EDR_ESCO_MASK;
 		break;
 	}
 
@@ -280,6 +287,8 @@ int hci_conn_del(struct hci_conn *conn)
 
 	skb_queue_purge(&conn->data_q);
 
+	hci_conn_del_sysfs(conn);
+
 	return 0;
 }
 
@@ -325,7 +334,7 @@ EXPORT_SYMBOL(hci_get_route);
 
 /* Create SCO or ACL connection.
 * Device _must_ be locked */
-struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 auth_type)
+struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sec_level, __u8 auth_type)
 {
 	struct hci_conn *acl;
 	struct hci_conn *sco;
@@ -340,6 +349,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
 	hci_conn_hold(acl);
 
 	if (acl->state == BT_OPEN || acl->state == BT_CLOSED) {
+		acl->sec_level = sec_level;
 		acl->auth_type = auth_type;
 		hci_acl_connect(acl);
 	}
@@ -385,51 +395,59 @@ int hci_conn_check_link_mode(struct hci_conn *conn)
 EXPORT_SYMBOL(hci_conn_check_link_mode);
 
 /* Authenticate remote device */
-int hci_conn_auth(struct hci_conn *conn)
+static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
 {
 	BT_DBG("conn %p", conn);
 
-	if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) {
-		if (!(conn->auth_type & 0x01)) {
-			conn->auth_type |= 0x01;
-			conn->link_mode &= ~HCI_LM_AUTH;
-		}
-	}
-
-	if (conn->link_mode & HCI_LM_AUTH)
+	if (sec_level > conn->sec_level)
+		conn->sec_level = sec_level;
+	else if (conn->link_mode & HCI_LM_AUTH)
 		return 1;
 
+	conn->auth_type = auth_type;
+
 	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
 		struct hci_cp_auth_requested cp;
 		cp.handle = cpu_to_le16(conn->handle);
 		hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED,
 							sizeof(cp), &cp);
 	}
+
 	return 0;
 }
-EXPORT_SYMBOL(hci_conn_auth);
 
-/* Enable encryption */
-int hci_conn_encrypt(struct hci_conn *conn)
+/* Enable security */
+int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
 {
 	BT_DBG("conn %p", conn);
 
+	if (sec_level == BT_SECURITY_SDP)
+		return 1;
+
+	if (sec_level == BT_SECURITY_LOW) {
+		if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0)
+			return hci_conn_auth(conn, sec_level, auth_type);
+		else
+			return 1;
+	}
+
 	if (conn->link_mode & HCI_LM_ENCRYPT)
-		return hci_conn_auth(conn);
+		return hci_conn_auth(conn, sec_level, auth_type);
 
 	if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend))
 		return 0;
 
-	if (hci_conn_auth(conn)) {
+	if (hci_conn_auth(conn, sec_level, auth_type)) {
 		struct hci_cp_set_conn_encrypt cp;
 		cp.handle = cpu_to_le16(conn->handle);
 		cp.encrypt = 1;
 		hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT,
 							sizeof(cp), &cp);
 	}
+
 	return 0;
 }
-EXPORT_SYMBOL(hci_conn_encrypt);
+EXPORT_SYMBOL(hci_conn_security);
 
 /* Change link key */
 int hci_conn_change_link_key(struct hci_conn *conn)
@@ -442,12 +460,13 @@ int hci_conn_change_link_key(struct hci_conn *conn)
 		hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY,
 							sizeof(cp), &cp);
 	}
+
 	return 0;
 }
 EXPORT_SYMBOL(hci_conn_change_link_key);
 
 /* Switch role */
-int hci_conn_switch_role(struct hci_conn *conn, uint8_t role)
+int hci_conn_switch_role(struct hci_conn *conn, __u8 role)
 {
 	BT_DBG("conn %p", conn);
 
@@ -460,6 +479,7 @@ int hci_conn_switch_role(struct hci_conn *conn, uint8_t role)
 		cp.role = role;
 		hci_send_cmd(conn->hdev, HCI_OP_SWITCH_ROLE, sizeof(cp), &cp);
 	}
+
 	return 0;
 }
 EXPORT_SYMBOL(hci_conn_switch_role);
@@ -542,9 +562,7 @@ void hci_conn_hash_flush(struct hci_dev *hdev)
 
 		c->state = BT_CLOSED;
 
-		hci_conn_del_sysfs(c);
-
-		hci_proto_disconn_ind(c, 0x16);
+		hci_proto_disconn_cfm(c, 0x16);
 		hci_conn_del(c);
 	}
 }
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index ba78cc1eb8d9..cd061510b6bd 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1565,8 +1565,7 @@ static void hci_cmd_task(unsigned long arg)
 
 	/* Send queued commands */
 	if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) {
-		if (hdev->sent_cmd)
-			kfree_skb(hdev->sent_cmd);
+		kfree_skb(hdev->sent_cmd);
 
 		if ((hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC))) {
 			atomic_dec(&hdev->cmd_cnt);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index f91ba690f5d2..55534244c3a0 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -484,6 +484,15 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, struct sk_buff *skb
 	if (hdev->features[4] & LMP_EV5)
 		hdev->esco_type |= (ESCO_EV5);
 
+	if (hdev->features[5] & LMP_EDR_ESCO_2M)
+		hdev->esco_type |= (ESCO_2EV3);
+
+	if (hdev->features[5] & LMP_EDR_ESCO_3M)
+		hdev->esco_type |= (ESCO_3EV3);
+
+	if (hdev->features[5] & LMP_EDR_3S_ESCO)
+		hdev->esco_type |= (ESCO_2EV5 | ESCO_3EV5);
+
 	BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name,
 					hdev->features[0], hdev->features[1],
 					hdev->features[2], hdev->features[3],
@@ -914,7 +923,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 	if (ev->status) {
 		hci_proto_connect_cfm(conn, ev->status);
 		hci_conn_del(conn);
-	}
+	} else if (ev->link_type != ACL_LINK)
+		hci_proto_connect_cfm(conn, ev->status);
 
 unlock:
 	hci_dev_unlock(hdev);
@@ -1009,9 +1019,7 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff
 	if (conn) {
 		conn->state = BT_CLOSED;
 
-		hci_conn_del_sysfs(conn);
-
-		hci_proto_disconn_ind(conn, ev->reason);
+		hci_proto_disconn_cfm(conn, ev->reason);
 		hci_conn_del(conn);
 	}
 
@@ -1600,7 +1608,8 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b
 
 	if (conn->state == BT_CONFIG) {
 		if (!ev->status && hdev->ssp_mode > 0 &&
-				conn->ssp_mode > 0 && conn->out) {
+				conn->ssp_mode > 0 && conn->out &&
+				conn->sec_level != BT_SECURITY_SDP) {
 			struct hci_cp_auth_requested cp;
 			cp.handle = ev->handle;
 			hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED,
@@ -1637,6 +1646,13 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu
 		conn->type = SCO_LINK;
 	}
 
+	if (conn->out && ev->status == 0x1c && conn->attempt < 2) {
+		conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
+					(hdev->esco_type & EDR_ESCO_MASK);
+		hci_setup_sync(conn, conn->link->handle);
+		goto unlock;
+	}
+
 	if (!ev->status) {
 		conn->handle = __le16_to_cpu(ev->handle);
 		conn->state = BT_CONNECTED;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index b93748e224ff..ca4d3b40d5ce 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -50,9 +50,10 @@
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/l2cap.h>
 
-#define VERSION "2.11"
+#define VERSION "2.13"
 
-static u32 l2cap_feat_mask = 0x0000;
+static u32 l2cap_feat_mask = 0x0080;
+static u8 l2cap_fixed_chan[8] = { 0x02, };
 
 static const struct proto_ops l2cap_sock_ops;
 
@@ -77,9 +78,10 @@ static void l2cap_sock_timeout(unsigned long arg)
 
 	bh_lock_sock(sk);
 
-	if (sk->sk_state == BT_CONNECT &&
-			(l2cap_pi(sk)->link_mode & (L2CAP_LM_AUTH |
-					L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)))
+	if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)
+		reason = ECONNREFUSED;
+	else if (sk->sk_state == BT_CONNECT &&
+				l2cap_pi(sk)->sec_level != BT_SECURITY_SDP)
 		reason = ECONNREFUSED;
 	else
 		reason = ETIMEDOUT;
@@ -204,6 +206,8 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so
 
 	BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid);
 
+	conn->disc_reason = 0x13;
+
 	l2cap_pi(sk)->conn = conn;
 
 	if (sk->sk_type == SOCK_SEQPACKET) {
@@ -259,18 +263,35 @@ static void l2cap_chan_del(struct sock *sk, int err)
 }
 
 /* Service level security */
-static inline int l2cap_check_link_mode(struct sock *sk)
+static inline int l2cap_check_security(struct sock *sk)
 {
 	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+	__u8 auth_type;
 
-	if ((l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) ||
-			(l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE))
-		return hci_conn_encrypt(conn->hcon);
+	if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) {
+		if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
+			auth_type = HCI_AT_NO_BONDING_MITM;
+		else
+			auth_type = HCI_AT_NO_BONDING;
 
-	if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH)
-		return hci_conn_auth(conn->hcon);
+		if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW)
+			l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
+	} else {
+		switch (l2cap_pi(sk)->sec_level) {
+		case BT_SECURITY_HIGH:
+			auth_type = HCI_AT_GENERAL_BONDING_MITM;
+			break;
+		case BT_SECURITY_MEDIUM:
+			auth_type = HCI_AT_GENERAL_BONDING;
+			break;
+		default:
+			auth_type = HCI_AT_NO_BONDING;
+			break;
+		}
+	}
 
-	return 1;
+	return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level,
+								auth_type);
 }
 
 static inline u8 l2cap_get_ident(struct l2cap_conn *conn)
@@ -312,7 +333,10 @@ static void l2cap_do_start(struct sock *sk)
 	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
 
 	if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) {
-		if (l2cap_check_link_mode(sk)) {
+		if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
+			return;
+
+		if (l2cap_check_security(sk)) {
 			struct l2cap_conn_req req;
 			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 			req.psm = l2cap_pi(sk)->psm;
@@ -356,7 +380,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 		}
 
 		if (sk->sk_state == BT_CONNECT) {
-			if (l2cap_check_link_mode(sk)) {
+			if (l2cap_check_security(sk)) {
 				struct l2cap_conn_req req;
 				req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 				req.psm = l2cap_pi(sk)->psm;
@@ -371,10 +395,18 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 			rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
 			rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
 
-			if (l2cap_check_link_mode(sk)) {
-				sk->sk_state = BT_CONFIG;
-				rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
-				rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
+			if (l2cap_check_security(sk)) {
+				if (bt_sk(sk)->defer_setup) {
+					struct sock *parent = bt_sk(sk)->parent;
+					rsp.result = cpu_to_le16(L2CAP_CR_PEND);
+					rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND);
+					parent->sk_data_ready(parent, 0);
+
+				} else {
+					sk->sk_state = BT_CONFIG;
+					rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
408 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
409 }
378 } else { 410 } else {
379 rsp.result = cpu_to_le16(L2CAP_CR_PEND); 411 rsp.result = cpu_to_le16(L2CAP_CR_PEND);
380 rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND); 412 rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND);
@@ -426,7 +458,7 @@ static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err)
426 read_lock(&l->lock); 458 read_lock(&l->lock);
427 459
428 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { 460 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
429 if (l2cap_pi(sk)->link_mode & L2CAP_LM_RELIABLE) 461 if (l2cap_pi(sk)->force_reliable)
430 sk->sk_err = err; 462 sk->sk_err = err;
431 } 463 }
432 464
@@ -437,6 +469,7 @@ static void l2cap_info_timeout(unsigned long arg)
437{ 469{
438 struct l2cap_conn *conn = (void *) arg; 470 struct l2cap_conn *conn = (void *) arg;
439 471
472 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
440 conn->info_ident = 0; 473 conn->info_ident = 0;
441 474
442 l2cap_conn_start(conn); 475 l2cap_conn_start(conn);
@@ -470,6 +503,8 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
470 spin_lock_init(&conn->lock); 503 spin_lock_init(&conn->lock);
471 rwlock_init(&conn->chan_list.lock); 504 rwlock_init(&conn->chan_list.lock);
472 505
506 conn->disc_reason = 0x13;
507
473 return conn; 508 return conn;
474} 509}
475 510
@@ -483,8 +518,7 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
483 518
484 BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); 519 BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
485 520
486 if (conn->rx_skb) 521 kfree_skb(conn->rx_skb);
487 kfree_skb(conn->rx_skb);
488 522
489 /* Kill channels */ 523 /* Kill channels */
490 while ((sk = conn->chan_list.head)) { 524 while ((sk = conn->chan_list.head)) {
@@ -608,7 +642,6 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
608 642
609 case BT_CONNECTED: 643 case BT_CONNECTED:
610 case BT_CONFIG: 644 case BT_CONFIG:
611 case BT_CONNECT2:
612 if (sk->sk_type == SOCK_SEQPACKET) { 645 if (sk->sk_type == SOCK_SEQPACKET) {
613 struct l2cap_conn *conn = l2cap_pi(sk)->conn; 646 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
614 struct l2cap_disconn_req req; 647 struct l2cap_disconn_req req;
@@ -624,6 +657,27 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
624 l2cap_chan_del(sk, reason); 657 l2cap_chan_del(sk, reason);
625 break; 658 break;
626 659
660 case BT_CONNECT2:
661 if (sk->sk_type == SOCK_SEQPACKET) {
662 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
663 struct l2cap_conn_rsp rsp;
664 __u16 result;
665
666 if (bt_sk(sk)->defer_setup)
667 result = L2CAP_CR_SEC_BLOCK;
668 else
669 result = L2CAP_CR_BAD_PSM;
670
671 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
672 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
673 rsp.result = cpu_to_le16(result);
674 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
675 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
676 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
677 } else
678 l2cap_chan_del(sk, reason);
679 break;
680
627 case BT_CONNECT: 681 case BT_CONNECT:
628 case BT_DISCONN: 682 case BT_DISCONN:
629 l2cap_chan_del(sk, reason); 683 l2cap_chan_del(sk, reason);
@@ -653,13 +707,19 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
653 707
654 if (parent) { 708 if (parent) {
655 sk->sk_type = parent->sk_type; 709 sk->sk_type = parent->sk_type;
710 bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup;
711
656 pi->imtu = l2cap_pi(parent)->imtu; 712 pi->imtu = l2cap_pi(parent)->imtu;
657 pi->omtu = l2cap_pi(parent)->omtu; 713 pi->omtu = l2cap_pi(parent)->omtu;
658 pi->link_mode = l2cap_pi(parent)->link_mode; 714 pi->sec_level = l2cap_pi(parent)->sec_level;
715 pi->role_switch = l2cap_pi(parent)->role_switch;
716 pi->force_reliable = l2cap_pi(parent)->force_reliable;
659 } else { 717 } else {
660 pi->imtu = L2CAP_DEFAULT_MTU; 718 pi->imtu = L2CAP_DEFAULT_MTU;
661 pi->omtu = 0; 719 pi->omtu = 0;
662 pi->link_mode = 0; 720 pi->sec_level = BT_SECURITY_LOW;
721 pi->role_switch = 0;
722 pi->force_reliable = 0;
663 } 723 }
664 724
665 /* Default config options */ 725 /* Default config options */
@@ -723,17 +783,24 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol)
723 return 0; 783 return 0;
724} 784}
725 785
726static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) 786static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
727{ 787{
728 struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
729 struct sock *sk = sock->sk; 788 struct sock *sk = sock->sk;
730 int err = 0; 789 struct sockaddr_l2 la;
790 int len, err = 0;
731 791
732 BT_DBG("sk %p, %s %d", sk, batostr(&la->l2_bdaddr), la->l2_psm); 792 BT_DBG("sk %p", sk);
733 793
734 if (!addr || addr->sa_family != AF_BLUETOOTH) 794 if (!addr || addr->sa_family != AF_BLUETOOTH)
735 return -EINVAL; 795 return -EINVAL;
736 796
797 memset(&la, 0, sizeof(la));
798 len = min_t(unsigned int, sizeof(la), alen);
799 memcpy(&la, addr, len);
800
801 if (la.l2_cid)
802 return -EINVAL;
803
737 lock_sock(sk); 804 lock_sock(sk);
738 805
739 if (sk->sk_state != BT_OPEN) { 806 if (sk->sk_state != BT_OPEN) {
@@ -741,7 +808,7 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_
741 goto done; 808 goto done;
742 } 809 }
743 810
744 if (la->l2_psm && btohs(la->l2_psm) < 0x1001 && 811 if (la.l2_psm && btohs(la.l2_psm) < 0x1001 &&
745 !capable(CAP_NET_BIND_SERVICE)) { 812 !capable(CAP_NET_BIND_SERVICE)) {
746 err = -EACCES; 813 err = -EACCES;
747 goto done; 814 goto done;
@@ -749,14 +816,17 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_
749 816
750 write_lock_bh(&l2cap_sk_list.lock); 817 write_lock_bh(&l2cap_sk_list.lock);
751 818
752 if (la->l2_psm && __l2cap_get_sock_by_addr(la->l2_psm, &la->l2_bdaddr)) { 819 if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) {
753 err = -EADDRINUSE; 820 err = -EADDRINUSE;
754 } else { 821 } else {
755 /* Save source address */ 822 /* Save source address */
756 bacpy(&bt_sk(sk)->src, &la->l2_bdaddr); 823 bacpy(&bt_sk(sk)->src, &la.l2_bdaddr);
757 l2cap_pi(sk)->psm = la->l2_psm; 824 l2cap_pi(sk)->psm = la.l2_psm;
758 l2cap_pi(sk)->sport = la->l2_psm; 825 l2cap_pi(sk)->sport = la.l2_psm;
759 sk->sk_state = BT_BOUND; 826 sk->sk_state = BT_BOUND;
827
828 if (btohs(la.l2_psm) == 0x0001 || btohs(la.l2_psm) == 0x0003)
829 l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
760 } 830 }
761 831
762 write_unlock_bh(&l2cap_sk_list.lock); 832 write_unlock_bh(&l2cap_sk_list.lock);
@@ -776,7 +846,8 @@ static int l2cap_do_connect(struct sock *sk)
776 __u8 auth_type; 846 __u8 auth_type;
777 int err = 0; 847 int err = 0;
778 848
779 BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst), l2cap_pi(sk)->psm); 849 BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst),
850 l2cap_pi(sk)->psm);
780 851
781 if (!(hdev = hci_get_route(dst, src))) 852 if (!(hdev = hci_get_route(dst, src)))
782 return -EHOSTUNREACH; 853 return -EHOSTUNREACH;
@@ -785,21 +856,42 @@ static int l2cap_do_connect(struct sock *sk)
785 856
786 err = -ENOMEM; 857 err = -ENOMEM;
787 858
788 if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH || 859 if (sk->sk_type == SOCK_RAW) {
789 l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT || 860 switch (l2cap_pi(sk)->sec_level) {
790 l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) { 861 case BT_SECURITY_HIGH:
791 if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) 862 auth_type = HCI_AT_DEDICATED_BONDING_MITM;
863 break;
864 case BT_SECURITY_MEDIUM:
865 auth_type = HCI_AT_DEDICATED_BONDING;
866 break;
867 default:
868 auth_type = HCI_AT_NO_BONDING;
869 break;
870 }
871 } else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) {
872 if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
792 auth_type = HCI_AT_NO_BONDING_MITM; 873 auth_type = HCI_AT_NO_BONDING_MITM;
793 else 874 else
794 auth_type = HCI_AT_GENERAL_BONDING_MITM;
795 } else {
796 if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001))
797 auth_type = HCI_AT_NO_BONDING; 875 auth_type = HCI_AT_NO_BONDING;
798 else 876
877 if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW)
878 l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
879 } else {
880 switch (l2cap_pi(sk)->sec_level) {
881 case BT_SECURITY_HIGH:
882 auth_type = HCI_AT_GENERAL_BONDING_MITM;
883 break;
884 case BT_SECURITY_MEDIUM:
799 auth_type = HCI_AT_GENERAL_BONDING; 885 auth_type = HCI_AT_GENERAL_BONDING;
886 break;
887 default:
888 auth_type = HCI_AT_NO_BONDING;
889 break;
890 }
800 } 891 }
801 892
802 hcon = hci_connect(hdev, ACL_LINK, dst, auth_type); 893 hcon = hci_connect(hdev, ACL_LINK, dst,
894 l2cap_pi(sk)->sec_level, auth_type);
803 if (!hcon) 895 if (!hcon)
804 goto done; 896 goto done;
805 897
@@ -835,20 +927,25 @@ done:
835 927
836static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) 928static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
837{ 929{
838 struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
839 struct sock *sk = sock->sk; 930 struct sock *sk = sock->sk;
840 int err = 0; 931 struct sockaddr_l2 la;
841 932 int len, err = 0;
842 lock_sock(sk);
843 933
844 BT_DBG("sk %p", sk); 934 BT_DBG("sk %p", sk);
845 935
846 if (addr->sa_family != AF_BLUETOOTH || alen < sizeof(struct sockaddr_l2)) { 936 if (!addr || addr->sa_family != AF_BLUETOOTH)
847 err = -EINVAL; 937 return -EINVAL;
848 goto done; 938
849 } 939 memset(&la, 0, sizeof(la));
940 len = min_t(unsigned int, sizeof(la), alen);
941 memcpy(&la, addr, len);
942
943 if (la.l2_cid)
944 return -EINVAL;
945
946 lock_sock(sk);
850 947
851 if (sk->sk_type == SOCK_SEQPACKET && !la->l2_psm) { 948 if (sk->sk_type == SOCK_SEQPACKET && !la.l2_psm) {
852 err = -EINVAL; 949 err = -EINVAL;
853 goto done; 950 goto done;
854 } 951 }
@@ -875,8 +972,8 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
875 } 972 }
876 973
877 /* Set destination address and psm */ 974 /* Set destination address and psm */
878 bacpy(&bt_sk(sk)->dst, &la->l2_bdaddr); 975 bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr);
879 l2cap_pi(sk)->psm = la->l2_psm; 976 l2cap_pi(sk)->psm = la.l2_psm;
880 977
881 if ((err = l2cap_do_connect(sk))) 978 if ((err = l2cap_do_connect(sk)))
882 goto done; 979 goto done;
@@ -1000,12 +1097,16 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l
1000 addr->sa_family = AF_BLUETOOTH; 1097 addr->sa_family = AF_BLUETOOTH;
1001 *len = sizeof(struct sockaddr_l2); 1098 *len = sizeof(struct sockaddr_l2);
1002 1099
1003 if (peer) 1100 if (peer) {
1101 la->l2_psm = l2cap_pi(sk)->psm;
1004 bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst); 1102 bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst);
1005 else 1103 la->l2_cid = htobs(l2cap_pi(sk)->dcid);
1104 } else {
1105 la->l2_psm = l2cap_pi(sk)->sport;
1006 bacpy(&la->l2_bdaddr, &bt_sk(sk)->src); 1106 bacpy(&la->l2_bdaddr, &bt_sk(sk)->src);
1107 la->l2_cid = htobs(l2cap_pi(sk)->scid);
1108 }
1007 1109
1008 la->l2_psm = l2cap_pi(sk)->psm;
1009 return 0; 1110 return 0;
1010} 1111}
1011 1112
@@ -1106,11 +1207,38 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
1106 return err; 1207 return err;
1107} 1208}
1108 1209
1109static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) 1210static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags)
1211{
1212 struct sock *sk = sock->sk;
1213
1214 lock_sock(sk);
1215
1216 if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
1217 struct l2cap_conn_rsp rsp;
1218
1219 sk->sk_state = BT_CONFIG;
1220
1221 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
1222 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
1223 rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
1224 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
1225 l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident,
1226 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
1227
1228 release_sock(sk);
1229 return 0;
1230 }
1231
1232 release_sock(sk);
1233
1234 return bt_sock_recvmsg(iocb, sock, msg, len, flags);
1235}
1236
1237static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, int optlen)
1110{ 1238{
1111 struct sock *sk = sock->sk; 1239 struct sock *sk = sock->sk;
1112 struct l2cap_options opts; 1240 struct l2cap_options opts;
1113 int err = 0, len; 1241 int len, err = 0;
1114 u32 opt; 1242 u32 opt;
1115 1243
1116 BT_DBG("sk %p", sk); 1244 BT_DBG("sk %p", sk);
@@ -1140,7 +1268,15 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch
1140 break; 1268 break;
1141 } 1269 }
1142 1270
1143 l2cap_pi(sk)->link_mode = opt; 1271 if (opt & L2CAP_LM_AUTH)
1272 l2cap_pi(sk)->sec_level = BT_SECURITY_LOW;
1273 if (opt & L2CAP_LM_ENCRYPT)
1274 l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM;
1275 if (opt & L2CAP_LM_SECURE)
1276 l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH;
1277
1278 l2cap_pi(sk)->role_switch = (opt & L2CAP_LM_MASTER);
1279 l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE);
1144 break; 1280 break;
1145 1281
1146 default: 1282 default:
@@ -1152,12 +1288,77 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch
1152 return err; 1288 return err;
1153} 1289}
1154 1290
1155static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) 1291static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1292{
1293 struct sock *sk = sock->sk;
1294 struct bt_security sec;
1295 int len, err = 0;
1296 u32 opt;
1297
1298 BT_DBG("sk %p", sk);
1299
1300 if (level == SOL_L2CAP)
1301 return l2cap_sock_setsockopt_old(sock, optname, optval, optlen);
1302
1303 if (level != SOL_BLUETOOTH)
1304 return -ENOPROTOOPT;
1305
1306 lock_sock(sk);
1307
1308 switch (optname) {
1309 case BT_SECURITY:
1310 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) {
1311 err = -EINVAL;
1312 break;
1313 }
1314
1315 sec.level = BT_SECURITY_LOW;
1316
1317 len = min_t(unsigned int, sizeof(sec), optlen);
1318 if (copy_from_user((char *) &sec, optval, len)) {
1319 err = -EFAULT;
1320 break;
1321 }
1322
1323 if (sec.level < BT_SECURITY_LOW ||
1324 sec.level > BT_SECURITY_HIGH) {
1325 err = -EINVAL;
1326 break;
1327 }
1328
1329 l2cap_pi(sk)->sec_level = sec.level;
1330 break;
1331
1332 case BT_DEFER_SETUP:
1333 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
1334 err = -EINVAL;
1335 break;
1336 }
1337
1338 if (get_user(opt, (u32 __user *) optval)) {
1339 err = -EFAULT;
1340 break;
1341 }
1342
1343 bt_sk(sk)->defer_setup = opt;
1344 break;
1345
1346 default:
1347 err = -ENOPROTOOPT;
1348 break;
1349 }
1350
1351 release_sock(sk);
1352 return err;
1353}
1354
1355static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
1156{ 1356{
1157 struct sock *sk = sock->sk; 1357 struct sock *sk = sock->sk;
1158 struct l2cap_options opts; 1358 struct l2cap_options opts;
1159 struct l2cap_conninfo cinfo; 1359 struct l2cap_conninfo cinfo;
1160 int len, err = 0; 1360 int len, err = 0;
1361 u32 opt;
1161 1362
1162 BT_DBG("sk %p", sk); 1363 BT_DBG("sk %p", sk);
1163 1364
@@ -1180,12 +1381,36 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch
1180 break; 1381 break;
1181 1382
1182 case L2CAP_LM: 1383 case L2CAP_LM:
1183 if (put_user(l2cap_pi(sk)->link_mode, (u32 __user *) optval)) 1384 switch (l2cap_pi(sk)->sec_level) {
1385 case BT_SECURITY_LOW:
1386 opt = L2CAP_LM_AUTH;
1387 break;
1388 case BT_SECURITY_MEDIUM:
1389 opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT;
1390 break;
1391 case BT_SECURITY_HIGH:
1392 opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT |
1393 L2CAP_LM_SECURE;
1394 break;
1395 default:
1396 opt = 0;
1397 break;
1398 }
1399
1400 if (l2cap_pi(sk)->role_switch)
1401 opt |= L2CAP_LM_MASTER;
1402
1403 if (l2cap_pi(sk)->force_reliable)
1404 opt |= L2CAP_LM_RELIABLE;
1405
1406 if (put_user(opt, (u32 __user *) optval))
1184 err = -EFAULT; 1407 err = -EFAULT;
1185 break; 1408 break;
1186 1409
1187 case L2CAP_CONNINFO: 1410 case L2CAP_CONNINFO:
1188 if (sk->sk_state != BT_CONNECTED) { 1411 if (sk->sk_state != BT_CONNECTED &&
1412 !(sk->sk_state == BT_CONNECT2 &&
1413 bt_sk(sk)->defer_setup)) {
1189 err = -ENOTCONN; 1414 err = -ENOTCONN;
1190 break; 1415 break;
1191 } 1416 }
@@ -1208,6 +1433,60 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch
1208 return err; 1433 return err;
1209} 1434}
1210 1435
1436static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
1437{
1438 struct sock *sk = sock->sk;
1439 struct bt_security sec;
1440 int len, err = 0;
1441
1442 BT_DBG("sk %p", sk);
1443
1444 if (level == SOL_L2CAP)
1445 return l2cap_sock_getsockopt_old(sock, optname, optval, optlen);
1446
1447 if (level != SOL_BLUETOOTH)
1448 return -ENOPROTOOPT;
1449
1450 if (get_user(len, optlen))
1451 return -EFAULT;
1452
1453 lock_sock(sk);
1454
1455 switch (optname) {
1456 case BT_SECURITY:
1457 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) {
1458 err = -EINVAL;
1459 break;
1460 }
1461
1462 sec.level = l2cap_pi(sk)->sec_level;
1463
1464 len = min_t(unsigned int, len, sizeof(sec));
1465 if (copy_to_user(optval, (char *) &sec, len))
1466 err = -EFAULT;
1467
1468 break;
1469
1470 case BT_DEFER_SETUP:
1471 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
1472 err = -EINVAL;
1473 break;
1474 }
1475
1476 if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval))
1477 err = -EFAULT;
1478
1479 break;
1480
1481 default:
1482 err = -ENOPROTOOPT;
1483 break;
1484 }
1485
1486 release_sock(sk);
1487 return err;
1488}
1489
1211static int l2cap_sock_shutdown(struct socket *sock, int how) 1490static int l2cap_sock_shutdown(struct socket *sock, int how)
1212{ 1491{
1213 struct sock *sk = sock->sk; 1492 struct sock *sk = sock->sk;
@@ -1270,11 +1549,6 @@ static void l2cap_chan_ready(struct sock *sk)
1270 */ 1549 */
1271 parent->sk_data_ready(parent, 0); 1550 parent->sk_data_ready(parent, 0);
1272 } 1551 }
1273
1274 if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) {
1275 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
1276 hci_conn_change_link_key(conn->hcon);
1277 }
1278} 1552}
1279 1553
1280/* Copy frame to all raw sockets on that connection */ 1554/* Copy frame to all raw sockets on that connection */
@@ -1549,8 +1823,11 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hd
1549 1823
1550 if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && 1824 if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) &&
1551 cmd->ident == conn->info_ident) { 1825 cmd->ident == conn->info_ident) {
1552 conn->info_ident = 0;
1553 del_timer(&conn->info_timer); 1826 del_timer(&conn->info_timer);
1827
1828 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
1829 conn->info_ident = 0;
1830
1554 l2cap_conn_start(conn); 1831 l2cap_conn_start(conn);
1555 } 1832 }
1556 1833
@@ -1580,6 +1857,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
1580 /* Check if the ACL is secure enough (if not SDP) */ 1857 /* Check if the ACL is secure enough (if not SDP) */
1581 if (psm != cpu_to_le16(0x0001) && 1858 if (psm != cpu_to_le16(0x0001) &&
1582 !hci_conn_check_link_mode(conn->hcon)) { 1859 !hci_conn_check_link_mode(conn->hcon)) {
1860 conn->disc_reason = 0x05;
1583 result = L2CAP_CR_SEC_BLOCK; 1861 result = L2CAP_CR_SEC_BLOCK;
1584 goto response; 1862 goto response;
1585 } 1863 }
@@ -1621,11 +1899,18 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
1621 1899
1622 l2cap_pi(sk)->ident = cmd->ident; 1900 l2cap_pi(sk)->ident = cmd->ident;
1623 1901
1624 if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { 1902 if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) {
1625 if (l2cap_check_link_mode(sk)) { 1903 if (l2cap_check_security(sk)) {
1626 sk->sk_state = BT_CONFIG; 1904 if (bt_sk(sk)->defer_setup) {
1627 result = L2CAP_CR_SUCCESS; 1905 sk->sk_state = BT_CONNECT2;
1628 status = L2CAP_CS_NO_INFO; 1906 result = L2CAP_CR_PEND;
1907 status = L2CAP_CS_AUTHOR_PEND;
1908 parent->sk_data_ready(parent, 0);
1909 } else {
1910 sk->sk_state = BT_CONFIG;
1911 result = L2CAP_CR_SUCCESS;
1912 status = L2CAP_CS_NO_INFO;
1913 }
1629 } else { 1914 } else {
1630 sk->sk_state = BT_CONNECT2; 1915 sk->sk_state = BT_CONNECT2;
1631 result = L2CAP_CR_PEND; 1916 result = L2CAP_CR_PEND;
@@ -1695,11 +1980,14 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd
1695 l2cap_pi(sk)->dcid = dcid; 1980 l2cap_pi(sk)->dcid = dcid;
1696 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; 1981 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
1697 1982
1983 l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND;
1984
1698 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, 1985 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
1699 l2cap_build_conf_req(sk, req), req); 1986 l2cap_build_conf_req(sk, req), req);
1700 break; 1987 break;
1701 1988
1702 case L2CAP_CR_PEND: 1989 case L2CAP_CR_PEND:
1990 l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
1703 break; 1991 break;
1704 1992
1705 default: 1993 default:
@@ -1908,6 +2196,14 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm
1908 put_unaligned(cpu_to_le32(l2cap_feat_mask), (__le32 *) rsp->data); 2196 put_unaligned(cpu_to_le32(l2cap_feat_mask), (__le32 *) rsp->data);
1909 l2cap_send_cmd(conn, cmd->ident, 2197 l2cap_send_cmd(conn, cmd->ident,
1910 L2CAP_INFO_RSP, sizeof(buf), buf); 2198 L2CAP_INFO_RSP, sizeof(buf), buf);
2199 } else if (type == L2CAP_IT_FIXED_CHAN) {
2200 u8 buf[12];
2201 struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf;
2202 rsp->type = cpu_to_le16(L2CAP_IT_FIXED_CHAN);
2203 rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS);
2204 memcpy(buf + 4, l2cap_fixed_chan, 8);
2205 l2cap_send_cmd(conn, cmd->ident,
2206 L2CAP_INFO_RSP, sizeof(buf), buf);
1911 } else { 2207 } else {
1912 struct l2cap_info_rsp rsp; 2208 struct l2cap_info_rsp rsp;
1913 rsp.type = cpu_to_le16(type); 2209 rsp.type = cpu_to_le16(type);
@@ -1929,14 +2225,31 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm
1929 2225
1930 BT_DBG("type 0x%4.4x result 0x%2.2x", type, result); 2226 BT_DBG("type 0x%4.4x result 0x%2.2x", type, result);
1931 2227
1932 conn->info_ident = 0;
1933
1934 del_timer(&conn->info_timer); 2228 del_timer(&conn->info_timer);
1935 2229
1936 if (type == L2CAP_IT_FEAT_MASK) 2230 if (type == L2CAP_IT_FEAT_MASK) {
1937 conn->feat_mask = get_unaligned_le32(rsp->data); 2231 conn->feat_mask = get_unaligned_le32(rsp->data);
1938 2232
1939 l2cap_conn_start(conn); 2233 if (conn->feat_mask & 0x0080) {
2234 struct l2cap_info_req req;
2235 req.type = cpu_to_le16(L2CAP_IT_FIXED_CHAN);
2236
2237 conn->info_ident = l2cap_get_ident(conn);
2238
2239 l2cap_send_cmd(conn, conn->info_ident,
2240 L2CAP_INFO_REQ, sizeof(req), &req);
2241 } else {
2242 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
2243 conn->info_ident = 0;
2244
2245 l2cap_conn_start(conn);
2246 }
2247 } else if (type == L2CAP_IT_FIXED_CHAN) {
2248 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
2249 conn->info_ident = 0;
2250
2251 l2cap_conn_start(conn);
2252 }
1940 2253
1941 return 0; 2254 return 0;
1942} 2255}
@@ -2143,10 +2456,15 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
2143 continue; 2456 continue;
2144 2457
2145 if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr)) { 2458 if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr)) {
2146 lm1 |= (HCI_LM_ACCEPT | l2cap_pi(sk)->link_mode); 2459 lm1 |= HCI_LM_ACCEPT;
2460 if (l2cap_pi(sk)->role_switch)
2461 lm1 |= HCI_LM_MASTER;
2147 exact++; 2462 exact++;
2148 } else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) 2463 } else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) {
2149 lm2 |= (HCI_LM_ACCEPT | l2cap_pi(sk)->link_mode); 2464 lm2 |= HCI_LM_ACCEPT;
2465 if (l2cap_pi(sk)->role_switch)
2466 lm2 |= HCI_LM_MASTER;
2467 }
2150 } 2468 }
2151 read_unlock(&l2cap_sk_list.lock); 2469 read_unlock(&l2cap_sk_list.lock);
2152 2470
@@ -2172,89 +2490,48 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
2172 return 0; 2490 return 0;
2173} 2491}
2174 2492
2175static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason) 2493static int l2cap_disconn_ind(struct hci_conn *hcon)
2176{ 2494{
2177 BT_DBG("hcon %p reason %d", hcon, reason); 2495 struct l2cap_conn *conn = hcon->l2cap_data;
2178 2496
2179 if (hcon->type != ACL_LINK) 2497 BT_DBG("hcon %p", hcon);
2180 return 0;
2181 2498
2182 l2cap_conn_del(hcon, bt_err(reason)); 2499 if (hcon->type != ACL_LINK || !conn)
2500 return 0x13;
2183 2501
2184 return 0; 2502 return conn->disc_reason;
2185} 2503}
2186 2504
2187static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) 2505static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
2188{ 2506{
2189 struct l2cap_chan_list *l; 2507 BT_DBG("hcon %p reason %d", hcon, reason);
2190 struct l2cap_conn *conn = hcon->l2cap_data;
2191 struct sock *sk;
2192 2508
2193 if (!conn) 2509 if (hcon->type != ACL_LINK)
2194 return 0; 2510 return 0;
2195 2511
2196 l = &conn->chan_list; 2512 l2cap_conn_del(hcon, bt_err(reason));
2197
2198 BT_DBG("conn %p", conn);
2199
2200 read_lock(&l->lock);
2201
2202 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
2203 struct l2cap_pinfo *pi = l2cap_pi(sk);
2204
2205 bh_lock_sock(sk);
2206
2207 if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) &&
2208 !(hcon->link_mode & HCI_LM_ENCRYPT) &&
2209 !status) {
2210 bh_unlock_sock(sk);
2211 continue;
2212 }
2213
2214 if (sk->sk_state == BT_CONNECT) {
2215 if (!status) {
2216 struct l2cap_conn_req req;
2217 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
2218 req.psm = l2cap_pi(sk)->psm;
2219
2220 l2cap_pi(sk)->ident = l2cap_get_ident(conn);
2221
2222 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
2223 L2CAP_CONN_REQ, sizeof(req), &req);
2224 } else {
2225 l2cap_sock_clear_timer(sk);
2226 l2cap_sock_set_timer(sk, HZ / 10);
2227 }
2228 } else if (sk->sk_state == BT_CONNECT2) {
2229 struct l2cap_conn_rsp rsp;
2230 __u16 result;
2231 2513
2232 if (!status) { 2514 return 0;
2233 sk->sk_state = BT_CONFIG; 2515}
2234 result = L2CAP_CR_SUCCESS;
2235 } else {
2236 sk->sk_state = BT_DISCONN;
2237 l2cap_sock_set_timer(sk, HZ / 10);
2238 result = L2CAP_CR_SEC_BLOCK;
2239 }
2240 2516
2241 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); 2517static inline void l2cap_check_encryption(struct sock *sk, u8 encrypt)
2242 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); 2518{
2243 rsp.result = cpu_to_le16(result); 2519 if (sk->sk_type != SOCK_SEQPACKET)
2244 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); 2520 return;
2245 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
2246 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
2247 }
2248 2521
2249 bh_unlock_sock(sk); 2522 if (encrypt == 0x00) {
2523 if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM) {
2524 l2cap_sock_clear_timer(sk);
2525 l2cap_sock_set_timer(sk, HZ * 5);
2526 } else if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
2527 __l2cap_sock_close(sk, ECONNREFUSED);
2528 } else {
2529 if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM)
2530 l2cap_sock_clear_timer(sk);
2250 } 2531 }
2251
2252 read_unlock(&l->lock);
2253
2254 return 0;
2255} 2532}
2256 2533
2257static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) 2534static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
2258{ 2535{
2259 struct l2cap_chan_list *l; 2536 struct l2cap_chan_list *l;
2260 struct l2cap_conn *conn = hcon->l2cap_data; 2537 struct l2cap_conn *conn = hcon->l2cap_data;
@@ -2270,15 +2547,16 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
2270 read_lock(&l->lock); 2547 read_lock(&l->lock);
2271 2548
2272 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { 2549 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
2273 struct l2cap_pinfo *pi = l2cap_pi(sk);
2274
2275 bh_lock_sock(sk); 2550 bh_lock_sock(sk);
2276 2551
2277 if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) && 2552 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND) {
2278 (sk->sk_state == BT_CONNECTED || 2553 bh_unlock_sock(sk);
2279 sk->sk_state == BT_CONFIG) && 2554 continue;
2280 !status && encrypt == 0x00) { 2555 }
2281 __l2cap_sock_close(sk, ECONNREFUSED); 2556
2557 if (!status && (sk->sk_state == BT_CONNECTED ||
2558 sk->sk_state == BT_CONFIG)) {
2559 l2cap_check_encryption(sk, encrypt);
2282 bh_unlock_sock(sk); 2560 bh_unlock_sock(sk);
2283 continue; 2561 continue;
2284 } 2562 }
@@ -2376,7 +2654,7 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
2376 goto drop; 2654 goto drop;
2377 2655
2378 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), 2656 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
2379 skb->len); 2657 skb->len);
2380 conn->rx_len = len - skb->len; 2658 conn->rx_len = len - skb->len;
2381 } else { 2659 } else {
2382 BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len); 2660 BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len);
@@ -2398,7 +2676,7 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
2398 } 2676 }
2399 2677
2400 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), 2678 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
2401 skb->len); 2679 skb->len);
2402 conn->rx_len -= skb->len; 2680 conn->rx_len -= skb->len;
2403 2681
2404 if (!conn->rx_len) { 2682 if (!conn->rx_len) {
@@ -2424,10 +2702,10 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf)
2424 sk_for_each(sk, node, &l2cap_sk_list.head) { 2702 sk_for_each(sk, node, &l2cap_sk_list.head) {
2425 struct l2cap_pinfo *pi = l2cap_pi(sk); 2703 struct l2cap_pinfo *pi = l2cap_pi(sk);
2426 2704
2427 str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d 0x%x\n", 2705 str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n",
2428 batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), 2706 batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst),
2429 sk->sk_state, btohs(pi->psm), pi->scid, pi->dcid, 2707 sk->sk_state, btohs(pi->psm), pi->scid, pi->dcid,
2430 pi->imtu, pi->omtu, pi->link_mode); 2708 pi->imtu, pi->omtu, pi->sec_level);
2431 } 2709 }
2432 2710
2433 read_unlock_bh(&l2cap_sk_list.lock); 2711 read_unlock_bh(&l2cap_sk_list.lock);
@@ -2447,7 +2725,7 @@ static const struct proto_ops l2cap_sock_ops = {
2447 .accept = l2cap_sock_accept, 2725 .accept = l2cap_sock_accept,
2448 .getname = l2cap_sock_getname, 2726 .getname = l2cap_sock_getname,
2449 .sendmsg = l2cap_sock_sendmsg, 2727 .sendmsg = l2cap_sock_sendmsg,
2450 .recvmsg = bt_sock_recvmsg, 2728 .recvmsg = l2cap_sock_recvmsg,
2451 .poll = bt_sock_poll, 2729 .poll = bt_sock_poll,
2452 .ioctl = bt_sock_ioctl, 2730 .ioctl = bt_sock_ioctl,
2453 .mmap = sock_no_mmap, 2731 .mmap = sock_no_mmap,
@@ -2469,8 +2747,8 @@ static struct hci_proto l2cap_hci_proto = {
2469 .connect_ind = l2cap_connect_ind, 2747 .connect_ind = l2cap_connect_ind,
2470 .connect_cfm = l2cap_connect_cfm, 2748 .connect_cfm = l2cap_connect_cfm,
2471 .disconn_ind = l2cap_disconn_ind, 2749 .disconn_ind = l2cap_disconn_ind,
2472 .auth_cfm = l2cap_auth_cfm, 2750 .disconn_cfm = l2cap_disconn_cfm,
2473 .encrypt_cfm = l2cap_encrypt_cfm, 2751 .security_cfm = l2cap_security_cfm,
2474 .recv_acldata = l2cap_recv_acldata 2752 .recv_acldata = l2cap_recv_acldata
2475}; 2753};
2476 2754
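
A minimal userspace sketch (not part of this patch) of how the new SOL_BLUETOOTH socket options introduced above are expected to be driven on an L2CAP socket; it assumes the BT_SECURITY/BT_DEFER_SETUP constants and struct bt_security from this series are exported through the userspace <bluetooth/bluetooth.h>:

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <bluetooth/bluetooth.h>

/* Request an authenticated and encrypted link before the channel is
 * established; l2cap_check_security() maps BT_SECURITY_MEDIUM to
 * HCI_AT_GENERAL_BONDING. */
static int l2cap_set_security(int sk)
{
	struct bt_security sec;
	uint32_t defer = 1;

	memset(&sec, 0, sizeof(sec));
	sec.level = BT_SECURITY_MEDIUM;
	if (setsockopt(sk, SOL_BLUETOOTH, BT_SECURITY, &sec, sizeof(sec)) < 0)
		return -1;

	/* Only accepted while the socket is bound or listening; incoming
	 * connections then stay pending until they are authorized. */
	if (setsockopt(sk, SOL_BLUETOOTH, BT_DEFER_SETUP, &defer, sizeof(defer)) < 0)
		return -1;

	return 0;
}
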
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index acd84fd524b8..1d0fb0f23c63 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -46,7 +46,7 @@
46#include <net/bluetooth/l2cap.h> 46#include <net/bluetooth/l2cap.h>
47#include <net/bluetooth/rfcomm.h> 47#include <net/bluetooth/rfcomm.h>
48 48
49#define VERSION "1.10" 49#define VERSION "1.11"
50 50
51static int disable_cfc = 0; 51static int disable_cfc = 0;
52static int channel_mtu = -1; 52static int channel_mtu = -1;
@@ -223,19 +223,25 @@ static int rfcomm_l2sock_create(struct socket **sock)
223 return err; 223 return err;
224} 224}
225 225
226static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d) 226static inline int rfcomm_check_security(struct rfcomm_dlc *d)
227{ 227{
228 struct sock *sk = d->session->sock->sk; 228 struct sock *sk = d->session->sock->sk;
229 __u8 auth_type;
229 230
230 if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) { 231 switch (d->sec_level) {
231 if (!hci_conn_encrypt(l2cap_pi(sk)->conn->hcon)) 232 case BT_SECURITY_HIGH:
232 return 1; 233 auth_type = HCI_AT_GENERAL_BONDING_MITM;
233 } else if (d->link_mode & RFCOMM_LM_AUTH) { 234 break;
234 if (!hci_conn_auth(l2cap_pi(sk)->conn->hcon)) 235 case BT_SECURITY_MEDIUM:
235 return 1; 236 auth_type = HCI_AT_GENERAL_BONDING;
237 break;
238 default:
239 auth_type = HCI_AT_NO_BONDING;
240 break;
236 } 241 }
237 242
238 return 0; 243 return hci_conn_security(l2cap_pi(sk)->conn->hcon, d->sec_level,
244 auth_type);
239} 245}
240 246
241/* ---- RFCOMM DLCs ---- */ 247/* ---- RFCOMM DLCs ---- */
@@ -388,10 +394,10 @@ static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst,
388 d->cfc = (s->cfc == RFCOMM_CFC_UNKNOWN) ? 0 : s->cfc; 394 d->cfc = (s->cfc == RFCOMM_CFC_UNKNOWN) ? 0 : s->cfc;
389 395
390 if (s->state == BT_CONNECTED) { 396 if (s->state == BT_CONNECTED) {
391 if (rfcomm_check_link_mode(d)) 397 if (rfcomm_check_security(d))
392 set_bit(RFCOMM_AUTH_PENDING, &d->flags);
393 else
394 rfcomm_send_pn(s, 1, d); 398 rfcomm_send_pn(s, 1, d);
399 else
400 set_bit(RFCOMM_AUTH_PENDING, &d->flags);
395 } 401 }
396 402
397 rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); 403 rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT);
@@ -421,9 +427,16 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
421 d, d->state, d->dlci, err, s); 427 d, d->state, d->dlci, err, s);
422 428
423 switch (d->state) { 429 switch (d->state) {
424 case BT_CONNECTED:
425 case BT_CONFIG:
426 case BT_CONNECT: 430 case BT_CONNECT:
431 case BT_CONFIG:
432 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
433 set_bit(RFCOMM_AUTH_REJECT, &d->flags);
434 rfcomm_schedule(RFCOMM_SCHED_AUTH);
435 break;
436 }
437 /* Fall through */
438
439 case BT_CONNECTED:
427 d->state = BT_DISCONN; 440 d->state = BT_DISCONN;
428 if (skb_queue_empty(&d->tx_queue)) { 441 if (skb_queue_empty(&d->tx_queue)) {
429 rfcomm_send_disc(s, d->dlci); 442 rfcomm_send_disc(s, d->dlci);
@@ -434,6 +447,15 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
434 } 447 }
435 break; 448 break;
436 449
450 case BT_OPEN:
451 case BT_CONNECT2:
452 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
453 set_bit(RFCOMM_AUTH_REJECT, &d->flags);
454 rfcomm_schedule(RFCOMM_SCHED_AUTH);
455 break;
456 }
457 /* Fall through */
458
437 default: 459 default:
438 rfcomm_dlc_clear_timer(d); 460 rfcomm_dlc_clear_timer(d);
439 461
@@ -636,6 +658,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
636 bacpy(&addr.l2_bdaddr, src); 658 bacpy(&addr.l2_bdaddr, src);
637 addr.l2_family = AF_BLUETOOTH; 659 addr.l2_family = AF_BLUETOOTH;
638 addr.l2_psm = 0; 660 addr.l2_psm = 0;
661 addr.l2_cid = 0;
639 *err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); 662 *err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr));
640 if (*err < 0) 663 if (*err < 0)
641 goto failed; 664 goto failed;
@@ -657,6 +680,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
657 bacpy(&addr.l2_bdaddr, dst); 680 bacpy(&addr.l2_bdaddr, dst);
658 addr.l2_family = AF_BLUETOOTH; 681 addr.l2_family = AF_BLUETOOTH;
659 addr.l2_psm = htobs(RFCOMM_PSM); 682 addr.l2_psm = htobs(RFCOMM_PSM);
683 addr.l2_cid = 0;
660 *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); 684 *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK);
661 if (*err == 0 || *err == -EINPROGRESS) 685 if (*err == 0 || *err == -EINPROGRESS)
662 return s; 686 return s;
@@ -1162,7 +1186,7 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci)
1162 return 0; 1186 return 0;
1163} 1187}
1164 1188
1165static void rfcomm_dlc_accept(struct rfcomm_dlc *d) 1189void rfcomm_dlc_accept(struct rfcomm_dlc *d)
1166{ 1190{
1167 struct sock *sk = d->session->sock->sk; 1191 struct sock *sk = d->session->sock->sk;
1168 1192
@@ -1175,12 +1199,31 @@ static void rfcomm_dlc_accept(struct rfcomm_dlc *d)
1175 d->state_change(d, 0); 1199 d->state_change(d, 0);
1176 rfcomm_dlc_unlock(d); 1200 rfcomm_dlc_unlock(d);
1177 1201
1178 if (d->link_mode & RFCOMM_LM_MASTER) 1202 if (d->role_switch)
1179 hci_conn_switch_role(l2cap_pi(sk)->conn->hcon, 0x00); 1203 hci_conn_switch_role(l2cap_pi(sk)->conn->hcon, 0x00);
1180 1204
1181 rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig); 1205 rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig);
1182} 1206}
1183 1207
1208static void rfcomm_check_accept(struct rfcomm_dlc *d)
1209{
1210 if (rfcomm_check_security(d)) {
1211 if (d->defer_setup) {
1212 set_bit(RFCOMM_DEFER_SETUP, &d->flags);
1213 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
1214
1215 rfcomm_dlc_lock(d);
1216 d->state = BT_CONNECT2;
1217 d->state_change(d, 0);
1218 rfcomm_dlc_unlock(d);
1219 } else
1220 rfcomm_dlc_accept(d);
1221 } else {
1222 set_bit(RFCOMM_AUTH_PENDING, &d->flags);
1223 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
1224 }
1225}
1226
1184static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci) 1227static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci)
1185{ 1228{
1186 struct rfcomm_dlc *d; 1229 struct rfcomm_dlc *d;
@@ -1203,11 +1246,7 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci)
1203 if (d) { 1246 if (d) {
1204 if (d->state == BT_OPEN) { 1247 if (d->state == BT_OPEN) {
1205 /* DLC was previously opened by PN request */ 1248 /* DLC was previously opened by PN request */
1206 if (rfcomm_check_link_mode(d)) { 1249 rfcomm_check_accept(d);
1207 set_bit(RFCOMM_AUTH_PENDING, &d->flags);
1208 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
1209 } else
1210 rfcomm_dlc_accept(d);
1211 } 1250 }
1212 return 0; 1251 return 0;
1213 } 1252 }
@@ -1219,11 +1258,7 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci)
1219 d->addr = __addr(s->initiator, dlci); 1258 d->addr = __addr(s->initiator, dlci);
1220 rfcomm_dlc_link(s, d); 1259 rfcomm_dlc_link(s, d);
1221 1260
1222 if (rfcomm_check_link_mode(d)) { 1261 rfcomm_check_accept(d);
1223 set_bit(RFCOMM_AUTH_PENDING, &d->flags);
1224 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
1225 } else
1226 rfcomm_dlc_accept(d);
1227 } else { 1262 } else {
1228 rfcomm_send_dm(s, dlci); 1263 rfcomm_send_dm(s, dlci);
1229 } 1264 }
@@ -1637,11 +1672,12 @@ static void rfcomm_process_connect(struct rfcomm_session *s)
1637 d = list_entry(p, struct rfcomm_dlc, list); 1672 d = list_entry(p, struct rfcomm_dlc, list);
1638 if (d->state == BT_CONFIG) { 1673 if (d->state == BT_CONFIG) {
1639 d->mtu = s->mtu; 1674 d->mtu = s->mtu;
1640 if (rfcomm_check_link_mode(d)) { 1675 if (rfcomm_check_security(d)) {
1676 rfcomm_send_pn(s, 1, d);
1677 } else {
1641 set_bit(RFCOMM_AUTH_PENDING, &d->flags); 1678 set_bit(RFCOMM_AUTH_PENDING, &d->flags);
1642 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); 1679 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
1643 } else 1680 }
1644 rfcomm_send_pn(s, 1, d);
1645 } 1681 }
1646 } 1682 }
1647} 1683}
@@ -1717,11 +1753,17 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s)
1717 if (d->out) { 1753 if (d->out) {
1718 rfcomm_send_pn(s, 1, d); 1754 rfcomm_send_pn(s, 1, d);
1719 rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); 1755 rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT);
1720 } else 1756 } else {
1721 rfcomm_dlc_accept(d); 1757 if (d->defer_setup) {
1722 if (d->link_mode & RFCOMM_LM_SECURE) { 1758 set_bit(RFCOMM_DEFER_SETUP, &d->flags);
1723 struct sock *sk = s->sock->sk; 1759 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
1724 hci_conn_change_link_key(l2cap_pi(sk)->conn->hcon); 1760
1761 rfcomm_dlc_lock(d);
1762 d->state = BT_CONNECT2;
1763 d->state_change(d, 0);
1764 rfcomm_dlc_unlock(d);
1765 } else
1766 rfcomm_dlc_accept(d);
1725 } 1767 }
1726 continue; 1768 continue;
1727 } else if (test_and_clear_bit(RFCOMM_AUTH_REJECT, &d->flags)) { 1769 } else if (test_and_clear_bit(RFCOMM_AUTH_REJECT, &d->flags)) {
@@ -1734,6 +1776,9 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s)
1734 continue; 1776 continue;
1735 } 1777 }
1736 1778
1779 if (test_bit(RFCOMM_SEC_PENDING, &d->flags))
1780 continue;
1781
1737 if (test_bit(RFCOMM_TX_THROTTLED, &s->flags)) 1782 if (test_bit(RFCOMM_TX_THROTTLED, &s->flags))
1738 continue; 1783 continue;
1739 1784
@@ -1876,6 +1921,7 @@ static int rfcomm_add_listener(bdaddr_t *ba)
1876 bacpy(&addr.l2_bdaddr, ba); 1921 bacpy(&addr.l2_bdaddr, ba);
1877 addr.l2_family = AF_BLUETOOTH; 1922 addr.l2_family = AF_BLUETOOTH;
1878 addr.l2_psm = htobs(RFCOMM_PSM); 1923 addr.l2_psm = htobs(RFCOMM_PSM);
1924 addr.l2_cid = 0;
1879 err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); 1925 err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr));
1880 if (err < 0) { 1926 if (err < 0) {
1881 BT_ERR("Bind failed %d", err); 1927 BT_ERR("Bind failed %d", err);
@@ -1947,42 +1993,7 @@ static int rfcomm_run(void *unused)
1947 return 0; 1993 return 0;
1948} 1994}
1949 1995
1950static void rfcomm_auth_cfm(struct hci_conn *conn, u8 status) 1996static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
1951{
1952 struct rfcomm_session *s;
1953 struct rfcomm_dlc *d;
1954 struct list_head *p, *n;
1955
1956 BT_DBG("conn %p status 0x%02x", conn, status);
1957
1958 s = rfcomm_session_get(&conn->hdev->bdaddr, &conn->dst);
1959 if (!s)
1960 return;
1961
1962 rfcomm_session_hold(s);
1963
1964 list_for_each_safe(p, n, &s->dlcs) {
1965 d = list_entry(p, struct rfcomm_dlc, list);
1966
1967 if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) &&
1968 !(conn->link_mode & HCI_LM_ENCRYPT) && !status)
1969 continue;
1970
1971 if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags))
1972 continue;
1973
1974 if (!status)
1975 set_bit(RFCOMM_AUTH_ACCEPT, &d->flags);
1976 else
1977 set_bit(RFCOMM_AUTH_REJECT, &d->flags);
1978 }
1979
1980 rfcomm_session_put(s);
1981
1982 rfcomm_schedule(RFCOMM_SCHED_AUTH);
1983}
1984
1985static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
1986{ 1997{
1987 struct rfcomm_session *s; 1998 struct rfcomm_session *s;
1988 struct rfcomm_dlc *d; 1999 struct rfcomm_dlc *d;
@@ -1999,18 +2010,29 @@ static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
1999 list_for_each_safe(p, n, &s->dlcs) { 2010 list_for_each_safe(p, n, &s->dlcs) {
2000 d = list_entry(p, struct rfcomm_dlc, list); 2011 d = list_entry(p, struct rfcomm_dlc, list);
2001 2012
2002 if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) && 2013 if (test_and_clear_bit(RFCOMM_SEC_PENDING, &d->flags)) {
2003 (d->state == BT_CONNECTED || 2014 rfcomm_dlc_clear_timer(d);
2004 d->state == BT_CONFIG) && 2015 if (status || encrypt == 0x00) {
2005 !status && encrypt == 0x00) { 2016 __rfcomm_dlc_close(d, ECONNREFUSED);
2006 __rfcomm_dlc_close(d, ECONNREFUSED); 2017 continue;
2007 continue; 2018 }
2019 }
2020
2021 if (d->state == BT_CONNECTED && !status && encrypt == 0x00) {
2022 if (d->sec_level == BT_SECURITY_MEDIUM) {
2023 set_bit(RFCOMM_SEC_PENDING, &d->flags);
2024 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
2025 continue;
2026 } else if (d->sec_level == BT_SECURITY_HIGH) {
2027 __rfcomm_dlc_close(d, ECONNREFUSED);
2028 continue;
2029 }
2008 } 2030 }
2009 2031
2010 if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags)) 2032 if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags))
2011 continue; 2033 continue;
2012 2034
2013 if (!status && encrypt) 2035 if (!status)
2014 set_bit(RFCOMM_AUTH_ACCEPT, &d->flags); 2036 set_bit(RFCOMM_AUTH_ACCEPT, &d->flags);
2015 else 2037 else
2016 set_bit(RFCOMM_AUTH_REJECT, &d->flags); 2038 set_bit(RFCOMM_AUTH_REJECT, &d->flags);
@@ -2023,8 +2045,7 @@ static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
2023 2045
2024static struct hci_cb rfcomm_cb = { 2046static struct hci_cb rfcomm_cb = {
2025 .name = "RFCOMM", 2047 .name = "RFCOMM",
2026 .auth_cfm = rfcomm_auth_cfm, 2048 .security_cfm = rfcomm_security_cfm
2027 .encrypt_cfm = rfcomm_encrypt_cfm
2028}; 2049};
2029 2050
2030static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf) 2051static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf)
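
rfcomm_check_security() above derives the HCI authentication requirement from the DLC's sec_level (set from the owning socket in the rfcomm/sock.c changes below) instead of the old RFCOMM_LM_* bits. A hedged sketch of an outgoing RFCOMM connection asking for the high, MITM-protected level; sockaddr_rc and str2ba() are the usual BlueZ userspace definitions and are assumed here, not provided by this patch:

#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <bluetooth/bluetooth.h>
#include <bluetooth/rfcomm.h>

/* BT_SECURITY_HIGH on an outgoing DLC makes rfcomm_check_security()
 * request HCI_AT_GENERAL_BONDING_MITM before the channel is opened. */
static int rfcomm_connect_secure(const char *remote, uint8_t channel)
{
	struct sockaddr_rc addr;
	struct bt_security sec;
	int sk;

	sk = socket(AF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM);
	if (sk < 0)
		return -1;

	memset(&sec, 0, sizeof(sec));
	sec.level = BT_SECURITY_HIGH;
	if (setsockopt(sk, SOL_BLUETOOTH, BT_SECURITY, &sec, sizeof(sec)) < 0) {
		close(sk);
		return -1;
	}

	memset(&addr, 0, sizeof(addr));
	addr.rc_family = AF_BLUETOOTH;
	addr.rc_channel = channel;
	str2ba(remote, &addr.rc_bdaddr);

	if (connect(sk, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
		close(sk);
		return -1;
	}
	return sk;
}
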
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index d3fc6fca38d0..7f482784e9f7 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -261,12 +261,19 @@ static void rfcomm_sock_init(struct sock *sk, struct sock *parent)
261 261
262 if (parent) { 262 if (parent) {
263 sk->sk_type = parent->sk_type; 263 sk->sk_type = parent->sk_type;
264 pi->link_mode = rfcomm_pi(parent)->link_mode; 264 pi->dlc->defer_setup = bt_sk(parent)->defer_setup;
265
266 pi->sec_level = rfcomm_pi(parent)->sec_level;
267 pi->role_switch = rfcomm_pi(parent)->role_switch;
265 } else { 268 } else {
266 pi->link_mode = 0; 269 pi->dlc->defer_setup = 0;
270
271 pi->sec_level = BT_SECURITY_LOW;
272 pi->role_switch = 0;
267 } 273 }
268 274
269 pi->dlc->link_mode = pi->link_mode; 275 pi->dlc->sec_level = pi->sec_level;
276 pi->dlc->role_switch = pi->role_switch;
270} 277}
271 278
272static struct proto rfcomm_proto = { 279static struct proto rfcomm_proto = {
@@ -406,7 +413,8 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a
406 bacpy(&bt_sk(sk)->dst, &sa->rc_bdaddr); 413 bacpy(&bt_sk(sk)->dst, &sa->rc_bdaddr);
407 rfcomm_pi(sk)->channel = sa->rc_channel; 414 rfcomm_pi(sk)->channel = sa->rc_channel;
408 415
409 d->link_mode = rfcomm_pi(sk)->link_mode; 416 d->sec_level = rfcomm_pi(sk)->sec_level;
417 d->role_switch = rfcomm_pi(sk)->role_switch;
410 418
411 err = rfcomm_dlc_open(d, &bt_sk(sk)->src, &sa->rc_bdaddr, sa->rc_channel); 419 err = rfcomm_dlc_open(d, &bt_sk(sk)->src, &sa->rc_bdaddr, sa->rc_channel);
412 if (!err) 420 if (!err)
@@ -554,6 +562,9 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
554 struct sk_buff *skb; 562 struct sk_buff *skb;
555 int sent = 0; 563 int sent = 0;
556 564
565 if (test_bit(RFCOMM_DEFER_SETUP, &d->flags))
566 return -ENOTCONN;
567
557 if (msg->msg_flags & MSG_OOB) 568 if (msg->msg_flags & MSG_OOB)
558 return -EOPNOTSUPP; 569 return -EOPNOTSUPP;
559 570
@@ -570,8 +581,11 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
570 581
571 skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE, 582 skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE,
572 msg->msg_flags & MSG_DONTWAIT, &err); 583 msg->msg_flags & MSG_DONTWAIT, &err);
573 if (!skb) 584 if (!skb) {
585 if (sent == 0)
586 sent = err;
574 break; 587 break;
588 }
575 skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE); 589 skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE);
576 590
577 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 591 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
@@ -630,10 +644,16 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
630 struct msghdr *msg, size_t size, int flags) 644 struct msghdr *msg, size_t size, int flags)
631{ 645{
632 struct sock *sk = sock->sk; 646 struct sock *sk = sock->sk;
647 struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc;
633 int err = 0; 648 int err = 0;
634 size_t target, copied = 0; 649 size_t target, copied = 0;
635 long timeo; 650 long timeo;
636 651
652 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
653 rfcomm_dlc_accept(d);
654 return 0;
655 }
656
637 if (flags & MSG_OOB) 657 if (flags & MSG_OOB)
638 return -EOPNOTSUPP; 658 return -EOPNOTSUPP;
639 659
@@ -710,7 +730,7 @@ out:
710 return copied ? : err; 730 return copied ? : err;
711} 731}
712 732
713static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) 733static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, int optlen)
714{ 734{
715 struct sock *sk = sock->sk; 735 struct sock *sk = sock->sk;
716 int err = 0; 736 int err = 0;
@@ -727,7 +747,14 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c
727 break; 747 break;
728 } 748 }
729 749
730 rfcomm_pi(sk)->link_mode = opt; 750 if (opt & RFCOMM_LM_AUTH)
751 rfcomm_pi(sk)->sec_level = BT_SECURITY_LOW;
752 if (opt & RFCOMM_LM_ENCRYPT)
753 rfcomm_pi(sk)->sec_level = BT_SECURITY_MEDIUM;
754 if (opt & RFCOMM_LM_SECURE)
755 rfcomm_pi(sk)->sec_level = BT_SECURITY_HIGH;
756
757 rfcomm_pi(sk)->role_switch = (opt & RFCOMM_LM_MASTER);
731 break; 758 break;
732 759
733 default: 760 default:
@@ -739,12 +766,76 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c
739 return err; 766 return err;
740} 767}
741 768
742static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) 769static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
770{
771 struct sock *sk = sock->sk;
772 struct bt_security sec;
773 int len, err = 0;
774 u32 opt;
775
776 BT_DBG("sk %p", sk);
777
778 if (level == SOL_RFCOMM)
779 return rfcomm_sock_setsockopt_old(sock, optname, optval, optlen);
780
781 if (level != SOL_BLUETOOTH)
782 return -ENOPROTOOPT;
783
784 lock_sock(sk);
785
786 switch (optname) {
787 case BT_SECURITY:
788 if (sk->sk_type != SOCK_STREAM) {
789 err = -EINVAL;
790 break;
791 }
792
793 sec.level = BT_SECURITY_LOW;
794
795 len = min_t(unsigned int, sizeof(sec), optlen);
796 if (copy_from_user((char *) &sec, optval, len)) {
797 err = -EFAULT;
798 break;
799 }
800
801 if (sec.level > BT_SECURITY_HIGH) {
802 err = -EINVAL;
803 break;
804 }
805
806 rfcomm_pi(sk)->sec_level = sec.level;
807 break;
808
809 case BT_DEFER_SETUP:
810 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
811 err = -EINVAL;
812 break;
813 }
814
815 if (get_user(opt, (u32 __user *) optval)) {
816 err = -EFAULT;
817 break;
818 }
819
820 bt_sk(sk)->defer_setup = opt;
821 break;
822
823 default:
824 err = -ENOPROTOOPT;
825 break;
826 }
827
828 release_sock(sk);
829 return err;
830}
831
832static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
743{ 833{
744 struct sock *sk = sock->sk; 834 struct sock *sk = sock->sk;
745 struct sock *l2cap_sk; 835 struct sock *l2cap_sk;
746 struct rfcomm_conninfo cinfo; 836 struct rfcomm_conninfo cinfo;
747 int len, err = 0; 837 int len, err = 0;
838 u32 opt;
748 839
749 BT_DBG("sk %p", sk); 840 BT_DBG("sk %p", sk);
750 841
@@ -755,12 +846,32 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c
755 846
756 switch (optname) { 847 switch (optname) {
757 case RFCOMM_LM: 848 case RFCOMM_LM:
758 if (put_user(rfcomm_pi(sk)->link_mode, (u32 __user *) optval)) 849 switch (rfcomm_pi(sk)->sec_level) {
850 case BT_SECURITY_LOW:
851 opt = RFCOMM_LM_AUTH;
852 break;
853 case BT_SECURITY_MEDIUM:
854 opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT;
855 break;
856 case BT_SECURITY_HIGH:
857 opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT |
858 RFCOMM_LM_SECURE;
859 break;
860 default:
861 opt = 0;
862 break;
863 }
864
865 if (rfcomm_pi(sk)->role_switch)
866 opt |= RFCOMM_LM_MASTER;
867
868 if (put_user(opt, (u32 __user *) optval))
759 err = -EFAULT; 869 err = -EFAULT;
760 break; 870 break;
761 871
762 case RFCOMM_CONNINFO: 872 case RFCOMM_CONNINFO:
763 if (sk->sk_state != BT_CONNECTED) { 873 if (sk->sk_state != BT_CONNECTED &&
874 !rfcomm_pi(sk)->dlc->defer_setup) {
764 err = -ENOTCONN; 875 err = -ENOTCONN;
765 break; 876 break;
766 } 877 }
@@ -785,6 +896,60 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c
785 return err; 896 return err;
786} 897}
787 898
899static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
900{
901 struct sock *sk = sock->sk;
902 struct bt_security sec;
903 int len, err = 0;
904
905 BT_DBG("sk %p", sk);
906
907 if (level == SOL_RFCOMM)
908 return rfcomm_sock_getsockopt_old(sock, optname, optval, optlen);
909
910 if (level != SOL_BLUETOOTH)
911 return -ENOPROTOOPT;
912
913 if (get_user(len, optlen))
914 return -EFAULT;
915
916 lock_sock(sk);
917
918 switch (optname) {
919 case BT_SECURITY:
920 if (sk->sk_type != SOCK_STREAM) {
921 err = -EINVAL;
922 break;
923 }
924
925 sec.level = rfcomm_pi(sk)->sec_level;
926
927 len = min_t(unsigned int, len, sizeof(sec));
928 if (copy_to_user(optval, (char *) &sec, len))
929 err = -EFAULT;
930
931 break;
932
933 case BT_DEFER_SETUP:
934 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
935 err = -EINVAL;
936 break;
937 }
938
939 if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval))
940 err = -EFAULT;
941
942 break;
943
944 default:
945 err = -ENOPROTOOPT;
946 break;
947 }
948
949 release_sock(sk);
950 return err;
951}
952
788static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 953static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
789{ 954{
790 struct sock *sk __maybe_unused = sock->sk; 955 struct sock *sk __maybe_unused = sock->sk;
@@ -888,6 +1053,10 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
888 1053
889done: 1054done:
890 bh_unlock_sock(parent); 1055 bh_unlock_sock(parent);
1056
1057 if (bt_sk(parent)->defer_setup)
1058 parent->sk_state_change(parent);
1059
891 return result; 1060 return result;
892} 1061}
893 1062
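Editor's note: the RFCOMM hunks above route SOL_BLUETOOTH options (BT_SECURITY, BT_DEFER_SETUP) through the new getsockopt/setsockopt paths while keeping the old SOL_RFCOMM behaviour in the *_old helpers. Below is a hedged userspace sketch of how an application might exercise the new options; it assumes the BlueZ headers expose SOL_BLUETOOTH, BT_SECURITY, BT_DEFER_SETUP and struct bt_security as added by this series, and the channel number is purely illustrative.

/* Sketch only: set and read back the new SOL_BLUETOOTH options on an
 * RFCOMM server socket (assumes BlueZ userspace headers). */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <bluetooth/bluetooth.h>
#include <bluetooth/rfcomm.h>

int main(void)
{
        struct sockaddr_rc addr;
        struct bt_security sec;
        socklen_t len = sizeof(sec);
        uint32_t defer = 1;
        int sk = socket(AF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM);

        if (sk < 0) { perror("socket"); return 1; }

        memset(&addr, 0, sizeof(addr));
        addr.rc_family = AF_BLUETOOTH;
        bacpy(&addr.rc_bdaddr, BDADDR_ANY);
        addr.rc_channel = 1;                    /* channel is illustrative */
        if (bind(sk, (struct sockaddr *) &addr, sizeof(addr)) < 0) {
                perror("bind");
                return 1;
        }

        /* Request authentication + encryption for this socket. */
        memset(&sec, 0, sizeof(sec));
        sec.level = BT_SECURITY_MEDIUM;
        if (setsockopt(sk, SOL_BLUETOOTH, BT_SECURITY, &sec, sizeof(sec)) < 0)
                perror("setsockopt(BT_SECURITY)");

        /* Defer connection setup until the first read, as the new
         * BT_DEFER_SETUP option allows on a bound/listening socket. */
        if (setsockopt(sk, SOL_BLUETOOTH, BT_DEFER_SETUP, &defer, sizeof(defer)) < 0)
                perror("setsockopt(BT_DEFER_SETUP)");

        /* Read the security level back through the new getsockopt path. */
        if (getsockopt(sk, SOL_BLUETOOTH, BT_SECURITY, &sec, &len) == 0)
                printf("security level %u\n", (unsigned) sec.level);

        return 0;
}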
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 46fd8bf9a690..51ae0c3e470a 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -195,7 +195,7 @@ static int sco_connect(struct sock *sk)
195 else 195 else
196 type = SCO_LINK; 196 type = SCO_LINK;
197 197
198 hcon = hci_connect(hdev, type, dst, HCI_AT_NO_BONDING); 198 hcon = hci_connect(hdev, type, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING);
199 if (!hcon) 199 if (!hcon)
200 goto done; 200 goto done;
201 201
@@ -668,7 +668,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char
668 return err; 668 return err;
669} 669}
670 670
671static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) 671static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
672{ 672{
673 struct sock *sk = sock->sk; 673 struct sock *sk = sock->sk;
674 struct sco_options opts; 674 struct sco_options opts;
@@ -723,6 +723,31 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char
723 return err; 723 return err;
724} 724}
725 725
726static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
727{
728 struct sock *sk = sock->sk;
729 int len, err = 0;
730
731 BT_DBG("sk %p", sk);
732
733 if (level == SOL_SCO)
734 return sco_sock_getsockopt_old(sock, optname, optval, optlen);
735
736 if (get_user(len, optlen))
737 return -EFAULT;
738
739 lock_sock(sk);
740
741 switch (optname) {
742 default:
743 err = -ENOPROTOOPT;
744 break;
745 }
746
747 release_sock(sk);
748 return err;
749}
750
726static int sco_sock_release(struct socket *sock) 751static int sco_sock_release(struct socket *sock)
727{ 752{
728 struct sock *sk = sock->sk; 753 struct sock *sk = sock->sk;
@@ -832,10 +857,30 @@ done:
832/* ----- SCO interface with lower layer (HCI) ----- */ 857/* ----- SCO interface with lower layer (HCI) ----- */
833static int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type) 858static int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type)
834{ 859{
860 register struct sock *sk;
861 struct hlist_node *node;
862 int lm = 0;
863
864 if (type != SCO_LINK && type != ESCO_LINK)
865 return 0;
866
835 BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); 867 BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr));
836 868
837 /* Always accept connection */ 869 /* Find listening sockets */
838 return HCI_LM_ACCEPT; 870 read_lock(&sco_sk_list.lock);
871 sk_for_each(sk, node, &sco_sk_list.head) {
872 if (sk->sk_state != BT_LISTEN)
873 continue;
874
875 if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr) ||
876 !bacmp(&bt_sk(sk)->src, BDADDR_ANY)) {
877 lm |= HCI_LM_ACCEPT;
878 break;
879 }
880 }
881 read_unlock(&sco_sk_list.lock);
882
883 return lm;
839} 884}
840 885
841static int sco_connect_cfm(struct hci_conn *hcon, __u8 status) 886static int sco_connect_cfm(struct hci_conn *hcon, __u8 status)
@@ -857,7 +902,7 @@ static int sco_connect_cfm(struct hci_conn *hcon, __u8 status)
857 return 0; 902 return 0;
858} 903}
859 904
860static int sco_disconn_ind(struct hci_conn *hcon, __u8 reason) 905static int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
861{ 906{
862 BT_DBG("hcon %p reason %d", hcon, reason); 907 BT_DBG("hcon %p reason %d", hcon, reason);
863 908
@@ -940,7 +985,7 @@ static struct hci_proto sco_hci_proto = {
940 .id = HCI_PROTO_SCO, 985 .id = HCI_PROTO_SCO,
941 .connect_ind = sco_connect_ind, 986 .connect_ind = sco_connect_ind,
942 .connect_cfm = sco_connect_cfm, 987 .connect_cfm = sco_connect_cfm,
943 .disconn_ind = sco_disconn_ind, 988 .disconn_cfm = sco_disconn_cfm,
944 .recv_scodata = sco_recv_scodata 989 .recv_scodata = sco_recv_scodata
945}; 990};
946 991
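Editor's note: sco_connect_ind() above no longer accepts every incoming SCO/eSCO link; it only returns HCI_LM_ACCEPT when some socket is listening on the local adapter address or on BDADDR_ANY. The following hedged userspace sketch shows the kind of listener that check expects, assuming the BlueZ <bluetooth/sco.h> definition of struct sockaddr_sco.

/* Sketch only: a SCO listener bound to BDADDR_ANY, the condition the
 * reworked sco_connect_ind() looks for before accepting a connection. */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <bluetooth/bluetooth.h>
#include <bluetooth/sco.h>

int main(void)
{
        struct sockaddr_sco addr;
        int client;
        int sk = socket(AF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_SCO);

        if (sk < 0) { perror("socket"); return 1; }

        memset(&addr, 0, sizeof(addr));
        addr.sco_family = AF_BLUETOOTH;
        bacpy(&addr.sco_bdaddr, BDADDR_ANY);    /* any local adapter */

        if (bind(sk, (struct sockaddr *) &addr, sizeof(addr)) < 0 ||
            listen(sk, 1) < 0) {
                perror("bind/listen");
                return 1;
        }

        /* Incoming SCO links are now only accepted while this blocks. */
        client = accept(sk, NULL, NULL);
        if (client < 0)
                perror("accept");
        else
                printf("SCO connection accepted\n");

        return 0;
}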
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index cf754ace0b75..3953ac4214c8 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -107,7 +107,7 @@ static void fake_update_pmtu(struct dst_entry *dst, u32 mtu)
107 107
108static struct dst_ops fake_dst_ops = { 108static struct dst_ops fake_dst_ops = {
109 .family = AF_INET, 109 .family = AF_INET,
110 .protocol = __constant_htons(ETH_P_IP), 110 .protocol = cpu_to_be16(ETH_P_IP),
111 .update_pmtu = fake_update_pmtu, 111 .update_pmtu = fake_update_pmtu,
112 .entries = ATOMIC_INIT(0), 112 .entries = ATOMIC_INIT(0),
113}; 113};
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index ba7be195803c..fcffb3fb1177 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -98,7 +98,8 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
98 kfree_skb(skb); 98 kfree_skb(skb);
99 goto errout; 99 goto errout;
100 } 100 }
101 err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); 101 rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
102 return;
102errout: 103errout:
103 if (err < 0) 104 if (err < 0)
104 rtnl_set_sk_err(net, RTNLGRP_LINK, err); 105 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
diff --git a/net/can/af_can.c b/net/can/af_can.c
index fa417ca6cbe6..547bafc79e28 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -273,8 +273,7 @@ int can_send(struct sk_buff *skb, int loop)
273 err = net_xmit_errno(err); 273 err = net_xmit_errno(err);
274 274
275 if (err) { 275 if (err) {
276 if (newskb) 276 kfree_skb(newskb);
277 kfree_skb(newskb);
278 return err; 277 return err;
279 } 278 }
280 279
@@ -828,7 +827,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
828 */ 827 */
829 828
830static struct packet_type can_packet __read_mostly = { 829static struct packet_type can_packet __read_mostly = {
831 .type = __constant_htons(ETH_P_CAN), 830 .type = cpu_to_be16(ETH_P_CAN),
832 .dev = NULL, 831 .dev = NULL,
833 .func = can_rcv, 832 .func = can_rcv,
834}; 833};
diff --git a/net/can/raw.c b/net/can/raw.c
index 0703cba4bf9f..6aa154e806ae 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -648,6 +648,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
648 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 648 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
649 if (err < 0) 649 if (err < 0)
650 goto free_skb; 650 goto free_skb;
651 err = sock_tx_timestamp(msg, sk, skb_tx(skb));
652 if (err < 0)
653 goto free_skb;
651 skb->dev = dev; 654 skb->dev = dev;
652 skb->sk = sk; 655 skb->sk = sk;
653 656
diff --git a/net/compat.c b/net/compat.c
index a3a2ba0fac08..8d739053afe4 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -216,7 +216,7 @@ Efault:
216int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data) 216int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data)
217{ 217{
218 struct compat_timeval ctv; 218 struct compat_timeval ctv;
219 struct compat_timespec cts; 219 struct compat_timespec cts[3];
220 struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control; 220 struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
221 struct compat_cmsghdr cmhdr; 221 struct compat_cmsghdr cmhdr;
222 int cmlen; 222 int cmlen;
@@ -233,12 +233,17 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
233 data = &ctv; 233 data = &ctv;
234 len = sizeof(ctv); 234 len = sizeof(ctv);
235 } 235 }
236 if (level == SOL_SOCKET && type == SCM_TIMESTAMPNS) { 236 if (level == SOL_SOCKET &&
237 (type == SCM_TIMESTAMPNS || type == SCM_TIMESTAMPING)) {
238 int count = type == SCM_TIMESTAMPNS ? 1 : 3;
239 int i;
237 struct timespec *ts = (struct timespec *)data; 240 struct timespec *ts = (struct timespec *)data;
238 cts.tv_sec = ts->tv_sec; 241 for (i = 0; i < count; i++) {
239 cts.tv_nsec = ts->tv_nsec; 242 cts[i].tv_sec = ts[i].tv_sec;
243 cts[i].tv_nsec = ts[i].tv_nsec;
244 }
240 data = &cts; 245 data = &cts;
241 len = sizeof(cts); 246 len = sizeof(cts[0]) * count;
242 } 247 }
243 248
244 cmlen = CMSG_COMPAT_LEN(len); 249 cmlen = CMSG_COMPAT_LEN(len);
@@ -455,7 +460,7 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
455 struct timeval tv; 460 struct timeval tv;
456 461
457 if (!sock_flag(sk, SOCK_TIMESTAMP)) 462 if (!sock_flag(sk, SOCK_TIMESTAMP))
458 sock_enable_timestamp(sk); 463 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
459 tv = ktime_to_timeval(sk->sk_stamp); 464 tv = ktime_to_timeval(sk->sk_stamp);
460 if (tv.tv_sec == -1) 465 if (tv.tv_sec == -1)
461 return err; 466 return err;
@@ -479,7 +484,7 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta
479 struct timespec ts; 484 struct timespec ts;
480 485
481 if (!sock_flag(sk, SOCK_TIMESTAMP)) 486 if (!sock_flag(sk, SOCK_TIMESTAMP))
482 sock_enable_timestamp(sk); 487 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
483 ts = ktime_to_timespec(sk->sk_stamp); 488 ts = ktime_to_timespec(sk->sk_stamp);
484 if (ts.tv_sec == -1) 489 if (ts.tv_sec == -1)
485 return err; 490 return err;
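Editor's note: put_cmsg_compat() above now also converts SCM_TIMESTAMPING, which carries three struct timespec values (software time, transformed hardware time, raw hardware time) instead of the single timespec of SCM_TIMESTAMPNS. A hedged sketch of the receiving side follows; it assumes the SO_TIMESTAMPING/SOF_TIMESTAMPING_* interface from <linux/net_tstamp.h> introduced alongside this series, and compiles only against headers that already ship those definitions.

/* Sketch only: enable timestamping and read the three-timespec
 * SCM_TIMESTAMPING control message from recvmsg(). */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/net_tstamp.h>
#include <time.h>

int enable_timestamping(int sk)
{
        int val = SOF_TIMESTAMPING_RX_HARDWARE |
                  SOF_TIMESTAMPING_RX_SOFTWARE |
                  SOF_TIMESTAMPING_SOFTWARE |
                  SOF_TIMESTAMPING_RAW_HARDWARE;

        return setsockopt(sk, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
}

void dump_tstamps(struct msghdr *msg)
{
        struct cmsghdr *cm;

        for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) {
                if (cm->cmsg_level == SOL_SOCKET &&
                    cm->cmsg_type == SCM_TIMESTAMPING) {
                        struct timespec ts[3];

                        memcpy(ts, CMSG_DATA(cm), sizeof(ts));
                        /* ts[0]: software, ts[1]: transformed hw, ts[2]: raw hw */
                        printf("sw %ld.%09ld  raw hw %ld.%09ld\n",
                               (long) ts[0].tv_sec, ts[0].tv_nsec,
                               (long) ts[2].tv_sec, ts[2].tv_nsec);
                }
        }
}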
diff --git a/net/core/dev.c b/net/core/dev.c
index f1129706ce7b..033d7ca28e6e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -135,6 +135,14 @@
135/* This should be increased if a protocol with a bigger head is added. */ 135/* This should be increased if a protocol with a bigger head is added. */
136#define GRO_MAX_HEAD (MAX_HEADER + 128) 136#define GRO_MAX_HEAD (MAX_HEADER + 128)
137 137
138enum {
139 GRO_MERGED,
140 GRO_MERGED_FREE,
141 GRO_HELD,
142 GRO_NORMAL,
143 GRO_DROP,
144};
145
138/* 146/*
139 * The list of packet types we will receive (as opposed to discard) 147 * The list of packet types we will receive (as opposed to discard)
140 * and the routines to invoke. 148 * and the routines to invoke.
@@ -1668,6 +1676,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1668 struct netdev_queue *txq) 1676 struct netdev_queue *txq)
1669{ 1677{
1670 const struct net_device_ops *ops = dev->netdev_ops; 1678 const struct net_device_ops *ops = dev->netdev_ops;
1679 int rc;
1671 1680
1672 prefetch(&dev->netdev_ops->ndo_start_xmit); 1681 prefetch(&dev->netdev_ops->ndo_start_xmit);
1673 if (likely(!skb->next)) { 1682 if (likely(!skb->next)) {
@@ -1681,13 +1690,27 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1681 goto gso; 1690 goto gso;
1682 } 1691 }
1683 1692
1684 return ops->ndo_start_xmit(skb, dev); 1693 rc = ops->ndo_start_xmit(skb, dev);
1694 /*
1695 * TODO: if skb_orphan() was called by
1696 * dev->hard_start_xmit() (for example, the unmodified
1697 * igb driver does that; bnx2 doesn't), then
1698 * skb_tx_software_timestamp() will be unable to send
1699 * back the time stamp.
1700 *
1701 * How can this be prevented? Always create another
1702 * reference to the socket before calling
1703 * dev->hard_start_xmit()? Prevent that skb_orphan()
1704 * does anything in dev->hard_start_xmit() by clearing
1705 * the skb destructor before the call and restoring it
1706 * afterwards, then doing the skb_orphan() ourselves?
1707 */
1708 return rc;
1685 } 1709 }
1686 1710
1687gso: 1711gso:
1688 do { 1712 do {
1689 struct sk_buff *nskb = skb->next; 1713 struct sk_buff *nskb = skb->next;
1690 int rc;
1691 1714
1692 skb->next = nskb->next; 1715 skb->next = nskb->next;
1693 nskb->next = NULL; 1716 nskb->next = NULL;
@@ -1708,56 +1731,20 @@ out_kfree_skb:
1708 return 0; 1731 return 0;
1709} 1732}
1710 1733
1711static u32 simple_tx_hashrnd; 1734static u32 skb_tx_hashrnd;
1712static int simple_tx_hashrnd_initialized = 0;
1713 1735
1714static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) 1736static u16 skb_tx_hash(struct net_device *dev, struct sk_buff *skb)
1715{ 1737{
1716 u32 addr1, addr2, ports; 1738 u32 hash;
1717 u32 hash, ihl;
1718 u8 ip_proto = 0;
1719
1720 if (unlikely(!simple_tx_hashrnd_initialized)) {
1721 get_random_bytes(&simple_tx_hashrnd, 4);
1722 simple_tx_hashrnd_initialized = 1;
1723 }
1724
1725 switch (skb->protocol) {
1726 case htons(ETH_P_IP):
1727 if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1728 ip_proto = ip_hdr(skb)->protocol;
1729 addr1 = ip_hdr(skb)->saddr;
1730 addr2 = ip_hdr(skb)->daddr;
1731 ihl = ip_hdr(skb)->ihl;
1732 break;
1733 case htons(ETH_P_IPV6):
1734 ip_proto = ipv6_hdr(skb)->nexthdr;
1735 addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1736 addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
1737 ihl = (40 >> 2);
1738 break;
1739 default:
1740 return 0;
1741 }
1742 1739
1740 if (skb_rx_queue_recorded(skb)) {
1741 hash = skb_get_rx_queue(skb);
1742 } else if (skb->sk && skb->sk->sk_hash) {
1743 hash = skb->sk->sk_hash;
1744 } else
1745 hash = skb->protocol;
1743 1746
1744 switch (ip_proto) { 1747 hash = jhash_1word(hash, skb_tx_hashrnd);
1745 case IPPROTO_TCP:
1746 case IPPROTO_UDP:
1747 case IPPROTO_DCCP:
1748 case IPPROTO_ESP:
1749 case IPPROTO_AH:
1750 case IPPROTO_SCTP:
1751 case IPPROTO_UDPLITE:
1752 ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
1753 break;
1754
1755 default:
1756 ports = 0;
1757 break;
1758 }
1759
1760 hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1761 1748
1762 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); 1749 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1763} 1750}
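Editor's note: skb_tx_hash() above replaces the flow-dissecting simple_tx_hash() with a cheaper scheme: reuse the recorded RX queue or the socket hash, mix it once with jhash_1word(), and scale the 32-bit result onto the TX queue range with a multiply-and-shift instead of a modulo. A small standalone illustration of that final mapping (plain C, not kernel code):

/* Maps a 32-bit hash uniformly onto [0, nqueues) without a division:
 * the same ((u64)hash * n) >> 32 trick used by skb_tx_hash() above. */
#include <stdio.h>
#include <stdint.h>

static uint16_t hash_to_queue(uint32_t hash, uint16_t nqueues)
{
        return (uint16_t)(((uint64_t) hash * nqueues) >> 32);
}

int main(void)
{
        uint32_t samples[] = { 0x00000000u, 0x40000000u, 0x80000000u, 0xffffffffu };
        unsigned i;

        for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
                printf("hash %#010x -> queue %u\n", (unsigned) samples[i],
                       hash_to_queue(samples[i], 8));

        /* Prints queues 0, 2, 4 and 7: the hash space is split evenly. */
        return 0;
}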
@@ -1771,7 +1758,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1771 if (ops->ndo_select_queue) 1758 if (ops->ndo_select_queue)
1772 queue_index = ops->ndo_select_queue(dev, skb); 1759 queue_index = ops->ndo_select_queue(dev, skb);
1773 else if (dev->real_num_tx_queues > 1) 1760 else if (dev->real_num_tx_queues > 1)
1774 queue_index = simple_tx_hash(dev, skb); 1761 queue_index = skb_tx_hash(dev, skb);
1775 1762
1776 skb_set_queue_mapping(skb, queue_index); 1763 skb_set_queue_mapping(skb, queue_index);
1777 return netdev_get_tx_queue(dev, queue_index); 1764 return netdev_get_tx_queue(dev, queue_index);
@@ -2297,6 +2284,8 @@ ncls:
2297 if (!skb) 2284 if (!skb)
2298 goto out; 2285 goto out;
2299 2286
2287 skb_orphan(skb);
2288
2300 type = skb->protocol; 2289 type = skb->protocol;
2301 list_for_each_entry_rcu(ptype, 2290 list_for_each_entry_rcu(ptype,
2302 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 2291 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
@@ -2366,7 +2355,6 @@ static int napi_gro_complete(struct sk_buff *skb)
2366 2355
2367out: 2356out:
2368 skb_shinfo(skb)->gso_size = 0; 2357 skb_shinfo(skb)->gso_size = 0;
2369 __skb_push(skb, -skb_network_offset(skb));
2370 return netif_receive_skb(skb); 2358 return netif_receive_skb(skb);
2371} 2359}
2372 2360
@@ -2380,20 +2368,40 @@ void napi_gro_flush(struct napi_struct *napi)
2380 napi_gro_complete(skb); 2368 napi_gro_complete(skb);
2381 } 2369 }
2382 2370
2371 napi->gro_count = 0;
2383 napi->gro_list = NULL; 2372 napi->gro_list = NULL;
2384} 2373}
2385EXPORT_SYMBOL(napi_gro_flush); 2374EXPORT_SYMBOL(napi_gro_flush);
2386 2375
2376void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
2377{
2378 unsigned int offset = skb_gro_offset(skb);
2379
2380 hlen += offset;
2381 if (hlen <= skb_headlen(skb))
2382 return skb->data + offset;
2383
2384 if (unlikely(!skb_shinfo(skb)->nr_frags ||
2385 skb_shinfo(skb)->frags[0].size <=
2386 hlen - skb_headlen(skb) ||
2387 PageHighMem(skb_shinfo(skb)->frags[0].page)))
2388 return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
2389
2390 return page_address(skb_shinfo(skb)->frags[0].page) +
2391 skb_shinfo(skb)->frags[0].page_offset +
2392 offset - skb_headlen(skb);
2393}
2394EXPORT_SYMBOL(skb_gro_header);
2395
2387int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 2396int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2388{ 2397{
2389 struct sk_buff **pp = NULL; 2398 struct sk_buff **pp = NULL;
2390 struct packet_type *ptype; 2399 struct packet_type *ptype;
2391 __be16 type = skb->protocol; 2400 __be16 type = skb->protocol;
2392 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; 2401 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2393 int count = 0;
2394 int same_flow; 2402 int same_flow;
2395 int mac_len; 2403 int mac_len;
2396 int free; 2404 int ret;
2397 2405
2398 if (!(skb->dev->features & NETIF_F_GRO)) 2406 if (!(skb->dev->features & NETIF_F_GRO))
2399 goto normal; 2407 goto normal;
@@ -2403,30 +2411,16 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2403 2411
2404 rcu_read_lock(); 2412 rcu_read_lock();
2405 list_for_each_entry_rcu(ptype, head, list) { 2413 list_for_each_entry_rcu(ptype, head, list) {
2406 struct sk_buff *p;
2407
2408 if (ptype->type != type || ptype->dev || !ptype->gro_receive) 2414 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2409 continue; 2415 continue;
2410 2416
2411 skb_reset_network_header(skb); 2417 skb_set_network_header(skb, skb_gro_offset(skb));
2412 mac_len = skb->network_header - skb->mac_header; 2418 mac_len = skb->network_header - skb->mac_header;
2413 skb->mac_len = mac_len; 2419 skb->mac_len = mac_len;
2414 NAPI_GRO_CB(skb)->same_flow = 0; 2420 NAPI_GRO_CB(skb)->same_flow = 0;
2415 NAPI_GRO_CB(skb)->flush = 0; 2421 NAPI_GRO_CB(skb)->flush = 0;
2416 NAPI_GRO_CB(skb)->free = 0; 2422 NAPI_GRO_CB(skb)->free = 0;
2417 2423
2418 for (p = napi->gro_list; p; p = p->next) {
2419 count++;
2420
2421 if (!NAPI_GRO_CB(p)->same_flow)
2422 continue;
2423
2424 if (p->mac_len != mac_len ||
2425 memcmp(skb_mac_header(p), skb_mac_header(skb),
2426 mac_len))
2427 NAPI_GRO_CB(p)->same_flow = 0;
2428 }
2429
2430 pp = ptype->gro_receive(&napi->gro_list, skb); 2424 pp = ptype->gro_receive(&napi->gro_list, skb);
2431 break; 2425 break;
2432 } 2426 }
@@ -2436,7 +2430,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2436 goto normal; 2430 goto normal;
2437 2431
2438 same_flow = NAPI_GRO_CB(skb)->same_flow; 2432 same_flow = NAPI_GRO_CB(skb)->same_flow;
2439 free = NAPI_GRO_CB(skb)->free; 2433 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
2440 2434
2441 if (pp) { 2435 if (pp) {
2442 struct sk_buff *nskb = *pp; 2436 struct sk_buff *nskb = *pp;
@@ -2444,27 +2438,35 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2444 *pp = nskb->next; 2438 *pp = nskb->next;
2445 nskb->next = NULL; 2439 nskb->next = NULL;
2446 napi_gro_complete(nskb); 2440 napi_gro_complete(nskb);
2447 count--; 2441 napi->gro_count--;
2448 } 2442 }
2449 2443
2450 if (same_flow) 2444 if (same_flow)
2451 goto ok; 2445 goto ok;
2452 2446
2453 if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { 2447 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
2454 __skb_push(skb, -skb_network_offset(skb));
2455 goto normal; 2448 goto normal;
2456 }
2457 2449
2450 napi->gro_count++;
2458 NAPI_GRO_CB(skb)->count = 1; 2451 NAPI_GRO_CB(skb)->count = 1;
2459 skb_shinfo(skb)->gso_size = skb->len; 2452 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
2460 skb->next = napi->gro_list; 2453 skb->next = napi->gro_list;
2461 napi->gro_list = skb; 2454 napi->gro_list = skb;
2455 ret = GRO_HELD;
2456
2457pull:
2458 if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
2459 if (napi->gro_list == skb)
2460 napi->gro_list = skb->next;
2461 ret = GRO_DROP;
2462 }
2462 2463
2463ok: 2464ok:
2464 return free; 2465 return ret;
2465 2466
2466normal: 2467normal:
2467 return -1; 2468 ret = GRO_NORMAL;
2469 goto pull;
2468} 2470}
2469EXPORT_SYMBOL(dev_gro_receive); 2471EXPORT_SYMBOL(dev_gro_receive);
2470 2472
@@ -2473,28 +2475,43 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2473 struct sk_buff *p; 2475 struct sk_buff *p;
2474 2476
2475 for (p = napi->gro_list; p; p = p->next) { 2477 for (p = napi->gro_list; p; p = p->next) {
2476 NAPI_GRO_CB(p)->same_flow = 1; 2478 NAPI_GRO_CB(p)->same_flow = !compare_ether_header(
2479 skb_mac_header(p), skb_gro_mac_header(skb));
2477 NAPI_GRO_CB(p)->flush = 0; 2480 NAPI_GRO_CB(p)->flush = 0;
2478 } 2481 }
2479 2482
2480 return dev_gro_receive(napi, skb); 2483 return dev_gro_receive(napi, skb);
2481} 2484}
2482 2485
2483int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 2486int napi_skb_finish(int ret, struct sk_buff *skb)
2484{ 2487{
2488 int err = NET_RX_SUCCESS;
2489
2485 if (netpoll_receive_skb(skb)) 2490 if (netpoll_receive_skb(skb))
2486 return NET_RX_DROP; 2491 return NET_RX_DROP;
2487 2492
2488 switch (__napi_gro_receive(napi, skb)) { 2493 switch (ret) {
2489 case -1: 2494 case GRO_NORMAL:
2490 return netif_receive_skb(skb); 2495 return netif_receive_skb(skb);
2491 2496
2492 case 1: 2497 case GRO_DROP:
2498 err = NET_RX_DROP;
2499 /* fall through */
2500
2501 case GRO_MERGED_FREE:
2493 kfree_skb(skb); 2502 kfree_skb(skb);
2494 break; 2503 break;
2495 } 2504 }
2496 2505
2497 return NET_RX_SUCCESS; 2506 return err;
2507}
2508EXPORT_SYMBOL(napi_skb_finish);
2509
2510int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2511{
2512 skb_gro_reset_offset(skb);
2513
2514 return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
2498} 2515}
2499EXPORT_SYMBOL(napi_gro_receive); 2516EXPORT_SYMBOL(napi_gro_receive);
2500 2517
@@ -2512,6 +2529,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
2512{ 2529{
2513 struct net_device *dev = napi->dev; 2530 struct net_device *dev = napi->dev;
2514 struct sk_buff *skb = napi->skb; 2531 struct sk_buff *skb = napi->skb;
2532 struct ethhdr *eth;
2533 skb_frag_t *frag;
2534 int i;
2515 2535
2516 napi->skb = NULL; 2536 napi->skb = NULL;
2517 2537
@@ -2524,20 +2544,36 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
2524 } 2544 }
2525 2545
2526 BUG_ON(info->nr_frags > MAX_SKB_FRAGS); 2546 BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
2547 frag = &info->frags[info->nr_frags - 1];
2548
2549 for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) {
2550 skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
2551 frag->size);
2552 frag++;
2553 }
2527 skb_shinfo(skb)->nr_frags = info->nr_frags; 2554 skb_shinfo(skb)->nr_frags = info->nr_frags;
2528 memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
2529 2555
2530 skb->data_len = info->len; 2556 skb->data_len = info->len;
2531 skb->len += info->len; 2557 skb->len += info->len;
2532 skb->truesize += info->len; 2558 skb->truesize += info->len;
2533 2559
2534 if (!pskb_may_pull(skb, ETH_HLEN)) { 2560 skb_reset_mac_header(skb);
2561 skb_gro_reset_offset(skb);
2562
2563 eth = skb_gro_header(skb, sizeof(*eth));
2564 if (!eth) {
2535 napi_reuse_skb(napi, skb); 2565 napi_reuse_skb(napi, skb);
2536 skb = NULL; 2566 skb = NULL;
2537 goto out; 2567 goto out;
2538 } 2568 }
2539 2569
2540 skb->protocol = eth_type_trans(skb, dev); 2570 skb_gro_pull(skb, sizeof(*eth));
2571
2572 /*
2573 * This works because the only protocols we care about don't require
2574 * special handling. We'll fix it up properly at the end.
2575 */
2576 skb->protocol = eth->h_proto;
2541 2577
2542 skb->ip_summed = info->ip_summed; 2578 skb->ip_summed = info->ip_summed;
2543 skb->csum = info->csum; 2579 skb->csum = info->csum;
@@ -2547,32 +2583,46 @@ out:
2547} 2583}
2548EXPORT_SYMBOL(napi_fraginfo_skb); 2584EXPORT_SYMBOL(napi_fraginfo_skb);
2549 2585
2550int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) 2586int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
2551{ 2587{
2552 struct sk_buff *skb = napi_fraginfo_skb(napi, info); 2588 int err = NET_RX_SUCCESS;
2553 int err = NET_RX_DROP;
2554
2555 if (!skb)
2556 goto out;
2557 2589
2558 if (netpoll_receive_skb(skb)) 2590 if (netpoll_receive_skb(skb))
2559 goto out; 2591 return NET_RX_DROP;
2560 2592
2561 err = NET_RX_SUCCESS; 2593 switch (ret) {
2594 case GRO_NORMAL:
2595 case GRO_HELD:
2596 skb->protocol = eth_type_trans(skb, napi->dev);
2562 2597
2563 switch (__napi_gro_receive(napi, skb)) { 2598 if (ret == GRO_NORMAL)
2564 case -1: 2599 return netif_receive_skb(skb);
2565 return netif_receive_skb(skb);
2566 2600
2567 case 0: 2601 skb_gro_pull(skb, -ETH_HLEN);
2568 goto out; 2602 break;
2569 }
2570 2603
2571 napi_reuse_skb(napi, skb); 2604 case GRO_DROP:
2605 err = NET_RX_DROP;
2606 /* fall through */
2607
2608 case GRO_MERGED_FREE:
2609 napi_reuse_skb(napi, skb);
2610 break;
2611 }
2572 2612
2573out:
2574 return err; 2613 return err;
2575} 2614}
2615EXPORT_SYMBOL(napi_frags_finish);
2616
2617int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
2618{
2619 struct sk_buff *skb = napi_fraginfo_skb(napi, info);
2620
2621 if (!skb)
2622 return NET_RX_DROP;
2623
2624 return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
2625}
2576EXPORT_SYMBOL(napi_gro_frags); 2626EXPORT_SYMBOL(napi_gro_frags);
2577 2627
2578static int process_backlog(struct napi_struct *napi, int quota) 2628static int process_backlog(struct napi_struct *napi, int quota)
@@ -2652,6 +2702,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2652 int (*poll)(struct napi_struct *, int), int weight) 2702 int (*poll)(struct napi_struct *, int), int weight)
2653{ 2703{
2654 INIT_LIST_HEAD(&napi->poll_list); 2704 INIT_LIST_HEAD(&napi->poll_list);
2705 napi->gro_count = 0;
2655 napi->gro_list = NULL; 2706 napi->gro_list = NULL;
2656 napi->skb = NULL; 2707 napi->skb = NULL;
2657 napi->poll = poll; 2708 napi->poll = poll;
@@ -2680,6 +2731,7 @@ void netif_napi_del(struct napi_struct *napi)
2680 } 2731 }
2681 2732
2682 napi->gro_list = NULL; 2733 napi->gro_list = NULL;
2734 napi->gro_count = 0;
2683} 2735}
2684EXPORT_SYMBOL(netif_napi_del); 2736EXPORT_SYMBOL(netif_napi_del);
2685 2737
@@ -3948,6 +4000,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3948 cmd == SIOCSMIIREG || 4000 cmd == SIOCSMIIREG ||
3949 cmd == SIOCBRADDIF || 4001 cmd == SIOCBRADDIF ||
3950 cmd == SIOCBRDELIF || 4002 cmd == SIOCBRDELIF ||
4003 cmd == SIOCSHWTSTAMP ||
3951 cmd == SIOCWANDEV) { 4004 cmd == SIOCWANDEV) {
3952 err = -EOPNOTSUPP; 4005 err = -EOPNOTSUPP;
3953 if (ops->ndo_do_ioctl) { 4006 if (ops->ndo_do_ioctl) {
@@ -4102,6 +4155,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4102 case SIOCBONDCHANGEACTIVE: 4155 case SIOCBONDCHANGEACTIVE:
4103 case SIOCBRADDIF: 4156 case SIOCBRADDIF:
4104 case SIOCBRDELIF: 4157 case SIOCBRDELIF:
4158 case SIOCSHWTSTAMP:
4105 if (!capable(CAP_NET_ADMIN)) 4159 if (!capable(CAP_NET_ADMIN))
4106 return -EPERM; 4160 return -EPERM;
4107 /* fall through */ 4161 /* fall through */
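Editor's note: the two hunks above make SIOCSHWTSTAMP a recognised device ioctl, forwarded to the driver's ndo_do_ioctl() and gated by CAP_NET_ADMIN. A hedged sketch of the userspace side, assuming the struct hwtstamp_config and HWTSTAMP_* definitions from <linux/net_tstamp.h> that accompany this series; the interface name is illustrative.

/* Sketch only: ask a NIC to timestamp all transmitted and received
 * packets via the new SIOCSHWTSTAMP ioctl (needs CAP_NET_ADMIN). */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/net_tstamp.h>
#include <linux/sockios.h>

int main(void)
{
        struct hwtstamp_config cfg;
        struct ifreq ifr;
        int sk = socket(AF_INET, SOCK_DGRAM, 0);

        if (sk < 0) { perror("socket"); return 1; }

        memset(&cfg, 0, sizeof(cfg));
        cfg.tx_type = HWTSTAMP_TX_ON;
        cfg.rx_filter = HWTSTAMP_FILTER_ALL;

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);    /* device name is illustrative */
        ifr.ifr_data = (char *) &cfg;

        if (ioctl(sk, SIOCSHWTSTAMP, &ifr) < 0)
                perror("SIOCSHWTSTAMP");        /* EOPNOTSUPP if the driver lacks support */

        return 0;
}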
@@ -5198,6 +5252,7 @@ static int __init net_dev_init(void)
5198 queue->backlog.poll = process_backlog; 5252 queue->backlog.poll = process_backlog;
5199 queue->backlog.weight = weight_p; 5253 queue->backlog.weight = weight_p;
5200 queue->backlog.gro_list = NULL; 5254 queue->backlog.gro_list = NULL;
5255 queue->backlog.gro_count = 0;
5201 } 5256 }
5202 5257
5203 dev_boot_phase = 0; 5258 dev_boot_phase = 0;
@@ -5230,6 +5285,14 @@ out:
5230 5285
5231subsys_initcall(net_dev_init); 5286subsys_initcall(net_dev_init);
5232 5287
5288static int __init initialize_hashrnd(void)
5289{
5290 get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
5291 return 0;
5292}
5293
5294late_initcall_sync(initialize_hashrnd);
5295
5233EXPORT_SYMBOL(__dev_get_by_index); 5296EXPORT_SYMBOL(__dev_get_by_index);
5234EXPORT_SYMBOL(__dev_get_by_name); 5297EXPORT_SYMBOL(__dev_get_by_name);
5235EXPORT_SYMBOL(__dev_remove_pack); 5298EXPORT_SYMBOL(__dev_remove_pack);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 947710a36ced..244ca56dffac 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -209,34 +209,62 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
209 return 0; 209 return 0;
210} 210}
211 211
212static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr) 212static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
213{ 213{
214 struct ethtool_rxnfc cmd; 214 struct ethtool_rxnfc cmd;
215 215
216 if (!dev->ethtool_ops->set_rxhash) 216 if (!dev->ethtool_ops->set_rxnfc)
217 return -EOPNOTSUPP; 217 return -EOPNOTSUPP;
218 218
219 if (copy_from_user(&cmd, useraddr, sizeof(cmd))) 219 if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
220 return -EFAULT; 220 return -EFAULT;
221 221
222 return dev->ethtool_ops->set_rxhash(dev, &cmd); 222 return dev->ethtool_ops->set_rxnfc(dev, &cmd);
223} 223}
224 224
225static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr) 225static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
226{ 226{
227 struct ethtool_rxnfc info; 227 struct ethtool_rxnfc info;
228 const struct ethtool_ops *ops = dev->ethtool_ops;
229 int ret;
230 void *rule_buf = NULL;
228 231
229 if (!dev->ethtool_ops->get_rxhash) 232 if (!ops->get_rxnfc)
230 return -EOPNOTSUPP; 233 return -EOPNOTSUPP;
231 234
232 if (copy_from_user(&info, useraddr, sizeof(info))) 235 if (copy_from_user(&info, useraddr, sizeof(info)))
233 return -EFAULT; 236 return -EFAULT;
234 237
235 dev->ethtool_ops->get_rxhash(dev, &info); 238 if (info.cmd == ETHTOOL_GRXCLSRLALL) {
239 if (info.rule_cnt > 0) {
240 rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
241 GFP_USER);
242 if (!rule_buf)
243 return -ENOMEM;
244 }
245 }
236 246
247 ret = ops->get_rxnfc(dev, &info, rule_buf);
248 if (ret < 0)
249 goto err_out;
250
251 ret = -EFAULT;
237 if (copy_to_user(useraddr, &info, sizeof(info))) 252 if (copy_to_user(useraddr, &info, sizeof(info)))
238 return -EFAULT; 253 goto err_out;
239 return 0; 254
255 if (rule_buf) {
256 useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
257 if (copy_to_user(useraddr, rule_buf,
258 info.rule_cnt * sizeof(u32)))
259 goto err_out;
260 }
261 ret = 0;
262
263err_out:
264 if (rule_buf)
265 kfree(rule_buf);
266
267 return ret;
240} 268}
241 269
242static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) 270static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
@@ -901,6 +929,10 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
901 case ETHTOOL_GFLAGS: 929 case ETHTOOL_GFLAGS:
902 case ETHTOOL_GPFLAGS: 930 case ETHTOOL_GPFLAGS:
903 case ETHTOOL_GRXFH: 931 case ETHTOOL_GRXFH:
932 case ETHTOOL_GRXRINGS:
933 case ETHTOOL_GRXCLSRLCNT:
934 case ETHTOOL_GRXCLSRULE:
935 case ETHTOOL_GRXCLSRLALL:
904 break; 936 break;
905 default: 937 default:
906 if (!capable(CAP_NET_ADMIN)) 938 if (!capable(CAP_NET_ADMIN))
@@ -1052,10 +1084,16 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1052 dev->ethtool_ops->set_priv_flags); 1084 dev->ethtool_ops->set_priv_flags);
1053 break; 1085 break;
1054 case ETHTOOL_GRXFH: 1086 case ETHTOOL_GRXFH:
1055 rc = ethtool_get_rxhash(dev, useraddr); 1087 case ETHTOOL_GRXRINGS:
1088 case ETHTOOL_GRXCLSRLCNT:
1089 case ETHTOOL_GRXCLSRULE:
1090 case ETHTOOL_GRXCLSRLALL:
1091 rc = ethtool_get_rxnfc(dev, useraddr);
1056 break; 1092 break;
1057 case ETHTOOL_SRXFH: 1093 case ETHTOOL_SRXFH:
1058 rc = ethtool_set_rxhash(dev, useraddr); 1094 case ETHTOOL_SRXCLSRLDEL:
1095 case ETHTOOL_SRXCLSRLINS:
1096 rc = ethtool_set_rxnfc(dev, useraddr);
1059 break; 1097 break;
1060 case ETHTOOL_GGRO: 1098 case ETHTOOL_GGRO:
1061 rc = ethtool_get_gro(dev, useraddr); 1099 rc = ethtool_get_gro(dev, useraddr);
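Editor's note: ethtool_get_rxnfc()/ethtool_set_rxnfc() above generalize the old RX-hash hooks to the whole ETHTOOL_[GS]RX* family, including rule dumps that append a variable-length list of rule locations after the fixed structure. A hedged userspace sketch of the simplest new query, ETHTOOL_GRXRINGS; it assumes the struct ethtool_rxnfc layout from <linux/ethtool.h> in this series, with the field names taken from the hunk above.

/* Sketch only: query the number of RX rings via ETHTOOL_GRXRINGS,
 * one of the commands now routed through ethtool_get_rxnfc(). */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
        struct ethtool_rxnfc nfc;
        struct ifreq ifr;
        int sk = socket(AF_INET, SOCK_DGRAM, 0);

        if (sk < 0) { perror("socket"); return 1; }

        memset(&nfc, 0, sizeof(nfc));
        nfc.cmd = ETHTOOL_GRXRINGS;

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);    /* device name is illustrative */
        ifr.ifr_data = (char *) &nfc;

        if (ioctl(sk, SIOCETHTOOL, &ifr) < 0)
                perror("ETHTOOL_GRXRINGS");
        else
                printf("%llu RX rings\n", (unsigned long long) nfc.data);

        return 0;
}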
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 32b3a0152d7a..98691e1466b8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -588,7 +588,8 @@ static void notify_rule_change(int event, struct fib_rule *rule,
588 goto errout; 588 goto errout;
589 } 589 }
590 590
591 err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); 591 rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
592 return;
592errout: 593errout:
593 if (err < 0) 594 if (err < 0)
594 rtnl_set_sk_err(net, ops->nlgroup, err); 595 rtnl_set_sk_err(net, ops->nlgroup, err);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 278a142d1047..a1cbce7fdae5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -871,8 +871,7 @@ static void neigh_timer_handler(unsigned long arg)
871 write_unlock(&neigh->lock); 871 write_unlock(&neigh->lock);
872 neigh->ops->solicit(neigh, skb); 872 neigh->ops->solicit(neigh, skb);
873 atomic_inc(&neigh->probes); 873 atomic_inc(&neigh->probes);
874 if (skb) 874 kfree_skb(skb);
875 kfree_skb(skb);
876 } else { 875 } else {
877out: 876out:
878 write_unlock(&neigh->lock); 877 write_unlock(&neigh->lock);
@@ -908,8 +907,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
908 neigh->updated = jiffies; 907 neigh->updated = jiffies;
909 write_unlock_bh(&neigh->lock); 908 write_unlock_bh(&neigh->lock);
910 909
911 if (skb) 910 kfree_skb(skb);
912 kfree_skb(skb);
913 return 1; 911 return 1;
914 } 912 }
915 } else if (neigh->nud_state & NUD_STALE) { 913 } else if (neigh->nud_state & NUD_STALE) {
@@ -1656,7 +1654,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1656 flags &= ~NEIGH_UPDATE_F_OVERRIDE; 1654 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1657 } 1655 }
1658 1656
1659 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); 1657 if (ndm->ndm_flags & NTF_USE) {
1658 neigh_event_send(neigh, NULL);
1659 err = 0;
1660 } else
1661 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1660 neigh_release(neigh); 1662 neigh_release(neigh);
1661 goto out_dev_put; 1663 goto out_dev_put;
1662 } 1664 }
@@ -2534,7 +2536,8 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
2534 kfree_skb(skb); 2536 kfree_skb(skb);
2535 goto errout; 2537 goto errout;
2536 } 2538 }
2537 err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); 2539 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2540 return;
2538errout: 2541errout:
2539 if (err < 0) 2542 if (err < 0)
2540 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); 2543 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 65498483325a..32d419f5ac98 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3275,8 +3275,7 @@ static void pktgen_stop(struct pktgen_thread *t)
3275 3275
3276 list_for_each_entry(pkt_dev, &t->if_list, list) { 3276 list_for_each_entry(pkt_dev, &t->if_list, list) {
3277 pktgen_stop_device(pkt_dev); 3277 pktgen_stop_device(pkt_dev);
3278 if (pkt_dev->skb) 3278 kfree_skb(pkt_dev->skb);
3279 kfree_skb(pkt_dev->skb);
3280 3279
3281 pkt_dev->skb = NULL; 3280 pkt_dev->skb = NULL;
3282 } 3281 }
@@ -3303,8 +3302,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
3303 if (!cur->removal_mark) 3302 if (!cur->removal_mark)
3304 continue; 3303 continue;
3305 3304
3306 if (cur->skb) 3305 kfree_skb(cur->skb);
3307 kfree_skb(cur->skb);
3308 cur->skb = NULL; 3306 cur->skb = NULL;
3309 3307
3310 pktgen_remove_device(t, cur); 3308 pktgen_remove_device(t, cur);
@@ -3328,8 +3326,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
3328 list_for_each_safe(q, n, &t->if_list) { 3326 list_for_each_safe(q, n, &t->if_list) {
3329 cur = list_entry(q, struct pktgen_dev, list); 3327 cur = list_entry(q, struct pktgen_dev, list);
3330 3328
3331 if (cur->skb) 3329 kfree_skb(cur->skb);
3332 kfree_skb(cur->skb);
3333 cur->skb = NULL; 3330 cur->skb = NULL;
3334 3331
3335 pktgen_remove_device(t, cur); 3332 pktgen_remove_device(t, cur);
@@ -3393,8 +3390,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3393 3390
3394 if (!netif_running(odev)) { 3391 if (!netif_running(odev)) {
3395 pktgen_stop_device(pkt_dev); 3392 pktgen_stop_device(pkt_dev);
3396 if (pkt_dev->skb) 3393 kfree_skb(pkt_dev->skb);
3397 kfree_skb(pkt_dev->skb);
3398 pkt_dev->skb = NULL; 3394 pkt_dev->skb = NULL;
3399 goto out; 3395 goto out;
3400 } 3396 }
@@ -3415,8 +3411,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3415 if ((++pkt_dev->clone_count >= pkt_dev->clone_skb) 3411 if ((++pkt_dev->clone_count >= pkt_dev->clone_skb)
3416 || (!pkt_dev->skb)) { 3412 || (!pkt_dev->skb)) {
3417 /* build a new pkt */ 3413 /* build a new pkt */
3418 if (pkt_dev->skb) 3414 kfree_skb(pkt_dev->skb);
3419 kfree_skb(pkt_dev->skb);
3420 3415
3421 pkt_dev->skb = fill_packet(odev, pkt_dev); 3416 pkt_dev->skb = fill_packet(odev, pkt_dev);
3422 if (pkt_dev->skb == NULL) { 3417 if (pkt_dev->skb == NULL) {
@@ -3498,8 +3493,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3498 3493
3499 /* Done with this */ 3494 /* Done with this */
3500 pktgen_stop_device(pkt_dev); 3495 pktgen_stop_device(pkt_dev);
3501 if (pkt_dev->skb) 3496 kfree_skb(pkt_dev->skb);
3502 kfree_skb(pkt_dev->skb);
3503 pkt_dev->skb = NULL; 3497 pkt_dev->skb = NULL;
3504 } 3498 }
3505out:; 3499out:;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 790dd205bb5d..d78030f88bd0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -455,8 +455,8 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
455 return nlmsg_unicast(rtnl, skb, pid); 455 return nlmsg_unicast(rtnl, skb, pid);
456} 456}
457 457
458int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, 458void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
459 struct nlmsghdr *nlh, gfp_t flags) 459 struct nlmsghdr *nlh, gfp_t flags)
460{ 460{
461 struct sock *rtnl = net->rtnl; 461 struct sock *rtnl = net->rtnl;
462 int report = 0; 462 int report = 0;
@@ -464,7 +464,7 @@ int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
464 if (nlh) 464 if (nlh)
465 report = nlmsg_report(nlh); 465 report = nlmsg_report(nlh);
466 466
467 return nlmsg_notify(rtnl, skb, pid, group, report, flags); 467 nlmsg_notify(rtnl, skb, pid, group, report, flags);
468} 468}
469 469
470void rtnl_set_sk_err(struct net *net, u32 group, int error) 470void rtnl_set_sk_err(struct net *net, u32 group, int error)
@@ -1246,7 +1246,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
1246 kfree_skb(skb); 1246 kfree_skb(skb);
1247 goto errout; 1247 goto errout;
1248 } 1248 }
1249 err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); 1249 rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
1250 return;
1250errout: 1251errout:
1251 if (err < 0) 1252 if (err < 0)
1252 rtnl_set_sk_err(net, RTNLGRP_LINK, err); 1253 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c6a6b166f8d6..e5e2111a397d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -55,6 +55,7 @@
55#include <linux/rtnetlink.h> 55#include <linux/rtnetlink.h>
56#include <linux/init.h> 56#include <linux/init.h>
57#include <linux/scatterlist.h> 57#include <linux/scatterlist.h>
58#include <linux/errqueue.h>
58 59
59#include <net/protocol.h> 60#include <net/protocol.h>
60#include <net/dst.h> 61#include <net/dst.h>
@@ -123,6 +124,7 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
123 skb->dev ? skb->dev->name : "<NULL>"); 124 skb->dev ? skb->dev->name : "<NULL>");
124 BUG(); 125 BUG();
125} 126}
127EXPORT_SYMBOL(skb_over_panic);
126 128
127/** 129/**
128 * skb_under_panic - private function 130 * skb_under_panic - private function
@@ -142,6 +144,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
142 skb->dev ? skb->dev->name : "<NULL>"); 144 skb->dev ? skb->dev->name : "<NULL>");
143 BUG(); 145 BUG();
144} 146}
147EXPORT_SYMBOL(skb_under_panic);
145 148
146/* Allocate a new skbuff. We do this ourselves so we can fill in a few 149/* Allocate a new skbuff. We do this ourselves so we can fill in a few
147 * 'private' fields and also do memory statistics to find all the 150 * 'private' fields and also do memory statistics to find all the
@@ -205,7 +208,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
205 shinfo->gso_segs = 0; 208 shinfo->gso_segs = 0;
206 shinfo->gso_type = 0; 209 shinfo->gso_type = 0;
207 shinfo->ip6_frag_id = 0; 210 shinfo->ip6_frag_id = 0;
211 shinfo->tx_flags.flags = 0;
208 shinfo->frag_list = NULL; 212 shinfo->frag_list = NULL;
213 memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
209 214
210 if (fclone) { 215 if (fclone) {
211 struct sk_buff *child = skb + 1; 216 struct sk_buff *child = skb + 1;
@@ -223,6 +228,7 @@ nodata:
223 skb = NULL; 228 skb = NULL;
224 goto out; 229 goto out;
225} 230}
231EXPORT_SYMBOL(__alloc_skb);
226 232
227/** 233/**
228 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device 234 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
@@ -250,6 +256,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
250 } 256 }
251 return skb; 257 return skb;
252} 258}
259EXPORT_SYMBOL(__netdev_alloc_skb);
253 260
254struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask) 261struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
255{ 262{
@@ -418,6 +425,7 @@ void __kfree_skb(struct sk_buff *skb)
418 skb_release_all(skb); 425 skb_release_all(skb);
419 kfree_skbmem(skb); 426 kfree_skbmem(skb);
420} 427}
428EXPORT_SYMBOL(__kfree_skb);
421 429
422/** 430/**
423 * kfree_skb - free an sk_buff 431 * kfree_skb - free an sk_buff
@@ -436,6 +444,7 @@ void kfree_skb(struct sk_buff *skb)
436 return; 444 return;
437 __kfree_skb(skb); 445 __kfree_skb(skb);
438} 446}
447EXPORT_SYMBOL(kfree_skb);
439 448
440/** 449/**
441 * skb_recycle_check - check if skb can be reused for receive 450 * skb_recycle_check - check if skb can be reused for receive
@@ -605,6 +614,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
605 614
606 return __skb_clone(n, skb); 615 return __skb_clone(n, skb);
607} 616}
617EXPORT_SYMBOL(skb_clone);
608 618
609static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 619static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
610{ 620{
@@ -671,7 +681,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
671 copy_skb_header(n, skb); 681 copy_skb_header(n, skb);
672 return n; 682 return n;
673} 683}
674 684EXPORT_SYMBOL(skb_copy);
675 685
676/** 686/**
677 * pskb_copy - create copy of an sk_buff with private head. 687 * pskb_copy - create copy of an sk_buff with private head.
@@ -730,6 +740,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
730out: 740out:
731 return n; 741 return n;
732} 742}
743EXPORT_SYMBOL(pskb_copy);
733 744
734/** 745/**
735 * pskb_expand_head - reallocate header of &sk_buff 746 * pskb_expand_head - reallocate header of &sk_buff
@@ -813,6 +824,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
813nodata: 824nodata:
814 return -ENOMEM; 825 return -ENOMEM;
815} 826}
827EXPORT_SYMBOL(pskb_expand_head);
816 828
817/* Make private copy of skb with writable head and some headroom */ 829/* Make private copy of skb with writable head and some headroom */
818 830
@@ -833,7 +845,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
833 } 845 }
834 return skb2; 846 return skb2;
835} 847}
836 848EXPORT_SYMBOL(skb_realloc_headroom);
837 849
838/** 850/**
839 * skb_copy_expand - copy and expand sk_buff 851 * skb_copy_expand - copy and expand sk_buff
@@ -898,6 +910,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
898 910
899 return n; 911 return n;
900} 912}
913EXPORT_SYMBOL(skb_copy_expand);
901 914
902/** 915/**
903 * skb_pad - zero pad the tail of an skb 916 * skb_pad - zero pad the tail of an skb
@@ -943,6 +956,7 @@ free_skb:
943 kfree_skb(skb); 956 kfree_skb(skb);
944 return err; 957 return err;
945} 958}
959EXPORT_SYMBOL(skb_pad);
946 960
947/** 961/**
948 * skb_put - add data to a buffer 962 * skb_put - add data to a buffer
@@ -1100,6 +1114,7 @@ done:
1100 1114
1101 return 0; 1115 return 0;
1102} 1116}
1117EXPORT_SYMBOL(___pskb_trim);
1103 1118
1104/** 1119/**
1105 * __pskb_pull_tail - advance tail of skb header 1120 * __pskb_pull_tail - advance tail of skb header
@@ -1193,8 +1208,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
1193 insp = list; 1208 insp = list;
1194 } 1209 }
1195 if (!pskb_pull(list, eat)) { 1210 if (!pskb_pull(list, eat)) {
1196 if (clone) 1211 kfree_skb(clone);
1197 kfree_skb(clone);
1198 return NULL; 1212 return NULL;
1199 } 1213 }
1200 break; 1214 break;
@@ -1238,6 +1252,7 @@ pull_pages:
1238 1252
1239 return skb_tail_pointer(skb); 1253 return skb_tail_pointer(skb);
1240} 1254}
1255EXPORT_SYMBOL(__pskb_pull_tail);
1241 1256
1242/* Copy some data bits from skb to kernel buffer. */ 1257/* Copy some data bits from skb to kernel buffer. */
1243 1258
@@ -1315,6 +1330,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1315fault: 1330fault:
1316 return -EFAULT; 1331 return -EFAULT;
1317} 1332}
1333EXPORT_SYMBOL(skb_copy_bits);
1318 1334
1319/* 1335/*
1320 * Callback from splice_to_pipe(), if we need to release some pages 1336 * Callback from splice_to_pipe(), if we need to release some pages
@@ -1325,14 +1341,39 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
1325 put_page(spd->pages[i]); 1341 put_page(spd->pages[i]);
1326} 1342}
1327 1343
1328static inline struct page *linear_to_page(struct page *page, unsigned int len, 1344static inline struct page *linear_to_page(struct page *page, unsigned int *len,
1329 unsigned int offset) 1345 unsigned int *offset,
1346 struct sk_buff *skb)
1330{ 1347{
1331 struct page *p = alloc_pages(GFP_KERNEL, 0); 1348 struct sock *sk = skb->sk;
1349 struct page *p = sk->sk_sndmsg_page;
1350 unsigned int off;
1332 1351
1333 if (!p) 1352 if (!p) {
1334 return NULL; 1353new_page:
1335 memcpy(page_address(p) + offset, page_address(page) + offset, len); 1354 p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0);
1355 if (!p)
1356 return NULL;
1357
1358 off = sk->sk_sndmsg_off = 0;
1359 /* hold one ref to this page until it's full */
1360 } else {
1361 unsigned int mlen;
1362
1363 off = sk->sk_sndmsg_off;
1364 mlen = PAGE_SIZE - off;
1365 if (mlen < 64 && mlen < *len) {
1366 put_page(p);
1367 goto new_page;
1368 }
1369
1370 *len = min_t(unsigned int, *len, mlen);
1371 }
1372
1373 memcpy(page_address(p) + off, page_address(page) + *offset, *len);
1374 sk->sk_sndmsg_off += *len;
1375 *offset = off;
1376 get_page(p);
1336 1377
1337 return p; 1378 return p;
1338} 1379}
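Editor's note: linear_to_page() above now appends linear skb data into the socket's cached sk_sndmsg_page at sk_sndmsg_off, and only allocates a fresh page when fewer than 64 bytes (and less than the requested length) remain, so splicing many small chunks no longer consumes one page per chunk. Below is a plain-C analogy of that "append into a cached buffer, retire it when nearly full" pattern; it is a userspace sketch of the idea, not the kernel code (the kernel drops its reference to the retired page with put_page(), which this toy simply skips).

/* Userspace analogy of the cached-page reuse in linear_to_page():
 * copy into one shared buffer until the leftover space is too small. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUF_SIZE  4096  /* stands in for PAGE_SIZE */
#define MIN_SPACE 64    /* threshold used by the kernel change above */

static char *cur_buf;   /* stands in for sk->sk_sndmsg_page */
static size_t cur_off;  /* stands in for sk->sk_sndmsg_off */

static char *append_chunk(const void *data, size_t *len, size_t *off)
{
        if (!cur_buf || (BUF_SIZE - cur_off < MIN_SPACE &&
                         BUF_SIZE - cur_off < *len)) {
                cur_buf = malloc(BUF_SIZE);     /* retire old buffer, start fresh */
                cur_off = 0;
                if (!cur_buf)
                        return NULL;
        }

        if (*len > BUF_SIZE - cur_off)
                *len = BUF_SIZE - cur_off;      /* clamp, like min_t() in the patch */

        memcpy(cur_buf + cur_off, data, *len);
        *off = cur_off;
        cur_off += *len;
        return cur_buf;
}

int main(void)
{
        char payload[100] = "hello";
        size_t len = 0, off = 0;
        int i;

        for (i = 0; i < 50; i++) {
                len = sizeof(payload);
                if (!append_chunk(payload, &len, &off))
                        return 1;
        }
        printf("last chunk landed at offset %zu, %zu bytes\n", off, len);
        return 0;
}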
@@ -1341,21 +1382,21 @@ static inline struct page *linear_to_page(struct page *page, unsigned int len,
1341 * Fill page/offset/length into spd, if it can hold more pages. 1382 * Fill page/offset/length into spd, if it can hold more pages.
1342 */ 1383 */
1343static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, 1384static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
1344 unsigned int len, unsigned int offset, 1385 unsigned int *len, unsigned int offset,
1345 struct sk_buff *skb, int linear) 1386 struct sk_buff *skb, int linear)
1346{ 1387{
1347 if (unlikely(spd->nr_pages == PIPE_BUFFERS)) 1388 if (unlikely(spd->nr_pages == PIPE_BUFFERS))
1348 return 1; 1389 return 1;
1349 1390
1350 if (linear) { 1391 if (linear) {
1351 page = linear_to_page(page, len, offset); 1392 page = linear_to_page(page, len, &offset, skb);
1352 if (!page) 1393 if (!page)
1353 return 1; 1394 return 1;
1354 } else 1395 } else
1355 get_page(page); 1396 get_page(page);
1356 1397
1357 spd->pages[spd->nr_pages] = page; 1398 spd->pages[spd->nr_pages] = page;
1358 spd->partial[spd->nr_pages].len = len; 1399 spd->partial[spd->nr_pages].len = *len;
1359 spd->partial[spd->nr_pages].offset = offset; 1400 spd->partial[spd->nr_pages].offset = offset;
1360 spd->nr_pages++; 1401 spd->nr_pages++;
1361 1402
@@ -1365,8 +1406,13 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
1365static inline void __segment_seek(struct page **page, unsigned int *poff, 1406static inline void __segment_seek(struct page **page, unsigned int *poff,
1366 unsigned int *plen, unsigned int off) 1407 unsigned int *plen, unsigned int off)
1367{ 1408{
1409 unsigned long n;
1410
1368 *poff += off; 1411 *poff += off;
1369 *page += *poff / PAGE_SIZE; 1412 n = *poff / PAGE_SIZE;
1413 if (n)
1414 *page = nth_page(*page, n);
1415
1370 *poff = *poff % PAGE_SIZE; 1416 *poff = *poff % PAGE_SIZE;
1371 *plen -= off; 1417 *plen -= off;
1372} 1418}
@@ -1397,7 +1443,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
1397 /* the linear region may spread across several pages */ 1443 /* the linear region may spread across several pages */
1398 flen = min_t(unsigned int, flen, PAGE_SIZE - poff); 1444 flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
1399 1445
1400 if (spd_fill_page(spd, page, flen, poff, skb, linear)) 1446 if (spd_fill_page(spd, page, &flen, poff, skb, linear))
1401 return 1; 1447 return 1;
1402 1448
1403 __segment_seek(&page, &poff, &plen, flen); 1449 __segment_seek(&page, &poff, &plen, flen);
@@ -1590,7 +1636,6 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
1590fault: 1636fault:
1591 return -EFAULT; 1637 return -EFAULT;
1592} 1638}
1593
1594EXPORT_SYMBOL(skb_store_bits); 1639EXPORT_SYMBOL(skb_store_bits);
1595 1640
1596/* Checksum skb data. */ 1641/* Checksum skb data. */
@@ -1667,6 +1712,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
1667 1712
1668 return csum; 1713 return csum;
1669} 1714}
1715EXPORT_SYMBOL(skb_checksum);
1670 1716
1671/* Both of above in one bottle. */ 1717/* Both of above in one bottle. */
1672 1718
@@ -1748,6 +1794,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1748 BUG_ON(len); 1794 BUG_ON(len);
1749 return csum; 1795 return csum;
1750} 1796}
1797EXPORT_SYMBOL(skb_copy_and_csum_bits);
1751 1798
1752void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) 1799void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
1753{ 1800{
@@ -1774,6 +1821,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
1774 *((__sum16 *)(to + csstuff)) = csum_fold(csum); 1821 *((__sum16 *)(to + csstuff)) = csum_fold(csum);
1775 } 1822 }
1776} 1823}
1824EXPORT_SYMBOL(skb_copy_and_csum_dev);
1777 1825
1778/** 1826/**
1779 * skb_dequeue - remove from the head of the queue 1827 * skb_dequeue - remove from the head of the queue
@@ -1794,6 +1842,7 @@ struct sk_buff *skb_dequeue(struct sk_buff_head *list)
1794 spin_unlock_irqrestore(&list->lock, flags); 1842 spin_unlock_irqrestore(&list->lock, flags);
1795 return result; 1843 return result;
1796} 1844}
1845EXPORT_SYMBOL(skb_dequeue);
1797 1846
1798/** 1847/**
1799 * skb_dequeue_tail - remove from the tail of the queue 1848 * skb_dequeue_tail - remove from the tail of the queue
@@ -1813,6 +1862,7 @@ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
1813 spin_unlock_irqrestore(&list->lock, flags); 1862 spin_unlock_irqrestore(&list->lock, flags);
1814 return result; 1863 return result;
1815} 1864}
1865EXPORT_SYMBOL(skb_dequeue_tail);
1816 1866
1817/** 1867/**
1818 * skb_queue_purge - empty a list 1868 * skb_queue_purge - empty a list
@@ -1828,6 +1878,7 @@ void skb_queue_purge(struct sk_buff_head *list)
1828 while ((skb = skb_dequeue(list)) != NULL) 1878 while ((skb = skb_dequeue(list)) != NULL)
1829 kfree_skb(skb); 1879 kfree_skb(skb);
1830} 1880}
1881EXPORT_SYMBOL(skb_queue_purge);
1831 1882
1832/** 1883/**
1833 * skb_queue_head - queue a buffer at the list head 1884 * skb_queue_head - queue a buffer at the list head
@@ -1848,6 +1899,7 @@ void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
1848 __skb_queue_head(list, newsk); 1899 __skb_queue_head(list, newsk);
1849 spin_unlock_irqrestore(&list->lock, flags); 1900 spin_unlock_irqrestore(&list->lock, flags);
1850} 1901}
1902EXPORT_SYMBOL(skb_queue_head);
1851 1903
1852/** 1904/**
1853 * skb_queue_tail - queue a buffer at the list tail 1905 * skb_queue_tail - queue a buffer at the list tail
@@ -1868,6 +1920,7 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
1868 __skb_queue_tail(list, newsk); 1920 __skb_queue_tail(list, newsk);
1869 spin_unlock_irqrestore(&list->lock, flags); 1921 spin_unlock_irqrestore(&list->lock, flags);
1870} 1922}
1923EXPORT_SYMBOL(skb_queue_tail);
1871 1924
1872/** 1925/**
1873 * skb_unlink - remove a buffer from a list 1926 * skb_unlink - remove a buffer from a list
@@ -1887,6 +1940,7 @@ void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
1887 __skb_unlink(skb, list); 1940 __skb_unlink(skb, list);
1888 spin_unlock_irqrestore(&list->lock, flags); 1941 spin_unlock_irqrestore(&list->lock, flags);
1889} 1942}
1943EXPORT_SYMBOL(skb_unlink);
1890 1944
1891/** 1945/**
1892 * skb_append - append a buffer 1946 * skb_append - append a buffer
@@ -1906,7 +1960,7 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
1906 __skb_queue_after(list, old, newsk); 1960 __skb_queue_after(list, old, newsk);
1907 spin_unlock_irqrestore(&list->lock, flags); 1961 spin_unlock_irqrestore(&list->lock, flags);
1908} 1962}
1909 1963EXPORT_SYMBOL(skb_append);
1910 1964
1911/** 1965/**
1912 * skb_insert - insert a buffer 1966 * skb_insert - insert a buffer
@@ -1928,6 +1982,7 @@ void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
1928 __skb_insert(newsk, old->prev, old, list); 1982 __skb_insert(newsk, old->prev, old, list);
1929 spin_unlock_irqrestore(&list->lock, flags); 1983 spin_unlock_irqrestore(&list->lock, flags);
1930} 1984}
1985EXPORT_SYMBOL(skb_insert);
1931 1986
1932static inline void skb_split_inside_header(struct sk_buff *skb, 1987static inline void skb_split_inside_header(struct sk_buff *skb,
1933 struct sk_buff* skb1, 1988 struct sk_buff* skb1,
@@ -2006,6 +2061,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
2006 else /* Second chunk has no header, nothing to copy. */ 2061 else /* Second chunk has no header, nothing to copy. */
2007 skb_split_no_header(skb, skb1, len, pos); 2062 skb_split_no_header(skb, skb1, len, pos);
2008} 2063}
2064EXPORT_SYMBOL(skb_split);
2009 2065
2010/* Shifting from/to a cloned skb is a no-go. 2066/* Shifting from/to a cloned skb is a no-go.
2011 * 2067 *
@@ -2168,6 +2224,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
2168 st->frag_idx = st->stepped_offset = 0; 2224 st->frag_idx = st->stepped_offset = 0;
2169 st->frag_data = NULL; 2225 st->frag_data = NULL;
2170} 2226}
2227EXPORT_SYMBOL(skb_prepare_seq_read);
2171 2228
2172/** 2229/**
2173 * skb_seq_read - Sequentially read skb data 2230 * skb_seq_read - Sequentially read skb data
@@ -2255,6 +2312,7 @@ next_skb:
2255 2312
2256 return 0; 2313 return 0;
2257} 2314}
2315EXPORT_SYMBOL(skb_seq_read);
2258 2316
2259/** 2317/**
2260 * skb_abort_seq_read - Abort a sequential read of skb data 2318 * skb_abort_seq_read - Abort a sequential read of skb data
@@ -2268,6 +2326,7 @@ void skb_abort_seq_read(struct skb_seq_state *st)
2268 if (st->frag_data) 2326 if (st->frag_data)
2269 kunmap_skb_frag(st->frag_data); 2327 kunmap_skb_frag(st->frag_data);
2270} 2328}
2329EXPORT_SYMBOL(skb_abort_seq_read);
2271 2330
2272#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) 2331#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
2273 2332
@@ -2310,6 +2369,7 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
2310 ret = textsearch_find(config, state); 2369 ret = textsearch_find(config, state);
2311 return (ret <= to - from ? ret : UINT_MAX); 2370 return (ret <= to - from ? ret : UINT_MAX);
2312} 2371}
2372EXPORT_SYMBOL(skb_find_text);
2313 2373
2314/** 2374/**
2315 * skb_append_datato_frags: - append the user data to a skb 2375 * skb_append_datato_frags: - append the user data to a skb
@@ -2382,6 +2442,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
2382 2442
2383 return 0; 2443 return 0;
2384} 2444}
2445EXPORT_SYMBOL(skb_append_datato_frags);
2385 2446
2386/** 2447/**
2387 * skb_pull_rcsum - pull skb and update receive checksum 2448 * skb_pull_rcsum - pull skb and update receive checksum
@@ -2569,7 +2630,6 @@ err:
2569 } 2630 }
2570 return ERR_PTR(err); 2631 return ERR_PTR(err);
2571} 2632}
2572
2573EXPORT_SYMBOL_GPL(skb_segment); 2633EXPORT_SYMBOL_GPL(skb_segment);
2574 2634
2575int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) 2635int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
@@ -2577,17 +2637,23 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2577 struct sk_buff *p = *head; 2637 struct sk_buff *p = *head;
2578 struct sk_buff *nskb; 2638 struct sk_buff *nskb;
2579 unsigned int headroom; 2639 unsigned int headroom;
2580 unsigned int hlen = p->data - skb_mac_header(p); 2640 unsigned int len = skb_gro_len(skb);
2581 unsigned int len = skb->len;
2582 2641
2583 if (hlen + p->len + len >= 65536) 2642 if (p->len + len >= 65536)
2584 return -E2BIG; 2643 return -E2BIG;
2585 2644
2586 if (skb_shinfo(p)->frag_list) 2645 if (skb_shinfo(p)->frag_list)
2587 goto merge; 2646 goto merge;
2588 else if (!skb_headlen(p) && !skb_headlen(skb) && 2647 else if (skb_headlen(skb) <= skb_gro_offset(skb)) {
2589 skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags < 2648 if (skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags >
2590 MAX_SKB_FRAGS) { 2649 MAX_SKB_FRAGS)
2650 return -E2BIG;
2651
2652 skb_shinfo(skb)->frags[0].page_offset +=
2653 skb_gro_offset(skb) - skb_headlen(skb);
2654 skb_shinfo(skb)->frags[0].size -=
2655 skb_gro_offset(skb) - skb_headlen(skb);
2656
2591 memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags, 2657 memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags,
2592 skb_shinfo(skb)->frags, 2658 skb_shinfo(skb)->frags,
2593 skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); 2659 skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
@@ -2604,7 +2670,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2604 } 2670 }
2605 2671
2606 headroom = skb_headroom(p); 2672 headroom = skb_headroom(p);
2607 nskb = netdev_alloc_skb(p->dev, headroom); 2673 nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p));
2608 if (unlikely(!nskb)) 2674 if (unlikely(!nskb))
2609 return -ENOMEM; 2675 return -ENOMEM;
2610 2676
@@ -2612,12 +2678,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2612 nskb->mac_len = p->mac_len; 2678 nskb->mac_len = p->mac_len;
2613 2679
2614 skb_reserve(nskb, headroom); 2680 skb_reserve(nskb, headroom);
2681 __skb_put(nskb, skb_gro_offset(p));
2615 2682
2616 skb_set_mac_header(nskb, -hlen); 2683 skb_set_mac_header(nskb, skb_mac_header(p) - p->data);
2617 skb_set_network_header(nskb, skb_network_offset(p)); 2684 skb_set_network_header(nskb, skb_network_offset(p));
2618 skb_set_transport_header(nskb, skb_transport_offset(p)); 2685 skb_set_transport_header(nskb, skb_transport_offset(p));
2619 2686
2620 memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen); 2687 __skb_pull(p, skb_gro_offset(p));
2688 memcpy(skb_mac_header(nskb), skb_mac_header(p),
2689 p->data - skb_mac_header(p));
2621 2690
2622 *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); 2691 *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
2623 skb_shinfo(nskb)->frag_list = p; 2692 skb_shinfo(nskb)->frag_list = p;
@@ -2636,6 +2705,17 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2636 p = nskb; 2705 p = nskb;
2637 2706
2638merge: 2707merge:
2708 if (skb_gro_offset(skb) > skb_headlen(skb)) {
2709 skb_shinfo(skb)->frags[0].page_offset +=
2710 skb_gro_offset(skb) - skb_headlen(skb);
2711 skb_shinfo(skb)->frags[0].size -=
2712 skb_gro_offset(skb) - skb_headlen(skb);
2713 skb_gro_reset_offset(skb);
2714 skb_gro_pull(skb, skb_headlen(skb));
2715 }
2716
2717 __skb_pull(skb, skb_gro_offset(skb));
2718
2639 p->prev->next = skb; 2719 p->prev->next = skb;
2640 p->prev = skb; 2720 p->prev = skb;
2641 skb_header_release(skb); 2721 skb_header_release(skb);
@@ -2747,6 +2827,7 @@ int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int le
2747 2827
2748 return nsg; 2828 return nsg;
2749} 2829}
2830EXPORT_SYMBOL_GPL(skb_to_sgvec);
2750 2831
2751/** 2832/**
2752 * skb_cow_data - Check that a socket buffer's data buffers are writable 2833 * skb_cow_data - Check that a socket buffer's data buffers are writable
@@ -2856,6 +2937,45 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2856 2937
2857 return elt; 2938 return elt;
2858} 2939}
2940EXPORT_SYMBOL_GPL(skb_cow_data);
2941
2942void skb_tstamp_tx(struct sk_buff *orig_skb,
2943 struct skb_shared_hwtstamps *hwtstamps)
2944{
2945 struct sock *sk = orig_skb->sk;
2946 struct sock_exterr_skb *serr;
2947 struct sk_buff *skb;
2948 int err;
2949
2950 if (!sk)
2951 return;
2952
2953 skb = skb_clone(orig_skb, GFP_ATOMIC);
2954 if (!skb)
2955 return;
2956
2957 if (hwtstamps) {
2958 *skb_hwtstamps(skb) =
2959 *hwtstamps;
2960 } else {
2961 /*
2962 * no hardware time stamps available,
2963 * so keep the skb_shared_tx and only
2964 * store software time stamp
2965 */
2966 skb->tstamp = ktime_get_real();
2967 }
2968
2969 serr = SKB_EXT_ERR(skb);
2970 memset(serr, 0, sizeof(*serr));
2971 serr->ee.ee_errno = ENOMSG;
2972 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
2973 err = sock_queue_err_skb(sk, skb);
2974 if (err)
2975 kfree_skb(skb);
2976}
2977EXPORT_SYMBOL_GPL(skb_tstamp_tx);
2978
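
The new skb_tstamp_tx() clones the outgoing packet, attaches either the hardware timestamps or a software ktime_get_real() stamp, and queues the clone on the socket error queue with ee_origin set to SO_EE_ORIGIN_TIMESTAMPING. A minimal user-space sketch of how an application might collect such a timestamp is shown below; it assumes headers that already provide SCM_TIMESTAMPING and SO_EE_ORIGIN_TIMESTAMPING (from this series), and that timestamping was previously enabled on fd — it is an illustration, not code from this patch.

/* Hedged sketch: drain one timestamp record from the error queue. */
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/socket.h>
#include <linux/errqueue.h>
#include <linux/net_tstamp.h>

static void read_tx_timestamp(int fd)
{
	char data[256], ctrl[512];
	struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = ctrl, .msg_controllen = sizeof(ctrl),
	};
	struct cmsghdr *cm;

	if (recvmsg(fd, &msg, MSG_ERRQUEUE | MSG_DONTWAIT) < 0)
		return;	/* nothing queued yet */

	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
		if (cm->cmsg_level == SOL_SOCKET &&
		    cm->cmsg_type == SCM_TIMESTAMPING) {
			/* [0] software, [1] hw converted to sys time, [2] raw hw */
			struct timespec ts[3];

			memcpy(ts, CMSG_DATA(cm), sizeof(ts));
			printf("sw stamp: %ld.%09ld\n",
			       (long)ts[0].tv_sec, ts[0].tv_nsec);
		}
	}
}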
2859 2979
2860/** 2980/**
2861 * skb_partial_csum_set - set up and verify partial csum values for packet 2981 * skb_partial_csum_set - set up and verify partial csum values for packet
@@ -2884,6 +3004,7 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
2884 skb->csum_offset = off; 3004 skb->csum_offset = off;
2885 return true; 3005 return true;
2886} 3006}
3007EXPORT_SYMBOL_GPL(skb_partial_csum_set);
2887 3008
2888void __skb_warn_lro_forwarding(const struct sk_buff *skb) 3009void __skb_warn_lro_forwarding(const struct sk_buff *skb)
2889{ 3010{
@@ -2891,42 +3012,4 @@ void __skb_warn_lro_forwarding(const struct sk_buff *skb)
2891 pr_warning("%s: received packets cannot be forwarded" 3012 pr_warning("%s: received packets cannot be forwarded"
2892 " while LRO is enabled\n", skb->dev->name); 3013 " while LRO is enabled\n", skb->dev->name);
2893} 3014}
2894
2895EXPORT_SYMBOL(___pskb_trim);
2896EXPORT_SYMBOL(__kfree_skb);
2897EXPORT_SYMBOL(kfree_skb);
2898EXPORT_SYMBOL(__pskb_pull_tail);
2899EXPORT_SYMBOL(__alloc_skb);
2900EXPORT_SYMBOL(__netdev_alloc_skb);
2901EXPORT_SYMBOL(pskb_copy);
2902EXPORT_SYMBOL(pskb_expand_head);
2903EXPORT_SYMBOL(skb_checksum);
2904EXPORT_SYMBOL(skb_clone);
2905EXPORT_SYMBOL(skb_copy);
2906EXPORT_SYMBOL(skb_copy_and_csum_bits);
2907EXPORT_SYMBOL(skb_copy_and_csum_dev);
2908EXPORT_SYMBOL(skb_copy_bits);
2909EXPORT_SYMBOL(skb_copy_expand);
2910EXPORT_SYMBOL(skb_over_panic);
2911EXPORT_SYMBOL(skb_pad);
2912EXPORT_SYMBOL(skb_realloc_headroom);
2913EXPORT_SYMBOL(skb_under_panic);
2914EXPORT_SYMBOL(skb_dequeue);
2915EXPORT_SYMBOL(skb_dequeue_tail);
2916EXPORT_SYMBOL(skb_insert);
2917EXPORT_SYMBOL(skb_queue_purge);
2918EXPORT_SYMBOL(skb_queue_head);
2919EXPORT_SYMBOL(skb_queue_tail);
2920EXPORT_SYMBOL(skb_unlink);
2921EXPORT_SYMBOL(skb_append);
2922EXPORT_SYMBOL(skb_split);
2923EXPORT_SYMBOL(skb_prepare_seq_read);
2924EXPORT_SYMBOL(skb_seq_read);
2925EXPORT_SYMBOL(skb_abort_seq_read);
2926EXPORT_SYMBOL(skb_find_text);
2927EXPORT_SYMBOL(skb_append_datato_frags);
2928EXPORT_SYMBOL(__skb_warn_lro_forwarding); 3015EXPORT_SYMBOL(__skb_warn_lro_forwarding);
2929
2930EXPORT_SYMBOL_GPL(skb_to_sgvec);
2931EXPORT_SYMBOL_GPL(skb_cow_data);
2932EXPORT_SYMBOL_GPL(skb_partial_csum_set);
diff --git a/net/core/sock.c b/net/core/sock.c
index 5f97caa158e8..0620046e4eba 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -120,6 +120,7 @@
120#include <net/net_namespace.h> 120#include <net/net_namespace.h>
121#include <net/request_sock.h> 121#include <net/request_sock.h>
122#include <net/sock.h> 122#include <net/sock.h>
123#include <linux/net_tstamp.h>
123#include <net/xfrm.h> 124#include <net/xfrm.h>
124#include <linux/ipsec.h> 125#include <linux/ipsec.h>
125 126
@@ -149,7 +150,7 @@ static const char *af_family_key_strings[AF_MAX+1] = {
149 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , 150 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
150 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , 151 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
151 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , 152 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
152 "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , 153 "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
153 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , 154 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
154 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , 155 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
155 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , 156 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
@@ -164,7 +165,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
164 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , 165 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
165 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , 166 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
166 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , 167 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
167 "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" , 168 "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
168 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , 169 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
169 "slock-27" , "slock-28" , "slock-AF_CAN" , 170 "slock-27" , "slock-28" , "slock-AF_CAN" ,
170 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , 171 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
@@ -179,7 +180,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
179 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , 180 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
180 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , 181 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
181 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , 182 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
182 "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" , 183 "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
183 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , 184 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
184 "clock-27" , "clock-28" , "clock-AF_CAN" , 185 "clock-27" , "clock-28" , "clock-AF_CAN" ,
185 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , 186 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
@@ -255,11 +256,14 @@ static void sock_warn_obsolete_bsdism(const char *name)
255 } 256 }
256} 257}
257 258
258static void sock_disable_timestamp(struct sock *sk) 259static void sock_disable_timestamp(struct sock *sk, int flag)
259{ 260{
260 if (sock_flag(sk, SOCK_TIMESTAMP)) { 261 if (sock_flag(sk, flag)) {
261 sock_reset_flag(sk, SOCK_TIMESTAMP); 262 sock_reset_flag(sk, flag);
262 net_disable_timestamp(); 263 if (!sock_flag(sk, SOCK_TIMESTAMP) &&
264 !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
265 net_disable_timestamp();
266 }
263 } 267 }
264} 268}
265 269
@@ -614,13 +618,38 @@ set_rcvbuf:
614 else 618 else
615 sock_set_flag(sk, SOCK_RCVTSTAMPNS); 619 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
616 sock_set_flag(sk, SOCK_RCVTSTAMP); 620 sock_set_flag(sk, SOCK_RCVTSTAMP);
617 sock_enable_timestamp(sk); 621 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
618 } else { 622 } else {
619 sock_reset_flag(sk, SOCK_RCVTSTAMP); 623 sock_reset_flag(sk, SOCK_RCVTSTAMP);
620 sock_reset_flag(sk, SOCK_RCVTSTAMPNS); 624 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
621 } 625 }
622 break; 626 break;
623 627
628 case SO_TIMESTAMPING:
629 if (val & ~SOF_TIMESTAMPING_MASK) {
 630 ret = -EINVAL;
631 break;
632 }
633 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
634 val & SOF_TIMESTAMPING_TX_HARDWARE);
635 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
636 val & SOF_TIMESTAMPING_TX_SOFTWARE);
637 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
638 val & SOF_TIMESTAMPING_RX_HARDWARE);
639 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
640 sock_enable_timestamp(sk,
641 SOCK_TIMESTAMPING_RX_SOFTWARE);
642 else
643 sock_disable_timestamp(sk,
644 SOCK_TIMESTAMPING_RX_SOFTWARE);
645 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
646 val & SOF_TIMESTAMPING_SOFTWARE);
647 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
648 val & SOF_TIMESTAMPING_SYS_HARDWARE);
649 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
650 val & SOF_TIMESTAMPING_RAW_HARDWARE);
651 break;
652
624 case SO_RCVLOWAT: 653 case SO_RCVLOWAT:
625 if (val < 0) 654 if (val < 0)
626 val = INT_MAX; 655 val = INT_MAX;
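
For reference, the new SO_TIMESTAMPING case above would typically be driven from user space roughly as follows. This is an illustration only; the SO_TIMESTAMPING option name and the SOF_* flag values come from the <linux/net_tstamp.h> and socket-header additions elsewhere in this series, not from this hunk.

/* Illustration: request software timestamps for transmitted and
 * received packets, plus software timestamp reporting. */
#include <sys/socket.h>
#include <linux/net_tstamp.h>

static int enable_sw_timestamps(int fd)
{
	int flags = SOF_TIMESTAMPING_TX_SOFTWARE |
		    SOF_TIMESTAMPING_RX_SOFTWARE |
		    SOF_TIMESTAMPING_SOFTWARE;

	/* rejected with EINVAL if bits outside SOF_TIMESTAMPING_MASK are set */
	return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
			  &flags, sizeof(flags));
}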
@@ -768,6 +797,24 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
768 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS); 797 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
769 break; 798 break;
770 799
800 case SO_TIMESTAMPING:
801 v.val = 0;
802 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
803 v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
804 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
805 v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
806 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
807 v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
808 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
809 v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
810 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
811 v.val |= SOF_TIMESTAMPING_SOFTWARE;
812 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
813 v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
814 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
815 v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
816 break;
817
771 case SO_RCVTIMEO: 818 case SO_RCVTIMEO:
772 lv=sizeof(struct timeval); 819 lv=sizeof(struct timeval);
773 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { 820 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
@@ -969,7 +1016,8 @@ void sk_free(struct sock *sk)
969 rcu_assign_pointer(sk->sk_filter, NULL); 1016 rcu_assign_pointer(sk->sk_filter, NULL);
970 } 1017 }
971 1018
972 sock_disable_timestamp(sk); 1019 sock_disable_timestamp(sk, SOCK_TIMESTAMP);
1020 sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);
973 1021
974 if (atomic_read(&sk->sk_omem_alloc)) 1022 if (atomic_read(&sk->sk_omem_alloc))
975 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", 1023 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
@@ -1255,10 +1303,9 @@ static long sock_wait_for_wmem(struct sock * sk, long timeo)
1255 * Generic send/receive buffer handlers 1303 * Generic send/receive buffer handlers
1256 */ 1304 */
1257 1305
1258static struct sk_buff *sock_alloc_send_pskb(struct sock *sk, 1306struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1259 unsigned long header_len, 1307 unsigned long data_len, int noblock,
1260 unsigned long data_len, 1308 int *errcode)
1261 int noblock, int *errcode)
1262{ 1309{
1263 struct sk_buff *skb; 1310 struct sk_buff *skb;
1264 gfp_t gfp_mask; 1311 gfp_t gfp_mask;
@@ -1338,6 +1385,7 @@ failure:
1338 *errcode = err; 1385 *errcode = err;
1339 return NULL; 1386 return NULL;
1340} 1387}
1388EXPORT_SYMBOL(sock_alloc_send_pskb);
1341 1389
1342struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 1390struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1343 int noblock, int *errcode) 1391 int noblock, int *errcode)
@@ -1786,7 +1834,7 @@ int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1786{ 1834{
1787 struct timeval tv; 1835 struct timeval tv;
1788 if (!sock_flag(sk, SOCK_TIMESTAMP)) 1836 if (!sock_flag(sk, SOCK_TIMESTAMP))
1789 sock_enable_timestamp(sk); 1837 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
1790 tv = ktime_to_timeval(sk->sk_stamp); 1838 tv = ktime_to_timeval(sk->sk_stamp);
1791 if (tv.tv_sec == -1) 1839 if (tv.tv_sec == -1)
1792 return -ENOENT; 1840 return -ENOENT;
@@ -1802,7 +1850,7 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1802{ 1850{
1803 struct timespec ts; 1851 struct timespec ts;
1804 if (!sock_flag(sk, SOCK_TIMESTAMP)) 1852 if (!sock_flag(sk, SOCK_TIMESTAMP))
1805 sock_enable_timestamp(sk); 1853 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
1806 ts = ktime_to_timespec(sk->sk_stamp); 1854 ts = ktime_to_timespec(sk->sk_stamp);
1807 if (ts.tv_sec == -1) 1855 if (ts.tv_sec == -1)
1808 return -ENOENT; 1856 return -ENOENT;
@@ -1814,11 +1862,20 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1814} 1862}
1815EXPORT_SYMBOL(sock_get_timestampns); 1863EXPORT_SYMBOL(sock_get_timestampns);
1816 1864
1817void sock_enable_timestamp(struct sock *sk) 1865void sock_enable_timestamp(struct sock *sk, int flag)
1818{ 1866{
1819 if (!sock_flag(sk, SOCK_TIMESTAMP)) { 1867 if (!sock_flag(sk, flag)) {
1820 sock_set_flag(sk, SOCK_TIMESTAMP); 1868 sock_set_flag(sk, flag);
1821 net_enable_timestamp(); 1869 /*
1870 * we just set one of the two flags which require net
1871 * time stamping, but time stamping might have been on
1872 * already because of the other one
1873 */
1874 if (!sock_flag(sk,
1875 flag == SOCK_TIMESTAMP ?
1876 SOCK_TIMESTAMPING_RX_SOFTWARE :
1877 SOCK_TIMESTAMP))
1878 net_enable_timestamp();
1822 } 1879 }
1823} 1880}
1824 1881
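
The reworked sock_enable_timestamp()/sock_disable_timestamp() pair above keeps the global net_enable_timestamp()/net_disable_timestamp() reference count balanced even though two independent socket flags (SOCK_TIMESTAMP and SOCK_TIMESTAMPING_RX_SOFTWARE) now depend on it. The stand-alone model below illustrates that rule under the stated assumption that it mirrors the intent of the hunk; it is not kernel code. The global count is bumped only when the first of the two flags is set and dropped only when the last one is cleared.

/* Stand-alone model of the enable/disable interplay (assumption: this
 * reproduces the logic of the patch, not the kernel implementation). */
#include <assert.h>
#include <stdbool.h>

static int net_ts_refcnt;		/* models net_enable_timestamp() users */
static bool flag_ts, flag_rx_sw;	/* the two per-socket flags */

static void enable(bool *flag, bool *other)
{
	if (!*flag) {
		*flag = true;
		if (!*other)		/* first user: take the reference */
			net_ts_refcnt++;
	}
}

static void disable(bool *flag, bool *other)
{
	if (*flag) {
		*flag = false;
		if (!*other)		/* last user: drop the reference */
			net_ts_refcnt--;
	}
}

int main(void)
{
	enable(&flag_ts, &flag_rx_sw);
	enable(&flag_rx_sw, &flag_ts);	/* refcount stays at 1 */
	assert(net_ts_refcnt == 1);
	disable(&flag_ts, &flag_rx_sw);
	disable(&flag_rx_sw, &flag_ts);
	assert(net_ts_refcnt == 0);
	return 0;
}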
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 83d3398559ea..7db1de0497c6 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -11,6 +11,7 @@
11#include <linux/socket.h> 11#include <linux/socket.h>
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <net/ip.h>
14#include <net/sock.h> 15#include <net/sock.h>
15 16
16static struct ctl_table net_core_table[] = { 17static struct ctl_table net_core_table[] = {
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 45f95e55f873..7ea557b7c6b1 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -20,6 +20,9 @@
20/* We can spread an ack vector across multiple options */ 20/* We can spread an ack vector across multiple options */
21#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2) 21#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2)
22 22
23/* Estimated minimum average Ack Vector length - used for updating MPS */
24#define DCCPAV_MIN_OPTLEN 16
25
23#define DCCP_ACKVEC_STATE_RECEIVED 0 26#define DCCP_ACKVEC_STATE_RECEIVED 0
24#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) 27#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6)
25#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) 28#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f2230fc168e1..d6bc47363b1c 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -42,9 +42,11 @@
42extern int dccp_debug; 42extern int dccp_debug;
43#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a) 43#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a)
44#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) 44#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
45#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
45#else 46#else
46#define dccp_pr_debug(format, a...) 47#define dccp_pr_debug(format, a...)
47#define dccp_pr_debug_cat(format, a...) 48#define dccp_pr_debug_cat(format, a...)
49#define dccp_debug(format, a...)
48#endif 50#endif
49 51
50extern struct inet_hashinfo dccp_hashinfo; 52extern struct inet_hashinfo dccp_hashinfo;
@@ -61,11 +63,14 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
61 * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields 63 * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields
62 * Hence a safe upper bound for the maximum option length is 1020-28 = 992 64 * Hence a safe upper bound for the maximum option length is 1020-28 = 992
63 */ 65 */
64#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int)) 66#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(uint32_t))
65#define DCCP_MAX_PACKET_HDR 28 67#define DCCP_MAX_PACKET_HDR 28
66#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR) 68#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR)
67#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER) 69#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER)
68 70
71/* Upper bound for initial feature-negotiation overhead (padded to 32 bits) */
72#define DCCP_FEATNEG_OVERHEAD (32 * sizeof(uint32_t))
73
69#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT 74#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
70 * state, about 60 seconds */ 75 * state, about 60 seconds */
71 76
@@ -95,9 +100,6 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
95extern int sysctl_dccp_request_retries; 100extern int sysctl_dccp_request_retries;
96extern int sysctl_dccp_retries1; 101extern int sysctl_dccp_retries1;
97extern int sysctl_dccp_retries2; 102extern int sysctl_dccp_retries2;
98extern int sysctl_dccp_feat_sequence_window;
99extern int sysctl_dccp_feat_rx_ccid;
100extern int sysctl_dccp_feat_tx_ccid;
101extern int sysctl_dccp_tx_qlen; 103extern int sysctl_dccp_tx_qlen;
102extern int sysctl_dccp_sync_ratelimit; 104extern int sysctl_dccp_sync_ratelimit;
103 105
@@ -409,23 +411,21 @@ static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
409static inline void dccp_update_gsr(struct sock *sk, u64 seq) 411static inline void dccp_update_gsr(struct sock *sk, u64 seq)
410{ 412{
411 struct dccp_sock *dp = dccp_sk(sk); 413 struct dccp_sock *dp = dccp_sk(sk);
412 const struct dccp_minisock *dmsk = dccp_msk(sk);
413 414
414 dp->dccps_gsr = seq; 415 dp->dccps_gsr = seq;
415 dccp_set_seqno(&dp->dccps_swl, 416 /* Sequence validity window depends on remote Sequence Window (7.5.1) */
416 dp->dccps_gsr + 1 - (dmsk->dccpms_sequence_window / 4)); 417 dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
417 dccp_set_seqno(&dp->dccps_swh, 418 dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4);
418 dp->dccps_gsr + (3 * dmsk->dccpms_sequence_window) / 4);
419} 419}
420 420
421static inline void dccp_update_gss(struct sock *sk, u64 seq) 421static inline void dccp_update_gss(struct sock *sk, u64 seq)
422{ 422{
423 struct dccp_sock *dp = dccp_sk(sk); 423 struct dccp_sock *dp = dccp_sk(sk);
424 424
425 dp->dccps_awh = dp->dccps_gss = seq; 425 dp->dccps_gss = seq;
426 dccp_set_seqno(&dp->dccps_awl, 426 /* Ack validity window depends on local Sequence Window value (7.5.1) */
427 (dp->dccps_gss - 427 dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win);
428 dccp_msk(sk)->dccpms_sequence_window + 1)); 428 dp->dccps_awh = dp->dccps_gss;
429} 429}
430 430
431static inline int dccp_ack_pending(const struct sock *sk) 431static inline int dccp_ack_pending(const struct sock *sk)
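
A worked example may help here (numbers assumed for illustration, not taken from the patch): with a remote Sequence Window of W = 100 and GSR = 1000, the updated code yields SWL = ADD48(GSR, 1) - W/4 = 1001 - 25 = 976 and SWH = GSR + 3*W/4 = 1075, as required by RFC 4340, 7.5.1; AWL is derived the same way from the local window. The sketch below reproduces the modulo-2^48 arithmetic with local equivalents of the kernel's ADD48/SUB48 helpers.

/* Illustrative re-implementation of the 7.5.1 window formulas with
 * local 48-bit modular helpers (stand-ins for the kernel macros). */
#include <stdint.h>
#include <stdio.h>

#define SEQ48_MASK ((1ULL << 48) - 1)
static uint64_t add48(uint64_t a, uint64_t b) { return (a + b) & SEQ48_MASK; }
static uint64_t sub48(uint64_t a, uint64_t b) { return (a - b) & SEQ48_MASK; }

int main(void)
{
	uint64_t gsr = 1000, r_seq_win = 100;
	uint64_t swl = sub48(add48(gsr, 1), r_seq_win / 4);
	uint64_t swh = add48(gsr, (3 * r_seq_win) / 4);

	printf("SWL=%llu SWH=%llu\n",
	       (unsigned long long)swl, (unsigned long long)swh);
	return 0;	/* prints SWL=976 SWH=1075 */
}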
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 4152308958ab..b04160a2eea5 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -25,6 +25,11 @@
25#include "ccid.h" 25#include "ccid.h"
26#include "feat.h" 26#include "feat.h"
27 27
28/* feature-specific sysctls - initialised to the defaults from RFC 4340, 6.4 */
29unsigned long sysctl_dccp_sequence_window __read_mostly = 100;
30int sysctl_dccp_rx_ccid __read_mostly = 2,
31 sysctl_dccp_tx_ccid __read_mostly = 2;
32
28/* 33/*
29 * Feature activation handlers. 34 * Feature activation handlers.
30 * 35 *
@@ -51,8 +56,17 @@ static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx)
51 56
52static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx) 57static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx)
53{ 58{
54 if (!rx) 59 struct dccp_sock *dp = dccp_sk(sk);
55 dccp_msk(sk)->dccpms_sequence_window = seq_win; 60
61 if (rx) {
62 dp->dccps_r_seq_win = seq_win;
63 /* propagate changes to update SWL/SWH */
64 dccp_update_gsr(sk, dp->dccps_gsr);
65 } else {
66 dp->dccps_l_seq_win = seq_win;
67 /* propagate changes to update AWL */
68 dccp_update_gss(sk, dp->dccps_gss);
69 }
56 return 0; 70 return 0;
57} 71}
58 72
@@ -194,6 +208,100 @@ static int dccp_feat_default_value(u8 feat_num)
194 return idx < 0 ? 0 : dccp_feat_table[idx].default_value; 208 return idx < 0 ? 0 : dccp_feat_table[idx].default_value;
195} 209}
196 210
211/*
212 * Debugging and verbose-printing section
213 */
214static const char *dccp_feat_fname(const u8 feat)
215{
216 static const char *feature_names[] = {
217 [DCCPF_RESERVED] = "Reserved",
218 [DCCPF_CCID] = "CCID",
219 [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos",
220 [DCCPF_SEQUENCE_WINDOW] = "Sequence Window",
221 [DCCPF_ECN_INCAPABLE] = "ECN Incapable",
222 [DCCPF_ACK_RATIO] = "Ack Ratio",
223 [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector",
224 [DCCPF_SEND_NDP_COUNT] = "Send NDP Count",
225 [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage",
226 [DCCPF_DATA_CHECKSUM] = "Send Data Checksum",
227 };
228 if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
229 return feature_names[DCCPF_RESERVED];
230
231 if (feat == DCCPF_SEND_LEV_RATE)
232 return "Send Loss Event Rate";
233 if (feat >= DCCPF_MIN_CCID_SPECIFIC)
234 return "CCID-specific";
235
236 return feature_names[feat];
237}
238
239static const char *dccp_feat_sname[] = { "DEFAULT", "INITIALISING", "CHANGING",
240 "UNSTABLE", "STABLE" };
241
242#ifdef CONFIG_IP_DCCP_DEBUG
243static const char *dccp_feat_oname(const u8 opt)
244{
245 switch (opt) {
246 case DCCPO_CHANGE_L: return "Change_L";
247 case DCCPO_CONFIRM_L: return "Confirm_L";
248 case DCCPO_CHANGE_R: return "Change_R";
249 case DCCPO_CONFIRM_R: return "Confirm_R";
250 }
251 return NULL;
252}
253
254static void dccp_feat_printval(u8 feat_num, dccp_feat_val const *val)
255{
256 u8 i, type = dccp_feat_type(feat_num);
257
258 if (val == NULL || (type == FEAT_SP && val->sp.vec == NULL))
259 dccp_pr_debug_cat("(NULL)");
260 else if (type == FEAT_SP)
261 for (i = 0; i < val->sp.len; i++)
262 dccp_pr_debug_cat("%s%u", i ? " " : "", val->sp.vec[i]);
263 else if (type == FEAT_NN)
264 dccp_pr_debug_cat("%llu", (unsigned long long)val->nn);
265 else
266 dccp_pr_debug_cat("unknown type %u", type);
267}
268
269static void dccp_feat_printvals(u8 feat_num, u8 *list, u8 len)
270{
271 u8 type = dccp_feat_type(feat_num);
272 dccp_feat_val fval = { .sp.vec = list, .sp.len = len };
273
274 if (type == FEAT_NN)
275 fval.nn = dccp_decode_value_var(list, len);
276 dccp_feat_printval(feat_num, &fval);
277}
278
279static void dccp_feat_print_entry(struct dccp_feat_entry const *entry)
280{
281 dccp_debug(" * %s %s = ", entry->is_local ? "local" : "remote",
282 dccp_feat_fname(entry->feat_num));
283 dccp_feat_printval(entry->feat_num, &entry->val);
284 dccp_pr_debug_cat(", state=%s %s\n", dccp_feat_sname[entry->state],
285 entry->needs_confirm ? "(Confirm pending)" : "");
286}
287
288#define dccp_feat_print_opt(opt, feat, val, len, mandatory) do { \
289 dccp_pr_debug("%s(%s, ", dccp_feat_oname(opt), dccp_feat_fname(feat));\
290 dccp_feat_printvals(feat, val, len); \
291 dccp_pr_debug_cat(") %s\n", mandatory ? "!" : ""); } while (0)
292
293#define dccp_feat_print_fnlist(fn_list) { \
294 const struct dccp_feat_entry *___entry; \
295 \
296 dccp_pr_debug("List Dump:\n"); \
297 list_for_each_entry(___entry, fn_list, node) \
298 dccp_feat_print_entry(___entry); \
299}
300#else /* ! CONFIG_IP_DCCP_DEBUG */
301#define dccp_feat_print_opt(opt, feat, val, len, mandatory)
302#define dccp_feat_print_fnlist(fn_list)
303#endif
304
197static int __dccp_feat_activate(struct sock *sk, const int idx, 305static int __dccp_feat_activate(struct sock *sk, const int idx,
198 const bool is_local, dccp_feat_val const *fval) 306 const bool is_local, dccp_feat_val const *fval)
199{ 307{
@@ -226,6 +334,10 @@ static int __dccp_feat_activate(struct sock *sk, const int idx,
226 /* Location is RX if this is a local-RX or remote-TX feature */ 334 /* Location is RX if this is a local-RX or remote-TX feature */
227 rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX)); 335 rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX));
228 336
337 dccp_debug(" -> activating %s %s, %sval=%llu\n", rx ? "RX" : "TX",
338 dccp_feat_fname(dccp_feat_table[idx].feat_num),
339 fval ? "" : "default ", (unsigned long long)val);
340
229 return dccp_feat_table[idx].activation_hdlr(sk, val, rx); 341 return dccp_feat_table[idx].activation_hdlr(sk, val, rx);
230} 342}
231 343
@@ -530,6 +642,7 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq,
530 return -1; 642 return -1;
531 } 643 }
532 } 644 }
645 dccp_feat_print_opt(opt, pos->feat_num, ptr, len, 0);
533 646
534 if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt)) 647 if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt))
535 return -1; 648 return -1;
@@ -783,6 +896,7 @@ int dccp_feat_finalise_settings(struct dccp_sock *dp)
783 while (i--) 896 while (i--)
784 if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i)) 897 if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i))
785 return -1; 898 return -1;
899 dccp_feat_print_fnlist(fn);
786 return 0; 900 return 0;
787} 901}
788 902
@@ -901,6 +1015,8 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
901 if (len == 0 || type == FEAT_UNKNOWN) /* 6.1 and 6.6.8 */ 1015 if (len == 0 || type == FEAT_UNKNOWN) /* 6.1 and 6.6.8 */
902 goto unknown_feature_or_value; 1016 goto unknown_feature_or_value;
903 1017
1018 dccp_feat_print_opt(opt, feat, val, len, is_mandatory);
1019
904 /* 1020 /*
905 * Negotiation of NN features: Change R is invalid, so there is no 1021 * Negotiation of NN features: Change R is invalid, so there is no
906 * simultaneous negotiation; hence we do not look up in the list. 1022 * simultaneous negotiation; hence we do not look up in the list.
@@ -1006,6 +1122,8 @@ static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
1006 const bool local = (opt == DCCPO_CONFIRM_R); 1122 const bool local = (opt == DCCPO_CONFIRM_R);
1007 struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local); 1123 struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local);
1008 1124
1125 dccp_feat_print_opt(opt, feat, val, len, is_mandatory);
1126
1009 if (entry == NULL) { /* nothing queued: ignore or handle error */ 1127 if (entry == NULL) { /* nothing queued: ignore or handle error */
1010 if (is_mandatory && type == FEAT_UNKNOWN) 1128 if (is_mandatory && type == FEAT_UNKNOWN)
1011 return DCCP_RESET_CODE_MANDATORY_ERROR; 1129 return DCCP_RESET_CODE_MANDATORY_ERROR;
@@ -1115,23 +1233,70 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
1115 return 0; /* ignore FN options in all other states */ 1233 return 0; /* ignore FN options in all other states */
1116} 1234}
1117 1235
1236/**
1237 * dccp_feat_init - Seed feature negotiation with host-specific defaults
1238 * This initialises global defaults, depending on the value of the sysctls.
1239 * These can later be overridden by registering changes via setsockopt calls.
1240 * The last link in the chain is finalise_settings, to make sure that between
1241 * here and the start of actual feature negotiation no inconsistencies enter.
1242 *
1243 * All features not appearing below use either defaults or are otherwise
1244 * later adjusted through dccp_feat_finalise_settings().
1245 */
1118int dccp_feat_init(struct sock *sk) 1246int dccp_feat_init(struct sock *sk)
1119{ 1247{
1120 struct dccp_sock *dp = dccp_sk(sk); 1248 struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
1121 struct dccp_minisock *dmsk = dccp_msk(sk); 1249 u8 on = 1, off = 0;
1122 int rc; 1250 int rc;
1251 struct {
1252 u8 *val;
1253 u8 len;
1254 } tx, rx;
1255
1256 /* Non-negotiable (NN) features */
1257 rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0,
1258 sysctl_dccp_sequence_window);
1259 if (rc)
1260 return rc;
1261
1262 /* Server-priority (SP) features */
1263
1264 /* Advertise that short seqnos are not supported (7.6.1) */
1265 rc = __feat_register_sp(fn, DCCPF_SHORT_SEQNOS, true, true, &off, 1);
1266 if (rc)
1267 return rc;
1123 1268
1124 INIT_LIST_HEAD(&dmsk->dccpms_pending); /* XXX no longer used */ 1269 /* RFC 4340 12.1: "If a DCCP is not ECN capable, ..." */
1125 INIT_LIST_HEAD(&dmsk->dccpms_conf); /* XXX no longer used */ 1270 rc = __feat_register_sp(fn, DCCPF_ECN_INCAPABLE, true, true, &on, 1);
1271 if (rc)
1272 return rc;
1273
1274 /*
1275 * We advertise the available list of CCIDs and reorder according to
1276 * preferences, to avoid failure resulting from negotiating different
1277 * singleton values (which always leads to failure).
1278 * These settings can still (later) be overridden via sockopts.
1279 */
1280 if (ccid_get_builtin_ccids(&tx.val, &tx.len) ||
1281 ccid_get_builtin_ccids(&rx.val, &rx.len))
1282 return -ENOBUFS;
1283
1284 if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) ||
1285 !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len))
1286 goto free_ccid_lists;
1287
1288 rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len);
1289 if (rc)
1290 goto free_ccid_lists;
1291
1292 rc = __feat_register_sp(fn, DCCPF_CCID, false, false, rx.val, rx.len);
1126 1293
1127 /* Ack ratio */ 1294free_ccid_lists:
1128 rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0, 1295 kfree(tx.val);
1129 dp->dccps_l_ack_ratio); 1296 kfree(rx.val);
1130 return rc; 1297 return rc;
1131} 1298}
1132 1299
1133EXPORT_SYMBOL_GPL(dccp_feat_init);
1134
1135int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list) 1300int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list)
1136{ 1301{
1137 struct dccp_sock *dp = dccp_sk(sk); 1302 struct dccp_sock *dp = dccp_sk(sk);
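
As the comment in dccp_feat_init() notes, the CCID preference list seeded from the sysctls can still be overridden per socket before connection setup. A hedged user-space illustration follows; it assumes the DCCP_SOCKOPT_CCID definition from <linux/dccp.h> and the SOL_DCCP level introduced elsewhere in this series, and that the preferred CCID is actually built into the kernel.

/* Illustration only: prefer CCID-3, falling back to CCID-2, for both
 * directions of one socket.  Must be issued before connect()/listen(). */
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/dccp.h>

#ifndef SOL_DCCP
#define SOL_DCCP 269			/* value from <linux/socket.h> */
#endif

static int dccp_socket_prefer_ccid3(void)
{
	unsigned char prefs[] = { 3, 2 };	/* CCIDs in priority order */
	int fd = socket(PF_INET, SOCK_DCCP, IPPROTO_DCCP);

	if (fd < 0)
		return -1;
	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID,
		       prefs, sizeof(prefs)) < 0) {
		/* e.g. CCID-3 not built in: the seeded defaults still apply */
	}
	return fd;
}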
@@ -1156,9 +1321,10 @@ int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list)
1156 goto activation_failed; 1321 goto activation_failed;
1157 } 1322 }
1158 if (cur->state != FEAT_STABLE) { 1323 if (cur->state != FEAT_STABLE) {
1159 DCCP_CRIT("Negotiation of %s %u failed in state %u", 1324 DCCP_CRIT("Negotiation of %s %s failed in state %s",
1160 cur->is_local ? "local" : "remote", 1325 cur->is_local ? "local" : "remote",
1161 cur->feat_num, cur->state); 1326 dccp_feat_fname(cur->feat_num),
1327 dccp_feat_sname[cur->state]);
1162 goto activation_failed; 1328 goto activation_failed;
1163 } 1329 }
1164 fvals[idx][cur->is_local] = &cur->val; 1330 fvals[idx][cur->is_local] = &cur->val;
@@ -1199,43 +1365,3 @@ activation_failed:
1199 dp->dccps_hc_rx_ackvec = NULL; 1365 dp->dccps_hc_rx_ackvec = NULL;
1200 return -1; 1366 return -1;
1201} 1367}
1202
1203#ifdef CONFIG_IP_DCCP_DEBUG
1204const char *dccp_feat_typename(const u8 type)
1205{
1206 switch(type) {
1207 case DCCPO_CHANGE_L: return("ChangeL");
1208 case DCCPO_CONFIRM_L: return("ConfirmL");
1209 case DCCPO_CHANGE_R: return("ChangeR");
1210 case DCCPO_CONFIRM_R: return("ConfirmR");
1211 /* the following case must not appear in feature negotiation */
1212 default: dccp_pr_debug("unknown type %d [BUG!]\n", type);
1213 }
1214 return NULL;
1215}
1216
1217const char *dccp_feat_name(const u8 feat)
1218{
1219 static const char *feature_names[] = {
1220 [DCCPF_RESERVED] = "Reserved",
1221 [DCCPF_CCID] = "CCID",
1222 [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos",
1223 [DCCPF_SEQUENCE_WINDOW] = "Sequence Window",
1224 [DCCPF_ECN_INCAPABLE] = "ECN Incapable",
1225 [DCCPF_ACK_RATIO] = "Ack Ratio",
1226 [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector",
1227 [DCCPF_SEND_NDP_COUNT] = "Send NDP Count",
1228 [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage",
1229 [DCCPF_DATA_CHECKSUM] = "Send Data Checksum",
1230 };
1231 if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
1232 return feature_names[DCCPF_RESERVED];
1233
1234 if (feat == DCCPF_SEND_LEV_RATE)
1235 return "Send Loss Event Rate";
1236 if (feat >= DCCPF_MIN_CCID_SPECIFIC)
1237 return "CCID-specific";
1238
1239 return feature_names[feat];
1240}
1241#endif /* CONFIG_IP_DCCP_DEBUG */
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 9b46e2a7866e..f96721619def 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -100,26 +100,21 @@ struct ccid_dependency {
100 u8 val; 100 u8 val;
101}; 101};
102 102
103#ifdef CONFIG_IP_DCCP_DEBUG 103/*
104extern const char *dccp_feat_typename(const u8 type); 104 * Sysctls to seed defaults for feature negotiation
105extern const char *dccp_feat_name(const u8 feat); 105 */
106 106extern unsigned long sysctl_dccp_sequence_window;
107static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) 107extern int sysctl_dccp_rx_ccid;
108{ 108extern int sysctl_dccp_tx_ccid;
109 dccp_pr_debug("%s(%s (%d), %d)\n", dccp_feat_typename(type),
110 dccp_feat_name(feat), feat, val);
111}
112#else
113#define dccp_feat_debug(type, feat, val)
114#endif /* CONFIG_IP_DCCP_DEBUG */
115 109
110extern int dccp_feat_init(struct sock *sk);
111extern void dccp_feat_initialise_sysctls(void);
116extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, 112extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
117 u8 const *list, u8 len); 113 u8 const *list, u8 len);
118extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val); 114extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
119extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, 115extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
120 u8 mand, u8 opt, u8 feat, u8 *val, u8 len); 116 u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
121extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); 117extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
122extern int dccp_feat_init(struct sock *sk);
123 118
124/* 119/*
125 * Encoding variable-length options and their maximum length. 120 * Encoding variable-length options and their maximum length.
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 6821ae33dd37..5ca49cec95f5 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -42,11 +42,6 @@ struct inet_timewait_death_row dccp_death_row = {
42 42
43EXPORT_SYMBOL_GPL(dccp_death_row); 43EXPORT_SYMBOL_GPL(dccp_death_row);
44 44
45void dccp_minisock_init(struct dccp_minisock *dmsk)
46{
47 dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
48}
49
50void dccp_time_wait(struct sock *sk, int state, int timeo) 45void dccp_time_wait(struct sock *sk, int state, int timeo)
51{ 46{
52 struct inet_timewait_sock *tw = NULL; 47 struct inet_timewait_sock *tw = NULL;
@@ -110,7 +105,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
110 struct dccp_request_sock *dreq = dccp_rsk(req); 105 struct dccp_request_sock *dreq = dccp_rsk(req);
111 struct inet_connection_sock *newicsk = inet_csk(newsk); 106 struct inet_connection_sock *newicsk = inet_csk(newsk);
112 struct dccp_sock *newdp = dccp_sk(newsk); 107 struct dccp_sock *newdp = dccp_sk(newsk);
113 struct dccp_minisock *newdmsk = dccp_msk(newsk);
114 108
115 newdp->dccps_role = DCCP_ROLE_SERVER; 109 newdp->dccps_role = DCCP_ROLE_SERVER;
116 newdp->dccps_hc_rx_ackvec = NULL; 110 newdp->dccps_hc_rx_ackvec = NULL;
@@ -128,10 +122,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
128 * Initialize S.GAR := S.ISS 122 * Initialize S.GAR := S.ISS
129 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies 123 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
130 */ 124 */
131
132 /* See dccp_v4_conn_request */
133 newdmsk->dccpms_sequence_window = req->rcv_wnd;
134
135 newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss; 125 newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss;
136 dccp_update_gss(newsk, dreq->dreq_iss); 126 dccp_update_gss(newsk, dreq->dreq_iss);
137 127
@@ -290,7 +280,6 @@ int dccp_reqsk_init(struct request_sock *req,
290 inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; 280 inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
291 inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport; 281 inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport;
292 inet_rsk(req)->acked = 0; 282 inet_rsk(req)->acked = 0;
293 req->rcv_wnd = sysctl_dccp_feat_sequence_window;
294 dreq->dreq_timestamp_echo = 0; 283 dreq->dreq_timestamp_echo = 0;
295 284
296 /* inherit feature negotiation options from listening socket */ 285 /* inherit feature negotiation options from listening socket */
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 7b1165c21f51..1b08cae9c65b 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -23,10 +23,6 @@
23#include "dccp.h" 23#include "dccp.h"
24#include "feat.h" 24#include "feat.h"
25 25
26int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
27int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID;
28int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID;
29
30u64 dccp_decode_value_var(const u8 *bf, const u8 len) 26u64 dccp_decode_value_var(const u8 *bf, const u8 len)
31{ 27{
32 u64 value = 0; 28 u64 value = 0;
@@ -502,10 +498,6 @@ int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
502 *to++ = *val; 498 *to++ = *val;
503 if (len) 499 if (len)
504 memcpy(to, val, len); 500 memcpy(to, val, len);
505
506 dccp_pr_debug("%s(%s (%d), ...), length %d\n",
507 dccp_feat_typename(type),
508 dccp_feat_name(feat), feat, len);
509 return 0; 501 return 0;
510} 502}
511 503
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 22a618af4893..36bcc00654d3 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -161,21 +161,27 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
161 struct inet_connection_sock *icsk = inet_csk(sk); 161 struct inet_connection_sock *icsk = inet_csk(sk);
162 struct dccp_sock *dp = dccp_sk(sk); 162 struct dccp_sock *dp = dccp_sk(sk);
163 u32 ccmps = dccp_determine_ccmps(dp); 163 u32 ccmps = dccp_determine_ccmps(dp);
164 int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; 164 u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
165 165
166 /* Account for header lengths and IPv4/v6 option overhead */ 166 /* Account for header lengths and IPv4/v6 option overhead */
167 cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len + 167 cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
168 sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext)); 168 sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
169 169
170 /* 170 /*
171 * FIXME: this should come from the CCID infrastructure, where, say, 171 * Leave enough headroom for common DCCP header options.
172 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets 172 * This only considers options which may appear on DCCP-Data packets, as
173 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED 173 * per table 3 in RFC 4340, 5.8. When running out of space for other
174 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to 174 * options (eg. Ack Vector which can take up to 255 bytes), it is better
175 * make it a multiple of 4 175 * to schedule a separate Ack. Thus we leave headroom for the following:
176 * - 1 byte for Slow Receiver (11.6)
177 * - 6 bytes for Timestamp (13.1)
178 * - 10 bytes for Timestamp Echo (13.3)
179 * - 8 bytes for NDP count (7.7, when activated)
180 * - 6 bytes for Data Checksum (9.3)
181 * - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled)
176 */ 182 */
177 183 cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 +
178 cur_mps -= roundup(5 + 6 + 10 + 6 + 6 + 6, 4); 184 (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4);
179 185
180 /* And store cached results */ 186 /* And store cached results */
181 icsk->icsk_pmtu_cookie = pmtu; 187 icsk->icsk_pmtu_cookie = pmtu;
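
For illustration (assumed numbers, not from the patch): with an IPv4 path MTU of 1500 and no IP options, the fixed headers cost 20 + 12 + 4 = 36 bytes, leaving 1464; with the NDP Count feature off and an Ack Vector in use, the option headroom above is roundup(1 + 6 + 10 + 0 + 6 + DCCPAV_MIN_OPTLEN, 4) = roundup(39, 4) = 40, so the cached MPS becomes 1424.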
@@ -270,7 +276,20 @@ void dccp_write_xmit(struct sock *sk, int block)
270 const int len = skb->len; 276 const int len = skb->len;
271 277
272 if (sk->sk_state == DCCP_PARTOPEN) { 278 if (sk->sk_state == DCCP_PARTOPEN) {
273 /* See 8.1.5. Handshake Completion */ 279 const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
280 /*
281 * See 8.1.5 - Handshake Completion.
282 *
283 * For robustness we resend Confirm options until the client has
284 * entered OPEN. During the initial feature negotiation, the MPS
285 * is smaller than usual, reduced by the Change/Confirm options.
286 */
287 if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
288 DCCP_WARN("Payload too large (%d) for featneg.\n", len);
289 dccp_send_ack(sk);
290 dccp_feat_list_purge(&dp->dccps_featneg);
291 }
292
274 inet_csk_schedule_ack(sk); 293 inet_csk_schedule_ack(sk);
275 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 294 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
276 inet_csk(sk)->icsk_rto, 295 inet_csk(sk)->icsk_rto,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 945b4d5d23b3..314a1b5c033c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -174,8 +174,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
174 struct dccp_sock *dp = dccp_sk(sk); 174 struct dccp_sock *dp = dccp_sk(sk);
175 struct inet_connection_sock *icsk = inet_csk(sk); 175 struct inet_connection_sock *icsk = inet_csk(sk);
176 176
177 dccp_minisock_init(&dp->dccps_minisock);
178
179 icsk->icsk_rto = DCCP_TIMEOUT_INIT; 177 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
180 icsk->icsk_syn_retries = sysctl_dccp_request_retries; 178 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
181 sk->sk_state = DCCP_CLOSED; 179 sk->sk_state = DCCP_CLOSED;
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 018e210875e1..a5a1856234e7 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -18,55 +18,72 @@
18#error This file should not be compiled without CONFIG_SYSCTL defined 18#error This file should not be compiled without CONFIG_SYSCTL defined
19#endif 19#endif
20 20
21/* Boundary values */
22static int zero = 0,
23 u8_max = 0xFF;
24static unsigned long seqw_min = 32;
25
21static struct ctl_table dccp_default_table[] = { 26static struct ctl_table dccp_default_table[] = {
22 { 27 {
23 .procname = "seq_window", 28 .procname = "seq_window",
24 .data = &sysctl_dccp_feat_sequence_window, 29 .data = &sysctl_dccp_sequence_window,
25 .maxlen = sizeof(sysctl_dccp_feat_sequence_window), 30 .maxlen = sizeof(sysctl_dccp_sequence_window),
26 .mode = 0644, 31 .mode = 0644,
27 .proc_handler = proc_dointvec, 32 .proc_handler = proc_doulongvec_minmax,
33 .extra1 = &seqw_min, /* RFC 4340, 7.5.2 */
28 }, 34 },
29 { 35 {
30 .procname = "rx_ccid", 36 .procname = "rx_ccid",
31 .data = &sysctl_dccp_feat_rx_ccid, 37 .data = &sysctl_dccp_rx_ccid,
32 .maxlen = sizeof(sysctl_dccp_feat_rx_ccid), 38 .maxlen = sizeof(sysctl_dccp_rx_ccid),
33 .mode = 0644, 39 .mode = 0644,
34 .proc_handler = proc_dointvec, 40 .proc_handler = proc_dointvec_minmax,
41 .extra1 = &zero,
42 .extra2 = &u8_max, /* RFC 4340, 10. */
35 }, 43 },
36 { 44 {
37 .procname = "tx_ccid", 45 .procname = "tx_ccid",
38 .data = &sysctl_dccp_feat_tx_ccid, 46 .data = &sysctl_dccp_tx_ccid,
39 .maxlen = sizeof(sysctl_dccp_feat_tx_ccid), 47 .maxlen = sizeof(sysctl_dccp_tx_ccid),
40 .mode = 0644, 48 .mode = 0644,
41 .proc_handler = proc_dointvec, 49 .proc_handler = proc_dointvec_minmax,
50 .extra1 = &zero,
51 .extra2 = &u8_max, /* RFC 4340, 10. */
42 }, 52 },
43 { 53 {
44 .procname = "request_retries", 54 .procname = "request_retries",
45 .data = &sysctl_dccp_request_retries, 55 .data = &sysctl_dccp_request_retries,
46 .maxlen = sizeof(sysctl_dccp_request_retries), 56 .maxlen = sizeof(sysctl_dccp_request_retries),
47 .mode = 0644, 57 .mode = 0644,
48 .proc_handler = proc_dointvec, 58 .proc_handler = proc_dointvec_minmax,
59 .extra1 = &zero,
60 .extra2 = &u8_max,
49 }, 61 },
50 { 62 {
51 .procname = "retries1", 63 .procname = "retries1",
52 .data = &sysctl_dccp_retries1, 64 .data = &sysctl_dccp_retries1,
53 .maxlen = sizeof(sysctl_dccp_retries1), 65 .maxlen = sizeof(sysctl_dccp_retries1),
54 .mode = 0644, 66 .mode = 0644,
55 .proc_handler = proc_dointvec, 67 .proc_handler = proc_dointvec_minmax,
68 .extra1 = &zero,
69 .extra2 = &u8_max,
56 }, 70 },
57 { 71 {
58 .procname = "retries2", 72 .procname = "retries2",
59 .data = &sysctl_dccp_retries2, 73 .data = &sysctl_dccp_retries2,
60 .maxlen = sizeof(sysctl_dccp_retries2), 74 .maxlen = sizeof(sysctl_dccp_retries2),
61 .mode = 0644, 75 .mode = 0644,
62 .proc_handler = proc_dointvec, 76 .proc_handler = proc_dointvec_minmax,
77 .extra1 = &zero,
78 .extra2 = &u8_max,
63 }, 79 },
64 { 80 {
65 .procname = "tx_qlen", 81 .procname = "tx_qlen",
66 .data = &sysctl_dccp_tx_qlen, 82 .data = &sysctl_dccp_tx_qlen,
67 .maxlen = sizeof(sysctl_dccp_tx_qlen), 83 .maxlen = sizeof(sysctl_dccp_tx_qlen),
68 .mode = 0644, 84 .mode = 0644,
69 .proc_handler = proc_dointvec, 85 .proc_handler = proc_dointvec_minmax,
86 .extra1 = &zero,
70 }, 87 },
71 { 88 {
72 .procname = "sync_ratelimit", 89 .procname = "sync_ratelimit",
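
With the table above, the renamed controls now validate their input; an administrator or application would typically seed them before DCCP sockets are created. A small hedged sketch follows — the /proc path is assumed from the "net/dccp/default" directory this table is registered under, and is not shown in this hunk.

/* Illustration: raise the default Sequence Window seeded into feature
 * negotiation; values below 32 are now rejected by the minmax handler. */
#include <stdio.h>

static int set_dccp_seq_window(unsigned long seqw)
{
	FILE *f = fopen("/proc/sys/net/dccp/default/seq_window", "w");

	if (!f)
		return -1;
	fprintf(f, "%lu\n", seqw);
	return fclose(f);
}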
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index cf0e18499297..ec233b64f853 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1246,11 +1246,12 @@ static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1246 1246
1247 case TIOCINQ: 1247 case TIOCINQ:
1248 lock_sock(sk); 1248 lock_sock(sk);
1249 if ((skb = skb_peek(&scp->other_receive_queue)) != NULL) { 1249 skb = skb_peek(&scp->other_receive_queue);
1250 if (skb) {
1250 amount = skb->len; 1251 amount = skb->len;
1251 } else { 1252 } else {
1252 struct sk_buff *skb = sk->sk_receive_queue.next; 1253 skb = sk->sk_receive_queue.next;
1253 for(;;) { 1254 for (;;) {
1254 if (skb == 1255 if (skb ==
1255 (struct sk_buff *)&sk->sk_receive_queue) 1256 (struct sk_buff *)&sk->sk_receive_queue)
1256 break; 1257 break;
@@ -1579,16 +1580,16 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
1579 default: 1580 default:
1580#ifdef CONFIG_NETFILTER 1581#ifdef CONFIG_NETFILTER
1581 { 1582 {
1582 int val, len; 1583 int ret, len;
1583 1584
1584 if(get_user(len, optlen)) 1585 if(get_user(len, optlen))
1585 return -EFAULT; 1586 return -EFAULT;
1586 1587
1587 val = nf_getsockopt(sk, PF_DECnet, optname, 1588 ret = nf_getsockopt(sk, PF_DECnet, optname,
1588 optval, &len); 1589 optval, &len);
1589 if (val >= 0) 1590 if (ret >= 0)
1590 val = put_user(len, optlen); 1591 ret = put_user(len, optlen);
1591 return val; 1592 return ret;
1592 } 1593 }
1593#endif 1594#endif
1594 case DSO_STREAM: 1595 case DSO_STREAM:
@@ -2071,8 +2072,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
2071 } 2072 }
2072out: 2073out:
2073 2074
2074 if (skb) 2075 kfree_skb(skb);
2075 kfree_skb(skb);
2076 2076
2077 release_sock(sk); 2077 release_sock(sk);
2078 2078
@@ -2113,7 +2113,7 @@ static struct notifier_block dn_dev_notifier = {
2113extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); 2113extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
2114 2114
2115static struct packet_type dn_dix_packet_type = { 2115static struct packet_type dn_dix_packet_type = {
2116 .type = __constant_htons(ETH_P_DNA_RT), 2116 .type = cpu_to_be16(ETH_P_DNA_RT),
2117 .dev = NULL, /* All devices */ 2117 .dev = NULL, /* All devices */
2118 .func = dn_route_rcv, 2118 .func = dn_route_rcv,
2119}; 2119};
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index daf2b98b15fe..1c6a5bb6f0c8 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -684,7 +684,6 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
684 return -ENODEV; 684 return -ENODEV;
685 685
686 if ((dn_db = dev->dn_ptr) == NULL) { 686 if ((dn_db = dev->dn_ptr) == NULL) {
687 int err;
688 dn_db = dn_dev_create(dev, &err); 687 dn_db = dn_dev_create(dev, &err);
689 if (!dn_db) 688 if (!dn_db)
690 return err; 689 return err;
@@ -769,7 +768,8 @@ static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa)
769 kfree_skb(skb); 768 kfree_skb(skb);
770 goto errout; 769 goto errout;
771 } 770 }
772 err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); 771 rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
772 return;
773errout: 773errout:
774 if (err < 0) 774 if (err < 0)
775 rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err); 775 rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err);
@@ -1322,6 +1322,7 @@ static inline int is_dn_dev(struct net_device *dev)
1322} 1322}
1323 1323
1324static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) 1324static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
1325 __acquires(&dev_base_lock)
1325{ 1326{
1326 int i; 1327 int i;
1327 struct net_device *dev; 1328 struct net_device *dev;
@@ -1364,6 +1365,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1364} 1365}
1365 1366
1366static void dn_dev_seq_stop(struct seq_file *seq, void *v) 1367static void dn_dev_seq_stop(struct seq_file *seq, void *v)
1368 __releases(&dev_base_lock)
1367{ 1369{
1368 read_unlock(&dev_base_lock); 1370 read_unlock(&dev_base_lock);
1369} 1371}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index c754670b7fca..5130dee0b384 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -124,7 +124,7 @@ int decnet_dst_gc_interval = 2;
124 124
125static struct dst_ops dn_dst_ops = { 125static struct dst_ops dn_dst_ops = {
126 .family = PF_DECnet, 126 .family = PF_DECnet,
127 .protocol = __constant_htons(ETH_P_DNA_RT), 127 .protocol = cpu_to_be16(ETH_P_DNA_RT),
128 .gc_thresh = 128, 128 .gc_thresh = 128,
129 .gc = dn_dst_gc, 129 .gc = dn_dst_gc,
130 .check = dn_dst_check, 130 .check = dn_dst_check,
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 69ad9280c693..67054b0d550f 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -375,7 +375,8 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
375 kfree_skb(skb); 375 kfree_skb(skb);
376 goto errout; 376 goto errout;
377 } 377 }
378 err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); 378 rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
379 return;
379errout: 380errout:
380 if (err < 0) 381 if (err < 0)
381 rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err); 382 rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err);
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 965397af9a80..5bcd592ae6dd 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -179,7 +179,7 @@ static int dn_node_address_handler(ctl_table *table, int write,
179 } 179 }
180 180
181 if (write) { 181 if (write) {
182 int len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1); 182 len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1);
183 183
184 if (copy_from_user(addr, buffer, len)) 184 if (copy_from_user(addr, buffer, len))
185 return -EFAULT; 185 return -EFAULT;
diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c
index ec8c6a0482d3..100318722214 100644
--- a/net/dsa/mv88e6123_61_65.c
+++ b/net/dsa/mv88e6123_61_65.c
@@ -394,7 +394,7 @@ static int mv88e6123_61_65_get_sset_count(struct dsa_switch *ds)
394} 394}
395 395
396static struct dsa_switch_driver mv88e6123_61_65_switch_driver = { 396static struct dsa_switch_driver mv88e6123_61_65_switch_driver = {
397 .tag_protocol = __constant_htons(ETH_P_EDSA), 397 .tag_protocol = cpu_to_be16(ETH_P_EDSA),
398 .priv_size = sizeof(struct mv88e6xxx_priv_state), 398 .priv_size = sizeof(struct mv88e6xxx_priv_state),
399 .probe = mv88e6123_61_65_probe, 399 .probe = mv88e6123_61_65_probe,
400 .setup = mv88e6123_61_65_setup, 400 .setup = mv88e6123_61_65_setup,
diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c
index 374d46a01265..70fae2444cb6 100644
--- a/net/dsa/mv88e6131.c
+++ b/net/dsa/mv88e6131.c
@@ -353,7 +353,7 @@ static int mv88e6131_get_sset_count(struct dsa_switch *ds)
353} 353}
354 354
355static struct dsa_switch_driver mv88e6131_switch_driver = { 355static struct dsa_switch_driver mv88e6131_switch_driver = {
356 .tag_protocol = __constant_htons(ETH_P_DSA), 356 .tag_protocol = cpu_to_be16(ETH_P_DSA),
357 .priv_size = sizeof(struct mv88e6xxx_priv_state), 357 .priv_size = sizeof(struct mv88e6xxx_priv_state),
358 .probe = mv88e6131_probe, 358 .probe = mv88e6131_probe,
359 .setup = mv88e6131_setup, 359 .setup = mv88e6131_setup,
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index f99a019b939e..63e532a69fdb 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -176,7 +176,7 @@ out:
176} 176}
177 177
178static struct packet_type dsa_packet_type = { 178static struct packet_type dsa_packet_type = {
179 .type = __constant_htons(ETH_P_DSA), 179 .type = cpu_to_be16(ETH_P_DSA),
180 .func = dsa_rcv, 180 .func = dsa_rcv,
181}; 181};
182 182
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 328ec957f786..6197f9a7ef42 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -195,7 +195,7 @@ out:
195} 195}
196 196
197static struct packet_type edsa_packet_type = { 197static struct packet_type edsa_packet_type = {
198 .type = __constant_htons(ETH_P_EDSA), 198 .type = cpu_to_be16(ETH_P_EDSA),
199 .func = edsa_rcv, 199 .func = edsa_rcv,
200}; 200};
201 201
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index b59132878ad1..d7e7f424ff0c 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -112,7 +112,7 @@ out:
112} 112}
113 113
114static struct packet_type trailer_packet_type = { 114static struct packet_type trailer_packet_type = {
115 .type = __constant_htons(ETH_P_TRAILER), 115 .type = cpu_to_be16(ETH_P_TRAILER),
116 .func = trailer_rcv, 116 .func = trailer_rcv,
117}; 117};
118 118
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 8789d2bb1b06..7bf35582f656 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -1103,7 +1103,7 @@ drop:
1103} 1103}
1104 1104
1105static struct packet_type econet_packet_type = { 1105static struct packet_type econet_packet_type = {
1106 .type = __constant_htons(ETH_P_ECONET), 1106 .type = cpu_to_be16(ETH_P_ECONET),
1107 .func = econet_rcv, 1107 .func = econet_rcv,
1108}; 1108};
1109 1109
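
[editor's note] The hunks above (and several more below in af_inet.c, arp.c and ipconfig.c) replace __constant_htons(ETH_P_X) with cpu_to_be16(ETH_P_X) in static initializers such as struct packet_type; cpu_to_be16() folds to a compile-time constant when given a constant argument, so the separate __constant_* spelling is no longer needed. The following is a user-space sketch of why the conversion must be a constant expression to appear in a static initializer; it is not the kernel macro, it hard-codes the little-endian swap (on big-endian hosts cpu_to_be16() is the identity), and the protocol number is illustrative.

/*
 * User-space sketch, not the kernel implementation: a byte-order
 * conversion that remains a compile-time constant, so it can appear in
 * a static initializer the way cpu_to_be16(ETH_P_DSA) does above.
 * The swap is hard-coded for a little-endian host.
 */
#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_CPU_TO_BE16(x) \
	((uint16_t)((((x) & 0x00ffU) << 8) | (((x) & 0xff00U) >> 8)))

#define EXAMPLE_ETH_P 0x001b	/* illustrative protocol number */

struct example_packet_type {
	uint16_t type;		/* wire (big-endian) representation */
	const char *name;
};

/* Works as a static initializer because the macro folds to a constant. */
static struct example_packet_type example_type = {
	.type = EXAMPLE_CPU_TO_BE16(EXAMPLE_ETH_P),
	.name = "example",
};

int main(void)
{
	printf("stored type: 0x%04x\n", example_type.type);
	return 0;
}
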
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 691268f3a359..b2cf91e4ccaa 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -35,7 +35,7 @@ config IP_ADVANCED_ROUTER
35 35
36 at boot time after the /proc file system has been mounted. 36 at boot time after the /proc file system has been mounted.
37 37
38 If you turn on IP forwarding, you will also get the rp_filter, which 38 If you turn on IP forwarding, you should consider the rp_filter, which
39 automatically rejects incoming packets if the routing table entry 39 automatically rejects incoming packets if the routing table entry
40 for their source address doesn't match the network interface they're 40 for their source address doesn't match the network interface they're
41 arriving on. This has security advantages because it prevents the 41 arriving on. This has security advantages because it prevents the
@@ -46,12 +46,16 @@ config IP_ADVANCED_ROUTER
46 rp_filter on use: 46 rp_filter on use:
47 47
48 echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter 48 echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter
49 or 49 and
50 echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter 50 echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter
51 51
52 Note that some distributions enable it in startup scripts.
53 For details about rp_filter strict and loose mode read
54 <file:Documentation/networking/ip-sysctl.txt>.
55
52 If unsure, say N here. 56 If unsure, say N here.
53 57
54choice 58choice
55 prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)" 59 prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)"
56 depends on IP_ADVANCED_ROUTER 60 depends on IP_ADVANCED_ROUTER
57 default ASK_IP_FIB_HASH 61 default ASK_IP_FIB_HASH
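
[editor's note] The help-text change above turns "or" into "and" because the effective rp_filter behaviour for a device depends on both conf/all and conf/<device>; setting only one of the two may not give the filtering you expect. A minimal user-space sketch of flipping both knobs follows; "eth0" is only an example interface name and the program needs root to take effect.

/* Sketch only: enable rp_filter for one interface by writing both the
 * per-device and the "all" sysctl, as the help text now recommends. */
#include <stdio.h>

static int write_sysctl(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return -1;
	}
	fputs(val, f);
	fclose(f);
	return 0;
}

int main(void)
{
	int err = 0;

	err |= write_sysctl("/proc/sys/net/ipv4/conf/eth0/rp_filter", "1");
	err |= write_sysctl("/proc/sys/net/ipv4/conf/all/rp_filter", "1");
	return err ? 1 : 0;
}
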
@@ -59,27 +63,29 @@ choice
59config ASK_IP_FIB_HASH 63config ASK_IP_FIB_HASH
60 bool "FIB_HASH" 64 bool "FIB_HASH"
61 ---help--- 65 ---help---
62 Current FIB is very proven and good enough for most users. 66 Current FIB is very proven and good enough for most users.
63 67
64config IP_FIB_TRIE 68config IP_FIB_TRIE
65 bool "FIB_TRIE" 69 bool "FIB_TRIE"
66 ---help--- 70 ---help---
67 Use new experimental LC-trie as FIB lookup algorithm. 71 Use new experimental LC-trie as FIB lookup algorithm.
68 This improves lookup performance if you have a large 72 This improves lookup performance if you have a large
69 number of routes. 73 number of routes.
70 74
71 LC-trie is a longest matching prefix lookup algorithm which 75 LC-trie is a longest matching prefix lookup algorithm which
72 performs better than FIB_HASH for large routing tables. 76 performs better than FIB_HASH for large routing tables.
73 But, it consumes more memory and is more complex. 77 But, it consumes more memory and is more complex.
74 78
75 LC-trie is described in: 79 LC-trie is described in:
76 80
77 IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 81 IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
78 IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 82 IEEE Journal on Selected Areas in Communications, 17(6):1083-1092,
79 An experimental study of compression methods for dynamic tries 83 June 1999
80 Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 84
81 http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ 85 An experimental study of compression methods for dynamic tries
82 86 Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
87 http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
88
83endchoice 89endchoice
84 90
85config IP_FIB_HASH 91config IP_FIB_HASH
@@ -191,7 +197,7 @@ config IP_PNP_RARP
191 <file:Documentation/filesystems/nfsroot.txt> for details. 197 <file:Documentation/filesystems/nfsroot.txt> for details.
192 198
193# not yet ready.. 199# not yet ready..
194# bool ' IP: ARP support' CONFIG_IP_PNP_ARP 200# bool ' IP: ARP support' CONFIG_IP_PNP_ARP
195config NET_IPIP 201config NET_IPIP
196 tristate "IP: tunneling" 202 tristate "IP: tunneling"
197 select INET_TUNNEL 203 select INET_TUNNEL
@@ -361,7 +367,7 @@ config INET_IPCOMP
361 ---help--- 367 ---help---
362 Support for IP Payload Compression Protocol (IPComp) (RFC3173), 368 Support for IP Payload Compression Protocol (IPComp) (RFC3173),
363 typically needed for IPsec. 369 typically needed for IPsec.
364 370
365 If unsure, say Y. 371 If unsure, say Y.
366 372
367config INET_XFRM_TUNNEL 373config INET_XFRM_TUNNEL
@@ -415,7 +421,7 @@ config INET_DIAG
415 Support for INET (TCP, DCCP, etc) socket monitoring interface used by 421 Support for INET (TCP, DCCP, etc) socket monitoring interface used by
416 native Linux tools such as ss. ss is included in iproute2, currently 422 native Linux tools such as ss. ss is included in iproute2, currently
417 downloadable at <http://linux-net.osdl.org/index.php/Iproute2>. 423 downloadable at <http://linux-net.osdl.org/index.php/Iproute2>.
418 424
419 If unsure, say Y. 425 If unsure, say Y.
420 426
421config INET_TCP_DIAG 427config INET_TCP_DIAG
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 743f5542d65a..627be4dc7fb0 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -369,7 +369,6 @@ lookup_protocol:
369 sock_init_data(sock, sk); 369 sock_init_data(sock, sk);
370 370
371 sk->sk_destruct = inet_sock_destruct; 371 sk->sk_destruct = inet_sock_destruct;
372 sk->sk_family = PF_INET;
373 sk->sk_protocol = protocol; 372 sk->sk_protocol = protocol;
374 sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; 373 sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
375 374
@@ -1253,10 +1252,10 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1253 int proto; 1252 int proto;
1254 int id; 1253 int id;
1255 1254
1256 if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) 1255 iph = skb_gro_header(skb, sizeof(*iph));
1256 if (unlikely(!iph))
1257 goto out; 1257 goto out;
1258 1258
1259 iph = ip_hdr(skb);
1260 proto = iph->protocol & (MAX_INET_PROTOS - 1); 1259 proto = iph->protocol & (MAX_INET_PROTOS - 1);
1261 1260
1262 rcu_read_lock(); 1261 rcu_read_lock();
@@ -1264,13 +1263,13 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1264 if (!ops || !ops->gro_receive) 1263 if (!ops || !ops->gro_receive)
1265 goto out_unlock; 1264 goto out_unlock;
1266 1265
1267 if (iph->version != 4 || iph->ihl != 5) 1266 if (*(u8 *)iph != 0x45)
1268 goto out_unlock; 1267 goto out_unlock;
1269 1268
1270 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 1269 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
1271 goto out_unlock; 1270 goto out_unlock;
1272 1271
1273 flush = ntohs(iph->tot_len) != skb->len || 1272 flush = ntohs(iph->tot_len) != skb_gro_len(skb) ||
1274 iph->frag_off != htons(IP_DF); 1273 iph->frag_off != htons(IP_DF);
1275 id = ntohs(iph->id); 1274 id = ntohs(iph->id);
1276 1275
@@ -1282,24 +1281,25 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1282 1281
1283 iph2 = ip_hdr(p); 1282 iph2 = ip_hdr(p);
1284 1283
1285 if (iph->protocol != iph2->protocol || 1284 if ((iph->protocol ^ iph2->protocol) |
1286 iph->tos != iph2->tos || 1285 (iph->tos ^ iph2->tos) |
1287 memcmp(&iph->saddr, &iph2->saddr, 8)) { 1286 (iph->saddr ^ iph2->saddr) |
1287 (iph->daddr ^ iph2->daddr)) {
1288 NAPI_GRO_CB(p)->same_flow = 0; 1288 NAPI_GRO_CB(p)->same_flow = 0;
1289 continue; 1289 continue;
1290 } 1290 }
1291 1291
1292 /* All fields must match except length and checksum. */ 1292 /* All fields must match except length and checksum. */
1293 NAPI_GRO_CB(p)->flush |= 1293 NAPI_GRO_CB(p)->flush |=
1294 memcmp(&iph->frag_off, &iph2->frag_off, 4) || 1294 (iph->ttl ^ iph2->ttl) |
1295 (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id; 1295 ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
1296 1296
1297 NAPI_GRO_CB(p)->flush |= flush; 1297 NAPI_GRO_CB(p)->flush |= flush;
1298 } 1298 }
1299 1299
1300 NAPI_GRO_CB(skb)->flush |= flush; 1300 NAPI_GRO_CB(skb)->flush |= flush;
1301 __skb_pull(skb, sizeof(*iph)); 1301 skb_gro_pull(skb, sizeof(*iph));
1302 skb_reset_transport_header(skb); 1302 skb_set_transport_header(skb, skb_gro_offset(skb));
1303 1303
1304 pp = ops->gro_receive(head, skb); 1304 pp = ops->gro_receive(head, skb);
1305 1305
@@ -1501,7 +1501,7 @@ static int ipv4_proc_init(void);
1501 */ 1501 */
1502 1502
1503static struct packet_type ip_packet_type = { 1503static struct packet_type ip_packet_type = {
1504 .type = __constant_htons(ETH_P_IP), 1504 .type = cpu_to_be16(ETH_P_IP),
1505 .func = ip_rcv, 1505 .func = ip_rcv,
1506 .gso_send_check = inet_gso_send_check, 1506 .gso_send_check = inet_gso_send_check,
1507 .gso_segment = inet_gso_segment, 1507 .gso_segment = inet_gso_segment,
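
[editor's note] In the inet_gro_receive() hunks above, the per-flow comparison switches from short-circuiting !=/memcmp() tests to OR-ing the XOR of each field, so all four header fields are checked with a single conditional branch; likewise *(u8 *)iph != 0x45 folds the version (4) and ihl (5) checks into one byte compare. Below is a stand-alone sketch of the XOR/OR idiom; the struct is invented for the example and is not the kernel's IPv4 header.

/* Stand-alone illustration of the branch-reducing comparison used in the
 * GRO hunk above: XOR corresponding fields, OR the results, test once. */
#include <stdint.h>
#include <stdio.h>

struct flow_key {
	uint8_t  protocol;
	uint8_t  tos;
	uint32_t saddr;
	uint32_t daddr;
};

static int same_flow(const struct flow_key *a, const struct flow_key *b)
{
	/* Zero iff every field matches; no intermediate branches. */
	return ((a->protocol ^ b->protocol) |
		(a->tos ^ b->tos) |
		(a->saddr ^ b->saddr) |
		(a->daddr ^ b->daddr)) == 0;
}

int main(void)
{
	struct flow_key a = { 6, 0, 0x0a000001, 0x0a000002 };
	struct flow_key b = a;
	struct flow_key c = a;

	c.daddr = 0x0a000003;
	printf("a vs b: %d, a vs c: %d\n", same_flow(&a, &b), same_flow(&a, &c));
	return 0;
}
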
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 29a74c01d8de..3f6b7354699b 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -1226,7 +1226,7 @@ void arp_ifdown(struct net_device *dev)
1226 */ 1226 */
1227 1227
1228static struct packet_type arp_packet_type = { 1228static struct packet_type arp_packet_type = {
1229 .type = __constant_htons(ETH_P_ARP), 1229 .type = cpu_to_be16(ETH_P_ARP),
1230 .func = arp_rcv, 1230 .func = arp_rcv,
1231}; 1231};
1232 1232
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 309997edc8a5..126bb911880f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1075,6 +1075,14 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1075 } 1075 }
1076 } 1076 }
1077 ip_mc_up(in_dev); 1077 ip_mc_up(in_dev);
1078 /* fall through */
1079 case NETDEV_CHANGEADDR:
1080 if (IN_DEV_ARP_NOTIFY(in_dev))
1081 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1082 in_dev->ifa_list->ifa_address,
1083 dev,
1084 in_dev->ifa_list->ifa_address,
1085 NULL, dev->dev_addr, NULL);
1078 break; 1086 break;
1079 case NETDEV_DOWN: 1087 case NETDEV_DOWN:
1080 ip_mc_down(in_dev); 1088 ip_mc_down(in_dev);
@@ -1208,7 +1216,8 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1208 kfree_skb(skb); 1216 kfree_skb(skb);
1209 goto errout; 1217 goto errout;
1210 } 1218 }
1211 err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); 1219 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1220 return;
1212errout: 1221errout:
1213 if (err < 0) 1222 if (err < 0)
1214 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); 1223 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
@@ -1439,6 +1448,7 @@ static struct devinet_sysctl_table {
1439 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), 1448 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1440 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), 1449 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1441 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 1450 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1451 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1442 1452
1443 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 1453 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1444 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 1454 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
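
[editor's note] The devinet.c hunks add an arp_notify sysctl and, when it is set, send an ARP announcement ("gratuitous ARP") on NETDEV_UP and NETDEV_CHANGEADDR so that neighbours refresh their caches after an address or MAC change. The sketch below only shows the shape of such an announcement (sender and target protocol address are both the interface's own address); the MAC and IP values are made up, and the kernel builds and transmits the real frame via arp_send() as in the hunk.

/* Illustration only: the layout of an ARP announcement as sent by the
 * NETDEV_UP/NETDEV_CHANGEADDR code path above. Nothing is transmitted. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct arp_announce {
	uint16_t htype, ptype;
	uint8_t  hlen, plen;
	uint16_t op;
	uint8_t  sha[6];	/* sender MAC  */
	uint8_t  spa[4];	/* sender IPv4 */
	uint8_t  tha[6];	/* target MAC (zero for a request) */
	uint8_t  tpa[4];	/* target IPv4 */
};

int main(void)
{
	const uint8_t mac[6] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
	const uint8_t ip[4]  = { 192, 168, 1, 10 };
	struct arp_announce a;

	memset(&a, 0, sizeof(a));
	a.htype = 1;		/* Ethernet */
	a.ptype = 0x0800;	/* IPv4 */
	a.hlen = 6;
	a.plen = 4;
	a.op = 1;		/* ARPOP_REQUEST, as in the hunk above */
	memcpy(a.sha, mac, 6);
	memcpy(a.spa, ip, 4);	/* sender IP ...                        */
	memcpy(a.tpa, ip, 4);	/* ... equals target IP: an announcement */

	printf("announcing %u.%u.%u.%u\n", a.spa[0], a.spa[1], a.spa[2], a.spa[3]);
	return 0;
}
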
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 741e4fa3e474..cafcc49d0993 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -275,7 +275,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
275 fib_res_put(&res); 275 fib_res_put(&res);
276 if (no_addr) 276 if (no_addr)
277 goto last_resort; 277 goto last_resort;
278 if (rpf) 278 if (rpf == 1)
279 goto e_inval; 279 goto e_inval;
280 fl.oif = dev->ifindex; 280 fl.oif = dev->ifindex;
281 281
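
[editor's note] The fib_validate_source() change above makes only strict mode (rp_filter == 1) reject a packet whose reverse path does not use the arriving interface; other non-zero values fall through to the wider lookup. The following is a rough, hedged sketch of the strict-vs-loose distinction, not the kernel's actual control flow (which involves a FIB lookup rather than two booleans).

/* Rough sketch of strict vs loose reverse-path filtering. */
#include <stdio.h>

enum { RPF_OFF = 0, RPF_STRICT = 1, RPF_LOOSE = 2 };

static int rp_filter_drops(int rpf, int reverse_path_uses_ingress_dev,
			   int source_is_routable_at_all)
{
	if (rpf == RPF_STRICT && !reverse_path_uses_ingress_dev)
		return 1;	/* strict: reverse path is on another interface */
	if (rpf == RPF_LOOSE && !source_is_routable_at_all)
		return 1;	/* loose: no route back to the source at all */
	return 0;
}

int main(void)
{
	printf("strict, wrong iface: %d\n", rp_filter_drops(RPF_STRICT, 0, 1));
	printf("loose,  wrong iface: %d\n", rp_filter_drops(RPF_LOOSE, 0, 1));
	return 0;
}
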
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 4817dea3bc73..f831df500907 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -322,8 +322,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
322 kfree_skb(skb); 322 kfree_skb(skb);
323 goto errout; 323 goto errout;
324 } 324 }
325 err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, 325 rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
326 info->nlh, GFP_KERNEL); 326 info->nlh, GFP_KERNEL);
327 return;
327errout: 328errout:
328 if (err < 0) 329 if (err < 0)
329 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 330 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fc562d29cc46..3f50807237e0 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -375,6 +375,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
375 inet->tos = ip_hdr(skb)->tos; 375 inet->tos = ip_hdr(skb)->tos;
376 daddr = ipc.addr = rt->rt_src; 376 daddr = ipc.addr = rt->rt_src;
377 ipc.opt = NULL; 377 ipc.opt = NULL;
378 ipc.shtx.flags = 0;
378 if (icmp_param->replyopts.optlen) { 379 if (icmp_param->replyopts.optlen) {
379 ipc.opt = &icmp_param->replyopts; 380 ipc.opt = &icmp_param->replyopts;
380 if (ipc.opt->srr) 381 if (ipc.opt->srr)
@@ -532,6 +533,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
532 inet_sk(sk)->tos = tos; 533 inet_sk(sk)->tos = tos;
533 ipc.addr = iph->saddr; 534 ipc.addr = iph->saddr;
534 ipc.opt = &icmp_param.replyopts; 535 ipc.opt = &icmp_param.replyopts;
536 ipc.shtx.flags = 0;
535 537
536 { 538 {
537 struct flowi fl = { 539 struct flowi fl = {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f26ab38680de..22cd19ee44e5 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -93,24 +93,40 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
93 struct inet_bind_hashbucket *head; 93 struct inet_bind_hashbucket *head;
94 struct hlist_node *node; 94 struct hlist_node *node;
95 struct inet_bind_bucket *tb; 95 struct inet_bind_bucket *tb;
96 int ret; 96 int ret, attempts = 5;
97 struct net *net = sock_net(sk); 97 struct net *net = sock_net(sk);
98 int smallest_size = -1, smallest_rover;
98 99
99 local_bh_disable(); 100 local_bh_disable();
100 if (!snum) { 101 if (!snum) {
101 int remaining, rover, low, high; 102 int remaining, rover, low, high;
102 103
104again:
103 inet_get_local_port_range(&low, &high); 105 inet_get_local_port_range(&low, &high);
104 remaining = (high - low) + 1; 106 remaining = (high - low) + 1;
105 rover = net_random() % remaining + low; 107 smallest_rover = rover = net_random() % remaining + low;
106 108
109 smallest_size = -1;
107 do { 110 do {
108 head = &hashinfo->bhash[inet_bhashfn(net, rover, 111 head = &hashinfo->bhash[inet_bhashfn(net, rover,
109 hashinfo->bhash_size)]; 112 hashinfo->bhash_size)];
110 spin_lock(&head->lock); 113 spin_lock(&head->lock);
111 inet_bind_bucket_for_each(tb, node, &head->chain) 114 inet_bind_bucket_for_each(tb, node, &head->chain)
112 if (ib_net(tb) == net && tb->port == rover) 115 if (ib_net(tb) == net && tb->port == rover) {
116 if (tb->fastreuse > 0 &&
117 sk->sk_reuse &&
118 sk->sk_state != TCP_LISTEN &&
119 (tb->num_owners < smallest_size || smallest_size == -1)) {
120 smallest_size = tb->num_owners;
121 smallest_rover = rover;
122 if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) {
123 spin_unlock(&head->lock);
124 snum = smallest_rover;
125 goto have_snum;
126 }
127 }
113 goto next; 128 goto next;
129 }
114 break; 130 break;
115 next: 131 next:
116 spin_unlock(&head->lock); 132 spin_unlock(&head->lock);
@@ -125,14 +141,19 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
125 * the top level, not from the 'break;' statement. 141 * the top level, not from the 'break;' statement.
126 */ 142 */
127 ret = 1; 143 ret = 1;
128 if (remaining <= 0) 144 if (remaining <= 0) {
145 if (smallest_size != -1) {
146 snum = smallest_rover;
147 goto have_snum;
148 }
129 goto fail; 149 goto fail;
130 150 }
131 /* OK, here is the one we will use. HEAD is 151 /* OK, here is the one we will use. HEAD is
132 * non-NULL and we hold it's mutex. 152 * non-NULL and we hold it's mutex.
133 */ 153 */
134 snum = rover; 154 snum = rover;
135 } else { 155 } else {
156have_snum:
136 head = &hashinfo->bhash[inet_bhashfn(net, snum, 157 head = &hashinfo->bhash[inet_bhashfn(net, snum,
137 hashinfo->bhash_size)]; 158 hashinfo->bhash_size)];
138 spin_lock(&head->lock); 159 spin_lock(&head->lock);
@@ -145,12 +166,19 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
145tb_found: 166tb_found:
146 if (!hlist_empty(&tb->owners)) { 167 if (!hlist_empty(&tb->owners)) {
147 if (tb->fastreuse > 0 && 168 if (tb->fastreuse > 0 &&
148 sk->sk_reuse && sk->sk_state != TCP_LISTEN) { 169 sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
170 smallest_size == -1) {
149 goto success; 171 goto success;
150 } else { 172 } else {
151 ret = 1; 173 ret = 1;
152 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) 174 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
175 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
176 smallest_size != -1 && --attempts >= 0) {
177 spin_unlock(&head->lock);
178 goto again;
179 }
153 goto fail_unlock; 180 goto fail_unlock;
181 }
154 } 182 }
155 } 183 }
156tb_not_found: 184tb_not_found:
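
[editor's note] The inet_csk_get_port() hunks change automatic port selection: when the socket has SO_REUSEADDR and is not listening, a port whose bind bucket is marked fastreuse is no longer skipped but remembered as a candidate (preferring the bucket with the fewest owners); if the random walk finds no completely free port the candidate is reused, and a bind conflict retries the whole attempt a bounded number of times. Below is a simplified sketch of the "remember the best reusable candidate" selection; the table is invented data, not the kernel's bind hash.

/* Simplified model of the candidate tracking added to inet_csk_get_port(). */
#include <stdio.h>

struct fake_bucket {
	int port;
	int num_owners;	/* 0 means the port is unused          */
	int fastreuse;	/* every current owner set SO_REUSEADDR */
};

static int pick_port(const struct fake_bucket *tbl, int n)
{
	int best = -1, best_owners = -1;
	int i;

	for (i = 0; i < n; i++) {
		if (tbl[i].num_owners == 0)
			return tbl[i].port;	/* completely free: take it */
		if (tbl[i].fastreuse &&
		    (best_owners == -1 || tbl[i].num_owners < best_owners)) {
			best = tbl[i].port;	/* best reusable candidate so far */
			best_owners = tbl[i].num_owners;
		}
	}
	return best;	/* -1 if nothing free and nothing reusable */
}

int main(void)
{
	const struct fake_bucket tbl[] = {
		{ 32768, 3, 1 },
		{ 32769, 1, 1 },
		{ 32770, 2, 0 },
	};

	printf("chosen port: %d\n", pick_port(tbl, 3));
	return 0;
}
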
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 6c52e08f786e..eaf3e2c8646a 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -267,6 +267,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
267 267
268struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, 268struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
269 struct inet_frags *f, void *key, unsigned int hash) 269 struct inet_frags *f, void *key, unsigned int hash)
270 __releases(&f->lock)
270{ 271{
271 struct inet_frag_queue *q; 272 struct inet_frag_queue *q;
272 struct hlist_node *n; 273 struct hlist_node *n;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 6a1045da48d2..625cc5f64c94 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -38,6 +38,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
38 write_pnet(&tb->ib_net, hold_net(net)); 38 write_pnet(&tb->ib_net, hold_net(net));
39 tb->port = snum; 39 tb->port = snum;
40 tb->fastreuse = 0; 40 tb->fastreuse = 0;
41 tb->num_owners = 0;
41 INIT_HLIST_HEAD(&tb->owners); 42 INIT_HLIST_HEAD(&tb->owners);
42 hlist_add_head(&tb->node, &head->chain); 43 hlist_add_head(&tb->node, &head->chain);
43 } 44 }
@@ -59,8 +60,13 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
59void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, 60void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
60 const unsigned short snum) 61 const unsigned short snum)
61{ 62{
63 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
64
65 atomic_inc(&hashinfo->bsockets);
66
62 inet_sk(sk)->num = snum; 67 inet_sk(sk)->num = snum;
63 sk_add_bind_node(sk, &tb->owners); 68 sk_add_bind_node(sk, &tb->owners);
69 tb->num_owners++;
64 inet_csk(sk)->icsk_bind_hash = tb; 70 inet_csk(sk)->icsk_bind_hash = tb;
65} 71}
66 72
@@ -75,9 +81,12 @@ static void __inet_put_port(struct sock *sk)
75 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; 81 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
76 struct inet_bind_bucket *tb; 82 struct inet_bind_bucket *tb;
77 83
84 atomic_dec(&hashinfo->bsockets);
85
78 spin_lock(&head->lock); 86 spin_lock(&head->lock);
79 tb = inet_csk(sk)->icsk_bind_hash; 87 tb = inet_csk(sk)->icsk_bind_hash;
80 __sk_del_bind_node(sk); 88 __sk_del_bind_node(sk);
89 tb->num_owners--;
81 inet_csk(sk)->icsk_bind_hash = NULL; 90 inet_csk(sk)->icsk_bind_hash = NULL;
82 inet_sk(sk)->num = 0; 91 inet_sk(sk)->num = 0;
83 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 92 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
@@ -444,9 +453,9 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
444 */ 453 */
445 inet_bind_bucket_for_each(tb, node, &head->chain) { 454 inet_bind_bucket_for_each(tb, node, &head->chain) {
446 if (ib_net(tb) == net && tb->port == port) { 455 if (ib_net(tb) == net && tb->port == port) {
447 WARN_ON(hlist_empty(&tb->owners));
448 if (tb->fastreuse >= 0) 456 if (tb->fastreuse >= 0)
449 goto next_port; 457 goto next_port;
458 WARN_ON(hlist_empty(&tb->owners));
450 if (!check_established(death_row, sk, 459 if (!check_established(death_row, sk,
451 port, &tw)) 460 port, &tw))
452 goto ok; 461 goto ok;
@@ -523,6 +532,7 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
523{ 532{
524 int i; 533 int i;
525 534
535 atomic_set(&h->bsockets, 0);
526 for (i = 0; i < INET_LHTABLE_SIZE; i++) { 536 for (i = 0; i < INET_LHTABLE_SIZE; i++) {
527 spin_lock_init(&h->listening_hash[i].lock); 537 spin_lock_init(&h->listening_hash[i].lock);
528 INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, 538 INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0101521f366b..e62510d5ea5a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -164,67 +164,124 @@ static DEFINE_RWLOCK(ipgre_lock);
164 164
165/* Given src, dst and key, find appropriate for input tunnel. */ 165/* Given src, dst and key, find appropriate for input tunnel. */
166 166
167static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net, 167static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
168 __be32 remote, __be32 local, 168 __be32 remote, __be32 local,
169 __be32 key, __be16 gre_proto) 169 __be32 key, __be16 gre_proto)
170{ 170{
171 struct net *net = dev_net(dev);
172 int link = dev->ifindex;
171 unsigned h0 = HASH(remote); 173 unsigned h0 = HASH(remote);
172 unsigned h1 = HASH(key); 174 unsigned h1 = HASH(key);
173 struct ip_tunnel *t; 175 struct ip_tunnel *t, *cand = NULL;
174 struct ip_tunnel *t2 = NULL;
175 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 176 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
176 int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 177 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
177 ARPHRD_ETHER : ARPHRD_IPGRE; 178 ARPHRD_ETHER : ARPHRD_IPGRE;
179 int score, cand_score = 4;
178 180
179 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { 181 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
180 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { 182 if (local != t->parms.iph.saddr ||
181 if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 183 remote != t->parms.iph.daddr ||
182 if (t->dev->type == dev_type) 184 key != t->parms.i_key ||
183 return t; 185 !(t->dev->flags & IFF_UP))
184 if (t->dev->type == ARPHRD_IPGRE && !t2) 186 continue;
185 t2 = t; 187
186 } 188 if (t->dev->type != ARPHRD_IPGRE &&
189 t->dev->type != dev_type)
190 continue;
191
192 score = 0;
193 if (t->parms.link != link)
194 score |= 1;
195 if (t->dev->type != dev_type)
196 score |= 2;
197 if (score == 0)
198 return t;
199
200 if (score < cand_score) {
201 cand = t;
202 cand_score = score;
187 } 203 }
188 } 204 }
189 205
190 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { 206 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
191 if (remote == t->parms.iph.daddr) { 207 if (remote != t->parms.iph.daddr ||
192 if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 208 key != t->parms.i_key ||
193 if (t->dev->type == dev_type) 209 !(t->dev->flags & IFF_UP))
194 return t; 210 continue;
195 if (t->dev->type == ARPHRD_IPGRE && !t2) 211
196 t2 = t; 212 if (t->dev->type != ARPHRD_IPGRE &&
197 } 213 t->dev->type != dev_type)
214 continue;
215
216 score = 0;
217 if (t->parms.link != link)
218 score |= 1;
219 if (t->dev->type != dev_type)
220 score |= 2;
221 if (score == 0)
222 return t;
223
224 if (score < cand_score) {
225 cand = t;
226 cand_score = score;
198 } 227 }
199 } 228 }
200 229
201 for (t = ign->tunnels_l[h1]; t; t = t->next) { 230 for (t = ign->tunnels_l[h1]; t; t = t->next) {
202 if (local == t->parms.iph.saddr || 231 if ((local != t->parms.iph.saddr &&
203 (local == t->parms.iph.daddr && 232 (local != t->parms.iph.daddr ||
204 ipv4_is_multicast(local))) { 233 !ipv4_is_multicast(local))) ||
205 if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 234 key != t->parms.i_key ||
206 if (t->dev->type == dev_type) 235 !(t->dev->flags & IFF_UP))
207 return t; 236 continue;
208 if (t->dev->type == ARPHRD_IPGRE && !t2) 237
209 t2 = t; 238 if (t->dev->type != ARPHRD_IPGRE &&
210 } 239 t->dev->type != dev_type)
240 continue;
241
242 score = 0;
243 if (t->parms.link != link)
244 score |= 1;
245 if (t->dev->type != dev_type)
246 score |= 2;
247 if (score == 0)
248 return t;
249
250 if (score < cand_score) {
251 cand = t;
252 cand_score = score;
211 } 253 }
212 } 254 }
213 255
214 for (t = ign->tunnels_wc[h1]; t; t = t->next) { 256 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
215 if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 257 if (t->parms.i_key != key ||
216 if (t->dev->type == dev_type) 258 !(t->dev->flags & IFF_UP))
217 return t; 259 continue;
218 if (t->dev->type == ARPHRD_IPGRE && !t2) 260
219 t2 = t; 261 if (t->dev->type != ARPHRD_IPGRE &&
262 t->dev->type != dev_type)
263 continue;
264
265 score = 0;
266 if (t->parms.link != link)
267 score |= 1;
268 if (t->dev->type != dev_type)
269 score |= 2;
270 if (score == 0)
271 return t;
272
273 if (score < cand_score) {
274 cand = t;
275 cand_score = score;
220 } 276 }
221 } 277 }
222 278
223 if (t2) 279 if (cand != NULL)
224 return t2; 280 return cand;
225 281
226 if (ign->fb_tunnel_dev->flags&IFF_UP) 282 if (ign->fb_tunnel_dev->flags & IFF_UP)
227 return netdev_priv(ign->fb_tunnel_dev); 283 return netdev_priv(ign->fb_tunnel_dev);
284
228 return NULL; 285 return NULL;
229} 286}
230 287
@@ -284,6 +341,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
284 __be32 remote = parms->iph.daddr; 341 __be32 remote = parms->iph.daddr;
285 __be32 local = parms->iph.saddr; 342 __be32 local = parms->iph.saddr;
286 __be32 key = parms->i_key; 343 __be32 key = parms->i_key;
344 int link = parms->link;
287 struct ip_tunnel *t, **tp; 345 struct ip_tunnel *t, **tp;
288 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 346 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
289 347
@@ -291,6 +349,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
291 if (local == t->parms.iph.saddr && 349 if (local == t->parms.iph.saddr &&
292 remote == t->parms.iph.daddr && 350 remote == t->parms.iph.daddr &&
293 key == t->parms.i_key && 351 key == t->parms.i_key &&
352 link == t->parms.link &&
294 type == t->dev->type) 353 type == t->dev->type)
295 break; 354 break;
296 355
@@ -421,7 +480,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
421 } 480 }
422 481
423 read_lock(&ipgre_lock); 482 read_lock(&ipgre_lock);
424 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr, 483 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
425 flags & GRE_KEY ? 484 flags & GRE_KEY ?
426 *(((__be32 *)p) + (grehlen / 4) - 1) : 0, 485 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
427 p[1]); 486 p[1]);
@@ -432,7 +491,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
432 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 491 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
433 goto out; 492 goto out;
434 493
435 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) 494 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
436 t->err_count++; 495 t->err_count++;
437 else 496 else
438 t->err_count = 1; 497 t->err_count = 1;
@@ -518,7 +577,7 @@ static int ipgre_rcv(struct sk_buff *skb)
518 gre_proto = *(__be16 *)(h + 2); 577 gre_proto = *(__be16 *)(h + 2);
519 578
520 read_lock(&ipgre_lock); 579 read_lock(&ipgre_lock);
521 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), 580 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
522 iph->saddr, iph->daddr, key, 581 iph->saddr, iph->daddr, key,
523 gre_proto))) { 582 gre_proto))) {
524 struct net_device_stats *stats = &tunnel->dev->stats; 583 struct net_device_stats *stats = &tunnel->dev->stats;
@@ -744,7 +803,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
744#endif 803#endif
745 804
746 if (tunnel->err_count > 0) { 805 if (tunnel->err_count > 0) {
747 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { 806 if (time_before(jiffies,
807 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
748 tunnel->err_count--; 808 tunnel->err_count--;
749 809
750 dst_link_failure(skb); 810 dst_link_failure(skb);
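
[editor's note] The "jiffies - t->err_time < IPTUNNEL_ERR_TIMEO" comparisons in ip_gre.c (and in ipip.c below) become time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO), the wrap-safe way to compare jiffies: the macro subtracts the two values and tests the sign of the signed difference, so the result stays correct when the counter wraps. A user-space sketch of the same idiom, assuming 32-bit tick counters for the example:

/* User-space sketch of the time_before() idiom: compare two free-running
 * tick counters by the sign of their signed difference. */
#include <stdint.h>
#include <stdio.h>

static int ticks_before(uint32_t a, uint32_t b)
{
	return (int32_t)(b - a) > 0;	/* true if a is earlier than b */
}

int main(void)
{
	uint32_t now = 0xfffffff0u;	/* close to wrapping          */
	uint32_t deadline = now + 0x20;	/* wraps past zero            */

	printf("before deadline: %d\n", ticks_before(now, deadline));
	printf("after it wraps:  %d\n", ticks_before(deadline + 1, deadline));
	return 0;
}
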
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8ebe86dd72af..3e7e910c7c0f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -935,6 +935,10 @@ alloc_new_skb:
935 sk->sk_allocation); 935 sk->sk_allocation);
936 if (unlikely(skb == NULL)) 936 if (unlikely(skb == NULL))
937 err = -ENOBUFS; 937 err = -ENOBUFS;
938 else
939 /* only the initial fragment is
940 time stamped */
941 ipc->shtx.flags = 0;
938 } 942 }
939 if (skb == NULL) 943 if (skb == NULL)
940 goto error; 944 goto error;
@@ -945,6 +949,7 @@ alloc_new_skb:
945 skb->ip_summed = csummode; 949 skb->ip_summed = csummode;
946 skb->csum = 0; 950 skb->csum = 0;
947 skb_reserve(skb, hh_len); 951 skb_reserve(skb, hh_len);
952 *skb_tx(skb) = ipc->shtx;
948 953
949 /* 954 /*
950 * Find where to start putting bytes. 955 * Find where to start putting bytes.
@@ -1364,6 +1369,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1364 1369
1365 daddr = ipc.addr = rt->rt_src; 1370 daddr = ipc.addr = rt->rt_src;
1366 ipc.opt = NULL; 1371 ipc.opt = NULL;
1372 ipc.shtx.flags = 0;
1367 1373
1368 if (replyopts.opt.optlen) { 1374 if (replyopts.opt.optlen) {
1369 ipc.opt = &replyopts.opt; 1375 ipc.opt = &replyopts.opt;
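
[editor's note] The ip_output.c, icmp.c and ip_send_reply() hunks start carrying per-call TX-timestamp flags in ipc.shtx: the flags are handed to the skb being built and then cleared so that, as the new comment says, only the initial fragment is time stamped, and every caller that builds an ipc on the stack now zeroes ipc.shtx.flags so stale flags cannot leak in. The sketch below only illustrates the "stamp the first fragment, clear for the rest" rule; the types are made up and do not reflect the real skb_shared_tx layout.

/* Made-up types, sketching the "only the initial fragment is time
 * stamped" rule from the hunk above. */
#include <stdio.h>

struct tx_request { unsigned int ts_flags; };
struct fragment   { unsigned int ts_flags; int index; };

static void build_fragments(struct tx_request *req,
			    struct fragment *frags, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		frags[i].index = i;
		frags[i].ts_flags = req->ts_flags;
		req->ts_flags = 0;	/* later fragments carry no timestamp request */
	}
}

int main(void)
{
	struct tx_request req = { .ts_flags = 0x1 };
	struct fragment frags[3];
	int i;

	build_fragments(&req, frags, 3);
	for (i = 0; i < 3; i++)
		printf("fragment %d flags 0x%x\n", i, frags[i].ts_flags);
	return 0;
}
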
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index d722013c1cae..90d22ae0a419 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -100,8 +100,8 @@
100#define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers 100#define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers
101 - '3' from resolv.h */ 101 - '3' from resolv.h */
102 102
103#define NONE __constant_htonl(INADDR_NONE) 103#define NONE cpu_to_be32(INADDR_NONE)
104#define ANY __constant_htonl(INADDR_ANY) 104#define ANY cpu_to_be32(INADDR_ANY)
105 105
106/* 106/*
107 * Public IP configuration 107 * Public IP configuration
@@ -406,7 +406,7 @@ static int __init ic_defaults(void)
406static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); 406static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev);
407 407
408static struct packet_type rarp_packet_type __initdata = { 408static struct packet_type rarp_packet_type __initdata = {
409 .type = __constant_htons(ETH_P_RARP), 409 .type = cpu_to_be16(ETH_P_RARP),
410 .func = ic_rarp_recv, 410 .func = ic_rarp_recv,
411}; 411};
412 412
@@ -568,7 +568,7 @@ struct bootp_pkt { /* BOOTP packet format */
568static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); 568static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev);
569 569
570static struct packet_type bootp_packet_type __initdata = { 570static struct packet_type bootp_packet_type __initdata = {
571 .type = __constant_htons(ETH_P_IP), 571 .type = cpu_to_be16(ETH_P_IP),
572 .func = ic_bootp_recv, 572 .func = ic_bootp_recv,
573}; 573};
574 574
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 5079dfbc6f38..9054139795af 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -327,7 +327,7 @@ static int ipip_err(struct sk_buff *skb, u32 info)
327 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 327 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
328 goto out; 328 goto out;
329 329
330 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) 330 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
331 t->err_count++; 331 t->err_count++;
332 else 332 else
333 t->err_count = 1; 333 t->err_count = 1;
@@ -466,7 +466,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
466 } 466 }
467 467
468 if (tunnel->err_count > 0) { 468 if (tunnel->err_count > 0) {
469 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { 469 if (time_before(jiffies,
470 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
470 tunnel->err_count--; 471 tunnel->err_count--;
471 dst_link_failure(skb); 472 dst_link_failure(skb);
472 } else 473 } else
@@ -750,7 +751,7 @@ static struct xfrm_tunnel ipip_handler = {
750 .priority = 1, 751 .priority = 1,
751}; 752};
752 753
753static char banner[] __initdata = 754static const char banner[] __initconst =
754 KERN_INFO "IPv4 over IPv4 tunneling driver\n"; 755 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
755 756
756static void ipip_destroy_tunnels(struct ipip_net *ipn) 757static void ipip_destroy_tunnels(struct ipip_net *ipn)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 14666449dc1c..13e9dd3012b3 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -67,9 +67,6 @@
67#define CONFIG_IP_PIMSM 1 67#define CONFIG_IP_PIMSM 1
68#endif 68#endif
69 69
70static struct sock *mroute_socket;
71
72
73/* Big lock, protecting vif table, mrt cache and mroute socket state. 70/* Big lock, protecting vif table, mrt cache and mroute socket state.
74 Note that the changes are semaphored via rtnl_lock. 71 Note that the changes are semaphored via rtnl_lock.
75 */ 72 */
@@ -80,18 +77,9 @@ static DEFINE_RWLOCK(mrt_lock);
80 * Multicast router control variables 77 * Multicast router control variables
81 */ 78 */
82 79
83static struct vif_device vif_table[MAXVIFS]; /* Devices */ 80#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
84static int maxvif;
85
86#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
87
88static int mroute_do_assert; /* Set in PIM assert */
89static int mroute_do_pim;
90
91static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */
92 81
93static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ 82static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
94static atomic_t cache_resolve_queue_len; /* Size of unresolved */
95 83
96/* Special spinlock for queue of unresolved entries */ 84/* Special spinlock for queue of unresolved entries */
97static DEFINE_SPINLOCK(mfc_unres_lock); 85static DEFINE_SPINLOCK(mfc_unres_lock);
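
[editor's note] The ipmr.c changes in this and the following hunks remove file-scope globals (mroute_socket, vif_table/maxvif, mfc_cache_array, reg_vif_num, cache_resolve_queue_len) and move the state into the network namespace, reached through net->ipv4.*, threading a struct net * through every helper. The sketch below condenses that conversion pattern; the names are invented stand-ins for the real netns_ipv4 fields.

/* Invented names, sketching the global-to-per-namespace conversion seen
 * in ipmr.c: former globals become members of a per-context struct, and
 * every helper takes that context. */
#include <stdio.h>

#define MAXVIFS_EXAMPLE 4

struct vif { const char *dev; };

struct mroute_ctx {		/* stands in for fields added to netns_ipv4 */
	struct vif vif_table[MAXVIFS_EXAMPLE];
	int maxvif;
};

static int vif_add(struct mroute_ctx *ctx, int vifi, const char *dev)
{
	if (vifi < 0 || vifi >= MAXVIFS_EXAMPLE || ctx->vif_table[vifi].dev)
		return -1;
	ctx->vif_table[vifi].dev = dev;
	if (vifi + 1 > ctx->maxvif)
		ctx->maxvif = vifi + 1;
	return 0;
}

int main(void)
{
	struct mroute_ctx a = { .maxvif = 0 }, b = { .maxvif = 0 };

	vif_add(&a, 0, "tunl0");	/* each context keeps its own table */
	printf("a.maxvif=%d b.maxvif=%d\n", a.maxvif, b.maxvif);
	return 0;
}
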
@@ -107,7 +95,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
107static struct kmem_cache *mrt_cachep __read_mostly; 95static struct kmem_cache *mrt_cachep __read_mostly;
108 96
109static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); 97static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
110static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); 98static int ipmr_cache_report(struct net *net,
99 struct sk_buff *pkt, vifi_t vifi, int assert);
111static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); 100static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
112 101
113#ifdef CONFIG_IP_PIMSM_V2 102#ifdef CONFIG_IP_PIMSM_V2
@@ -120,9 +109,11 @@ static struct timer_list ipmr_expire_timer;
120 109
121static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) 110static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
122{ 111{
112 struct net *net = dev_net(dev);
113
123 dev_close(dev); 114 dev_close(dev);
124 115
125 dev = __dev_get_by_name(&init_net, "tunl0"); 116 dev = __dev_get_by_name(net, "tunl0");
126 if (dev) { 117 if (dev) {
127 const struct net_device_ops *ops = dev->netdev_ops; 118 const struct net_device_ops *ops = dev->netdev_ops;
128 struct ifreq ifr; 119 struct ifreq ifr;
@@ -148,11 +139,11 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
148} 139}
149 140
150static 141static
151struct net_device *ipmr_new_tunnel(struct vifctl *v) 142struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
152{ 143{
153 struct net_device *dev; 144 struct net_device *dev;
154 145
155 dev = __dev_get_by_name(&init_net, "tunl0"); 146 dev = __dev_get_by_name(net, "tunl0");
156 147
157 if (dev) { 148 if (dev) {
158 const struct net_device_ops *ops = dev->netdev_ops; 149 const struct net_device_ops *ops = dev->netdev_ops;
@@ -181,7 +172,8 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
181 172
182 dev = NULL; 173 dev = NULL;
183 174
184 if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) { 175 if (err == 0 &&
176 (dev = __dev_get_by_name(net, p.name)) != NULL) {
185 dev->flags |= IFF_MULTICAST; 177 dev->flags |= IFF_MULTICAST;
186 178
187 in_dev = __in_dev_get_rtnl(dev); 179 in_dev = __in_dev_get_rtnl(dev);
@@ -209,14 +201,15 @@ failure:
209 201
210#ifdef CONFIG_IP_PIMSM 202#ifdef CONFIG_IP_PIMSM
211 203
212static int reg_vif_num = -1;
213
214static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 204static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
215{ 205{
206 struct net *net = dev_net(dev);
207
216 read_lock(&mrt_lock); 208 read_lock(&mrt_lock);
217 dev->stats.tx_bytes += skb->len; 209 dev->stats.tx_bytes += skb->len;
218 dev->stats.tx_packets++; 210 dev->stats.tx_packets++;
219 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); 211 ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
212 IGMPMSG_WHOLEPKT);
220 read_unlock(&mrt_lock); 213 read_unlock(&mrt_lock);
221 kfree_skb(skb); 214 kfree_skb(skb);
222 return 0; 215 return 0;
@@ -283,16 +276,16 @@ failure:
283 * @notify: Set to 1, if the caller is a notifier_call 276 * @notify: Set to 1, if the caller is a notifier_call
284 */ 277 */
285 278
286static int vif_delete(int vifi, int notify) 279static int vif_delete(struct net *net, int vifi, int notify)
287{ 280{
288 struct vif_device *v; 281 struct vif_device *v;
289 struct net_device *dev; 282 struct net_device *dev;
290 struct in_device *in_dev; 283 struct in_device *in_dev;
291 284
292 if (vifi < 0 || vifi >= maxvif) 285 if (vifi < 0 || vifi >= net->ipv4.maxvif)
293 return -EADDRNOTAVAIL; 286 return -EADDRNOTAVAIL;
294 287
295 v = &vif_table[vifi]; 288 v = &net->ipv4.vif_table[vifi];
296 289
297 write_lock_bh(&mrt_lock); 290 write_lock_bh(&mrt_lock);
298 dev = v->dev; 291 dev = v->dev;
@@ -304,17 +297,17 @@ static int vif_delete(int vifi, int notify)
304 } 297 }
305 298
306#ifdef CONFIG_IP_PIMSM 299#ifdef CONFIG_IP_PIMSM
307 if (vifi == reg_vif_num) 300 if (vifi == net->ipv4.mroute_reg_vif_num)
308 reg_vif_num = -1; 301 net->ipv4.mroute_reg_vif_num = -1;
309#endif 302#endif
310 303
311 if (vifi+1 == maxvif) { 304 if (vifi+1 == net->ipv4.maxvif) {
312 int tmp; 305 int tmp;
313 for (tmp=vifi-1; tmp>=0; tmp--) { 306 for (tmp=vifi-1; tmp>=0; tmp--) {
314 if (VIF_EXISTS(tmp)) 307 if (VIF_EXISTS(net, tmp))
315 break; 308 break;
316 } 309 }
317 maxvif = tmp+1; 310 net->ipv4.maxvif = tmp+1;
318 } 311 }
319 312
320 write_unlock_bh(&mrt_lock); 313 write_unlock_bh(&mrt_lock);
@@ -333,6 +326,12 @@ static int vif_delete(int vifi, int notify)
333 return 0; 326 return 0;
334} 327}
335 328
329static inline void ipmr_cache_free(struct mfc_cache *c)
330{
331 release_net(mfc_net(c));
332 kmem_cache_free(mrt_cachep, c);
333}
334
336/* Destroy an unresolved cache entry, killing queued skbs 335/* Destroy an unresolved cache entry, killing queued skbs
337 and reporting error to netlink readers. 336 and reporting error to netlink readers.
338 */ 337 */
@@ -341,8 +340,9 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
341{ 340{
342 struct sk_buff *skb; 341 struct sk_buff *skb;
343 struct nlmsgerr *e; 342 struct nlmsgerr *e;
343 struct net *net = mfc_net(c);
344 344
345 atomic_dec(&cache_resolve_queue_len); 345 atomic_dec(&net->ipv4.cache_resolve_queue_len);
346 346
347 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { 347 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
348 if (ip_hdr(skb)->version == 0) { 348 if (ip_hdr(skb)->version == 0) {
@@ -354,12 +354,12 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
354 e->error = -ETIMEDOUT; 354 e->error = -ETIMEDOUT;
355 memset(&e->msg, 0, sizeof(e->msg)); 355 memset(&e->msg, 0, sizeof(e->msg));
356 356
357 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); 357 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
358 } else 358 } else
359 kfree_skb(skb); 359 kfree_skb(skb);
360 } 360 }
361 361
362 kmem_cache_free(mrt_cachep, c); 362 ipmr_cache_free(c);
363} 363}
364 364
365 365
@@ -376,7 +376,7 @@ static void ipmr_expire_process(unsigned long dummy)
376 return; 376 return;
377 } 377 }
378 378
379 if (atomic_read(&cache_resolve_queue_len) == 0) 379 if (mfc_unres_queue == NULL)
380 goto out; 380 goto out;
381 381
382 now = jiffies; 382 now = jiffies;
@@ -397,7 +397,7 @@ static void ipmr_expire_process(unsigned long dummy)
397 ipmr_destroy_unres(c); 397 ipmr_destroy_unres(c);
398 } 398 }
399 399
400 if (atomic_read(&cache_resolve_queue_len)) 400 if (mfc_unres_queue != NULL)
401 mod_timer(&ipmr_expire_timer, jiffies + expires); 401 mod_timer(&ipmr_expire_timer, jiffies + expires);
402 402
403out: 403out:
@@ -409,13 +409,15 @@ out:
409static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) 409static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
410{ 410{
411 int vifi; 411 int vifi;
412 struct net *net = mfc_net(cache);
412 413
413 cache->mfc_un.res.minvif = MAXVIFS; 414 cache->mfc_un.res.minvif = MAXVIFS;
414 cache->mfc_un.res.maxvif = 0; 415 cache->mfc_un.res.maxvif = 0;
415 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 416 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
416 417
417 for (vifi=0; vifi<maxvif; vifi++) { 418 for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
418 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) { 419 if (VIF_EXISTS(net, vifi) &&
420 ttls[vifi] && ttls[vifi] < 255) {
419 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 421 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
420 if (cache->mfc_un.res.minvif > vifi) 422 if (cache->mfc_un.res.minvif > vifi)
421 cache->mfc_un.res.minvif = vifi; 423 cache->mfc_un.res.minvif = vifi;
@@ -425,16 +427,16 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
425 } 427 }
426} 428}
427 429
428static int vif_add(struct vifctl *vifc, int mrtsock) 430static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
429{ 431{
430 int vifi = vifc->vifc_vifi; 432 int vifi = vifc->vifc_vifi;
431 struct vif_device *v = &vif_table[vifi]; 433 struct vif_device *v = &net->ipv4.vif_table[vifi];
432 struct net_device *dev; 434 struct net_device *dev;
433 struct in_device *in_dev; 435 struct in_device *in_dev;
434 int err; 436 int err;
435 437
436 /* Is vif busy ? */ 438 /* Is vif busy ? */
437 if (VIF_EXISTS(vifi)) 439 if (VIF_EXISTS(net, vifi))
438 return -EADDRINUSE; 440 return -EADDRINUSE;
439 441
440 switch (vifc->vifc_flags) { 442 switch (vifc->vifc_flags) {
@@ -444,7 +446,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
444 * Special Purpose VIF in PIM 446 * Special Purpose VIF in PIM
445 * All the packets will be sent to the daemon 447 * All the packets will be sent to the daemon
446 */ 448 */
447 if (reg_vif_num >= 0) 449 if (net->ipv4.mroute_reg_vif_num >= 0)
448 return -EADDRINUSE; 450 return -EADDRINUSE;
449 dev = ipmr_reg_vif(); 451 dev = ipmr_reg_vif();
450 if (!dev) 452 if (!dev)
@@ -458,7 +460,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
458 break; 460 break;
459#endif 461#endif
460 case VIFF_TUNNEL: 462 case VIFF_TUNNEL:
461 dev = ipmr_new_tunnel(vifc); 463 dev = ipmr_new_tunnel(net, vifc);
462 if (!dev) 464 if (!dev)
463 return -ENOBUFS; 465 return -ENOBUFS;
464 err = dev_set_allmulti(dev, 1); 466 err = dev_set_allmulti(dev, 1);
@@ -469,7 +471,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
469 } 471 }
470 break; 472 break;
471 case 0: 473 case 0:
472 dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr); 474 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
473 if (!dev) 475 if (!dev)
474 return -EADDRNOTAVAIL; 476 return -EADDRNOTAVAIL;
475 err = dev_set_allmulti(dev, 1); 477 err = dev_set_allmulti(dev, 1);
@@ -510,20 +512,22 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
510 v->dev = dev; 512 v->dev = dev;
511#ifdef CONFIG_IP_PIMSM 513#ifdef CONFIG_IP_PIMSM
512 if (v->flags&VIFF_REGISTER) 514 if (v->flags&VIFF_REGISTER)
513 reg_vif_num = vifi; 515 net->ipv4.mroute_reg_vif_num = vifi;
514#endif 516#endif
515 if (vifi+1 > maxvif) 517 if (vifi+1 > net->ipv4.maxvif)
516 maxvif = vifi+1; 518 net->ipv4.maxvif = vifi+1;
517 write_unlock_bh(&mrt_lock); 519 write_unlock_bh(&mrt_lock);
518 return 0; 520 return 0;
519} 521}
520 522
521static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) 523static struct mfc_cache *ipmr_cache_find(struct net *net,
524 __be32 origin,
525 __be32 mcastgrp)
522{ 526{
523 int line = MFC_HASH(mcastgrp, origin); 527 int line = MFC_HASH(mcastgrp, origin);
524 struct mfc_cache *c; 528 struct mfc_cache *c;
525 529
526 for (c=mfc_cache_array[line]; c; c = c->next) { 530 for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
527 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) 531 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
528 break; 532 break;
529 } 533 }
@@ -533,22 +537,24 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
533/* 537/*
534 * Allocate a multicast cache entry 538 * Allocate a multicast cache entry
535 */ 539 */
536static struct mfc_cache *ipmr_cache_alloc(void) 540static struct mfc_cache *ipmr_cache_alloc(struct net *net)
537{ 541{
538 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 542 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
539 if (c == NULL) 543 if (c == NULL)
540 return NULL; 544 return NULL;
541 c->mfc_un.res.minvif = MAXVIFS; 545 c->mfc_un.res.minvif = MAXVIFS;
546 mfc_net_set(c, net);
542 return c; 547 return c;
543} 548}
544 549
545static struct mfc_cache *ipmr_cache_alloc_unres(void) 550static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
546{ 551{
547 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 552 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
548 if (c == NULL) 553 if (c == NULL)
549 return NULL; 554 return NULL;
550 skb_queue_head_init(&c->mfc_un.unres.unresolved); 555 skb_queue_head_init(&c->mfc_un.unres.unresolved);
551 c->mfc_un.unres.expires = jiffies + 10*HZ; 556 c->mfc_un.unres.expires = jiffies + 10*HZ;
557 mfc_net_set(c, net);
552 return c; 558 return c;
553} 559}
554 560
@@ -581,7 +587,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
581 memset(&e->msg, 0, sizeof(e->msg)); 587 memset(&e->msg, 0, sizeof(e->msg));
582 } 588 }
583 589
584 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); 590 rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
585 } else 591 } else
586 ip_mr_forward(skb, c, 0); 592 ip_mr_forward(skb, c, 0);
587 } 593 }
@@ -594,7 +600,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
594 * Called under mrt_lock. 600 * Called under mrt_lock.
595 */ 601 */
596 602
597static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) 603static int ipmr_cache_report(struct net *net,
604 struct sk_buff *pkt, vifi_t vifi, int assert)
598{ 605{
599 struct sk_buff *skb; 606 struct sk_buff *skb;
600 const int ihl = ip_hdrlen(pkt); 607 const int ihl = ip_hdrlen(pkt);
@@ -626,7 +633,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
626 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); 633 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
627 msg->im_msgtype = IGMPMSG_WHOLEPKT; 634 msg->im_msgtype = IGMPMSG_WHOLEPKT;
628 msg->im_mbz = 0; 635 msg->im_mbz = 0;
629 msg->im_vif = reg_vif_num; 636 msg->im_vif = net->ipv4.mroute_reg_vif_num;
630 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; 637 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
631 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + 638 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
632 sizeof(struct iphdr)); 639 sizeof(struct iphdr));
@@ -658,7 +665,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
658 skb->transport_header = skb->network_header; 665 skb->transport_header = skb->network_header;
659 } 666 }
660 667
661 if (mroute_socket == NULL) { 668 if (net->ipv4.mroute_sk == NULL) {
662 kfree_skb(skb); 669 kfree_skb(skb);
663 return -EINVAL; 670 return -EINVAL;
664 } 671 }
@@ -666,7 +673,8 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
666 /* 673 /*
667 * Deliver to mrouted 674 * Deliver to mrouted
668 */ 675 */
669 if ((ret = sock_queue_rcv_skb(mroute_socket, skb))<0) { 676 ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
677 if (ret < 0) {
670 if (net_ratelimit()) 678 if (net_ratelimit())
671 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); 679 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
672 kfree_skb(skb); 680 kfree_skb(skb);
@@ -680,7 +688,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
680 */ 688 */
681 689
682static int 690static int
683ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) 691ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
684{ 692{
685 int err; 693 int err;
686 struct mfc_cache *c; 694 struct mfc_cache *c;
@@ -688,7 +696,8 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
688 696
689 spin_lock_bh(&mfc_unres_lock); 697 spin_lock_bh(&mfc_unres_lock);
690 for (c=mfc_unres_queue; c; c=c->next) { 698 for (c=mfc_unres_queue; c; c=c->next) {
691 if (c->mfc_mcastgrp == iph->daddr && 699 if (net_eq(mfc_net(c), net) &&
700 c->mfc_mcastgrp == iph->daddr &&
692 c->mfc_origin == iph->saddr) 701 c->mfc_origin == iph->saddr)
693 break; 702 break;
694 } 703 }
@@ -698,8 +707,8 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
698 * Create a new entry if allowable 707 * Create a new entry if allowable
699 */ 708 */
700 709
701 if (atomic_read(&cache_resolve_queue_len) >= 10 || 710 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
702 (c=ipmr_cache_alloc_unres())==NULL) { 711 (c = ipmr_cache_alloc_unres(net)) == NULL) {
703 spin_unlock_bh(&mfc_unres_lock); 712 spin_unlock_bh(&mfc_unres_lock);
704 713
705 kfree_skb(skb); 714 kfree_skb(skb);
@@ -716,18 +725,19 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
716 /* 725 /*
717 * Reflect first query at mrouted. 726 * Reflect first query at mrouted.
718 */ 727 */
719 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) { 728 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
729 if (err < 0) {
720 /* If the report failed throw the cache entry 730 /* If the report failed throw the cache entry
721 out - Brad Parker 731 out - Brad Parker
722 */ 732 */
723 spin_unlock_bh(&mfc_unres_lock); 733 spin_unlock_bh(&mfc_unres_lock);
724 734
725 kmem_cache_free(mrt_cachep, c); 735 ipmr_cache_free(c);
726 kfree_skb(skb); 736 kfree_skb(skb);
727 return err; 737 return err;
728 } 738 }
729 739
730 atomic_inc(&cache_resolve_queue_len); 740 atomic_inc(&net->ipv4.cache_resolve_queue_len);
731 c->next = mfc_unres_queue; 741 c->next = mfc_unres_queue;
732 mfc_unres_queue = c; 742 mfc_unres_queue = c;
733 743
@@ -753,35 +763,37 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
753 * MFC cache manipulation by user space mroute daemon 763 * MFC cache manipulation by user space mroute daemon
754 */ 764 */
755 765
756static int ipmr_mfc_delete(struct mfcctl *mfc) 766static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
757{ 767{
758 int line; 768 int line;
759 struct mfc_cache *c, **cp; 769 struct mfc_cache *c, **cp;
760 770
761 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 771 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
762 772
763 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { 773 for (cp = &net->ipv4.mfc_cache_array[line];
774 (c = *cp) != NULL; cp = &c->next) {
764 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 775 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
765 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 776 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
766 write_lock_bh(&mrt_lock); 777 write_lock_bh(&mrt_lock);
767 *cp = c->next; 778 *cp = c->next;
768 write_unlock_bh(&mrt_lock); 779 write_unlock_bh(&mrt_lock);
769 780
770 kmem_cache_free(mrt_cachep, c); 781 ipmr_cache_free(c);
771 return 0; 782 return 0;
772 } 783 }
773 } 784 }
774 return -ENOENT; 785 return -ENOENT;
775} 786}
776 787
777static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) 788static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
778{ 789{
779 int line; 790 int line;
780 struct mfc_cache *uc, *c, **cp; 791 struct mfc_cache *uc, *c, **cp;
781 792
782 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 793 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
783 794
784 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { 795 for (cp = &net->ipv4.mfc_cache_array[line];
796 (c = *cp) != NULL; cp = &c->next) {
785 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 797 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
786 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) 798 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
787 break; 799 break;
@@ -800,7 +812,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
800 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) 812 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
801 return -EINVAL; 813 return -EINVAL;
802 814
803 c = ipmr_cache_alloc(); 815 c = ipmr_cache_alloc(net);
804 if (c == NULL) 816 if (c == NULL)
805 return -ENOMEM; 817 return -ENOMEM;
806 818
@@ -812,8 +824,8 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
812 c->mfc_flags |= MFC_STATIC; 824 c->mfc_flags |= MFC_STATIC;
813 825
814 write_lock_bh(&mrt_lock); 826 write_lock_bh(&mrt_lock);
815 c->next = mfc_cache_array[line]; 827 c->next = net->ipv4.mfc_cache_array[line];
816 mfc_cache_array[line] = c; 828 net->ipv4.mfc_cache_array[line] = c;
817 write_unlock_bh(&mrt_lock); 829 write_unlock_bh(&mrt_lock);
818 830
819 /* 831 /*
@@ -823,19 +835,21 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
823 spin_lock_bh(&mfc_unres_lock); 835 spin_lock_bh(&mfc_unres_lock);
824 for (cp = &mfc_unres_queue; (uc=*cp) != NULL; 836 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
825 cp = &uc->next) { 837 cp = &uc->next) {
826 if (uc->mfc_origin == c->mfc_origin && 838 if (net_eq(mfc_net(uc), net) &&
839 uc->mfc_origin == c->mfc_origin &&
827 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 840 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
828 *cp = uc->next; 841 *cp = uc->next;
829 if (atomic_dec_and_test(&cache_resolve_queue_len)) 842 atomic_dec(&net->ipv4.cache_resolve_queue_len);
830 del_timer(&ipmr_expire_timer);
831 break; 843 break;
832 } 844 }
833 } 845 }
846 if (mfc_unres_queue == NULL)
847 del_timer(&ipmr_expire_timer);
834 spin_unlock_bh(&mfc_unres_lock); 848 spin_unlock_bh(&mfc_unres_lock);
835 849
836 if (uc) { 850 if (uc) {
837 ipmr_cache_resolve(uc, c); 851 ipmr_cache_resolve(uc, c);
838 kmem_cache_free(mrt_cachep, uc); 852 ipmr_cache_free(uc);
839 } 853 }
840 return 0; 854 return 0;
841} 855}
@@ -844,16 +858,16 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
844 * Close the multicast socket, and clear the vif tables etc 858 * Close the multicast socket, and clear the vif tables etc
845 */ 859 */
846 860
847static void mroute_clean_tables(struct sock *sk) 861static void mroute_clean_tables(struct net *net)
848{ 862{
849 int i; 863 int i;
850 864
851 /* 865 /*
852 * Shut down all active vif entries 866 * Shut down all active vif entries
853 */ 867 */
854 for (i=0; i<maxvif; i++) { 868 for (i = 0; i < net->ipv4.maxvif; i++) {
855 if (!(vif_table[i].flags&VIFF_STATIC)) 869 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
856 vif_delete(i, 0); 870 vif_delete(net, i, 0);
857 } 871 }
858 872
859 /* 873 /*
@@ -862,7 +876,7 @@ static void mroute_clean_tables(struct sock *sk)
862 for (i=0; i<MFC_LINES; i++) { 876 for (i=0; i<MFC_LINES; i++) {
863 struct mfc_cache *c, **cp; 877 struct mfc_cache *c, **cp;
864 878
865 cp = &mfc_cache_array[i]; 879 cp = &net->ipv4.mfc_cache_array[i];
866 while ((c = *cp) != NULL) { 880 while ((c = *cp) != NULL) {
867 if (c->mfc_flags&MFC_STATIC) { 881 if (c->mfc_flags&MFC_STATIC) {
868 cp = &c->next; 882 cp = &c->next;
@@ -872,22 +886,23 @@ static void mroute_clean_tables(struct sock *sk)
872 *cp = c->next; 886 *cp = c->next;
873 write_unlock_bh(&mrt_lock); 887 write_unlock_bh(&mrt_lock);
874 888
875 kmem_cache_free(mrt_cachep, c); 889 ipmr_cache_free(c);
876 } 890 }
877 } 891 }
878 892
879 if (atomic_read(&cache_resolve_queue_len) != 0) { 893 if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
880 struct mfc_cache *c; 894 struct mfc_cache *c, **cp;
881 895
882 spin_lock_bh(&mfc_unres_lock); 896 spin_lock_bh(&mfc_unres_lock);
883 while (mfc_unres_queue != NULL) { 897 cp = &mfc_unres_queue;
884 c = mfc_unres_queue; 898 while ((c = *cp) != NULL) {
885 mfc_unres_queue = c->next; 899 if (!net_eq(mfc_net(c), net)) {
886 spin_unlock_bh(&mfc_unres_lock); 900 cp = &c->next;
901 continue;
902 }
903 *cp = c->next;
887 904
888 ipmr_destroy_unres(c); 905 ipmr_destroy_unres(c);
889
890 spin_lock_bh(&mfc_unres_lock);
891 } 906 }
892 spin_unlock_bh(&mfc_unres_lock); 907 spin_unlock_bh(&mfc_unres_lock);
893 } 908 }
@@ -895,15 +910,17 @@ static void mroute_clean_tables(struct sock *sk)
895 910
896static void mrtsock_destruct(struct sock *sk) 911static void mrtsock_destruct(struct sock *sk)
897{ 912{
913 struct net *net = sock_net(sk);
914
898 rtnl_lock(); 915 rtnl_lock();
899 if (sk == mroute_socket) { 916 if (sk == net->ipv4.mroute_sk) {
900 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--; 917 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
901 918
902 write_lock_bh(&mrt_lock); 919 write_lock_bh(&mrt_lock);
903 mroute_socket = NULL; 920 net->ipv4.mroute_sk = NULL;
904 write_unlock_bh(&mrt_lock); 921 write_unlock_bh(&mrt_lock);
905 922
906 mroute_clean_tables(sk); 923 mroute_clean_tables(net);
907 } 924 }
908 rtnl_unlock(); 925 rtnl_unlock();
909} 926}
@@ -920,9 +937,10 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
920 int ret; 937 int ret;
921 struct vifctl vif; 938 struct vifctl vif;
922 struct mfcctl mfc; 939 struct mfcctl mfc;
940 struct net *net = sock_net(sk);
923 941
924 if (optname != MRT_INIT) { 942 if (optname != MRT_INIT) {
925 if (sk != mroute_socket && !capable(CAP_NET_ADMIN)) 943 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
926 return -EACCES; 944 return -EACCES;
927 } 945 }
928 946
@@ -935,7 +953,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
935 return -ENOPROTOOPT; 953 return -ENOPROTOOPT;
936 954
937 rtnl_lock(); 955 rtnl_lock();
938 if (mroute_socket) { 956 if (net->ipv4.mroute_sk) {
939 rtnl_unlock(); 957 rtnl_unlock();
940 return -EADDRINUSE; 958 return -EADDRINUSE;
941 } 959 }
@@ -943,15 +961,15 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
943 ret = ip_ra_control(sk, 1, mrtsock_destruct); 961 ret = ip_ra_control(sk, 1, mrtsock_destruct);
944 if (ret == 0) { 962 if (ret == 0) {
945 write_lock_bh(&mrt_lock); 963 write_lock_bh(&mrt_lock);
946 mroute_socket = sk; 964 net->ipv4.mroute_sk = sk;
947 write_unlock_bh(&mrt_lock); 965 write_unlock_bh(&mrt_lock);
948 966
949 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++; 967 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
950 } 968 }
951 rtnl_unlock(); 969 rtnl_unlock();
952 return ret; 970 return ret;
953 case MRT_DONE: 971 case MRT_DONE:
954 if (sk != mroute_socket) 972 if (sk != net->ipv4.mroute_sk)
955 return -EACCES; 973 return -EACCES;
956 return ip_ra_control(sk, 0, NULL); 974 return ip_ra_control(sk, 0, NULL);
957 case MRT_ADD_VIF: 975 case MRT_ADD_VIF:
@@ -964,9 +982,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
964 return -ENFILE; 982 return -ENFILE;
965 rtnl_lock(); 983 rtnl_lock();
966 if (optname == MRT_ADD_VIF) { 984 if (optname == MRT_ADD_VIF) {
967 ret = vif_add(&vif, sk==mroute_socket); 985 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
968 } else { 986 } else {
969 ret = vif_delete(vif.vifc_vifi, 0); 987 ret = vif_delete(net, vif.vifc_vifi, 0);
970 } 988 }
971 rtnl_unlock(); 989 rtnl_unlock();
972 return ret; 990 return ret;
@@ -983,9 +1001,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
983 return -EFAULT; 1001 return -EFAULT;
984 rtnl_lock(); 1002 rtnl_lock();
985 if (optname == MRT_DEL_MFC) 1003 if (optname == MRT_DEL_MFC)
986 ret = ipmr_mfc_delete(&mfc); 1004 ret = ipmr_mfc_delete(net, &mfc);
987 else 1005 else
988 ret = ipmr_mfc_add(&mfc, sk==mroute_socket); 1006 ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
989 rtnl_unlock(); 1007 rtnl_unlock();
990 return ret; 1008 return ret;
991 /* 1009 /*
@@ -996,7 +1014,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
996 int v; 1014 int v;
997 if (get_user(v,(int __user *)optval)) 1015 if (get_user(v,(int __user *)optval))
998 return -EFAULT; 1016 return -EFAULT;
999 mroute_do_assert=(v)?1:0; 1017 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1000 return 0; 1018 return 0;
1001 } 1019 }
1002#ifdef CONFIG_IP_PIMSM 1020#ifdef CONFIG_IP_PIMSM
@@ -1010,11 +1028,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
1010 1028
1011 rtnl_lock(); 1029 rtnl_lock();
1012 ret = 0; 1030 ret = 0;
1013 if (v != mroute_do_pim) { 1031 if (v != net->ipv4.mroute_do_pim) {
1014 mroute_do_pim = v; 1032 net->ipv4.mroute_do_pim = v;
1015 mroute_do_assert = v; 1033 net->ipv4.mroute_do_assert = v;
1016#ifdef CONFIG_IP_PIMSM_V2 1034#ifdef CONFIG_IP_PIMSM_V2
1017 if (mroute_do_pim) 1035 if (net->ipv4.mroute_do_pim)
1018 ret = inet_add_protocol(&pim_protocol, 1036 ret = inet_add_protocol(&pim_protocol,
1019 IPPROTO_PIM); 1037 IPPROTO_PIM);
1020 else 1038 else
@@ -1045,6 +1063,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1045{ 1063{
1046 int olr; 1064 int olr;
1047 int val; 1065 int val;
1066 struct net *net = sock_net(sk);
1048 1067
1049 if (optname != MRT_VERSION && 1068 if (optname != MRT_VERSION &&
1050#ifdef CONFIG_IP_PIMSM 1069#ifdef CONFIG_IP_PIMSM
@@ -1066,10 +1085,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1066 val = 0x0305; 1085 val = 0x0305;
1067#ifdef CONFIG_IP_PIMSM 1086#ifdef CONFIG_IP_PIMSM
1068 else if (optname == MRT_PIM) 1087 else if (optname == MRT_PIM)
1069 val = mroute_do_pim; 1088 val = net->ipv4.mroute_do_pim;
1070#endif 1089#endif
1071 else 1090 else
1072 val = mroute_do_assert; 1091 val = net->ipv4.mroute_do_assert;
1073 if (copy_to_user(optval, &val, olr)) 1092 if (copy_to_user(optval, &val, olr))
1074 return -EFAULT; 1093 return -EFAULT;
1075 return 0; 1094 return 0;
@@ -1085,16 +1104,17 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1085 struct sioc_vif_req vr; 1104 struct sioc_vif_req vr;
1086 struct vif_device *vif; 1105 struct vif_device *vif;
1087 struct mfc_cache *c; 1106 struct mfc_cache *c;
1107 struct net *net = sock_net(sk);
1088 1108
1089 switch (cmd) { 1109 switch (cmd) {
1090 case SIOCGETVIFCNT: 1110 case SIOCGETVIFCNT:
1091 if (copy_from_user(&vr, arg, sizeof(vr))) 1111 if (copy_from_user(&vr, arg, sizeof(vr)))
1092 return -EFAULT; 1112 return -EFAULT;
1093 if (vr.vifi >= maxvif) 1113 if (vr.vifi >= net->ipv4.maxvif)
1094 return -EINVAL; 1114 return -EINVAL;
1095 read_lock(&mrt_lock); 1115 read_lock(&mrt_lock);
1096 vif=&vif_table[vr.vifi]; 1116 vif = &net->ipv4.vif_table[vr.vifi];
1097 if (VIF_EXISTS(vr.vifi)) { 1117 if (VIF_EXISTS(net, vr.vifi)) {
1098 vr.icount = vif->pkt_in; 1118 vr.icount = vif->pkt_in;
1099 vr.ocount = vif->pkt_out; 1119 vr.ocount = vif->pkt_out;
1100 vr.ibytes = vif->bytes_in; 1120 vr.ibytes = vif->bytes_in;
@@ -1112,7 +1132,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1112 return -EFAULT; 1132 return -EFAULT;
1113 1133
1114 read_lock(&mrt_lock); 1134 read_lock(&mrt_lock);
1115 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr); 1135 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1116 if (c) { 1136 if (c) {
1117 sr.pktcnt = c->mfc_un.res.pkt; 1137 sr.pktcnt = c->mfc_un.res.pkt;
1118 sr.bytecnt = c->mfc_un.res.bytes; 1138 sr.bytecnt = c->mfc_un.res.bytes;
@@ -1134,18 +1154,19 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1134static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1154static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1135{ 1155{
1136 struct net_device *dev = ptr; 1156 struct net_device *dev = ptr;
1157 struct net *net = dev_net(dev);
1137 struct vif_device *v; 1158 struct vif_device *v;
1138 int ct; 1159 int ct;
1139 1160
1140 if (!net_eq(dev_net(dev), &init_net)) 1161 if (!net_eq(dev_net(dev), net))
1141 return NOTIFY_DONE; 1162 return NOTIFY_DONE;
1142 1163
1143 if (event != NETDEV_UNREGISTER) 1164 if (event != NETDEV_UNREGISTER)
1144 return NOTIFY_DONE; 1165 return NOTIFY_DONE;
1145 v=&vif_table[0]; 1166 v = &net->ipv4.vif_table[0];
1146 for (ct=0; ct<maxvif; ct++,v++) { 1167 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1147 if (v->dev == dev) 1168 if (v->dev == dev)
1148 vif_delete(ct, 1); 1169 vif_delete(net, ct, 1);
1149 } 1170 }
1150 return NOTIFY_DONE; 1171 return NOTIFY_DONE;
1151} 1172}
@@ -1205,8 +1226,9 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
1205 1226
1206static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) 1227static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1207{ 1228{
1229 struct net *net = mfc_net(c);
1208 const struct iphdr *iph = ip_hdr(skb); 1230 const struct iphdr *iph = ip_hdr(skb);
1209 struct vif_device *vif = &vif_table[vifi]; 1231 struct vif_device *vif = &net->ipv4.vif_table[vifi];
1210 struct net_device *dev; 1232 struct net_device *dev;
1211 struct rtable *rt; 1233 struct rtable *rt;
1212 int encap = 0; 1234 int encap = 0;
@@ -1220,9 +1242,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1220 vif->bytes_out += skb->len; 1242 vif->bytes_out += skb->len;
1221 vif->dev->stats.tx_bytes += skb->len; 1243 vif->dev->stats.tx_bytes += skb->len;
1222 vif->dev->stats.tx_packets++; 1244 vif->dev->stats.tx_packets++;
1223 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); 1245 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1224 kfree_skb(skb); 1246 goto out_free;
1225 return;
1226 } 1247 }
1227#endif 1248#endif
1228 1249
@@ -1233,7 +1254,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1233 .saddr = vif->local, 1254 .saddr = vif->local,
1234 .tos = RT_TOS(iph->tos) } }, 1255 .tos = RT_TOS(iph->tos) } },
1235 .proto = IPPROTO_IPIP }; 1256 .proto = IPPROTO_IPIP };
1236 if (ip_route_output_key(&init_net, &rt, &fl)) 1257 if (ip_route_output_key(net, &rt, &fl))
1237 goto out_free; 1258 goto out_free;
1238 encap = sizeof(struct iphdr); 1259 encap = sizeof(struct iphdr);
1239 } else { 1260 } else {
@@ -1242,7 +1263,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1242 { .daddr = iph->daddr, 1263 { .daddr = iph->daddr,
1243 .tos = RT_TOS(iph->tos) } }, 1264 .tos = RT_TOS(iph->tos) } },
1244 .proto = IPPROTO_IPIP }; 1265 .proto = IPPROTO_IPIP };
1245 if (ip_route_output_key(&init_net, &rt, &fl)) 1266 if (ip_route_output_key(net, &rt, &fl))
1246 goto out_free; 1267 goto out_free;
1247 } 1268 }
1248 1269
@@ -1306,9 +1327,10 @@ out_free:
1306 1327
1307static int ipmr_find_vif(struct net_device *dev) 1328static int ipmr_find_vif(struct net_device *dev)
1308{ 1329{
1330 struct net *net = dev_net(dev);
1309 int ct; 1331 int ct;
1310 for (ct=maxvif-1; ct>=0; ct--) { 1332 for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1311 if (vif_table[ct].dev == dev) 1333 if (net->ipv4.vif_table[ct].dev == dev)
1312 break; 1334 break;
1313 } 1335 }
1314 return ct; 1336 return ct;
@@ -1320,6 +1342,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1320{ 1342{
1321 int psend = -1; 1343 int psend = -1;
1322 int vif, ct; 1344 int vif, ct;
1345 struct net *net = mfc_net(cache);
1323 1346
1324 vif = cache->mfc_parent; 1347 vif = cache->mfc_parent;
1325 cache->mfc_un.res.pkt++; 1348 cache->mfc_un.res.pkt++;
@@ -1328,7 +1351,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1328 /* 1351 /*
1329 * Wrong interface: drop packet and (maybe) send PIM assert. 1352 * Wrong interface: drop packet and (maybe) send PIM assert.
1330 */ 1353 */
1331 if (vif_table[vif].dev != skb->dev) { 1354 if (net->ipv4.vif_table[vif].dev != skb->dev) {
1332 int true_vifi; 1355 int true_vifi;
1333 1356
1334 if (skb->rtable->fl.iif == 0) { 1357 if (skb->rtable->fl.iif == 0) {
@@ -1349,23 +1372,24 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1349 cache->mfc_un.res.wrong_if++; 1372 cache->mfc_un.res.wrong_if++;
1350 true_vifi = ipmr_find_vif(skb->dev); 1373 true_vifi = ipmr_find_vif(skb->dev);
1351 1374
1352 if (true_vifi >= 0 && mroute_do_assert && 1375 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1353 /* pimsm uses asserts, when switching from RPT to SPT, 1376 /* pimsm uses asserts, when switching from RPT to SPT,
1354 so that we cannot check that packet arrived on an oif. 1377 so that we cannot check that packet arrived on an oif.
1355 It is bad, but otherwise we would need to move pretty 1378 It is bad, but otherwise we would need to move pretty
1356 large chunk of pimd to kernel. Ough... --ANK 1379 large chunk of pimd to kernel. Ough... --ANK
1357 */ 1380 */
1358 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && 1381 (net->ipv4.mroute_do_pim ||
1382 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1359 time_after(jiffies, 1383 time_after(jiffies,
1360 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { 1384 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1361 cache->mfc_un.res.last_assert = jiffies; 1385 cache->mfc_un.res.last_assert = jiffies;
1362 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF); 1386 ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1363 } 1387 }
1364 goto dont_forward; 1388 goto dont_forward;
1365 } 1389 }
1366 1390
1367 vif_table[vif].pkt_in++; 1391 net->ipv4.vif_table[vif].pkt_in++;
1368 vif_table[vif].bytes_in += skb->len; 1392 net->ipv4.vif_table[vif].bytes_in += skb->len;
1369 1393
1370 /* 1394 /*
1371 * Forward the frame 1395 * Forward the frame
@@ -1405,6 +1429,7 @@ dont_forward:
1405int ip_mr_input(struct sk_buff *skb) 1429int ip_mr_input(struct sk_buff *skb)
1406{ 1430{
1407 struct mfc_cache *cache; 1431 struct mfc_cache *cache;
1432 struct net *net = dev_net(skb->dev);
1408 int local = skb->rtable->rt_flags&RTCF_LOCAL; 1433 int local = skb->rtable->rt_flags&RTCF_LOCAL;
1409 1434
1410 /* Packet is looped back after forward, it should not be 1435 /* Packet is looped back after forward, it should not be
@@ -1425,9 +1450,9 @@ int ip_mr_input(struct sk_buff *skb)
1425 that we can forward NO IGMP messages. 1450 that we can forward NO IGMP messages.
1426 */ 1451 */
1427 read_lock(&mrt_lock); 1452 read_lock(&mrt_lock);
1428 if (mroute_socket) { 1453 if (net->ipv4.mroute_sk) {
1429 nf_reset(skb); 1454 nf_reset(skb);
1430 raw_rcv(mroute_socket, skb); 1455 raw_rcv(net->ipv4.mroute_sk, skb);
1431 read_unlock(&mrt_lock); 1456 read_unlock(&mrt_lock);
1432 return 0; 1457 return 0;
1433 } 1458 }
@@ -1436,7 +1461,7 @@ int ip_mr_input(struct sk_buff *skb)
1436 } 1461 }
1437 1462
1438 read_lock(&mrt_lock); 1463 read_lock(&mrt_lock);
1439 cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 1464 cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1440 1465
1441 /* 1466 /*
1442 * No usable cache entry 1467 * No usable cache entry
@@ -1456,7 +1481,7 @@ int ip_mr_input(struct sk_buff *skb)
1456 1481
1457 vif = ipmr_find_vif(skb->dev); 1482 vif = ipmr_find_vif(skb->dev);
1458 if (vif >= 0) { 1483 if (vif >= 0) {
1459 int err = ipmr_cache_unresolved(vif, skb); 1484 int err = ipmr_cache_unresolved(net, vif, skb);
1460 read_unlock(&mrt_lock); 1485 read_unlock(&mrt_lock);
1461 1486
1462 return err; 1487 return err;
@@ -1487,6 +1512,7 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1487{ 1512{
1488 struct net_device *reg_dev = NULL; 1513 struct net_device *reg_dev = NULL;
1489 struct iphdr *encap; 1514 struct iphdr *encap;
1515 struct net *net = dev_net(skb->dev);
1490 1516
1491 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); 1517 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1492 /* 1518 /*
@@ -1501,8 +1527,8 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1501 return 1; 1527 return 1;
1502 1528
1503 read_lock(&mrt_lock); 1529 read_lock(&mrt_lock);
1504 if (reg_vif_num >= 0) 1530 if (net->ipv4.mroute_reg_vif_num >= 0)
1505 reg_dev = vif_table[reg_vif_num].dev; 1531 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1506 if (reg_dev) 1532 if (reg_dev)
1507 dev_hold(reg_dev); 1533 dev_hold(reg_dev);
1508 read_unlock(&mrt_lock); 1534 read_unlock(&mrt_lock);
@@ -1537,13 +1563,14 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1537int pim_rcv_v1(struct sk_buff * skb) 1563int pim_rcv_v1(struct sk_buff * skb)
1538{ 1564{
1539 struct igmphdr *pim; 1565 struct igmphdr *pim;
1566 struct net *net = dev_net(skb->dev);
1540 1567
1541 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 1568 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1542 goto drop; 1569 goto drop;
1543 1570
1544 pim = igmp_hdr(skb); 1571 pim = igmp_hdr(skb);
1545 1572
1546 if (!mroute_do_pim || 1573 if (!net->ipv4.mroute_do_pim ||
1547 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 1574 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1548 goto drop; 1575 goto drop;
1549 1576
@@ -1583,7 +1610,8 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1583{ 1610{
1584 int ct; 1611 int ct;
1585 struct rtnexthop *nhp; 1612 struct rtnexthop *nhp;
1586 struct net_device *dev = vif_table[c->mfc_parent].dev; 1613 struct net *net = mfc_net(c);
1614 struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1587 u8 *b = skb_tail_pointer(skb); 1615 u8 *b = skb_tail_pointer(skb);
1588 struct rtattr *mp_head; 1616 struct rtattr *mp_head;
1589 1617
@@ -1599,7 +1627,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1599 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 1627 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1600 nhp->rtnh_flags = 0; 1628 nhp->rtnh_flags = 0;
1601 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 1629 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1602 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; 1630 nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1603 nhp->rtnh_len = sizeof(*nhp); 1631 nhp->rtnh_len = sizeof(*nhp);
1604 } 1632 }
1605 } 1633 }
@@ -1613,14 +1641,15 @@ rtattr_failure:
1613 return -EMSGSIZE; 1641 return -EMSGSIZE;
1614} 1642}
1615 1643
1616int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) 1644int ipmr_get_route(struct net *net,
1645 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1617{ 1646{
1618 int err; 1647 int err;
1619 struct mfc_cache *cache; 1648 struct mfc_cache *cache;
1620 struct rtable *rt = skb->rtable; 1649 struct rtable *rt = skb->rtable;
1621 1650
1622 read_lock(&mrt_lock); 1651 read_lock(&mrt_lock);
1623 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); 1652 cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1624 1653
1625 if (cache == NULL) { 1654 if (cache == NULL) {
1626 struct sk_buff *skb2; 1655 struct sk_buff *skb2;
@@ -1651,7 +1680,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1651 iph->saddr = rt->rt_src; 1680 iph->saddr = rt->rt_src;
1652 iph->daddr = rt->rt_dst; 1681 iph->daddr = rt->rt_dst;
1653 iph->version = 0; 1682 iph->version = 0;
1654 err = ipmr_cache_unresolved(vif, skb2); 1683 err = ipmr_cache_unresolved(net, vif, skb2);
1655 read_unlock(&mrt_lock); 1684 read_unlock(&mrt_lock);
1656 return err; 1685 return err;
1657 } 1686 }
@@ -1668,17 +1697,19 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1668 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif 1697 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1669 */ 1698 */
1670struct ipmr_vif_iter { 1699struct ipmr_vif_iter {
1700 struct seq_net_private p;
1671 int ct; 1701 int ct;
1672}; 1702};
1673 1703
1674static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter, 1704static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1705 struct ipmr_vif_iter *iter,
1675 loff_t pos) 1706 loff_t pos)
1676{ 1707{
1677 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { 1708 for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1678 if (!VIF_EXISTS(iter->ct)) 1709 if (!VIF_EXISTS(net, iter->ct))
1679 continue; 1710 continue;
1680 if (pos-- == 0) 1711 if (pos-- == 0)
1681 return &vif_table[iter->ct]; 1712 return &net->ipv4.vif_table[iter->ct];
1682 } 1713 }
1683 return NULL; 1714 return NULL;
1684} 1715}
@@ -1686,23 +1717,26 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1686static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 1717static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1687 __acquires(mrt_lock) 1718 __acquires(mrt_lock)
1688{ 1719{
1720 struct net *net = seq_file_net(seq);
1721
1689 read_lock(&mrt_lock); 1722 read_lock(&mrt_lock);
1690 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) 1723 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1691 : SEQ_START_TOKEN; 1724 : SEQ_START_TOKEN;
1692} 1725}
1693 1726
1694static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1727static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1695{ 1728{
1696 struct ipmr_vif_iter *iter = seq->private; 1729 struct ipmr_vif_iter *iter = seq->private;
1730 struct net *net = seq_file_net(seq);
1697 1731
1698 ++*pos; 1732 ++*pos;
1699 if (v == SEQ_START_TOKEN) 1733 if (v == SEQ_START_TOKEN)
1700 return ipmr_vif_seq_idx(iter, 0); 1734 return ipmr_vif_seq_idx(net, iter, 0);
1701 1735
1702 while (++iter->ct < maxvif) { 1736 while (++iter->ct < net->ipv4.maxvif) {
1703 if (!VIF_EXISTS(iter->ct)) 1737 if (!VIF_EXISTS(net, iter->ct))
1704 continue; 1738 continue;
1705 return &vif_table[iter->ct]; 1739 return &net->ipv4.vif_table[iter->ct];
1706 } 1740 }
1707 return NULL; 1741 return NULL;
1708} 1742}
@@ -1715,6 +1749,8 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1715 1749
1716static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 1750static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1717{ 1751{
1752 struct net *net = seq_file_net(seq);
1753
1718 if (v == SEQ_START_TOKEN) { 1754 if (v == SEQ_START_TOKEN) {
1719 seq_puts(seq, 1755 seq_puts(seq,
1720 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 1756 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
@@ -1724,7 +1760,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1724 1760
1725 seq_printf(seq, 1761 seq_printf(seq,
1726 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 1762 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1727 vif - vif_table, 1763 vif - net->ipv4.vif_table,
1728 name, vif->bytes_in, vif->pkt_in, 1764 name, vif->bytes_in, vif->pkt_in,
1729 vif->bytes_out, vif->pkt_out, 1765 vif->bytes_out, vif->pkt_out,
1730 vif->flags, vif->local, vif->remote); 1766 vif->flags, vif->local, vif->remote);
@@ -1741,8 +1777,8 @@ static const struct seq_operations ipmr_vif_seq_ops = {
1741 1777
1742static int ipmr_vif_open(struct inode *inode, struct file *file) 1778static int ipmr_vif_open(struct inode *inode, struct file *file)
1743{ 1779{
1744 return seq_open_private(file, &ipmr_vif_seq_ops, 1780 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1745 sizeof(struct ipmr_vif_iter)); 1781 sizeof(struct ipmr_vif_iter));
1746} 1782}
1747 1783
1748static const struct file_operations ipmr_vif_fops = { 1784static const struct file_operations ipmr_vif_fops = {
@@ -1750,23 +1786,26 @@ static const struct file_operations ipmr_vif_fops = {
1750 .open = ipmr_vif_open, 1786 .open = ipmr_vif_open,
1751 .read = seq_read, 1787 .read = seq_read,
1752 .llseek = seq_lseek, 1788 .llseek = seq_lseek,
1753 .release = seq_release_private, 1789 .release = seq_release_net,
1754}; 1790};
1755 1791
1756struct ipmr_mfc_iter { 1792struct ipmr_mfc_iter {
1793 struct seq_net_private p;
1757 struct mfc_cache **cache; 1794 struct mfc_cache **cache;
1758 int ct; 1795 int ct;
1759}; 1796};
1760 1797
1761 1798
1762static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) 1799static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1800 struct ipmr_mfc_iter *it, loff_t pos)
1763{ 1801{
1764 struct mfc_cache *mfc; 1802 struct mfc_cache *mfc;
1765 1803
1766 it->cache = mfc_cache_array; 1804 it->cache = net->ipv4.mfc_cache_array;
1767 read_lock(&mrt_lock); 1805 read_lock(&mrt_lock);
1768 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) 1806 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1769 for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) 1807 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1808 mfc; mfc = mfc->next)
1770 if (pos-- == 0) 1809 if (pos-- == 0)
1771 return mfc; 1810 return mfc;
1772 read_unlock(&mrt_lock); 1811 read_unlock(&mrt_lock);
@@ -1774,7 +1813,8 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1774 it->cache = &mfc_unres_queue; 1813 it->cache = &mfc_unres_queue;
1775 spin_lock_bh(&mfc_unres_lock); 1814 spin_lock_bh(&mfc_unres_lock);
1776 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) 1815 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1777 if (pos-- == 0) 1816 if (net_eq(mfc_net(mfc), net) &&
1817 pos-- == 0)
1778 return mfc; 1818 return mfc;
1779 spin_unlock_bh(&mfc_unres_lock); 1819 spin_unlock_bh(&mfc_unres_lock);
1780 1820
@@ -1786,9 +1826,11 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1786static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 1826static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1787{ 1827{
1788 struct ipmr_mfc_iter *it = seq->private; 1828 struct ipmr_mfc_iter *it = seq->private;
1829 struct net *net = seq_file_net(seq);
1830
1789 it->cache = NULL; 1831 it->cache = NULL;
1790 it->ct = 0; 1832 it->ct = 0;
1791 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) 1833 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1792 : SEQ_START_TOKEN; 1834 : SEQ_START_TOKEN;
1793} 1835}
1794 1836
@@ -1796,11 +1838,12 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1796{ 1838{
1797 struct mfc_cache *mfc = v; 1839 struct mfc_cache *mfc = v;
1798 struct ipmr_mfc_iter *it = seq->private; 1840 struct ipmr_mfc_iter *it = seq->private;
1841 struct net *net = seq_file_net(seq);
1799 1842
1800 ++*pos; 1843 ++*pos;
1801 1844
1802 if (v == SEQ_START_TOKEN) 1845 if (v == SEQ_START_TOKEN)
1803 return ipmr_mfc_seq_idx(seq->private, 0); 1846 return ipmr_mfc_seq_idx(net, seq->private, 0);
1804 1847
1805 if (mfc->next) 1848 if (mfc->next)
1806 return mfc->next; 1849 return mfc->next;
@@ -1808,10 +1851,10 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1808 if (it->cache == &mfc_unres_queue) 1851 if (it->cache == &mfc_unres_queue)
1809 goto end_of_list; 1852 goto end_of_list;
1810 1853
1811 BUG_ON(it->cache != mfc_cache_array); 1854 BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1812 1855
1813 while (++it->ct < MFC_LINES) { 1856 while (++it->ct < MFC_LINES) {
1814 mfc = mfc_cache_array[it->ct]; 1857 mfc = net->ipv4.mfc_cache_array[it->ct];
1815 if (mfc) 1858 if (mfc)
1816 return mfc; 1859 return mfc;
1817 } 1860 }
@@ -1823,6 +1866,8 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1823 1866
1824 spin_lock_bh(&mfc_unres_lock); 1867 spin_lock_bh(&mfc_unres_lock);
1825 mfc = mfc_unres_queue; 1868 mfc = mfc_unres_queue;
1869 while (mfc && !net_eq(mfc_net(mfc), net))
1870 mfc = mfc->next;
1826 if (mfc) 1871 if (mfc)
1827 return mfc; 1872 return mfc;
1828 1873
@@ -1836,16 +1881,18 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1836static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 1881static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1837{ 1882{
1838 struct ipmr_mfc_iter *it = seq->private; 1883 struct ipmr_mfc_iter *it = seq->private;
1884 struct net *net = seq_file_net(seq);
1839 1885
1840 if (it->cache == &mfc_unres_queue) 1886 if (it->cache == &mfc_unres_queue)
1841 spin_unlock_bh(&mfc_unres_lock); 1887 spin_unlock_bh(&mfc_unres_lock);
1842 else if (it->cache == mfc_cache_array) 1888 else if (it->cache == net->ipv4.mfc_cache_array)
1843 read_unlock(&mrt_lock); 1889 read_unlock(&mrt_lock);
1844} 1890}
1845 1891
1846static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 1892static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1847{ 1893{
1848 int n; 1894 int n;
1895 struct net *net = seq_file_net(seq);
1849 1896
1850 if (v == SEQ_START_TOKEN) { 1897 if (v == SEQ_START_TOKEN) {
1851 seq_puts(seq, 1898 seq_puts(seq,
@@ -1866,9 +1913,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1866 mfc->mfc_un.res.wrong_if); 1913 mfc->mfc_un.res.wrong_if);
1867 for (n = mfc->mfc_un.res.minvif; 1914 for (n = mfc->mfc_un.res.minvif;
1868 n < mfc->mfc_un.res.maxvif; n++ ) { 1915 n < mfc->mfc_un.res.maxvif; n++ ) {
1869 if (VIF_EXISTS(n) 1916 if (VIF_EXISTS(net, n) &&
1870 && mfc->mfc_un.res.ttls[n] < 255) 1917 mfc->mfc_un.res.ttls[n] < 255)
1871 seq_printf(seq, 1918 seq_printf(seq,
1872 " %2d:%-3d", 1919 " %2d:%-3d",
1873 n, mfc->mfc_un.res.ttls[n]); 1920 n, mfc->mfc_un.res.ttls[n]);
1874 } 1921 }
@@ -1892,8 +1939,8 @@ static const struct seq_operations ipmr_mfc_seq_ops = {
1892 1939
1893static int ipmr_mfc_open(struct inode *inode, struct file *file) 1940static int ipmr_mfc_open(struct inode *inode, struct file *file)
1894{ 1941{
1895 return seq_open_private(file, &ipmr_mfc_seq_ops, 1942 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1896 sizeof(struct ipmr_mfc_iter)); 1943 sizeof(struct ipmr_mfc_iter));
1897} 1944}
1898 1945
1899static const struct file_operations ipmr_mfc_fops = { 1946static const struct file_operations ipmr_mfc_fops = {
@@ -1901,7 +1948,7 @@ static const struct file_operations ipmr_mfc_fops = {
1901 .open = ipmr_mfc_open, 1948 .open = ipmr_mfc_open,
1902 .read = seq_read, 1949 .read = seq_read,
1903 .llseek = seq_lseek, 1950 .llseek = seq_lseek,
1904 .release = seq_release_private, 1951 .release = seq_release_net,
1905}; 1952};
1906#endif 1953#endif
1907 1954
@@ -1915,6 +1962,65 @@ static struct net_protocol pim_protocol = {
1915/* 1962/*
1916 * Setup for IP multicast routing 1963 * Setup for IP multicast routing
1917 */ 1964 */
1965static int __net_init ipmr_net_init(struct net *net)
1966{
1967 int err = 0;
1968
1969 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1970 GFP_KERNEL);
1971 if (!net->ipv4.vif_table) {
1972 err = -ENOMEM;
1973 goto fail;
1974 }
1975
1976 /* Forwarding cache */
1977 net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1978 sizeof(struct mfc_cache *),
1979 GFP_KERNEL);
1980 if (!net->ipv4.mfc_cache_array) {
1981 err = -ENOMEM;
1982 goto fail_mfc_cache;
1983 }
1984
1985#ifdef CONFIG_IP_PIMSM
1986 net->ipv4.mroute_reg_vif_num = -1;
1987#endif
1988
1989#ifdef CONFIG_PROC_FS
1990 err = -ENOMEM;
1991 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1992 goto proc_vif_fail;
1993 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1994 goto proc_cache_fail;
1995#endif
1996 return 0;
1997
1998#ifdef CONFIG_PROC_FS
1999proc_cache_fail:
2000 proc_net_remove(net, "ip_mr_vif");
2001proc_vif_fail:
2002 kfree(net->ipv4.mfc_cache_array);
2003#endif
2004fail_mfc_cache:
2005 kfree(net->ipv4.vif_table);
2006fail:
2007 return err;
2008}
2009
2010static void __net_exit ipmr_net_exit(struct net *net)
2011{
2012#ifdef CONFIG_PROC_FS
2013 proc_net_remove(net, "ip_mr_cache");
2014 proc_net_remove(net, "ip_mr_vif");
2015#endif
2016 kfree(net->ipv4.mfc_cache_array);
2017 kfree(net->ipv4.vif_table);
2018}
2019
2020static struct pernet_operations ipmr_net_ops = {
2021 .init = ipmr_net_init,
2022 .exit = ipmr_net_exit,
2023};
1918 2024
1919int __init ip_mr_init(void) 2025int __init ip_mr_init(void)
1920{ 2026{
@@ -1927,26 +2033,20 @@ int __init ip_mr_init(void)
1927 if (!mrt_cachep) 2033 if (!mrt_cachep)
1928 return -ENOMEM; 2034 return -ENOMEM;
1929 2035
2036 err = register_pernet_subsys(&ipmr_net_ops);
2037 if (err)
2038 goto reg_pernet_fail;
2039
1930 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); 2040 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1931 err = register_netdevice_notifier(&ip_mr_notifier); 2041 err = register_netdevice_notifier(&ip_mr_notifier);
1932 if (err) 2042 if (err)
1933 goto reg_notif_fail; 2043 goto reg_notif_fail;
1934#ifdef CONFIG_PROC_FS
1935 err = -ENOMEM;
1936 if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
1937 goto proc_vif_fail;
1938 if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1939 goto proc_cache_fail;
1940#endif
1941 return 0; 2044 return 0;
1942#ifdef CONFIG_PROC_FS 2045
1943proc_cache_fail:
1944 proc_net_remove(&init_net, "ip_mr_vif");
1945proc_vif_fail:
1946 unregister_netdevice_notifier(&ip_mr_notifier);
1947#endif
1948reg_notif_fail: 2046reg_notif_fail:
1949 del_timer(&ipmr_expire_timer); 2047 del_timer(&ipmr_expire_timer);
2048 unregister_pernet_subsys(&ipmr_net_ops);
2049reg_pernet_fail:
1950 kmem_cache_destroy(mrt_cachep); 2050 kmem_cache_destroy(mrt_cachep);
1951 return err; 2051 return err;
1952} 2052}
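The ipmr.c conversion above moves the vif and MFC tables into struct net and registers ipmr_net_ops, so each namespace gets its own copy: allocate in ->init with goto-based unwinding on failure, free everything in ->exit. A rough user-space sketch of that allocate/unwind/teardown shape, using made-up names (fake_net, fake_ipmr_init) rather than the kernel API:

#include <stdio.h>
#include <stdlib.h>

struct fake_net {                       /* stand-in for the per-net ipmr state */
        void *vif_table;
        void *mfc_cache_array;
};

static int fake_ipmr_init(struct fake_net *net)
{
        int err = -1;

        net->vif_table = calloc(32, sizeof(long));
        if (!net->vif_table)
                goto fail;

        net->mfc_cache_array = calloc(64, sizeof(void *));
        if (!net->mfc_cache_array)
                goto fail_mfc_cache;

        return 0;

fail_mfc_cache:                         /* undo in reverse order, like the kernel code */
        free(net->vif_table);
fail:
        return err;
}

static void fake_ipmr_exit(struct fake_net *net)
{
        free(net->mfc_cache_array);
        free(net->vif_table);
}

int main(void)
{
        struct fake_net net;

        if (fake_ipmr_init(&net) == 0) {
                puts("init ok");
                fake_ipmr_exit(&net);
        }
        return 0;
}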
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 182f845de92f..d9521f6f9ed0 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1292,7 +1292,7 @@ static struct nf_conntrack_helper snmp_helper __read_mostly = {
1292 .expect_policy = &snmp_exp_policy, 1292 .expect_policy = &snmp_exp_policy,
1293 .name = "snmp", 1293 .name = "snmp",
1294 .tuple.src.l3num = AF_INET, 1294 .tuple.src.l3num = AF_INET,
1295 .tuple.src.u.udp.port = __constant_htons(SNMP_PORT), 1295 .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
1296 .tuple.dst.protonum = IPPROTO_UDP, 1296 .tuple.dst.protonum = IPPROTO_UDP,
1297}; 1297};
1298 1298
@@ -1302,7 +1302,7 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
1302 .expect_policy = &snmp_exp_policy, 1302 .expect_policy = &snmp_exp_policy,
1303 .name = "snmp_trap", 1303 .name = "snmp_trap",
1304 .tuple.src.l3num = AF_INET, 1304 .tuple.src.l3num = AF_INET,
1305 .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT), 1305 .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT),
1306 .tuple.dst.protonum = IPPROTO_UDP, 1306 .tuple.dst.protonum = IPPROTO_UDP,
1307}; 1307};
1308 1308
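The two snmp helper hunks only swap __constant_htons() for cpu_to_be16(), which converts a host-order port to wire (big-endian) order and is safe in static initializers. The user-space equivalent is plain htons(); a trivial sketch (161/162 are the usual SNMP and SNMP-trap ports, assumed here rather than taken from the kernel header):

#include <stdio.h>
#include <arpa/inet.h>

int main(void)
{
        unsigned short snmp = htons(161);       /* host order -> big-endian wire order */
        unsigned short trap = htons(162);

        printf("0x%04x 0x%04x\n", snmp, trap);
        return 0;
}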
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index eb62e58bff79..cf0cdeeb1db0 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,8 +54,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
54 int orphans, sockets; 54 int orphans, sockets;
55 55
56 local_bh_disable(); 56 local_bh_disable();
57 orphans = percpu_counter_sum_positive(&tcp_orphan_count), 57 orphans = percpu_counter_sum_positive(&tcp_orphan_count);
58 sockets = percpu_counter_sum_positive(&tcp_sockets_allocated), 58 sockets = percpu_counter_sum_positive(&tcp_sockets_allocated);
59 local_bh_enable(); 59 local_bh_enable();
60 60
61 socket_seq_show(seq); 61 socket_seq_show(seq);
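The sockstat hunk above is purely a readability fix: the two percpu_counter_sum_positive() assignments ended in commas, so they chained into one expression via the comma operator; behaviour was unchanged, but semicolons make the intent obvious. A minimal illustration of the pitfall:

#include <stdio.h>

int main(void)
{
        int a, b;

        /* Comma operator: both assignments still execute, but they read as
         * a single statement -- the kernel fix swaps the ',' for ';'. */
        a = 1, b = 2;
        printf("%d %d\n", a, b);
        return 0;
}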
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index dff8bc4e0fac..f774651f0a47 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -493,6 +493,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
493 493
494 ipc.addr = inet->saddr; 494 ipc.addr = inet->saddr;
495 ipc.opt = NULL; 495 ipc.opt = NULL;
496 ipc.shtx.flags = 0;
496 ipc.oif = sk->sk_bound_dev_if; 497 ipc.oif = sk->sk_bound_dev_if;
497 498
498 if (msg->msg_controllen) { 499 if (msg->msg_controllen) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 97f71153584f..5caee609be06 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -151,7 +151,7 @@ static void rt_emergency_hash_rebuild(struct net *net);
151 151
152static struct dst_ops ipv4_dst_ops = { 152static struct dst_ops ipv4_dst_ops = {
153 .family = AF_INET, 153 .family = AF_INET,
154 .protocol = __constant_htons(ETH_P_IP), 154 .protocol = cpu_to_be16(ETH_P_IP),
155 .gc = rt_garbage_collect, 155 .gc = rt_garbage_collect,
156 .check = ipv4_dst_check, 156 .check = ipv4_dst_check,
157 .destroy = ipv4_dst_destroy, 157 .destroy = ipv4_dst_destroy,
@@ -2696,7 +2696,7 @@ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
2696 2696
2697static struct dst_ops ipv4_dst_blackhole_ops = { 2697static struct dst_ops ipv4_dst_blackhole_ops = {
2698 .family = AF_INET, 2698 .family = AF_INET,
2699 .protocol = __constant_htons(ETH_P_IP), 2699 .protocol = cpu_to_be16(ETH_P_IP),
2700 .destroy = ipv4_dst_destroy, 2700 .destroy = ipv4_dst_destroy,
2701 .check = ipv4_dst_check, 2701 .check = ipv4_dst_check,
2702 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2702 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
@@ -2779,7 +2779,8 @@ int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2779 return ip_route_output_flow(net, rp, flp, NULL, 0); 2779 return ip_route_output_flow(net, rp, flp, NULL, 0);
2780} 2780}
2781 2781
2782static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 2782static int rt_fill_info(struct net *net,
2783 struct sk_buff *skb, u32 pid, u32 seq, int event,
2783 int nowait, unsigned int flags) 2784 int nowait, unsigned int flags)
2784{ 2785{
2785 struct rtable *rt = skb->rtable; 2786 struct rtable *rt = skb->rtable;
@@ -2844,8 +2845,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2844 __be32 dst = rt->rt_dst; 2845 __be32 dst = rt->rt_dst;
2845 2846
2846 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && 2847 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2847 IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) { 2848 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2848 int err = ipmr_get_route(skb, r, nowait); 2849 int err = ipmr_get_route(net, skb, r, nowait);
2849 if (err <= 0) { 2850 if (err <= 0) {
2850 if (!nowait) { 2851 if (!nowait) {
2851 if (err == 0) 2852 if (err == 0)
@@ -2950,7 +2951,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2950 if (rtm->rtm_flags & RTM_F_NOTIFY) 2951 if (rtm->rtm_flags & RTM_F_NOTIFY)
2951 rt->rt_flags |= RTCF_NOTIFY; 2952 rt->rt_flags |= RTCF_NOTIFY;
2952 2953
2953 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 2954 err = rt_fill_info(net, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
2954 RTM_NEWROUTE, 0, 0); 2955 RTM_NEWROUTE, 0, 0);
2955 if (err <= 0) 2956 if (err <= 0)
2956 goto errout_free; 2957 goto errout_free;
@@ -2988,7 +2989,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2988 if (rt_is_expired(rt)) 2989 if (rt_is_expired(rt))
2989 continue; 2990 continue;
2990 skb->dst = dst_clone(&rt->u.dst); 2991 skb->dst = dst_clone(&rt->u.dst);
2991 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, 2992 if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
2992 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 2993 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
2993 1, NLM_F_MULTI) <= 0) { 2994 1, NLM_F_MULTI) <= 0) {
2994 dst_release(xchg(&skb->dst, NULL)); 2995 dst_release(xchg(&skb->dst, NULL));
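In route.c the point is to stop consulting init_net from rt_fill_info(): the caller now passes the relevant struct net explicitly, so multicast route dumps check MC_FORWARDING and call ipmr_get_route() in the right namespace. A small sketch of replacing an implicit global with an explicit context argument (struct ctx and fill_info are illustrative stand-ins, not kernel types):

#include <stdio.h>

struct ctx { int mc_forwarding; };              /* stand-in for per-net state */

/* Before: the function read a global; after: the caller hands in its namespace. */
static int fill_info(const struct ctx *net)
{
        return net->mc_forwarding ? 1 : 0;
}

int main(void)
{
        struct ctx a = { 1 }, b = { 0 };

        printf("%d %d\n", fill_info(&a), fill_info(&b));
        return 0;
}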
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 76b148bcb0dc..d3f9beee74c0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -683,7 +683,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
683 683
684 err = -EPIPE; 684 err = -EPIPE;
685 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 685 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
686 goto do_error; 686 goto out_err;
687 687
688 while (psize > 0) { 688 while (psize > 0) {
689 struct sk_buff *skb = tcp_write_queue_tail(sk); 689 struct sk_buff *skb = tcp_write_queue_tail(sk);
@@ -854,7 +854,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
854 854
855 err = -EPIPE; 855 err = -EPIPE;
856 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 856 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
857 goto do_error; 857 goto out_err;
858 858
859 while (--iovlen >= 0) { 859 while (--iovlen >= 0) {
860 int seglen = iov->iov_len; 860 int seglen = iov->iov_len;
@@ -2478,23 +2478,23 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2478 struct tcphdr *th2; 2478 struct tcphdr *th2;
2479 unsigned int thlen; 2479 unsigned int thlen;
2480 unsigned int flags; 2480 unsigned int flags;
2481 unsigned int total;
2482 unsigned int mss = 1; 2481 unsigned int mss = 1;
2483 int flush = 1; 2482 int flush = 1;
2483 int i;
2484 2484
2485 if (!pskb_may_pull(skb, sizeof(*th))) 2485 th = skb_gro_header(skb, sizeof(*th));
2486 if (unlikely(!th))
2486 goto out; 2487 goto out;
2487 2488
2488 th = tcp_hdr(skb);
2489 thlen = th->doff * 4; 2489 thlen = th->doff * 4;
2490 if (thlen < sizeof(*th)) 2490 if (thlen < sizeof(*th))
2491 goto out; 2491 goto out;
2492 2492
2493 if (!pskb_may_pull(skb, thlen)) 2493 th = skb_gro_header(skb, thlen);
2494 if (unlikely(!th))
2494 goto out; 2495 goto out;
2495 2496
2496 th = tcp_hdr(skb); 2497 skb_gro_pull(skb, thlen);
2497 __skb_pull(skb, thlen);
2498 2498
2499 flags = tcp_flag_word(th); 2499 flags = tcp_flag_word(th);
2500 2500
@@ -2504,7 +2504,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2504 2504
2505 th2 = tcp_hdr(p); 2505 th2 = tcp_hdr(p);
2506 2506
2507 if (th->source != th2->source || th->dest != th2->dest) { 2507 if ((th->source ^ th2->source) | (th->dest ^ th2->dest)) {
2508 NAPI_GRO_CB(p)->same_flow = 0; 2508 NAPI_GRO_CB(p)->same_flow = 0;
2509 continue; 2509 continue;
2510 } 2510 }
@@ -2519,14 +2519,15 @@ found:
2519 flush |= flags & TCP_FLAG_CWR; 2519 flush |= flags & TCP_FLAG_CWR;
2520 flush |= (flags ^ tcp_flag_word(th2)) & 2520 flush |= (flags ^ tcp_flag_word(th2)) &
2521 ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); 2521 ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH);
2522 flush |= th->ack_seq != th2->ack_seq || th->window != th2->window; 2522 flush |= (th->ack_seq ^ th2->ack_seq) | (th->window ^ th2->window);
2523 flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); 2523 for (i = sizeof(*th); !flush && i < thlen; i += 4)
2524 flush |= *(u32 *)((u8 *)th + i) ^
2525 *(u32 *)((u8 *)th2 + i);
2524 2526
2525 total = p->len;
2526 mss = skb_shinfo(p)->gso_size; 2527 mss = skb_shinfo(p)->gso_size;
2527 2528
2528 flush |= skb->len > mss || skb->len <= 0; 2529 flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb);
2529 flush |= ntohl(th2->seq) + total != ntohl(th->seq); 2530 flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
2530 2531
2531 if (flush || skb_gro_receive(head, skb)) { 2532 if (flush || skb_gro_receive(head, skb)) {
2532 mss = 1; 2533 mss = 1;
@@ -2538,7 +2539,7 @@ found:
2538 tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); 2539 tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
2539 2540
2540out_check_final: 2541out_check_final:
2541 flush = skb->len < mss; 2542 flush = skb_gro_len(skb) < mss;
2542 flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | 2543 flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST |
2543 TCP_FLAG_SYN | TCP_FLAG_FIN); 2544 TCP_FLAG_SYN | TCP_FLAG_FIN);
2544 2545
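The tcp_gro_receive() rewrite compares the candidate header against the held packet with XOR and bitwise OR instead of '!='/'||', accumulating every difference into one flush word without data-dependent branches, and walks the TCP option area one 32-bit word at a time. A user-space sketch of that comparison style, using an invented struct hdr rather than real TCP headers:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct hdr { uint16_t source, dest; uint32_t opts[3]; };

static int headers_differ(const struct hdr *a, const struct hdr *b)
{
        uint32_t flush;
        size_t i;

        /* OR together XORed fields; flush stays 0 only if every field matches. */
        flush  = (uint32_t)(a->source ^ b->source);
        flush |= (uint32_t)(a->dest ^ b->dest);
        for (i = 0; i < 3; i++)                 /* word-wise option compare */
                flush |= a->opts[i] ^ b->opts[i];

        return flush != 0;
}

int main(void)
{
        struct hdr x, y;

        memset(&x, 0, sizeof(x));
        y = x;
        y.opts[2] = 1;
        printf("%d %d\n", headers_differ(&x, &x), headers_differ(&x, &y));
        return 0;
}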
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 7eb7636db0d0..3b53fd1af23f 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -149,16 +149,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
149 tcp_slow_start(tp); 149 tcp_slow_start(tp);
150 else { 150 else {
151 bictcp_update(ca, tp->snd_cwnd); 151 bictcp_update(ca, tp->snd_cwnd);
152 152 tcp_cong_avoid_ai(tp, ca->cnt);
153 /* In dangerous area, increase slowly.
154 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
155 */
156 if (tp->snd_cwnd_cnt >= ca->cnt) {
157 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
158 tp->snd_cwnd++;
159 tp->snd_cwnd_cnt = 0;
160 } else
161 tp->snd_cwnd_cnt++;
162 } 153 }
163 154
164} 155}
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 4ec5b4e97c4e..e92beb9e55e0 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -336,6 +336,19 @@ void tcp_slow_start(struct tcp_sock *tp)
336} 336}
337EXPORT_SYMBOL_GPL(tcp_slow_start); 337EXPORT_SYMBOL_GPL(tcp_slow_start);
338 338
339/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */
340void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w)
341{
342 if (tp->snd_cwnd_cnt >= w) {
343 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
344 tp->snd_cwnd++;
345 tp->snd_cwnd_cnt = 0;
346 } else {
347 tp->snd_cwnd_cnt++;
348 }
349}
350EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
351
339/* 352/*
340 * TCP Reno congestion control 353 * TCP Reno congestion control
341 * This is special case used for fallback as well. 354 * This is special case used for fallback as well.
@@ -365,13 +378,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
365 tp->snd_cwnd++; 378 tp->snd_cwnd++;
366 } 379 }
367 } else { 380 } else {
368 /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */ 381 tcp_cong_avoid_ai(tp, tp->snd_cwnd);
369 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
370 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
371 tp->snd_cwnd++;
372 tp->snd_cwnd_cnt = 0;
373 } else
374 tp->snd_cwnd_cnt++;
375 } 382 }
376} 383}
377EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); 384EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
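tcp_cong_avoid_ai() pulls the classic additive-increase step, grow cwnd by one segment for every w ACKs, into a shared helper, which the BIC and CUBIC hunks in this series then call instead of open-coding it. A standalone sketch of the same counter logic; the field names mirror the kernel's, but the surrounding struct and the simulation loop are made up:

#include <stdio.h>

struct fake_tp {
        unsigned int snd_cwnd;
        unsigned int snd_cwnd_cnt;
        unsigned int snd_cwnd_clamp;
};

/* Roughly snd_cwnd += 1/w per ACK: count ACKs and only widen the window
 * (up to the clamp) once the counter reaches w. */
static void cong_avoid_ai(struct fake_tp *tp, unsigned int w)
{
        if (tp->snd_cwnd_cnt >= w) {
                if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                        tp->snd_cwnd++;
                tp->snd_cwnd_cnt = 0;
        } else {
                tp->snd_cwnd_cnt++;
        }
}

int main(void)
{
        struct fake_tp tp = { 10, 0, 100 };
        int ack;

        for (ack = 0; ack < 25; ack++)          /* Reno-style: w == current cwnd */
                cong_avoid_ai(&tp, tp.snd_cwnd);
        printf("cwnd after 25 acks: %u\n", tp.snd_cwnd);
        return 0;
}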
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index ee467ec40c4f..71d5f2f29fa6 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -294,16 +294,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
294 tcp_slow_start(tp); 294 tcp_slow_start(tp);
295 } else { 295 } else {
296 bictcp_update(ca, tp->snd_cwnd); 296 bictcp_update(ca, tp->snd_cwnd);
297 297 tcp_cong_avoid_ai(tp, ca->cnt);
298 /* In dangerous area, increase slowly.
299 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
300 */
301 if (tp->snd_cwnd_cnt >= ca->cnt) {
302 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
303 tp->snd_cwnd++;
304 tp->snd_cwnd_cnt = 0;
305 } else
306 tp->snd_cwnd_cnt++;
307 } 298 }
308 299
309} 300}
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 937549b8a921..26d5c7fc7de5 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -115,8 +115,7 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt
115 return; 115 return;
116 116
117 /* achieved throughput calculations */ 117 /* achieved throughput calculations */
118 if (icsk->icsk_ca_state != TCP_CA_Open && 118 if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_Disorder))) {
119 icsk->icsk_ca_state != TCP_CA_Disorder) {
120 ca->packetcount = 0; 119 ca->packetcount = 0;
121 ca->lasttime = now; 120 ca->lasttime = now;
122 return; 121 return;
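The htcp hunk folds the two equality tests on icsk_ca_state into one mask test: shift 1 by the state number and AND it against the OR of the allowed state flags (TCPF_CA_Open | TCPF_CA_Disorder). A minimal sketch of the idiom with illustrative enum values:

#include <stdio.h>

enum { CA_OPEN, CA_DISORDER, CA_CWR, CA_RECOVERY, CA_LOSS };
#define CAF_OPEN        (1 << CA_OPEN)
#define CAF_DISORDER    (1 << CA_DISORDER)

static int in_open_or_disorder(int state)
{
        /* One AND instead of two '==' comparisons. */
        return (1 << state) & (CAF_OPEN | CAF_DISORDER);
}

int main(void)
{
        printf("%d %d %d\n",
               !!in_open_or_disorder(CA_OPEN),
               !!in_open_or_disorder(CA_DISORDER),
               !!in_open_or_disorder(CA_LOSS));
        return 0;
}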
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c28976a7e596..5ecd7aa25979 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1178,10 +1178,18 @@ static void tcp_mark_lost_retrans(struct sock *sk)
1178 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) 1178 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
1179 continue; 1179 continue;
1180 1180
1181 if (after(received_upto, ack_seq) && 1181 /* TODO: We would like to get rid of tcp_is_fack(tp) only
1182 (tcp_is_fack(tp) || 1182 * constraint here (see above) but figuring out that at
1183 !before(received_upto, 1183 * least tp->reordering SACK blocks reside between ack_seq
1184 ack_seq + tp->reordering * tp->mss_cache))) { 1184 * and received_upto is not easy task to do cheaply with
1185 * the available datastructures.
1186 *
1187 * Whether FACK should check here for tp->reordering segs
1188 * in-between one could argue for either way (it would be
1189 * rather simple to implement as we could count fack_count
1190 * during the walk and do tp->fackets_out - fack_count).
1191 */
1192 if (after(received_upto, ack_seq)) {
1185 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1193 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1186 tp->retrans_out -= tcp_skb_pcount(skb); 1194 tp->retrans_out -= tcp_skb_pcount(skb);
1187 1195
@@ -2453,6 +2461,44 @@ static int tcp_time_to_recover(struct sock *sk)
2453 return 0; 2461 return 0;
2454} 2462}
2455 2463
2464/* New heuristics: it is possible only after we switched to restart timer
2465 * each time when something is ACKed. Hence, we can detect timed out packets
2466 * during fast retransmit without falling to slow start.
2467 *
2468 * Usefulness of this as is very questionable, since we should know which of
2469 * the segments is the next to timeout which is relatively expensive to find
2470 * in general case unless we add some data structure just for that. The
2471 * current approach certainly won't find the right one too often and when it
2472 * finally does find _something_ it usually marks large part of the window
2473 * right away (because a retransmission with a larger timestamp blocks the
2474 * loop from advancing). -ij
2475 */
2476static void tcp_timeout_skbs(struct sock *sk)
2477{
2478 struct tcp_sock *tp = tcp_sk(sk);
2479 struct sk_buff *skb;
2480
2481 if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
2482 return;
2483
2484 skb = tp->scoreboard_skb_hint;
2485 if (tp->scoreboard_skb_hint == NULL)
2486 skb = tcp_write_queue_head(sk);
2487
2488 tcp_for_write_queue_from(skb, sk) {
2489 if (skb == tcp_send_head(sk))
2490 break;
2491 if (!tcp_skb_timedout(sk, skb))
2492 break;
2493
2494 tcp_skb_mark_lost(tp, skb);
2495 }
2496
2497 tp->scoreboard_skb_hint = skb;
2498
2499 tcp_verify_left_out(tp);
2500}
2501
2456/* Mark head of queue up as lost. With RFC3517 SACK, the packets is 2502/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
2457 * is against sacked "cnt", otherwise it's against facked "cnt" 2503 * is against sacked "cnt", otherwise it's against facked "cnt"
2458 */ 2504 */
@@ -2525,30 +2571,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2525 tcp_mark_head_lost(sk, sacked_upto); 2571 tcp_mark_head_lost(sk, sacked_upto);
2526 } 2572 }
2527 2573
2528 /* New heuristics: it is possible only after we switched 2574 tcp_timeout_skbs(sk);
2529 * to restart timer each time when something is ACKed.
2530 * Hence, we can detect timed out packets during fast
2531 * retransmit without falling to slow start.
2532 */
2533 if (tcp_is_fack(tp) && tcp_head_timedout(sk)) {
2534 struct sk_buff *skb;
2535
2536 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
2537 : tcp_write_queue_head(sk);
2538
2539 tcp_for_write_queue_from(skb, sk) {
2540 if (skb == tcp_send_head(sk))
2541 break;
2542 if (!tcp_skb_timedout(sk, skb))
2543 break;
2544
2545 tcp_skb_mark_lost(tp, skb);
2546 }
2547
2548 tp->scoreboard_skb_hint = skb;
2549
2550 tcp_verify_left_out(tp);
2551 }
2552} 2575}
2553 2576
2554/* CWND moderation, preventing bursts due to too big ACKs 2577/* CWND moderation, preventing bursts due to too big ACKs
@@ -3273,18 +3296,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3273 if (tcp_is_reno(tp)) { 3296 if (tcp_is_reno(tp)) {
3274 tcp_remove_reno_sacks(sk, pkts_acked); 3297 tcp_remove_reno_sacks(sk, pkts_acked);
3275 } else { 3298 } else {
3299 int delta;
3300
3276 /* Non-retransmitted hole got filled? That's reordering */ 3301 /* Non-retransmitted hole got filled? That's reordering */
3277 if (reord < prior_fackets) 3302 if (reord < prior_fackets)
3278 tcp_update_reordering(sk, tp->fackets_out - reord, 0); 3303 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3279 3304
3280 /* No need to care for underflows here because 3305 delta = tcp_is_fack(tp) ? pkts_acked :
3281 * the lost_skb_hint gets NULLed if we're past it 3306 prior_sacked - tp->sacked_out;
3282 * (or something non-trivial happened) 3307 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3283 */
3284 if (tcp_is_fack(tp))
3285 tp->lost_cnt_hint -= pkts_acked;
3286 else
3287 tp->lost_cnt_hint -= prior_sacked - tp->sacked_out;
3288 } 3308 }
3289 3309
3290 tp->fackets_out -= min(pkts_acked, tp->fackets_out); 3310 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
@@ -3396,7 +3416,7 @@ static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
3396 3416
3397 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { 3417 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3398 flag |= FLAG_WIN_UPDATE; 3418 flag |= FLAG_WIN_UPDATE;
3399 tcp_update_wl(tp, ack, ack_seq); 3419 tcp_update_wl(tp, ack_seq);
3400 3420
3401 if (tp->snd_wnd != nwin) { 3421 if (tp->snd_wnd != nwin) {
3402 tp->snd_wnd = nwin; 3422 tp->snd_wnd = nwin;
@@ -3601,7 +3621,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3601 * No more checks are required. 3621 * No more checks are required.
3602 * Note, we use the fact that SND.UNA>=SND.WL2. 3622 * Note, we use the fact that SND.UNA>=SND.WL2.
3603 */ 3623 */
3604 tcp_update_wl(tp, ack, ack_seq); 3624 tcp_update_wl(tp, ack_seq);
3605 tp->snd_una = ack; 3625 tp->snd_una = ack;
3606 flag |= FLAG_WIN_UPDATE; 3626 flag |= FLAG_WIN_UPDATE;
3607 3627
@@ -4079,7 +4099,6 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4079 tp->rx_opt.dsack = 1; 4099 tp->rx_opt.dsack = 1;
4080 tp->duplicate_sack[0].start_seq = seq; 4100 tp->duplicate_sack[0].start_seq = seq;
4081 tp->duplicate_sack[0].end_seq = end_seq; 4101 tp->duplicate_sack[0].end_seq = end_seq;
4082 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1;
4083 } 4102 }
4084} 4103}
4085 4104
@@ -4134,8 +4153,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4134 * Decrease num_sacks. 4153 * Decrease num_sacks.
4135 */ 4154 */
4136 tp->rx_opt.num_sacks--; 4155 tp->rx_opt.num_sacks--;
4137 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
4138 tp->rx_opt.dsack;
4139 for (i = this_sack; i < tp->rx_opt.num_sacks; i++) 4156 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4140 sp[i] = sp[i + 1]; 4157 sp[i] = sp[i + 1];
4141 continue; 4158 continue;
@@ -4198,7 +4215,6 @@ new_sack:
4198 sp->start_seq = seq; 4215 sp->start_seq = seq;
4199 sp->end_seq = end_seq; 4216 sp->end_seq = end_seq;
4200 tp->rx_opt.num_sacks++; 4217 tp->rx_opt.num_sacks++;
4201 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
4202} 4218}
4203 4219
4204/* RCV.NXT advances, some SACKs should be eaten. */ 4220/* RCV.NXT advances, some SACKs should be eaten. */
@@ -4212,7 +4228,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
4212 /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ 4228 /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
4213 if (skb_queue_empty(&tp->out_of_order_queue)) { 4229 if (skb_queue_empty(&tp->out_of_order_queue)) {
4214 tp->rx_opt.num_sacks = 0; 4230 tp->rx_opt.num_sacks = 0;
4215 tp->rx_opt.eff_sacks = tp->rx_opt.dsack;
4216 return; 4231 return;
4217 } 4232 }
4218 4233
@@ -4233,11 +4248,8 @@ static void tcp_sack_remove(struct tcp_sock *tp)
4233 this_sack++; 4248 this_sack++;
4234 sp++; 4249 sp++;
4235 } 4250 }
4236 if (num_sacks != tp->rx_opt.num_sacks) { 4251 if (num_sacks != tp->rx_opt.num_sacks)
4237 tp->rx_opt.num_sacks = num_sacks; 4252 tp->rx_opt.num_sacks = num_sacks;
4238 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
4239 tp->rx_opt.dsack;
4240 }
4241} 4253}
4242 4254
4243/* This one checks to see if we can put data from the 4255/* This one checks to see if we can put data from the
@@ -4313,10 +4325,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4313 4325
4314 TCP_ECN_accept_cwr(tp, skb); 4326 TCP_ECN_accept_cwr(tp, skb);
4315 4327
4316 if (tp->rx_opt.dsack) { 4328 if (tp->rx_opt.dsack)
4317 tp->rx_opt.dsack = 0; 4329 tp->rx_opt.dsack = 0;
4318 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
4319 }
4320 4330
4321 /* Queue data for delivery to the user. 4331 /* Queue data for delivery to the user.
4322 * Packets in sequence go to the receive queue. 4332 * Packets in sequence go to the receive queue.
@@ -4436,7 +4446,6 @@ drop:
4436 if (tcp_is_sack(tp)) { 4446 if (tcp_is_sack(tp)) {
4437 tp->rx_opt.num_sacks = 1; 4447 tp->rx_opt.num_sacks = 1;
4438 tp->rx_opt.dsack = 0; 4448 tp->rx_opt.dsack = 0;
4439 tp->rx_opt.eff_sacks = 1;
4440 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; 4449 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4441 tp->selective_acks[0].end_seq = 4450 tp->selective_acks[0].end_seq =
4442 TCP_SKB_CB(skb)->end_seq; 4451 TCP_SKB_CB(skb)->end_seq;
@@ -5409,7 +5418,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5409 * never scaled. 5418 * never scaled.
5410 */ 5419 */
5411 tp->snd_wnd = ntohs(th->window); 5420 tp->snd_wnd = ntohs(th->window);
5412 tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); 5421 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5413 5422
5414 if (!tp->rx_opt.wscale_ok) { 5423 if (!tp->rx_opt.wscale_ok) {
5415 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; 5424 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
@@ -5670,8 +5679,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5670 tp->snd_una = TCP_SKB_CB(skb)->ack_seq; 5679 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
5671 tp->snd_wnd = ntohs(th->window) << 5680 tp->snd_wnd = ntohs(th->window) <<
5672 tp->rx_opt.snd_wscale; 5681 tp->rx_opt.snd_wscale;
5673 tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, 5682 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5674 TCP_SKB_CB(skb)->seq);
5675 5683
5676 /* tcp_ack considers this ACK as duplicate 5684 /* tcp_ack considers this ACK as duplicate
5677 * and does not calculate rtt. 5685 * and does not calculate rtt.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cf74c416831a..a7381205bbfc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2355,7 +2355,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2355 2355
2356 switch (skb->ip_summed) { 2356 switch (skb->ip_summed) {
2357 case CHECKSUM_COMPLETE: 2357 case CHECKSUM_COMPLETE:
2358 if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr, 2358 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2359 skb->csum)) { 2359 skb->csum)) {
2360 skb->ip_summed = CHECKSUM_UNNECESSARY; 2360 skb->ip_summed = CHECKSUM_UNNECESSARY;
2361 break; 2361 break;
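The hunk above validates the checksum over the GRO length rather than skb->len. For orientation only, a rough user-space sketch of the RFC 1071 one's-complement sum that such a pseudo-header check boils down to; the helper names and simplified layout are this sketch's assumptions, not the kernel API:

#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>

/* Accumulate a buffer into a 16-bit one's-complement sum (RFC 1071). */
static uint32_t csum_add(uint32_t sum, const void *buf, size_t len)
{
        const uint8_t *p = buf;

        while (len > 1) {
                sum += ((uint32_t)p[0] << 8) | p[1];
                p += 2;
                len -= 2;
        }
        if (len)
                sum += (uint32_t)p[0] << 8;
        return sum;
}

/* Verify a TCP segment against the IPv4 pseudo-header; 'len' is header + payload. */
static int tcp_csum_ok(uint32_t saddr, uint32_t daddr, const void *seg, size_t len)
{
        uint32_t s = ntohl(saddr), d = ntohl(daddr);
        uint32_t sum = 0;

        sum += (s >> 16) + (s & 0xffff);
        sum += (d >> 16) + (d & 0xffff);
        sum += 6;                  /* IPPROTO_TCP */
        sum += (uint32_t)len;      /* the length the hunk above is careful to get right */
        sum = csum_add(sum, seg, len);
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);
        return sum == 0xffff;      /* the stored checksum field makes the total all-ones */
}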
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f67effbb102b..4b0df3e6b609 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -399,7 +399,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
399 399
400 tcp_prequeue_init(newtp); 400 tcp_prequeue_init(newtp);
401 401
402 tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn); 402 tcp_init_wl(newtp, treq->rcv_isn);
403 403
404 newtp->srtt = 0; 404 newtp->srtt = 0;
405 newtp->mdev = TCP_TIMEOUT_INIT; 405 newtp->mdev = TCP_TIMEOUT_INIT;
@@ -434,9 +434,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
434 newtp->rx_opt.saw_tstamp = 0; 434 newtp->rx_opt.saw_tstamp = 0;
435 435
436 newtp->rx_opt.dsack = 0; 436 newtp->rx_opt.dsack = 0;
437 newtp->rx_opt.eff_sacks = 0;
438
439 newtp->rx_opt.num_sacks = 0; 437 newtp->rx_opt.num_sacks = 0;
438
440 newtp->urg_data = 0; 439 newtp->urg_data = 0;
441 440
442 if (sock_flag(newsk, SOCK_KEEPOPEN)) 441 if (sock_flag(newsk, SOCK_KEEPOPEN))
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index da2c3b8794f2..eb285befdf3b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -441,10 +441,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
441 *ptr++ = htonl(sp[this_sack].end_seq); 441 *ptr++ = htonl(sp[this_sack].end_seq);
442 } 442 }
443 443
444 if (tp->rx_opt.dsack) { 444 if (tp->rx_opt.dsack)
445 tp->rx_opt.dsack = 0; 445 tp->rx_opt.dsack = 0;
446 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
447 }
448 } 446 }
449} 447}
450 448
@@ -550,6 +548,7 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
550 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; 548 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
551 struct tcp_sock *tp = tcp_sk(sk); 549 struct tcp_sock *tp = tcp_sk(sk);
552 unsigned size = 0; 550 unsigned size = 0;
551 unsigned int eff_sacks;
553 552
554#ifdef CONFIG_TCP_MD5SIG 553#ifdef CONFIG_TCP_MD5SIG
555 *md5 = tp->af_specific->md5_lookup(sk, sk); 554 *md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -568,10 +567,11 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
568 size += TCPOLEN_TSTAMP_ALIGNED; 567 size += TCPOLEN_TSTAMP_ALIGNED;
569 } 568 }
570 569
571 if (unlikely(tp->rx_opt.eff_sacks)) { 570 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
571 if (unlikely(eff_sacks)) {
572 const unsigned remaining = MAX_TCP_OPTION_SPACE - size; 572 const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
573 opts->num_sack_blocks = 573 opts->num_sack_blocks =
574 min_t(unsigned, tp->rx_opt.eff_sacks, 574 min_t(unsigned, eff_sacks,
575 (remaining - TCPOLEN_SACK_BASE_ALIGNED) / 575 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
576 TCPOLEN_SACK_PERBLOCK); 576 TCPOLEN_SACK_PERBLOCK);
577 size += TCPOLEN_SACK_BASE_ALIGNED + 577 size += TCPOLEN_SACK_BASE_ALIGNED +
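With eff_sacks now derived on the fly from num_sacks + dsack, the block count is still bounded by the remaining option space. A small stand-alone sketch of that sizing arithmetic; the constants are the usual TCP option sizes, stated here as this sketch's assumptions:

#include <stdio.h>

#define MAX_TCP_OPTION_SPACE      40  /* 60-byte max header minus 20-byte base header */
#define TCPOLEN_SACK_BASE_ALIGNED  4  /* kind + len, padded to a 4-byte boundary */
#define TCPOLEN_SACK_PERBLOCK      8  /* one 32-bit left edge + one 32-bit right edge */

/* How many SACK blocks fit once 'used' bytes of option space are taken? */
static unsigned int sack_blocks_that_fit(unsigned int eff_sacks, unsigned int used)
{
        unsigned int remaining = MAX_TCP_OPTION_SPACE - used;
        unsigned int fit = (remaining - TCPOLEN_SACK_BASE_ALIGNED) / TCPOLEN_SACK_PERBLOCK;

        return eff_sacks < fit ? eff_sacks : fit;
}

int main(void)
{
        /* With aligned timestamps (12 bytes) in use, at most 3 blocks fit. */
        printf("%u\n", sack_blocks_that_fit(4, 12));
        return 0;
}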
@@ -663,10 +663,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
663 th->urg_ptr = 0; 663 th->urg_ptr = 0;
664 664
665 /* The urg_mode check is necessary during a below snd_una win probe */ 665 /* The urg_mode check is necessary during a below snd_una win probe */
666 if (unlikely(tcp_urg_mode(tp) && 666 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
667 between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { 667 if (before(tp->snd_up, tcb->seq + 0x10000)) {
668 th->urg_ptr = htons(tp->snd_up - tcb->seq); 668 th->urg_ptr = htons(tp->snd_up - tcb->seq);
669 th->urg = 1; 669 th->urg = 1;
670 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
671 th->urg_ptr = 0xFFFF;
672 th->urg = 1;
673 }
670 } 674 }
671 675
672 tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); 676 tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
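The rewritten urgent-pointer logic only advertises an offset that fits the 16-bit field: if snd_up lies within 64K of the segment's sequence number the true offset is used; otherwise the pointer is pinned at 0xFFFF as long as that still points beyond the data queued so far. A stand-alone sketch of the clamping, using the usual wraparound-safe sequence comparison; the helper names are this sketch's own, and it folds the URG flag into a zero return for brevity:

#include <stdint.h>

/* Wraparound-safe sequence comparisons, as in TCP. */
static int seq_before(uint32_t a, uint32_t b) { return (int32_t)(a - b) < 0; }
static int seq_after(uint32_t a, uint32_t b)  { return seq_before(b, a); }

/*
 * Return the 16-bit urgent pointer for a segment starting at 'seq', or 0 if
 * the URG flag should not be set. 'snd_up' marks the end of urgent data,
 * 'snd_nxt' the next sequence to be sent.
 */
static uint16_t urg_ptr_for_segment(uint32_t seq, uint32_t snd_up, uint32_t snd_nxt)
{
        if (!seq_before(seq, snd_up))
                return 0;                        /* urgent data already behind us */
        if (seq_before(snd_up, seq + 0x10000))
                return (uint16_t)(snd_up - seq); /* true offset fits in 16 bits */
        if (seq_after(seq + 0xFFFF, snd_nxt))
                return 0xFFFF;                   /* clamp: urgent data lies far ahead */
        return 0;
}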
@@ -763,11 +767,10 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
763 struct sk_buff *buff; 767 struct sk_buff *buff;
764 int nsize, old_factor; 768 int nsize, old_factor;
765 int nlen; 769 int nlen;
766 u16 flags; 770 u8 flags;
767 771
768 BUG_ON(len > skb->len); 772 BUG_ON(len > skb->len);
769 773
770 tcp_clear_retrans_hints_partial(tp);
771 nsize = skb_headlen(skb) - len; 774 nsize = skb_headlen(skb) - len;
772 if (nsize < 0) 775 if (nsize < 0)
773 nsize = 0; 776 nsize = 0;
@@ -850,6 +853,12 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
850 tcp_verify_left_out(tp); 853 tcp_verify_left_out(tp);
851 } 854 }
852 tcp_adjust_fackets_out(sk, skb, diff); 855 tcp_adjust_fackets_out(sk, skb, diff);
856
857 if (tp->lost_skb_hint &&
858 before(TCP_SKB_CB(skb)->seq,
859 TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
860 (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
861 tp->lost_cnt_hint -= diff;
853 } 862 }
854 863
855 /* Link BUFF into the send queue. */ 864 /* Link BUFF into the send queue. */
@@ -1273,7 +1282,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1273{ 1282{
1274 struct sk_buff *buff; 1283 struct sk_buff *buff;
1275 int nlen = skb->len - len; 1284 int nlen = skb->len - len;
1276 u16 flags; 1285 u8 flags;
1277 1286
1278 /* All of a TSO frame must be composed of paged data. */ 1287 /* All of a TSO frame must be composed of paged data. */
1279 if (skb->len != skb->data_len) 1288 if (skb->len != skb->data_len)
@@ -1352,6 +1361,10 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1352 if (limit >= sk->sk_gso_max_size) 1361 if (limit >= sk->sk_gso_max_size)
1353 goto send_now; 1362 goto send_now;
1354 1363
1364 /* Middle in queue won't get any more data, full sendable already? */
1365 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1366 goto send_now;
1367
1355 if (sysctl_tcp_tso_win_divisor) { 1368 if (sysctl_tcp_tso_win_divisor) {
1356 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); 1369 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1357 1370
@@ -1405,7 +1418,7 @@ static int tcp_mtu_probe(struct sock *sk)
1405 icsk->icsk_mtup.probe_size || 1418 icsk->icsk_mtup.probe_size ||
1406 inet_csk(sk)->icsk_ca_state != TCP_CA_Open || 1419 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1407 tp->snd_cwnd < 11 || 1420 tp->snd_cwnd < 11 ||
1408 tp->rx_opt.eff_sacks) 1421 tp->rx_opt.num_sacks || tp->rx_opt.dsack)
1409 return -1; 1422 return -1;
1410 1423
1411 /* Very simple search strategy: just double the MSS. */ 1424 /* Very simple search strategy: just double the MSS. */
@@ -1754,11 +1767,9 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
1754 struct tcp_sock *tp = tcp_sk(sk); 1767 struct tcp_sock *tp = tcp_sk(sk);
1755 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); 1768 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
1756 int skb_size, next_skb_size; 1769 int skb_size, next_skb_size;
1757 u16 flags;
1758 1770
1759 skb_size = skb->len; 1771 skb_size = skb->len;
1760 next_skb_size = next_skb->len; 1772 next_skb_size = next_skb->len;
1761 flags = TCP_SKB_CB(skb)->flags;
1762 1773
1763 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); 1774 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
1764 1775
@@ -1778,9 +1789,8 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
1778 /* Update sequence range on original skb. */ 1789 /* Update sequence range on original skb. */
1779 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; 1790 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
1780 1791
1781 /* Merge over control information. */ 1792 /* Merge over control information. This moves PSH/FIN etc. over */
1782 flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ 1793 TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags;
1783 TCP_SKB_CB(skb)->flags = flags;
1784 1794
1785 /* All done, get rid of second SKB and account for it so 1795 /* All done, get rid of second SKB and account for it so
1786 * packet counting does not break. 1796 * packet counting does not break.
@@ -1908,6 +1918,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1908 if (skb->len > cur_mss) { 1918 if (skb->len > cur_mss) {
1909 if (tcp_fragment(sk, skb, cur_mss, cur_mss)) 1919 if (tcp_fragment(sk, skb, cur_mss, cur_mss))
1910 return -ENOMEM; /* We'll try again later. */ 1920 return -ENOMEM; /* We'll try again later. */
1921 } else {
1922 tcp_init_tso_segs(sk, skb, cur_mss);
1911 } 1923 }
1912 1924
1913 tcp_retrans_try_collapse(sk, skb, cur_mss); 1925 tcp_retrans_try_collapse(sk, skb, cur_mss);
@@ -2061,7 +2073,7 @@ begin_fwd:
2061 goto begin_fwd; 2073 goto begin_fwd;
2062 2074
2063 } else if (!(sacked & TCPCB_LOST)) { 2075 } else if (!(sacked & TCPCB_LOST)) {
2064 if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS)) 2076 if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
2065 hole = skb; 2077 hole = skb;
2066 continue; 2078 continue;
2067 2079
@@ -2325,7 +2337,7 @@ static void tcp_connect_init(struct sock *sk)
2325 sk->sk_err = 0; 2337 sk->sk_err = 0;
2326 sock_reset_flag(sk, SOCK_DONE); 2338 sock_reset_flag(sk, SOCK_DONE);
2327 tp->snd_wnd = 0; 2339 tp->snd_wnd = 0;
2328 tcp_init_wl(tp, tp->write_seq, 0); 2340 tcp_init_wl(tp, 0);
2329 tp->snd_una = tp->write_seq; 2341 tp->snd_una = tp->write_seq;
2330 tp->snd_sml = tp->write_seq; 2342 tp->snd_sml = tp->write_seq;
2331 tp->snd_up = tp->write_seq; 2343 tp->snd_up = tp->write_seq;
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 4660b088a8ce..a76513779e2b 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -24,14 +24,8 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
24 24
25 if (tp->snd_cwnd <= tp->snd_ssthresh) 25 if (tp->snd_cwnd <= tp->snd_ssthresh)
26 tcp_slow_start(tp); 26 tcp_slow_start(tp);
27 else { 27 else
28 tp->snd_cwnd_cnt++; 28 tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT));
29 if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
30 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
31 tp->snd_cwnd++;
32 tp->snd_cwnd_cnt = 0;
33 }
34 }
35} 29}
36 30
37static u32 tcp_scalable_ssthresh(struct sock *sk) 31static u32 tcp_scalable_ssthresh(struct sock *sk)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 0170e914f1b0..b144a26359bc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -328,19 +328,16 @@ static void tcp_retransmit_timer(struct sock *sk)
328 if (icsk->icsk_retransmits == 0) { 328 if (icsk->icsk_retransmits == 0) {
329 int mib_idx; 329 int mib_idx;
330 330
331 if (icsk->icsk_ca_state == TCP_CA_Disorder || 331 if (icsk->icsk_ca_state == TCP_CA_Disorder) {
332 icsk->icsk_ca_state == TCP_CA_Recovery) { 332 if (tcp_is_sack(tp))
333 if (tcp_is_sack(tp)) { 333 mib_idx = LINUX_MIB_TCPSACKFAILURES;
334 if (icsk->icsk_ca_state == TCP_CA_Recovery) 334 else
335 mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; 335 mib_idx = LINUX_MIB_TCPRENOFAILURES;
336 else 336 } else if (icsk->icsk_ca_state == TCP_CA_Recovery) {
337 mib_idx = LINUX_MIB_TCPSACKFAILURES; 337 if (tcp_is_sack(tp))
338 } else { 338 mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
339 if (icsk->icsk_ca_state == TCP_CA_Recovery) 339 else
340 mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; 340 mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
341 else
342 mib_idx = LINUX_MIB_TCPRENOFAILURES;
343 }
344 } else if (icsk->icsk_ca_state == TCP_CA_Loss) { 341 } else if (icsk->icsk_ca_state == TCP_CA_Loss) {
345 mib_idx = LINUX_MIB_TCPLOSSFAILURES; 342 mib_idx = LINUX_MIB_TCPLOSSFAILURES;
346 } else { 343 } else {
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index d08b2e855c22..e9bbff746488 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -159,12 +159,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
159 /* In the "non-congestive state", increase cwnd 159 /* In the "non-congestive state", increase cwnd
160 * every rtt. 160 * every rtt.
161 */ 161 */
162 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { 162 tcp_cong_avoid_ai(tp, tp->snd_cwnd);
163 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
164 tp->snd_cwnd++;
165 tp->snd_cwnd_cnt = 0;
166 } else
167 tp->snd_cwnd_cnt++;
168 } else { 163 } else {
169 /* In the "congestive state", increase cwnd 164 /* In the "congestive state", increase cwnd
170 * every other rtt. 165 * every other rtt.
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 9ec843a9bbb2..66b6821b984e 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -94,14 +94,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
94 94
95 } else { 95 } else {
96 /* Reno */ 96 /* Reno */
97 97 tcp_cong_avoid_ai(tp, tp->snd_cwnd);
98 if (tp->snd_cwnd_cnt < tp->snd_cwnd)
99 tp->snd_cwnd_cnt++;
100
101 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
102 tp->snd_cwnd++;
103 tp->snd_cwnd_cnt = 0;
104 }
105 } 98 }
106 99
107 /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. 100 /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
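The tcp_scalable.c, tcp_veno.c and tcp_yeah.c hunks above all collapse the same open-coded counter into one additive-increase helper. Judging by the code being removed, its behaviour is: bump the per-window counter once per ACK and add one segment to cwnd every w ACKs, never exceeding the clamp. A stand-alone sketch of that behaviour (the struct and field names mirror the kernel's for readability, but this is an illustration, not the kernel function):

#include <stdint.h>

struct cc_state {
        uint32_t snd_cwnd;        /* congestion window, in segments */
        uint32_t snd_cwnd_cnt;    /* ACKs counted toward the next increase */
        uint32_t snd_cwnd_clamp;  /* hard upper bound on the window */
};

/* Additive increase: grow cwnd by one segment for every w ACKs. */
static void cong_avoid_ai(struct cc_state *tp, uint32_t w)
{
        if (tp->snd_cwnd_cnt >= w) {
                if (tp->snd_cwnd < tp->snd_cwnd_clamp)
                        tp->snd_cwnd++;
                tp->snd_cwnd_cnt = 0;
        } else {
                tp->snd_cwnd_cnt++;
        }
}

Scalable then passes min(snd_cwnd, TCP_SCALABLE_AI_CNT) as w, which is what caps its per-window growth once the window exceeds that constant; Veno and YeAH simply pass snd_cwnd for classic Reno-style growth.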
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c47c989cb1fb..4bd178a111d5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -596,6 +596,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
596 return -EOPNOTSUPP; 596 return -EOPNOTSUPP;
597 597
598 ipc.opt = NULL; 598 ipc.opt = NULL;
599 ipc.shtx.flags = 0;
599 600
600 if (up->pending) { 601 if (up->pending) {
601 /* 602 /*
@@ -643,6 +644,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
643 ipc.addr = inet->saddr; 644 ipc.addr = inet->saddr;
644 645
645 ipc.oif = sk->sk_bound_dev_if; 646 ipc.oif = sk->sk_bound_dev_if;
647 err = sock_tx_timestamp(msg, sk, &ipc.shtx);
648 if (err)
649 return err;
646 if (msg->msg_controllen) { 650 if (msg->msg_controllen) {
647 err = ip_cmsg_send(sock_net(sk), msg, &ipc); 651 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
648 if (err) 652 if (err)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 2ad24ba31f9d..60d918c96a4f 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -241,7 +241,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
241 241
242static struct dst_ops xfrm4_dst_ops = { 242static struct dst_ops xfrm4_dst_ops = {
243 .family = AF_INET, 243 .family = AF_INET,
244 .protocol = __constant_htons(ETH_P_IP), 244 .protocol = cpu_to_be16(ETH_P_IP),
245 .gc = xfrm4_garbage_collect, 245 .gc = xfrm4_garbage_collect,
246 .update_pmtu = xfrm4_update_pmtu, 246 .update_pmtu = xfrm4_update_pmtu,
247 .destroy = xfrm4_dst_destroy, 247 .destroy = xfrm4_dst_destroy,
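Several hunks in this patch replace __constant_htons() with cpu_to_be16() in static initializers; both produce a 16-bit big-endian constant at compile time. A minimal user-space sketch of the conversion on a little-endian host; the macro is this sketch's own and hard-codes the little-endian case, whereas the kernel's helper is a no-op on big-endian builds:

#include <stdint.h>
#include <stdio.h>

/* Little-endian case only: swap the bytes so the constant matches wire order. */
#define CPU_TO_BE16(x) ((uint16_t)((((x) & 0x00ff) << 8) | (((x) & 0xff00) >> 8)))

int main(void)
{
        uint16_t proto = CPU_TO_BE16(0x0800);   /* ETH_P_IP as it appears on the wire */

        printf("0x%04x\n", proto);              /* prints 0x0008 on little-endian hosts */
        return 0;
}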
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1220e2c7831e..e83852ab4dc8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2227,10 +2227,24 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
2227 return err; 2227 return err;
2228} 2228}
2229 2229
2230static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
2231 int plen, int scope)
2232{
2233 struct inet6_ifaddr *ifp;
2234
2235 ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT);
2236 if (!IS_ERR(ifp)) {
2237 spin_lock_bh(&ifp->lock);
2238 ifp->flags &= ~IFA_F_TENTATIVE;
2239 spin_unlock_bh(&ifp->lock);
2240 ipv6_ifa_notify(RTM_NEWADDR, ifp);
2241 in6_ifa_put(ifp);
2242 }
2243}
2244
2230#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) 2245#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
2231static void sit_add_v4_addrs(struct inet6_dev *idev) 2246static void sit_add_v4_addrs(struct inet6_dev *idev)
2232{ 2247{
2233 struct inet6_ifaddr * ifp;
2234 struct in6_addr addr; 2248 struct in6_addr addr;
2235 struct net_device *dev; 2249 struct net_device *dev;
2236 struct net *net = dev_net(idev->dev); 2250 struct net *net = dev_net(idev->dev);
@@ -2249,14 +2263,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
2249 } 2263 }
2250 2264
2251 if (addr.s6_addr32[3]) { 2265 if (addr.s6_addr32[3]) {
2252 ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT); 2266 add_addr(idev, &addr, 128, scope);
2253 if (!IS_ERR(ifp)) {
2254 spin_lock_bh(&ifp->lock);
2255 ifp->flags &= ~IFA_F_TENTATIVE;
2256 spin_unlock_bh(&ifp->lock);
2257 ipv6_ifa_notify(RTM_NEWADDR, ifp);
2258 in6_ifa_put(ifp);
2259 }
2260 return; 2267 return;
2261 } 2268 }
2262 2269
@@ -2284,15 +2291,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
2284 else 2291 else
2285 plen = 96; 2292 plen = 96;
2286 2293
2287 ifp = ipv6_add_addr(idev, &addr, plen, flag, 2294 add_addr(idev, &addr, plen, flag);
2288 IFA_F_PERMANENT);
2289 if (!IS_ERR(ifp)) {
2290 spin_lock_bh(&ifp->lock);
2291 ifp->flags &= ~IFA_F_TENTATIVE;
2292 spin_unlock_bh(&ifp->lock);
2293 ipv6_ifa_notify(RTM_NEWADDR, ifp);
2294 in6_ifa_put(ifp);
2295 }
2296 } 2295 }
2297 } 2296 }
2298 } 2297 }
@@ -2302,7 +2301,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
2302static void init_loopback(struct net_device *dev) 2301static void init_loopback(struct net_device *dev)
2303{ 2302{
2304 struct inet6_dev *idev; 2303 struct inet6_dev *idev;
2305 struct inet6_ifaddr * ifp;
2306 2304
2307 /* ::1 */ 2305 /* ::1 */
2308 2306
@@ -2313,14 +2311,7 @@ static void init_loopback(struct net_device *dev)
2313 return; 2311 return;
2314 } 2312 }
2315 2313
2316 ifp = ipv6_add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFA_F_PERMANENT); 2314 add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
2317 if (!IS_ERR(ifp)) {
2318 spin_lock_bh(&ifp->lock);
2319 ifp->flags &= ~IFA_F_TENTATIVE;
2320 spin_unlock_bh(&ifp->lock);
2321 ipv6_ifa_notify(RTM_NEWADDR, ifp);
2322 in6_ifa_put(ifp);
2323 }
2324} 2315}
2325 2316
2326static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr) 2317static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
@@ -3647,7 +3638,8 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
3647 kfree_skb(skb); 3638 kfree_skb(skb);
3648 goto errout; 3639 goto errout;
3649 } 3640 }
3650 err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); 3641 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
3642 return;
3651errout: 3643errout:
3652 if (err < 0) 3644 if (err < 0)
3653 rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); 3645 rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
@@ -3858,7 +3850,8 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
3858 kfree_skb(skb); 3850 kfree_skb(skb);
3859 goto errout; 3851 goto errout;
3860 } 3852 }
3861 err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); 3853 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
3854 return;
3862errout: 3855errout:
3863 if (err < 0) 3856 if (err < 0)
3864 rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); 3857 rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
@@ -3928,7 +3921,8 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
3928 kfree_skb(skb); 3921 kfree_skb(skb);
3929 goto errout; 3922 goto errout;
3930 } 3923 }
3931 err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); 3924 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
3925 return;
3932errout: 3926errout:
3933 if (err < 0) 3927 if (err < 0)
3934 rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); 3928 rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index da944eca2ca6..57b07da1212a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -803,24 +803,34 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
803 int proto; 803 int proto;
804 __wsum csum; 804 __wsum csum;
805 805
806 if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) 806 iph = skb_gro_header(skb, sizeof(*iph));
807 if (unlikely(!iph))
807 goto out; 808 goto out;
808 809
809 iph = ipv6_hdr(skb); 810 skb_gro_pull(skb, sizeof(*iph));
810 __skb_pull(skb, sizeof(*iph)); 811 skb_set_transport_header(skb, skb_gro_offset(skb));
811 812
812 flush += ntohs(iph->payload_len) != skb->len; 813 flush += ntohs(iph->payload_len) != skb_gro_len(skb);
813 814
814 rcu_read_lock(); 815 rcu_read_lock();
815 proto = ipv6_gso_pull_exthdrs(skb, iph->nexthdr); 816 proto = iph->nexthdr;
816 iph = ipv6_hdr(skb);
817 IPV6_GRO_CB(skb)->proto = proto;
818 ops = rcu_dereference(inet6_protos[proto]); 817 ops = rcu_dereference(inet6_protos[proto]);
819 if (!ops || !ops->gro_receive) 818 if (!ops || !ops->gro_receive) {
820 goto out_unlock; 819 __pskb_pull(skb, skb_gro_offset(skb));
820 proto = ipv6_gso_pull_exthdrs(skb, proto);
821 skb_gro_pull(skb, -skb_transport_offset(skb));
822 skb_reset_transport_header(skb);
823 __skb_push(skb, skb_gro_offset(skb));
824
825 if (!ops || !ops->gro_receive)
826 goto out_unlock;
827
828 iph = ipv6_hdr(skb);
829 }
830
831 IPV6_GRO_CB(skb)->proto = proto;
821 832
822 flush--; 833 flush--;
823 skb_reset_transport_header(skb);
824 nlen = skb_network_header_len(skb); 834 nlen = skb_network_header_len(skb);
825 835
826 for (p = *head; p; p = p->next) { 836 for (p = *head; p; p = p->next) {
@@ -884,7 +894,7 @@ out_unlock:
884} 894}
885 895
886static struct packet_type ipv6_packet_type = { 896static struct packet_type ipv6_packet_type = {
887 .type = __constant_htons(ETH_P_IPV6), 897 .type = cpu_to_be16(ETH_P_IPV6),
888 .func = ipv6_rcv, 898 .func = ipv6_rcv,
889 .gso_send_check = ipv6_gso_send_check, 899 .gso_send_check = ipv6_gso_send_check,
890 .gso_segment = ipv6_gso_segment, 900 .gso_segment = ipv6_gso_segment,
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 40f324655e24..d31df0f4bc9a 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -218,8 +218,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
218 if (opt) 218 if (opt)
219 sock_kfree_s(sk, opt, opt->tot_len); 219 sock_kfree_s(sk, opt, opt->tot_len);
220 pktopt = xchg(&np->pktoptions, NULL); 220 pktopt = xchg(&np->pktoptions, NULL);
221 if (pktopt) 221 kfree_skb(pktopt);
222 kfree_skb(pktopt);
223 222
224 sk->sk_destruct = inet_sock_destruct; 223 sk->sk_destruct = inet_sock_destruct;
225 /* 224 /*
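This hunk, like several later ones (tcp_ipv6.c, af_iucv.c, af_key.c, llc_conn.c), drops an explicit NULL check in front of kfree_skb(), which already returns quietly for a NULL pointer. The idiom the patch relies on, as a stand-alone sketch with this sketch's own names:

#include <stdlib.h>

struct buf { void *data; };

/* Free functions that tolerate NULL let callers skip the 'if (p)' guard. */
static void buf_free(struct buf *b)
{
        if (!b)
                return;
        free(b->data);   /* free(NULL) is itself a no-op */
        free(b);
}

int main(void)
{
        buf_free(NULL);  /* safe: no check needed at the call site */
        return 0;
}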
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3e2970841bd8..9f061d1adbc2 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1095,11 +1095,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1095 &ipv6_hdr(ra)->saddr); 1095 &ipv6_hdr(ra)->saddr);
1096 nlmsg_end(skb, nlh); 1096 nlmsg_end(skb, nlh);
1097 1097
1098 err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, 1098 rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
1099 GFP_ATOMIC);
1100 if (err < 0)
1101 goto errout;
1102
1103 return; 1099 return;
1104 1100
1105nla_put_failure: 1101nla_put_failure:
@@ -1538,13 +1534,10 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1538 if (rt->rt6i_flags & RTF_GATEWAY) { 1534 if (rt->rt6i_flags & RTF_GATEWAY) {
1539 ND_PRINTK2(KERN_WARNING 1535 ND_PRINTK2(KERN_WARNING
1540 "ICMPv6 Redirect: destination is not a neighbour.\n"); 1536 "ICMPv6 Redirect: destination is not a neighbour.\n");
1541 dst_release(dst); 1537 goto release;
1542 return;
1543 }
1544 if (!xrlim_allow(dst, 1*HZ)) {
1545 dst_release(dst);
1546 return;
1547 } 1538 }
1539 if (!xrlim_allow(dst, 1*HZ))
1540 goto release;
1548 1541
1549 if (dev->addr_len) { 1542 if (dev->addr_len) {
1550 read_lock_bh(&neigh->lock); 1543 read_lock_bh(&neigh->lock);
@@ -1570,8 +1563,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1570 ND_PRINTK0(KERN_ERR 1563 ND_PRINTK0(KERN_ERR
1571 "ICMPv6 Redirect: %s() failed to allocate an skb.\n", 1564 "ICMPv6 Redirect: %s() failed to allocate an skb.\n",
1572 __func__); 1565 __func__);
1573 dst_release(dst); 1566 goto release;
1574 return;
1575 } 1567 }
1576 1568
1577 skb_reserve(buff, LL_RESERVED_SPACE(dev)); 1569 skb_reserve(buff, LL_RESERVED_SPACE(dev));
@@ -1631,6 +1623,10 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1631 1623
1632 if (likely(idev != NULL)) 1624 if (likely(idev != NULL))
1633 in6_dev_put(idev); 1625 in6_dev_put(idev);
1626 return;
1627
1628release:
1629 dst_release(dst);
1634} 1630}
1635 1631
1636static void pndisc_redo(struct sk_buff *skb) 1632static void pndisc_redo(struct sk_buff *skb)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9c574235c905..1394ddb6e35c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -98,7 +98,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
98 98
99static struct dst_ops ip6_dst_ops_template = { 99static struct dst_ops ip6_dst_ops_template = {
100 .family = AF_INET6, 100 .family = AF_INET6,
101 .protocol = __constant_htons(ETH_P_IPV6), 101 .protocol = cpu_to_be16(ETH_P_IPV6),
102 .gc = ip6_dst_gc, 102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024, 103 .gc_thresh = 1024,
104 .check = ip6_dst_check, 104 .check = ip6_dst_check,
@@ -117,7 +117,7 @@ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117 117
118static struct dst_ops ip6_dst_blackhole_ops = { 118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6, 119 .family = AF_INET6,
120 .protocol = __constant_htons(ETH_P_IPV6), 120 .protocol = cpu_to_be16(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy, 121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check, 122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu, 123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
@@ -2400,8 +2400,9 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2400 kfree_skb(skb); 2400 kfree_skb(skb);
2401 goto errout; 2401 goto errout;
2402 } 2402 }
2403 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, 2403 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2404 info->nlh, gfp_any()); 2404 info->nlh, gfp_any());
2405 return;
2405errout: 2406errout:
2406 if (err < 0) 2407 if (err < 0)
2407 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 2408 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d3467e563f02..26915effb3bc 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -454,7 +454,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
454 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 454 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
455 goto out; 455 goto out;
456 456
457 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) 457 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
458 t->err_count++; 458 t->err_count++;
459 else 459 else
460 t->err_count = 1; 460 t->err_count = 1;
@@ -658,7 +658,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
658 } 658 }
659 659
660 if (tunnel->err_count > 0) { 660 if (tunnel->err_count > 0) {
661 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { 661 if (time_before(jiffies,
662 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
662 tunnel->err_count--; 663 tunnel->err_count--;
663 dst_link_failure(skb); 664 dst_link_failure(skb);
664 } else 665 } else
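Both sit.c hunks switch an elapsed-time test to time_before(), the wraparound-safe comparison. The idea is to compare via a signed difference rather than against a deadline value that may itself have wrapped. A stand-alone sketch with a 32-bit counter for brevity (the macro is this sketch's own):

#include <stdint.h>
#include <stdio.h>

/* True if time a is before time b, correct even when the counter wraps. */
#define TIME_BEFORE(a, b)  ((int32_t)((a) - (b)) < 0)

int main(void)
{
        uint32_t err_time = 0xffffff00u;       /* stamped shortly before wraparound */
        uint32_t deadline = err_time + 0x200;  /* wraps to a small value: 0x100 */
        uint32_t now      = 0xffffff80u;       /* still before the deadline */

        printf("plain <      : %d\n", now < deadline);             /* 0 - wrongly expired */
        printf("time_before(): %d\n", TIME_BEFORE(now, deadline)); /* 1 - correct */
        return 0;
}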
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e5b85d45bee8..4b5aa1854260 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -533,8 +533,7 @@ static inline void syn_flood_warning(struct sk_buff *skb)
533 533
534static void tcp_v6_reqsk_destructor(struct request_sock *req) 534static void tcp_v6_reqsk_destructor(struct request_sock *req)
535{ 535{
536 if (inet6_rsk(req)->pktopts) 536 kfree_skb(inet6_rsk(req)->pktopts);
537 kfree_skb(inet6_rsk(req)->pktopts);
538} 537}
539 538
540#ifdef CONFIG_TCP_MD5SIG 539#ifdef CONFIG_TCP_MD5SIG
@@ -948,7 +947,7 @@ struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb)
948 947
949 switch (skb->ip_summed) { 948 switch (skb->ip_summed) {
950 case CHECKSUM_COMPLETE: 949 case CHECKSUM_COMPLETE:
951 if (!tcp_v6_check(skb->len, &iph->saddr, &iph->daddr, 950 if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
952 skb->csum)) { 951 skb->csum)) {
953 skb->ip_summed = CHECKSUM_UNNECESSARY; 952 skb->ip_summed = CHECKSUM_UNNECESSARY;
954 break; 953 break;
@@ -1611,8 +1610,7 @@ ipv6_pktoptions:
1611 } 1610 }
1612 } 1611 }
1613 1612
1614 if (opt_skb) 1613 kfree_skb(opt_skb);
1615 kfree_skb(opt_skb);
1616 return 0; 1614 return 0;
1617} 1615}
1618 1616
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 97ab068e8ccc..b4b16a43f277 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -272,7 +272,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
272 272
273static struct dst_ops xfrm6_dst_ops = { 273static struct dst_ops xfrm6_dst_ops = {
274 .family = AF_INET6, 274 .family = AF_INET6,
275 .protocol = __constant_htons(ETH_P_IPV6), 275 .protocol = cpu_to_be16(ETH_P_IPV6),
276 .gc = xfrm6_garbage_collect, 276 .gc = xfrm6_garbage_collect,
277 .update_pmtu = xfrm6_update_pmtu, 277 .update_pmtu = xfrm6_update_pmtu,
278 .destroy = xfrm6_dst_destroy, 278 .destroy = xfrm6_dst_destroy,
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 0e685b05496e..f417b77fa0e1 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -69,7 +69,7 @@ __xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass)
69 69
70 for (i = 0; i < n; i++) { 70 for (i = 0; i < n; i++) {
71 dst[count[class[i] - 1]++] = src[i]; 71 dst[count[class[i] - 1]++] = src[i];
72 src[i] = 0; 72 src[i] = NULL;
73 } 73 }
74 74
75 return 0; 75 return 0;
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index b6e70f92e7fb..43d0ffc6d565 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1959,12 +1959,12 @@ static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = {
1959SOCKOPS_WRAP(ipx_dgram, PF_IPX); 1959SOCKOPS_WRAP(ipx_dgram, PF_IPX);
1960 1960
1961static struct packet_type ipx_8023_packet_type = { 1961static struct packet_type ipx_8023_packet_type = {
1962 .type = __constant_htons(ETH_P_802_3), 1962 .type = cpu_to_be16(ETH_P_802_3),
1963 .func = ipx_rcv, 1963 .func = ipx_rcv,
1964}; 1964};
1965 1965
1966static struct packet_type ipx_dix_packet_type = { 1966static struct packet_type ipx_dix_packet_type = {
1967 .type = __constant_htons(ETH_P_IPX), 1967 .type = cpu_to_be16(ETH_P_IPX),
1968 .func = ipx_rcv, 1968 .func = ipx_rcv,
1969}; 1969};
1970 1970
diff --git a/net/irda/irmod.c b/net/irda/irmod.c
index 4c487a883725..1bb607f2f5c7 100644
--- a/net/irda/irmod.c
+++ b/net/irda/irmod.c
@@ -56,7 +56,7 @@ EXPORT_SYMBOL(irda_debug);
56 * Tell the kernel how IrDA packets should be handled. 56 * Tell the kernel how IrDA packets should be handled.
57 */ 57 */
58static struct packet_type irda_packet_type = { 58static struct packet_type irda_packet_type = {
59 .type = __constant_htons(ETH_P_IRDA), 59 .type = cpu_to_be16(ETH_P_IRDA),
60 .func = irlap_driver_rcv, /* Packet type handler irlap_frame.c */ 60 .func = irlap_driver_rcv, /* Packet type handler irlap_frame.c */
61}; 61};
62 62
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index eb8a2a0b6eb7..49e786535dc8 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1171,8 +1171,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
1171 1171
1172 spin_unlock_irqrestore(&list->lock, flags); 1172 spin_unlock_irqrestore(&list->lock, flags);
1173 1173
1174 if (this) 1174 kfree_skb(this);
1175 kfree_skb(this);
1176 } 1175 }
1177 BUG_ON(!this); 1176 BUG_ON(!this);
1178 1177
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 7dcbde3ea7d9..643c1be2d02e 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -313,8 +313,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
313 if (one_sk != NULL) 313 if (one_sk != NULL)
314 err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); 314 err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
315 315
316 if (skb2) 316 kfree_skb(skb2);
317 kfree_skb(skb2);
318 kfree_skb(skb); 317 kfree_skb(skb);
319 return err; 318 return err;
320} 319}
@@ -3573,8 +3572,7 @@ static int pfkey_sendmsg(struct kiocb *kiocb,
3573out: 3572out:
3574 if (err && hdr && pfkey_error(hdr, err, sk) == 0) 3573 if (err && hdr && pfkey_error(hdr, err, sk) == 0)
3575 err = 0; 3574 err = 0;
3576 if (skb) 3575 kfree_skb(skb);
3577 kfree_skb(skb);
3578 3576
3579 return err ? : len; 3577 return err ? : len;
3580} 3578}
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 56fd85ab358e..febae702685c 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -1118,11 +1118,11 @@ static const struct proto_ops llc_ui_ops = {
1118 .sendpage = sock_no_sendpage, 1118 .sendpage = sock_no_sendpage,
1119}; 1119};
1120 1120
1121static char llc_proc_err_msg[] __initdata = 1121static const char llc_proc_err_msg[] __initconst =
1122 KERN_CRIT "LLC: Unable to register the proc_fs entries\n"; 1122 KERN_CRIT "LLC: Unable to register the proc_fs entries\n";
1123static char llc_sysctl_err_msg[] __initdata = 1123static const char llc_sysctl_err_msg[] __initconst =
1124 KERN_CRIT "LLC: Unable to register the sysctl entries\n"; 1124 KERN_CRIT "LLC: Unable to register the sysctl entries\n";
1125static char llc_sock_err_msg[] __initdata = 1125static const char llc_sock_err_msg[] __initconst =
1126 KERN_CRIT "LLC: Unable to register the network family\n"; 1126 KERN_CRIT "LLC: Unable to register the network family\n";
1127 1127
1128static int __init llc2_init(void) 1128static int __init llc2_init(void)
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 5c6d89c6d51d..3477624a4906 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -332,8 +332,7 @@ int llc_conn_remove_acked_pdus(struct sock *sk, u8 nr, u16 *how_many_unacked)
332 332
333 for (i = 0; i < pdu_pos && i < q_len; i++) { 333 for (i = 0; i < pdu_pos && i < q_len; i++) {
334 skb = skb_dequeue(&llc->pdu_unack_q); 334 skb = skb_dequeue(&llc->pdu_unack_q);
335 if (skb) 335 kfree_skb(skb);
336 kfree_skb(skb);
337 nbr_acked++; 336 nbr_acked++;
338 } 337 }
339out: 338out:
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 50d5b10e23a2..a7fe1adc378d 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -148,12 +148,12 @@ void llc_sap_close(struct llc_sap *sap)
148} 148}
149 149
150static struct packet_type llc_packet_type = { 150static struct packet_type llc_packet_type = {
151 .type = __constant_htons(ETH_P_802_2), 151 .type = cpu_to_be16(ETH_P_802_2),
152 .func = llc_rcv, 152 .func = llc_rcv,
153}; 153};
154 154
155static struct packet_type llc_tr_packet_type = { 155static struct packet_type llc_tr_packet_type = {
156 .type = __constant_htons(ETH_P_TR_802_2), 156 .type = cpu_to_be16(ETH_P_TR_802_2),
157 .func = llc_rcv, 157 .func = llc_rcv,
158}; 158};
159 159
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 7d4971aa443f..0e3ab88bb706 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -8,13 +8,15 @@ mac80211-y := \
8 wep.o \ 8 wep.o \
9 wpa.o \ 9 wpa.o \
10 scan.o \ 10 scan.o \
11 ht.o \ 11 ht.o agg-tx.o agg-rx.o \
12 ibss.o \
12 mlme.o \ 13 mlme.o \
13 iface.o \ 14 iface.o \
14 rate.o \ 15 rate.o \
15 michael.o \ 16 michael.o \
16 tkip.o \ 17 tkip.o \
17 aes_ccm.o \ 18 aes_ccm.o \
19 aes_cmac.o \
18 cfg.o \ 20 cfg.o \
19 rx.o \ 21 rx.o \
20 spectmgmt.o \ 22 spectmgmt.o \
@@ -37,6 +39,8 @@ mac80211-$(CONFIG_MAC80211_MESH) += \
37 mesh_plink.o \ 39 mesh_plink.o \
38 mesh_hwmp.o 40 mesh_hwmp.o
39 41
42mac80211-$(CONFIG_PM) += pm.o
43
40# objects for PID algorithm 44# objects for PID algorithm
41rc80211_pid-y := rc80211_pid_algo.o 45rc80211_pid-y := rc80211_pid_algo.o
42rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o 46rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
new file mode 100644
index 000000000000..3d097b3d7b62
--- /dev/null
+++ b/net/mac80211/aes_cmac.c
@@ -0,0 +1,135 @@
1/*
2 * AES-128-CMAC with TLen 16 for IEEE 802.11w BIP
3 * Copyright 2008, Jouni Malinen <j@w1.fi>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/types.h>
12#include <linux/crypto.h>
13#include <linux/err.h>
14
15#include <net/mac80211.h>
16#include "key.h"
17#include "aes_cmac.h"
18
19#define AES_BLOCK_SIZE 16
20#define AES_CMAC_KEY_LEN 16
21#define CMAC_TLEN 8 /* CMAC TLen = 64 bits (8 octets) */
22#define AAD_LEN 20
23
24
25static void gf_mulx(u8 *pad)
26{
27 int i, carry;
28
29 carry = pad[0] & 0x80;
30 for (i = 0; i < AES_BLOCK_SIZE - 1; i++)
31 pad[i] = (pad[i] << 1) | (pad[i + 1] >> 7);
32 pad[AES_BLOCK_SIZE - 1] <<= 1;
33 if (carry)
34 pad[AES_BLOCK_SIZE - 1] ^= 0x87;
35}
36
37
38static void aes_128_cmac_vector(struct crypto_cipher *tfm, u8 *scratch,
39 size_t num_elem,
40 const u8 *addr[], const size_t *len, u8 *mac)
41{
42 u8 *cbc, *pad;
43 const u8 *pos, *end;
44 size_t i, e, left, total_len;
45
46 cbc = scratch;
47 pad = scratch + AES_BLOCK_SIZE;
48
49 memset(cbc, 0, AES_BLOCK_SIZE);
50
51 total_len = 0;
52 for (e = 0; e < num_elem; e++)
53 total_len += len[e];
54 left = total_len;
55
56 e = 0;
57 pos = addr[0];
58 end = pos + len[0];
59
60 while (left >= AES_BLOCK_SIZE) {
61 for (i = 0; i < AES_BLOCK_SIZE; i++) {
62 cbc[i] ^= *pos++;
63 if (pos >= end) {
64 e++;
65 pos = addr[e];
66 end = pos + len[e];
67 }
68 }
69 if (left > AES_BLOCK_SIZE)
70 crypto_cipher_encrypt_one(tfm, cbc, cbc);
71 left -= AES_BLOCK_SIZE;
72 }
73
74 memset(pad, 0, AES_BLOCK_SIZE);
75 crypto_cipher_encrypt_one(tfm, pad, pad);
76 gf_mulx(pad);
77
78 if (left || total_len == 0) {
79 for (i = 0; i < left; i++) {
80 cbc[i] ^= *pos++;
81 if (pos >= end) {
82 e++;
83 pos = addr[e];
84 end = pos + len[e];
85 }
86 }
87 cbc[left] ^= 0x80;
88 gf_mulx(pad);
89 }
90
91 for (i = 0; i < AES_BLOCK_SIZE; i++)
92 pad[i] ^= cbc[i];
93 crypto_cipher_encrypt_one(tfm, pad, pad);
94 memcpy(mac, pad, CMAC_TLEN);
95}
96
97
98void ieee80211_aes_cmac(struct crypto_cipher *tfm, u8 *scratch, const u8 *aad,
99 const u8 *data, size_t data_len, u8 *mic)
100{
101 const u8 *addr[3];
102 size_t len[3];
103 u8 zero[CMAC_TLEN];
104
105 memset(zero, 0, CMAC_TLEN);
106 addr[0] = aad;
107 len[0] = AAD_LEN;
108 addr[1] = data;
109 len[1] = data_len - CMAC_TLEN;
110 addr[2] = zero;
111 len[2] = CMAC_TLEN;
112
113 aes_128_cmac_vector(tfm, scratch, 3, addr, len, mic);
114}
115
116
117struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[])
118{
119 struct crypto_cipher *tfm;
120
121 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
122 if (IS_ERR(tfm))
123 return NULL;
124
125 crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN);
126
127 return tfm;
128}
129
130
131void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm)
132{
133 if (tfm)
134 crypto_free_cipher(tfm);
135}
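The gf_mulx() doubling in the file above mirrors the RFC 4493 subkey step: with L = AES_K(0^128), a complete final block is XORed with K1 = dbl(L) and a padded final block with K2 = dbl(K1), which matches the one or two doublings applied to 'pad' before the final encryption. A stand-alone sketch of the doubling over GF(2^128); the hard-coded input stands in for L purely for illustration:

#include <stdint.h>
#include <stdio.h>

#define BLOCK 16

/* Left-shift a 128-bit value by one bit, reducing by x^128 + x^7 + x^2 + x + 1. */
static void dbl(uint8_t b[BLOCK])
{
        int carry = b[0] & 0x80;
        int i;

        for (i = 0; i < BLOCK - 1; i++)
                b[i] = (uint8_t)((b[i] << 1) | (b[i + 1] >> 7));
        b[BLOCK - 1] <<= 1;
        if (carry)
                b[BLOCK - 1] ^= 0x87;
}

int main(void)
{
        /* Stand-in for L = AES-128_K(0^128); in the real flow it comes from the cipher. */
        uint8_t k1[BLOCK] = { 0x80 };  /* MSB set, so the 0x87 reduction applies */
        int i;

        dbl(k1);                       /* K1 = dbl(L) */
        for (i = 0; i < BLOCK; i++)
                printf("%02x", k1[i]); /* 000000...0087 */
        printf("\n");
        return 0;
}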
diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h
new file mode 100644
index 000000000000..0eb9a4831508
--- /dev/null
+++ b/net/mac80211/aes_cmac.h
@@ -0,0 +1,19 @@
1/*
2 * Copyright 2008, Jouni Malinen <j@w1.fi>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#ifndef AES_CMAC_H
10#define AES_CMAC_H
11
12#include <linux/crypto.h>
13
14struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[]);
15void ieee80211_aes_cmac(struct crypto_cipher *tfm, u8 *scratch, const u8 *aad,
16 const u8 *data, size_t data_len, u8 *mic);
17void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm);
18
19#endif /* AES_CMAC_H */
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
new file mode 100644
index 000000000000..a95affc94629
--- /dev/null
+++ b/net/mac80211/agg-rx.c
@@ -0,0 +1,302 @@
1/*
2 * HT handling
3 *
4 * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi>
5 * Copyright 2002-2005, Instant802 Networks, Inc.
6 * Copyright 2005-2006, Devicescape Software, Inc.
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2007-2008, Intel Corporation
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15
16#include <linux/ieee80211.h>
17#include <net/mac80211.h>
18#include "ieee80211_i.h"
19
20void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
21 u16 initiator, u16 reason)
22{
23 struct ieee80211_local *local = sta->local;
24 struct ieee80211_hw *hw = &local->hw;
25 int i;
26
27 /* check if TID is in operational state */
28 spin_lock_bh(&sta->lock);
29 if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) {
30 spin_unlock_bh(&sta->lock);
31 return;
32 }
33
34 sta->ampdu_mlme.tid_state_rx[tid] =
35 HT_AGG_STATE_REQ_STOP_BA_MSK |
36 (initiator << HT_AGG_STATE_INITIATOR_SHIFT);
37 spin_unlock_bh(&sta->lock);
38
39#ifdef CONFIG_MAC80211_HT_DEBUG
40 printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n",
41 sta->sta.addr, tid);
42#endif /* CONFIG_MAC80211_HT_DEBUG */
43
44 if (local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP,
45 &sta->sta, tid, NULL))
46 printk(KERN_DEBUG "HW problem - can not stop rx "
47 "aggregation for tid %d\n", tid);
48
49 /* shutdown timer has not expired */
50 if (initiator != WLAN_BACK_TIMER)
51 del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
52
53 /* check if this is a self generated aggregation halt */
54 if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER)
55 ieee80211_send_delba(sta->sdata, sta->sta.addr,
56 tid, 0, reason);
57
58 /* free the reordering buffer */
59 for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) {
60 if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) {
61 /* release the reordered frames */
62 dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]);
63 sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--;
64 sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL;
65 }
66 }
67
68 spin_lock_bh(&sta->lock);
69 /* free resources */
70 kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf);
71
72 if (!sta->ampdu_mlme.tid_rx[tid]->shutdown) {
73 kfree(sta->ampdu_mlme.tid_rx[tid]);
74 sta->ampdu_mlme.tid_rx[tid] = NULL;
75 }
76
77 sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE;
78 spin_unlock_bh(&sta->lock);
79}
80
81void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid,
82 u16 initiator, u16 reason)
83{
84 struct ieee80211_local *local = sdata->local;
85 struct sta_info *sta;
86
87 /* stop HW Rx aggregation. ampdu_action existence
88 * already verified in session init so we add the BUG_ON */
89 BUG_ON(!local->ops->ampdu_action);
90
91 rcu_read_lock();
92
93 sta = sta_info_get(local, ra);
94 if (!sta) {
95 rcu_read_unlock();
96 return;
97 }
98
99 __ieee80211_stop_rx_ba_session(sta, tid, initiator, reason);
100
101 rcu_read_unlock();
102}
103
104/*
105 * After accepting the AddBA Request we activated a timer,
106 * resetting it after each frame that arrives from the originator.
107 * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed.
108 */
109static void sta_rx_agg_session_timer_expired(unsigned long data)
110{
111 /* not an elegant detour, but there is no choice as the timer passes
112 * only one argument, and various sta_info are needed here, so init
113 * flow in sta_info_create gives the TID as data, while the timer_to_id
114 * array gives the sta through container_of */
115 u8 *ptid = (u8 *)data;
116 u8 *timer_to_id = ptid - *ptid;
117 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
118 timer_to_tid[0]);
119
120#ifdef CONFIG_MAC80211_HT_DEBUG
121 printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
122#endif
123 ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr,
124 (u16)*ptid, WLAN_BACK_TIMER,
125 WLAN_REASON_QSTA_TIMEOUT);
126}
127
128static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
129 u8 dialog_token, u16 status, u16 policy,
130 u16 buf_size, u16 timeout)
131{
132 struct ieee80211_local *local = sdata->local;
133 struct sk_buff *skb;
134 struct ieee80211_mgmt *mgmt;
135 u16 capab;
136
137 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
138
139 if (!skb) {
140 printk(KERN_DEBUG "%s: failed to allocate buffer "
141 "for addba resp frame\n", sdata->dev->name);
142 return;
143 }
144
145 skb_reserve(skb, local->hw.extra_tx_headroom);
146 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
147 memset(mgmt, 0, 24);
148 memcpy(mgmt->da, da, ETH_ALEN);
149 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
150 if (sdata->vif.type == NL80211_IFTYPE_AP ||
151 sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
152 memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
153 else if (sdata->vif.type == NL80211_IFTYPE_STATION)
154 memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
155
156 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
157 IEEE80211_STYPE_ACTION);
158
159 skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
160 mgmt->u.action.category = WLAN_CATEGORY_BACK;
161 mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
162 mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
163
164 capab = (u16)(policy << 1); /* bit 1 aggregation policy */
165 capab |= (u16)(tid << 2); /* bit 5:2 TID number */
166 capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */
167
168 mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab);
169 mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
170 mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
171
172 ieee80211_tx_skb(sdata, skb, 1);
173}
174
175void ieee80211_process_addba_request(struct ieee80211_local *local,
176 struct sta_info *sta,
177 struct ieee80211_mgmt *mgmt,
178 size_t len)
179{
180 struct ieee80211_hw *hw = &local->hw;
181 struct ieee80211_conf *conf = &hw->conf;
182 struct tid_ampdu_rx *tid_agg_rx;
183 u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status;
184 u8 dialog_token;
185 int ret = -EOPNOTSUPP;
186
187 /* extract session parameters from addba request frame */
188 dialog_token = mgmt->u.action.u.addba_req.dialog_token;
189 timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
190 start_seq_num =
191 le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4;
192
193 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
194 ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
195 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
196 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
197
198 status = WLAN_STATUS_REQUEST_DECLINED;
199
200 /* sanity check for incoming parameters:
201 * check if configuration can support the BA policy
202	 * and if buffer size does not exceed max value */
203 /* XXX: check own ht delayed BA capability?? */
204 if (((ba_policy != 1)
205 && (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA)))
206 || (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
207 status = WLAN_STATUS_INVALID_QOS_PARAM;
208#ifdef CONFIG_MAC80211_HT_DEBUG
209 if (net_ratelimit())
210 printk(KERN_DEBUG "AddBA Req with bad params from "
211 "%pM on tid %u. policy %d, buffer size %d\n",
212 mgmt->sa, tid, ba_policy,
213 buf_size);
214#endif /* CONFIG_MAC80211_HT_DEBUG */
215 goto end_no_lock;
216 }
217 /* determine default buffer size */
218 if (buf_size == 0) {
219 struct ieee80211_supported_band *sband;
220
221 sband = local->hw.wiphy->bands[conf->channel->band];
222 buf_size = IEEE80211_MIN_AMPDU_BUF;
223 buf_size = buf_size << sband->ht_cap.ampdu_factor;
224 }
225
226
227 /* examine state machine */
228 spin_lock_bh(&sta->lock);
229
230 if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) {
231#ifdef CONFIG_MAC80211_HT_DEBUG
232 if (net_ratelimit())
233 printk(KERN_DEBUG "unexpected AddBA Req from "
234 "%pM on tid %u\n",
235 mgmt->sa, tid);
236#endif /* CONFIG_MAC80211_HT_DEBUG */
237 goto end;
238 }
239
240 /* prepare A-MPDU MLME for Rx aggregation */
241 sta->ampdu_mlme.tid_rx[tid] =
242 kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC);
243 if (!sta->ampdu_mlme.tid_rx[tid]) {
244#ifdef CONFIG_MAC80211_HT_DEBUG
245 if (net_ratelimit())
246 printk(KERN_ERR "allocate rx mlme to tid %d failed\n",
247 tid);
248#endif
249 goto end;
250 }
251 /* rx timer */
252 sta->ampdu_mlme.tid_rx[tid]->session_timer.function =
253 sta_rx_agg_session_timer_expired;
254 sta->ampdu_mlme.tid_rx[tid]->session_timer.data =
255 (unsigned long)&sta->timer_to_tid[tid];
256 init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
257
258 tid_agg_rx = sta->ampdu_mlme.tid_rx[tid];
259
260 /* prepare reordering buffer */
261 tid_agg_rx->reorder_buf =
262 kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC);
263 if (!tid_agg_rx->reorder_buf) {
264#ifdef CONFIG_MAC80211_HT_DEBUG
265 if (net_ratelimit())
266 printk(KERN_ERR "can not allocate reordering buffer "
267 "to tid %d\n", tid);
268#endif
269 kfree(sta->ampdu_mlme.tid_rx[tid]);
270 goto end;
271 }
272
273 if (local->ops->ampdu_action)
274 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START,
275 &sta->sta, tid, &start_seq_num);
276#ifdef CONFIG_MAC80211_HT_DEBUG
277 printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret);
278#endif /* CONFIG_MAC80211_HT_DEBUG */
279
280 if (ret) {
281 kfree(tid_agg_rx->reorder_buf);
282 kfree(tid_agg_rx);
283 sta->ampdu_mlme.tid_rx[tid] = NULL;
284 goto end;
285 }
286
287 /* change state and send addba resp */
288 sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL;
289 tid_agg_rx->dialog_token = dialog_token;
290 tid_agg_rx->ssn = start_seq_num;
291 tid_agg_rx->head_seq_num = start_seq_num;
292 tid_agg_rx->buf_size = buf_size;
293 tid_agg_rx->timeout = timeout;
294 tid_agg_rx->stored_mpdu_num = 0;
295 status = WLAN_STATUS_SUCCESS;
296end:
297 spin_unlock_bh(&sta->lock);
298
299end_no_lock:
300 ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
301 dialog_token, status, 1, buf_size, timeout);
302}
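Both the request parser above and the response builder pack the ADDBA parameter set the same way: aggregation policy in bit 1, TID in bits 5:2, buffer size in bits 15:6, with bit 0 left for A-MSDU support in the 802.11n parameter set. A stand-alone pack/unpack sketch of that field; the mask names are this sketch's own:

#include <stdint.h>
#include <stdio.h>

#define ADDBA_POLICY_MASK   0x0002  /* bit 1: immediate (1) vs delayed (0) block ack */
#define ADDBA_TID_MASK      0x003C  /* bits 5:2 */
#define ADDBA_BUF_SIZE_MASK 0xFFC0  /* bits 15:6 */

static uint16_t addba_pack(unsigned policy, unsigned tid, unsigned buf_size)
{
        return (uint16_t)((policy << 1) | (tid << 2) | (buf_size << 6));
}

int main(void)
{
        uint16_t capab = addba_pack(1, 5, 64);

        printf("policy=%u tid=%u buf_size=%u\n",
               (capab & ADDBA_POLICY_MASK) >> 1,
               (capab & ADDBA_TID_MASK) >> 2,
               (capab & ADDBA_BUF_SIZE_MASK) >> 6);
        return 0;
}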
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
new file mode 100644
index 000000000000..1df116d4d6e7
--- /dev/null
+++ b/net/mac80211/agg-tx.c
@@ -0,0 +1,701 @@
1/*
2 * HT handling
3 *
4 * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi>
5 * Copyright 2002-2005, Instant802 Networks, Inc.
6 * Copyright 2005-2006, Devicescape Software, Inc.
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2007-2009, Intel Corporation
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15
16#include <linux/ieee80211.h>
17#include <net/mac80211.h>
18#include "ieee80211_i.h"
19#include "wme.h"
20
21/**
22 * DOC: TX aggregation
23 *
24 * Aggregation on the TX side requires setting the hardware flag
25 * %IEEE80211_HW_AMPDU_AGGREGATION as well as, if present, the @ampdu_queues
26 * hardware parameter to the number of hardware AMPDU queues. If there are no
27 * hardware queues then the driver will (currently) have to do all frame
28 * buffering.
29 *
30 * When TX aggregation is started by some subsystem (usually the rate control
31 * algorithm would be appropriate) by calling the
32 * ieee80211_start_tx_ba_session() function, the driver will be notified via
33 * its @ampdu_action function, with the %IEEE80211_AMPDU_TX_START action.
34 *
35 * In response to that, the driver is later required to call the
36 * ieee80211_start_tx_ba_cb() (or ieee80211_start_tx_ba_cb_irqsafe())
37 * function, which will start the aggregation session.
38 *
39 * Similarly, when the aggregation session is stopped by
40 * ieee80211_stop_tx_ba_session(), the driver's @ampdu_action function will
41 * be called with the action %IEEE80211_AMPDU_TX_STOP. In this case, the
42 * call must not fail, and the driver must later call ieee80211_stop_tx_ba_cb()
43 * (or ieee80211_stop_tx_ba_cb_irqsafe()).
44 */
45
46static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
47 const u8 *da, u16 tid,
48 u8 dialog_token, u16 start_seq_num,
49 u16 agg_size, u16 timeout)
50{
51 struct ieee80211_local *local = sdata->local;
52 struct sk_buff *skb;
53 struct ieee80211_mgmt *mgmt;
54 u16 capab;
55
56 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
57
58 if (!skb) {
59 printk(KERN_ERR "%s: failed to allocate buffer "
60 "for addba request frame\n", sdata->dev->name);
61 return;
62 }
63 skb_reserve(skb, local->hw.extra_tx_headroom);
64 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
65 memset(mgmt, 0, 24);
66 memcpy(mgmt->da, da, ETH_ALEN);
67 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
68 if (sdata->vif.type == NL80211_IFTYPE_AP ||
69 sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
70 memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
71 else if (sdata->vif.type == NL80211_IFTYPE_STATION)
72 memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
73
74 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
75 IEEE80211_STYPE_ACTION);
76
77 skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req));
78
79 mgmt->u.action.category = WLAN_CATEGORY_BACK;
80 mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ;
81
82 mgmt->u.action.u.addba_req.dialog_token = dialog_token;
83 capab = (u16)(1 << 1); /* bit 1 aggregation policy */
84 capab |= (u16)(tid << 2); /* bit 5:2 TID number */
85	capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggregation */
86
87 mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab);
88
89 mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout);
90 mgmt->u.action.u.addba_req.start_seq_num =
91 cpu_to_le16(start_seq_num << 4);
92
93 ieee80211_tx_skb(sdata, skb, 1);
94}
95
96void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn)
97{
98 struct ieee80211_local *local = sdata->local;
99 struct sk_buff *skb;
100 struct ieee80211_bar *bar;
101 u16 bar_control = 0;
102
103 skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom);
104 if (!skb) {
105 printk(KERN_ERR "%s: failed to allocate buffer for "
106 "bar frame\n", sdata->dev->name);
107 return;
108 }
109 skb_reserve(skb, local->hw.extra_tx_headroom);
110 bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar));
111 memset(bar, 0, sizeof(*bar));
112 bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
113 IEEE80211_STYPE_BACK_REQ);
114 memcpy(bar->ra, ra, ETH_ALEN);
115 memcpy(bar->ta, sdata->dev->dev_addr, ETH_ALEN);
116 bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL;
117 bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA;
118 bar_control |= (u16)(tid << 12);
119 bar->control = cpu_to_le16(bar_control);
120 bar->start_seq_num = cpu_to_le16(ssn);
121
122 ieee80211_tx_skb(sdata, skb, 0);
123}
124
125static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
126 enum ieee80211_back_parties initiator)
127{
128 struct ieee80211_local *local = sta->local;
129 int ret;
130 u8 *state;
131
132 state = &sta->ampdu_mlme.tid_state_tx[tid];
133
134 if (local->hw.ampdu_queues) {
135 if (initiator) {
136 /*
137 * Stop the AC queue to avoid issues where we send
138 * unaggregated frames already before the delba.
139 */
140 ieee80211_stop_queue_by_reason(&local->hw,
141 local->hw.queues + sta->tid_to_tx_q[tid],
142 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
143 }
144
145 /*
146 * Pretend the driver woke the queue, just in case
147 * it disabled it before the session was stopped.
148 */
149 ieee80211_wake_queue(
150 &local->hw, local->hw.queues + sta->tid_to_tx_q[tid]);
151 }
152 *state = HT_AGG_STATE_REQ_STOP_BA_MSK |
153 (initiator << HT_AGG_STATE_INITIATOR_SHIFT);
154
155 ret = local->ops->ampdu_action(&local->hw, IEEE80211_AMPDU_TX_STOP,
156 &sta->sta, tid, NULL);
157
158 /* HW shall not deny going back to legacy */
159 if (WARN_ON(ret)) {
160 *state = HT_AGG_STATE_OPERATIONAL;
161 }
162
163 return ret;
164}
165
166/*
 167 * After sending an add Block Ack request, we activate a timer that
 168 * runs until the add Block Ack response arrives from the recipient.
 169 * If the timer expires, sta_addba_resp_timer_expired() is executed.
170 */
171static void sta_addba_resp_timer_expired(unsigned long data)
172{
 173 /* not an elegant detour, but there is no choice as the timer passes
 174 * only one argument, and both sta_info and TID are needed; the init
 175 * flow in sta_info_create gives the TID as data, while the timer_to_tid
 176 * array gives the sta_info back through container_of */
177 u16 tid = *(u8 *)data;
178 struct sta_info *sta = container_of((void *)data,
179 struct sta_info, timer_to_tid[tid]);
180 u8 *state;
181
182 state = &sta->ampdu_mlme.tid_state_tx[tid];
183
184 /* check if the TID waits for addBA response */
185 spin_lock_bh(&sta->lock);
186 if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
187 spin_unlock_bh(&sta->lock);
188 *state = HT_AGG_STATE_IDLE;
189#ifdef CONFIG_MAC80211_HT_DEBUG
190 printk(KERN_DEBUG "timer expired on tid %d but we are not "
191 "expecting addBA response there", tid);
192#endif
193 return;
194 }
195
196#ifdef CONFIG_MAC80211_HT_DEBUG
197 printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid);
198#endif
199
200 ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR);
201 spin_unlock_bh(&sta->lock);
202}
203
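The timer callback above recovers both the TID and the owning sta_info from a single unsigned long: timer_to_tid[i] is initialised to i, so the stored pointer encodes the index, and container_of() walks back to the parent structure. A stand-alone sketch of the same idea (illustrative types and names, not kernel code):

#include <stddef.h>

#define EXAMPLE_NUM_TIDS 16

#define example_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct example_owner {
	int state;
	unsigned char timer_to_tid[EXAMPLE_NUM_TIDS];	/* slot i holds the value i */
};

/* given &owner->timer_to_tid[tid] cast to unsigned long, recover both */
static struct example_owner *owner_from_timer_arg(unsigned long data, int *tid)
{
	unsigned char *slot = (unsigned char *)data;

	*tid = *slot;	/* the slot stores its own index */
	return example_container_of(slot - *tid, struct example_owner, timer_to_tid);
}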
204static inline int ieee80211_ac_from_tid(int tid)
205{
206 return ieee802_1d_to_ac[tid & 7];
207}
208
209int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
210{
211 struct ieee80211_local *local = hw_to_local(hw);
212 struct sta_info *sta;
213 struct ieee80211_sub_if_data *sdata;
214 u8 *state;
215 int i, qn = -1, ret = 0;
216 u16 start_seq_num;
217
218 if (WARN_ON(!local->ops->ampdu_action))
219 return -EINVAL;
220
221 if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION))
222 return -EINVAL;
223
224#ifdef CONFIG_MAC80211_HT_DEBUG
225 printk(KERN_DEBUG "Open BA session requested for %pM tid %u\n",
226 ra, tid);
227#endif /* CONFIG_MAC80211_HT_DEBUG */
228
229 if (hw->ampdu_queues && ieee80211_ac_from_tid(tid) == 0) {
230#ifdef CONFIG_MAC80211_HT_DEBUG
231 printk(KERN_DEBUG "rejecting on voice AC\n");
232#endif
233 return -EINVAL;
234 }
235
236 rcu_read_lock();
237
238 sta = sta_info_get(local, ra);
239 if (!sta) {
240#ifdef CONFIG_MAC80211_HT_DEBUG
241 printk(KERN_DEBUG "Could not find the station\n");
242#endif
243 ret = -ENOENT;
244 goto unlock;
245 }
246
247 /*
248 * The aggregation code is not prepared to handle
249 * anything but STA/AP due to the BSSID handling.
250 * IBSS could work in the code but isn't supported
251 * by drivers or the standard.
252 */
253 if (sta->sdata->vif.type != NL80211_IFTYPE_STATION &&
254 sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
255 sta->sdata->vif.type != NL80211_IFTYPE_AP) {
256 ret = -EINVAL;
257 goto unlock;
258 }
259
260 spin_lock_bh(&sta->lock);
261
262 sdata = sta->sdata;
263
264 /* we have tried too many times, receiver does not want A-MPDU */
265 if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) {
266 ret = -EBUSY;
267 goto err_unlock_sta;
268 }
269
270 state = &sta->ampdu_mlme.tid_state_tx[tid];
271 /* check if the TID is not in aggregation flow already */
272 if (*state != HT_AGG_STATE_IDLE) {
273#ifdef CONFIG_MAC80211_HT_DEBUG
274 printk(KERN_DEBUG "BA request denied - session is not "
275 "idle on tid %u\n", tid);
276#endif /* CONFIG_MAC80211_HT_DEBUG */
277 ret = -EAGAIN;
278 goto err_unlock_sta;
279 }
280
281 if (hw->ampdu_queues) {
282 spin_lock(&local->queue_stop_reason_lock);
283 /* reserve a new queue for this session */
284 for (i = 0; i < local->hw.ampdu_queues; i++) {
285 if (local->ampdu_ac_queue[i] < 0) {
286 qn = i;
287 local->ampdu_ac_queue[qn] =
288 ieee80211_ac_from_tid(tid);
289 break;
290 }
291 }
292 spin_unlock(&local->queue_stop_reason_lock);
293
294 if (qn < 0) {
295#ifdef CONFIG_MAC80211_HT_DEBUG
296 printk(KERN_DEBUG "BA request denied - "
297 "queue unavailable for tid %d\n", tid);
298#endif /* CONFIG_MAC80211_HT_DEBUG */
299 ret = -ENOSPC;
300 goto err_unlock_sta;
301 }
302
303 /*
304 * If we successfully allocate the session, we can't have
305 * anything going on on the queue this TID maps into, so
306 * stop it for now. This is a "virtual" stop using the same
307 * mechanism that drivers will use.
308 *
309 * XXX: queue up frames for this session in the sta_info
310 * struct instead to avoid hitting all other STAs.
311 */
312 ieee80211_stop_queue_by_reason(
313 &local->hw, hw->queues + qn,
314 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
315 }
316
317 /* prepare A-MPDU MLME for Tx aggregation */
318 sta->ampdu_mlme.tid_tx[tid] =
319 kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC);
320 if (!sta->ampdu_mlme.tid_tx[tid]) {
321#ifdef CONFIG_MAC80211_HT_DEBUG
322 if (net_ratelimit())
323 printk(KERN_ERR "allocate tx mlme to tid %d failed\n",
324 tid);
325#endif
326 ret = -ENOMEM;
327 goto err_return_queue;
328 }
329
330 /* Tx timer */
331 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function =
332 sta_addba_resp_timer_expired;
333 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data =
334 (unsigned long)&sta->timer_to_tid[tid];
335 init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
336
 337 /* Ok, the addBA frame hasn't been sent yet, but if the driver calls the
 338 * callback right away, it must see that the flow has begun */
339 *state |= HT_ADDBA_REQUESTED_MSK;
340
341 start_seq_num = sta->tid_seq[tid];
342
343 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START,
344 &sta->sta, tid, &start_seq_num);
345
346 if (ret) {
347#ifdef CONFIG_MAC80211_HT_DEBUG
348 printk(KERN_DEBUG "BA request denied - HW unavailable for"
349 " tid %d\n", tid);
350#endif /* CONFIG_MAC80211_HT_DEBUG */
351 *state = HT_AGG_STATE_IDLE;
352 goto err_free;
353 }
354 sta->tid_to_tx_q[tid] = qn;
355
356 spin_unlock_bh(&sta->lock);
357
358 /* send an addBA request */
359 sta->ampdu_mlme.dialog_token_allocator++;
360 sta->ampdu_mlme.tid_tx[tid]->dialog_token =
361 sta->ampdu_mlme.dialog_token_allocator;
362 sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num;
363
364 ieee80211_send_addba_request(sta->sdata, ra, tid,
365 sta->ampdu_mlme.tid_tx[tid]->dialog_token,
366 sta->ampdu_mlme.tid_tx[tid]->ssn,
367 0x40, 5000);
368 /* activate the timer for the recipient's addBA response */
369 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires =
370 jiffies + ADDBA_RESP_INTERVAL;
371 add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
372#ifdef CONFIG_MAC80211_HT_DEBUG
373 printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid);
374#endif
375 goto unlock;
376
377 err_free:
378 kfree(sta->ampdu_mlme.tid_tx[tid]);
379 sta->ampdu_mlme.tid_tx[tid] = NULL;
380 err_return_queue:
381 if (qn >= 0) {
382 /* We failed, so start queue again right away. */
383 ieee80211_wake_queue_by_reason(hw, hw->queues + qn,
384 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
385 /* give queue back to pool */
386 spin_lock(&local->queue_stop_reason_lock);
387 local->ampdu_ac_queue[qn] = -1;
388 spin_unlock(&local->queue_stop_reason_lock);
389 }
390 err_unlock_sta:
391 spin_unlock_bh(&sta->lock);
392 unlock:
393 rcu_read_unlock();
394 return ret;
395}
396EXPORT_SYMBOL(ieee80211_start_tx_ba_session);
397
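For context, a driver advertising IEEE80211_HW_AMPDU_AGGREGATION must provide the ampdu_action callback invoked above. The sketch below is a hypothetical minimal handler (the mydrv_ name and the do-nothing hardware paths are assumptions, not any real driver); it acknowledges start/stop and hands control back to mac80211 through the irqsafe callbacks:

static int mydrv_ampdu_action(struct ieee80211_hw *hw,
			      enum ieee80211_ampdu_mlme_action action,
			      struct ieee80211_sta *sta, u16 tid, u16 *ssn)
{
	switch (action) {
	case IEEE80211_AMPDU_TX_START:
		/* real hardware setup would go here; *ssn may be adjusted */
		ieee80211_start_tx_ba_cb_irqsafe(hw, sta->addr, tid);
		return 0;
	case IEEE80211_AMPDU_TX_STOP:
		ieee80211_stop_tx_ba_cb_irqsafe(hw, sta->addr, tid);
		return 0;
	case IEEE80211_AMPDU_RX_START:
	case IEEE80211_AMPDU_RX_STOP:
		return 0;	/* Rx reordering handled in software */
	default:
		return -EOPNOTSUPP;
	}
}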
398void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid)
399{
400 struct ieee80211_local *local = hw_to_local(hw);
401 struct sta_info *sta;
402 u8 *state;
403
404 if (tid >= STA_TID_NUM) {
405#ifdef CONFIG_MAC80211_HT_DEBUG
406 printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
407 tid, STA_TID_NUM);
408#endif
409 return;
410 }
411
412 rcu_read_lock();
413 sta = sta_info_get(local, ra);
414 if (!sta) {
415 rcu_read_unlock();
416#ifdef CONFIG_MAC80211_HT_DEBUG
417 printk(KERN_DEBUG "Could not find station: %pM\n", ra);
418#endif
419 return;
420 }
421
422 state = &sta->ampdu_mlme.tid_state_tx[tid];
423 spin_lock_bh(&sta->lock);
424
425 if (WARN_ON(!(*state & HT_ADDBA_REQUESTED_MSK))) {
426#ifdef CONFIG_MAC80211_HT_DEBUG
427 printk(KERN_DEBUG "addBA was not requested yet, state is %d\n",
428 *state);
429#endif
430 spin_unlock_bh(&sta->lock);
431 rcu_read_unlock();
432 return;
433 }
434
435 if (WARN_ON(*state & HT_ADDBA_DRV_READY_MSK))
436 goto out;
437
438 *state |= HT_ADDBA_DRV_READY_MSK;
439
440 if (*state == HT_AGG_STATE_OPERATIONAL) {
441#ifdef CONFIG_MAC80211_HT_DEBUG
442 printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid);
443#endif
444 if (hw->ampdu_queues) {
445 /*
446 * Wake up this queue, we stopped it earlier,
447 * this will in turn wake the entire AC.
448 */
449 ieee80211_wake_queue_by_reason(hw,
450 hw->queues + sta->tid_to_tx_q[tid],
451 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
452 }
453 }
454
455 out:
456 spin_unlock_bh(&sta->lock);
457 rcu_read_unlock();
458}
459EXPORT_SYMBOL(ieee80211_start_tx_ba_cb);
460
461void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
462 const u8 *ra, u16 tid)
463{
464 struct ieee80211_local *local = hw_to_local(hw);
465 struct ieee80211_ra_tid *ra_tid;
466 struct sk_buff *skb = dev_alloc_skb(0);
467
468 if (unlikely(!skb)) {
469#ifdef CONFIG_MAC80211_HT_DEBUG
470 if (net_ratelimit())
471 printk(KERN_WARNING "%s: Not enough memory, "
472 "dropping start BA session", skb->dev->name);
473#endif
474 return;
475 }
476 ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
477 memcpy(&ra_tid->ra, ra, ETH_ALEN);
478 ra_tid->tid = tid;
479
480 skb->pkt_type = IEEE80211_ADDBA_MSG;
481 skb_queue_tail(&local->skb_queue, skb);
482 tasklet_schedule(&local->tasklet);
483}
484EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
485
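The skb queued here carries only the (ra, tid) pair in its control buffer; the local tasklet later dequeues it and calls the non-irqsafe callback. A sketch of what such a consumer looks like (the real dispatch lives in mac80211's tasklet handler elsewhere; this illustrative form only mirrors it):

static void example_consume_ba_msg(struct ieee80211_local *local,
				   struct sk_buff *skb)
{
	struct ieee80211_ra_tid *ra_tid = (struct ieee80211_ra_tid *)&skb->cb;

	if (skb->pkt_type == IEEE80211_ADDBA_MSG)
		ieee80211_start_tx_ba_cb(&local->hw, ra_tid->ra, ra_tid->tid);
	else if (skb->pkt_type == IEEE80211_DELBA_MSG)
		ieee80211_stop_tx_ba_cb(&local->hw, ra_tid->ra, ra_tid->tid);

	dev_kfree_skb(skb);
}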
486int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
487 enum ieee80211_back_parties initiator)
488{
489 u8 *state;
490 int ret;
491
492 /* check if the TID is in aggregation */
493 state = &sta->ampdu_mlme.tid_state_tx[tid];
494 spin_lock_bh(&sta->lock);
495
496 if (*state != HT_AGG_STATE_OPERATIONAL) {
497 ret = -ENOENT;
498 goto unlock;
499 }
500
501#ifdef CONFIG_MAC80211_HT_DEBUG
502 printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n",
503 sta->sta.addr, tid);
504#endif /* CONFIG_MAC80211_HT_DEBUG */
505
506 ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator);
507
508 unlock:
509 spin_unlock_bh(&sta->lock);
510 return ret;
511}
512
513int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw,
514 u8 *ra, u16 tid,
515 enum ieee80211_back_parties initiator)
516{
517 struct ieee80211_local *local = hw_to_local(hw);
518 struct sta_info *sta;
519 int ret = 0;
520
521 if (WARN_ON(!local->ops->ampdu_action))
522 return -EINVAL;
523
524 if (tid >= STA_TID_NUM)
525 return -EINVAL;
526
527 rcu_read_lock();
528 sta = sta_info_get(local, ra);
529 if (!sta) {
530 rcu_read_unlock();
531 return -ENOENT;
532 }
533
534 ret = __ieee80211_stop_tx_ba_session(sta, tid, initiator);
535 rcu_read_unlock();
536 return ret;
537}
538EXPORT_SYMBOL(ieee80211_stop_tx_ba_session);
539
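Drivers and other mac80211 code use this exported entry point to tear a session down; __ieee80211_stop_tx_ba_session() above is the in-tree path when the sta_info is already at hand. A hedged usage sketch (helper name invented; the loop bound mirrors STA_TID_NUM, which is 16 here):

static void example_stop_all_tx_agg(struct ieee80211_hw *hw, u8 *sta_addr)
{
	u16 tid;

	/* request a stop for every TID; -ENOENT for idle sessions is harmless */
	for (tid = 0; tid < 16; tid++)
		ieee80211_stop_tx_ba_session(hw, sta_addr, tid, WLAN_BACK_INITIATOR);
}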
540void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
541{
542 struct ieee80211_local *local = hw_to_local(hw);
543 struct sta_info *sta;
544 u8 *state;
545
546 if (tid >= STA_TID_NUM) {
547#ifdef CONFIG_MAC80211_HT_DEBUG
548 printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
549 tid, STA_TID_NUM);
550#endif
551 return;
552 }
553
554#ifdef CONFIG_MAC80211_HT_DEBUG
555 printk(KERN_DEBUG "Stopping Tx BA session for %pM tid %d\n",
556 ra, tid);
557#endif /* CONFIG_MAC80211_HT_DEBUG */
558
559 rcu_read_lock();
560 sta = sta_info_get(local, ra);
561 if (!sta) {
562#ifdef CONFIG_MAC80211_HT_DEBUG
563 printk(KERN_DEBUG "Could not find station: %pM\n", ra);
564#endif
565 rcu_read_unlock();
566 return;
567 }
568 state = &sta->ampdu_mlme.tid_state_tx[tid];
569
570 /* NOTE: no need to use sta->lock in this state check, as
 571 * ieee80211_stop_tx_ba_session lets only one stop call
 572 * pass through per sta/tid
573 */
574 if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) {
575#ifdef CONFIG_MAC80211_HT_DEBUG
576 printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n");
577#endif
578 rcu_read_unlock();
579 return;
580 }
581
582 if (*state & HT_AGG_STATE_INITIATOR_MSK)
583 ieee80211_send_delba(sta->sdata, ra, tid,
584 WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
585
586 spin_lock_bh(&sta->lock);
587
588 if (*state & HT_AGG_STATE_INITIATOR_MSK &&
589 hw->ampdu_queues) {
590 /*
591 * Wake up this queue, we stopped it earlier,
592 * this will in turn wake the entire AC.
593 */
594 ieee80211_wake_queue_by_reason(hw,
595 hw->queues + sta->tid_to_tx_q[tid],
596 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
597 }
598
599 *state = HT_AGG_STATE_IDLE;
600 sta->ampdu_mlme.addba_req_num[tid] = 0;
601 kfree(sta->ampdu_mlme.tid_tx[tid]);
602 sta->ampdu_mlme.tid_tx[tid] = NULL;
603 spin_unlock_bh(&sta->lock);
604
605 rcu_read_unlock();
606}
607EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb);
608
609void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
610 const u8 *ra, u16 tid)
611{
612 struct ieee80211_local *local = hw_to_local(hw);
613 struct ieee80211_ra_tid *ra_tid;
614 struct sk_buff *skb = dev_alloc_skb(0);
615
616 if (unlikely(!skb)) {
617#ifdef CONFIG_MAC80211_HT_DEBUG
618 if (net_ratelimit())
619 printk(KERN_WARNING "%s: Not enough memory, "
620 "dropping stop BA session", skb->dev->name);
621#endif
622 return;
623 }
624 ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
625 memcpy(&ra_tid->ra, ra, ETH_ALEN);
626 ra_tid->tid = tid;
627
628 skb->pkt_type = IEEE80211_DELBA_MSG;
629 skb_queue_tail(&local->skb_queue, skb);
630 tasklet_schedule(&local->tasklet);
631}
632EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe);
633
634
635void ieee80211_process_addba_resp(struct ieee80211_local *local,
636 struct sta_info *sta,
637 struct ieee80211_mgmt *mgmt,
638 size_t len)
639{
640 struct ieee80211_hw *hw = &local->hw;
641 u16 capab;
642 u16 tid, start_seq_num;
643 u8 *state;
644
645 capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
646 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
647
648 state = &sta->ampdu_mlme.tid_state_tx[tid];
649
650 spin_lock_bh(&sta->lock);
651
652 if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
653 spin_unlock_bh(&sta->lock);
654 return;
655 }
656
657 if (mgmt->u.action.u.addba_resp.dialog_token !=
658 sta->ampdu_mlme.tid_tx[tid]->dialog_token) {
659 spin_unlock_bh(&sta->lock);
660#ifdef CONFIG_MAC80211_HT_DEBUG
661 printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid);
662#endif /* CONFIG_MAC80211_HT_DEBUG */
663 return;
664 }
665
666 del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
667#ifdef CONFIG_MAC80211_HT_DEBUG
668 printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid);
669#endif /* CONFIG_MAC80211_HT_DEBUG */
670 if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
671 == WLAN_STATUS_SUCCESS) {
672 u8 curstate = *state;
673
674 *state |= HT_ADDBA_RECEIVED_MSK;
675
676 if (hw->ampdu_queues && *state != curstate &&
677 *state == HT_AGG_STATE_OPERATIONAL) {
678 /*
679 * Wake up this queue, we stopped it earlier,
680 * this will in turn wake the entire AC.
681 */
682 ieee80211_wake_queue_by_reason(hw,
683 hw->queues + sta->tid_to_tx_q[tid],
684 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
685 }
686 sta->ampdu_mlme.addba_req_num[tid] = 0;
687
688 if (local->ops->ampdu_action) {
689 (void)local->ops->ampdu_action(hw,
690 IEEE80211_AMPDU_TX_RESUME,
691 &sta->sta, tid, &start_seq_num);
692 }
693#ifdef CONFIG_MAC80211_HT_DEBUG
694 printk(KERN_DEBUG "Resuming TX aggregation for tid %d\n", tid);
695#endif /* CONFIG_MAC80211_HT_DEBUG */
696 } else {
697 sta->ampdu_mlme.addba_req_num[tid]++;
698 ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR);
699 }
700 spin_unlock_bh(&sta->lock);
701}
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 9d4e4d846ec1..c43129efc3bf 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -133,6 +133,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
133 case WLAN_CIPHER_SUITE_CCMP: 133 case WLAN_CIPHER_SUITE_CCMP:
134 alg = ALG_CCMP; 134 alg = ALG_CCMP;
135 break; 135 break;
136 case WLAN_CIPHER_SUITE_AES_CMAC:
137 alg = ALG_AES_CMAC;
138 break;
136 default: 139 default:
137 return -EINVAL; 140 return -EINVAL;
138 } 141 }
@@ -275,6 +278,17 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
275 else 278 else
276 params.cipher = WLAN_CIPHER_SUITE_WEP104; 279 params.cipher = WLAN_CIPHER_SUITE_WEP104;
277 break; 280 break;
281 case ALG_AES_CMAC:
282 params.cipher = WLAN_CIPHER_SUITE_AES_CMAC;
283 seq[0] = key->u.aes_cmac.tx_pn[5];
284 seq[1] = key->u.aes_cmac.tx_pn[4];
285 seq[2] = key->u.aes_cmac.tx_pn[3];
286 seq[3] = key->u.aes_cmac.tx_pn[2];
287 seq[4] = key->u.aes_cmac.tx_pn[1];
288 seq[5] = key->u.aes_cmac.tx_pn[0];
289 params.seq = seq;
290 params.seq_len = 6;
291 break;
278 } 292 }
279 293
280 params.key = key->conf.key; 294 params.key = key->conf.key;
@@ -304,6 +318,22 @@ static int ieee80211_config_default_key(struct wiphy *wiphy,
304 return 0; 318 return 0;
305} 319}
306 320
321static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy,
322 struct net_device *dev,
323 u8 key_idx)
324{
325 struct ieee80211_sub_if_data *sdata;
326
327 rcu_read_lock();
328
329 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
330 ieee80211_set_default_mgmt_key(sdata, key_idx);
331
332 rcu_read_unlock();
333
334 return 0;
335}
336
307static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) 337static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
308{ 338{
309 struct ieee80211_sub_if_data *sdata = sta->sdata; 339 struct ieee80211_sub_if_data *sdata = sta->sdata;
@@ -311,11 +341,15 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
311 sinfo->filled = STATION_INFO_INACTIVE_TIME | 341 sinfo->filled = STATION_INFO_INACTIVE_TIME |
312 STATION_INFO_RX_BYTES | 342 STATION_INFO_RX_BYTES |
313 STATION_INFO_TX_BYTES | 343 STATION_INFO_TX_BYTES |
344 STATION_INFO_RX_PACKETS |
345 STATION_INFO_TX_PACKETS |
314 STATION_INFO_TX_BITRATE; 346 STATION_INFO_TX_BITRATE;
315 347
316 sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); 348 sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
317 sinfo->rx_bytes = sta->rx_bytes; 349 sinfo->rx_bytes = sta->rx_bytes;
318 sinfo->tx_bytes = sta->tx_bytes; 350 sinfo->tx_bytes = sta->tx_bytes;
351 sinfo->rx_packets = sta->rx_packets;
352 sinfo->tx_packets = sta->tx_packets;
319 353
320 if (sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { 354 if (sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) {
321 sinfo->filled |= STATION_INFO_SIGNAL; 355 sinfo->filled |= STATION_INFO_SIGNAL;
@@ -493,7 +527,8 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
493 527
494 kfree(old); 528 kfree(old);
495 529
496 return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); 530 return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON |
531 IEEE80211_IFCC_BEACON_ENABLED);
497} 532}
498 533
499static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, 534static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
@@ -553,7 +588,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
553 synchronize_rcu(); 588 synchronize_rcu();
554 kfree(old); 589 kfree(old);
555 590
556 return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); 591 return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON_ENABLED);
557} 592}
558 593
559/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */ 594/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
@@ -630,6 +665,10 @@ static void sta_apply_parameters(struct ieee80211_local *local,
630 sta->flags &= ~WLAN_STA_WME; 665 sta->flags &= ~WLAN_STA_WME;
631 if (params->station_flags & STATION_FLAG_WME) 666 if (params->station_flags & STATION_FLAG_WME)
632 sta->flags |= WLAN_STA_WME; 667 sta->flags |= WLAN_STA_WME;
668
669 sta->flags &= ~WLAN_STA_MFP;
670 if (params->station_flags & STATION_FLAG_MFP)
671 sta->flags |= WLAN_STA_MFP;
633 spin_unlock_bh(&sta->lock); 672 spin_unlock_bh(&sta->lock);
634 } 673 }
635 674
@@ -1141,6 +1180,125 @@ static int ieee80211_set_channel(struct wiphy *wiphy,
1141 return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); 1180 return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
1142} 1181}
1143 1182
1183static int set_mgmt_extra_ie_sta(struct ieee80211_sub_if_data *sdata,
1184 u8 subtype, u8 *ies, size_t ies_len)
1185{
1186 struct ieee80211_local *local = sdata->local;
1187 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1188
1189 switch (subtype) {
1190 case IEEE80211_STYPE_PROBE_REQ >> 4:
1191 if (local->ops->hw_scan)
1192 break;
1193 kfree(ifmgd->ie_probereq);
1194 ifmgd->ie_probereq = ies;
1195 ifmgd->ie_probereq_len = ies_len;
1196 return 0;
1197 case IEEE80211_STYPE_PROBE_RESP >> 4:
1198 kfree(ifmgd->ie_proberesp);
1199 ifmgd->ie_proberesp = ies;
1200 ifmgd->ie_proberesp_len = ies_len;
1201 return 0;
1202 case IEEE80211_STYPE_AUTH >> 4:
1203 kfree(ifmgd->ie_auth);
1204 ifmgd->ie_auth = ies;
1205 ifmgd->ie_auth_len = ies_len;
1206 return 0;
1207 case IEEE80211_STYPE_ASSOC_REQ >> 4:
1208 kfree(ifmgd->ie_assocreq);
1209 ifmgd->ie_assocreq = ies;
1210 ifmgd->ie_assocreq_len = ies_len;
1211 return 0;
1212 case IEEE80211_STYPE_REASSOC_REQ >> 4:
1213 kfree(ifmgd->ie_reassocreq);
1214 ifmgd->ie_reassocreq = ies;
1215 ifmgd->ie_reassocreq_len = ies_len;
1216 return 0;
1217 case IEEE80211_STYPE_DEAUTH >> 4:
1218 kfree(ifmgd->ie_deauth);
1219 ifmgd->ie_deauth = ies;
1220 ifmgd->ie_deauth_len = ies_len;
1221 return 0;
1222 case IEEE80211_STYPE_DISASSOC >> 4:
1223 kfree(ifmgd->ie_disassoc);
1224 ifmgd->ie_disassoc = ies;
1225 ifmgd->ie_disassoc_len = ies_len;
1226 return 0;
1227 }
1228
1229 return -EOPNOTSUPP;
1230}
1231
1232static int ieee80211_set_mgmt_extra_ie(struct wiphy *wiphy,
1233 struct net_device *dev,
1234 struct mgmt_extra_ie_params *params)
1235{
1236 struct ieee80211_sub_if_data *sdata;
1237 u8 *ies;
1238 size_t ies_len;
1239 int ret = -EOPNOTSUPP;
1240
1241 if (params->ies) {
1242 ies = kmemdup(params->ies, params->ies_len, GFP_KERNEL);
1243 if (ies == NULL)
1244 return -ENOMEM;
1245 ies_len = params->ies_len;
1246 } else {
1247 ies = NULL;
1248 ies_len = 0;
1249 }
1250
1251 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1252
1253 switch (sdata->vif.type) {
1254 case NL80211_IFTYPE_STATION:
1255 ret = set_mgmt_extra_ie_sta(sdata, params->subtype,
1256 ies, ies_len);
1257 break;
1258 default:
1259 ret = -EOPNOTSUPP;
1260 break;
1261 }
1262
1263 if (ret)
1264 kfree(ies);
1265 return ret;
1266}
1267
1268#ifdef CONFIG_PM
1269static int ieee80211_suspend(struct wiphy *wiphy)
1270{
1271 return __ieee80211_suspend(wiphy_priv(wiphy));
1272}
1273
1274static int ieee80211_resume(struct wiphy *wiphy)
1275{
1276 return __ieee80211_resume(wiphy_priv(wiphy));
1277}
1278#else
1279#define ieee80211_suspend NULL
1280#define ieee80211_resume NULL
1281#endif
1282
1283static int ieee80211_scan(struct wiphy *wiphy,
1284 struct net_device *dev,
1285 struct cfg80211_scan_request *req)
1286{
1287 struct ieee80211_sub_if_data *sdata;
1288
1289 if (!netif_running(dev))
1290 return -ENETDOWN;
1291
1292 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1293
1294 if (sdata->vif.type != NL80211_IFTYPE_STATION &&
1295 sdata->vif.type != NL80211_IFTYPE_ADHOC &&
1296 sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
1297 return -EOPNOTSUPP;
1298
1299 return ieee80211_request_scan(sdata, req);
1300}
1301
1144struct cfg80211_ops mac80211_config_ops = { 1302struct cfg80211_ops mac80211_config_ops = {
1145 .add_virtual_intf = ieee80211_add_iface, 1303 .add_virtual_intf = ieee80211_add_iface,
1146 .del_virtual_intf = ieee80211_del_iface, 1304 .del_virtual_intf = ieee80211_del_iface,
@@ -1149,6 +1307,7 @@ struct cfg80211_ops mac80211_config_ops = {
1149 .del_key = ieee80211_del_key, 1307 .del_key = ieee80211_del_key,
1150 .get_key = ieee80211_get_key, 1308 .get_key = ieee80211_get_key,
1151 .set_default_key = ieee80211_config_default_key, 1309 .set_default_key = ieee80211_config_default_key,
1310 .set_default_mgmt_key = ieee80211_config_default_mgmt_key,
1152 .add_beacon = ieee80211_add_beacon, 1311 .add_beacon = ieee80211_add_beacon,
1153 .set_beacon = ieee80211_set_beacon, 1312 .set_beacon = ieee80211_set_beacon,
1154 .del_beacon = ieee80211_del_beacon, 1313 .del_beacon = ieee80211_del_beacon,
@@ -1169,4 +1328,8 @@ struct cfg80211_ops mac80211_config_ops = {
1169 .change_bss = ieee80211_change_bss, 1328 .change_bss = ieee80211_change_bss,
1170 .set_txq_params = ieee80211_set_txq_params, 1329 .set_txq_params = ieee80211_set_txq_params,
1171 .set_channel = ieee80211_set_channel, 1330 .set_channel = ieee80211_set_channel,
1331 .set_mgmt_extra_ie = ieee80211_set_mgmt_extra_ie,
1332 .suspend = ieee80211_suspend,
1333 .resume = ieee80211_resume,
1334 .scan = ieee80211_scan,
1172}; 1335};
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 2697a2fe608f..e37f557de3f3 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -57,11 +57,62 @@ DEBUGFS_READONLY_FILE(long_retry_limit, 20, "%d",
57 local->hw.conf.long_frame_max_tx_count); 57 local->hw.conf.long_frame_max_tx_count);
58DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d", 58DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d",
59 local->total_ps_buffered); 59 local->total_ps_buffered);
60DEBUGFS_READONLY_FILE(wep_iv, 20, "%#06x", 60DEBUGFS_READONLY_FILE(wep_iv, 20, "%#08x",
61 local->wep_iv & 0xffffff); 61 local->wep_iv & 0xffffff);
62DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", 62DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s",
63 local->rate_ctrl ? local->rate_ctrl->ops->name : "<unset>"); 63 local->rate_ctrl ? local->rate_ctrl->ops->name : "<unset>");
64 64
65static ssize_t tsf_read(struct file *file, char __user *user_buf,
66 size_t count, loff_t *ppos)
67{
68 struct ieee80211_local *local = file->private_data;
69 u64 tsf = 0;
70 char buf[100];
71
72 if (local->ops->get_tsf)
73 tsf = local->ops->get_tsf(local_to_hw(local));
74
75 snprintf(buf, sizeof(buf), "0x%016llx\n", (unsigned long long) tsf);
76
77 return simple_read_from_buffer(user_buf, count, ppos, buf, 19);
78}
79
80static ssize_t tsf_write(struct file *file,
81 const char __user *user_buf,
82 size_t count, loff_t *ppos)
83{
84 struct ieee80211_local *local = file->private_data;
85 unsigned long long tsf;
86 char buf[100];
87 size_t len;
88
89 len = min(count, sizeof(buf) - 1);
90 if (copy_from_user(buf, user_buf, len))
91 return -EFAULT;
92 buf[len] = '\0';
93
94 if (strncmp(buf, "reset", 5) == 0) {
95 if (local->ops->reset_tsf) {
96 local->ops->reset_tsf(local_to_hw(local));
97 printk(KERN_INFO "%s: debugfs reset TSF\n", wiphy_name(local->hw.wiphy));
98 }
99 } else {
 100 tsf = simple_strtoull(buf, NULL, 0);
101 if (local->ops->set_tsf) {
102 local->ops->set_tsf(local_to_hw(local), tsf);
103 printk(KERN_INFO "%s: debugfs set TSF to %#018llx\n", wiphy_name(local->hw.wiphy), tsf);
104 }
105 }
106
107 return count;
108}
109
110static const struct file_operations tsf_ops = {
111 .read = tsf_read,
112 .write = tsf_write,
113 .open = mac80211_open_file_generic
114};
115
65/* statistics stuff */ 116/* statistics stuff */
66 117
67#define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) \ 118#define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) \
@@ -136,8 +187,6 @@ DEBUGFS_STATS_FILE(multicast_received_frame_count, 20, "%u",
136 local->dot11MulticastReceivedFrameCount); 187 local->dot11MulticastReceivedFrameCount);
137DEBUGFS_STATS_FILE(transmitted_frame_count, 20, "%u", 188DEBUGFS_STATS_FILE(transmitted_frame_count, 20, "%u",
138 local->dot11TransmittedFrameCount); 189 local->dot11TransmittedFrameCount);
139DEBUGFS_STATS_FILE(wep_undecryptable_count, 20, "%u",
140 local->dot11WEPUndecryptableCount);
141#ifdef CONFIG_MAC80211_DEBUG_COUNTERS 190#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
142DEBUGFS_STATS_FILE(tx_handlers_drop, 20, "%u", 191DEBUGFS_STATS_FILE(tx_handlers_drop, 20, "%u",
143 local->tx_handlers_drop); 192 local->tx_handlers_drop);
@@ -204,6 +253,7 @@ void debugfs_hw_add(struct ieee80211_local *local)
204 DEBUGFS_ADD(long_retry_limit); 253 DEBUGFS_ADD(long_retry_limit);
205 DEBUGFS_ADD(total_ps_buffered); 254 DEBUGFS_ADD(total_ps_buffered);
206 DEBUGFS_ADD(wep_iv); 255 DEBUGFS_ADD(wep_iv);
256 DEBUGFS_ADD(tsf);
207 257
208 statsd = debugfs_create_dir("statistics", phyd); 258 statsd = debugfs_create_dir("statistics", phyd);
209 local->debugfs.statistics = statsd; 259 local->debugfs.statistics = statsd;
@@ -221,7 +271,6 @@ void debugfs_hw_add(struct ieee80211_local *local)
221 DEBUGFS_STATS_ADD(received_fragment_count); 271 DEBUGFS_STATS_ADD(received_fragment_count);
222 DEBUGFS_STATS_ADD(multicast_received_frame_count); 272 DEBUGFS_STATS_ADD(multicast_received_frame_count);
223 DEBUGFS_STATS_ADD(transmitted_frame_count); 273 DEBUGFS_STATS_ADD(transmitted_frame_count);
224 DEBUGFS_STATS_ADD(wep_undecryptable_count);
225#ifdef CONFIG_MAC80211_DEBUG_COUNTERS 274#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
226 DEBUGFS_STATS_ADD(tx_handlers_drop); 275 DEBUGFS_STATS_ADD(tx_handlers_drop);
227 DEBUGFS_STATS_ADD(tx_handlers_queued); 276 DEBUGFS_STATS_ADD(tx_handlers_queued);
@@ -258,6 +307,7 @@ void debugfs_hw_del(struct ieee80211_local *local)
258 DEBUGFS_DEL(long_retry_limit); 307 DEBUGFS_DEL(long_retry_limit);
259 DEBUGFS_DEL(total_ps_buffered); 308 DEBUGFS_DEL(total_ps_buffered);
260 DEBUGFS_DEL(wep_iv); 309 DEBUGFS_DEL(wep_iv);
310 DEBUGFS_DEL(tsf);
261 311
262 DEBUGFS_STATS_DEL(transmitted_fragment_count); 312 DEBUGFS_STATS_DEL(transmitted_fragment_count);
263 DEBUGFS_STATS_DEL(multicast_transmitted_frame_count); 313 DEBUGFS_STATS_DEL(multicast_transmitted_frame_count);
@@ -268,7 +318,6 @@ void debugfs_hw_del(struct ieee80211_local *local)
268 DEBUGFS_STATS_DEL(received_fragment_count); 318 DEBUGFS_STATS_DEL(received_fragment_count);
269 DEBUGFS_STATS_DEL(multicast_received_frame_count); 319 DEBUGFS_STATS_DEL(multicast_received_frame_count);
270 DEBUGFS_STATS_DEL(transmitted_frame_count); 320 DEBUGFS_STATS_DEL(transmitted_frame_count);
271 DEBUGFS_STATS_DEL(wep_undecryptable_count);
272 DEBUGFS_STATS_DEL(num_scans); 321 DEBUGFS_STATS_DEL(num_scans);
273#ifdef CONFIG_MAC80211_DEBUG_COUNTERS 322#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
274 DEBUGFS_STATS_DEL(tx_handlers_drop); 323 DEBUGFS_STATS_DEL(tx_handlers_drop);
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 6424ac565ae0..99c752588b30 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -76,6 +76,9 @@ static ssize_t key_algorithm_read(struct file *file,
76 case ALG_CCMP: 76 case ALG_CCMP:
77 alg = "CCMP\n"; 77 alg = "CCMP\n";
78 break; 78 break;
79 case ALG_AES_CMAC:
80 alg = "AES-128-CMAC\n";
81 break;
79 default: 82 default:
80 return 0; 83 return 0;
81 } 84 }
@@ -105,6 +108,12 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
105 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", 108 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
106 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); 109 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]);
107 break; 110 break;
111 case ALG_AES_CMAC:
112 tpn = key->u.aes_cmac.tx_pn;
113 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
114 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4],
115 tpn[5]);
116 break;
108 default: 117 default:
109 return 0; 118 return 0;
110 } 119 }
@@ -142,6 +151,14 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
142 } 151 }
143 len = p - buf; 152 len = p - buf;
144 break; 153 break;
154 case ALG_AES_CMAC:
155 rpn = key->u.aes_cmac.rx_pn;
156 p += scnprintf(p, sizeof(buf)+buf-p,
157 "%02x%02x%02x%02x%02x%02x\n",
158 rpn[0], rpn[1], rpn[2],
159 rpn[3], rpn[4], rpn[5]);
160 len = p - buf;
161 break;
145 default: 162 default:
146 return 0; 163 return 0;
147 } 164 }
@@ -156,13 +173,40 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf,
156 char buf[20]; 173 char buf[20];
157 int len; 174 int len;
158 175
159 if (key->conf.alg != ALG_CCMP) 176 switch (key->conf.alg) {
177 case ALG_CCMP:
178 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays);
179 break;
180 case ALG_AES_CMAC:
181 len = scnprintf(buf, sizeof(buf), "%u\n",
182 key->u.aes_cmac.replays);
183 break;
184 default:
160 return 0; 185 return 0;
161 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); 186 }
162 return simple_read_from_buffer(userbuf, count, ppos, buf, len); 187 return simple_read_from_buffer(userbuf, count, ppos, buf, len);
163} 188}
164KEY_OPS(replays); 189KEY_OPS(replays);
165 190
191static ssize_t key_icverrors_read(struct file *file, char __user *userbuf,
192 size_t count, loff_t *ppos)
193{
194 struct ieee80211_key *key = file->private_data;
195 char buf[20];
196 int len;
197
198 switch (key->conf.alg) {
199 case ALG_AES_CMAC:
200 len = scnprintf(buf, sizeof(buf), "%u\n",
201 key->u.aes_cmac.icverrors);
202 break;
203 default:
204 return 0;
205 }
206 return simple_read_from_buffer(userbuf, count, ppos, buf, len);
207}
208KEY_OPS(icverrors);
209
166static ssize_t key_key_read(struct file *file, char __user *userbuf, 210static ssize_t key_key_read(struct file *file, char __user *userbuf,
167 size_t count, loff_t *ppos) 211 size_t count, loff_t *ppos)
168{ 212{
@@ -222,6 +266,7 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key)
222 DEBUGFS_ADD(tx_spec); 266 DEBUGFS_ADD(tx_spec);
223 DEBUGFS_ADD(rx_spec); 267 DEBUGFS_ADD(rx_spec);
224 DEBUGFS_ADD(replays); 268 DEBUGFS_ADD(replays);
269 DEBUGFS_ADD(icverrors);
225 DEBUGFS_ADD(key); 270 DEBUGFS_ADD(key);
226 DEBUGFS_ADD(ifindex); 271 DEBUGFS_ADD(ifindex);
227}; 272};
@@ -243,6 +288,7 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key)
243 DEBUGFS_DEL(tx_spec); 288 DEBUGFS_DEL(tx_spec);
244 DEBUGFS_DEL(rx_spec); 289 DEBUGFS_DEL(rx_spec);
245 DEBUGFS_DEL(replays); 290 DEBUGFS_DEL(replays);
291 DEBUGFS_DEL(icverrors);
246 DEBUGFS_DEL(key); 292 DEBUGFS_DEL(key);
247 DEBUGFS_DEL(ifindex); 293 DEBUGFS_DEL(ifindex);
248 294
@@ -280,6 +326,35 @@ void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata)
280 sdata->common_debugfs.default_key = NULL; 326 sdata->common_debugfs.default_key = NULL;
281} 327}
282 328
329void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
330{
331 char buf[50];
332 struct ieee80211_key *key;
333
334 if (!sdata->debugfsdir)
335 return;
336
337 /* this is running under the key lock */
338
339 key = sdata->default_mgmt_key;
340 if (key) {
341 sprintf(buf, "../keys/%d", key->debugfs.cnt);
342 sdata->common_debugfs.default_mgmt_key =
343 debugfs_create_symlink("default_mgmt_key",
344 sdata->debugfsdir, buf);
345 } else
346 ieee80211_debugfs_key_remove_mgmt_default(sdata);
347}
348
349void ieee80211_debugfs_key_remove_mgmt_default(struct ieee80211_sub_if_data *sdata)
350{
351 if (!sdata)
352 return;
353
354 debugfs_remove(sdata->common_debugfs.default_mgmt_key);
355 sdata->common_debugfs.default_mgmt_key = NULL;
356}
357
283void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, 358void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key,
284 struct sta_info *sta) 359 struct sta_info *sta)
285{ 360{
diff --git a/net/mac80211/debugfs_key.h b/net/mac80211/debugfs_key.h
index b1a3754ee240..54717b4e1371 100644
--- a/net/mac80211/debugfs_key.h
+++ b/net/mac80211/debugfs_key.h
@@ -6,6 +6,10 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key);
6void ieee80211_debugfs_key_remove(struct ieee80211_key *key); 6void ieee80211_debugfs_key_remove(struct ieee80211_key *key);
7void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata); 7void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata);
8void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata); 8void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata);
9void ieee80211_debugfs_key_add_mgmt_default(
10 struct ieee80211_sub_if_data *sdata);
11void ieee80211_debugfs_key_remove_mgmt_default(
12 struct ieee80211_sub_if_data *sdata);
9void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, 13void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key,
10 struct sta_info *sta); 14 struct sta_info *sta);
11#else 15#else
@@ -19,6 +23,12 @@ static inline void ieee80211_debugfs_key_add_default(
19static inline void ieee80211_debugfs_key_remove_default( 23static inline void ieee80211_debugfs_key_remove_default(
20 struct ieee80211_sub_if_data *sdata) 24 struct ieee80211_sub_if_data *sdata)
21{} 25{}
26static inline void ieee80211_debugfs_key_add_mgmt_default(
27 struct ieee80211_sub_if_data *sdata)
28{}
29static inline void ieee80211_debugfs_key_remove_mgmt_default(
30 struct ieee80211_sub_if_data *sdata)
31{}
22static inline void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, 32static inline void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key,
23 struct sta_info *sta) 33 struct sta_info *sta)
24{} 34{}
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index c54219301724..e3420329f4e6 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -94,31 +94,31 @@ IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC);
94IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC); 94IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC);
95IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC); 95IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC);
96 96
97/* STA/IBSS attributes */ 97/* STA attributes */
98IEEE80211_IF_FILE(state, u.sta.state, DEC); 98IEEE80211_IF_FILE(state, u.mgd.state, DEC);
99IEEE80211_IF_FILE(bssid, u.sta.bssid, MAC); 99IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
100IEEE80211_IF_FILE(prev_bssid, u.sta.prev_bssid, MAC); 100IEEE80211_IF_FILE(prev_bssid, u.mgd.prev_bssid, MAC);
101IEEE80211_IF_FILE(ssid_len, u.sta.ssid_len, SIZE); 101IEEE80211_IF_FILE(ssid_len, u.mgd.ssid_len, SIZE);
102IEEE80211_IF_FILE(aid, u.sta.aid, DEC); 102IEEE80211_IF_FILE(aid, u.mgd.aid, DEC);
103IEEE80211_IF_FILE(ap_capab, u.sta.ap_capab, HEX); 103IEEE80211_IF_FILE(ap_capab, u.mgd.ap_capab, HEX);
104IEEE80211_IF_FILE(capab, u.sta.capab, HEX); 104IEEE80211_IF_FILE(capab, u.mgd.capab, HEX);
105IEEE80211_IF_FILE(extra_ie_len, u.sta.extra_ie_len, SIZE); 105IEEE80211_IF_FILE(extra_ie_len, u.mgd.extra_ie_len, SIZE);
106IEEE80211_IF_FILE(auth_tries, u.sta.auth_tries, DEC); 106IEEE80211_IF_FILE(auth_tries, u.mgd.auth_tries, DEC);
107IEEE80211_IF_FILE(assoc_tries, u.sta.assoc_tries, DEC); 107IEEE80211_IF_FILE(assoc_tries, u.mgd.assoc_tries, DEC);
108IEEE80211_IF_FILE(auth_algs, u.sta.auth_algs, HEX); 108IEEE80211_IF_FILE(auth_algs, u.mgd.auth_algs, HEX);
109IEEE80211_IF_FILE(auth_alg, u.sta.auth_alg, DEC); 109IEEE80211_IF_FILE(auth_alg, u.mgd.auth_alg, DEC);
110IEEE80211_IF_FILE(auth_transaction, u.sta.auth_transaction, DEC); 110IEEE80211_IF_FILE(auth_transaction, u.mgd.auth_transaction, DEC);
111 111
112static ssize_t ieee80211_if_fmt_flags( 112static ssize_t ieee80211_if_fmt_flags(
113 const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) 113 const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
114{ 114{
115 return scnprintf(buf, buflen, "%s%s%s%s%s%s%s\n", 115 return scnprintf(buf, buflen, "%s%s%s%s%s%s%s\n",
116 sdata->u.sta.flags & IEEE80211_STA_SSID_SET ? "SSID\n" : "", 116 sdata->u.mgd.flags & IEEE80211_STA_SSID_SET ? "SSID\n" : "",
117 sdata->u.sta.flags & IEEE80211_STA_BSSID_SET ? "BSSID\n" : "", 117 sdata->u.mgd.flags & IEEE80211_STA_BSSID_SET ? "BSSID\n" : "",
118 sdata->u.sta.flags & IEEE80211_STA_PREV_BSSID_SET ? "prev BSSID\n" : "", 118 sdata->u.mgd.flags & IEEE80211_STA_PREV_BSSID_SET ? "prev BSSID\n" : "",
119 sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "", 119 sdata->u.mgd.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "",
120 sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "", 120 sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "",
121 sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "", 121 sdata->u.mgd.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "",
122 sdata->vif.bss_conf.use_cts_prot ? "CTS prot\n" : ""); 122 sdata->vif.bss_conf.use_cts_prot ? "CTS prot\n" : "");
123} 123}
124__IEEE80211_IF_FILE(flags); 124__IEEE80211_IF_FILE(flags);
@@ -283,9 +283,11 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
283#endif 283#endif
284 break; 284 break;
285 case NL80211_IFTYPE_STATION: 285 case NL80211_IFTYPE_STATION:
286 case NL80211_IFTYPE_ADHOC:
287 add_sta_files(sdata); 286 add_sta_files(sdata);
288 break; 287 break;
288 case NL80211_IFTYPE_ADHOC:
289 /* XXX */
290 break;
289 case NL80211_IFTYPE_AP: 291 case NL80211_IFTYPE_AP:
290 add_ap_files(sdata); 292 add_ap_files(sdata);
291 break; 293 break;
@@ -418,9 +420,11 @@ static void del_files(struct ieee80211_sub_if_data *sdata)
418#endif 420#endif
419 break; 421 break;
420 case NL80211_IFTYPE_STATION: 422 case NL80211_IFTYPE_STATION:
421 case NL80211_IFTYPE_ADHOC:
422 del_sta_files(sdata); 423 del_sta_files(sdata);
423 break; 424 break;
425 case NL80211_IFTYPE_ADHOC:
426 /* XXX */
427 break;
424 case NL80211_IFTYPE_AP: 428 case NL80211_IFTYPE_AP:
425 del_ap_files(sdata); 429 del_ap_files(sdata);
426 break; 430 break;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index a2fbe0131312..90230c718b5b 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -67,14 +67,15 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
67 char buf[100]; 67 char buf[100];
68 struct sta_info *sta = file->private_data; 68 struct sta_info *sta = file->private_data;
69 u32 staflags = get_sta_flags(sta); 69 u32 staflags = get_sta_flags(sta);
70 int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s", 70 int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s",
71 staflags & WLAN_STA_AUTH ? "AUTH\n" : "", 71 staflags & WLAN_STA_AUTH ? "AUTH\n" : "",
72 staflags & WLAN_STA_ASSOC ? "ASSOC\n" : "", 72 staflags & WLAN_STA_ASSOC ? "ASSOC\n" : "",
73 staflags & WLAN_STA_PS ? "PS\n" : "", 73 staflags & WLAN_STA_PS ? "PS\n" : "",
74 staflags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "", 74 staflags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "",
75 staflags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "", 75 staflags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "",
76 staflags & WLAN_STA_WME ? "WME\n" : "", 76 staflags & WLAN_STA_WME ? "WME\n" : "",
77 staflags & WLAN_STA_WDS ? "WDS\n" : ""); 77 staflags & WLAN_STA_WDS ? "WDS\n" : "",
78 staflags & WLAN_STA_MFP ? "MFP\n" : "");
78 return simple_read_from_buffer(userbuf, count, ppos, buf, res); 79 return simple_read_from_buffer(userbuf, count, ppos, buf, res);
79} 80}
80STA_OPS(flags); 81STA_OPS(flags);
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index c5c0c5271096..4e3c72f20de7 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -17,8 +17,7 @@
17#include <net/wireless.h> 17#include <net/wireless.h>
18#include <net/mac80211.h> 18#include <net/mac80211.h>
19#include "ieee80211_i.h" 19#include "ieee80211_i.h"
20#include "sta_info.h" 20#include "rate.h"
21#include "wme.h"
22 21
23void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, 22void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
24 struct ieee80211_ht_cap *ht_cap_ie, 23 struct ieee80211_ht_cap *ht_cap_ie,
@@ -95,7 +94,9 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
95{ 94{
96 struct ieee80211_local *local = sdata->local; 95 struct ieee80211_local *local = sdata->local;
97 struct ieee80211_supported_band *sband; 96 struct ieee80211_supported_band *sband;
97 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
98 struct ieee80211_bss_ht_conf ht; 98 struct ieee80211_bss_ht_conf ht;
99 struct sta_info *sta;
99 u32 changed = 0; 100 u32 changed = 0;
100 bool enable_ht = true, ht_changed; 101 bool enable_ht = true, ht_changed;
101 enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; 102 enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
@@ -130,14 +131,25 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
130 } 131 }
131 } 132 }
132 133
133 ht_changed = local->hw.conf.ht.enabled != enable_ht || 134 ht_changed = conf_is_ht(&local->hw.conf) != enable_ht ||
134 channel_type != local->hw.conf.ht.channel_type; 135 channel_type != local->hw.conf.channel_type;
135 136
136 local->oper_channel_type = channel_type; 137 local->oper_channel_type = channel_type;
137 local->hw.conf.ht.enabled = enable_ht;
138 138
139 if (ht_changed) 139 if (ht_changed) {
140 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_HT); 140 /* channel_type change automatically detected */
141 ieee80211_hw_config(local, 0);
142
143 rcu_read_lock();
144
145 sta = sta_info_get(local, ifmgd->bssid);
146 if (sta)
147 rate_control_rate_update(local, sband, sta,
148 IEEE80211_RC_HT_CHANGED);
149
150 rcu_read_unlock();
151
152 }
141 153
142 /* disable HT */ 154 /* disable HT */
143 if (!enable_ht) 155 if (!enable_ht)
@@ -154,108 +166,22 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
154 return changed; 166 return changed;
155} 167}
156 168
157static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, 169void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta)
158 const u8 *da, u16 tid,
159 u8 dialog_token, u16 start_seq_num,
160 u16 agg_size, u16 timeout)
161{ 170{
162 struct ieee80211_local *local = sdata->local; 171 int i;
163 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
164 struct sk_buff *skb;
165 struct ieee80211_mgmt *mgmt;
166 u16 capab;
167
168 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
169
170 if (!skb) {
171 printk(KERN_ERR "%s: failed to allocate buffer "
172 "for addba request frame\n", sdata->dev->name);
173 return;
174 }
175 skb_reserve(skb, local->hw.extra_tx_headroom);
176 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
177 memset(mgmt, 0, 24);
178 memcpy(mgmt->da, da, ETH_ALEN);
179 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
180 if (sdata->vif.type == NL80211_IFTYPE_AP)
181 memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
182 else
183 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
184
185 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
186 IEEE80211_STYPE_ACTION);
187
188 skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req));
189
190 mgmt->u.action.category = WLAN_CATEGORY_BACK;
191 mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ;
192
193 mgmt->u.action.u.addba_req.dialog_token = dialog_token;
194 capab = (u16)(1 << 1); /* bit 1 aggregation policy */
195 capab |= (u16)(tid << 2); /* bit 5:2 TID number */
196 capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggergation */
197
198 mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab);
199
200 mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout);
201 mgmt->u.action.u.addba_req.start_seq_num =
202 cpu_to_le16(start_seq_num << 4);
203
204 ieee80211_tx_skb(sdata, skb, 0);
205}
206
207static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
208 u8 dialog_token, u16 status, u16 policy,
209 u16 buf_size, u16 timeout)
210{
211 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
212 struct ieee80211_local *local = sdata->local;
213 struct sk_buff *skb;
214 struct ieee80211_mgmt *mgmt;
215 u16 capab;
216
217 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
218 172
219 if (!skb) { 173 for (i = 0; i < STA_TID_NUM; i++) {
220 printk(KERN_DEBUG "%s: failed to allocate buffer " 174 __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR);
221 "for addba resp frame\n", sdata->dev->name); 175 __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
222 return; 176 WLAN_REASON_QSTA_LEAVE_QBSS);
223 } 177 }
224
225 skb_reserve(skb, local->hw.extra_tx_headroom);
226 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
227 memset(mgmt, 0, 24);
228 memcpy(mgmt->da, da, ETH_ALEN);
229 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
230 if (sdata->vif.type == NL80211_IFTYPE_AP)
231 memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
232 else
233 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
234 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
235 IEEE80211_STYPE_ACTION);
236
237 skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
238 mgmt->u.action.category = WLAN_CATEGORY_BACK;
239 mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
240 mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
241
242 capab = (u16)(policy << 1); /* bit 1 aggregation policy */
243 capab |= (u16)(tid << 2); /* bit 5:2 TID number */
244 capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */
245
246 mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab);
247 mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
248 mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
249
250 ieee80211_tx_skb(sdata, skb, 0);
251} 178}
252 179
253static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, 180void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
254 const u8 *da, u16 tid, 181 const u8 *da, u16 tid,
255 u16 initiator, u16 reason_code) 182 u16 initiator, u16 reason_code)
256{ 183{
257 struct ieee80211_local *local = sdata->local; 184 struct ieee80211_local *local = sdata->local;
258 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
259 struct sk_buff *skb; 185 struct sk_buff *skb;
260 struct ieee80211_mgmt *mgmt; 186 struct ieee80211_mgmt *mgmt;
261 u16 params; 187 u16 params;
@@ -273,10 +199,12 @@ static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
273 memset(mgmt, 0, 24); 199 memset(mgmt, 0, 24);
274 memcpy(mgmt->da, da, ETH_ALEN); 200 memcpy(mgmt->da, da, ETH_ALEN);
275 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); 201 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
276 if (sdata->vif.type == NL80211_IFTYPE_AP) 202 if (sdata->vif.type == NL80211_IFTYPE_AP ||
203 sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
277 memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); 204 memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
278 else 205 else if (sdata->vif.type == NL80211_IFTYPE_STATION)
279 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); 206 memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
207
280 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | 208 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
281 IEEE80211_STYPE_ACTION); 209 IEEE80211_STYPE_ACTION);
282 210
@@ -290,770 +218,7 @@ static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
290 mgmt->u.action.u.delba.params = cpu_to_le16(params); 218 mgmt->u.action.u.delba.params = cpu_to_le16(params);
291 mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code); 219 mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code);
292 220
293 ieee80211_tx_skb(sdata, skb, 0); 221 ieee80211_tx_skb(sdata, skb, 1);
294}
295
296void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn)
297{
298 struct ieee80211_local *local = sdata->local;
299 struct sk_buff *skb;
300 struct ieee80211_bar *bar;
301 u16 bar_control = 0;
302
303 skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom);
304 if (!skb) {
305 printk(KERN_ERR "%s: failed to allocate buffer for "
306 "bar frame\n", sdata->dev->name);
307 return;
308 }
309 skb_reserve(skb, local->hw.extra_tx_headroom);
310 bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar));
311 memset(bar, 0, sizeof(*bar));
312 bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
313 IEEE80211_STYPE_BACK_REQ);
314 memcpy(bar->ra, ra, ETH_ALEN);
315 memcpy(bar->ta, sdata->dev->dev_addr, ETH_ALEN);
316 bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL;
317 bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA;
318 bar_control |= (u16)(tid << 12);
319 bar->control = cpu_to_le16(bar_control);
320 bar->start_seq_num = cpu_to_le16(ssn);
321
322 ieee80211_tx_skb(sdata, skb, 0);
323}
324
325void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid,
326 u16 initiator, u16 reason)
327{
328 struct ieee80211_local *local = sdata->local;
329 struct ieee80211_hw *hw = &local->hw;
330 struct sta_info *sta;
331 int ret, i;
332
333 rcu_read_lock();
334
335 sta = sta_info_get(local, ra);
336 if (!sta) {
337 rcu_read_unlock();
338 return;
339 }
340
341 /* check if TID is in operational state */
342 spin_lock_bh(&sta->lock);
343 if (sta->ampdu_mlme.tid_state_rx[tid]
344 != HT_AGG_STATE_OPERATIONAL) {
345 spin_unlock_bh(&sta->lock);
346 rcu_read_unlock();
347 return;
348 }
349 sta->ampdu_mlme.tid_state_rx[tid] =
350 HT_AGG_STATE_REQ_STOP_BA_MSK |
351 (initiator << HT_AGG_STATE_INITIATOR_SHIFT);
352 spin_unlock_bh(&sta->lock);
353
354 /* stop HW Rx aggregation. ampdu_action existence
355 * already verified in session init so we add the BUG_ON */
356 BUG_ON(!local->ops->ampdu_action);
357
358#ifdef CONFIG_MAC80211_HT_DEBUG
359 printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n",
360 ra, tid);
361#endif /* CONFIG_MAC80211_HT_DEBUG */
362
363 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP,
364 &sta->sta, tid, NULL);
365 if (ret)
366 printk(KERN_DEBUG "HW problem - can not stop rx "
367 "aggregation for tid %d\n", tid);
368
369 /* shutdown timer has not expired */
370 if (initiator != WLAN_BACK_TIMER)
371 del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
372
373 /* check if this is a self generated aggregation halt */
374 if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER)
375 ieee80211_send_delba(sdata, ra, tid, 0, reason);
376
377 /* free the reordering buffer */
378 for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) {
379 if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) {
380 /* release the reordered frames */
381 dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]);
382 sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--;
383 sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL;
384 }
385 }
386 /* free resources */
387 kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf);
388 kfree(sta->ampdu_mlme.tid_rx[tid]);
389 sta->ampdu_mlme.tid_rx[tid] = NULL;
390 sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE;
391
392 rcu_read_unlock();
393}
394
395
396/*
397 * After sending add Block Ack request we activated a timer until
398 * add Block Ack response will arrive from the recipient.
399 * If this timer expires sta_addba_resp_timer_expired will be executed.
400 */
401static void sta_addba_resp_timer_expired(unsigned long data)
402{
403 /* not an elegant detour, but there is no choice as the timer passes
404 * only one argument, and both sta_info and TID are needed, so init
405 * flow in sta_info_create gives the TID as data, while the timer_to_id
406 * array gives the sta through container_of */
407 u16 tid = *(u8 *)data;
408 struct sta_info *temp_sta = container_of((void *)data,
409 struct sta_info, timer_to_tid[tid]);
410
411 struct ieee80211_local *local = temp_sta->local;
412 struct ieee80211_hw *hw = &local->hw;
413 struct sta_info *sta;
414 u8 *state;
415
416 rcu_read_lock();
417
418 sta = sta_info_get(local, temp_sta->sta.addr);
419 if (!sta) {
420 rcu_read_unlock();
421 return;
422 }
423
424 state = &sta->ampdu_mlme.tid_state_tx[tid];
425 /* check if the TID waits for addBA response */
426 spin_lock_bh(&sta->lock);
427 if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
428 spin_unlock_bh(&sta->lock);
429 *state = HT_AGG_STATE_IDLE;
430#ifdef CONFIG_MAC80211_HT_DEBUG
431 printk(KERN_DEBUG "timer expired on tid %d but we are not "
432 "expecting addBA response there", tid);
433#endif
434 goto timer_expired_exit;
435 }
436
437#ifdef CONFIG_MAC80211_HT_DEBUG
438 printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid);
439#endif
440
441 /* go through the state check in stop_BA_session */
442 *state = HT_AGG_STATE_OPERATIONAL;
443 spin_unlock_bh(&sta->lock);
444 ieee80211_stop_tx_ba_session(hw, temp_sta->sta.addr, tid,
445 WLAN_BACK_INITIATOR);
446
447timer_expired_exit:
448 rcu_read_unlock();
449}
450
451void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr)
452{
453 struct ieee80211_local *local = sdata->local;
454 int i;
455
456 for (i = 0; i < STA_TID_NUM; i++) {
457 ieee80211_stop_tx_ba_session(&local->hw, addr, i,
458 WLAN_BACK_INITIATOR);
459 ieee80211_sta_stop_rx_ba_session(sdata, addr, i,
460 WLAN_BACK_RECIPIENT,
461 WLAN_REASON_QSTA_LEAVE_QBSS);
462 }
463}
464
465int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
466{
467 struct ieee80211_local *local = hw_to_local(hw);
468 struct sta_info *sta;
469 struct ieee80211_sub_if_data *sdata;
470 u16 start_seq_num;
471 u8 *state;
472 int ret = 0;
473
474 if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION))
475 return -EINVAL;
476
477#ifdef CONFIG_MAC80211_HT_DEBUG
478 printk(KERN_DEBUG "Open BA session requested for %pM tid %u\n",
479 ra, tid);
480#endif /* CONFIG_MAC80211_HT_DEBUG */
481
482 rcu_read_lock();
483
484 sta = sta_info_get(local, ra);
485 if (!sta) {
486#ifdef CONFIG_MAC80211_HT_DEBUG
487 printk(KERN_DEBUG "Could not find the station\n");
488#endif
489 ret = -ENOENT;
490 goto exit;
491 }
492
493 spin_lock_bh(&sta->lock);
494
495 /* we have tried too many times, receiver does not want A-MPDU */
496 if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) {
497 ret = -EBUSY;
498 goto err_unlock_sta;
499 }
500
501 state = &sta->ampdu_mlme.tid_state_tx[tid];
502 /* check if the TID is not in aggregation flow already */
503 if (*state != HT_AGG_STATE_IDLE) {
504#ifdef CONFIG_MAC80211_HT_DEBUG
505 printk(KERN_DEBUG "BA request denied - session is not "
506 "idle on tid %u\n", tid);
507#endif /* CONFIG_MAC80211_HT_DEBUG */
508 ret = -EAGAIN;
509 goto err_unlock_sta;
510 }
511
512 /* prepare A-MPDU MLME for Tx aggregation */
513 sta->ampdu_mlme.tid_tx[tid] =
514 kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC);
515 if (!sta->ampdu_mlme.tid_tx[tid]) {
516#ifdef CONFIG_MAC80211_HT_DEBUG
517 if (net_ratelimit())
 518			printk(KERN_ERR "failed to allocate tx mlme for tid %d\n",
519 tid);
520#endif
521 ret = -ENOMEM;
522 goto err_unlock_sta;
523 }
524 /* Tx timer */
525 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function =
526 sta_addba_resp_timer_expired;
527 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data =
528 (unsigned long)&sta->timer_to_tid[tid];
529 init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
530
531 if (hw->ampdu_queues) {
532 /* create a new queue for this aggregation */
533 ret = ieee80211_ht_agg_queue_add(local, sta, tid);
534
 535		/* if no queue is available for aggregation,
 536		 * don't switch to aggregation */
537 if (ret) {
538#ifdef CONFIG_MAC80211_HT_DEBUG
539 printk(KERN_DEBUG "BA request denied - "
540 "queue unavailable for tid %d\n", tid);
541#endif /* CONFIG_MAC80211_HT_DEBUG */
542 goto err_unlock_queue;
543 }
544 }
545 sdata = sta->sdata;
546
 547	/* OK, the ADDBA frame hasn't been sent yet, but if the driver calls the
 548	 * callback right away, it must see that the flow has begun */
549 *state |= HT_ADDBA_REQUESTED_MSK;
550
551 /* This is slightly racy because the queue isn't stopped */
552 start_seq_num = sta->tid_seq[tid];
553
554 if (local->ops->ampdu_action)
555 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START,
556 &sta->sta, tid, &start_seq_num);
557
558 if (ret) {
559 /* No need to requeue the packets in the agg queue, since we
560 * held the tx lock: no packet could be enqueued to the newly
561 * allocated queue */
562 if (hw->ampdu_queues)
563 ieee80211_ht_agg_queue_remove(local, sta, tid, 0);
564#ifdef CONFIG_MAC80211_HT_DEBUG
565 printk(KERN_DEBUG "BA request denied - HW unavailable for"
566 " tid %d\n", tid);
567#endif /* CONFIG_MAC80211_HT_DEBUG */
568 *state = HT_AGG_STATE_IDLE;
569 goto err_unlock_queue;
570 }
571
572 /* Will put all the packets in the new SW queue */
573 if (hw->ampdu_queues)
574 ieee80211_requeue(local, ieee802_1d_to_ac[tid]);
575 spin_unlock_bh(&sta->lock);
576
577 /* send an addBA request */
578 sta->ampdu_mlme.dialog_token_allocator++;
579 sta->ampdu_mlme.tid_tx[tid]->dialog_token =
580 sta->ampdu_mlme.dialog_token_allocator;
581 sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num;
582
583
584 ieee80211_send_addba_request(sta->sdata, ra, tid,
585 sta->ampdu_mlme.tid_tx[tid]->dialog_token,
586 sta->ampdu_mlme.tid_tx[tid]->ssn,
587 0x40, 5000);
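	/* the two literals above: 0x40 requests a 64-frame reorder window,
	 * and 5000 is the BA inactivity timeout field of the ADDBA Request,
	 * which 802.11 expresses in TUs (1 TU = 1024 usec), i.e. roughly
	 * five seconds */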
588 /* activate the timer for the recipient's addBA response */
589 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires =
590 jiffies + ADDBA_RESP_INTERVAL;
591 add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
592#ifdef CONFIG_MAC80211_HT_DEBUG
593 printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid);
594#endif
595 goto exit;
596
597err_unlock_queue:
598 kfree(sta->ampdu_mlme.tid_tx[tid]);
599 sta->ampdu_mlme.tid_tx[tid] = NULL;
600 ret = -EBUSY;
601err_unlock_sta:
602 spin_unlock_bh(&sta->lock);
603exit:
604 rcu_read_unlock();
605 return ret;
606}
607EXPORT_SYMBOL(ieee80211_start_tx_ba_session);
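
For context, this exported entry point is only half of the handshake: the driver's ampdu_action() callback has to acknowledge IEEE80211_AMPDU_TX_START before aggregation actually becomes operational. A condensed, hypothetical driver-side sketch of that contract (not taken from any real driver; a real one would also program its hardware queues) might look like:

	static int sketch_ampdu_action(struct ieee80211_hw *hw,
				       enum ieee80211_ampdu_mlme_action action,
				       struct ieee80211_sta *sta, u16 tid, u16 *ssn)
	{
		switch (action) {
		case IEEE80211_AMPDU_RX_START:
		case IEEE80211_AMPDU_RX_STOP:
			/* program the hardware BA/reorder engine for this tid */
			return 0;
		case IEEE80211_AMPDU_TX_START:
			/* *ssn was filled in by mac80211 from sta->tid_seq[tid];
			 * report readiness so HT_ADDBA_DRV_READY_MSK gets set */
			ieee80211_start_tx_ba_cb_irqsafe(hw, sta->addr, tid);
			return 0;
		case IEEE80211_AMPDU_TX_STOP:
			ieee80211_stop_tx_ba_cb_irqsafe(hw, sta->addr, tid);
			return 0;
		default:
			/* IEEE80211_AMPDU_TX_RESUME and anything newer */
			return -EOPNOTSUPP;
		}
	}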
608
609int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw,
610 u8 *ra, u16 tid,
611 enum ieee80211_back_parties initiator)
612{
613 struct ieee80211_local *local = hw_to_local(hw);
614 struct sta_info *sta;
615 u8 *state;
616 int ret = 0;
617
618 if (tid >= STA_TID_NUM)
619 return -EINVAL;
620
621 rcu_read_lock();
622 sta = sta_info_get(local, ra);
623 if (!sta) {
624 rcu_read_unlock();
625 return -ENOENT;
626 }
627
628 /* check if the TID is in aggregation */
629 state = &sta->ampdu_mlme.tid_state_tx[tid];
630 spin_lock_bh(&sta->lock);
631
632 if (*state != HT_AGG_STATE_OPERATIONAL) {
633 ret = -ENOENT;
634 goto stop_BA_exit;
635 }
636
637#ifdef CONFIG_MAC80211_HT_DEBUG
638 printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n",
639 ra, tid);
640#endif /* CONFIG_MAC80211_HT_DEBUG */
641
642 if (hw->ampdu_queues)
643 ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]);
644
645 *state = HT_AGG_STATE_REQ_STOP_BA_MSK |
646 (initiator << HT_AGG_STATE_INITIATOR_SHIFT);
647
648 if (local->ops->ampdu_action)
649 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP,
650 &sta->sta, tid, NULL);
651
 652	/* the HW denied going back to legacy (non-aggregated) operation */
653 if (ret) {
654 WARN_ON(ret != -EBUSY);
655 *state = HT_AGG_STATE_OPERATIONAL;
656 if (hw->ampdu_queues)
657 ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
658 goto stop_BA_exit;
659 }
660
661stop_BA_exit:
662 spin_unlock_bh(&sta->lock);
663 rcu_read_unlock();
664 return ret;
665}
666EXPORT_SYMBOL(ieee80211_stop_tx_ba_session);
667
668void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid)
669{
670 struct ieee80211_local *local = hw_to_local(hw);
671 struct sta_info *sta;
672 u8 *state;
673
674 if (tid >= STA_TID_NUM) {
675#ifdef CONFIG_MAC80211_HT_DEBUG
676 printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
677 tid, STA_TID_NUM);
678#endif
679 return;
680 }
681
682 rcu_read_lock();
683 sta = sta_info_get(local, ra);
684 if (!sta) {
685 rcu_read_unlock();
686#ifdef CONFIG_MAC80211_HT_DEBUG
687 printk(KERN_DEBUG "Could not find station: %pM\n", ra);
688#endif
689 return;
690 }
691
692 state = &sta->ampdu_mlme.tid_state_tx[tid];
693 spin_lock_bh(&sta->lock);
694
695 if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
696#ifdef CONFIG_MAC80211_HT_DEBUG
697 printk(KERN_DEBUG "addBA was not requested yet, state is %d\n",
698 *state);
699#endif
700 spin_unlock_bh(&sta->lock);
701 rcu_read_unlock();
702 return;
703 }
704
705 WARN_ON_ONCE(*state & HT_ADDBA_DRV_READY_MSK);
706
707 *state |= HT_ADDBA_DRV_READY_MSK;
708
709 if (*state == HT_AGG_STATE_OPERATIONAL) {
710#ifdef CONFIG_MAC80211_HT_DEBUG
 711		printk(KERN_DEBUG "Aggregation is on for tid %d\n", tid);
712#endif
713 if (hw->ampdu_queues)
714 ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
715 }
716 spin_unlock_bh(&sta->lock);
717 rcu_read_unlock();
718}
719EXPORT_SYMBOL(ieee80211_start_tx_ba_cb);
720
721void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
722{
723 struct ieee80211_local *local = hw_to_local(hw);
724 struct sta_info *sta;
725 u8 *state;
726 int agg_queue;
727
728 if (tid >= STA_TID_NUM) {
729#ifdef CONFIG_MAC80211_HT_DEBUG
730 printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
731 tid, STA_TID_NUM);
732#endif
733 return;
734 }
735
736#ifdef CONFIG_MAC80211_HT_DEBUG
737 printk(KERN_DEBUG "Stopping Tx BA session for %pM tid %d\n",
738 ra, tid);
739#endif /* CONFIG_MAC80211_HT_DEBUG */
740
741 rcu_read_lock();
742 sta = sta_info_get(local, ra);
743 if (!sta) {
744#ifdef CONFIG_MAC80211_HT_DEBUG
745 printk(KERN_DEBUG "Could not find station: %pM\n", ra);
746#endif
747 rcu_read_unlock();
748 return;
749 }
750 state = &sta->ampdu_mlme.tid_state_tx[tid];
751
752 /* NOTE: no need to use sta->lock in this state check, as
753 * ieee80211_stop_tx_ba_session will let only one stop call to
754 * pass through per sta/tid
755 */
756 if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) {
757#ifdef CONFIG_MAC80211_HT_DEBUG
758 printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n");
759#endif
760 rcu_read_unlock();
761 return;
762 }
763
764 if (*state & HT_AGG_STATE_INITIATOR_MSK)
765 ieee80211_send_delba(sta->sdata, ra, tid,
766 WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
767
768 if (hw->ampdu_queues) {
769 agg_queue = sta->tid_to_tx_q[tid];
770 ieee80211_ht_agg_queue_remove(local, sta, tid, 1);
771
 772		/* We just requeued all the frames that were in the
 773		 * removed queue; since we might miss a softirq we do
 774		 * netif_schedule_queue(). ieee80211_wake_queue() is not used
775 * here as this queue is not necessarily stopped
776 */
777 netif_schedule_queue(netdev_get_tx_queue(local->mdev,
778 agg_queue));
779 }
780 spin_lock_bh(&sta->lock);
781 *state = HT_AGG_STATE_IDLE;
782 sta->ampdu_mlme.addba_req_num[tid] = 0;
783 kfree(sta->ampdu_mlme.tid_tx[tid]);
784 sta->ampdu_mlme.tid_tx[tid] = NULL;
785 spin_unlock_bh(&sta->lock);
786
787 rcu_read_unlock();
788}
789EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb);
790
791void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
792 const u8 *ra, u16 tid)
793{
794 struct ieee80211_local *local = hw_to_local(hw);
795 struct ieee80211_ra_tid *ra_tid;
796 struct sk_buff *skb = dev_alloc_skb(0);
797
798 if (unlikely(!skb)) {
799#ifdef CONFIG_MAC80211_HT_DEBUG
800 if (net_ratelimit())
801 printk(KERN_WARNING "%s: Not enough memory, "
802 "dropping start BA session", skb->dev->name);
803#endif
804 return;
805 }
806 ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
807 memcpy(&ra_tid->ra, ra, ETH_ALEN);
808 ra_tid->tid = tid;
809
810 skb->pkt_type = IEEE80211_ADDBA_MSG;
811 skb_queue_tail(&local->skb_queue, skb);
812 tasklet_schedule(&local->tasklet);
813}
814EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
815
816void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
817 const u8 *ra, u16 tid)
818{
819 struct ieee80211_local *local = hw_to_local(hw);
820 struct ieee80211_ra_tid *ra_tid;
821 struct sk_buff *skb = dev_alloc_skb(0);
822
823 if (unlikely(!skb)) {
824#ifdef CONFIG_MAC80211_HT_DEBUG
825 if (net_ratelimit())
826 printk(KERN_WARNING "%s: Not enough memory, "
827 "dropping stop BA session", skb->dev->name);
828#endif
829 return;
830 }
831 ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
832 memcpy(&ra_tid->ra, ra, ETH_ALEN);
833 ra_tid->tid = tid;
834
835 skb->pkt_type = IEEE80211_DELBA_MSG;
836 skb_queue_tail(&local->skb_queue, skb);
837 tasklet_schedule(&local->tasklet);
838}
839EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe);
840
841/*
 842 * After accepting the AddBA Request we arm a timer which is reset
 843 * after each frame that arrives from the originator.
 844 * If this timer expires, ieee80211_sta_stop_rx_ba_session() is executed.
845 */
846static void sta_rx_agg_session_timer_expired(unsigned long data)
847{
 848	/* Not an elegant detour, but there is no choice: the timer passes
 849	 * only one argument, and the sta_info is needed here as well. The
 850	 * init flow in sta_info_create() points the timer data at
 851	 * timer_to_tid[tid], whose value is the TID, so container_of() on
 852	 * the slot-0 address yields the sta. */
852 u8 *ptid = (u8 *)data;
853 u8 *timer_to_id = ptid - *ptid;
854 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
855 timer_to_tid[0]);
856
857#ifdef CONFIG_MAC80211_HT_DEBUG
858 printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
859#endif
860 ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr,
861 (u16)*ptid, WLAN_BACK_TIMER,
862 WLAN_REASON_QSTA_TIMEOUT);
863}
864
865void ieee80211_process_addba_request(struct ieee80211_local *local,
866 struct sta_info *sta,
867 struct ieee80211_mgmt *mgmt,
868 size_t len)
869{
870 struct ieee80211_hw *hw = &local->hw;
871 struct ieee80211_conf *conf = &hw->conf;
872 struct tid_ampdu_rx *tid_agg_rx;
873 u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status;
874 u8 dialog_token;
875 int ret = -EOPNOTSUPP;
876
877 /* extract session parameters from addba request frame */
878 dialog_token = mgmt->u.action.u.addba_req.dialog_token;
879 timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
880 start_seq_num =
881 le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4;
882
883 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
884 ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
885 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
886 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
887
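	/* layout of the ADDBA parameter-set field unpacked above:
	 *   bit  0      A-MSDU supported (not used here)
	 *   bit  1      block ack policy (1 = immediate, 0 = delayed)
	 *   bits 2-5    TID
	 *   bits 6-15   buffer size
	 * e.g. capab = 0x1002 -> policy 1, tid 0, buf_size 64 */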
888 status = WLAN_STATUS_REQUEST_DECLINED;
889
890 /* sanity check for incoming parameters:
891 * check if configuration can support the BA policy
 892	 * and if the buffer size does not exceed the maximum value */
893 /* XXX: check own ht delayed BA capability?? */
894 if (((ba_policy != 1)
895 && (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA)))
896 || (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
897 status = WLAN_STATUS_INVALID_QOS_PARAM;
898#ifdef CONFIG_MAC80211_HT_DEBUG
899 if (net_ratelimit())
900 printk(KERN_DEBUG "AddBA Req with bad params from "
901 "%pM on tid %u. policy %d, buffer size %d\n",
902 mgmt->sa, tid, ba_policy,
903 buf_size);
904#endif /* CONFIG_MAC80211_HT_DEBUG */
905 goto end_no_lock;
906 }
907 /* determine default buffer size */
908 if (buf_size == 0) {
909 struct ieee80211_supported_band *sband;
910
911 sband = local->hw.wiphy->bands[conf->channel->band];
912 buf_size = IEEE80211_MIN_AMPDU_BUF;
913 buf_size = buf_size << sband->ht_cap.ampdu_factor;
914 }
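	/* IEEE80211_MIN_AMPDU_BUF is 8 and ampdu_factor is the local two-bit
	 * maximum A-MPDU length exponent, so the default window here is
	 * 8 << (0..3), i.e. 8, 16, 32 or 64 frames */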
915
916
917 /* examine state machine */
918 spin_lock_bh(&sta->lock);
919
920 if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) {
921#ifdef CONFIG_MAC80211_HT_DEBUG
922 if (net_ratelimit())
923 printk(KERN_DEBUG "unexpected AddBA Req from "
924 "%pM on tid %u\n",
925 mgmt->sa, tid);
926#endif /* CONFIG_MAC80211_HT_DEBUG */
927 goto end;
928 }
929
930 /* prepare A-MPDU MLME for Rx aggregation */
931 sta->ampdu_mlme.tid_rx[tid] =
932 kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC);
933 if (!sta->ampdu_mlme.tid_rx[tid]) {
934#ifdef CONFIG_MAC80211_HT_DEBUG
935 if (net_ratelimit())
 936			printk(KERN_ERR "failed to allocate rx mlme for tid %d\n",
937 tid);
938#endif
939 goto end;
940 }
941 /* rx timer */
942 sta->ampdu_mlme.tid_rx[tid]->session_timer.function =
943 sta_rx_agg_session_timer_expired;
944 sta->ampdu_mlme.tid_rx[tid]->session_timer.data =
945 (unsigned long)&sta->timer_to_tid[tid];
946 init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
947
948 tid_agg_rx = sta->ampdu_mlme.tid_rx[tid];
949
950 /* prepare reordering buffer */
951 tid_agg_rx->reorder_buf =
952 kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC);
953 if (!tid_agg_rx->reorder_buf) {
954#ifdef CONFIG_MAC80211_HT_DEBUG
955 if (net_ratelimit())
 956			printk(KERN_ERR "cannot allocate reordering buffer "
957 "to tid %d\n", tid);
958#endif
959 kfree(sta->ampdu_mlme.tid_rx[tid]);
960 goto end;
961 }
962 memset(tid_agg_rx->reorder_buf, 0,
963 buf_size * sizeof(struct sk_buff *));
964
965 if (local->ops->ampdu_action)
966 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START,
967 &sta->sta, tid, &start_seq_num);
968#ifdef CONFIG_MAC80211_HT_DEBUG
969 printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret);
970#endif /* CONFIG_MAC80211_HT_DEBUG */
971
972 if (ret) {
973 kfree(tid_agg_rx->reorder_buf);
974 kfree(tid_agg_rx);
975 sta->ampdu_mlme.tid_rx[tid] = NULL;
976 goto end;
977 }
978
979 /* change state and send addba resp */
980 sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL;
981 tid_agg_rx->dialog_token = dialog_token;
982 tid_agg_rx->ssn = start_seq_num;
983 tid_agg_rx->head_seq_num = start_seq_num;
984 tid_agg_rx->buf_size = buf_size;
985 tid_agg_rx->timeout = timeout;
986 tid_agg_rx->stored_mpdu_num = 0;
987 status = WLAN_STATUS_SUCCESS;
988end:
989 spin_unlock_bh(&sta->lock);
990
991end_no_lock:
992 ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
993 dialog_token, status, 1, buf_size, timeout);
994}
995
996void ieee80211_process_addba_resp(struct ieee80211_local *local,
997 struct sta_info *sta,
998 struct ieee80211_mgmt *mgmt,
999 size_t len)
1000{
1001 struct ieee80211_hw *hw = &local->hw;
1002 u16 capab;
1003 u16 tid, start_seq_num;
1004 u8 *state;
1005
1006 capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
1007 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
1008
1009 state = &sta->ampdu_mlme.tid_state_tx[tid];
1010
1011 spin_lock_bh(&sta->lock);
1012
1013 if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
1014 spin_unlock_bh(&sta->lock);
1015 return;
1016 }
1017
1018 if (mgmt->u.action.u.addba_resp.dialog_token !=
1019 sta->ampdu_mlme.tid_tx[tid]->dialog_token) {
1020 spin_unlock_bh(&sta->lock);
1021#ifdef CONFIG_MAC80211_HT_DEBUG
1022 printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid);
1023#endif /* CONFIG_MAC80211_HT_DEBUG */
1024 return;
1025 }
1026
1027 del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
1028#ifdef CONFIG_MAC80211_HT_DEBUG
1029	printk(KERN_DEBUG "switched off addBA timer for tid %d\n", tid);
1030#endif /* CONFIG_MAC80211_HT_DEBUG */
1031 if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
1032 == WLAN_STATUS_SUCCESS) {
1033 *state |= HT_ADDBA_RECEIVED_MSK;
1034 sta->ampdu_mlme.addba_req_num[tid] = 0;
1035
1036 if (*state == HT_AGG_STATE_OPERATIONAL &&
1037 local->hw.ampdu_queues)
1038 ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
1039
1040 if (local->ops->ampdu_action) {
1041 (void)local->ops->ampdu_action(hw,
1042 IEEE80211_AMPDU_TX_RESUME,
1043 &sta->sta, tid, &start_seq_num);
1044 }
1045#ifdef CONFIG_MAC80211_HT_DEBUG
1046 printk(KERN_DEBUG "Resuming TX aggregation for tid %d\n", tid);
1047#endif /* CONFIG_MAC80211_HT_DEBUG */
1048 spin_unlock_bh(&sta->lock);
1049 } else {
1050 sta->ampdu_mlme.addba_req_num[tid]++;
1051 /* this will allow the state check in stop_BA_session */
1052 *state = HT_AGG_STATE_OPERATIONAL;
1053 spin_unlock_bh(&sta->lock);
1054 ieee80211_stop_tx_ba_session(hw, sta->sta.addr, tid,
1055 WLAN_BACK_INITIATOR);
1056 }
1057}
1058
1059void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
new file mode 100644
index 000000000000..a96ce9dfc6b5
--- /dev/null
+++ b/net/mac80211/ibss.c
@@ -0,0 +1,905 @@
1/*
2 * IBSS mode implementation
3 * Copyright 2003-2008, Jouni Malinen <j@w1.fi>
4 * Copyright 2004, Instant802 Networks, Inc.
5 * Copyright 2005, Devicescape Software, Inc.
6 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
7 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
8 * Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14
15#include <linux/delay.h>
16#include <linux/if_ether.h>
17#include <linux/skbuff.h>
18#include <linux/if_arp.h>
19#include <linux/etherdevice.h>
20#include <linux/rtnetlink.h>
21#include <net/mac80211.h>
22#include <asm/unaligned.h>
23
24#include "ieee80211_i.h"
25#include "rate.h"
26
27#define IEEE80211_SCAN_INTERVAL (2 * HZ)
28#define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ)
29#define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ)
30
31#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ)
32#define IEEE80211_IBSS_MERGE_DELAY 0x400000
33#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ)
34
35#define IEEE80211_IBSS_MAX_STA_ENTRIES 128
36
37
38static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata,
39 struct ieee80211_mgmt *mgmt,
40 size_t len)
41{
42 u16 auth_alg, auth_transaction, status_code;
43
44 if (len < 24 + 6)
45 return;
46
47 auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
48 auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
49 status_code = le16_to_cpu(mgmt->u.auth.status_code);
50
51 /*
 52	 * The IEEE 802.11 standard does not require authentication in IBSS
53 * networks and most implementations do not seem to use it.
54 * However, try to reply to authentication attempts if someone
55 * has actually implemented this.
56 */
57 if (auth_alg == WLAN_AUTH_OPEN && auth_transaction == 1)
58 ieee80211_send_auth(sdata, 2, WLAN_AUTH_OPEN, NULL, 0,
59 sdata->u.ibss.bssid, 0);
60}
61
62static int __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
63 const u8 *bssid, const int beacon_int,
64 const int freq,
65 const size_t supp_rates_len,
66 const u8 *supp_rates,
67 const u16 capability)
68{
69 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
70 struct ieee80211_local *local = sdata->local;
71 int res = 0, rates, i, j;
72 struct sk_buff *skb;
73 struct ieee80211_mgmt *mgmt;
74 u8 *pos;
75 struct ieee80211_supported_band *sband;
76 union iwreq_data wrqu;
77
78 if (local->ops->reset_tsf) {
 79		/* Reset own TSF to allow time synchronization to work. */
80 local->ops->reset_tsf(local_to_hw(local));
81 }
82
83 if ((ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET) &&
84 memcmp(ifibss->bssid, bssid, ETH_ALEN) == 0)
85 return res;
86
87 skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
88 if (!skb) {
89 printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
90 "response\n", sdata->dev->name);
91 return -ENOMEM;
92 }
93
94 if (!(ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET)) {
95 /* Remove possible STA entries from other IBSS networks. */
96 sta_info_flush_delayed(sdata);
97 }
98
99 memcpy(ifibss->bssid, bssid, ETH_ALEN);
100 res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID);
101 if (res)
102 return res;
103
104 local->hw.conf.beacon_int = beacon_int >= 10 ? beacon_int : 10;
105
106 sdata->drop_unencrypted = capability &
107 WLAN_CAPABILITY_PRIVACY ? 1 : 0;
108
109 res = ieee80211_set_freq(sdata, freq);
110
111 if (res)
112 return res;
113
114 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
115
116 /* Build IBSS probe response */
117
118 skb_reserve(skb, local->hw.extra_tx_headroom);
119
120 mgmt = (struct ieee80211_mgmt *)
121 skb_put(skb, 24 + sizeof(mgmt->u.beacon));
122 memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon));
123 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
124 IEEE80211_STYPE_PROBE_RESP);
125 memset(mgmt->da, 0xff, ETH_ALEN);
126 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
127 memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN);
128 mgmt->u.beacon.beacon_int =
129 cpu_to_le16(local->hw.conf.beacon_int);
130 mgmt->u.beacon.capab_info = cpu_to_le16(capability);
131
132 pos = skb_put(skb, 2 + ifibss->ssid_len);
133 *pos++ = WLAN_EID_SSID;
134 *pos++ = ifibss->ssid_len;
135 memcpy(pos, ifibss->ssid, ifibss->ssid_len);
136
137 rates = supp_rates_len;
138 if (rates > 8)
139 rates = 8;
140 pos = skb_put(skb, 2 + rates);
141 *pos++ = WLAN_EID_SUPP_RATES;
142 *pos++ = rates;
143 memcpy(pos, supp_rates, rates);
144
145 if (sband->band == IEEE80211_BAND_2GHZ) {
146 pos = skb_put(skb, 2 + 1);
147 *pos++ = WLAN_EID_DS_PARAMS;
148 *pos++ = 1;
149 *pos++ = ieee80211_frequency_to_channel(freq);
150 }
151
152 pos = skb_put(skb, 2 + 2);
153 *pos++ = WLAN_EID_IBSS_PARAMS;
154 *pos++ = 2;
155 /* FIX: set ATIM window based on scan results */
156 *pos++ = 0;
157 *pos++ = 0;
158
159 if (supp_rates_len > 8) {
160 rates = supp_rates_len - 8;
161 pos = skb_put(skb, 2 + rates);
162 *pos++ = WLAN_EID_EXT_SUPP_RATES;
163 *pos++ = rates;
164 memcpy(pos, &supp_rates[8], rates);
165 }
166
167 ifibss->probe_resp = skb;
168
169 ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON |
170 IEEE80211_IFCC_BEACON_ENABLED);
171
172
173 rates = 0;
174 for (i = 0; i < supp_rates_len; i++) {
175 int bitrate = (supp_rates[i] & 0x7f) * 5;
176 for (j = 0; j < sband->n_bitrates; j++)
177 if (sband->bitrates[j].bitrate == bitrate)
178 rates |= BIT(j);
179 }
180
181 ieee80211_sta_def_wmm_params(sdata, supp_rates_len, supp_rates);
182
183 ifibss->flags |= IEEE80211_IBSS_PREV_BSSID_SET;
184 ifibss->state = IEEE80211_IBSS_MLME_JOINED;
185 mod_timer(&ifibss->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
186
187 memset(&wrqu, 0, sizeof(wrqu));
188 memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN);
189 wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL);
190
191 return res;
192}
193
194static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
195 struct ieee80211_bss *bss)
196{
197 return __ieee80211_sta_join_ibss(sdata,
198 bss->cbss.bssid,
199 bss->cbss.beacon_interval,
200 bss->cbss.channel->center_freq,
201 bss->supp_rates_len, bss->supp_rates,
202 bss->cbss.capability);
203}
204
205static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
206 struct ieee80211_mgmt *mgmt,
207 size_t len,
208 struct ieee80211_rx_status *rx_status,
209 struct ieee802_11_elems *elems,
210 bool beacon)
211{
212 struct ieee80211_local *local = sdata->local;
213 int freq;
214 struct ieee80211_bss *bss;
215 struct sta_info *sta;
216 struct ieee80211_channel *channel;
217 u64 beacon_timestamp, rx_timestamp;
218 u32 supp_rates = 0;
219 enum ieee80211_band band = rx_status->band;
220
221 if (elems->ds_params && elems->ds_params_len == 1)
222 freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
223 else
224 freq = rx_status->freq;
225
226 channel = ieee80211_get_channel(local->hw.wiphy, freq);
227
228 if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
229 return;
230
231 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates &&
232 memcmp(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) {
233 supp_rates = ieee80211_sta_get_rates(local, elems, band);
234
235 rcu_read_lock();
236
237 sta = sta_info_get(local, mgmt->sa);
238 if (sta) {
239 u32 prev_rates;
240
241 prev_rates = sta->sta.supp_rates[band];
242 /* make sure mandatory rates are always added */
243 sta->sta.supp_rates[band] = supp_rates |
244 ieee80211_mandatory_rates(local, band);
245
246#ifdef CONFIG_MAC80211_IBSS_DEBUG
247 if (sta->sta.supp_rates[band] != prev_rates)
248 printk(KERN_DEBUG "%s: updated supp_rates set "
249 "for %pM based on beacon info (0x%llx | "
250 "0x%llx -> 0x%llx)\n",
251 sdata->dev->name,
252 sta->sta.addr,
253 (unsigned long long) prev_rates,
254 (unsigned long long) supp_rates,
255 (unsigned long long) sta->sta.supp_rates[band]);
256#endif
257 } else
258 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates);
259
260 rcu_read_unlock();
261 }
262
263 bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems,
264 channel, beacon);
265 if (!bss)
266 return;
267
268 /* was just updated in ieee80211_bss_info_update */
269 beacon_timestamp = bss->cbss.tsf;
270
271 /* check if we need to merge IBSS */
272
273 /* merge only on beacons (???) */
274 if (!beacon)
275 goto put_bss;
276
277 /* we use a fixed BSSID */
278 if (sdata->u.ibss.flags & IEEE80211_IBSS_BSSID_SET)
279 goto put_bss;
280
281 /* not an IBSS */
282 if (!(bss->cbss.capability & WLAN_CAPABILITY_IBSS))
283 goto put_bss;
284
285 /* different channel */
286 if (bss->cbss.channel != local->oper_channel)
287 goto put_bss;
288
289 /* different SSID */
290 if (elems->ssid_len != sdata->u.ibss.ssid_len ||
291 memcmp(elems->ssid, sdata->u.ibss.ssid,
292 sdata->u.ibss.ssid_len))
293 goto put_bss;
294
295 /* same BSSID */
296 if (memcmp(bss->cbss.bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0)
297 goto put_bss;
298
299 if (rx_status->flag & RX_FLAG_TSFT) {
300 /*
301 * For correct IBSS merging we need mactime; since mactime is
302 * defined as the time the first data symbol of the frame hits
303 * the PHY, and the timestamp of the beacon is defined as "the
304 * time that the data symbol containing the first bit of the
305 * timestamp is transmitted to the PHY plus the transmitting
306 * STA's delays through its local PHY from the MAC-PHY
307 * interface to its interface with the WM" (802.11 11.1.2)
308 * - equals the time this bit arrives at the receiver - we have
309 * to take into account the offset between the two.
310 *
311 * E.g. at 1 MBit that means mactime is 192 usec earlier
312 * (=24 bytes * 8 usecs/byte) than the beacon timestamp.
313 */
314 int rate;
315
316 if (rx_status->flag & RX_FLAG_HT)
317 rate = 65; /* TODO: HT rates */
318 else
319 rate = local->hw.wiphy->bands[band]->
320 bitrates[rx_status->rate_idx].bitrate;
321
322 rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate);
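		/* the 24 bytes are the 802.11 MAC header preceding the
		 * beacon's timestamp field; bitrates are in 100 kbps units,
		 * so 10/rate is usec per bit and 24 * 8 * 10 / rate is that
		 * header's airtime (192 usec at 1 Mbps, i.e. rate == 10) */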
323 } else if (local && local->ops && local->ops->get_tsf)
324 /* second best option: get current TSF */
325 rx_timestamp = local->ops->get_tsf(local_to_hw(local));
326 else
327 /* can't merge without knowing the TSF */
328 rx_timestamp = -1LLU;
329
330#ifdef CONFIG_MAC80211_IBSS_DEBUG
331 printk(KERN_DEBUG "RX beacon SA=%pM BSSID="
332 "%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n",
333 mgmt->sa, mgmt->bssid,
334 (unsigned long long)rx_timestamp,
335 (unsigned long long)beacon_timestamp,
336 (unsigned long long)(rx_timestamp - beacon_timestamp),
337 jiffies);
338#endif
339
340 /* give slow hardware some time to do the TSF sync */
341 if (rx_timestamp < IEEE80211_IBSS_MERGE_DELAY)
342 goto put_bss;
343
344 if (beacon_timestamp > rx_timestamp) {
345#ifdef CONFIG_MAC80211_IBSS_DEBUG
346 printk(KERN_DEBUG "%s: beacon TSF higher than "
347 "local TSF - IBSS merge with BSSID %pM\n",
348 sdata->dev->name, mgmt->bssid);
349#endif
350 ieee80211_sta_join_ibss(sdata, bss);
351 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates);
352 }
353
354 put_bss:
355 ieee80211_rx_bss_put(local, bss);
356}
357
358/*
 359 * Add a new IBSS station. This is also called by the RX code when,
 360 * in IBSS mode, a frame is received from a yet-unknown station, hence
 361 * it must be callable in atomic context.
362 */
363struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
364 u8 *bssid,u8 *addr, u32 supp_rates)
365{
366 struct ieee80211_local *local = sdata->local;
367 struct sta_info *sta;
368 int band = local->hw.conf.channel->band;
369
370 /* TODO: Could consider removing the least recently used entry and
 371 * allowing a new one to be added. */
372 if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) {
373 if (net_ratelimit()) {
374 printk(KERN_DEBUG "%s: No room for a new IBSS STA "
375 "entry %pM\n", sdata->dev->name, addr);
376 }
377 return NULL;
378 }
379
380 if (compare_ether_addr(bssid, sdata->u.ibss.bssid))
381 return NULL;
382
383#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
384 printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n",
385 wiphy_name(local->hw.wiphy), addr, sdata->dev->name);
386#endif
387
388 sta = sta_info_alloc(sdata, addr, GFP_ATOMIC);
389 if (!sta)
390 return NULL;
391
392 set_sta_flags(sta, WLAN_STA_AUTHORIZED);
393
394 /* make sure mandatory rates are always added */
395 sta->sta.supp_rates[band] = supp_rates |
396 ieee80211_mandatory_rates(local, band);
397
398 rate_control_rate_init(sta);
399
400 if (sta_info_insert(sta))
401 return NULL;
402
403 return sta;
404}
405
406static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
407{
408 struct ieee80211_local *local = sdata->local;
409 int active = 0;
410 struct sta_info *sta;
411
412 rcu_read_lock();
413
414 list_for_each_entry_rcu(sta, &local->sta_list, list) {
415 if (sta->sdata == sdata &&
416 time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL,
417 jiffies)) {
418 active++;
419 break;
420 }
421 }
422
423 rcu_read_unlock();
424
425 return active;
426}
427
428
429static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
430{
431 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
432
433 mod_timer(&ifibss->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
434
435 ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT);
436 if (ieee80211_sta_active_ibss(sdata))
437 return;
438
439 if ((ifibss->flags & IEEE80211_IBSS_BSSID_SET) &&
440 (!(ifibss->flags & IEEE80211_IBSS_AUTO_CHANNEL_SEL)))
441 return;
442
443 printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other "
444 "IBSS networks with same SSID (merge)\n", sdata->dev->name);
445
446 /* XXX maybe racy? */
447 if (sdata->local->scan_req)
448 return;
449
450 memcpy(sdata->local->int_scan_req.ssids[0].ssid,
451 ifibss->ssid, IEEE80211_MAX_SSID_LEN);
452 sdata->local->int_scan_req.ssids[0].ssid_len = ifibss->ssid_len;
453 ieee80211_request_scan(sdata, &sdata->local->int_scan_req);
454}
455
456static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
457{
458 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
459 struct ieee80211_local *local = sdata->local;
460 struct ieee80211_supported_band *sband;
461 u8 *pos;
462 u8 bssid[ETH_ALEN];
463 u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
464 u16 capability;
465 int i;
466
467 if (ifibss->flags & IEEE80211_IBSS_BSSID_SET) {
468 memcpy(bssid, ifibss->bssid, ETH_ALEN);
469 } else {
 470		/* Generate a random, non-broadcast, locally administered BSSID. Mix in
 471		 * our own MAC address to make sure that devices without a proper
 472		 * random number generator still get different BSSIDs. */
473 get_random_bytes(bssid, ETH_ALEN);
474 for (i = 0; i < ETH_ALEN; i++)
475 bssid[i] ^= sdata->dev->dev_addr[i];
476 bssid[0] &= ~0x01;
477 bssid[0] |= 0x02;
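		/* clear the multicast/group bit and set the locally-
		 * administered bit, e.g. a first octet of 0xa1 becomes 0xa2 */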
478 }
479
480 printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n",
481 sdata->dev->name, bssid);
482
483 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
484
485 if (local->hw.conf.beacon_int == 0)
486 local->hw.conf.beacon_int = 100;
487
488 capability = WLAN_CAPABILITY_IBSS;
489
490 if (sdata->default_key)
491 capability |= WLAN_CAPABILITY_PRIVACY;
492 else
493 sdata->drop_unencrypted = 0;
494
495 pos = supp_rates;
496 for (i = 0; i < sband->n_bitrates; i++) {
497 int rate = sband->bitrates[i].bitrate;
498 *pos++ = (u8) (rate / 5);
499 }
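	/* sband bitrates are in 100 kbps units, so dividing by 5 gives the
	 * 802.11 Supported Rates encoding in 500 kbps units,
	 * e.g. 54 Mbps: 540 / 5 = 108 (0x6C) */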
500
501 return __ieee80211_sta_join_ibss(sdata,
502 bssid, local->hw.conf.beacon_int,
503 local->hw.conf.channel->center_freq,
504 sband->n_bitrates, supp_rates,
505 capability);
506}
507
508static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
509{
510 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
511 struct ieee80211_local *local = sdata->local;
512 struct ieee80211_bss *bss;
513 const u8 *bssid = NULL;
514 int active_ibss;
515
516 if (ifibss->ssid_len == 0)
517 return -EINVAL;
518
519 active_ibss = ieee80211_sta_active_ibss(sdata);
520#ifdef CONFIG_MAC80211_IBSS_DEBUG
521 printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n",
522 sdata->dev->name, active_ibss);
523#endif /* CONFIG_MAC80211_IBSS_DEBUG */
524
525 if (active_ibss)
526 return 0;
527
528 if (ifibss->flags & IEEE80211_IBSS_BSSID_SET)
529 bssid = ifibss->bssid;
530 bss = (void *)cfg80211_get_bss(local->hw.wiphy, NULL, bssid,
531 ifibss->ssid, ifibss->ssid_len,
532 WLAN_CAPABILITY_IBSS,
533 WLAN_CAPABILITY_IBSS);
534
535#ifdef CONFIG_MAC80211_IBSS_DEBUG
536 if (bss)
537 printk(KERN_DEBUG " sta_find_ibss: selected %pM current "
538 "%pM\n", bss->cbss.bssid, ifibss->bssid);
539#endif /* CONFIG_MAC80211_IBSS_DEBUG */
540
541 if (bss &&
542 (!(ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET) ||
543 memcmp(ifibss->bssid, bss->cbss.bssid, ETH_ALEN))) {
544 int ret;
545
546 printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM"
547 " based on configured SSID\n",
548 sdata->dev->name, bss->cbss.bssid);
549
550 ret = ieee80211_sta_join_ibss(sdata, bss);
551 ieee80211_rx_bss_put(local, bss);
552 return ret;
553 } else if (bss)
554 ieee80211_rx_bss_put(local, bss);
555
556#ifdef CONFIG_MAC80211_IBSS_DEBUG
557 printk(KERN_DEBUG " did not try to join ibss\n");
558#endif /* CONFIG_MAC80211_IBSS_DEBUG */
559
560 /* Selected IBSS not found in current scan results - try to scan */
561 if (ifibss->state == IEEE80211_IBSS_MLME_JOINED &&
562 !ieee80211_sta_active_ibss(sdata)) {
563 mod_timer(&ifibss->timer, jiffies +
564 IEEE80211_IBSS_MERGE_INTERVAL);
565 } else if (time_after(jiffies, local->last_scan_completed +
566 IEEE80211_SCAN_INTERVAL)) {
567 printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to "
568 "join\n", sdata->dev->name);
569
570 /* XXX maybe racy? */
571 if (local->scan_req)
572 return -EBUSY;
573
574 memcpy(local->int_scan_req.ssids[0].ssid,
575 ifibss->ssid, IEEE80211_MAX_SSID_LEN);
576 local->int_scan_req.ssids[0].ssid_len = ifibss->ssid_len;
577 return ieee80211_request_scan(sdata, &local->int_scan_req);
578 } else if (ifibss->state != IEEE80211_IBSS_MLME_JOINED) {
579 int interval = IEEE80211_SCAN_INTERVAL;
580
581 if (time_after(jiffies, ifibss->ibss_join_req +
582 IEEE80211_IBSS_JOIN_TIMEOUT)) {
583 if (!(local->oper_channel->flags &
584 IEEE80211_CHAN_NO_IBSS))
585 return ieee80211_sta_create_ibss(sdata);
586 printk(KERN_DEBUG "%s: IBSS not allowed on"
587 " %d MHz\n", sdata->dev->name,
588 local->hw.conf.channel->center_freq);
589
 590			/* No IBSS found - back off to the slow scan
 591			 * interval and continue scanning. */
592 interval = IEEE80211_SCAN_INTERVAL_SLOW;
593 }
594
595 ifibss->state = IEEE80211_IBSS_MLME_SEARCH;
596 mod_timer(&ifibss->timer, jiffies + interval);
597 return 0;
598 }
599
600 return 0;
601}
602
603static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
604 struct ieee80211_mgmt *mgmt,
605 size_t len)
606{
607 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
608 struct ieee80211_local *local = sdata->local;
609 int tx_last_beacon;
610 struct sk_buff *skb;
611 struct ieee80211_mgmt *resp;
612 u8 *pos, *end;
613
614 if (ifibss->state != IEEE80211_IBSS_MLME_JOINED ||
615 len < 24 + 2 || !ifibss->probe_resp)
616 return;
617
618 if (local->ops->tx_last_beacon)
619 tx_last_beacon = local->ops->tx_last_beacon(local_to_hw(local));
620 else
621 tx_last_beacon = 1;
622
623#ifdef CONFIG_MAC80211_IBSS_DEBUG
624 printk(KERN_DEBUG "%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM"
625 " (tx_last_beacon=%d)\n",
626 sdata->dev->name, mgmt->sa, mgmt->da,
627 mgmt->bssid, tx_last_beacon);
628#endif /* CONFIG_MAC80211_IBSS_DEBUG */
629
630 if (!tx_last_beacon)
631 return;
632
633 if (memcmp(mgmt->bssid, ifibss->bssid, ETH_ALEN) != 0 &&
634 memcmp(mgmt->bssid, "\xff\xff\xff\xff\xff\xff", ETH_ALEN) != 0)
635 return;
636
637 end = ((u8 *) mgmt) + len;
638 pos = mgmt->u.probe_req.variable;
639 if (pos[0] != WLAN_EID_SSID ||
640 pos + 2 + pos[1] > end) {
641#ifdef CONFIG_MAC80211_IBSS_DEBUG
642 printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq "
643 "from %pM\n",
644 sdata->dev->name, mgmt->sa);
645#endif
646 return;
647 }
648 if (pos[1] != 0 &&
649 (pos[1] != ifibss->ssid_len ||
650 memcmp(pos + 2, ifibss->ssid, ifibss->ssid_len) != 0)) {
651 /* Ignore ProbeReq for foreign SSID */
652 return;
653 }
654
655 /* Reply with ProbeResp */
656 skb = skb_copy(ifibss->probe_resp, GFP_KERNEL);
657 if (!skb)
658 return;
659
660 resp = (struct ieee80211_mgmt *) skb->data;
661 memcpy(resp->da, mgmt->sa, ETH_ALEN);
662#ifdef CONFIG_MAC80211_IBSS_DEBUG
663 printk(KERN_DEBUG "%s: Sending ProbeResp to %pM\n",
664 sdata->dev->name, resp->da);
665#endif /* CONFIG_MAC80211_IBSS_DEBUG */
666 ieee80211_tx_skb(sdata, skb, 0);
667}
668
669static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
670 struct ieee80211_mgmt *mgmt,
671 size_t len,
672 struct ieee80211_rx_status *rx_status)
673{
674 size_t baselen;
675 struct ieee802_11_elems elems;
676
677 if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN))
678 return; /* ignore ProbeResp to foreign address */
679
680 baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt;
681 if (baselen > len)
682 return;
683
684 ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
685 &elems);
686
687 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false);
688}
689
690static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
691 struct ieee80211_mgmt *mgmt,
692 size_t len,
693 struct ieee80211_rx_status *rx_status)
694{
695 size_t baselen;
696 struct ieee802_11_elems elems;
697
698 /* Process beacon from the current BSS */
699 baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt;
700 if (baselen > len)
701 return;
702
703 ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
704
705 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true);
706}
707
708static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
709 struct sk_buff *skb)
710{
711 struct ieee80211_rx_status *rx_status;
712 struct ieee80211_mgmt *mgmt;
713 u16 fc;
714
715 rx_status = (struct ieee80211_rx_status *) skb->cb;
716 mgmt = (struct ieee80211_mgmt *) skb->data;
717 fc = le16_to_cpu(mgmt->frame_control);
718
719 switch (fc & IEEE80211_FCTL_STYPE) {
720 case IEEE80211_STYPE_PROBE_REQ:
721 ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len);
722 break;
723 case IEEE80211_STYPE_PROBE_RESP:
724 ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len,
725 rx_status);
726 break;
727 case IEEE80211_STYPE_BEACON:
728 ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len,
729 rx_status);
730 break;
731 case IEEE80211_STYPE_AUTH:
732 ieee80211_rx_mgmt_auth_ibss(sdata, mgmt, skb->len);
733 break;
734 }
735
736 kfree_skb(skb);
737}
738
739static void ieee80211_ibss_work(struct work_struct *work)
740{
741 struct ieee80211_sub_if_data *sdata =
742 container_of(work, struct ieee80211_sub_if_data, u.ibss.work);
743 struct ieee80211_local *local = sdata->local;
744 struct ieee80211_if_ibss *ifibss;
745 struct sk_buff *skb;
746
747 if (!netif_running(sdata->dev))
748 return;
749
750 if (local->sw_scanning || local->hw_scanning)
751 return;
752
753 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_ADHOC))
754 return;
755 ifibss = &sdata->u.ibss;
756
757 while ((skb = skb_dequeue(&ifibss->skb_queue)))
758 ieee80211_ibss_rx_queued_mgmt(sdata, skb);
759
760 if (!test_and_clear_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request))
761 return;
762
763 switch (ifibss->state) {
764 case IEEE80211_IBSS_MLME_SEARCH:
765 ieee80211_sta_find_ibss(sdata);
766 break;
767 case IEEE80211_IBSS_MLME_JOINED:
768 ieee80211_sta_merge_ibss(sdata);
769 break;
770 default:
771 WARN_ON(1);
772 break;
773 }
774}
775
776static void ieee80211_ibss_timer(unsigned long data)
777{
778 struct ieee80211_sub_if_data *sdata =
779 (struct ieee80211_sub_if_data *) data;
780 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
781 struct ieee80211_local *local = sdata->local;
782
783 set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request);
784 queue_work(local->hw.workqueue, &ifibss->work);
785}
786
787void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)
788{
789 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
790
791 INIT_WORK(&ifibss->work, ieee80211_ibss_work);
792 setup_timer(&ifibss->timer, ieee80211_ibss_timer,
793 (unsigned long) sdata);
794 skb_queue_head_init(&ifibss->skb_queue);
795
796 ifibss->flags |= IEEE80211_IBSS_AUTO_BSSID_SEL |
797 IEEE80211_IBSS_AUTO_CHANNEL_SEL;
798}
799
800int ieee80211_ibss_commit(struct ieee80211_sub_if_data *sdata)
801{
802 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
803
804 ifibss->flags &= ~IEEE80211_IBSS_PREV_BSSID_SET;
805
806 if (ifibss->ssid_len)
807 ifibss->flags |= IEEE80211_IBSS_SSID_SET;
808 else
809 ifibss->flags &= ~IEEE80211_IBSS_SSID_SET;
810
811 ifibss->ibss_join_req = jiffies;
812 ifibss->state = IEEE80211_IBSS_MLME_SEARCH;
813
814 return ieee80211_sta_find_ibss(sdata);
815}
816
817int ieee80211_ibss_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len)
818{
819 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
820
821 if (len > IEEE80211_MAX_SSID_LEN)
822 return -EINVAL;
823
824 if (ifibss->ssid_len != len || memcmp(ifibss->ssid, ssid, len) != 0) {
825 memset(ifibss->ssid, 0, sizeof(ifibss->ssid));
826 memcpy(ifibss->ssid, ssid, len);
827 ifibss->ssid_len = len;
828 }
829
830 return ieee80211_ibss_commit(sdata);
831}
832
833int ieee80211_ibss_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len)
834{
835 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
836
837 memcpy(ssid, ifibss->ssid, ifibss->ssid_len);
838 *len = ifibss->ssid_len;
839
840 return 0;
841}
842
843int ieee80211_ibss_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid)
844{
845 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
846
847 if (is_valid_ether_addr(bssid)) {
848 memcpy(ifibss->bssid, bssid, ETH_ALEN);
849 ifibss->flags |= IEEE80211_IBSS_BSSID_SET;
850 } else {
851 memset(ifibss->bssid, 0, ETH_ALEN);
852 ifibss->flags &= ~IEEE80211_IBSS_BSSID_SET;
853 }
854
855 if (netif_running(sdata->dev)) {
856 if (ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID)) {
 857			printk(KERN_DEBUG "%s: Failed to configure the new BSSID in "
858 "the low-level driver\n", sdata->dev->name);
859 }
860 }
861
862 return ieee80211_ibss_commit(sdata);
863}
864
865/* scan finished notification */
866void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
867{
868 struct ieee80211_sub_if_data *sdata = local->scan_sdata;
869 struct ieee80211_if_ibss *ifibss;
870
871 if (sdata && sdata->vif.type == NL80211_IFTYPE_ADHOC) {
872 ifibss = &sdata->u.ibss;
873 if ((!(ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET)) ||
874 !ieee80211_sta_active_ibss(sdata))
875 ieee80211_sta_find_ibss(sdata);
876 }
877}
878
879ieee80211_rx_result
880ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
881 struct ieee80211_rx_status *rx_status)
882{
883 struct ieee80211_local *local = sdata->local;
884 struct ieee80211_mgmt *mgmt;
885 u16 fc;
886
887 if (skb->len < 24)
888 return RX_DROP_MONITOR;
889
890 mgmt = (struct ieee80211_mgmt *) skb->data;
891 fc = le16_to_cpu(mgmt->frame_control);
892
893 switch (fc & IEEE80211_FCTL_STYPE) {
894 case IEEE80211_STYPE_PROBE_RESP:
895 case IEEE80211_STYPE_BEACON:
896 memcpy(skb->cb, rx_status, sizeof(*rx_status));
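 		/* deliberate fall-through: beacons and probe responses are
 		 * queued below as well, after stashing the rx status in cb */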
897 case IEEE80211_STYPE_PROBE_REQ:
898 case IEEE80211_STYPE_AUTH:
899 skb_queue_tail(&sdata->u.ibss.skb_queue, skb);
900 queue_work(local->hw.workqueue, &sdata->u.ibss.work);
901 return RX_QUEUED;
902 }
903
904 return RX_DROP_MONITOR;
905}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f3eec989662b..ecbc8e0cb3e7 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -43,7 +43,7 @@ struct ieee80211_local;
43 43
44/* Required encryption head and tailroom */ 44/* Required encryption head and tailroom */
45#define IEEE80211_ENCRYPT_HEADROOM 8 45#define IEEE80211_ENCRYPT_HEADROOM 8
46#define IEEE80211_ENCRYPT_TAILROOM 12 46#define IEEE80211_ENCRYPT_TAILROOM 18
47 47
48/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent 48/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent
49 * reception of at least three fragmented frames. This limit can be increased 49 * reception of at least three fragmented frames. This limit can be increased
@@ -57,6 +57,8 @@ struct ieee80211_local;
57 */ 57 */
58#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ) 58#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ)
59 59
60#define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024))
61
60struct ieee80211_fragment_entry { 62struct ieee80211_fragment_entry {
61 unsigned long first_frag_time; 63 unsigned long first_frag_time;
62 unsigned int seq; 64 unsigned int seq;
@@ -70,43 +72,36 @@ struct ieee80211_fragment_entry {
70 72
71 73
72struct ieee80211_bss { 74struct ieee80211_bss {
73 struct list_head list; 75 /* Yes, this is a hack */
74 struct ieee80211_bss *hnext; 76 struct cfg80211_bss cbss;
75 size_t ssid_len;
76 77
77 atomic_t users; 78 /* don't want to look up all the time */
78 79 size_t ssid_len;
79 u8 bssid[ETH_ALEN];
80 u8 ssid[IEEE80211_MAX_SSID_LEN]; 80 u8 ssid[IEEE80211_MAX_SSID_LEN];
81
81 u8 dtim_period; 82 u8 dtim_period;
82 u16 capability; /* host byte order */ 83
83 enum ieee80211_band band;
84 int freq;
85 int signal, noise, qual;
86 u8 *ies; /* all information elements from the last Beacon or Probe
87 * Response frames; note Beacon frame is not allowed to
88 * override values from Probe Response */
89 size_t ies_len;
90 bool wmm_used; 84 bool wmm_used;
85
86 unsigned long last_probe_resp;
87
91#ifdef CONFIG_MAC80211_MESH 88#ifdef CONFIG_MAC80211_MESH
92 u8 *mesh_id; 89 u8 *mesh_id;
93 size_t mesh_id_len; 90 size_t mesh_id_len;
94 u8 *mesh_cfg; 91 u8 *mesh_cfg;
95#endif 92#endif
93
96#define IEEE80211_MAX_SUPP_RATES 32 94#define IEEE80211_MAX_SUPP_RATES 32
97 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; 95 u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
98 size_t supp_rates_len; 96 size_t supp_rates_len;
99 u64 timestamp;
100 int beacon_int;
101
102 unsigned long last_probe_resp;
103 unsigned long last_update;
104 97
105 /* during assocation, we save an ERP value from a probe response so 98 /*
 99 * During association, we save an ERP value from a probe response so
106 * that we can feed ERP info to the driver when handling the 100 * that we can feed ERP info to the driver when handling the
107 * association completes. these fields probably won't be up-to-date 101 * association completes. these fields probably won't be up-to-date
108 * otherwise, you probably don't want to use them. */ 102 * otherwise, you probably don't want to use them.
109 int has_erp_value; 103 */
104 bool has_erp_value;
110 u8 erp_value; 105 u8 erp_value;
111}; 106};
112 107
@@ -244,7 +239,7 @@ struct mesh_preq_queue {
244 u8 flags; 239 u8 flags;
245}; 240};
246 241
247/* flags used in struct ieee80211_if_sta.flags */ 242/* flags used in struct ieee80211_if_managed.flags */
248#define IEEE80211_STA_SSID_SET BIT(0) 243#define IEEE80211_STA_SSID_SET BIT(0)
249#define IEEE80211_STA_BSSID_SET BIT(1) 244#define IEEE80211_STA_BSSID_SET BIT(1)
250#define IEEE80211_STA_PREV_BSSID_SET BIT(2) 245#define IEEE80211_STA_PREV_BSSID_SET BIT(2)
@@ -258,37 +253,39 @@ struct mesh_preq_queue {
258#define IEEE80211_STA_AUTO_BSSID_SEL BIT(11) 253#define IEEE80211_STA_AUTO_BSSID_SEL BIT(11)
259#define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12) 254#define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12)
260#define IEEE80211_STA_PRIVACY_INVOKED BIT(13) 255#define IEEE80211_STA_PRIVACY_INVOKED BIT(13)
256#define IEEE80211_STA_TKIP_WEP_USED BIT(14)
257#define IEEE80211_STA_CSA_RECEIVED BIT(15)
258#define IEEE80211_STA_MFP_ENABLED BIT(16)
261/* flags for MLME request */ 259/* flags for MLME request */
262#define IEEE80211_STA_REQ_SCAN 0 260#define IEEE80211_STA_REQ_SCAN 0
263#define IEEE80211_STA_REQ_DIRECT_PROBE 1 261#define IEEE80211_STA_REQ_DIRECT_PROBE 1
264#define IEEE80211_STA_REQ_AUTH 2 262#define IEEE80211_STA_REQ_AUTH 2
265#define IEEE80211_STA_REQ_RUN 3 263#define IEEE80211_STA_REQ_RUN 3
266 264
267/* STA/IBSS MLME states */
268enum ieee80211_sta_mlme_state {
269 IEEE80211_STA_MLME_DISABLED,
270 IEEE80211_STA_MLME_DIRECT_PROBE,
271 IEEE80211_STA_MLME_AUTHENTICATE,
272 IEEE80211_STA_MLME_ASSOCIATE,
273 IEEE80211_STA_MLME_ASSOCIATED,
274 IEEE80211_STA_MLME_IBSS_SEARCH,
275 IEEE80211_STA_MLME_IBSS_JOINED,
276};
277
278/* bitfield of allowed auth algs */ 265/* bitfield of allowed auth algs */
279#define IEEE80211_AUTH_ALG_OPEN BIT(0) 266#define IEEE80211_AUTH_ALG_OPEN BIT(0)
280#define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1) 267#define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1)
281#define IEEE80211_AUTH_ALG_LEAP BIT(2) 268#define IEEE80211_AUTH_ALG_LEAP BIT(2)
282 269
283struct ieee80211_if_sta { 270struct ieee80211_if_managed {
284 struct timer_list timer; 271 struct timer_list timer;
272 struct timer_list chswitch_timer;
285 struct work_struct work; 273 struct work_struct work;
274 struct work_struct chswitch_work;
275
286 u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; 276 u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
277
287 u8 ssid[IEEE80211_MAX_SSID_LEN]; 278 u8 ssid[IEEE80211_MAX_SSID_LEN];
288 enum ieee80211_sta_mlme_state state;
289 size_t ssid_len; 279 size_t ssid_len;
290 u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; 280
291 size_t scan_ssid_len; 281 enum {
282 IEEE80211_STA_MLME_DISABLED,
283 IEEE80211_STA_MLME_DIRECT_PROBE,
284 IEEE80211_STA_MLME_AUTHENTICATE,
285 IEEE80211_STA_MLME_ASSOCIATE,
286 IEEE80211_STA_MLME_ASSOCIATED,
287 } state;
288
292 u16 aid; 289 u16 aid;
293 u16 ap_capab, capab; 290 u16 ap_capab, capab;
294 u8 *extra_ie; /* to be added to the end of AssocReq */ 291 u8 *extra_ie; /* to be added to the end of AssocReq */
@@ -315,11 +312,65 @@ struct ieee80211_if_sta {
315 int auth_alg; /* currently used IEEE 802.11 authentication algorithm */ 312 int auth_alg; /* currently used IEEE 802.11 authentication algorithm */
316 int auth_transaction; 313 int auth_transaction;
317 314
315 enum {
316 IEEE80211_MFP_DISABLED,
317 IEEE80211_MFP_OPTIONAL,
318 IEEE80211_MFP_REQUIRED
319 } mfp; /* management frame protection */
320
321 int wmm_last_param_set;
322
323 /* Extra IE data for management frames */
324 u8 *ie_probereq;
325 size_t ie_probereq_len;
326 u8 *ie_proberesp;
327 size_t ie_proberesp_len;
328 u8 *ie_auth;
329 size_t ie_auth_len;
330 u8 *ie_assocreq;
331 size_t ie_assocreq_len;
332 u8 *ie_reassocreq;
333 size_t ie_reassocreq_len;
334 u8 *ie_deauth;
335 size_t ie_deauth_len;
336 u8 *ie_disassoc;
337 size_t ie_disassoc_len;
338};
339
340enum ieee80211_ibss_flags {
341 IEEE80211_IBSS_AUTO_CHANNEL_SEL = BIT(0),
342 IEEE80211_IBSS_AUTO_BSSID_SEL = BIT(1),
343 IEEE80211_IBSS_BSSID_SET = BIT(2),
344 IEEE80211_IBSS_PREV_BSSID_SET = BIT(3),
345 IEEE80211_IBSS_SSID_SET = BIT(4),
346};
347
348enum ieee80211_ibss_request {
349 IEEE80211_IBSS_REQ_RUN = 0,
350};
351
352struct ieee80211_if_ibss {
353 struct timer_list timer;
354 struct work_struct work;
355
356 struct sk_buff_head skb_queue;
357
358 u8 ssid[IEEE80211_MAX_SSID_LEN];
359 u8 ssid_len;
360
361 u32 flags;
362
363 u8 bssid[ETH_ALEN];
364
365 unsigned long request;
366
318 unsigned long ibss_join_req; 367 unsigned long ibss_join_req;
319 struct sk_buff *probe_resp; /* ProbeResp template for IBSS */ 368 struct sk_buff *probe_resp; /* ProbeResp template for IBSS */
320 u32 supp_rates_bits[IEEE80211_NUM_BANDS];
321 369
322 int wmm_last_param_set; 370 enum {
371 IEEE80211_IBSS_MLME_SEARCH,
372 IEEE80211_IBSS_MLME_JOINED,
373 } state;
323}; 374};
324 375
325struct ieee80211_if_mesh { 376struct ieee80211_if_mesh {
@@ -404,8 +455,10 @@ struct ieee80211_sub_if_data {
404 unsigned int fragment_next; 455 unsigned int fragment_next;
405 456
406#define NUM_DEFAULT_KEYS 4 457#define NUM_DEFAULT_KEYS 4
407 struct ieee80211_key *keys[NUM_DEFAULT_KEYS]; 458#define NUM_DEFAULT_MGMT_KEYS 2
459 struct ieee80211_key *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS];
408 struct ieee80211_key *default_key; 460 struct ieee80211_key *default_key;
461 struct ieee80211_key *default_mgmt_key;
409 462
410 u16 sequence_number; 463 u16 sequence_number;
411 464
@@ -423,7 +476,8 @@ struct ieee80211_sub_if_data {
423 struct ieee80211_if_ap ap; 476 struct ieee80211_if_ap ap;
424 struct ieee80211_if_wds wds; 477 struct ieee80211_if_wds wds;
425 struct ieee80211_if_vlan vlan; 478 struct ieee80211_if_vlan vlan;
426 struct ieee80211_if_sta sta; 479 struct ieee80211_if_managed mgd;
480 struct ieee80211_if_ibss ibss;
427#ifdef CONFIG_MAC80211_MESH 481#ifdef CONFIG_MAC80211_MESH
428 struct ieee80211_if_mesh mesh; 482 struct ieee80211_if_mesh mesh;
429#endif 483#endif
@@ -477,6 +531,7 @@ struct ieee80211_sub_if_data {
477 } debugfs; 531 } debugfs;
478 struct { 532 struct {
479 struct dentry *default_key; 533 struct dentry *default_key;
534 struct dentry *default_mgmt_key;
480 } common_debugfs; 535 } common_debugfs;
481 536
482#ifdef CONFIG_MAC80211_MESH 537#ifdef CONFIG_MAC80211_MESH
@@ -541,11 +596,10 @@ enum {
541enum queue_stop_reason { 596enum queue_stop_reason {
542 IEEE80211_QUEUE_STOP_REASON_DRIVER, 597 IEEE80211_QUEUE_STOP_REASON_DRIVER,
543 IEEE80211_QUEUE_STOP_REASON_PS, 598 IEEE80211_QUEUE_STOP_REASON_PS,
599 IEEE80211_QUEUE_STOP_REASON_CSA,
600 IEEE80211_QUEUE_STOP_REASON_AGGREGATION,
544}; 601};
545 602
546/* maximum number of hardware queues we support. */
547#define QD_MAX_QUEUES (IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_QUEUES)
548
549struct ieee80211_master_priv { 603struct ieee80211_master_priv {
550 struct ieee80211_local *local; 604 struct ieee80211_local *local;
551}; 605};
@@ -558,9 +612,15 @@ struct ieee80211_local {
558 612
559 const struct ieee80211_ops *ops; 613 const struct ieee80211_ops *ops;
560 614
561 unsigned long queue_pool[BITS_TO_LONGS(QD_MAX_QUEUES)]; 615 /* AC queue corresponding to each AMPDU queue */
562 unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES]; 616 s8 ampdu_ac_queue[IEEE80211_MAX_AMPDU_QUEUES];
617 unsigned int amdpu_ac_stop_refcnt[IEEE80211_MAX_AMPDU_QUEUES];
618
619 unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES +
620 IEEE80211_MAX_AMPDU_QUEUES];
621 /* also used to protect ampdu_ac_queue and amdpu_ac_stop_refcnt */
563 spinlock_t queue_stop_reason_lock; 622 spinlock_t queue_stop_reason_lock;
623
564 struct net_device *mdev; /* wmaster# - "master" 802.11 device */ 624 struct net_device *mdev; /* wmaster# - "master" 802.11 device */
565 int open_count; 625 int open_count;
566 int monitors, cooked_mntrs; 626 int monitors, cooked_mntrs;
@@ -568,7 +628,6 @@ struct ieee80211_local {
568 int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss; 628 int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss;
569 unsigned int filter_flags; /* FIF_* */ 629 unsigned int filter_flags; /* FIF_* */
570 struct iw_statistics wstats; 630 struct iw_statistics wstats;
571 u8 wstats_flags;
572 bool tim_in_locked_section; /* see ieee80211_beacon_get() */ 631 bool tim_in_locked_section; /* see ieee80211_beacon_get() */
573 int tx_headroom; /* required headroom for hardware/radiotap */ 632 int tx_headroom; /* required headroom for hardware/radiotap */
574 633
@@ -612,7 +671,9 @@ struct ieee80211_local {
612 struct crypto_blkcipher *wep_rx_tfm; 671 struct crypto_blkcipher *wep_rx_tfm;
613 u32 wep_iv; 672 u32 wep_iv;
614 673
674 /* see iface.c */
615 struct list_head interfaces; 675 struct list_head interfaces;
676 struct mutex iflist_mtx;
616 677
617 /* 678 /*
618 * Key lock, protects sdata's key_list and sta_info's 679 * Key lock, protects sdata's key_list and sta_info's
@@ -623,20 +684,18 @@ struct ieee80211_local {
623 684
624 /* Scanning and BSS list */ 685 /* Scanning and BSS list */
625 bool sw_scanning, hw_scanning; 686 bool sw_scanning, hw_scanning;
687 struct cfg80211_ssid scan_ssid;
688 struct cfg80211_scan_request int_scan_req;
689 struct cfg80211_scan_request *scan_req;
690 struct ieee80211_channel *scan_channel;
626 int scan_channel_idx; 691 int scan_channel_idx;
627 enum ieee80211_band scan_band;
628 692
629 enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state; 693 enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state;
630 unsigned long last_scan_completed; 694 unsigned long last_scan_completed;
631 struct delayed_work scan_work; 695 struct delayed_work scan_work;
632 struct ieee80211_sub_if_data *scan_sdata; 696 struct ieee80211_sub_if_data *scan_sdata;
633 struct ieee80211_channel *oper_channel, *scan_channel;
634 enum nl80211_channel_type oper_channel_type; 697 enum nl80211_channel_type oper_channel_type;
635 u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; 698 struct ieee80211_channel *oper_channel, *csa_channel;
636 size_t scan_ssid_len;
637 struct list_head bss_list;
638 struct ieee80211_bss *bss_hash[STA_HASH_SIZE];
639 spinlock_t bss_lock;
640 699
641 /* SNMP counters */ 700 /* SNMP counters */
642 /* dot11CountersTable */ 701 /* dot11CountersTable */
@@ -649,7 +708,6 @@ struct ieee80211_local {
649 u32 dot11ReceivedFragmentCount; 708 u32 dot11ReceivedFragmentCount;
650 u32 dot11MulticastReceivedFrameCount; 709 u32 dot11MulticastReceivedFrameCount;
651 u32 dot11TransmittedFrameCount; 710 u32 dot11TransmittedFrameCount;
652 u32 dot11WEPUndecryptableCount;
653 711
654#ifdef CONFIG_MAC80211_LEDS 712#ifdef CONFIG_MAC80211_LEDS
655 int tx_led_counter, rx_led_counter; 713 int tx_led_counter, rx_led_counter;
@@ -696,11 +754,14 @@ struct ieee80211_local {
696 unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */ 754 unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */
697 755
698 bool powersave; 756 bool powersave;
699 int dynamic_ps_timeout; 757 bool pspolling;
700 struct work_struct dynamic_ps_enable_work; 758 struct work_struct dynamic_ps_enable_work;
701 struct work_struct dynamic_ps_disable_work; 759 struct work_struct dynamic_ps_disable_work;
702 struct timer_list dynamic_ps_timer; 760 struct timer_list dynamic_ps_timer;
703 761
762 int user_power_level; /* in dBm */
763 int power_constr_level; /* in dBm */
764
704#ifdef CONFIG_MAC80211_DEBUGFS 765#ifdef CONFIG_MAC80211_DEBUGFS
705 struct local_debugfsdentries { 766 struct local_debugfsdentries {
706 struct dentry *rcdir; 767 struct dentry *rcdir;
@@ -712,6 +773,7 @@ struct ieee80211_local {
712 struct dentry *long_retry_limit; 773 struct dentry *long_retry_limit;
713 struct dentry *total_ps_buffered; 774 struct dentry *total_ps_buffered;
714 struct dentry *wep_iv; 775 struct dentry *wep_iv;
776 struct dentry *tsf;
715 struct dentry *statistics; 777 struct dentry *statistics;
716 struct local_debugfsdentries_statsdentries { 778 struct local_debugfsdentries_statsdentries {
717 struct dentry *transmitted_fragment_count; 779 struct dentry *transmitted_fragment_count;
@@ -805,6 +867,7 @@ struct ieee802_11_elems {
805 u8 *country_elem; 867 u8 *country_elem;
806 u8 *pwr_constr_elem; 868 u8 *pwr_constr_elem;
807	u8 *quiet_elem;	/* first quiet element */	869	u8 *quiet_elem;	/* first quiet element */
870 u8 *timeout_int;
808 871
809 /* length of them, respectively */ 872 /* length of them, respectively */
810 u8 ssid_len; 873 u8 ssid_len;
@@ -832,6 +895,7 @@ struct ieee802_11_elems {
832 u8 pwr_constr_elem_len; 895 u8 pwr_constr_elem_len;
833 u8 quiet_elem_len; 896 u8 quiet_elem_len;
834	u8 num_of_quiet_elem;	/* can be more than one */	897	u8 num_of_quiet_elem;	/* can be more than one */
898 u8 timeout_int_len;
835}; 899};
836 900
837static inline struct ieee80211_local *hw_to_local( 901static inline struct ieee80211_local *hw_to_local(
@@ -860,34 +924,43 @@ void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx);
860void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, 924void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
861 u32 changed); 925 u32 changed);
862void ieee80211_configure_filter(struct ieee80211_local *local); 926void ieee80211_configure_filter(struct ieee80211_local *local);
927u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
863 928
864/* wireless extensions */ 929/* wireless extensions */
865extern const struct iw_handler_def ieee80211_iw_handler_def; 930extern const struct iw_handler_def ieee80211_iw_handler_def;
866 931
867/* STA/IBSS code */ 932/* STA code */
868void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); 933void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata);
869void ieee80211_scan_work(struct work_struct *work); 934ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata,
870void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, 935 struct sk_buff *skb,
871 struct ieee80211_rx_status *rx_status); 936 struct ieee80211_rx_status *rx_status);
937int ieee80211_sta_commit(struct ieee80211_sub_if_data *sdata);
872int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len); 938int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len);
873int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len); 939int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len);
874int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid); 940int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid);
875void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, 941void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata);
876 struct ieee80211_if_sta *ifsta);
877struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
878 u8 *bssid, u8 *addr, u64 supp_rates);
879int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason); 942int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason);
880int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason); 943int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason);
881u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); 944void ieee80211_send_pspoll(struct ieee80211_local *local,
882u64 ieee80211_sta_get_rates(struct ieee80211_local *local, 945 struct ieee80211_sub_if_data *sdata);
883 struct ieee802_11_elems *elems, 946
884 enum ieee80211_band band); 947/* IBSS code */
885void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, 948int ieee80211_ibss_commit(struct ieee80211_sub_if_data *sdata);
886 u8 *ssid, size_t ssid_len); 949int ieee80211_ibss_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len);
950int ieee80211_ibss_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len);
951int ieee80211_ibss_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid);
952void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local);
953void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata);
954ieee80211_rx_result
955ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
956 struct ieee80211_rx_status *rx_status);
957struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
958 u8 *bssid, u8 *addr, u32 supp_rates);
887 959
888/* scan/BSS handling */ 960/* scan/BSS handling */
961void ieee80211_scan_work(struct work_struct *work);
889int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, 962int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
890 u8 *ssid, size_t ssid_len); 963 struct cfg80211_scan_request *req);
891int ieee80211_scan_results(struct ieee80211_local *local, 964int ieee80211_scan_results(struct ieee80211_local *local,
892 struct iw_request_info *info, 965 struct iw_request_info *info,
893 char *buf, size_t len); 966 char *buf, size_t len);
@@ -895,29 +968,27 @@ ieee80211_rx_result
895ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, 968ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata,
896 struct sk_buff *skb, 969 struct sk_buff *skb,
897 struct ieee80211_rx_status *rx_status); 970 struct ieee80211_rx_status *rx_status);
898void ieee80211_rx_bss_list_init(struct ieee80211_local *local);
899void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local);
900int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, 971int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata,
901 char *ie, size_t len); 972 char *ie, size_t len);
902 973
903void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local); 974void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local);
904int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, 975int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
905 u8 *ssid, size_t ssid_len); 976 struct cfg80211_scan_request *req);
906struct ieee80211_bss * 977struct ieee80211_bss *
907ieee80211_bss_info_update(struct ieee80211_local *local, 978ieee80211_bss_info_update(struct ieee80211_local *local,
908 struct ieee80211_rx_status *rx_status, 979 struct ieee80211_rx_status *rx_status,
909 struct ieee80211_mgmt *mgmt, 980 struct ieee80211_mgmt *mgmt,
910 size_t len, 981 size_t len,
911 struct ieee802_11_elems *elems, 982 struct ieee802_11_elems *elems,
912 int freq, bool beacon); 983 struct ieee80211_channel *channel,
913struct ieee80211_bss * 984 bool beacon);
914ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq,
915 u8 *ssid, u8 ssid_len);
916struct ieee80211_bss * 985struct ieee80211_bss *
917ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq, 986ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
918 u8 *ssid, u8 ssid_len); 987 u8 *ssid, u8 ssid_len);
919void ieee80211_rx_bss_put(struct ieee80211_local *local, 988void ieee80211_rx_bss_put(struct ieee80211_local *local,
920 struct ieee80211_bss *bss); 989 struct ieee80211_bss *bss);
990void ieee80211_rx_bss_remove(struct ieee80211_sub_if_data *sdata, u8 *bssid,
991 int freq, u8 *ssid, u8 ssid_len);
921 992
922/* interface handling */ 993/* interface handling */
923int ieee80211_if_add(struct ieee80211_local *local, const char *name, 994int ieee80211_if_add(struct ieee80211_local *local, const char *name,
@@ -943,10 +1014,15 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
943 struct ieee80211_ht_info *hti, 1014 struct ieee80211_ht_info *hti,
944 u16 ap_ht_cap_flags); 1015 u16 ap_ht_cap_flags);
945void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn); 1016void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn);
1017void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
1018 const u8 *da, u16 tid,
1019 u16 initiator, u16 reason_code);
946 1020
947void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, 1021void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da,
948 u16 tid, u16 initiator, u16 reason); 1022 u16 tid, u16 initiator, u16 reason);
949void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr); 1023void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
1024 u16 initiator, u16 reason);
1025void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta);
950void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, 1026void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
951 struct sta_info *sta, 1027 struct sta_info *sta,
952 struct ieee80211_mgmt *mgmt, size_t len); 1028 struct ieee80211_mgmt *mgmt, size_t len);
@@ -959,10 +1035,25 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
959 struct ieee80211_mgmt *mgmt, 1035 struct ieee80211_mgmt *mgmt,
960 size_t len); 1036 size_t len);
961 1037
1038int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
1039 enum ieee80211_back_parties initiator);
1040
962/* Spectrum management */ 1041/* Spectrum management */
963void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, 1042void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
964 struct ieee80211_mgmt *mgmt, 1043 struct ieee80211_mgmt *mgmt,
965 size_t len); 1044 size_t len);
1045void ieee80211_chswitch_timer(unsigned long data);
1046void ieee80211_chswitch_work(struct work_struct *work);
1047void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata,
1048 struct ieee80211_channel_sw_ie *sw_elem,
1049 struct ieee80211_bss *bss);
1050void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
1051 u16 capab_info, u8 *pwr_constr_elem,
1052 u8 pwr_constr_elem_len);
1053
1054/* Suspend/resume */
1055int __ieee80211_suspend(struct ieee80211_hw *hw);
1056int __ieee80211_resume(struct ieee80211_hw *hw);
966 1057
967/* utility functions/constants */ 1058/* utility functions/constants */
968extern void *mac80211_wiphy_privid; /* for wiphy privid */ 1059extern void *mac80211_wiphy_privid; /* for wiphy privid */
@@ -980,17 +1071,39 @@ void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
980void ieee802_11_parse_elems(u8 *start, size_t len, 1071void ieee802_11_parse_elems(u8 *start, size_t len,
981 struct ieee802_11_elems *elems); 1072 struct ieee802_11_elems *elems);
982int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq); 1073int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq);
983u64 ieee80211_mandatory_rates(struct ieee80211_local *local, 1074u32 ieee80211_mandatory_rates(struct ieee80211_local *local,
984 enum ieee80211_band band); 1075 enum ieee80211_band band);
985 1076
986void ieee80211_dynamic_ps_enable_work(struct work_struct *work); 1077void ieee80211_dynamic_ps_enable_work(struct work_struct *work);
987void ieee80211_dynamic_ps_disable_work(struct work_struct *work); 1078void ieee80211_dynamic_ps_disable_work(struct work_struct *work);
988void ieee80211_dynamic_ps_timer(unsigned long data); 1079void ieee80211_dynamic_ps_timer(unsigned long data);
1080void ieee80211_send_nullfunc(struct ieee80211_local *local,
1081 struct ieee80211_sub_if_data *sdata,
1082 int powersave);
989 1083
990void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, 1084void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
991 enum queue_stop_reason reason); 1085 enum queue_stop_reason reason);
992void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, 1086void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
993 enum queue_stop_reason reason); 1087 enum queue_stop_reason reason);
1088void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
1089 enum queue_stop_reason reason);
1090void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
1091 enum queue_stop_reason reason);
1092
1093void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
1094 u16 transaction, u16 auth_alg,
1095 u8 *extra, size_t extra_len,
1096 const u8 *bssid, int encrypt);
1097void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1098 u8 *ssid, size_t ssid_len,
1099 u8 *ie, size_t ie_len);
1100
1101void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
1102 const size_t supp_rates_len,
1103 const u8 *supp_rates);
1104u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
1105 struct ieee802_11_elems *elems,
1106 enum ieee80211_band band);
994 1107
995#ifdef CONFIG_MAC80211_NOINLINE 1108#ifdef CONFIG_MAC80211_NOINLINE
996#define debug_noinline noinline 1109#define debug_noinline noinline
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b9074824862a..2acc416e77e1 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -21,6 +21,23 @@
21#include "mesh.h" 21#include "mesh.h"
22#include "led.h" 22#include "led.h"
23 23
24/**
25 * DOC: Interface list locking
26 *
27 * The interface list in each struct ieee80211_local is protected
28 * three-fold:
29 *
30 * (1) modifications may only be done under the RTNL
31 * (2) modifications and readers are protected against each other by
32 * the iflist_mtx.
33 * (3) modifications are done in an RCU manner so atomic readers
34 * can traverse the list in RCU-safe blocks.
35 *
36 * As a consequence, reads (traversals) of the list can be protected
37 * by either the RTNL, the iflist_mtx or RCU.
38 */
39
40
24static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) 41static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
25{ 42{
26 int meshhdrlen; 43 int meshhdrlen;
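The DOC comment added above spells out the three-fold protection of the interface list; in practice an atomic reader relies on RCU alone, while writers hold both the RTNL and the new iflist_mtx and publish through the RCU list primitives. A minimal sketch along those lines (illustrative only, assuming the fields this series introduces and omitting error handling):

	/* illustrative sketch only -- not part of this diff */
	#include <linux/mutex.h>
	#include <linux/rculist.h>
	#include <linux/rtnetlink.h>
	#include "ieee80211_i.h"

	/* atomic reader: RCU protection is sufficient */
	static void example_atomic_reader(struct ieee80211_local *local)
	{
		struct ieee80211_sub_if_data *sdata;

		rcu_read_lock();
		list_for_each_entry_rcu(sdata, &local->interfaces, list) {
			/* inspect sdata; must not sleep here */
		}
		rcu_read_unlock();
	}

	/* writer: under the RTNL, takes iflist_mtx, publishes via RCU */
	static void example_add_interface(struct ieee80211_local *local,
					  struct ieee80211_sub_if_data *sdata)
	{
		ASSERT_RTNL();
		mutex_lock(&local->iflist_mtx);
		list_add_tail_rcu(&sdata->list, &local->interfaces);
		mutex_unlock(&local->iflist_mtx);
	}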
@@ -219,7 +236,10 @@ static int ieee80211_open(struct net_device *dev)
219 break; 236 break;
220 case NL80211_IFTYPE_STATION: 237 case NL80211_IFTYPE_STATION:
221 case NL80211_IFTYPE_ADHOC: 238 case NL80211_IFTYPE_ADHOC:
222 sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; 239 if (sdata->vif.type == NL80211_IFTYPE_STATION)
240 sdata->u.mgd.flags &= ~IEEE80211_STA_PREV_BSSID_SET;
241 else
242 sdata->u.ibss.flags &= ~IEEE80211_IBSS_PREV_BSSID_SET;
223 /* fall through */ 243 /* fall through */
224 default: 244 default:
225 conf.vif = &sdata->vif; 245 conf.vif = &sdata->vif;
@@ -304,11 +324,10 @@ static int ieee80211_open(struct net_device *dev)
304 * yet be effective. Trigger execution of ieee80211_sta_work 324 * yet be effective. Trigger execution of ieee80211_sta_work
305 * to fix this. 325 * to fix this.
306 */ 326 */
307 if (sdata->vif.type == NL80211_IFTYPE_STATION || 327 if (sdata->vif.type == NL80211_IFTYPE_STATION)
308 sdata->vif.type == NL80211_IFTYPE_ADHOC) { 328 queue_work(local->hw.workqueue, &sdata->u.mgd.work);
309 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 329 else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
310 queue_work(local->hw.workqueue, &ifsta->work); 330 queue_work(local->hw.workqueue, &sdata->u.ibss.work);
311 }
312 331
313 netif_tx_start_all_queues(dev); 332 netif_tx_start_all_queues(dev);
314 333
@@ -345,8 +364,7 @@ static int ieee80211_stop(struct net_device *dev)
345 364
346 list_for_each_entry_rcu(sta, &local->sta_list, list) { 365 list_for_each_entry_rcu(sta, &local->sta_list, list) {
347 if (sta->sdata == sdata) 366 if (sta->sdata == sdata)
348 ieee80211_sta_tear_down_BA_sessions(sdata, 367 ieee80211_sta_tear_down_BA_sessions(sta);
349 sta->sta.addr);
350 } 368 }
351 369
352 rcu_read_unlock(); 370 rcu_read_unlock();
@@ -383,6 +401,8 @@ static int ieee80211_stop(struct net_device *dev)
383 atomic_dec(&local->iff_promiscs); 401 atomic_dec(&local->iff_promiscs);
384 402
385 dev_mc_unsync(local->mdev, dev); 403 dev_mc_unsync(local->mdev, dev);
404 del_timer_sync(&local->dynamic_ps_timer);
405 cancel_work_sync(&local->dynamic_ps_enable_work);
386 406
387 /* APs need special treatment */ 407 /* APs need special treatment */
388 if (sdata->vif.type == NL80211_IFTYPE_AP) { 408 if (sdata->vif.type == NL80211_IFTYPE_AP) {
@@ -434,14 +454,13 @@ static int ieee80211_stop(struct net_device *dev)
434 netif_addr_unlock_bh(local->mdev); 454 netif_addr_unlock_bh(local->mdev);
435 break; 455 break;
436 case NL80211_IFTYPE_STATION: 456 case NL80211_IFTYPE_STATION:
437 case NL80211_IFTYPE_ADHOC:
438 /* Announce that we are leaving the network. */ 457 /* Announce that we are leaving the network. */
439 if (sdata->u.sta.state != IEEE80211_STA_MLME_DISABLED) 458 if (sdata->u.mgd.state != IEEE80211_STA_MLME_DISABLED)
440 ieee80211_sta_deauthenticate(sdata, 459 ieee80211_sta_deauthenticate(sdata,
441 WLAN_REASON_DEAUTH_LEAVING); 460 WLAN_REASON_DEAUTH_LEAVING);
442 461 memset(sdata->u.mgd.bssid, 0, ETH_ALEN);
443 memset(sdata->u.sta.bssid, 0, ETH_ALEN); 462 del_timer_sync(&sdata->u.mgd.chswitch_timer);
444 del_timer_sync(&sdata->u.sta.timer); 463 del_timer_sync(&sdata->u.mgd.timer);
445 /* 464 /*
446 * If the timer fired while we waited for it, it will have 465 * If the timer fired while we waited for it, it will have
447 * requeued the work. Now the work will be running again 466 * requeued the work. Now the work will be running again
@@ -449,7 +468,8 @@ static int ieee80211_stop(struct net_device *dev)
449 * whether the interface is running, which, at this point, 468 * whether the interface is running, which, at this point,
450 * it no longer is. 469 * it no longer is.
451 */ 470 */
452 cancel_work_sync(&sdata->u.sta.work); 471 cancel_work_sync(&sdata->u.mgd.work);
472 cancel_work_sync(&sdata->u.mgd.chswitch_work);
453 /* 473 /*
454 * When we get here, the interface is marked down. 474 * When we get here, the interface is marked down.
455 * Call synchronize_rcu() to wait for the RX path 475 * Call synchronize_rcu() to wait for the RX path
@@ -457,12 +477,22 @@ static int ieee80211_stop(struct net_device *dev)
457 * frames at this very time on another CPU. 477 * frames at this very time on another CPU.
458 */ 478 */
459 synchronize_rcu(); 479 synchronize_rcu();
460 skb_queue_purge(&sdata->u.sta.skb_queue); 480 skb_queue_purge(&sdata->u.mgd.skb_queue);
461 481
462 sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; 482 sdata->u.mgd.flags &= ~(IEEE80211_STA_PRIVACY_INVOKED |
463 kfree(sdata->u.sta.extra_ie); 483 IEEE80211_STA_TKIP_WEP_USED);
464 sdata->u.sta.extra_ie = NULL; 484 kfree(sdata->u.mgd.extra_ie);
465 sdata->u.sta.extra_ie_len = 0; 485 sdata->u.mgd.extra_ie = NULL;
486 sdata->u.mgd.extra_ie_len = 0;
487 /* fall through */
488 case NL80211_IFTYPE_ADHOC:
489 if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
490 memset(sdata->u.ibss.bssid, 0, ETH_ALEN);
491 del_timer_sync(&sdata->u.ibss.timer);
492 cancel_work_sync(&sdata->u.ibss.work);
493 synchronize_rcu();
494 skb_queue_purge(&sdata->u.ibss.skb_queue);
495 }
466 /* fall through */ 496 /* fall through */
467 case NL80211_IFTYPE_MESH_POINT: 497 case NL80211_IFTYPE_MESH_POINT:
468 if (ieee80211_vif_is_mesh(&sdata->vif)) { 498 if (ieee80211_vif_is_mesh(&sdata->vif)) {
@@ -501,7 +531,7 @@ static int ieee80211_stop(struct net_device *dev)
501 * scan event to userspace -- the scan is incomplete. 531 * scan event to userspace -- the scan is incomplete.
502 */ 532 */
503 if (local->sw_scanning) 533 if (local->sw_scanning)
504 ieee80211_scan_completed(&local->hw); 534 ieee80211_scan_completed(&local->hw, true);
505 } 535 }
506 536
507 conf.vif = &sdata->vif; 537 conf.vif = &sdata->vif;
@@ -569,19 +599,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
569 dev_mc_sync(local->mdev, dev); 599 dev_mc_sync(local->mdev, dev);
570} 600}
571 601
572static void ieee80211_if_setup(struct net_device *dev)
573{
574 ether_setup(dev);
575 dev->hard_start_xmit = ieee80211_subif_start_xmit;
576 dev->wireless_handlers = &ieee80211_iw_handler_def;
577 dev->set_multicast_list = ieee80211_set_multicast_list;
578 dev->change_mtu = ieee80211_change_mtu;
579 dev->open = ieee80211_open;
580 dev->stop = ieee80211_stop;
581 dev->destructor = free_netdev;
582 /* we will validate the address ourselves in ->open */
583 dev->validate_addr = NULL;
584}
585/* 602/*
586 * Called when the netdev is removed or, by the code below, before 603 * Called when the netdev is removed or, by the code below, before
587 * the interface type changes. 604 * the interface type changes.
@@ -621,12 +638,20 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
621 if (ieee80211_vif_is_mesh(&sdata->vif)) 638 if (ieee80211_vif_is_mesh(&sdata->vif))
622 mesh_rmc_free(sdata); 639 mesh_rmc_free(sdata);
623 break; 640 break;
624 case NL80211_IFTYPE_STATION:
625 case NL80211_IFTYPE_ADHOC: 641 case NL80211_IFTYPE_ADHOC:
626 kfree(sdata->u.sta.extra_ie); 642 kfree_skb(sdata->u.ibss.probe_resp);
627 kfree(sdata->u.sta.assocreq_ies); 643 break;
628 kfree(sdata->u.sta.assocresp_ies); 644 case NL80211_IFTYPE_STATION:
629 kfree_skb(sdata->u.sta.probe_resp); 645 kfree(sdata->u.mgd.extra_ie);
646 kfree(sdata->u.mgd.assocreq_ies);
647 kfree(sdata->u.mgd.assocresp_ies);
648 kfree(sdata->u.mgd.ie_probereq);
649 kfree(sdata->u.mgd.ie_proberesp);
650 kfree(sdata->u.mgd.ie_auth);
651 kfree(sdata->u.mgd.ie_assocreq);
652 kfree(sdata->u.mgd.ie_reassocreq);
653 kfree(sdata->u.mgd.ie_deauth);
654 kfree(sdata->u.mgd.ie_disassoc);
630 break; 655 break;
631 case NL80211_IFTYPE_WDS: 656 case NL80211_IFTYPE_WDS:
632 case NL80211_IFTYPE_AP_VLAN: 657 case NL80211_IFTYPE_AP_VLAN:
@@ -642,6 +667,34 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
642 WARN_ON(flushed); 667 WARN_ON(flushed);
643} 668}
644 669
670static const struct net_device_ops ieee80211_dataif_ops = {
671 .ndo_open = ieee80211_open,
672 .ndo_stop = ieee80211_stop,
673 .ndo_uninit = ieee80211_teardown_sdata,
674 .ndo_start_xmit = ieee80211_subif_start_xmit,
675 .ndo_set_multicast_list = ieee80211_set_multicast_list,
676 .ndo_change_mtu = ieee80211_change_mtu,
677 .ndo_set_mac_address = eth_mac_addr,
678};
679
680static const struct net_device_ops ieee80211_monitorif_ops = {
681 .ndo_open = ieee80211_open,
682 .ndo_stop = ieee80211_stop,
683 .ndo_uninit = ieee80211_teardown_sdata,
684 .ndo_start_xmit = ieee80211_monitor_start_xmit,
685 .ndo_set_multicast_list = ieee80211_set_multicast_list,
686 .ndo_change_mtu = ieee80211_change_mtu,
687 .ndo_set_mac_address = eth_mac_addr,
688};
689
690static void ieee80211_if_setup(struct net_device *dev)
691{
692 ether_setup(dev);
693 dev->netdev_ops = &ieee80211_dataif_ops;
694 dev->wireless_handlers = &ieee80211_iw_handler_def;
695 dev->destructor = free_netdev;
696}
697
645/* 698/*
646 * Helper function to initialise an interface to a specific type. 699 * Helper function to initialise an interface to a specific type.
647 */ 700 */
@@ -653,7 +706,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
653 706
654 /* and set some type-dependent values */ 707 /* and set some type-dependent values */
655 sdata->vif.type = type; 708 sdata->vif.type = type;
656 sdata->dev->hard_start_xmit = ieee80211_subif_start_xmit; 709 sdata->dev->netdev_ops = &ieee80211_dataif_ops;
657 sdata->wdev.iftype = type; 710 sdata->wdev.iftype = type;
658 711
659 /* only monitor differs */ 712 /* only monitor differs */
@@ -665,16 +718,18 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
665 INIT_LIST_HEAD(&sdata->u.ap.vlans); 718 INIT_LIST_HEAD(&sdata->u.ap.vlans);
666 break; 719 break;
667 case NL80211_IFTYPE_STATION: 720 case NL80211_IFTYPE_STATION:
668 case NL80211_IFTYPE_ADHOC:
669 ieee80211_sta_setup_sdata(sdata); 721 ieee80211_sta_setup_sdata(sdata);
670 break; 722 break;
723 case NL80211_IFTYPE_ADHOC:
724 ieee80211_ibss_setup_sdata(sdata);
725 break;
671 case NL80211_IFTYPE_MESH_POINT: 726 case NL80211_IFTYPE_MESH_POINT:
672 if (ieee80211_vif_is_mesh(&sdata->vif)) 727 if (ieee80211_vif_is_mesh(&sdata->vif))
673 ieee80211_mesh_init_sdata(sdata); 728 ieee80211_mesh_init_sdata(sdata);
674 break; 729 break;
675 case NL80211_IFTYPE_MONITOR: 730 case NL80211_IFTYPE_MONITOR:
676 sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP; 731 sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP;
677 sdata->dev->hard_start_xmit = ieee80211_monitor_start_xmit; 732 sdata->dev->netdev_ops = &ieee80211_monitorif_ops;
678 sdata->u.mntr_flags = MONITOR_FLAG_CONTROL | 733 sdata->u.mntr_flags = MONITOR_FLAG_CONTROL |
679 MONITOR_FLAG_OTHER_BSS; 734 MONITOR_FLAG_OTHER_BSS;
680 break; 735 break;
@@ -755,6 +810,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
755 810
756 memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); 811 memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
757 SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); 812 SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy));
813 ndev->features |= NETIF_F_NETNS_LOCAL;
758 814
759 /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ 815 /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */
760 sdata = netdev_priv(ndev); 816 sdata = netdev_priv(ndev);
@@ -780,15 +836,15 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
780 if (ret) 836 if (ret)
781 goto fail; 837 goto fail;
782 838
783 ndev->uninit = ieee80211_teardown_sdata;
784
785 if (ieee80211_vif_is_mesh(&sdata->vif) && 839 if (ieee80211_vif_is_mesh(&sdata->vif) &&
786 params && params->mesh_id_len) 840 params && params->mesh_id_len)
787 ieee80211_sdata_set_mesh_id(sdata, 841 ieee80211_sdata_set_mesh_id(sdata,
788 params->mesh_id_len, 842 params->mesh_id_len,
789 params->mesh_id); 843 params->mesh_id);
790 844
845 mutex_lock(&local->iflist_mtx);
791 list_add_tail_rcu(&sdata->list, &local->interfaces); 846 list_add_tail_rcu(&sdata->list, &local->interfaces);
847 mutex_unlock(&local->iflist_mtx);
792 848
793 if (new_dev) 849 if (new_dev)
794 *new_dev = ndev; 850 *new_dev = ndev;
@@ -804,7 +860,10 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata)
804{ 860{
805 ASSERT_RTNL(); 861 ASSERT_RTNL();
806 862
863 mutex_lock(&sdata->local->iflist_mtx);
807 list_del_rcu(&sdata->list); 864 list_del_rcu(&sdata->list);
865 mutex_unlock(&sdata->local->iflist_mtx);
866
808 synchronize_rcu(); 867 synchronize_rcu();
809 unregister_netdevice(sdata->dev); 868 unregister_netdevice(sdata->dev);
810} 869}
@@ -820,7 +879,16 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
820 ASSERT_RTNL(); 879 ASSERT_RTNL();
821 880
822 list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { 881 list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) {
882 /*
883 * we cannot hold the iflist_mtx across unregister_netdevice,
884 * but we only need to hold it for list modifications to lock
885 * out readers since we're under the RTNL here as all other
886 * writers.
887 */
888 mutex_lock(&local->iflist_mtx);
823 list_del(&sdata->list); 889 list_del(&sdata->list);
890 mutex_unlock(&local->iflist_mtx);
891
824 unregister_netdevice(sdata->dev); 892 unregister_netdevice(sdata->dev);
825 } 893 }
826} 894}
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 999f7aa42326..687acf23054d 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -18,6 +18,7 @@
18#include "ieee80211_i.h" 18#include "ieee80211_i.h"
19#include "debugfs_key.h" 19#include "debugfs_key.h"
20#include "aes_ccm.h" 20#include "aes_ccm.h"
21#include "aes_cmac.h"
21 22
22 23
23/** 24/**
@@ -47,7 +48,6 @@
47 */ 48 */
48 49
49static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; 50static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
50static const u8 zero_addr[ETH_ALEN];
51 51
52/* key mutex: used to synchronise todo runners */ 52/* key mutex: used to synchronise todo runners */
53static DEFINE_MUTEX(key_mutex); 53static DEFINE_MUTEX(key_mutex);
@@ -108,29 +108,18 @@ static void assert_key_lock(void)
108 WARN_ON(!mutex_is_locked(&key_mutex)); 108 WARN_ON(!mutex_is_locked(&key_mutex));
109} 109}
110 110
111static const u8 *get_mac_for_key(struct ieee80211_key *key) 111static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key)
112{ 112{
113 const u8 *addr = bcast_addr;
114
115 /*
116 * If we're an AP we won't ever receive frames with a non-WEP
117 * group key so we tell the driver that by using the zero MAC
118 * address to indicate a transmit-only key.
119 */
120 if (key->conf.alg != ALG_WEP &&
121 (key->sdata->vif.type == NL80211_IFTYPE_AP ||
122 key->sdata->vif.type == NL80211_IFTYPE_AP_VLAN))
123 addr = zero_addr;
124
125 if (key->sta) 113 if (key->sta)
126 addr = key->sta->sta.addr; 114 return &key->sta->sta;
127 115
128 return addr; 116 return NULL;
129} 117}
130 118
131static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) 119static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
132{ 120{
133 const u8 *addr; 121 struct ieee80211_sub_if_data *sdata;
122 struct ieee80211_sta *sta;
134 int ret; 123 int ret;
135 124
136 assert_key_lock(); 125 assert_key_lock();
@@ -139,11 +128,16 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
139 if (!key->local->ops->set_key) 128 if (!key->local->ops->set_key)
140 return; 129 return;
141 130
142 addr = get_mac_for_key(key); 131 sta = get_sta_for_key(key);
132
133 sdata = key->sdata;
134 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
135 sdata = container_of(sdata->bss,
136 struct ieee80211_sub_if_data,
137 u.ap);
143 138
144 ret = key->local->ops->set_key(local_to_hw(key->local), SET_KEY, 139 ret = key->local->ops->set_key(local_to_hw(key->local), SET_KEY,
145 key->sdata->dev->dev_addr, addr, 140 &sdata->vif, sta, &key->conf);
146 &key->conf);
147 141
148 if (!ret) { 142 if (!ret) {
149 spin_lock(&todo_lock); 143 spin_lock(&todo_lock);
@@ -155,12 +149,13 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
155 printk(KERN_ERR "mac80211-%s: failed to set key " 149 printk(KERN_ERR "mac80211-%s: failed to set key "
156 "(%d, %pM) to hardware (%d)\n", 150 "(%d, %pM) to hardware (%d)\n",
157 wiphy_name(key->local->hw.wiphy), 151 wiphy_name(key->local->hw.wiphy),
158 key->conf.keyidx, addr, ret); 152 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
159} 153}
160 154
161static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) 155static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
162{ 156{
163 const u8 *addr; 157 struct ieee80211_sub_if_data *sdata;
158 struct ieee80211_sta *sta;
164 int ret; 159 int ret;
165 160
166 assert_key_lock(); 161 assert_key_lock();
@@ -176,17 +171,22 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
176 } 171 }
177 spin_unlock(&todo_lock); 172 spin_unlock(&todo_lock);
178 173
179 addr = get_mac_for_key(key); 174 sta = get_sta_for_key(key);
175 sdata = key->sdata;
176
177 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
178 sdata = container_of(sdata->bss,
179 struct ieee80211_sub_if_data,
180 u.ap);
180 181
181 ret = key->local->ops->set_key(local_to_hw(key->local), DISABLE_KEY, 182 ret = key->local->ops->set_key(local_to_hw(key->local), DISABLE_KEY,
182 key->sdata->dev->dev_addr, addr, 183 &sdata->vif, sta, &key->conf);
183 &key->conf);
184 184
185 if (ret) 185 if (ret)
186 printk(KERN_ERR "mac80211-%s: failed to remove key " 186 printk(KERN_ERR "mac80211-%s: failed to remove key "
187 "(%d, %pM) from hardware (%d)\n", 187 "(%d, %pM) from hardware (%d)\n",
188 wiphy_name(key->local->hw.wiphy), 188 wiphy_name(key->local->hw.wiphy),
189 key->conf.keyidx, addr, ret); 189 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
190 190
191 spin_lock(&todo_lock); 191 spin_lock(&todo_lock);
192 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; 192 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
@@ -216,13 +216,38 @@ void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx)
216 spin_unlock_irqrestore(&sdata->local->key_lock, flags); 216 spin_unlock_irqrestore(&sdata->local->key_lock, flags);
217} 217}
218 218
219static void
220__ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx)
221{
222 struct ieee80211_key *key = NULL;
223
224 if (idx >= NUM_DEFAULT_KEYS &&
225 idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
226 key = sdata->keys[idx];
227
228 rcu_assign_pointer(sdata->default_mgmt_key, key);
229
230 if (key)
231 add_todo(key, KEY_FLAG_TODO_DEFMGMTKEY);
232}
233
234void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
235 int idx)
236{
237 unsigned long flags;
238
239 spin_lock_irqsave(&sdata->local->key_lock, flags);
240 __ieee80211_set_default_mgmt_key(sdata, idx);
241 spin_unlock_irqrestore(&sdata->local->key_lock, flags);
242}
243
219 244
220static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, 245static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
221 struct sta_info *sta, 246 struct sta_info *sta,
222 struct ieee80211_key *old, 247 struct ieee80211_key *old,
223 struct ieee80211_key *new) 248 struct ieee80211_key *new)
224{ 249{
225 int idx, defkey; 250 int idx, defkey, defmgmtkey;
226 251
227 if (new) 252 if (new)
228 list_add(&new->list, &sdata->key_list); 253 list_add(&new->list, &sdata->key_list);
@@ -238,13 +263,19 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
238 idx = new->conf.keyidx; 263 idx = new->conf.keyidx;
239 264
240 defkey = old && sdata->default_key == old; 265 defkey = old && sdata->default_key == old;
266 defmgmtkey = old && sdata->default_mgmt_key == old;
241 267
242 if (defkey && !new) 268 if (defkey && !new)
243 __ieee80211_set_default_key(sdata, -1); 269 __ieee80211_set_default_key(sdata, -1);
270 if (defmgmtkey && !new)
271 __ieee80211_set_default_mgmt_key(sdata, -1);
244 272
245 rcu_assign_pointer(sdata->keys[idx], new); 273 rcu_assign_pointer(sdata->keys[idx], new);
246 if (defkey && new) 274 if (defkey && new)
247 __ieee80211_set_default_key(sdata, new->conf.keyidx); 275 __ieee80211_set_default_key(sdata, new->conf.keyidx);
276 if (defmgmtkey && new)
277 __ieee80211_set_default_mgmt_key(sdata,
278 new->conf.keyidx);
248 } 279 }
249 280
250 if (old) { 281 if (old) {
@@ -263,7 +294,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
263{ 294{
264 struct ieee80211_key *key; 295 struct ieee80211_key *key;
265 296
266 BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS); 297 BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS);
267 298
268 key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); 299 key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL);
269 if (!key) 300 if (!key)
@@ -292,6 +323,10 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
292 key->conf.iv_len = CCMP_HDR_LEN; 323 key->conf.iv_len = CCMP_HDR_LEN;
293 key->conf.icv_len = CCMP_MIC_LEN; 324 key->conf.icv_len = CCMP_MIC_LEN;
294 break; 325 break;
326 case ALG_AES_CMAC:
327 key->conf.iv_len = 0;
328 key->conf.icv_len = sizeof(struct ieee80211_mmie);
329 break;
295 } 330 }
296 memcpy(key->conf.key, key_data, key_len); 331 memcpy(key->conf.key, key_data, key_len);
297 INIT_LIST_HEAD(&key->list); 332 INIT_LIST_HEAD(&key->list);
@@ -309,6 +344,19 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
309 } 344 }
310 } 345 }
311 346
347 if (alg == ALG_AES_CMAC) {
348 /*
349 * Initialize AES key state here as an optimization so that
350 * it does not need to be initialized for every packet.
351 */
352 key->u.aes_cmac.tfm =
353 ieee80211_aes_cmac_key_setup(key_data);
354 if (!key->u.aes_cmac.tfm) {
355 kfree(key);
356 return NULL;
357 }
358 }
359
312 return key; 360 return key;
313} 361}
314 362
@@ -352,7 +400,7 @@ void ieee80211_key_link(struct ieee80211_key *key,
352 */ 400 */
353 401
354 /* same here, the AP could be using QoS */ 402 /* same here, the AP could be using QoS */
355 ap = sta_info_get(key->local, key->sdata->u.sta.bssid); 403 ap = sta_info_get(key->local, key->sdata->u.mgd.bssid);
356 if (ap) { 404 if (ap) {
357 if (test_sta_flags(ap, WLAN_STA_WME)) 405 if (test_sta_flags(ap, WLAN_STA_WME))
358 key->conf.flags |= 406 key->conf.flags |=
@@ -462,6 +510,8 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
462 510
463 if (key->conf.alg == ALG_CCMP) 511 if (key->conf.alg == ALG_CCMP)
464 ieee80211_aes_key_free(key->u.ccmp.tfm); 512 ieee80211_aes_key_free(key->u.ccmp.tfm);
513 if (key->conf.alg == ALG_AES_CMAC)
514 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
465 ieee80211_debugfs_key_remove(key); 515 ieee80211_debugfs_key_remove(key);
466 516
467 kfree(key); 517 kfree(key);
@@ -484,6 +534,7 @@ static void __ieee80211_key_todo(void)
484 list_del_init(&key->todo); 534 list_del_init(&key->todo);
485 todoflags = key->flags & (KEY_FLAG_TODO_ADD_DEBUGFS | 535 todoflags = key->flags & (KEY_FLAG_TODO_ADD_DEBUGFS |
486 KEY_FLAG_TODO_DEFKEY | 536 KEY_FLAG_TODO_DEFKEY |
537 KEY_FLAG_TODO_DEFMGMTKEY |
487 KEY_FLAG_TODO_HWACCEL_ADD | 538 KEY_FLAG_TODO_HWACCEL_ADD |
488 KEY_FLAG_TODO_HWACCEL_REMOVE | 539 KEY_FLAG_TODO_HWACCEL_REMOVE |
489 KEY_FLAG_TODO_DELETE); 540 KEY_FLAG_TODO_DELETE);
@@ -501,6 +552,11 @@ static void __ieee80211_key_todo(void)
501 ieee80211_debugfs_key_add_default(key->sdata); 552 ieee80211_debugfs_key_add_default(key->sdata);
502 work_done = true; 553 work_done = true;
503 } 554 }
555 if (todoflags & KEY_FLAG_TODO_DEFMGMTKEY) {
556 ieee80211_debugfs_key_remove_mgmt_default(key->sdata);
557 ieee80211_debugfs_key_add_mgmt_default(key->sdata);
558 work_done = true;
559 }
504 if (todoflags & KEY_FLAG_TODO_HWACCEL_ADD) { 560 if (todoflags & KEY_FLAG_TODO_HWACCEL_ADD) {
505 ieee80211_key_enable_hw_accel(key); 561 ieee80211_key_enable_hw_accel(key);
506 work_done = true; 562 work_done = true;
@@ -536,6 +592,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata)
536 ieee80211_key_lock(); 592 ieee80211_key_lock();
537 593
538 ieee80211_debugfs_key_remove_default(sdata); 594 ieee80211_debugfs_key_remove_default(sdata);
595 ieee80211_debugfs_key_remove_mgmt_default(sdata);
539 596
540 spin_lock_irqsave(&sdata->local->key_lock, flags); 597 spin_lock_irqsave(&sdata->local->key_lock, flags);
541 list_for_each_entry_safe(key, tmp, &sdata->key_list, list) 598 list_for_each_entry_safe(key, tmp, &sdata->key_list, list)
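Taken together, the key.c changes give management (IEEE 802.11w / BIP) keys their own index range: 0..NUM_DEFAULT_KEYS-1 stay the ordinary default keys, NUM_DEFAULT_KEYS..NUM_DEFAULT_KEYS+NUM_DEFAULT_MGMT_KEYS-1 hold the IGTK, and AES-CMAC keys get their cipher transform set up once at allocation time. A hypothetical caller using only the interfaces visible in this patch might install an IGTK roughly as follows (the helper name is made up, error paths trimmed):

	/* illustrative sketch only -- not part of this diff */
	static int example_install_igtk(struct ieee80211_sub_if_data *sdata,
					int idx, const u8 *igtk, size_t igtk_len)
	{
		struct ieee80211_key *key;

		/* management keys live in the new upper index range */
		if (idx < NUM_DEFAULT_KEYS ||
		    idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
			return -EINVAL;

		key = ieee80211_key_alloc(ALG_AES_CMAC, idx, igtk_len, igtk);
		if (!key)
			return -ENOMEM;

		/* group key, so no station: sta == NULL */
		ieee80211_key_link(key, sdata, NULL);
		ieee80211_set_default_mgmt_key(sdata, idx);

		return 0;
	}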
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 425816e0996c..215d3ef42a4f 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -46,6 +46,8 @@ struct sta_info;
46 * acceleration. 46 * acceleration.
47 * @KEY_FLAG_TODO_DEFKEY: Key is default key and debugfs needs to be updated. 47 * @KEY_FLAG_TODO_DEFKEY: Key is default key and debugfs needs to be updated.
48 * @KEY_FLAG_TODO_ADD_DEBUGFS: Key needs to be added to debugfs. 48 * @KEY_FLAG_TODO_ADD_DEBUGFS: Key needs to be added to debugfs.
49 * @KEY_FLAG_TODO_DEFMGMTKEY: Key is default management key and debugfs needs
50 * to be updated.
49 */ 51 */
50enum ieee80211_internal_key_flags { 52enum ieee80211_internal_key_flags {
51 KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0), 53 KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0),
@@ -54,6 +56,7 @@ enum ieee80211_internal_key_flags {
54 KEY_FLAG_TODO_HWACCEL_REMOVE = BIT(3), 56 KEY_FLAG_TODO_HWACCEL_REMOVE = BIT(3),
55 KEY_FLAG_TODO_DEFKEY = BIT(4), 57 KEY_FLAG_TODO_DEFKEY = BIT(4),
56 KEY_FLAG_TODO_ADD_DEBUGFS = BIT(5), 58 KEY_FLAG_TODO_ADD_DEBUGFS = BIT(5),
59 KEY_FLAG_TODO_DEFMGMTKEY = BIT(6),
57}; 60};
58 61
59struct tkip_ctx { 62struct tkip_ctx {
@@ -96,6 +99,16 @@ struct ieee80211_key {
96 u8 tx_crypto_buf[6 * AES_BLOCK_LEN]; 99 u8 tx_crypto_buf[6 * AES_BLOCK_LEN];
97 u8 rx_crypto_buf[6 * AES_BLOCK_LEN]; 100 u8 rx_crypto_buf[6 * AES_BLOCK_LEN];
98 } ccmp; 101 } ccmp;
102 struct {
103 u8 tx_pn[6];
104 u8 rx_pn[6];
105 struct crypto_cipher *tfm;
106 u32 replays; /* dot11RSNAStatsCMACReplays */
107 u32 icverrors; /* dot11RSNAStatsCMACICVErrors */
108 /* scratch buffers for virt_to_page() (crypto API) */
109 u8 tx_crypto_buf[2 * AES_BLOCK_LEN];
110 u8 rx_crypto_buf[2 * AES_BLOCK_LEN];
111 } aes_cmac;
99 } u; 112 } u;
100 113
101 /* number of times this key has been used */ 114 /* number of times this key has been used */
@@ -114,6 +127,7 @@ struct ieee80211_key {
114 struct dentry *tx_spec; 127 struct dentry *tx_spec;
115 struct dentry *rx_spec; 128 struct dentry *rx_spec;
116 struct dentry *replays; 129 struct dentry *replays;
130 struct dentry *icverrors;
117 struct dentry *key; 131 struct dentry *key;
118 struct dentry *ifindex; 132 struct dentry *ifindex;
119 int cnt; 133 int cnt;
@@ -140,6 +154,8 @@ void ieee80211_key_link(struct ieee80211_key *key,
140 struct sta_info *sta); 154 struct sta_info *sta);
141void ieee80211_key_free(struct ieee80211_key *key); 155void ieee80211_key_free(struct ieee80211_key *key);
142void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); 156void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx);
157void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
158 int idx);
143void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); 159void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata);
144void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); 160void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata);
145void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); 161void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 24b14363d6e7..f38db4d37e5d 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -168,24 +168,67 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed)
168 return 0; 168 return 0;
169 169
170 memset(&conf, 0, sizeof(conf)); 170 memset(&conf, 0, sizeof(conf));
171 conf.changed = changed;
172 171
173 if (sdata->vif.type == NL80211_IFTYPE_STATION || 172 if (sdata->vif.type == NL80211_IFTYPE_STATION)
174 sdata->vif.type == NL80211_IFTYPE_ADHOC) 173 conf.bssid = sdata->u.mgd.bssid;
175 conf.bssid = sdata->u.sta.bssid; 174 else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
175 conf.bssid = sdata->u.ibss.bssid;
176 else if (sdata->vif.type == NL80211_IFTYPE_AP) 176 else if (sdata->vif.type == NL80211_IFTYPE_AP)
177 conf.bssid = sdata->dev->dev_addr; 177 conf.bssid = sdata->dev->dev_addr;
178 else if (ieee80211_vif_is_mesh(&sdata->vif)) { 178 else if (ieee80211_vif_is_mesh(&sdata->vif)) {
179 u8 zero[ETH_ALEN] = { 0 }; 179 static const u8 zero[ETH_ALEN] = { 0 };
180 conf.bssid = zero; 180 conf.bssid = zero;
181 } else { 181 } else {
182 WARN_ON(1); 182 WARN_ON(1);
183 return -EINVAL; 183 return -EINVAL;
184 } 184 }
185 185
186 switch (sdata->vif.type) {
187 case NL80211_IFTYPE_AP:
188 case NL80211_IFTYPE_ADHOC:
189 case NL80211_IFTYPE_MESH_POINT:
190 break;
191 default:
192 /* do not warn to simplify caller in scan.c */
193 changed &= ~IEEE80211_IFCC_BEACON_ENABLED;
194 if (WARN_ON(changed & IEEE80211_IFCC_BEACON))
195 return -EINVAL;
196 changed &= ~IEEE80211_IFCC_BEACON;
197 break;
198 }
199
200 if (changed & IEEE80211_IFCC_BEACON_ENABLED) {
201 if (local->sw_scanning) {
202 conf.enable_beacon = false;
203 } else {
204 /*
205 * Beacon should be enabled, but AP mode must
206 * check whether there is a beacon configured.
207 */
208 switch (sdata->vif.type) {
209 case NL80211_IFTYPE_AP:
210 conf.enable_beacon =
211 !!rcu_dereference(sdata->u.ap.beacon);
212 break;
213 case NL80211_IFTYPE_ADHOC:
214 conf.enable_beacon = !!sdata->u.ibss.probe_resp;
215 break;
216 case NL80211_IFTYPE_MESH_POINT:
217 conf.enable_beacon = true;
218 break;
219 default:
220 /* not reached */
221 WARN_ON(1);
222 break;
223 }
224 }
225 }
226
186 if (WARN_ON(!conf.bssid && (changed & IEEE80211_IFCC_BSSID))) 227 if (WARN_ON(!conf.bssid && (changed & IEEE80211_IFCC_BSSID)))
187 return -EINVAL; 228 return -EINVAL;
188 229
230 conf.changed = changed;
231
189 return local->ops->config_interface(local_to_hw(local), 232 return local->ops->config_interface(local_to_hw(local),
190 &sdata->vif, &conf); 233 &sdata->vif, &conf);
191} 234}
@@ -208,26 +251,22 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
208 } 251 }
209 252
210 if (chan != local->hw.conf.channel || 253 if (chan != local->hw.conf.channel ||
211 channel_type != local->hw.conf.ht.channel_type) { 254 channel_type != local->hw.conf.channel_type) {
212 local->hw.conf.channel = chan; 255 local->hw.conf.channel = chan;
213 local->hw.conf.ht.channel_type = channel_type; 256 local->hw.conf.channel_type = channel_type;
214 switch (channel_type) {
215 case NL80211_CHAN_NO_HT:
216 local->hw.conf.ht.enabled = false;
217 break;
218 case NL80211_CHAN_HT20:
219 case NL80211_CHAN_HT40MINUS:
220 case NL80211_CHAN_HT40PLUS:
221 local->hw.conf.ht.enabled = true;
222 break;
223 }
224 changed |= IEEE80211_CONF_CHANGE_CHANNEL; 257 changed |= IEEE80211_CONF_CHANGE_CHANNEL;
225 } 258 }
226 259
227 if (!local->hw.conf.power_level) 260 if (local->sw_scanning)
228 power = chan->max_power; 261 power = chan->max_power;
229 else 262 else
230 power = min(chan->max_power, local->hw.conf.power_level); 263 power = local->power_constr_level ?
264 (chan->max_power - local->power_constr_level) :
265 chan->max_power;
266
267 if (local->user_power_level)
268 power = min(power, local->user_power_level);
269
231 if (local->hw.conf.power_level != power) { 270 if (local->hw.conf.power_level != power) {
232 changed |= IEEE80211_CONF_CHANGE_POWER; 271 changed |= IEEE80211_CONF_CHANGE_POWER;
233 local->hw.conf.power_level = power; 272 local->hw.conf.power_level = power;
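The rewritten power computation above layers three inputs: during a software scan the channel maximum is used as-is; otherwise the 802.11h power constraint (power_constr_level) is subtracted from the channel maximum, and a non-zero user_power_level then caps the result. The same arithmetic, pulled out as a stand-alone sketch with made-up example values:

	/* illustrative sketch only -- not part of this diff */
	static int example_tx_power(bool sw_scanning, int chan_max_power,
				    int power_constr_level, int user_power_level)
	{
		int power;

		if (sw_scanning)
			power = chan_max_power;
		else
			power = power_constr_level ?
				(chan_max_power - power_constr_level) :
				chan_max_power;

		if (user_power_level)
			power = min(power, user_power_level);

		/* e.g. max 20 dBm, constraint 3 dB, user cap 15 dBm -> 15 dBm */
		return power;
	}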
@@ -667,7 +706,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
667 const struct ieee80211_ops *ops) 706 const struct ieee80211_ops *ops)
668{ 707{
669 struct ieee80211_local *local; 708 struct ieee80211_local *local;
670 int priv_size; 709 int priv_size, i;
671 struct wiphy *wiphy; 710 struct wiphy *wiphy;
672 711
673 /* Ensure 32-byte alignment of our private data and hw private data. 712 /* Ensure 32-byte alignment of our private data and hw private data.
@@ -695,6 +734,10 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
695 return NULL; 734 return NULL;
696 735
697 wiphy->privid = mac80211_wiphy_privid; 736 wiphy->privid = mac80211_wiphy_privid;
737 wiphy->max_scan_ssids = 4;
738 /* Yes, putting cfg80211_bss into ieee80211_bss is a hack */
739 wiphy->bss_priv_size = sizeof(struct ieee80211_bss) -
740 sizeof(struct cfg80211_bss);
698 741
699 local = wiphy_priv(wiphy); 742 local = wiphy_priv(wiphy);
700 local->hw.wiphy = wiphy; 743 local->hw.wiphy = wiphy;
@@ -722,6 +765,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
722 local->hw.conf.radio_enabled = true; 765 local->hw.conf.radio_enabled = true;
723 766
724 INIT_LIST_HEAD(&local->interfaces); 767 INIT_LIST_HEAD(&local->interfaces);
768 mutex_init(&local->iflist_mtx);
725 769
726 spin_lock_init(&local->key_lock); 770 spin_lock_init(&local->key_lock);
727 771
@@ -736,6 +780,11 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
736 setup_timer(&local->dynamic_ps_timer, 780 setup_timer(&local->dynamic_ps_timer,
737 ieee80211_dynamic_ps_timer, (unsigned long) local); 781 ieee80211_dynamic_ps_timer, (unsigned long) local);
738 782
783 for (i = 0; i < IEEE80211_MAX_AMPDU_QUEUES; i++)
784 local->ampdu_ac_queue[i] = -1;
785 /* using an s8 won't work with more than that */
786 BUILD_BUG_ON(IEEE80211_MAX_AMPDU_QUEUES > 127);
787
739 sta_info_init(local); 788 sta_info_init(local);
740 789
741 tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, 790 tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending,
@@ -754,6 +803,23 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
754} 803}
755EXPORT_SYMBOL(ieee80211_alloc_hw); 804EXPORT_SYMBOL(ieee80211_alloc_hw);
756 805
806static const struct net_device_ops ieee80211_master_ops = {
807 .ndo_start_xmit = ieee80211_master_start_xmit,
808 .ndo_open = ieee80211_master_open,
809 .ndo_stop = ieee80211_master_stop,
810 .ndo_set_multicast_list = ieee80211_master_set_multicast_list,
811 .ndo_select_queue = ieee80211_select_queue,
812};
813
814static void ieee80211_master_setup(struct net_device *mdev)
815{
816 mdev->type = ARPHRD_IEEE80211;
817 mdev->netdev_ops = &ieee80211_master_ops;
818 mdev->header_ops = &ieee80211_header_ops;
819 mdev->tx_queue_len = 1000;
820 mdev->addr_len = ETH_ALEN;
821}
822
757int ieee80211_register_hw(struct ieee80211_hw *hw) 823int ieee80211_register_hw(struct ieee80211_hw *hw)
758{ 824{
759 struct ieee80211_local *local = hw_to_local(hw); 825 struct ieee80211_local *local = hw_to_local(hw);
@@ -761,25 +827,33 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
761 enum ieee80211_band band; 827 enum ieee80211_band band;
762 struct net_device *mdev; 828 struct net_device *mdev;
763 struct ieee80211_master_priv *mpriv; 829 struct ieee80211_master_priv *mpriv;
830 int channels, i, j;
764 831
765 /* 832 /*
766 * generic code guarantees at least one band, 833 * generic code guarantees at least one band,
767 * set this very early because much code assumes 834 * set this very early because much code assumes
768 * that hw.conf.channel is assigned 835 * that hw.conf.channel is assigned
769 */ 836 */
837 channels = 0;
770 for (band = 0; band < IEEE80211_NUM_BANDS; band++) { 838 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
771 struct ieee80211_supported_band *sband; 839 struct ieee80211_supported_band *sband;
772 840
773 sband = local->hw.wiphy->bands[band]; 841 sband = local->hw.wiphy->bands[band];
774 if (sband) { 842 if (sband && !local->oper_channel) {
775 /* init channel we're on */ 843 /* init channel we're on */
776 local->hw.conf.channel = 844 local->hw.conf.channel =
777 local->oper_channel = 845 local->oper_channel =
778 local->scan_channel = &sband->channels[0]; 846 local->scan_channel = &sband->channels[0];
779 break;
780 } 847 }
848 if (sband)
849 channels += sband->n_channels;
781 } 850 }
782 851
852 local->int_scan_req.n_channels = channels;
853 local->int_scan_req.channels = kzalloc(sizeof(void *) * channels, GFP_KERNEL);
854 if (!local->int_scan_req.channels)
855 return -ENOMEM;
856
783 /* if low-level driver supports AP, we also support VLAN */ 857 /* if low-level driver supports AP, we also support VLAN */
784 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) 858 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP))
785 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); 859 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN);
@@ -787,9 +861,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
787 /* mac80211 always supports monitor */ 861 /* mac80211 always supports monitor */
788 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); 862 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR);
789 863
864 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
865 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
866 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
867 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC;
868
790 result = wiphy_register(local->hw.wiphy); 869 result = wiphy_register(local->hw.wiphy);
791 if (result < 0) 870 if (result < 0)
792 return result; 871 goto fail_wiphy_register;
793 872
794 /* 873 /*
795 * We use the number of queues for feature tests (QoS, HT) internally 874 * We use the number of queues for feature tests (QoS, HT) internally
@@ -803,8 +882,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
803 hw->ampdu_queues = 0; 882 hw->ampdu_queues = 0;
804 883
805 mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv), 884 mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv),
806 "wmaster%d", ether_setup, 885 "wmaster%d", ieee80211_master_setup,
807 ieee80211_num_queues(hw)); 886 hw->queues);
808 if (!mdev) 887 if (!mdev)
809 goto fail_mdev_alloc; 888 goto fail_mdev_alloc;
810 889
@@ -812,17 +891,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
812 mpriv->local = local; 891 mpriv->local = local;
813 local->mdev = mdev; 892 local->mdev = mdev;
814 893
815 ieee80211_rx_bss_list_init(local);
816
817 mdev->hard_start_xmit = ieee80211_master_start_xmit;
818 mdev->open = ieee80211_master_open;
819 mdev->stop = ieee80211_master_stop;
820 mdev->type = ARPHRD_IEEE80211;
821 mdev->header_ops = &ieee80211_header_ops;
822 mdev->set_multicast_list = ieee80211_master_set_multicast_list;
823
824 local->hw.workqueue = 894 local->hw.workqueue =
825 create_freezeable_workqueue(wiphy_name(local->hw.wiphy)); 895 create_singlethread_workqueue(wiphy_name(local->hw.wiphy));
826 if (!local->hw.workqueue) { 896 if (!local->hw.workqueue) {
827 result = -ENOMEM; 897 result = -ENOMEM;
828 goto fail_workqueue; 898 goto fail_workqueue;
@@ -846,15 +916,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
846 916
847 local->hw.conf.listen_interval = local->hw.max_listen_interval; 917 local->hw.conf.listen_interval = local->hw.max_listen_interval;
848 918
849 local->wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC |
850 IEEE80211_HW_SIGNAL_DB |
851 IEEE80211_HW_SIGNAL_DBM) ?
852 IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID;
853 local->wstats_flags |= local->hw.flags & IEEE80211_HW_NOISE_DBM ?
854 IW_QUAL_NOISE_UPDATED : IW_QUAL_NOISE_INVALID;
855 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
856 local->wstats_flags |= IW_QUAL_DBM;
857
858 result = sta_info_start(local); 919 result = sta_info_start(local);
859 if (result < 0) 920 if (result < 0)
860 goto fail_sta_info; 921 goto fail_sta_info;
@@ -866,6 +927,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
866 927
867 memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); 928 memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
868 SET_NETDEV_DEV(local->mdev, wiphy_dev(local->hw.wiphy)); 929 SET_NETDEV_DEV(local->mdev, wiphy_dev(local->hw.wiphy));
930 local->mdev->features |= NETIF_F_NETNS_LOCAL;
869 931
870 result = register_netdevice(local->mdev); 932 result = register_netdevice(local->mdev);
871 if (result < 0) 933 if (result < 0)
@@ -887,8 +949,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
887 goto fail_wep; 949 goto fail_wep;
888 } 950 }
889 951
890 local->mdev->select_queue = ieee80211_select_queue;
891
892 /* add one default STA interface if supported */ 952 /* add one default STA interface if supported */
893 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION)) { 953 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION)) {
894 result = ieee80211_if_add(local, "wlan%d", NULL, 954 result = ieee80211_if_add(local, "wlan%d", NULL,
@@ -902,6 +962,20 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
902 962
903 ieee80211_led_init(local); 963 ieee80211_led_init(local);
904 964
965 /* alloc internal scan request */
966 i = 0;
967 local->int_scan_req.ssids = &local->scan_ssid;
968 local->int_scan_req.n_ssids = 1;
969 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
970 if (!hw->wiphy->bands[band])
971 continue;
972 for (j = 0; j < hw->wiphy->bands[band]->n_channels; j++) {
973 local->int_scan_req.channels[i] =
974 &hw->wiphy->bands[band]->channels[j];
975 i++;
976 }
977 }
978
905 return 0; 979 return 0;
906 980
907fail_wep: 981fail_wep:
@@ -920,6 +994,8 @@ fail_workqueue:
920 free_netdev(local->mdev); 994 free_netdev(local->mdev);
921fail_mdev_alloc: 995fail_mdev_alloc:
922 wiphy_unregister(local->hw.wiphy); 996 wiphy_unregister(local->hw.wiphy);
997fail_wiphy_register:
998 kfree(local->int_scan_req.channels);
923 return result; 999 return result;
924} 1000}
925EXPORT_SYMBOL(ieee80211_register_hw); 1001EXPORT_SYMBOL(ieee80211_register_hw);
@@ -947,7 +1023,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
947 1023
948 rtnl_unlock(); 1024 rtnl_unlock();
949 1025
950 ieee80211_rx_bss_list_deinit(local);
951 ieee80211_clear_tx_pending(local); 1026 ieee80211_clear_tx_pending(local);
952 sta_info_stop(local); 1027 sta_info_stop(local);
953 rate_control_deinitialize(local); 1028 rate_control_deinitialize(local);
@@ -965,6 +1040,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
965 ieee80211_wep_free(local); 1040 ieee80211_wep_free(local);
966 ieee80211_led_exit(local); 1041 ieee80211_led_exit(local);
967 free_netdev(local->mdev); 1042 free_netdev(local->mdev);
1043 kfree(local->int_scan_req.channels);
968} 1044}
969EXPORT_SYMBOL(ieee80211_unregister_hw); 1045EXPORT_SYMBOL(ieee80211_unregister_hw);
970 1046
@@ -972,6 +1048,8 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
972{ 1048{
973 struct ieee80211_local *local = hw_to_local(hw); 1049 struct ieee80211_local *local = hw_to_local(hw);
974 1050
1051 mutex_destroy(&local->iflist_mtx);
1052
975 wiphy_free(local->hw.wiphy); 1053 wiphy_free(local->hw.wiphy);
976} 1054}
977EXPORT_SYMBOL(ieee80211_free_hw); 1055EXPORT_SYMBOL(ieee80211_free_hw);
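
The two hunks above (the channel counting and allocation at registration time, and the later fill of int_scan_req.channels) follow a common two-pass pattern: count entries across all bands, allocate an array of pointers, then record a pointer to every channel. A minimal userspace sketch of that pattern follows; the struct and function names are simplified stand-ins, not the mac80211 types.

/* Sketch of the two-pass channel-list construction used above.
 * Types and names are illustrative stand-ins, not mac80211 API. */
#include <stdlib.h>

struct channel { int center_freq; };
struct band { int n_channels; struct channel *channels; };

/* Pass 1: count channels across all bands and allocate a pointer array.
 * Pass 2: record a pointer to every channel of every band. */
struct channel **build_scan_list(struct band *bands, int n_bands, int *n_out)
{
	int b, j, n = 0, i = 0;
	struct channel **list;

	for (b = 0; b < n_bands; b++)
		n += bands[b].n_channels;

	list = calloc(n, sizeof(*list));
	if (!list)
		return NULL;	/* mirrors the -ENOMEM path above */

	for (b = 0; b < n_bands; b++)
		for (j = 0; j < bands[b].n_channels; j++)
			list[i++] = &bands[b].channels[j];

	*n_out = n;
	return list;
}

int main(void)
{
	struct channel g[] = { {2412}, {2437}, {2462} };
	struct channel a[] = { {5180}, {5200} };
	struct band bands[] = { {3, g}, {2, a} };
	int n = 0;
	struct channel **list = build_scan_list(bands, 2, &n);

	if (!list)
		return 1;
	/* n is now 5 and list[3]->center_freq is 5180 */
	free(list);
	return 0;
}
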
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 82f568e94365..9a3e5de0410a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -275,16 +275,6 @@ u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_t
275 & tbl->hash_mask; 275 & tbl->hash_mask;
276} 276}
277 277
278u8 mesh_id_hash(u8 *mesh_id, int mesh_id_len)
279{
280 if (!mesh_id_len)
281 return 1;
282 else if (mesh_id_len == 1)
283 return (u8) mesh_id[0];
284 else
285 return (u8) (mesh_id[0] + 2 * mesh_id[1]);
286}
287
288struct mesh_table *mesh_table_alloc(int size_order) 278struct mesh_table *mesh_table_alloc(int size_order)
289{ 279{
290 int i; 280 int i;
@@ -442,7 +432,8 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
442 432
443 ifmsh->housekeeping = true; 433 ifmsh->housekeeping = true;
444 queue_work(local->hw.workqueue, &ifmsh->work); 434 queue_work(local->hw.workqueue, &ifmsh->work);
445 ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); 435 ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON |
436 IEEE80211_IFCC_BEACON_ENABLED);
446} 437}
447 438
448void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) 439void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
@@ -476,7 +467,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
476 struct ieee80211_local *local = sdata->local; 467 struct ieee80211_local *local = sdata->local;
477 struct ieee802_11_elems elems; 468 struct ieee802_11_elems elems;
478 struct ieee80211_channel *channel; 469 struct ieee80211_channel *channel;
479 u64 supp_rates = 0; 470 u32 supp_rates = 0;
480 size_t baselen; 471 size_t baselen;
481 int freq; 472 int freq;
482 enum ieee80211_band band = rx_status->band; 473 enum ieee80211_band band = rx_status->band;
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index c197ab545e54..d891d7ddccd7 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -24,15 +24,15 @@
24 * 24 *
25 * 25 *
26 * 26 *
27 * @MESH_PATH_ACTIVE: the mesh path is can be used for forwarding 27 * @MESH_PATH_ACTIVE: the mesh path can be used for forwarding
28 * @MESH_PATH_RESOLVED: the discovery process is running for this mesh path 28 * @MESH_PATH_RESOLVING: the discovery process is running for this mesh path
29 * @MESH_PATH_DSN_VALID: the mesh path contains a valid destination sequence 29 * @MESH_PATH_DSN_VALID: the mesh path contains a valid destination sequence
30 * number 30 * number
31 * @MESH_PATH_FIXED: the mesh path has been manually set and should not be 31 * @MESH_PATH_FIXED: the mesh path has been manually set and should not be
32 * modified 32 * modified
 33 * @MESH_PATH_RESOLVED: the mesh path has been resolved 33 * @MESH_PATH_RESOLVED: the mesh path has been resolved
34 * 34 *
35 * MESH_PATH_RESOLVED and MESH_PATH_DELETE are used by the mesh path timer to 35 * MESH_PATH_RESOLVED is used by the mesh path timer to
36 * decide when to stop or cancel the mesh path discovery. 36 * decide when to stop or cancel the mesh path discovery.
37 */ 37 */
38enum mesh_path_flags { 38enum mesh_path_flags {
@@ -196,7 +196,6 @@ struct mesh_rmc {
196 196
197/* Public interfaces */ 197/* Public interfaces */
198/* Various */ 198/* Various */
199u8 mesh_id_hash(u8 *mesh_id, int mesh_id_len);
200int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); 199int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
201int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, 200int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr,
202 struct ieee80211_sub_if_data *sdata); 201 struct ieee80211_sub_if_data *sdata);
@@ -236,14 +235,13 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
236 struct ieee80211_mgmt *mgmt, size_t len); 235 struct ieee80211_mgmt *mgmt, size_t len);
237int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata); 236int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata);
238/* Mesh plinks */ 237/* Mesh plinks */
239void mesh_neighbour_update(u8 *hw_addr, u64 rates, 238void mesh_neighbour_update(u8 *hw_addr, u32 rates,
240 struct ieee80211_sub_if_data *sdata, bool add); 239 struct ieee80211_sub_if_data *sdata, bool add);
241bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); 240bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
242void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); 241void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
243void mesh_plink_broken(struct sta_info *sta); 242void mesh_plink_broken(struct sta_info *sta);
244void mesh_plink_deactivate(struct sta_info *sta); 243void mesh_plink_deactivate(struct sta_info *sta);
245int mesh_plink_open(struct sta_info *sta); 244int mesh_plink_open(struct sta_info *sta);
246int mesh_plink_close(struct sta_info *sta);
247void mesh_plink_block(struct sta_info *sta); 245void mesh_plink_block(struct sta_info *sta);
248void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, 246void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
249 struct ieee80211_mgmt *mgmt, size_t len, 247 struct ieee80211_mgmt *mgmt, size_t len,
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 71fe60961230..60b35accda91 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -58,7 +58,6 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae)
58#define PERR_IE_DST_ADDR(x) (x + 2) 58#define PERR_IE_DST_ADDR(x) (x + 2)
59#define PERR_IE_DST_DSN(x) u32_field_get(x, 8, 0); 59#define PERR_IE_DST_DSN(x) u32_field_get(x, 8, 0);
60 60
61#define TU_TO_EXP_TIME(x) (jiffies + msecs_to_jiffies(x * 1024 / 1000))
62#define MSEC_TO_TU(x) (x*1000/1024) 61#define MSEC_TO_TU(x) (x*1000/1024)
63#define DSN_GT(x, y) ((long) (y) - (long) (x) < 0) 62#define DSN_GT(x, y) ((long) (y) - (long) (x) < 0)
64#define DSN_LT(x, y) ((long) (x) - (long) (y) < 0) 63#define DSN_LT(x, y) ((long) (x) - (long) (y) < 0)
@@ -149,7 +148,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
149 pos += ETH_ALEN; 148 pos += ETH_ALEN;
150 memcpy(pos, &dst_dsn, 4); 149 memcpy(pos, &dst_dsn, 4);
151 150
152 ieee80211_tx_skb(sdata, skb, 0); 151 ieee80211_tx_skb(sdata, skb, 1);
153 return 0; 152 return 0;
154} 153}
155 154
@@ -198,7 +197,7 @@ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra,
198 pos += ETH_ALEN; 197 pos += ETH_ALEN;
199 memcpy(pos, &dst_dsn, 4); 198 memcpy(pos, &dst_dsn, 4);
200 199
201 ieee80211_tx_skb(sdata, skb, 0); 200 ieee80211_tx_skb(sdata, skb, 1);
202 return 0; 201 return 0;
203} 202}
204 203
@@ -759,7 +758,7 @@ enddiscovery:
759} 758}
760 759
761/** 760/**
762 * ieee80211s_lookup_nexthop - put the appropriate next hop on a mesh frame 761 * mesh_nexthop_lookup - put the appropriate next hop on a mesh frame
763 * 762 *
764 * @skb: 802.11 frame to be sent 763 * @skb: 802.11 frame to be sent
765 * @sdata: network subif the frame will be sent through 764 * @sdata: network subif the frame will be sent through
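
The DSN_GT()/DSN_LT() macros kept as context in the mesh_hwmp.c hunk above compare destination sequence numbers modulo the counter width, so a recently wrapped sequence number still compares as newer. A small standalone illustration of the same idea, written with the conventional 32-bit serial-number test rather than the kernel's long-based casts (names are mine, not part of the patch):

/* Illustration only: wraparound-safe sequence comparison in the spirit of
 * DSN_GT()/DSN_LT() above. Assumes 32-bit sequence numbers. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool dsn_gt(uint32_t x, uint32_t y)
{
	/* x is "newer" than y if y lies behind x in modulo-2^32 order */
	return (int32_t)(y - x) < 0;
}

int main(void)
{
	/* 5 is newer than 0xfffffffe even though it is numerically smaller */
	printf("%d\n", dsn_gt(5, 0xfffffffeU));	/* prints 1 */
	printf("%d\n", dsn_gt(0xfffffffeU, 5));	/* prints 0 */
	return 0;
}
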
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 1159bdb4119c..a8bbdeca013a 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -93,7 +93,7 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta)
93 * on it in the lifecycle management section! 93 * on it in the lifecycle management section!
94 */ 94 */
95static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, 95static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata,
96 u8 *hw_addr, u64 rates) 96 u8 *hw_addr, u32 rates)
97{ 97{
98 struct ieee80211_local *local = sdata->local; 98 struct ieee80211_local *local = sdata->local;
99 struct sta_info *sta; 99 struct sta_info *sta;
@@ -218,11 +218,11 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
218 memcpy(pos, &reason, 2); 218 memcpy(pos, &reason, 2);
219 } 219 }
220 220
221 ieee80211_tx_skb(sdata, skb, 0); 221 ieee80211_tx_skb(sdata, skb, 1);
222 return 0; 222 return 0;
223} 223}
224 224
225void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct ieee80211_sub_if_data *sdata, 225void mesh_neighbour_update(u8 *hw_addr, u32 rates, struct ieee80211_sub_if_data *sdata,
226 bool peer_accepting_plinks) 226 bool peer_accepting_plinks)
227{ 227{
228 struct ieee80211_local *local = sdata->local; 228 struct ieee80211_local *local = sdata->local;
@@ -361,36 +361,6 @@ void mesh_plink_block(struct sta_info *sta)
361 spin_unlock_bh(&sta->lock); 361 spin_unlock_bh(&sta->lock);
362} 362}
363 363
364int mesh_plink_close(struct sta_info *sta)
365{
366 struct ieee80211_sub_if_data *sdata = sta->sdata;
367 __le16 llid, plid, reason;
368
369 mpl_dbg("Mesh plink: closing link with %pM\n", sta->sta.addr);
370 spin_lock_bh(&sta->lock);
371 sta->reason = cpu_to_le16(MESH_LINK_CANCELLED);
372 reason = sta->reason;
373
374 if (sta->plink_state == PLINK_LISTEN ||
375 sta->plink_state == PLINK_BLOCKED) {
376 mesh_plink_fsm_restart(sta);
377 spin_unlock_bh(&sta->lock);
378 return 0;
379 } else if (sta->plink_state == PLINK_ESTAB) {
380 __mesh_plink_deactivate(sta);
381 /* The timer should not be running */
382 mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata));
383 } else if (!mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)))
384 sta->ignore_plink_timer = true;
385
386 sta->plink_state = PLINK_HOLDING;
387 llid = sta->llid;
388 plid = sta->plid;
389 spin_unlock_bh(&sta->lock);
390 mesh_plink_frame_tx(sta->sdata, PLINK_CLOSE, sta->sta.addr, llid,
391 plid, reason);
392 return 0;
393}
394 364
395void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, 365void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt,
396 size_t len, struct ieee80211_rx_status *rx_status) 366 size_t len, struct ieee80211_rx_status *rx_status)
@@ -477,7 +447,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
477 spin_lock_bh(&sta->lock); 447 spin_lock_bh(&sta->lock);
478 } else if (!sta) { 448 } else if (!sta) {
479 /* ftype == PLINK_OPEN */ 449 /* ftype == PLINK_OPEN */
480 u64 rates; 450 u32 rates;
481 if (!mesh_plink_free_count(sdata)) { 451 if (!mesh_plink_free_count(sdata)) {
482 mpl_dbg("Mesh plink error: no more free plinks\n"); 452 mpl_dbg("Mesh plink error: no more free plinks\n");
483 rcu_read_unlock(); 453 rcu_read_unlock();
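
Several hunks above and below narrow the supported-rates bitmap from u64 to u32; presumably one bit per entry of the band's bitrate table is enough because a band carries far fewer than 32 legacy rates. The mapping itself is visible in the ieee80211_sta_get_rates() body removed from mlme.c further down: each Supported Rates IE octet encodes a rate in 500 kbps units in its low 7 bits, and the bit set in the result is the index of the matching entry in the bitrate table. A standalone illustration with made-up data (simplified types, not the mac80211 structures):

/* Illustration of the supported-rates bitmap built by the (removed)
 * ieee80211_sta_get_rates() shown later in this patch. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Band bitrate table in units of 100 kbps (10 == 1 Mb/s) */
	const int bitrates[] = { 10, 20, 55, 110, 60, 90, 120, 180,
				 240, 360, 480, 540 };
	const int n_bitrates = sizeof(bitrates) / sizeof(bitrates[0]);

	/* Supported Rates IE octets: low 7 bits = rate in 500 kbps units,
	 * the top bit marks a basic rate and is ignored here. */
	const uint8_t supp_rates[] = { 0x82, 0x84, 0x8b, 0x96, 0x24, 0x6c };

	uint32_t rates = 0;
	for (unsigned i = 0; i < sizeof(supp_rates); i++) {
		int rate_100kbps = 5 * (supp_rates[i] & 0x7f);
		for (int j = 0; j < n_bitrates; j++)
			if (bitrates[j] == rate_100kbps)
				rates |= 1u << j;
	}

	printf("rates bitmap: 0x%08x\n", rates);	/* 0x0000088f */
	return 0;
}
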
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 2b890af01ba4..7f238589b6ff 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * BSS client mode implementation 2 * BSS client mode implementation
3 * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> 3 * Copyright 2003-2008, Jouni Malinen <j@w1.fi>
4 * Copyright 2004, Instant802 Networks, Inc. 4 * Copyright 2004, Instant802 Networks, Inc.
5 * Copyright 2005, Devicescape Software, Inc. 5 * Copyright 2005, Devicescape Software, Inc.
6 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 6 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
@@ -15,11 +15,8 @@
15#include <linux/if_ether.h> 15#include <linux/if_ether.h>
16#include <linux/skbuff.h> 16#include <linux/skbuff.h>
17#include <linux/if_arp.h> 17#include <linux/if_arp.h>
18#include <linux/wireless.h>
19#include <linux/random.h>
20#include <linux/etherdevice.h> 18#include <linux/etherdevice.h>
21#include <linux/rtnetlink.h> 19#include <linux/rtnetlink.h>
22#include <net/iw_handler.h>
23#include <net/mac80211.h> 20#include <net/mac80211.h>
24#include <asm/unaligned.h> 21#include <asm/unaligned.h>
25 22
@@ -35,15 +32,6 @@
35#define IEEE80211_MONITORING_INTERVAL (2 * HZ) 32#define IEEE80211_MONITORING_INTERVAL (2 * HZ)
36#define IEEE80211_PROBE_INTERVAL (60 * HZ) 33#define IEEE80211_PROBE_INTERVAL (60 * HZ)
37#define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ) 34#define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ)
38#define IEEE80211_SCAN_INTERVAL (2 * HZ)
39#define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ)
40#define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ)
41
42#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ)
43#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ)
44
45#define IEEE80211_IBSS_MAX_STA_ENTRIES 128
46
47 35
48/* utils */ 36/* utils */
49static int ecw2cw(int ecw) 37static int ecw2cw(int ecw)
@@ -55,10 +43,10 @@ static u8 *ieee80211_bss_get_ie(struct ieee80211_bss *bss, u8 ie)
55{ 43{
56 u8 *end, *pos; 44 u8 *end, *pos;
57 45
58 pos = bss->ies; 46 pos = bss->cbss.information_elements;
59 if (pos == NULL) 47 if (pos == NULL)
60 return NULL; 48 return NULL;
61 end = pos + bss->ies_len; 49 end = pos + bss->cbss.len_information_elements;
62 50
63 while (pos + 1 < end) { 51 while (pos + 1 < end) {
64 if (pos + 2 + pos[1] > end) 52 if (pos + 2 + pos[1] > end)
@@ -73,7 +61,7 @@ static u8 *ieee80211_bss_get_ie(struct ieee80211_bss *bss, u8 ie)
73 61
74static int ieee80211_compatible_rates(struct ieee80211_bss *bss, 62static int ieee80211_compatible_rates(struct ieee80211_bss *bss,
75 struct ieee80211_supported_band *sband, 63 struct ieee80211_supported_band *sband,
76 u64 *rates) 64 u32 *rates)
77{ 65{
78 int i, j, count; 66 int i, j, count;
79 *rates = 0; 67 *rates = 0;
@@ -92,160 +80,40 @@ static int ieee80211_compatible_rates(struct ieee80211_bss *bss,
92 return count; 80 return count;
93} 81}
94 82
95/* also used by mesh code */
96u64 ieee80211_sta_get_rates(struct ieee80211_local *local,
97 struct ieee802_11_elems *elems,
98 enum ieee80211_band band)
99{
100 struct ieee80211_supported_band *sband;
101 struct ieee80211_rate *bitrates;
102 size_t num_rates;
103 u64 supp_rates;
104 int i, j;
105 sband = local->hw.wiphy->bands[band];
106
107 if (!sband) {
108 WARN_ON(1);
109 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
110 }
111
112 bitrates = sband->bitrates;
113 num_rates = sband->n_bitrates;
114 supp_rates = 0;
115 for (i = 0; i < elems->supp_rates_len +
116 elems->ext_supp_rates_len; i++) {
117 u8 rate = 0;
118 int own_rate;
119 if (i < elems->supp_rates_len)
120 rate = elems->supp_rates[i];
121 else if (elems->ext_supp_rates)
122 rate = elems->ext_supp_rates
123 [i - elems->supp_rates_len];
124 own_rate = 5 * (rate & 0x7f);
125 for (j = 0; j < num_rates; j++)
126 if (bitrates[j].bitrate == own_rate)
127 supp_rates |= BIT(j);
128 }
129 return supp_rates;
130}
131
132/* frame sending functions */ 83/* frame sending functions */
133 84
134/* also used by scanning code */ 85static void add_extra_ies(struct sk_buff *skb, u8 *ies, size_t ies_len)
135void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
136 u8 *ssid, size_t ssid_len)
137{ 86{
138 struct ieee80211_local *local = sdata->local; 87 if (ies)
139 struct ieee80211_supported_band *sband; 88 memcpy(skb_put(skb, ies_len), ies, ies_len);
140 struct sk_buff *skb;
141 struct ieee80211_mgmt *mgmt;
142 u8 *pos, *supp_rates, *esupp_rates = NULL;
143 int i;
144
145 skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200);
146 if (!skb) {
147 printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
148 "request\n", sdata->dev->name);
149 return;
150 }
151 skb_reserve(skb, local->hw.extra_tx_headroom);
152
153 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
154 memset(mgmt, 0, 24);
155 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
156 IEEE80211_STYPE_PROBE_REQ);
157 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
158 if (dst) {
159 memcpy(mgmt->da, dst, ETH_ALEN);
160 memcpy(mgmt->bssid, dst, ETH_ALEN);
161 } else {
162 memset(mgmt->da, 0xff, ETH_ALEN);
163 memset(mgmt->bssid, 0xff, ETH_ALEN);
164 }
165 pos = skb_put(skb, 2 + ssid_len);
166 *pos++ = WLAN_EID_SSID;
167 *pos++ = ssid_len;
168 memcpy(pos, ssid, ssid_len);
169
170 supp_rates = skb_put(skb, 2);
171 supp_rates[0] = WLAN_EID_SUPP_RATES;
172 supp_rates[1] = 0;
173 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
174
175 for (i = 0; i < sband->n_bitrates; i++) {
176 struct ieee80211_rate *rate = &sband->bitrates[i];
177 if (esupp_rates) {
178 pos = skb_put(skb, 1);
179 esupp_rates[1]++;
180 } else if (supp_rates[1] == 8) {
181 esupp_rates = skb_put(skb, 3);
182 esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES;
183 esupp_rates[1] = 1;
184 pos = &esupp_rates[2];
185 } else {
186 pos = skb_put(skb, 1);
187 supp_rates[1]++;
188 }
189 *pos = rate->bitrate / 5;
190 }
191
192 ieee80211_tx_skb(sdata, skb, 0);
193} 89}
194 90
195static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, 91static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
196 struct ieee80211_if_sta *ifsta,
197 int transaction, u8 *extra, size_t extra_len,
198 int encrypt)
199{ 92{
93 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
200 struct ieee80211_local *local = sdata->local; 94 struct ieee80211_local *local = sdata->local;
201 struct sk_buff *skb; 95 struct sk_buff *skb;
202 struct ieee80211_mgmt *mgmt; 96 struct ieee80211_mgmt *mgmt;
203 97 u8 *pos, *ies, *ht_ie, *e_ies;
204 skb = dev_alloc_skb(local->hw.extra_tx_headroom +
205 sizeof(*mgmt) + 6 + extra_len);
206 if (!skb) {
207 printk(KERN_DEBUG "%s: failed to allocate buffer for auth "
208 "frame\n", sdata->dev->name);
209 return;
210 }
211 skb_reserve(skb, local->hw.extra_tx_headroom);
212
213 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
214 memset(mgmt, 0, 24 + 6);
215 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
216 IEEE80211_STYPE_AUTH);
217 if (encrypt)
218 mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
219 memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
220 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
221 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
222 mgmt->u.auth.auth_alg = cpu_to_le16(ifsta->auth_alg);
223 mgmt->u.auth.auth_transaction = cpu_to_le16(transaction);
224 ifsta->auth_transaction = transaction + 1;
225 mgmt->u.auth.status_code = cpu_to_le16(0);
226 if (extra)
227 memcpy(skb_put(skb, extra_len), extra, extra_len);
228
229 ieee80211_tx_skb(sdata, skb, encrypt);
230}
231
232static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
233 struct ieee80211_if_sta *ifsta)
234{
235 struct ieee80211_local *local = sdata->local;
236 struct sk_buff *skb;
237 struct ieee80211_mgmt *mgmt;
238 u8 *pos, *ies, *ht_ie;
239 int i, len, count, rates_len, supp_rates_len; 98 int i, len, count, rates_len, supp_rates_len;
240 u16 capab; 99 u16 capab;
241 struct ieee80211_bss *bss; 100 struct ieee80211_bss *bss;
242 int wmm = 0; 101 int wmm = 0;
243 struct ieee80211_supported_band *sband; 102 struct ieee80211_supported_band *sband;
244 u64 rates = 0; 103 u32 rates = 0;
104 size_t e_ies_len;
105
106 if (ifmgd->flags & IEEE80211_IBSS_PREV_BSSID_SET) {
107 e_ies = sdata->u.mgd.ie_reassocreq;
108 e_ies_len = sdata->u.mgd.ie_reassocreq_len;
109 } else {
110 e_ies = sdata->u.mgd.ie_assocreq;
111 e_ies_len = sdata->u.mgd.ie_assocreq_len;
112 }
245 113
246 skb = dev_alloc_skb(local->hw.extra_tx_headroom + 114 skb = dev_alloc_skb(local->hw.extra_tx_headroom +
247 sizeof(*mgmt) + 200 + ifsta->extra_ie_len + 115 sizeof(*mgmt) + 200 + ifmgd->extra_ie_len +
248 ifsta->ssid_len); 116 ifmgd->ssid_len + e_ies_len);
249 if (!skb) { 117 if (!skb) {
250 printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " 118 printk(KERN_DEBUG "%s: failed to allocate buffer for assoc "
251 "frame\n", sdata->dev->name); 119 "frame\n", sdata->dev->name);
@@ -255,7 +123,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
255 123
256 sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; 124 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
257 125
258 capab = ifsta->capab; 126 capab = ifmgd->capab;
259 127
260 if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) { 128 if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) {
261 if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) 129 if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
@@ -264,11 +132,11 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
264 capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; 132 capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
265 } 133 }
266 134
267 bss = ieee80211_rx_bss_get(local, ifsta->bssid, 135 bss = ieee80211_rx_bss_get(local, ifmgd->bssid,
268 local->hw.conf.channel->center_freq, 136 local->hw.conf.channel->center_freq,
269 ifsta->ssid, ifsta->ssid_len); 137 ifmgd->ssid, ifmgd->ssid_len);
270 if (bss) { 138 if (bss) {
271 if (bss->capability & WLAN_CAPABILITY_PRIVACY) 139 if (bss->cbss.capability & WLAN_CAPABILITY_PRIVACY)
272 capab |= WLAN_CAPABILITY_PRIVACY; 140 capab |= WLAN_CAPABILITY_PRIVACY;
273 if (bss->wmm_used) 141 if (bss->wmm_used)
274 wmm = 1; 142 wmm = 1;
@@ -279,7 +147,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
279 * b-only mode) */ 147 * b-only mode) */
280 rates_len = ieee80211_compatible_rates(bss, sband, &rates); 148 rates_len = ieee80211_compatible_rates(bss, sband, &rates);
281 149
282 if ((bss->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) && 150 if ((bss->cbss.capability & WLAN_CAPABILITY_SPECTRUM_MGMT) &&
283 (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)) 151 (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT))
284 capab |= WLAN_CAPABILITY_SPECTRUM_MGMT; 152 capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
285 153
@@ -291,18 +159,18 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
291 159
292 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); 160 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
293 memset(mgmt, 0, 24); 161 memset(mgmt, 0, 24);
294 memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); 162 memcpy(mgmt->da, ifmgd->bssid, ETH_ALEN);
295 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); 163 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
296 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); 164 memcpy(mgmt->bssid, ifmgd->bssid, ETH_ALEN);
297 165
298 if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { 166 if (ifmgd->flags & IEEE80211_STA_PREV_BSSID_SET) {
299 skb_put(skb, 10); 167 skb_put(skb, 10);
300 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | 168 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
301 IEEE80211_STYPE_REASSOC_REQ); 169 IEEE80211_STYPE_REASSOC_REQ);
302 mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab); 170 mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab);
303 mgmt->u.reassoc_req.listen_interval = 171 mgmt->u.reassoc_req.listen_interval =
304 cpu_to_le16(local->hw.conf.listen_interval); 172 cpu_to_le16(local->hw.conf.listen_interval);
305 memcpy(mgmt->u.reassoc_req.current_ap, ifsta->prev_bssid, 173 memcpy(mgmt->u.reassoc_req.current_ap, ifmgd->prev_bssid,
306 ETH_ALEN); 174 ETH_ALEN);
307 } else { 175 } else {
308 skb_put(skb, 4); 176 skb_put(skb, 4);
@@ -314,10 +182,10 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
314 } 182 }
315 183
316 /* SSID */ 184 /* SSID */
317 ies = pos = skb_put(skb, 2 + ifsta->ssid_len); 185 ies = pos = skb_put(skb, 2 + ifmgd->ssid_len);
318 *pos++ = WLAN_EID_SSID; 186 *pos++ = WLAN_EID_SSID;
319 *pos++ = ifsta->ssid_len; 187 *pos++ = ifmgd->ssid_len;
320 memcpy(pos, ifsta->ssid, ifsta->ssid_len); 188 memcpy(pos, ifmgd->ssid, ifmgd->ssid_len);
321 189
322 /* add all rates which were marked to be used above */ 190 /* add all rates which were marked to be used above */
323 supp_rates_len = rates_len; 191 supp_rates_len = rates_len;
@@ -372,12 +240,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
372 } 240 }
373 } 241 }
374 242
375 if (ifsta->extra_ie) { 243 if (ifmgd->extra_ie) {
376 pos = skb_put(skb, ifsta->extra_ie_len); 244 pos = skb_put(skb, ifmgd->extra_ie_len);
377 memcpy(pos, ifsta->extra_ie, ifsta->extra_ie_len); 245 memcpy(pos, ifmgd->extra_ie, ifmgd->extra_ie_len);
378 } 246 }
379 247
380 if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { 248 if (wmm && (ifmgd->flags & IEEE80211_STA_WMM_ENABLED)) {
381 pos = skb_put(skb, 9); 249 pos = skb_put(skb, 9);
382 *pos++ = WLAN_EID_VENDOR_SPECIFIC; 250 *pos++ = WLAN_EID_VENDOR_SPECIFIC;
383 *pos++ = 7; /* len */ 251 *pos++ = 7; /* len */
@@ -391,10 +259,17 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
391 } 259 }
392 260
393 /* wmm support is a must for HT */ 261 /* wmm support is a must for HT */
394 if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) && 262 /*
263 * IEEE802.11n does not allow TKIP/WEP as pairwise
264 * ciphers in HT mode. We still associate in non-ht
265 * mode (11a/b/g) if any one of these ciphers is
266 * configured as pairwise.
267 */
268 if (wmm && (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) &&
395 sband->ht_cap.ht_supported && 269 sband->ht_cap.ht_supported &&
396 (ht_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_INFORMATION)) && 270 (ht_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_INFORMATION)) &&
397 ht_ie[1] >= sizeof(struct ieee80211_ht_info)) { 271 ht_ie[1] >= sizeof(struct ieee80211_ht_info) &&
272 (!(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED))) {
398 struct ieee80211_ht_info *ht_info = 273 struct ieee80211_ht_info *ht_info =
399 (struct ieee80211_ht_info *)(ht_ie + 2); 274 (struct ieee80211_ht_info *)(ht_ie + 2);
400 u16 cap = sband->ht_cap.cap; 275 u16 cap = sband->ht_cap.cap;
@@ -429,11 +304,13 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
429 memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); 304 memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs));
430 } 305 }
431 306
432 kfree(ifsta->assocreq_ies); 307 add_extra_ies(skb, e_ies, e_ies_len);
433 ifsta->assocreq_ies_len = (skb->data + skb->len) - ies; 308
434 ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_KERNEL); 309 kfree(ifmgd->assocreq_ies);
435 if (ifsta->assocreq_ies) 310 ifmgd->assocreq_ies_len = (skb->data + skb->len) - ies;
436 memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len); 311 ifmgd->assocreq_ies = kmalloc(ifmgd->assocreq_ies_len, GFP_KERNEL);
312 if (ifmgd->assocreq_ies)
313 memcpy(ifmgd->assocreq_ies, ies, ifmgd->assocreq_ies_len);
437 314
438 ieee80211_tx_skb(sdata, skb, 0); 315 ieee80211_tx_skb(sdata, skb, 0);
439} 316}
@@ -443,11 +320,22 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
443 u16 stype, u16 reason) 320 u16 stype, u16 reason)
444{ 321{
445 struct ieee80211_local *local = sdata->local; 322 struct ieee80211_local *local = sdata->local;
446 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 323 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
447 struct sk_buff *skb; 324 struct sk_buff *skb;
448 struct ieee80211_mgmt *mgmt; 325 struct ieee80211_mgmt *mgmt;
326 u8 *ies;
327 size_t ies_len;
328
329 if (stype == IEEE80211_STYPE_DEAUTH) {
330 ies = sdata->u.mgd.ie_deauth;
331 ies_len = sdata->u.mgd.ie_deauth_len;
332 } else {
333 ies = sdata->u.mgd.ie_disassoc;
334 ies_len = sdata->u.mgd.ie_disassoc_len;
335 }
449 336
450 skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt)); 337 skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) +
338 ies_len);
451 if (!skb) { 339 if (!skb) {
452 printk(KERN_DEBUG "%s: failed to allocate buffer for " 340 printk(KERN_DEBUG "%s: failed to allocate buffer for "
453 "deauth/disassoc frame\n", sdata->dev->name); 341 "deauth/disassoc frame\n", sdata->dev->name);
@@ -457,40 +345,53 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
457 345
458 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); 346 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
459 memset(mgmt, 0, 24); 347 memset(mgmt, 0, 24);
460 memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); 348 memcpy(mgmt->da, ifmgd->bssid, ETH_ALEN);
461 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); 349 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
462 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); 350 memcpy(mgmt->bssid, ifmgd->bssid, ETH_ALEN);
463 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); 351 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype);
464 skb_put(skb, 2); 352 skb_put(skb, 2);
465 /* u.deauth.reason_code == u.disassoc.reason_code */ 353 /* u.deauth.reason_code == u.disassoc.reason_code */
466 mgmt->u.deauth.reason_code = cpu_to_le16(reason); 354 mgmt->u.deauth.reason_code = cpu_to_le16(reason);
467 355
468 ieee80211_tx_skb(sdata, skb, 0); 356 add_extra_ies(skb, ies, ies_len);
357
358 ieee80211_tx_skb(sdata, skb, ifmgd->flags & IEEE80211_STA_MFP_ENABLED);
469} 359}
470 360
471/* MLME */ 361void ieee80211_send_pspoll(struct ieee80211_local *local,
472static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, 362 struct ieee80211_sub_if_data *sdata)
473 struct ieee80211_bss *bss)
474{ 363{
475 struct ieee80211_local *local = sdata->local; 364 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
476 int i, have_higher_than_11mbit = 0; 365 struct ieee80211_pspoll *pspoll;
366 struct sk_buff *skb;
367 u16 fc;
477 368
478 /* cf. IEEE 802.11 9.2.12 */ 369 skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*pspoll));
479 for (i = 0; i < bss->supp_rates_len; i++) 370 if (!skb) {
480 if ((bss->supp_rates[i] & 0x7f) * 5 > 110) 371 printk(KERN_DEBUG "%s: failed to allocate buffer for "
481 have_higher_than_11mbit = 1; 372 "pspoll frame\n", sdata->dev->name);
373 return;
374 }
375 skb_reserve(skb, local->hw.extra_tx_headroom);
482 376
483 if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && 377 pspoll = (struct ieee80211_pspoll *) skb_put(skb, sizeof(*pspoll));
484 have_higher_than_11mbit) 378 memset(pspoll, 0, sizeof(*pspoll));
485 sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; 379 fc = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL | IEEE80211_FCTL_PM;
486 else 380 pspoll->frame_control = cpu_to_le16(fc);
487 sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; 381 pspoll->aid = cpu_to_le16(ifmgd->aid);
382
383 /* aid in PS-Poll has its two MSBs each set to 1 */
384 pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14);
488 385
489 ieee80211_set_wmm_default(sdata); 386 memcpy(pspoll->bssid, ifmgd->bssid, ETH_ALEN);
387 memcpy(pspoll->ta, sdata->dev->dev_addr, ETH_ALEN);
388
389 ieee80211_tx_skb(sdata, skb, 0);
490} 390}
491 391
392/* MLME */
492static void ieee80211_sta_wmm_params(struct ieee80211_local *local, 393static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
493 struct ieee80211_if_sta *ifsta, 394 struct ieee80211_if_managed *ifmgd,
494 u8 *wmm_param, size_t wmm_param_len) 395 u8 *wmm_param, size_t wmm_param_len)
495{ 396{
496 struct ieee80211_tx_queue_params params; 397 struct ieee80211_tx_queue_params params;
@@ -498,7 +399,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
498 int count; 399 int count;
499 u8 *pos; 400 u8 *pos;
500 401
501 if (!(ifsta->flags & IEEE80211_STA_WMM_ENABLED)) 402 if (!(ifmgd->flags & IEEE80211_STA_WMM_ENABLED))
502 return; 403 return;
503 404
504 if (!wmm_param) 405 if (!wmm_param)
@@ -507,9 +408,9 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
507 if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1) 408 if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1)
508 return; 409 return;
509 count = wmm_param[6] & 0x0f; 410 count = wmm_param[6] & 0x0f;
510 if (count == ifsta->wmm_last_param_set) 411 if (count == ifmgd->wmm_last_param_set)
511 return; 412 return;
512 ifsta->wmm_last_param_set = count; 413 ifmgd->wmm_last_param_set = count;
513 414
514 pos = wmm_param + 8; 415 pos = wmm_param + 8;
515 left = wmm_param_len - 8; 416 left = wmm_param_len - 8;
@@ -568,12 +469,33 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
568 } 469 }
569} 470}
570 471
472static bool ieee80211_check_tim(struct ieee802_11_elems *elems, u16 aid)
473{
474 u8 mask;
475 u8 index, indexn1, indexn2;
476 struct ieee80211_tim_ie *tim = (struct ieee80211_tim_ie *) elems->tim;
477
478 aid &= 0x3fff;
479 index = aid / 8;
480 mask = 1 << (aid & 7);
481
482 indexn1 = tim->bitmap_ctrl & 0xfe;
483 indexn2 = elems->tim_len + indexn1 - 4;
484
485 if (index < indexn1 || index > indexn2)
486 return false;
487
488 index -= indexn1;
489
490 return !!(tim->virtual_map[index] & mask);
491}
492
571static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, 493static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
572 u16 capab, bool erp_valid, u8 erp) 494 u16 capab, bool erp_valid, u8 erp)
573{ 495{
574 struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; 496 struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
575#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 497#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
576 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 498 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
577#endif 499#endif
578 u32 changed = 0; 500 u32 changed = 0;
579 bool use_protection; 501 bool use_protection;
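
The ieee80211_check_tim() helper added in the hunk above tests whether the AP's TIM element announces buffered frames for our AID: bitmap_ctrl & 0xfe is N1, the first octet carried in the partial virtual bitmap, and tim_len + N1 - 4 is N2, the last carried octet, so the AID's octet is only looked up when it falls inside that window. A standalone rework of the same logic with plain C types follows; the function and variable names are mine, not mac80211's.

/* Rework of the ieee80211_check_tim() logic added above, on a raw TIM IE
 * body (dtim_count, dtim_period, bitmap_ctrl, then the partial virtual
 * bitmap). tim_len is the IE length field. */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

bool tim_has_aid(const uint8_t *tim_ie, size_t tim_len, uint16_t aid)
{
	uint8_t bitmap_ctrl = tim_ie[2];
	const uint8_t *vmap = &tim_ie[3];
	unsigned int index, n1, n2;
	uint8_t mask;

	aid &= 0x3fff;			/* AID is 14 bits */
	index = aid / 8;		/* octet holding our bit */
	mask  = 1 << (aid & 7);		/* bit within that octet */

	n1 = bitmap_ctrl & 0xfe;	/* first octet present (N1) */
	n2 = tim_len + n1 - 4;		/* last octet present (N2) */

	if (index < n1 || index > n2)
		return false;		/* our octet was not transmitted */

	return vmap[index - n1] & mask;
}
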
@@ -596,7 +518,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
596 printk(KERN_DEBUG "%s: CTS protection %s (BSSID=%pM)\n", 518 printk(KERN_DEBUG "%s: CTS protection %s (BSSID=%pM)\n",
597 sdata->dev->name, 519 sdata->dev->name,
598 use_protection ? "enabled" : "disabled", 520 use_protection ? "enabled" : "disabled",
599 ifsta->bssid); 521 ifmgd->bssid);
600 } 522 }
601#endif 523#endif
602 bss_conf->use_cts_prot = use_protection; 524 bss_conf->use_cts_prot = use_protection;
@@ -610,7 +532,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
610 " (BSSID=%pM)\n", 532 " (BSSID=%pM)\n",
611 sdata->dev->name, 533 sdata->dev->name,
612 use_short_preamble ? "short" : "long", 534 use_short_preamble ? "short" : "long",
613 ifsta->bssid); 535 ifmgd->bssid);
614 } 536 }
615#endif 537#endif
616 bss_conf->use_short_preamble = use_short_preamble; 538 bss_conf->use_short_preamble = use_short_preamble;
@@ -624,7 +546,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
624 " (BSSID=%pM)\n", 546 " (BSSID=%pM)\n",
625 sdata->dev->name, 547 sdata->dev->name,
626 use_short_slot ? "short" : "long", 548 use_short_slot ? "short" : "long",
627 ifsta->bssid); 549 ifmgd->bssid);
628 } 550 }
629#endif 551#endif
630 bss_conf->use_short_slot = use_short_slot; 552 bss_conf->use_short_slot = use_short_slot;
@@ -634,57 +556,57 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
634 return changed; 556 return changed;
635} 557}
636 558
637static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata, 559static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata)
638 struct ieee80211_if_sta *ifsta)
639{ 560{
640 union iwreq_data wrqu; 561 union iwreq_data wrqu;
562
641 memset(&wrqu, 0, sizeof(wrqu)); 563 memset(&wrqu, 0, sizeof(wrqu));
642 if (ifsta->flags & IEEE80211_STA_ASSOCIATED) 564 if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED)
643 memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); 565 memcpy(wrqu.ap_addr.sa_data, sdata->u.mgd.bssid, ETH_ALEN);
644 wrqu.ap_addr.sa_family = ARPHRD_ETHER; 566 wrqu.ap_addr.sa_family = ARPHRD_ETHER;
645 wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); 567 wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL);
646} 568}
647 569
648static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata, 570static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata)
649 struct ieee80211_if_sta *ifsta)
650{ 571{
572 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
651 char *buf; 573 char *buf;
652 size_t len; 574 size_t len;
653 int i; 575 int i;
654 union iwreq_data wrqu; 576 union iwreq_data wrqu;
655 577
656 if (!ifsta->assocreq_ies && !ifsta->assocresp_ies) 578 if (!ifmgd->assocreq_ies && !ifmgd->assocresp_ies)
657 return; 579 return;
658 580
659 buf = kmalloc(50 + 2 * (ifsta->assocreq_ies_len + 581 buf = kmalloc(50 + 2 * (ifmgd->assocreq_ies_len +
660 ifsta->assocresp_ies_len), GFP_KERNEL); 582 ifmgd->assocresp_ies_len), GFP_KERNEL);
661 if (!buf) 583 if (!buf)
662 return; 584 return;
663 585
664 len = sprintf(buf, "ASSOCINFO("); 586 len = sprintf(buf, "ASSOCINFO(");
665 if (ifsta->assocreq_ies) { 587 if (ifmgd->assocreq_ies) {
666 len += sprintf(buf + len, "ReqIEs="); 588 len += sprintf(buf + len, "ReqIEs=");
667 for (i = 0; i < ifsta->assocreq_ies_len; i++) { 589 for (i = 0; i < ifmgd->assocreq_ies_len; i++) {
668 len += sprintf(buf + len, "%02x", 590 len += sprintf(buf + len, "%02x",
669 ifsta->assocreq_ies[i]); 591 ifmgd->assocreq_ies[i]);
670 } 592 }
671 } 593 }
672 if (ifsta->assocresp_ies) { 594 if (ifmgd->assocresp_ies) {
673 if (ifsta->assocreq_ies) 595 if (ifmgd->assocreq_ies)
674 len += sprintf(buf + len, " "); 596 len += sprintf(buf + len, " ");
675 len += sprintf(buf + len, "RespIEs="); 597 len += sprintf(buf + len, "RespIEs=");
676 for (i = 0; i < ifsta->assocresp_ies_len; i++) { 598 for (i = 0; i < ifmgd->assocresp_ies_len; i++) {
677 len += sprintf(buf + len, "%02x", 599 len += sprintf(buf + len, "%02x",
678 ifsta->assocresp_ies[i]); 600 ifmgd->assocresp_ies[i]);
679 } 601 }
680 } 602 }
681 len += sprintf(buf + len, ")"); 603 len += sprintf(buf + len, ")");
682 604
683 if (len > IW_CUSTOM_MAX) { 605 if (len > IW_CUSTOM_MAX) {
684 len = sprintf(buf, "ASSOCRESPIE="); 606 len = sprintf(buf, "ASSOCRESPIE=");
685 for (i = 0; i < ifsta->assocresp_ies_len; i++) { 607 for (i = 0; i < ifmgd->assocresp_ies_len; i++) {
686 len += sprintf(buf + len, "%02x", 608 len += sprintf(buf + len, "%02x",
687 ifsta->assocresp_ies[i]); 609 ifmgd->assocresp_ies[i]);
688 } 610 }
689 } 611 }
690 612
@@ -699,40 +621,37 @@ static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata,
699 621
700 622
701static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, 623static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
702 struct ieee80211_if_sta *ifsta,
703 u32 bss_info_changed) 624 u32 bss_info_changed)
704{ 625{
626 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
705 struct ieee80211_local *local = sdata->local; 627 struct ieee80211_local *local = sdata->local;
706 struct ieee80211_conf *conf = &local_to_hw(local)->conf; 628 struct ieee80211_conf *conf = &local_to_hw(local)->conf;
707 629
708 struct ieee80211_bss *bss; 630 struct ieee80211_bss *bss;
709 631
710 bss_info_changed |= BSS_CHANGED_ASSOC; 632 bss_info_changed |= BSS_CHANGED_ASSOC;
711 ifsta->flags |= IEEE80211_STA_ASSOCIATED; 633 ifmgd->flags |= IEEE80211_STA_ASSOCIATED;
712 634
713 if (sdata->vif.type != NL80211_IFTYPE_STATION) 635 bss = ieee80211_rx_bss_get(local, ifmgd->bssid,
714 return;
715
716 bss = ieee80211_rx_bss_get(local, ifsta->bssid,
717 conf->channel->center_freq, 636 conf->channel->center_freq,
718 ifsta->ssid, ifsta->ssid_len); 637 ifmgd->ssid, ifmgd->ssid_len);
719 if (bss) { 638 if (bss) {
720 /* set timing information */ 639 /* set timing information */
721 sdata->vif.bss_conf.beacon_int = bss->beacon_int; 640 sdata->vif.bss_conf.beacon_int = bss->cbss.beacon_interval;
722 sdata->vif.bss_conf.timestamp = bss->timestamp; 641 sdata->vif.bss_conf.timestamp = bss->cbss.tsf;
723 sdata->vif.bss_conf.dtim_period = bss->dtim_period; 642 sdata->vif.bss_conf.dtim_period = bss->dtim_period;
724 643
725 bss_info_changed |= ieee80211_handle_bss_capability(sdata, 644 bss_info_changed |= ieee80211_handle_bss_capability(sdata,
726 bss->capability, bss->has_erp_value, bss->erp_value); 645 bss->cbss.capability, bss->has_erp_value, bss->erp_value);
727 646
728 ieee80211_rx_bss_put(local, bss); 647 ieee80211_rx_bss_put(local, bss);
729 } 648 }
730 649
731 ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; 650 ifmgd->flags |= IEEE80211_STA_PREV_BSSID_SET;
732 memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); 651 memcpy(ifmgd->prev_bssid, sdata->u.mgd.bssid, ETH_ALEN);
733 ieee80211_sta_send_associnfo(sdata, ifsta); 652 ieee80211_sta_send_associnfo(sdata);
734 653
735 ifsta->last_probe = jiffies; 654 ifmgd->last_probe = jiffies;
736 ieee80211_led_assoc(local, 1); 655 ieee80211_led_assoc(local, 1);
737 656
738 sdata->vif.bss_conf.assoc = 1; 657 sdata->vif.bss_conf.assoc = 1;
@@ -745,72 +664,90 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
745 ieee80211_bss_info_change_notify(sdata, bss_info_changed); 664 ieee80211_bss_info_change_notify(sdata, bss_info_changed);
746 665
747 if (local->powersave) { 666 if (local->powersave) {
748 if (local->dynamic_ps_timeout > 0) 667 if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) &&
668 local->hw.conf.dynamic_ps_timeout > 0) {
749 mod_timer(&local->dynamic_ps_timer, jiffies + 669 mod_timer(&local->dynamic_ps_timer, jiffies +
750 msecs_to_jiffies(local->dynamic_ps_timeout)); 670 msecs_to_jiffies(
751 else { 671 local->hw.conf.dynamic_ps_timeout));
672 } else {
673 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
674 ieee80211_send_nullfunc(local, sdata, 1);
752 conf->flags |= IEEE80211_CONF_PS; 675 conf->flags |= IEEE80211_CONF_PS;
753 ieee80211_hw_config(local, 676 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
754 IEEE80211_CONF_CHANGE_PS);
755 } 677 }
756 } 678 }
757 679
758 netif_tx_start_all_queues(sdata->dev); 680 netif_tx_start_all_queues(sdata->dev);
759 netif_carrier_on(sdata->dev); 681 netif_carrier_on(sdata->dev);
760 682
761 ieee80211_sta_send_apinfo(sdata, ifsta); 683 ieee80211_sta_send_apinfo(sdata);
762} 684}
763 685
764static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, 686static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata)
765 struct ieee80211_if_sta *ifsta)
766{ 687{
767 ifsta->direct_probe_tries++; 688 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
768 if (ifsta->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) { 689
690 ifmgd->direct_probe_tries++;
691 if (ifmgd->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) {
769 printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n", 692 printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n",
770 sdata->dev->name, ifsta->bssid); 693 sdata->dev->name, ifmgd->bssid);
771 ifsta->state = IEEE80211_STA_MLME_DISABLED; 694 ifmgd->state = IEEE80211_STA_MLME_DISABLED;
772 ieee80211_sta_send_apinfo(sdata, ifsta); 695 ieee80211_sta_send_apinfo(sdata);
696
697 /*
698 * Most likely AP is not in the range so remove the
699 * bss information associated to the AP
700 */
701 ieee80211_rx_bss_remove(sdata, ifmgd->bssid,
702 sdata->local->hw.conf.channel->center_freq,
703 ifmgd->ssid, ifmgd->ssid_len);
773 return; 704 return;
774 } 705 }
775 706
776 printk(KERN_DEBUG "%s: direct probe to AP %pM try %d\n", 707 printk(KERN_DEBUG "%s: direct probe to AP %pM try %d\n",
777 sdata->dev->name, ifsta->bssid, 708 sdata->dev->name, ifmgd->bssid,
778 ifsta->direct_probe_tries); 709 ifmgd->direct_probe_tries);
779 710
780 ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; 711 ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE;
781 712
782 set_bit(IEEE80211_STA_REQ_DIRECT_PROBE, &ifsta->request); 713 set_bit(IEEE80211_STA_REQ_DIRECT_PROBE, &ifmgd->request);
783 714
784 /* Direct probe is sent to broadcast address as some APs 715 /* Direct probe is sent to broadcast address as some APs
 785 * will not answer a directed probe in unassociated state. 716 * will not answer a directed probe in unassociated state.
786 */ 717 */
787 ieee80211_send_probe_req(sdata, NULL, 718 ieee80211_send_probe_req(sdata, NULL,
788 ifsta->ssid, ifsta->ssid_len); 719 ifmgd->ssid, ifmgd->ssid_len, NULL, 0);
789 720
790 mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); 721 mod_timer(&ifmgd->timer, jiffies + IEEE80211_AUTH_TIMEOUT);
791} 722}
792 723
793 724
794static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, 725static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata)
795 struct ieee80211_if_sta *ifsta)
796{ 726{
797 ifsta->auth_tries++; 727 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
798 if (ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) { 728
729 ifmgd->auth_tries++;
730 if (ifmgd->auth_tries > IEEE80211_AUTH_MAX_TRIES) {
799 printk(KERN_DEBUG "%s: authentication with AP %pM" 731 printk(KERN_DEBUG "%s: authentication with AP %pM"
800 " timed out\n", 732 " timed out\n",
801 sdata->dev->name, ifsta->bssid); 733 sdata->dev->name, ifmgd->bssid);
802 ifsta->state = IEEE80211_STA_MLME_DISABLED; 734 ifmgd->state = IEEE80211_STA_MLME_DISABLED;
803 ieee80211_sta_send_apinfo(sdata, ifsta); 735 ieee80211_sta_send_apinfo(sdata);
736 ieee80211_rx_bss_remove(sdata, ifmgd->bssid,
737 sdata->local->hw.conf.channel->center_freq,
738 ifmgd->ssid, ifmgd->ssid_len);
804 return; 739 return;
805 } 740 }
806 741
807 ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; 742 ifmgd->state = IEEE80211_STA_MLME_AUTHENTICATE;
808 printk(KERN_DEBUG "%s: authenticate with AP %pM\n", 743 printk(KERN_DEBUG "%s: authenticate with AP %pM\n",
809 sdata->dev->name, ifsta->bssid); 744 sdata->dev->name, ifmgd->bssid);
810 745
811 ieee80211_send_auth(sdata, ifsta, 1, NULL, 0, 0); 746 ieee80211_send_auth(sdata, 1, ifmgd->auth_alg, NULL, 0,
747 ifmgd->bssid, 0);
748 ifmgd->auth_transaction = 2;
812 749
813 mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); 750 mod_timer(&ifmgd->timer, jiffies + IEEE80211_AUTH_TIMEOUT);
814} 751}
815 752
816/* 753/*
@@ -818,32 +755,33 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata,
818 * if self disconnected or a reason code from the AP. 755 * if self disconnected or a reason code from the AP.
819 */ 756 */
820static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, 757static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
821 struct ieee80211_if_sta *ifsta, bool deauth, 758 bool deauth, bool self_disconnected,
822 bool self_disconnected, u16 reason) 759 u16 reason)
823{ 760{
761 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
824 struct ieee80211_local *local = sdata->local; 762 struct ieee80211_local *local = sdata->local;
825 struct sta_info *sta; 763 struct sta_info *sta;
826 u32 changed = 0, config_changed = 0; 764 u32 changed = 0, config_changed = 0;
827 765
828 rcu_read_lock(); 766 rcu_read_lock();
829 767
830 sta = sta_info_get(local, ifsta->bssid); 768 sta = sta_info_get(local, ifmgd->bssid);
831 if (!sta) { 769 if (!sta) {
832 rcu_read_unlock(); 770 rcu_read_unlock();
833 return; 771 return;
834 } 772 }
835 773
836 if (deauth) { 774 if (deauth) {
837 ifsta->direct_probe_tries = 0; 775 ifmgd->direct_probe_tries = 0;
838 ifsta->auth_tries = 0; 776 ifmgd->auth_tries = 0;
839 } 777 }
840 ifsta->assoc_scan_tries = 0; 778 ifmgd->assoc_scan_tries = 0;
841 ifsta->assoc_tries = 0; 779 ifmgd->assoc_tries = 0;
842 780
843 netif_tx_stop_all_queues(sdata->dev); 781 netif_tx_stop_all_queues(sdata->dev);
844 netif_carrier_off(sdata->dev); 782 netif_carrier_off(sdata->dev);
845 783
846 ieee80211_sta_tear_down_BA_sessions(sdata, sta->sta.addr); 784 ieee80211_sta_tear_down_BA_sessions(sta);
847 785
848 if (self_disconnected) { 786 if (self_disconnected) {
849 if (deauth) 787 if (deauth)
@@ -854,23 +792,28 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
854 IEEE80211_STYPE_DISASSOC, reason); 792 IEEE80211_STYPE_DISASSOC, reason);
855 } 793 }
856 794
857 ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; 795 ifmgd->flags &= ~IEEE80211_STA_ASSOCIATED;
858 changed |= ieee80211_reset_erp_info(sdata); 796 changed |= ieee80211_reset_erp_info(sdata);
859 797
860 ieee80211_led_assoc(local, 0); 798 ieee80211_led_assoc(local, 0);
861 changed |= BSS_CHANGED_ASSOC; 799 changed |= BSS_CHANGED_ASSOC;
862 sdata->vif.bss_conf.assoc = false; 800 sdata->vif.bss_conf.assoc = false;
863 801
864 ieee80211_sta_send_apinfo(sdata, ifsta); 802 ieee80211_sta_send_apinfo(sdata);
865 803
866 if (self_disconnected || reason == WLAN_REASON_DISASSOC_STA_HAS_LEFT) 804 if (self_disconnected || reason == WLAN_REASON_DISASSOC_STA_HAS_LEFT) {
867 ifsta->state = IEEE80211_STA_MLME_DISABLED; 805 ifmgd->state = IEEE80211_STA_MLME_DISABLED;
806 ieee80211_rx_bss_remove(sdata, ifmgd->bssid,
807 sdata->local->hw.conf.channel->center_freq,
808 ifmgd->ssid, ifmgd->ssid_len);
809 }
868 810
869 rcu_read_unlock(); 811 rcu_read_unlock();
870 812
871 local->hw.conf.ht.enabled = false; 813 /* channel(_type) changes are handled by ieee80211_hw_config */
872 local->oper_channel_type = NL80211_CHAN_NO_HT; 814 local->oper_channel_type = NL80211_CHAN_NO_HT;
873 config_changed |= IEEE80211_CONF_CHANGE_HT; 815
816 local->power_constr_level = 0;
874 817
875 del_timer_sync(&local->dynamic_ps_timer); 818 del_timer_sync(&local->dynamic_ps_timer);
876 cancel_work_sync(&local->dynamic_ps_enable_work); 819 cancel_work_sync(&local->dynamic_ps_enable_work);
@@ -885,7 +828,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
885 828
886 rcu_read_lock(); 829 rcu_read_lock();
887 830
888 sta = sta_info_get(local, ifsta->bssid); 831 sta = sta_info_get(local, ifmgd->bssid);
889 if (!sta) { 832 if (!sta) {
890 rcu_read_unlock(); 833 rcu_read_unlock();
891 return; 834 return;
@@ -906,27 +849,27 @@ static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata)
906 return 1; 849 return 1;
907} 850}
908 851
909static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata, 852static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata)
910 struct ieee80211_if_sta *ifsta)
911{ 853{
854 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
912 struct ieee80211_local *local = sdata->local; 855 struct ieee80211_local *local = sdata->local;
913 struct ieee80211_bss *bss; 856 struct ieee80211_bss *bss;
914 int bss_privacy; 857 int bss_privacy;
915 int wep_privacy; 858 int wep_privacy;
916 int privacy_invoked; 859 int privacy_invoked;
917 860
918 if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL)) 861 if (!ifmgd || (ifmgd->flags & IEEE80211_STA_MIXED_CELL))
919 return 0; 862 return 0;
920 863
921 bss = ieee80211_rx_bss_get(local, ifsta->bssid, 864 bss = ieee80211_rx_bss_get(local, ifmgd->bssid,
922 local->hw.conf.channel->center_freq, 865 local->hw.conf.channel->center_freq,
923 ifsta->ssid, ifsta->ssid_len); 866 ifmgd->ssid, ifmgd->ssid_len);
924 if (!bss) 867 if (!bss)
925 return 0; 868 return 0;
926 869
927 bss_privacy = !!(bss->capability & WLAN_CAPABILITY_PRIVACY); 870 bss_privacy = !!(bss->cbss.capability & WLAN_CAPABILITY_PRIVACY);
928 wep_privacy = !!ieee80211_sta_wep_configured(sdata); 871 wep_privacy = !!ieee80211_sta_wep_configured(sdata);
929 privacy_invoked = !!(ifsta->flags & IEEE80211_STA_PRIVACY_INVOKED); 872 privacy_invoked = !!(ifmgd->flags & IEEE80211_STA_PRIVACY_INVOKED);
930 873
931 ieee80211_rx_bss_put(local, bss); 874 ieee80211_rx_bss_put(local, bss);
932 875
@@ -936,38 +879,42 @@ static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata,
936 return 1; 879 return 1;
937} 880}
938 881
939static void ieee80211_associate(struct ieee80211_sub_if_data *sdata, 882static void ieee80211_associate(struct ieee80211_sub_if_data *sdata)
940 struct ieee80211_if_sta *ifsta)
941{ 883{
942 ifsta->assoc_tries++; 884 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
943 if (ifsta->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) { 885
886 ifmgd->assoc_tries++;
887 if (ifmgd->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) {
944 printk(KERN_DEBUG "%s: association with AP %pM" 888 printk(KERN_DEBUG "%s: association with AP %pM"
945 " timed out\n", 889 " timed out\n",
946 sdata->dev->name, ifsta->bssid); 890 sdata->dev->name, ifmgd->bssid);
947 ifsta->state = IEEE80211_STA_MLME_DISABLED; 891 ifmgd->state = IEEE80211_STA_MLME_DISABLED;
948 ieee80211_sta_send_apinfo(sdata, ifsta); 892 ieee80211_sta_send_apinfo(sdata);
893 ieee80211_rx_bss_remove(sdata, ifmgd->bssid,
894 sdata->local->hw.conf.channel->center_freq,
895 ifmgd->ssid, ifmgd->ssid_len);
949 return; 896 return;
950 } 897 }
951 898
952 ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; 899 ifmgd->state = IEEE80211_STA_MLME_ASSOCIATE;
953 printk(KERN_DEBUG "%s: associate with AP %pM\n", 900 printk(KERN_DEBUG "%s: associate with AP %pM\n",
954 sdata->dev->name, ifsta->bssid); 901 sdata->dev->name, ifmgd->bssid);
955 if (ieee80211_privacy_mismatch(sdata, ifsta)) { 902 if (ieee80211_privacy_mismatch(sdata)) {
956 printk(KERN_DEBUG "%s: mismatch in privacy configuration and " 903 printk(KERN_DEBUG "%s: mismatch in privacy configuration and "
957 "mixed-cell disabled - abort association\n", sdata->dev->name); 904 "mixed-cell disabled - abort association\n", sdata->dev->name);
958 ifsta->state = IEEE80211_STA_MLME_DISABLED; 905 ifmgd->state = IEEE80211_STA_MLME_DISABLED;
959 return; 906 return;
960 } 907 }
961 908
962 ieee80211_send_assoc(sdata, ifsta); 909 ieee80211_send_assoc(sdata);
963 910
964 mod_timer(&ifsta->timer, jiffies + IEEE80211_ASSOC_TIMEOUT); 911 mod_timer(&ifmgd->timer, jiffies + IEEE80211_ASSOC_TIMEOUT);
965} 912}
966 913
967 914
968static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, 915static void ieee80211_associated(struct ieee80211_sub_if_data *sdata)
969 struct ieee80211_if_sta *ifsta)
970{ 916{
917 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
971 struct ieee80211_local *local = sdata->local; 918 struct ieee80211_local *local = sdata->local;
972 struct sta_info *sta; 919 struct sta_info *sta;
973 int disassoc; 920 int disassoc;
@@ -977,38 +924,40 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata,
977 * for better APs. */ 924 * for better APs. */
978 /* TODO: remove expired BSSes */ 925 /* TODO: remove expired BSSes */
979 926
980 ifsta->state = IEEE80211_STA_MLME_ASSOCIATED; 927 ifmgd->state = IEEE80211_STA_MLME_ASSOCIATED;
981 928
982 rcu_read_lock(); 929 rcu_read_lock();
983 930
984 sta = sta_info_get(local, ifsta->bssid); 931 sta = sta_info_get(local, ifmgd->bssid);
985 if (!sta) { 932 if (!sta) {
986 printk(KERN_DEBUG "%s: No STA entry for own AP %pM\n", 933 printk(KERN_DEBUG "%s: No STA entry for own AP %pM\n",
987 sdata->dev->name, ifsta->bssid); 934 sdata->dev->name, ifmgd->bssid);
988 disassoc = 1; 935 disassoc = 1;
989 } else { 936 } else {
990 disassoc = 0; 937 disassoc = 0;
991 if (time_after(jiffies, 938 if (time_after(jiffies,
992 sta->last_rx + IEEE80211_MONITORING_INTERVAL)) { 939 sta->last_rx + IEEE80211_MONITORING_INTERVAL)) {
993 if (ifsta->flags & IEEE80211_STA_PROBEREQ_POLL) { 940 if (ifmgd->flags & IEEE80211_STA_PROBEREQ_POLL) {
994 printk(KERN_DEBUG "%s: No ProbeResp from " 941 printk(KERN_DEBUG "%s: No ProbeResp from "
995 "current AP %pM - assume out of " 942 "current AP %pM - assume out of "
996 "range\n", 943 "range\n",
997 sdata->dev->name, ifsta->bssid); 944 sdata->dev->name, ifmgd->bssid);
998 disassoc = 1; 945 disassoc = 1;
999 } else 946 } else
1000 ieee80211_send_probe_req(sdata, ifsta->bssid, 947 ieee80211_send_probe_req(sdata, ifmgd->bssid,
1001 ifsta->ssid, 948 ifmgd->ssid,
1002 ifsta->ssid_len); 949 ifmgd->ssid_len,
1003 ifsta->flags ^= IEEE80211_STA_PROBEREQ_POLL; 950 NULL, 0);
951 ifmgd->flags ^= IEEE80211_STA_PROBEREQ_POLL;
1004 } else { 952 } else {
1005 ifsta->flags &= ~IEEE80211_STA_PROBEREQ_POLL; 953 ifmgd->flags &= ~IEEE80211_STA_PROBEREQ_POLL;
1006 if (time_after(jiffies, ifsta->last_probe + 954 if (time_after(jiffies, ifmgd->last_probe +
1007 IEEE80211_PROBE_INTERVAL)) { 955 IEEE80211_PROBE_INTERVAL)) {
1008 ifsta->last_probe = jiffies; 956 ifmgd->last_probe = jiffies;
1009 ieee80211_send_probe_req(sdata, ifsta->bssid, 957 ieee80211_send_probe_req(sdata, ifmgd->bssid,
1010 ifsta->ssid, 958 ifmgd->ssid,
1011 ifsta->ssid_len); 959 ifmgd->ssid_len,
960 NULL, 0);
1012 } 961 }
1013 } 962 }
1014 } 963 }
@@ -1016,25 +965,25 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata,
1016 rcu_read_unlock(); 965 rcu_read_unlock();
1017 966
1018 if (disassoc) 967 if (disassoc)
1019 ieee80211_set_disassoc(sdata, ifsta, true, true, 968 ieee80211_set_disassoc(sdata, true, true,
1020 WLAN_REASON_PREV_AUTH_NOT_VALID); 969 WLAN_REASON_PREV_AUTH_NOT_VALID);
1021 else 970 else
1022 mod_timer(&ifsta->timer, jiffies + 971 mod_timer(&ifmgd->timer, jiffies +
1023 IEEE80211_MONITORING_INTERVAL); 972 IEEE80211_MONITORING_INTERVAL);
1024} 973}
1025 974
1026 975
1027static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata, 976static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata)
1028 struct ieee80211_if_sta *ifsta)
1029{ 977{
978 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
979
1030 printk(KERN_DEBUG "%s: authenticated\n", sdata->dev->name); 980 printk(KERN_DEBUG "%s: authenticated\n", sdata->dev->name);
1031 ifsta->flags |= IEEE80211_STA_AUTHENTICATED; 981 ifmgd->flags |= IEEE80211_STA_AUTHENTICATED;
1032 ieee80211_associate(sdata, ifsta); 982 ieee80211_associate(sdata);
1033} 983}
1034 984
1035 985
1036static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, 986static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
1037 struct ieee80211_if_sta *ifsta,
1038 struct ieee80211_mgmt *mgmt, 987 struct ieee80211_mgmt *mgmt,
1039 size_t len) 988 size_t len)
1040{ 989{
@@ -1045,50 +994,37 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
1045 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); 994 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
1046 if (!elems.challenge) 995 if (!elems.challenge)
1047 return; 996 return;
1048 ieee80211_send_auth(sdata, ifsta, 3, elems.challenge - 2, 997 ieee80211_send_auth(sdata, 3, sdata->u.mgd.auth_alg,
1049 elems.challenge_len + 2, 1); 998 elems.challenge - 2, elems.challenge_len + 2,
999 sdata->u.mgd.bssid, 1);
1000 sdata->u.mgd.auth_transaction = 4;
1050} 1001}
1051 1002
1052static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, 1003static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
1053 struct ieee80211_if_sta *ifsta,
1054 struct ieee80211_mgmt *mgmt, 1004 struct ieee80211_mgmt *mgmt,
1055 size_t len) 1005 size_t len)
1056{ 1006{
1007 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1057 u16 auth_alg, auth_transaction, status_code; 1008 u16 auth_alg, auth_transaction, status_code;
1058 1009
1059 if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && 1010 if (ifmgd->state != IEEE80211_STA_MLME_AUTHENTICATE)
1060 sdata->vif.type != NL80211_IFTYPE_ADHOC)
1061 return; 1011 return;
1062 1012
1063 if (len < 24 + 6) 1013 if (len < 24 + 6)
1064 return; 1014 return;
1065 1015
1066 if (sdata->vif.type != NL80211_IFTYPE_ADHOC && 1016 if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN) != 0)
1067 memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0)
1068 return; 1017 return;
1069 1018
1070 if (sdata->vif.type != NL80211_IFTYPE_ADHOC && 1019 if (memcmp(ifmgd->bssid, mgmt->bssid, ETH_ALEN) != 0)
1071 memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0)
1072 return; 1020 return;
1073 1021
1074 auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); 1022 auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
1075 auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); 1023 auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
1076 status_code = le16_to_cpu(mgmt->u.auth.status_code); 1024 status_code = le16_to_cpu(mgmt->u.auth.status_code);
1077 1025
1078 if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { 1026 if (auth_alg != ifmgd->auth_alg ||
1079 /* 1027 auth_transaction != ifmgd->auth_transaction)
1080 * IEEE 802.11 standard does not require authentication in IBSS
1081 * networks and most implementations do not seem to use it.
1082 * However, try to reply to authentication attempts if someone
1083 * has actually implemented this.
1084 */
1085 if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1)
1086 return;
1087 ieee80211_send_auth(sdata, ifsta, 2, NULL, 0, 0);
1088 }
1089
1090 if (auth_alg != ifsta->auth_alg ||
1091 auth_transaction != ifsta->auth_transaction)
1092 return; 1028 return;
1093 1029
1094 if (status_code != WLAN_STATUS_SUCCESS) { 1030 if (status_code != WLAN_STATUS_SUCCESS) {
@@ -1097,15 +1033,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
1097 const int num_algs = ARRAY_SIZE(algs); 1033 const int num_algs = ARRAY_SIZE(algs);
1098 int i, pos; 1034 int i, pos;
1099 algs[0] = algs[1] = algs[2] = 0xff; 1035 algs[0] = algs[1] = algs[2] = 0xff;
1100 if (ifsta->auth_algs & IEEE80211_AUTH_ALG_OPEN) 1036 if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_OPEN)
1101 algs[0] = WLAN_AUTH_OPEN; 1037 algs[0] = WLAN_AUTH_OPEN;
1102 if (ifsta->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY) 1038 if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY)
1103 algs[1] = WLAN_AUTH_SHARED_KEY; 1039 algs[1] = WLAN_AUTH_SHARED_KEY;
1104 if (ifsta->auth_algs & IEEE80211_AUTH_ALG_LEAP) 1040 if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_LEAP)
1105 algs[2] = WLAN_AUTH_LEAP; 1041 algs[2] = WLAN_AUTH_LEAP;
1106 if (ifsta->auth_alg == WLAN_AUTH_OPEN) 1042 if (ifmgd->auth_alg == WLAN_AUTH_OPEN)
1107 pos = 0; 1043 pos = 0;
1108 else if (ifsta->auth_alg == WLAN_AUTH_SHARED_KEY) 1044 else if (ifmgd->auth_alg == WLAN_AUTH_SHARED_KEY)
1109 pos = 1; 1045 pos = 1;
1110 else 1046 else
1111 pos = 2; 1047 pos = 2;
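The auth-failure path above (continued in the next hunk) builds an ordered list of authentication algorithms (Open System, Shared Key, LEAP) and rotates to the next one the interface is configured for, skipping Shared Key when no WEP key is set. A minimal user-space sketch of that rotation, with local stand-in constants and a hypothetical wep_configured() helper in place of the mac80211 equivalents:

/* Illustrative sketch only, not the kernel code; constants and
 * wep_configured() are stand-ins for the mac80211 equivalents. */
#include <stdio.h>
#include <stdbool.h>

enum { AUTH_OPEN = 0, AUTH_SHARED_KEY = 1, AUTH_LEAP = 2, AUTH_NONE = 0xff };

#define ALG_OPEN        (1 << 0)   /* algorithms the interface may use */
#define ALG_SHARED_KEY  (1 << 1)
#define ALG_LEAP        (1 << 2)

static bool wep_configured(void) { return false; } /* assume: no WEP key set */

/* Pick the next allowed algorithm after current_alg, or AUTH_NONE. */
static int next_auth_alg(unsigned allowed, int current_alg)
{
	int algs[3] = { AUTH_NONE, AUTH_NONE, AUTH_NONE };
	int i, pos;

	if (allowed & ALG_OPEN)
		algs[0] = AUTH_OPEN;
	if (allowed & ALG_SHARED_KEY)
		algs[1] = AUTH_SHARED_KEY;
	if (allowed & ALG_LEAP)
		algs[2] = AUTH_LEAP;

	/* start scanning just after the algorithm that was rejected */
	if (current_alg == AUTH_OPEN)
		pos = 0;
	else if (current_alg == AUTH_SHARED_KEY)
		pos = 1;
	else
		pos = 2;

	for (i = 0; i < 3; i++) {
		pos++;
		if (pos >= 3)
			pos = 0;
		if (algs[pos] == current_alg || algs[pos] == AUTH_NONE)
			continue;
		if (algs[pos] == AUTH_SHARED_KEY && !wep_configured())
			continue;	/* Shared Key needs a WEP key */
		return algs[pos];
	}
	return AUTH_NONE;
}

int main(void)
{
	int next = next_auth_alg(ALG_OPEN | ALG_LEAP, AUTH_OPEN);
	printf("fall back to algorithm %d\n", next);	/* prints 2 (LEAP) */
	return 0;
}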
@@ -1113,105 +1049,105 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
1113 pos++; 1049 pos++;
1114 if (pos >= num_algs) 1050 if (pos >= num_algs)
1115 pos = 0; 1051 pos = 0;
1116 if (algs[pos] == ifsta->auth_alg || 1052 if (algs[pos] == ifmgd->auth_alg ||
1117 algs[pos] == 0xff) 1053 algs[pos] == 0xff)
1118 continue; 1054 continue;
1119 if (algs[pos] == WLAN_AUTH_SHARED_KEY && 1055 if (algs[pos] == WLAN_AUTH_SHARED_KEY &&
1120 !ieee80211_sta_wep_configured(sdata)) 1056 !ieee80211_sta_wep_configured(sdata))
1121 continue; 1057 continue;
1122 ifsta->auth_alg = algs[pos]; 1058 ifmgd->auth_alg = algs[pos];
1123 break; 1059 break;
1124 } 1060 }
1125 } 1061 }
1126 return; 1062 return;
1127 } 1063 }
1128 1064
1129 switch (ifsta->auth_alg) { 1065 switch (ifmgd->auth_alg) {
1130 case WLAN_AUTH_OPEN: 1066 case WLAN_AUTH_OPEN:
1131 case WLAN_AUTH_LEAP: 1067 case WLAN_AUTH_LEAP:
1132 ieee80211_auth_completed(sdata, ifsta); 1068 ieee80211_auth_completed(sdata);
1133 break; 1069 break;
1134 case WLAN_AUTH_SHARED_KEY: 1070 case WLAN_AUTH_SHARED_KEY:
1135 if (ifsta->auth_transaction == 4) 1071 if (ifmgd->auth_transaction == 4)
1136 ieee80211_auth_completed(sdata, ifsta); 1072 ieee80211_auth_completed(sdata);
1137 else 1073 else
1138 ieee80211_auth_challenge(sdata, ifsta, mgmt, len); 1074 ieee80211_auth_challenge(sdata, mgmt, len);
1139 break; 1075 break;
1140 } 1076 }
1141} 1077}
1142 1078
1143 1079
1144static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, 1080static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
1145 struct ieee80211_if_sta *ifsta,
1146 struct ieee80211_mgmt *mgmt, 1081 struct ieee80211_mgmt *mgmt,
1147 size_t len) 1082 size_t len)
1148{ 1083{
1084 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1149 u16 reason_code; 1085 u16 reason_code;
1150 1086
1151 if (len < 24 + 2) 1087 if (len < 24 + 2)
1152 return; 1088 return;
1153 1089
1154 if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) 1090 if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN))
1155 return; 1091 return;
1156 1092
1157 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); 1093 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
1158 1094
1159 if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) 1095 if (ifmgd->flags & IEEE80211_STA_AUTHENTICATED)
1160 printk(KERN_DEBUG "%s: deauthenticated (Reason: %u)\n", 1096 printk(KERN_DEBUG "%s: deauthenticated (Reason: %u)\n",
1161 sdata->dev->name, reason_code); 1097 sdata->dev->name, reason_code);
1162 1098
1163 if (ifsta->state == IEEE80211_STA_MLME_AUTHENTICATE || 1099 if (ifmgd->state == IEEE80211_STA_MLME_AUTHENTICATE ||
1164 ifsta->state == IEEE80211_STA_MLME_ASSOCIATE || 1100 ifmgd->state == IEEE80211_STA_MLME_ASSOCIATE ||
1165 ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { 1101 ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) {
1166 ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; 1102 ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE;
1167 mod_timer(&ifsta->timer, jiffies + 1103 mod_timer(&ifmgd->timer, jiffies +
1168 IEEE80211_RETRY_AUTH_INTERVAL); 1104 IEEE80211_RETRY_AUTH_INTERVAL);
1169 } 1105 }
1170 1106
1171 ieee80211_set_disassoc(sdata, ifsta, true, false, 0); 1107 ieee80211_set_disassoc(sdata, true, false, 0);
1172 ifsta->flags &= ~IEEE80211_STA_AUTHENTICATED; 1108 ifmgd->flags &= ~IEEE80211_STA_AUTHENTICATED;
1173} 1109}
1174 1110
1175 1111
1176static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, 1112static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
1177 struct ieee80211_if_sta *ifsta,
1178 struct ieee80211_mgmt *mgmt, 1113 struct ieee80211_mgmt *mgmt,
1179 size_t len) 1114 size_t len)
1180{ 1115{
1116 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1181 u16 reason_code; 1117 u16 reason_code;
1182 1118
1183 if (len < 24 + 2) 1119 if (len < 24 + 2)
1184 return; 1120 return;
1185 1121
1186 if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) 1122 if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN))
1187 return; 1123 return;
1188 1124
1189 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); 1125 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
1190 1126
1191 if (ifsta->flags & IEEE80211_STA_ASSOCIATED) 1127 if (ifmgd->flags & IEEE80211_STA_ASSOCIATED)
1192 printk(KERN_DEBUG "%s: disassociated (Reason: %u)\n", 1128 printk(KERN_DEBUG "%s: disassociated (Reason: %u)\n",
1193 sdata->dev->name, reason_code); 1129 sdata->dev->name, reason_code);
1194 1130
1195 if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { 1131 if (ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) {
1196 ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; 1132 ifmgd->state = IEEE80211_STA_MLME_ASSOCIATE;
1197 mod_timer(&ifsta->timer, jiffies + 1133 mod_timer(&ifmgd->timer, jiffies +
1198 IEEE80211_RETRY_AUTH_INTERVAL); 1134 IEEE80211_RETRY_AUTH_INTERVAL);
1199 } 1135 }
1200 1136
1201 ieee80211_set_disassoc(sdata, ifsta, false, false, reason_code); 1137 ieee80211_set_disassoc(sdata, false, false, reason_code);
1202} 1138}
1203 1139
1204 1140
1205static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, 1141static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1206 struct ieee80211_if_sta *ifsta,
1207 struct ieee80211_mgmt *mgmt, 1142 struct ieee80211_mgmt *mgmt,
1208 size_t len, 1143 size_t len,
1209 int reassoc) 1144 int reassoc)
1210{ 1145{
1146 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1211 struct ieee80211_local *local = sdata->local; 1147 struct ieee80211_local *local = sdata->local;
1212 struct ieee80211_supported_band *sband; 1148 struct ieee80211_supported_band *sband;
1213 struct sta_info *sta; 1149 struct sta_info *sta;
1214 u64 rates, basic_rates; 1150 u32 rates, basic_rates;
1215 u16 capab_info, status_code, aid; 1151 u16 capab_info, status_code, aid;
1216 struct ieee802_11_elems elems; 1152 struct ieee802_11_elems elems;
1217 struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; 1153 struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
@@ -1224,13 +1160,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1224 /* AssocResp and ReassocResp have identical structure, so process both 1160 /* AssocResp and ReassocResp have identical structure, so process both
1225 * of them in this function. */ 1161 * of them in this function. */
1226 1162
1227 if (ifsta->state != IEEE80211_STA_MLME_ASSOCIATE) 1163 if (ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE)
1228 return; 1164 return;
1229 1165
1230 if (len < 24 + 6) 1166 if (len < 24 + 6)
1231 return; 1167 return;
1232 1168
1233 if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) 1169 if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN) != 0)
1234 return; 1170 return;
1235 1171
1236 capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); 1172 capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
@@ -1242,13 +1178,31 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1242 sdata->dev->name, reassoc ? "Rea" : "A", mgmt->sa, 1178 sdata->dev->name, reassoc ? "Rea" : "A", mgmt->sa,
1243 capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); 1179 capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));
1244 1180
1181 pos = mgmt->u.assoc_resp.variable;
1182 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
1183
1184 if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY &&
1185 elems.timeout_int && elems.timeout_int_len == 5 &&
1186 elems.timeout_int[0] == WLAN_TIMEOUT_ASSOC_COMEBACK) {
1187 u32 tu, ms;
1188 tu = get_unaligned_le32(elems.timeout_int + 1);
1189 ms = tu * 1024 / 1000;
1190 printk(KERN_DEBUG "%s: AP rejected association temporarily; "
1191 "comeback duration %u TU (%u ms)\n",
1192 sdata->dev->name, tu, ms);
1193 if (ms > IEEE80211_ASSOC_TIMEOUT)
1194 mod_timer(&ifmgd->timer,
1195 jiffies + msecs_to_jiffies(ms));
1196 return;
1197 }
1198
1245 if (status_code != WLAN_STATUS_SUCCESS) { 1199 if (status_code != WLAN_STATUS_SUCCESS) {
1246 printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", 1200 printk(KERN_DEBUG "%s: AP denied association (code=%d)\n",
1247 sdata->dev->name, status_code); 1201 sdata->dev->name, status_code);
1248 /* if this was a reassociation, ensure we try a "full" 1202 /* if this was a reassociation, ensure we try a "full"
1249 * association next time. This works around some broken APs 1203 * association next time. This works around some broken APs
1250 * which do not correctly reject reassociation requests. */ 1204 * which do not correctly reject reassociation requests. */
1251 ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; 1205 ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET;
1252 return; 1206 return;
1253 } 1207 }
1254 1208
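The new code in the hunk above handles a temporary association rejection by reading a 32-bit comeback duration expressed in time units (TU) from the Timeout Interval element and converting it to milliseconds before re-arming the association timer. One TU is 1024 microseconds, hence the tu * 1024 / 1000 scaling. A small self-contained sketch of the same arithmetic; the little-endian read is open-coded here as a stand-in for the kernel's get_unaligned_le32():

/* Illustrative sketch: TU (1024 us) to millisecond conversion as used
 * for the association comeback timeout. */
#include <stdio.h>
#include <stdint.h>

/* Open-coded little-endian 32-bit read (stand-in for get_unaligned_le32). */
static uint32_t le32_read(const uint8_t *p)
{
	return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
	       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

int main(void)
{
	/* Timeout Interval element body: type byte (association comeback)
	 * followed by a 4-byte little-endian duration in TU; 3000 TU here. */
	uint8_t timeout_int[5] = { 3, 0xb8, 0x0b, 0x00, 0x00 };

	uint32_t tu = le32_read(timeout_int + 1);
	uint32_t ms = tu * 1024 / 1000;	/* 1 TU = 1024 us */

	printf("comeback duration %u TU (%u ms)\n", tu, ms);	/* 3000 TU (3072 ms) */
	return 0;
}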
@@ -1257,9 +1211,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1257 "set\n", sdata->dev->name, aid); 1211 "set\n", sdata->dev->name, aid);
1258 aid &= ~(BIT(15) | BIT(14)); 1212 aid &= ~(BIT(15) | BIT(14));
1259 1213
1260 pos = mgmt->u.assoc_resp.variable;
1261 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
1262
1263 if (!elems.supp_rates) { 1214 if (!elems.supp_rates) {
1264 printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n", 1215 printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n",
1265 sdata->dev->name); 1216 sdata->dev->name);
@@ -1267,40 +1218,29 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1267 } 1218 }
1268 1219
1269 printk(KERN_DEBUG "%s: associated\n", sdata->dev->name); 1220 printk(KERN_DEBUG "%s: associated\n", sdata->dev->name);
1270 ifsta->aid = aid; 1221 ifmgd->aid = aid;
1271 ifsta->ap_capab = capab_info; 1222 ifmgd->ap_capab = capab_info;
1272 1223
1273 kfree(ifsta->assocresp_ies); 1224 kfree(ifmgd->assocresp_ies);
1274 ifsta->assocresp_ies_len = len - (pos - (u8 *) mgmt); 1225 ifmgd->assocresp_ies_len = len - (pos - (u8 *) mgmt);
1275 ifsta->assocresp_ies = kmalloc(ifsta->assocresp_ies_len, GFP_KERNEL); 1226 ifmgd->assocresp_ies = kmalloc(ifmgd->assocresp_ies_len, GFP_KERNEL);
1276 if (ifsta->assocresp_ies) 1227 if (ifmgd->assocresp_ies)
1277 memcpy(ifsta->assocresp_ies, pos, ifsta->assocresp_ies_len); 1228 memcpy(ifmgd->assocresp_ies, pos, ifmgd->assocresp_ies_len);
1278 1229
1279 rcu_read_lock(); 1230 rcu_read_lock();
1280 1231
1281 /* Add STA entry for the AP */ 1232 /* Add STA entry for the AP */
1282 sta = sta_info_get(local, ifsta->bssid); 1233 sta = sta_info_get(local, ifmgd->bssid);
1283 if (!sta) { 1234 if (!sta) {
1284 struct ieee80211_bss *bss;
1285
1286 newsta = true; 1235 newsta = true;
1287 1236
1288 sta = sta_info_alloc(sdata, ifsta->bssid, GFP_ATOMIC); 1237 sta = sta_info_alloc(sdata, ifmgd->bssid, GFP_ATOMIC);
1289 if (!sta) { 1238 if (!sta) {
1290 printk(KERN_DEBUG "%s: failed to alloc STA entry for" 1239 printk(KERN_DEBUG "%s: failed to alloc STA entry for"
1291 " the AP\n", sdata->dev->name); 1240 " the AP\n", sdata->dev->name);
1292 rcu_read_unlock(); 1241 rcu_read_unlock();
1293 return; 1242 return;
1294 } 1243 }
1295 bss = ieee80211_rx_bss_get(local, ifsta->bssid,
1296 local->hw.conf.channel->center_freq,
1297 ifsta->ssid, ifsta->ssid_len);
1298 if (bss) {
1299 sta->last_signal = bss->signal;
1300 sta->last_qual = bss->qual;
1301 sta->last_noise = bss->noise;
1302 ieee80211_rx_bss_put(local, bss);
1303 }
1304 1244
1305 /* update new sta with its last rx activity */ 1245 /* update new sta with its last rx activity */
1306 sta->last_rx = jiffies; 1246 sta->last_rx = jiffies;
@@ -1375,6 +1315,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1375 1315
1376 rate_control_rate_init(sta); 1316 rate_control_rate_init(sta);
1377 1317
1318 if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED)
1319 set_sta_flags(sta, WLAN_STA_MFP);
1320
1378 if (elems.wmm_param) 1321 if (elems.wmm_param)
1379 set_sta_flags(sta, WLAN_STA_WME); 1322 set_sta_flags(sta, WLAN_STA_WME);
1380 1323
@@ -1391,11 +1334,12 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1391 rcu_read_unlock(); 1334 rcu_read_unlock();
1392 1335
1393 if (elems.wmm_param) 1336 if (elems.wmm_param)
1394 ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, 1337 ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param,
1395 elems.wmm_param_len); 1338 elems.wmm_param_len);
1396 1339
1397 if (elems.ht_info_elem && elems.wmm_param && 1340 if (elems.ht_info_elem && elems.wmm_param &&
1398 (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) 1341 (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) &&
1342 !(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED))
1399 changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, 1343 changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem,
1400 ap_ht_cap_flags); 1344 ap_ht_cap_flags);
1401 1345
@@ -1403,136 +1347,12 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1403 * ieee80211_set_associated() will tell the driver */ 1347 * ieee80211_set_associated() will tell the driver */
1404 bss_conf->aid = aid; 1348 bss_conf->aid = aid;
1405 bss_conf->assoc_capability = capab_info; 1349 bss_conf->assoc_capability = capab_info;
1406 ieee80211_set_associated(sdata, ifsta, changed); 1350 ieee80211_set_associated(sdata, changed);
1407 1351
1408 ieee80211_associated(sdata, ifsta); 1352 ieee80211_associated(sdata);
1409} 1353}
1410 1354
1411 1355
1412static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
1413 struct ieee80211_if_sta *ifsta,
1414 struct ieee80211_bss *bss)
1415{
1416 struct ieee80211_local *local = sdata->local;
1417 int res, rates, i, j;
1418 struct sk_buff *skb;
1419 struct ieee80211_mgmt *mgmt;
1420 u8 *pos;
1421 struct ieee80211_supported_band *sband;
1422 union iwreq_data wrqu;
1423
1424 skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
1425 if (!skb) {
1426 printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
1427 "response\n", sdata->dev->name);
1428 return -ENOMEM;
1429 }
1430
1431 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
1432
1433 /* Remove possible STA entries from other IBSS networks. */
1434 sta_info_flush_delayed(sdata);
1435
1436 if (local->ops->reset_tsf) {
1437 /* Reset own TSF to allow time synchronization work. */
1438 local->ops->reset_tsf(local_to_hw(local));
1439 }
1440 memcpy(ifsta->bssid, bss->bssid, ETH_ALEN);
1441 res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID);
1442 if (res)
1443 return res;
1444
1445 local->hw.conf.beacon_int = bss->beacon_int >= 10 ? bss->beacon_int : 10;
1446
1447 sdata->drop_unencrypted = bss->capability &
1448 WLAN_CAPABILITY_PRIVACY ? 1 : 0;
1449
1450 res = ieee80211_set_freq(sdata, bss->freq);
1451
1452 if (res)
1453 return res;
1454
1455 /* Build IBSS probe response */
1456
1457 skb_reserve(skb, local->hw.extra_tx_headroom);
1458
1459 mgmt = (struct ieee80211_mgmt *)
1460 skb_put(skb, 24 + sizeof(mgmt->u.beacon));
1461 memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon));
1462 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
1463 IEEE80211_STYPE_PROBE_RESP);
1464 memset(mgmt->da, 0xff, ETH_ALEN);
1465 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
1466 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
1467 mgmt->u.beacon.beacon_int =
1468 cpu_to_le16(local->hw.conf.beacon_int);
1469 mgmt->u.beacon.timestamp = cpu_to_le64(bss->timestamp);
1470 mgmt->u.beacon.capab_info = cpu_to_le16(bss->capability);
1471
1472 pos = skb_put(skb, 2 + ifsta->ssid_len);
1473 *pos++ = WLAN_EID_SSID;
1474 *pos++ = ifsta->ssid_len;
1475 memcpy(pos, ifsta->ssid, ifsta->ssid_len);
1476
1477 rates = bss->supp_rates_len;
1478 if (rates > 8)
1479 rates = 8;
1480 pos = skb_put(skb, 2 + rates);
1481 *pos++ = WLAN_EID_SUPP_RATES;
1482 *pos++ = rates;
1483 memcpy(pos, bss->supp_rates, rates);
1484
1485 if (bss->band == IEEE80211_BAND_2GHZ) {
1486 pos = skb_put(skb, 2 + 1);
1487 *pos++ = WLAN_EID_DS_PARAMS;
1488 *pos++ = 1;
1489 *pos++ = ieee80211_frequency_to_channel(bss->freq);
1490 }
1491
1492 pos = skb_put(skb, 2 + 2);
1493 *pos++ = WLAN_EID_IBSS_PARAMS;
1494 *pos++ = 2;
1495 /* FIX: set ATIM window based on scan results */
1496 *pos++ = 0;
1497 *pos++ = 0;
1498
1499 if (bss->supp_rates_len > 8) {
1500 rates = bss->supp_rates_len - 8;
1501 pos = skb_put(skb, 2 + rates);
1502 *pos++ = WLAN_EID_EXT_SUPP_RATES;
1503 *pos++ = rates;
1504 memcpy(pos, &bss->supp_rates[8], rates);
1505 }
1506
1507 ifsta->probe_resp = skb;
1508
1509 ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
1510
1511
1512 rates = 0;
1513 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
1514 for (i = 0; i < bss->supp_rates_len; i++) {
1515 int bitrate = (bss->supp_rates[i] & 0x7f) * 5;
1516 for (j = 0; j < sband->n_bitrates; j++)
1517 if (sband->bitrates[j].bitrate == bitrate)
1518 rates |= BIT(j);
1519 }
1520 ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates;
1521
1522 ieee80211_sta_def_wmm_params(sdata, bss);
1523
1524 ifsta->state = IEEE80211_STA_MLME_IBSS_JOINED;
1525 mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
1526
1527 ieee80211_led_assoc(local, true);
1528
1529 memset(&wrqu, 0, sizeof(wrqu));
1530 memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN);
1531 wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL);
1532
1533 return res;
1534}
1535
1536static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, 1356static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
1537 struct ieee80211_mgmt *mgmt, 1357 struct ieee80211_mgmt *mgmt,
1538 size_t len, 1358 size_t len,
@@ -1543,11 +1363,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
1543 struct ieee80211_local *local = sdata->local; 1363 struct ieee80211_local *local = sdata->local;
1544 int freq; 1364 int freq;
1545 struct ieee80211_bss *bss; 1365 struct ieee80211_bss *bss;
1546 struct sta_info *sta;
1547 struct ieee80211_channel *channel; 1366 struct ieee80211_channel *channel;
1548 u64 beacon_timestamp, rx_timestamp;
1549 u64 supp_rates = 0;
1550 enum ieee80211_band band = rx_status->band;
1551 1367
1552 if (elems->ds_params && elems->ds_params_len == 1) 1368 if (elems->ds_params && elems->ds_params_len == 1)
1553 freq = ieee80211_channel_to_frequency(elems->ds_params[0]); 1369 freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
@@ -1559,112 +1375,16 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
1559 if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) 1375 if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
1560 return; 1376 return;
1561 1377
1562 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates &&
1563 memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) {
1564 supp_rates = ieee80211_sta_get_rates(local, elems, band);
1565
1566 rcu_read_lock();
1567
1568 sta = sta_info_get(local, mgmt->sa);
1569 if (sta) {
1570 u64 prev_rates;
1571
1572 prev_rates = sta->sta.supp_rates[band];
1573 /* make sure mandatory rates are always added */
1574 sta->sta.supp_rates[band] = supp_rates |
1575 ieee80211_mandatory_rates(local, band);
1576
1577#ifdef CONFIG_MAC80211_IBSS_DEBUG
1578 if (sta->sta.supp_rates[band] != prev_rates)
1579 printk(KERN_DEBUG "%s: updated supp_rates set "
1580 "for %pM based on beacon info (0x%llx | "
1581 "0x%llx -> 0x%llx)\n",
1582 sdata->dev->name,
1583 sta->sta.addr,
1584 (unsigned long long) prev_rates,
1585 (unsigned long long) supp_rates,
1586 (unsigned long long) sta->sta.supp_rates[band]);
1587#endif
1588 } else {
1589 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates);
1590 }
1591
1592 rcu_read_unlock();
1593 }
1594
1595 bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, 1378 bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems,
1596 freq, beacon); 1379 channel, beacon);
1597 if (!bss) 1380 if (!bss)
1598 return; 1381 return;
1599 1382
1600 /* was just updated in ieee80211_bss_info_update */ 1383 if (elems->ch_switch_elem && (elems->ch_switch_elem_len == 3) &&
1601 beacon_timestamp = bss->timestamp; 1384 (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN) == 0)) {
1602 1385 struct ieee80211_channel_sw_ie *sw_elem =
1603 /* 1386 (struct ieee80211_channel_sw_ie *)elems->ch_switch_elem;
1604 * In STA mode, the remaining parameters should not be overridden 1387 ieee80211_process_chanswitch(sdata, sw_elem, bss);
1605 * by beacons because they're not necessarily accurate there.
1606 */
1607 if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
1608 bss->last_probe_resp && beacon) {
1609 ieee80211_rx_bss_put(local, bss);
1610 return;
1611 }
1612
1613 /* check if we need to merge IBSS */
1614 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && beacon &&
1615 bss->capability & WLAN_CAPABILITY_IBSS &&
1616 bss->freq == local->oper_channel->center_freq &&
1617 elems->ssid_len == sdata->u.sta.ssid_len &&
1618 memcmp(elems->ssid, sdata->u.sta.ssid,
1619 sdata->u.sta.ssid_len) == 0) {
1620 if (rx_status->flag & RX_FLAG_TSFT) {
1621 /* in order for correct IBSS merging we need mactime
1622 *
1623 * since mactime is defined as the time the first data
1624 * symbol of the frame hits the PHY, and the timestamp
1625 * of the beacon is defined as "the time that the data
1626 * symbol containing the first bit of the timestamp is
1627 * transmitted to the PHY plus the transmitting STA’s
1628 * delays through its local PHY from the MAC-PHY
1629 * interface to its interface with the WM"
1630 * (802.11 11.1.2) - equals the time this bit arrives at
1631 * the receiver - we have to take into account the
1632 * offset between the two.
1633 * e.g: at 1 MBit that means mactime is 192 usec earlier
1634 * (=24 bytes * 8 usecs/byte) than the beacon timestamp.
1635 */
1636 int rate;
1637 if (rx_status->flag & RX_FLAG_HT) {
1638 rate = 65; /* TODO: HT rates */
1639 } else {
1640 rate = local->hw.wiphy->bands[band]->
1641 bitrates[rx_status->rate_idx].bitrate;
1642 }
1643 rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate);
1644 } else if (local && local->ops && local->ops->get_tsf)
1645 /* second best option: get current TSF */
1646 rx_timestamp = local->ops->get_tsf(local_to_hw(local));
1647 else
1648 /* can't merge without knowing the TSF */
1649 rx_timestamp = -1LLU;
1650#ifdef CONFIG_MAC80211_IBSS_DEBUG
1651 printk(KERN_DEBUG "RX beacon SA=%pM BSSID="
1652 "%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n",
1653 mgmt->sa, mgmt->bssid,
1654 (unsigned long long)rx_timestamp,
1655 (unsigned long long)beacon_timestamp,
1656 (unsigned long long)(rx_timestamp - beacon_timestamp),
1657 jiffies);
1658#endif /* CONFIG_MAC80211_IBSS_DEBUG */
1659 if (beacon_timestamp > rx_timestamp) {
1660#ifdef CONFIG_MAC80211_IBSS_DEBUG
1661 printk(KERN_DEBUG "%s: beacon TSF higher than "
1662 "local TSF - IBSS merge with BSSID %pM\n",
1663 sdata->dev->name, mgmt->bssid);
1664#endif
1665 ieee80211_sta_join_ibss(sdata, &sdata->u.sta, bss);
1666 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates);
1667 }
1668 } 1388 }
1669 1389
1670 ieee80211_rx_bss_put(local, bss); 1390 ieee80211_rx_bss_put(local, bss);
@@ -1678,7 +1398,6 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
1678{ 1398{
1679 size_t baselen; 1399 size_t baselen;
1680 struct ieee802_11_elems elems; 1400 struct ieee802_11_elems elems;
1681 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
1682 1401
1683 if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) 1402 if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN))
1684 return; /* ignore ProbeResp to foreign address */ 1403 return; /* ignore ProbeResp to foreign address */
@@ -1694,25 +1413,24 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
1694 1413
1695 /* direct probe may be part of the association flow */ 1414 /* direct probe may be part of the association flow */
1696 if (test_and_clear_bit(IEEE80211_STA_REQ_DIRECT_PROBE, 1415 if (test_and_clear_bit(IEEE80211_STA_REQ_DIRECT_PROBE,
1697 &ifsta->request)) { 1416 &sdata->u.mgd.request)) {
1698 printk(KERN_DEBUG "%s direct probe responded\n", 1417 printk(KERN_DEBUG "%s direct probe responded\n",
1699 sdata->dev->name); 1418 sdata->dev->name);
1700 ieee80211_authenticate(sdata, ifsta); 1419 ieee80211_authenticate(sdata);
1701 } 1420 }
1702} 1421}
1703 1422
1704
1705static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, 1423static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1706 struct ieee80211_mgmt *mgmt, 1424 struct ieee80211_mgmt *mgmt,
1707 size_t len, 1425 size_t len,
1708 struct ieee80211_rx_status *rx_status) 1426 struct ieee80211_rx_status *rx_status)
1709{ 1427{
1710 struct ieee80211_if_sta *ifsta; 1428 struct ieee80211_if_managed *ifmgd;
1711 size_t baselen; 1429 size_t baselen;
1712 struct ieee802_11_elems elems; 1430 struct ieee802_11_elems elems;
1713 struct ieee80211_local *local = sdata->local; 1431 struct ieee80211_local *local = sdata->local;
1714 u32 changed = 0; 1432 u32 changed = 0;
1715 bool erp_valid; 1433 bool erp_valid, directed_tim;
1716 u8 erp_value = 0; 1434 u8 erp_value = 0;
1717 1435
1718 /* Process beacon from the current BSS */ 1436 /* Process beacon from the current BSS */
@@ -1726,15 +1444,44 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1726 1444
1727 if (sdata->vif.type != NL80211_IFTYPE_STATION) 1445 if (sdata->vif.type != NL80211_IFTYPE_STATION)
1728 return; 1446 return;
1729 ifsta = &sdata->u.sta;
1730 1447
1731 if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED) || 1448 ifmgd = &sdata->u.mgd;
1732 memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) 1449
1450 if (!(ifmgd->flags & IEEE80211_STA_ASSOCIATED) ||
1451 memcmp(ifmgd->bssid, mgmt->bssid, ETH_ALEN) != 0)
1733 return; 1452 return;
1734 1453
1735 ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, 1454 if (rx_status->freq != local->hw.conf.channel->center_freq)
1455 return;
1456
1457 ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param,
1736 elems.wmm_param_len); 1458 elems.wmm_param_len);
1737 1459
1460 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK &&
1461 local->hw.conf.flags & IEEE80211_CONF_PS) {
1462 directed_tim = ieee80211_check_tim(&elems, ifmgd->aid);
1463
1464 if (directed_tim) {
1465 if (local->hw.conf.dynamic_ps_timeout > 0) {
1466 local->hw.conf.flags &= ~IEEE80211_CONF_PS;
1467 ieee80211_hw_config(local,
1468 IEEE80211_CONF_CHANGE_PS);
1469 ieee80211_send_nullfunc(local, sdata, 0);
1470 } else {
1471 local->pspolling = true;
1472
1473 /*
1474 * Here is assumed that the driver will be
1475 * able to send ps-poll frame and receive a
1476 * response even though power save mode is
1477 * enabled, but some drivers might require
1478 * to disable power save here. This needs
1479 * to be investigated.
1480 */
1481 ieee80211_send_pspoll(local, sdata);
1482 }
1483 }
1484 }
1738 1485
1739 if (elems.erp_info && elems.erp_info_len >= 1) { 1486 if (elems.erp_info && elems.erp_info_len >= 1) {
1740 erp_valid = true; 1487 erp_valid = true;
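The beacon-handling hunk above also adds power-save logic: when the hardware leaves PS-Poll handling to the stack and power save is enabled, a beacon whose TIM element lists our AID means the AP has buffered frames for this station. With a dynamic PS timeout the code leaves power save entirely (clears the PS flag and sends a nullfunc); otherwise it stays asleep and fetches the data with a PS-Poll. A compact sketch of that decision, using stand-in types and stub send helpers rather than mac80211 APIs:

/* Illustrative sketch of the directed-TIM decision; the struct and the
 * two send_*() helpers are stand-ins, not mac80211 APIs. */
#include <stdio.h>
#include <stdbool.h>

struct ps_state {
	bool ps_enabled;          /* power save currently enabled */
	int  dynamic_ps_timeout;  /* ms; 0 means static power save */
	bool pspolling;
};

static void send_nullfunc(void) { puts("tx nullfunc (wake up)"); }
static void send_pspoll(void)   { puts("tx PS-Poll (stay asleep)"); }

/* Called per beacon; directed_tim is true when our AID is set in the
 * TIM element, i.e. the AP holds buffered frames for us. */
static void handle_beacon_tim(struct ps_state *ps, bool directed_tim)
{
	if (!ps->ps_enabled || !directed_tim)
		return;

	if (ps->dynamic_ps_timeout > 0) {
		/* Dynamic PS: leave power save and announce the wakeup. */
		ps->ps_enabled = false;
		send_nullfunc();
	} else {
		/* Static PS: retrieve the buffered frames with a PS-Poll. */
		ps->pspolling = true;
		send_pspoll();
	}
}

int main(void)
{
	struct ps_state ps = { .ps_enabled = true, .dynamic_ps_timeout = 0 };
	handle_beacon_tim(&ps, true);	/* prints "tx PS-Poll (stay asleep)" */
	return 0;
}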
@@ -1747,14 +1494,15 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1747 erp_valid, erp_value); 1494 erp_valid, erp_value);
1748 1495
1749 1496
1750 if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param) { 1497 if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param &&
1498 !(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED)) {
1751 struct sta_info *sta; 1499 struct sta_info *sta;
1752 struct ieee80211_supported_band *sband; 1500 struct ieee80211_supported_band *sband;
1753 u16 ap_ht_cap_flags; 1501 u16 ap_ht_cap_flags;
1754 1502
1755 rcu_read_lock(); 1503 rcu_read_lock();
1756 1504
1757 sta = sta_info_get(local, ifsta->bssid); 1505 sta = sta_info_get(local, ifmgd->bssid);
1758 if (!sta) { 1506 if (!sta) {
1759 rcu_read_unlock(); 1507 rcu_read_unlock();
1760 return; 1508 return;
@@ -1778,92 +1526,28 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1778 * for the BSSID we are associated to */ 1526 * for the BSSID we are associated to */
1779 regulatory_hint_11d(local->hw.wiphy, 1527 regulatory_hint_11d(local->hw.wiphy,
1780 elems.country_elem, elems.country_elem_len); 1528 elems.country_elem, elems.country_elem_len);
1781 }
1782
1783 ieee80211_bss_info_change_notify(sdata, changed);
1784}
1785
1786
1787static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
1788 struct ieee80211_if_sta *ifsta,
1789 struct ieee80211_mgmt *mgmt,
1790 size_t len,
1791 struct ieee80211_rx_status *rx_status)
1792{
1793 struct ieee80211_local *local = sdata->local;
1794 int tx_last_beacon;
1795 struct sk_buff *skb;
1796 struct ieee80211_mgmt *resp;
1797 u8 *pos, *end;
1798
1799 if (sdata->vif.type != NL80211_IFTYPE_ADHOC ||
1800 ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED ||
1801 len < 24 + 2 || !ifsta->probe_resp)
1802 return;
1803
1804 if (local->ops->tx_last_beacon)
1805 tx_last_beacon = local->ops->tx_last_beacon(local_to_hw(local));
1806 else
1807 tx_last_beacon = 1;
1808
1809#ifdef CONFIG_MAC80211_IBSS_DEBUG
1810 printk(KERN_DEBUG "%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM"
1811 " (tx_last_beacon=%d)\n",
1812 sdata->dev->name, mgmt->sa, mgmt->da,
1813 mgmt->bssid, tx_last_beacon);
1814#endif /* CONFIG_MAC80211_IBSS_DEBUG */
1815
1816 if (!tx_last_beacon)
1817 return;
1818
1819 if (memcmp(mgmt->bssid, ifsta->bssid, ETH_ALEN) != 0 &&
1820 memcmp(mgmt->bssid, "\xff\xff\xff\xff\xff\xff", ETH_ALEN) != 0)
1821 return;
1822 1529
1823 end = ((u8 *) mgmt) + len; 1530 /* TODO: IBSS also needs this */
1824 pos = mgmt->u.probe_req.variable; 1531 if (elems.pwr_constr_elem)
1825 if (pos[0] != WLAN_EID_SSID || 1532 ieee80211_handle_pwr_constr(sdata,
1826 pos + 2 + pos[1] > end) { 1533 le16_to_cpu(mgmt->u.probe_resp.capab_info),
1827#ifdef CONFIG_MAC80211_IBSS_DEBUG 1534 elems.pwr_constr_elem,
1828 printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " 1535 elems.pwr_constr_elem_len);
1829 "from %pM\n",
1830 sdata->dev->name, mgmt->sa);
1831#endif
1832 return;
1833 }
1834 if (pos[1] != 0 &&
1835 (pos[1] != ifsta->ssid_len ||
1836 memcmp(pos + 2, ifsta->ssid, ifsta->ssid_len) != 0)) {
1837 /* Ignore ProbeReq for foreign SSID */
1838 return;
1839 } 1536 }
1840 1537
1841 /* Reply with ProbeResp */ 1538 ieee80211_bss_info_change_notify(sdata, changed);
1842 skb = skb_copy(ifsta->probe_resp, GFP_KERNEL);
1843 if (!skb)
1844 return;
1845
1846 resp = (struct ieee80211_mgmt *) skb->data;
1847 memcpy(resp->da, mgmt->sa, ETH_ALEN);
1848#ifdef CONFIG_MAC80211_IBSS_DEBUG
1849 printk(KERN_DEBUG "%s: Sending ProbeResp to %pM\n",
1850 sdata->dev->name, resp->da);
1851#endif /* CONFIG_MAC80211_IBSS_DEBUG */
1852 ieee80211_tx_skb(sdata, skb, 0);
1853} 1539}
1854 1540
1855void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, 1541ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata,
1856 struct ieee80211_rx_status *rx_status) 1542 struct sk_buff *skb,
1543 struct ieee80211_rx_status *rx_status)
1857{ 1544{
1858 struct ieee80211_local *local = sdata->local; 1545 struct ieee80211_local *local = sdata->local;
1859 struct ieee80211_if_sta *ifsta;
1860 struct ieee80211_mgmt *mgmt; 1546 struct ieee80211_mgmt *mgmt;
1861 u16 fc; 1547 u16 fc;
1862 1548
1863 if (skb->len < 24) 1549 if (skb->len < 24)
1864 goto fail; 1550 return RX_DROP_MONITOR;
1865
1866 ifsta = &sdata->u.sta;
1867 1551
1868 mgmt = (struct ieee80211_mgmt *) skb->data; 1552 mgmt = (struct ieee80211_mgmt *) skb->data;
1869 fc = le16_to_cpu(mgmt->frame_control); 1553 fc = le16_to_cpu(mgmt->frame_control);
@@ -1878,113 +1562,68 @@ void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *
1878 case IEEE80211_STYPE_REASSOC_RESP: 1562 case IEEE80211_STYPE_REASSOC_RESP:
1879 case IEEE80211_STYPE_DEAUTH: 1563 case IEEE80211_STYPE_DEAUTH:
1880 case IEEE80211_STYPE_DISASSOC: 1564 case IEEE80211_STYPE_DISASSOC:
1881 skb_queue_tail(&ifsta->skb_queue, skb); 1565 skb_queue_tail(&sdata->u.mgd.skb_queue, skb);
1882 queue_work(local->hw.workqueue, &ifsta->work); 1566 queue_work(local->hw.workqueue, &sdata->u.mgd.work);
1883 return; 1567 return RX_QUEUED;
1884 } 1568 }
1885 1569
1886 fail: 1570 return RX_DROP_MONITOR;
1887 kfree_skb(skb);
1888} 1571}
1889 1572
1890static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, 1573static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1891 struct sk_buff *skb) 1574 struct sk_buff *skb)
1892{ 1575{
1893 struct ieee80211_rx_status *rx_status; 1576 struct ieee80211_rx_status *rx_status;
1894 struct ieee80211_if_sta *ifsta;
1895 struct ieee80211_mgmt *mgmt; 1577 struct ieee80211_mgmt *mgmt;
1896 u16 fc; 1578 u16 fc;
1897 1579
1898 ifsta = &sdata->u.sta;
1899
1900 rx_status = (struct ieee80211_rx_status *) skb->cb; 1580 rx_status = (struct ieee80211_rx_status *) skb->cb;
1901 mgmt = (struct ieee80211_mgmt *) skb->data; 1581 mgmt = (struct ieee80211_mgmt *) skb->data;
1902 fc = le16_to_cpu(mgmt->frame_control); 1582 fc = le16_to_cpu(mgmt->frame_control);
1903 1583
1904 switch (fc & IEEE80211_FCTL_STYPE) { 1584 switch (fc & IEEE80211_FCTL_STYPE) {
1905 case IEEE80211_STYPE_PROBE_REQ:
1906 ieee80211_rx_mgmt_probe_req(sdata, ifsta, mgmt, skb->len,
1907 rx_status);
1908 break;
1909 case IEEE80211_STYPE_PROBE_RESP: 1585 case IEEE80211_STYPE_PROBE_RESP:
1910 ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, rx_status); 1586 ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len,
1587 rx_status);
1911 break; 1588 break;
1912 case IEEE80211_STYPE_BEACON: 1589 case IEEE80211_STYPE_BEACON:
1913 ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status); 1590 ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len,
1591 rx_status);
1914 break; 1592 break;
1915 case IEEE80211_STYPE_AUTH: 1593 case IEEE80211_STYPE_AUTH:
1916 ieee80211_rx_mgmt_auth(sdata, ifsta, mgmt, skb->len); 1594 ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len);
1917 break; 1595 break;
1918 case IEEE80211_STYPE_ASSOC_RESP: 1596 case IEEE80211_STYPE_ASSOC_RESP:
1919 ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 0); 1597 ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, 0);
1920 break; 1598 break;
1921 case IEEE80211_STYPE_REASSOC_RESP: 1599 case IEEE80211_STYPE_REASSOC_RESP:
1922 ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 1); 1600 ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, 1);
1923 break; 1601 break;
1924 case IEEE80211_STYPE_DEAUTH: 1602 case IEEE80211_STYPE_DEAUTH:
1925 ieee80211_rx_mgmt_deauth(sdata, ifsta, mgmt, skb->len); 1603 ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len);
1926 break; 1604 break;
1927 case IEEE80211_STYPE_DISASSOC: 1605 case IEEE80211_STYPE_DISASSOC:
1928 ieee80211_rx_mgmt_disassoc(sdata, ifsta, mgmt, skb->len); 1606 ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len);
1929 break; 1607 break;
1930 } 1608 }
1931 1609
1932 kfree_skb(skb); 1610 kfree_skb(skb);
1933} 1611}
1934 1612
1935
1936static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
1937{
1938 struct ieee80211_local *local = sdata->local;
1939 int active = 0;
1940 struct sta_info *sta;
1941
1942 rcu_read_lock();
1943
1944 list_for_each_entry_rcu(sta, &local->sta_list, list) {
1945 if (sta->sdata == sdata &&
1946 time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL,
1947 jiffies)) {
1948 active++;
1949 break;
1950 }
1951 }
1952
1953 rcu_read_unlock();
1954
1955 return active;
1956}
1957
1958
1959static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata,
1960 struct ieee80211_if_sta *ifsta)
1961{
1962 mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
1963
1964 ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT);
1965 if (ieee80211_sta_active_ibss(sdata))
1966 return;
1967
1968 printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other "
1969 "IBSS networks with same SSID (merge)\n", sdata->dev->name);
1970 ieee80211_request_scan(sdata, ifsta->ssid, ifsta->ssid_len);
1971}
1972
1973
1974static void ieee80211_sta_timer(unsigned long data) 1613static void ieee80211_sta_timer(unsigned long data)
1975{ 1614{
1976 struct ieee80211_sub_if_data *sdata = 1615 struct ieee80211_sub_if_data *sdata =
1977 (struct ieee80211_sub_if_data *) data; 1616 (struct ieee80211_sub_if_data *) data;
1978 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 1617 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1979 struct ieee80211_local *local = sdata->local; 1618 struct ieee80211_local *local = sdata->local;
1980 1619
1981 set_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); 1620 set_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request);
1982 queue_work(local->hw.workqueue, &ifsta->work); 1621 queue_work(local->hw.workqueue, &ifmgd->work);
1983} 1622}
1984 1623
1985static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata, 1624static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata)
1986 struct ieee80211_if_sta *ifsta)
1987{ 1625{
1626 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1988 struct ieee80211_local *local = sdata->local; 1627 struct ieee80211_local *local = sdata->local;
1989 1628
1990 if (local->ops->reset_tsf) { 1629 if (local->ops->reset_tsf) {
@@ -1992,298 +1631,106 @@ static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata,
1992 local->ops->reset_tsf(local_to_hw(local)); 1631 local->ops->reset_tsf(local_to_hw(local));
1993 } 1632 }
1994 1633
1995 ifsta->wmm_last_param_set = -1; /* allow any WMM update */ 1634 ifmgd->wmm_last_param_set = -1; /* allow any WMM update */
1996 1635
1997 1636
1998 if (ifsta->auth_algs & IEEE80211_AUTH_ALG_OPEN) 1637 if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_OPEN)
1999 ifsta->auth_alg = WLAN_AUTH_OPEN; 1638 ifmgd->auth_alg = WLAN_AUTH_OPEN;
2000 else if (ifsta->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY) 1639 else if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY)
2001 ifsta->auth_alg = WLAN_AUTH_SHARED_KEY; 1640 ifmgd->auth_alg = WLAN_AUTH_SHARED_KEY;
2002 else if (ifsta->auth_algs & IEEE80211_AUTH_ALG_LEAP) 1641 else if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_LEAP)
2003 ifsta->auth_alg = WLAN_AUTH_LEAP; 1642 ifmgd->auth_alg = WLAN_AUTH_LEAP;
2004 else 1643 else
2005 ifsta->auth_alg = WLAN_AUTH_OPEN; 1644 ifmgd->auth_alg = WLAN_AUTH_OPEN;
2006 ifsta->auth_transaction = -1; 1645 ifmgd->auth_transaction = -1;
2007 ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; 1646 ifmgd->flags &= ~IEEE80211_STA_ASSOCIATED;
2008 ifsta->assoc_scan_tries = 0; 1647 ifmgd->assoc_scan_tries = 0;
2009 ifsta->direct_probe_tries = 0; 1648 ifmgd->direct_probe_tries = 0;
2010 ifsta->auth_tries = 0; 1649 ifmgd->auth_tries = 0;
2011 ifsta->assoc_tries = 0; 1650 ifmgd->assoc_tries = 0;
2012 netif_tx_stop_all_queues(sdata->dev); 1651 netif_tx_stop_all_queues(sdata->dev);
2013 netif_carrier_off(sdata->dev); 1652 netif_carrier_off(sdata->dev);
2014} 1653}
2015 1654
2016 1655static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata)
2017static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta,
2018 const char *ssid, int ssid_len)
2019{
2020 int tmp, hidden_ssid;
2021
2022 if (ssid_len == ifsta->ssid_len &&
2023 !memcmp(ifsta->ssid, ssid, ssid_len))
2024 return 1;
2025
2026 if (ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL)
2027 return 0;
2028
2029 hidden_ssid = 1;
2030 tmp = ssid_len;
2031 while (tmp--) {
2032 if (ssid[tmp] != '\0') {
2033 hidden_ssid = 0;
2034 break;
2035 }
2036 }
2037
2038 if (hidden_ssid && (ifsta->ssid_len == ssid_len || ssid_len == 0))
2039 return 1;
2040
2041 if (ssid_len == 1 && ssid[0] == ' ')
2042 return 1;
2043
2044 return 0;
2045}
2046
2047static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata,
2048 struct ieee80211_if_sta *ifsta)
2049{ 1656{
1657 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2050 struct ieee80211_local *local = sdata->local; 1658 struct ieee80211_local *local = sdata->local;
2051 struct ieee80211_bss *bss; 1659 struct ieee80211_bss *bss;
2052 struct ieee80211_supported_band *sband; 1660 u8 *bssid = ifmgd->bssid, *ssid = ifmgd->ssid;
2053 u8 bssid[ETH_ALEN], *pos; 1661 u8 ssid_len = ifmgd->ssid_len;
2054 int i; 1662 u16 capa_mask = WLAN_CAPABILITY_ESS;
2055 int ret; 1663 u16 capa_val = WLAN_CAPABILITY_ESS;
2056 1664 struct ieee80211_channel *chan = local->oper_channel;
2057#if 0
2058 /* Easier testing, use fixed BSSID. */
2059 memset(bssid, 0xfe, ETH_ALEN);
2060#else
2061 /* Generate random, not broadcast, locally administered BSSID. Mix in
2062 * own MAC address to make sure that devices that do not have proper
2063 * random number generator get different BSSID. */
2064 get_random_bytes(bssid, ETH_ALEN);
2065 for (i = 0; i < ETH_ALEN; i++)
2066 bssid[i] ^= sdata->dev->dev_addr[i];
2067 bssid[0] &= ~0x01;
2068 bssid[0] |= 0x02;
2069#endif
2070
2071 printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n",
2072 sdata->dev->name, bssid);
2073
2074 bss = ieee80211_rx_bss_add(local, bssid,
2075 local->hw.conf.channel->center_freq,
2076 sdata->u.sta.ssid, sdata->u.sta.ssid_len);
2077 if (!bss)
2078 return -ENOMEM;
2079
2080 bss->band = local->hw.conf.channel->band;
2081 sband = local->hw.wiphy->bands[bss->band];
2082
2083 if (local->hw.conf.beacon_int == 0)
2084 local->hw.conf.beacon_int = 100;
2085 bss->beacon_int = local->hw.conf.beacon_int;
2086 bss->last_update = jiffies;
2087 bss->capability = WLAN_CAPABILITY_IBSS;
2088 1665
2089 if (sdata->default_key) 1666 if (ifmgd->flags & (IEEE80211_STA_AUTO_SSID_SEL |
2090 bss->capability |= WLAN_CAPABILITY_PRIVACY; 1667 IEEE80211_STA_AUTO_BSSID_SEL |
2091 else 1668 IEEE80211_STA_AUTO_CHANNEL_SEL)) {
2092 sdata->drop_unencrypted = 0; 1669 capa_mask |= WLAN_CAPABILITY_PRIVACY;
2093 1670 if (sdata->default_key)
2094 bss->supp_rates_len = sband->n_bitrates; 1671 capa_val |= WLAN_CAPABILITY_PRIVACY;
2095 pos = bss->supp_rates;
2096 for (i = 0; i < sband->n_bitrates; i++) {
2097 int rate = sband->bitrates[i].bitrate;
2098 *pos++ = (u8) (rate / 5);
2099 } 1672 }
2100 1673
2101 ret = ieee80211_sta_join_ibss(sdata, ifsta, bss); 1674 if (ifmgd->flags & IEEE80211_STA_AUTO_CHANNEL_SEL)
2102 ieee80211_rx_bss_put(local, bss); 1675 chan = NULL;
2103 return ret;
2104}
2105
2106
2107static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata,
2108 struct ieee80211_if_sta *ifsta)
2109{
2110 struct ieee80211_local *local = sdata->local;
2111 struct ieee80211_bss *bss;
2112 int found = 0;
2113 u8 bssid[ETH_ALEN];
2114 int active_ibss;
2115 1676
2116 if (ifsta->ssid_len == 0) 1677 if (ifmgd->flags & IEEE80211_STA_AUTO_BSSID_SEL)
2117 return -EINVAL; 1678 bssid = NULL;
2118 1679
2119 active_ibss = ieee80211_sta_active_ibss(sdata); 1680 if (ifmgd->flags & IEEE80211_STA_AUTO_SSID_SEL) {
2120#ifdef CONFIG_MAC80211_IBSS_DEBUG 1681 ssid = NULL;
2121 printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n", 1682 ssid_len = 0;
2122 sdata->dev->name, active_ibss);
2123#endif /* CONFIG_MAC80211_IBSS_DEBUG */
2124 spin_lock_bh(&local->bss_lock);
2125 list_for_each_entry(bss, &local->bss_list, list) {
2126 if (ifsta->ssid_len != bss->ssid_len ||
2127 memcmp(ifsta->ssid, bss->ssid, bss->ssid_len) != 0
2128 || !(bss->capability & WLAN_CAPABILITY_IBSS))
2129 continue;
2130#ifdef CONFIG_MAC80211_IBSS_DEBUG
2131 printk(KERN_DEBUG " bssid=%pM found\n", bss->bssid);
2132#endif /* CONFIG_MAC80211_IBSS_DEBUG */
2133 memcpy(bssid, bss->bssid, ETH_ALEN);
2134 found = 1;
2135 if (active_ibss || memcmp(bssid, ifsta->bssid, ETH_ALEN) != 0)
2136 break;
2137 } 1683 }
2138 spin_unlock_bh(&local->bss_lock);
2139
2140#ifdef CONFIG_MAC80211_IBSS_DEBUG
2141 if (found)
2142 printk(KERN_DEBUG " sta_find_ibss: selected %pM current "
2143 "%pM\n", bssid, ifsta->bssid);
2144#endif /* CONFIG_MAC80211_IBSS_DEBUG */
2145 1684
2146 if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { 1685 bss = (void *)cfg80211_get_bss(local->hw.wiphy, chan,
2147 int ret; 1686 bssid, ssid, ssid_len,
2148 int search_freq; 1687 capa_mask, capa_val);
2149 1688
2150 if (ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) 1689 if (bss) {
2151 search_freq = bss->freq; 1690 ieee80211_set_freq(sdata, bss->cbss.channel->center_freq);
1691 if (!(ifmgd->flags & IEEE80211_STA_SSID_SET))
1692 ieee80211_sta_set_ssid(sdata, bss->ssid,
1693 bss->ssid_len);
1694 ieee80211_sta_set_bssid(sdata, bss->cbss.bssid);
1695 ieee80211_sta_def_wmm_params(sdata, bss->supp_rates_len,
1696 bss->supp_rates);
1697 if (sdata->u.mgd.mfp == IEEE80211_MFP_REQUIRED)
1698 sdata->u.mgd.flags |= IEEE80211_STA_MFP_ENABLED;
2152 else 1699 else
2153 search_freq = local->hw.conf.channel->center_freq; 1700 sdata->u.mgd.flags &= ~IEEE80211_STA_MFP_ENABLED;
2154
2155 bss = ieee80211_rx_bss_get(local, bssid, search_freq,
2156 ifsta->ssid, ifsta->ssid_len);
2157 if (!bss)
2158 goto dont_join;
2159
2160 printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM"
2161 " based on configured SSID\n",
2162 sdata->dev->name, bssid);
2163 ret = ieee80211_sta_join_ibss(sdata, ifsta, bss);
2164 ieee80211_rx_bss_put(local, bss);
2165 return ret;
2166 }
2167
2168dont_join:
2169#ifdef CONFIG_MAC80211_IBSS_DEBUG
2170 printk(KERN_DEBUG " did not try to join ibss\n");
2171#endif /* CONFIG_MAC80211_IBSS_DEBUG */
2172
2173 /* Selected IBSS not found in current scan results - try to scan */
2174 if (ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED &&
2175 !ieee80211_sta_active_ibss(sdata)) {
2176 mod_timer(&ifsta->timer, jiffies +
2177 IEEE80211_IBSS_MERGE_INTERVAL);
2178 } else if (time_after(jiffies, local->last_scan_completed +
2179 IEEE80211_SCAN_INTERVAL)) {
2180 printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to "
2181 "join\n", sdata->dev->name);
2182 return ieee80211_request_scan(sdata, ifsta->ssid,
2183 ifsta->ssid_len);
2184 } else if (ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED) {
2185 int interval = IEEE80211_SCAN_INTERVAL;
2186
2187 if (time_after(jiffies, ifsta->ibss_join_req +
2188 IEEE80211_IBSS_JOIN_TIMEOUT)) {
2189 if ((ifsta->flags & IEEE80211_STA_CREATE_IBSS) &&
2190 (!(local->oper_channel->flags &
2191 IEEE80211_CHAN_NO_IBSS)))
2192 return ieee80211_sta_create_ibss(sdata, ifsta);
2193 if (ifsta->flags & IEEE80211_STA_CREATE_IBSS) {
2194 printk(KERN_DEBUG "%s: IBSS not allowed on"
2195 " %d MHz\n", sdata->dev->name,
2196 local->hw.conf.channel->center_freq);
2197 }
2198
2199 /* No IBSS found - decrease scan interval and continue
2200 * scanning. */
2201 interval = IEEE80211_SCAN_INTERVAL_SLOW;
2202 }
2203
2204 ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH;
2205 mod_timer(&ifsta->timer, jiffies + interval);
2206 return 0;
2207 }
2208
2209 return 0;
2210}
2211
2212
2213static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
2214 struct ieee80211_if_sta *ifsta)
2215{
2216 struct ieee80211_local *local = sdata->local;
2217 struct ieee80211_bss *bss, *selected = NULL;
2218 int top_rssi = 0, freq;
2219
2220 spin_lock_bh(&local->bss_lock);
2221 freq = local->oper_channel->center_freq;
2222 list_for_each_entry(bss, &local->bss_list, list) {
2223 if (!(bss->capability & WLAN_CAPABILITY_ESS))
2224 continue;
2225
2226 if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
2227 IEEE80211_STA_AUTO_BSSID_SEL |
2228 IEEE80211_STA_AUTO_CHANNEL_SEL)) &&
2229 (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
2230 !!sdata->default_key))
2231 continue;
2232
2233 if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) &&
2234 bss->freq != freq)
2235 continue;
2236
2237 if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) &&
2238 memcmp(bss->bssid, ifsta->bssid, ETH_ALEN))
2239 continue;
2240
2241 if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) &&
2242 !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len))
2243 continue;
2244
2245 if (!selected || top_rssi < bss->signal) {
2246 selected = bss;
2247 top_rssi = bss->signal;
2248 }
2249 }
2250 if (selected)
2251 atomic_inc(&selected->users);
2252 spin_unlock_bh(&local->bss_lock);
2253
2254 if (selected) {
2255 ieee80211_set_freq(sdata, selected->freq);
2256 if (!(ifsta->flags & IEEE80211_STA_SSID_SET))
2257 ieee80211_sta_set_ssid(sdata, selected->ssid,
2258 selected->ssid_len);
2259 ieee80211_sta_set_bssid(sdata, selected->bssid);
2260 ieee80211_sta_def_wmm_params(sdata, selected);
2261 1701
2262 /* Send out direct probe if no probe resp was received or 1702 /* Send out direct probe if no probe resp was received or
2263 * the one we have is outdated 1703 * the one we have is outdated
2264 */ 1704 */
2265 if (!selected->last_probe_resp || 1705 if (!bss->last_probe_resp ||
2266 time_after(jiffies, selected->last_probe_resp 1706 time_after(jiffies, bss->last_probe_resp
2267 + IEEE80211_SCAN_RESULT_EXPIRE)) 1707 + IEEE80211_SCAN_RESULT_EXPIRE))
2268 ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; 1708 ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE;
2269 else 1709 else
2270 ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; 1710 ifmgd->state = IEEE80211_STA_MLME_AUTHENTICATE;
2271 1711
2272 ieee80211_rx_bss_put(local, selected); 1712 ieee80211_rx_bss_put(local, bss);
2273 ieee80211_sta_reset_auth(sdata, ifsta); 1713 ieee80211_sta_reset_auth(sdata);
2274 return 0; 1714 return 0;
2275 } else { 1715 } else {
2276 if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) { 1716 if (ifmgd->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) {
2277 ifsta->assoc_scan_tries++; 1717 ifmgd->assoc_scan_tries++;
2278 if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) 1718 /* XXX maybe racy? */
2279 ieee80211_start_scan(sdata, NULL, 0); 1719 if (local->scan_req)
1720 return -1;
1721 memcpy(local->int_scan_req.ssids[0].ssid,
1722 ifmgd->ssid, IEEE80211_MAX_SSID_LEN);
1723 if (ifmgd->flags & IEEE80211_STA_AUTO_SSID_SEL)
1724 local->int_scan_req.ssids[0].ssid_len = 0;
2280 else 1725 else
2281 ieee80211_start_scan(sdata, ifsta->ssid, 1726 local->int_scan_req.ssids[0].ssid_len = ifmgd->ssid_len;
2282 ifsta->ssid_len); 1727 ieee80211_start_scan(sdata, &local->int_scan_req);
2283 ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; 1728 ifmgd->state = IEEE80211_STA_MLME_AUTHENTICATE;
2284 set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); 1729 set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request);
2285 } else 1730 } else {
2286 ifsta->state = IEEE80211_STA_MLME_DISABLED; 1731 ifmgd->assoc_scan_tries = 0;
1732 ifmgd->state = IEEE80211_STA_MLME_DISABLED;
1733 }
2287 } 1734 }
2288 return -1; 1735 return -1;
2289} 1736}
@@ -2292,9 +1739,9 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
2292static void ieee80211_sta_work(struct work_struct *work) 1739static void ieee80211_sta_work(struct work_struct *work)
2293{ 1740{
2294 struct ieee80211_sub_if_data *sdata = 1741 struct ieee80211_sub_if_data *sdata =
2295 container_of(work, struct ieee80211_sub_if_data, u.sta.work); 1742 container_of(work, struct ieee80211_sub_if_data, u.mgd.work);
2296 struct ieee80211_local *local = sdata->local; 1743 struct ieee80211_local *local = sdata->local;
2297 struct ieee80211_if_sta *ifsta; 1744 struct ieee80211_if_managed *ifmgd;
2298 struct sk_buff *skb; 1745 struct sk_buff *skb;
2299 1746
2300 if (!netif_running(sdata->dev)) 1747 if (!netif_running(sdata->dev))
@@ -2303,61 +1750,53 @@ static void ieee80211_sta_work(struct work_struct *work)
2303 if (local->sw_scanning || local->hw_scanning) 1750 if (local->sw_scanning || local->hw_scanning)
2304 return; 1751 return;
2305 1752
2306 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION && 1753 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
2307 sdata->vif.type != NL80211_IFTYPE_ADHOC))
2308 return; 1754 return;
2309 ifsta = &sdata->u.sta; 1755 ifmgd = &sdata->u.mgd;
2310 1756
2311 while ((skb = skb_dequeue(&ifsta->skb_queue))) 1757 while ((skb = skb_dequeue(&ifmgd->skb_queue)))
2312 ieee80211_sta_rx_queued_mgmt(sdata, skb); 1758 ieee80211_sta_rx_queued_mgmt(sdata, skb);
2313 1759
2314 if (ifsta->state != IEEE80211_STA_MLME_DIRECT_PROBE && 1760 if (ifmgd->state != IEEE80211_STA_MLME_DIRECT_PROBE &&
2315 ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && 1761 ifmgd->state != IEEE80211_STA_MLME_AUTHENTICATE &&
2316 ifsta->state != IEEE80211_STA_MLME_ASSOCIATE && 1762 ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE &&
2317 test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) { 1763 test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request)) {
2318 ieee80211_start_scan(sdata, ifsta->scan_ssid, 1764 ieee80211_start_scan(sdata, local->scan_req);
2319 ifsta->scan_ssid_len);
2320 return; 1765 return;
2321 } 1766 }
2322 1767
2323 if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) { 1768 if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request)) {
2324 if (ieee80211_sta_config_auth(sdata, ifsta)) 1769 if (ieee80211_sta_config_auth(sdata))
2325 return; 1770 return;
2326 clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); 1771 clear_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request);
2327 } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request)) 1772 } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request))
2328 return; 1773 return;
2329 1774
2330 switch (ifsta->state) { 1775 switch (ifmgd->state) {
2331 case IEEE80211_STA_MLME_DISABLED: 1776 case IEEE80211_STA_MLME_DISABLED:
2332 break; 1777 break;
2333 case IEEE80211_STA_MLME_DIRECT_PROBE: 1778 case IEEE80211_STA_MLME_DIRECT_PROBE:
2334 ieee80211_direct_probe(sdata, ifsta); 1779 ieee80211_direct_probe(sdata);
2335 break; 1780 break;
2336 case IEEE80211_STA_MLME_AUTHENTICATE: 1781 case IEEE80211_STA_MLME_AUTHENTICATE:
2337 ieee80211_authenticate(sdata, ifsta); 1782 ieee80211_authenticate(sdata);
2338 break; 1783 break;
2339 case IEEE80211_STA_MLME_ASSOCIATE: 1784 case IEEE80211_STA_MLME_ASSOCIATE:
2340 ieee80211_associate(sdata, ifsta); 1785 ieee80211_associate(sdata);
2341 break; 1786 break;
2342 case IEEE80211_STA_MLME_ASSOCIATED: 1787 case IEEE80211_STA_MLME_ASSOCIATED:
2343 ieee80211_associated(sdata, ifsta); 1788 ieee80211_associated(sdata);
2344 break;
2345 case IEEE80211_STA_MLME_IBSS_SEARCH:
2346 ieee80211_sta_find_ibss(sdata, ifsta);
2347 break;
2348 case IEEE80211_STA_MLME_IBSS_JOINED:
2349 ieee80211_sta_merge_ibss(sdata, ifsta);
2350 break; 1789 break;
2351 default: 1790 default:
2352 WARN_ON(1); 1791 WARN_ON(1);
2353 break; 1792 break;
2354 } 1793 }
2355 1794
2356 if (ieee80211_privacy_mismatch(sdata, ifsta)) { 1795 if (ieee80211_privacy_mismatch(sdata)) {
2357 printk(KERN_DEBUG "%s: privacy configuration mismatch and " 1796 printk(KERN_DEBUG "%s: privacy configuration mismatch and "
2358 "mixed-cell disabled - disassociate\n", sdata->dev->name); 1797 "mixed-cell disabled - disassociate\n", sdata->dev->name);
2359 1798
2360 ieee80211_set_disassoc(sdata, ifsta, false, true, 1799 ieee80211_set_disassoc(sdata, false, true,
2361 WLAN_REASON_UNSPECIFIED); 1800 WLAN_REASON_UNSPECIFIED);
2362 } 1801 }
2363} 1802}
@@ -2366,208 +1805,153 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
2366{ 1805{
2367 if (sdata->vif.type == NL80211_IFTYPE_STATION) 1806 if (sdata->vif.type == NL80211_IFTYPE_STATION)
2368 queue_work(sdata->local->hw.workqueue, 1807 queue_work(sdata->local->hw.workqueue,
2369 &sdata->u.sta.work); 1808 &sdata->u.mgd.work);
2370} 1809}
2371 1810
2372/* interface setup */ 1811/* interface setup */
2373void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) 1812void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
2374{ 1813{
2375 struct ieee80211_if_sta *ifsta; 1814 struct ieee80211_if_managed *ifmgd;
2376 1815
2377 ifsta = &sdata->u.sta; 1816 ifmgd = &sdata->u.mgd;
2378 INIT_WORK(&ifsta->work, ieee80211_sta_work); 1817 INIT_WORK(&ifmgd->work, ieee80211_sta_work);
2379 setup_timer(&ifsta->timer, ieee80211_sta_timer, 1818 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work);
1819 setup_timer(&ifmgd->timer, ieee80211_sta_timer,
2380 (unsigned long) sdata); 1820 (unsigned long) sdata);
2381 skb_queue_head_init(&ifsta->skb_queue); 1821 setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer,
1822 (unsigned long) sdata);
1823 skb_queue_head_init(&ifmgd->skb_queue);
2382 1824
2383 ifsta->capab = WLAN_CAPABILITY_ESS; 1825 ifmgd->capab = WLAN_CAPABILITY_ESS;
2384 ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN | 1826 ifmgd->auth_algs = IEEE80211_AUTH_ALG_OPEN |
2385 IEEE80211_AUTH_ALG_SHARED_KEY; 1827 IEEE80211_AUTH_ALG_SHARED_KEY;
2386 ifsta->flags |= IEEE80211_STA_CREATE_IBSS | 1828 ifmgd->flags |= IEEE80211_STA_CREATE_IBSS |
2387 IEEE80211_STA_AUTO_BSSID_SEL | 1829 IEEE80211_STA_AUTO_BSSID_SEL |
2388 IEEE80211_STA_AUTO_CHANNEL_SEL; 1830 IEEE80211_STA_AUTO_CHANNEL_SEL;
2389 if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4) 1831 if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4)
2390 ifsta->flags |= IEEE80211_STA_WMM_ENABLED; 1832 ifmgd->flags |= IEEE80211_STA_WMM_ENABLED;
2391}
2392
2393/*
2394 * Add a new IBSS station, will also be called by the RX code when,
2395 * in IBSS mode, receiving a frame from a yet-unknown station, hence
2396 * must be callable in atomic context.
2397 */
2398struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
2399 u8 *bssid, u8 *addr, u64 supp_rates)
2400{
2401 struct ieee80211_local *local = sdata->local;
2402 struct sta_info *sta;
2403 int band = local->hw.conf.channel->band;
2404
2405 /* TODO: Could consider removing the least recently used entry and
2406 * allow new one to be added. */
2407 if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) {
2408 if (net_ratelimit()) {
2409 printk(KERN_DEBUG "%s: No room for a new IBSS STA "
2410 "entry %pM\n", sdata->dev->name, addr);
2411 }
2412 return NULL;
2413 }
2414
2415 if (compare_ether_addr(bssid, sdata->u.sta.bssid))
2416 return NULL;
2417
2418#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
2419 printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n",
2420 wiphy_name(local->hw.wiphy), addr, sdata->dev->name);
2421#endif
2422
2423 sta = sta_info_alloc(sdata, addr, GFP_ATOMIC);
2424 if (!sta)
2425 return NULL;
2426
2427 set_sta_flags(sta, WLAN_STA_AUTHORIZED);
2428
2429 /* make sure mandatory rates are always added */
2430 sta->sta.supp_rates[band] = supp_rates |
2431 ieee80211_mandatory_rates(local, band);
2432
2433 rate_control_rate_init(sta);
2434
2435 if (sta_info_insert(sta))
2436 return NULL;
2437
2438 return sta;
2439} 1833}
2440 1834
2441/* configuration hooks */ 1835/* configuration hooks */
2442void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, 1836void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata)
2443 struct ieee80211_if_sta *ifsta)
2444{ 1837{
1838 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2445 struct ieee80211_local *local = sdata->local; 1839 struct ieee80211_local *local = sdata->local;
2446 1840
2447 if (sdata->vif.type != NL80211_IFTYPE_STATION) 1841 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
2448 return; 1842 return;
2449 1843
2450 if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | 1844 if ((ifmgd->flags & (IEEE80211_STA_BSSID_SET |
2451 IEEE80211_STA_AUTO_BSSID_SEL)) && 1845 IEEE80211_STA_AUTO_BSSID_SEL)) &&
2452 (ifsta->flags & (IEEE80211_STA_SSID_SET | 1846 (ifmgd->flags & (IEEE80211_STA_SSID_SET |
2453 IEEE80211_STA_AUTO_SSID_SEL))) { 1847 IEEE80211_STA_AUTO_SSID_SEL))) {
2454 1848
2455 if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) 1849 if (ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED)
2456 ieee80211_set_disassoc(sdata, ifsta, true, true, 1850 ieee80211_set_disassoc(sdata, true, true,
2457 WLAN_REASON_DEAUTH_LEAVING); 1851 WLAN_REASON_DEAUTH_LEAVING);
2458 1852
2459 set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); 1853 set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request);
2460 queue_work(local->hw.workqueue, &ifsta->work); 1854 queue_work(local->hw.workqueue, &ifmgd->work);
2461 } 1855 }
2462} 1856}
2463 1857
1858int ieee80211_sta_commit(struct ieee80211_sub_if_data *sdata)
1859{
1860 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1861
1862 ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET;
1863
1864 if (ifmgd->ssid_len)
1865 ifmgd->flags |= IEEE80211_STA_SSID_SET;
1866 else
1867 ifmgd->flags &= ~IEEE80211_STA_SSID_SET;
1868
1869 return 0;
1870}
1871
2464int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) 1872int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len)
2465{ 1873{
2466 struct ieee80211_if_sta *ifsta; 1874 struct ieee80211_if_managed *ifmgd;
2467 1875
2468 if (len > IEEE80211_MAX_SSID_LEN) 1876 if (len > IEEE80211_MAX_SSID_LEN)
2469 return -EINVAL; 1877 return -EINVAL;
2470 1878
2471 ifsta = &sdata->u.sta; 1879 ifmgd = &sdata->u.mgd;
2472
2473 if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) {
2474 memset(ifsta->ssid, 0, sizeof(ifsta->ssid));
2475 memcpy(ifsta->ssid, ssid, len);
2476 ifsta->ssid_len = len;
2477 ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET;
2478 }
2479
2480 if (len)
2481 ifsta->flags |= IEEE80211_STA_SSID_SET;
2482 else
2483 ifsta->flags &= ~IEEE80211_STA_SSID_SET;
2484 1880
2485 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && 1881 if (ifmgd->ssid_len != len || memcmp(ifmgd->ssid, ssid, len) != 0) {
2486 !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { 1882 memset(ifmgd->ssid, 0, sizeof(ifmgd->ssid));
2487 ifsta->ibss_join_req = jiffies; 1883 memcpy(ifmgd->ssid, ssid, len);
2488 ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH; 1884 ifmgd->ssid_len = len;
2489 return ieee80211_sta_find_ibss(sdata, ifsta);
2490 } 1885 }
2491 1886
2492 return 0; 1887 return ieee80211_sta_commit(sdata);
2493} 1888}
2494 1889
2495int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len) 1890int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len)
2496{ 1891{
2497 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 1892 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2498 memcpy(ssid, ifsta->ssid, ifsta->ssid_len); 1893 memcpy(ssid, ifmgd->ssid, ifmgd->ssid_len);
2499 *len = ifsta->ssid_len; 1894 *len = ifmgd->ssid_len;
2500 return 0; 1895 return 0;
2501} 1896}
2502 1897
2503int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) 1898int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid)
2504{ 1899{
2505 struct ieee80211_if_sta *ifsta; 1900 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2506 int res;
2507 1901
2508 ifsta = &sdata->u.sta; 1902 if (is_valid_ether_addr(bssid)) {
1903 memcpy(ifmgd->bssid, bssid, ETH_ALEN);
1904 ifmgd->flags |= IEEE80211_STA_BSSID_SET;
1905 } else {
1906 memset(ifmgd->bssid, 0, ETH_ALEN);
1907 ifmgd->flags &= ~IEEE80211_STA_BSSID_SET;
1908 }
2509 1909
2510 if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { 1910 if (netif_running(sdata->dev)) {
2511 memcpy(ifsta->bssid, bssid, ETH_ALEN); 1911 if (ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID)) {
2512 res = 0;
2513 /*
2514 * Hack! See also ieee80211_sta_set_ssid.
2515 */
2516 if (netif_running(sdata->dev))
2517 res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID);
2518 if (res) {
2519 printk(KERN_DEBUG "%s: Failed to config new BSSID to " 1912 printk(KERN_DEBUG "%s: Failed to config new BSSID to "
2520 "the low-level driver\n", sdata->dev->name); 1913 "the low-level driver\n", sdata->dev->name);
2521 return res;
2522 } 1914 }
2523 } 1915 }
2524 1916
2525 if (is_valid_ether_addr(bssid)) 1917 return ieee80211_sta_commit(sdata);
2526 ifsta->flags |= IEEE80211_STA_BSSID_SET;
2527 else
2528 ifsta->flags &= ~IEEE80211_STA_BSSID_SET;
2529
2530 return 0;
2531} 1918}
2532 1919
2533int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len) 1920int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len)
2534{ 1921{
2535 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 1922 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2536 1923
2537 kfree(ifsta->extra_ie); 1924 kfree(ifmgd->extra_ie);
2538 if (len == 0) { 1925 if (len == 0) {
2539 ifsta->extra_ie = NULL; 1926 ifmgd->extra_ie = NULL;
2540 ifsta->extra_ie_len = 0; 1927 ifmgd->extra_ie_len = 0;
2541 return 0; 1928 return 0;
2542 } 1929 }
2543 ifsta->extra_ie = kmalloc(len, GFP_KERNEL); 1930 ifmgd->extra_ie = kmalloc(len, GFP_KERNEL);
2544 if (!ifsta->extra_ie) { 1931 if (!ifmgd->extra_ie) {
2545 ifsta->extra_ie_len = 0; 1932 ifmgd->extra_ie_len = 0;
2546 return -ENOMEM; 1933 return -ENOMEM;
2547 } 1934 }
2548 memcpy(ifsta->extra_ie, ie, len); 1935 memcpy(ifmgd->extra_ie, ie, len);
2549 ifsta->extra_ie_len = len; 1936 ifmgd->extra_ie_len = len;
2550 return 0; 1937 return 0;
2551} 1938}
2552 1939
2553int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason) 1940int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason)
2554{ 1941{
2555 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
2556
2557 printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n", 1942 printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n",
2558 sdata->dev->name, reason); 1943 sdata->dev->name, reason);
2559 1944
2560 if (sdata->vif.type != NL80211_IFTYPE_STATION && 1945 if (sdata->vif.type != NL80211_IFTYPE_STATION)
2561 sdata->vif.type != NL80211_IFTYPE_ADHOC)
2562 return -EINVAL; 1946 return -EINVAL;
2563 1947
2564 ieee80211_set_disassoc(sdata, ifsta, true, true, reason); 1948 ieee80211_set_disassoc(sdata, true, true, reason);
2565 return 0; 1949 return 0;
2566} 1950}
2567 1951
2568int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason) 1952int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason)
2569{ 1953{
2570 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 1954 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2571 1955
2572 printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n", 1956 printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n",
2573 sdata->dev->name, reason); 1957 sdata->dev->name, reason);
@@ -2575,10 +1959,10 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason)
2575 if (sdata->vif.type != NL80211_IFTYPE_STATION) 1959 if (sdata->vif.type != NL80211_IFTYPE_STATION)
2576 return -EINVAL; 1960 return -EINVAL;
2577 1961
2578 if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED)) 1962 if (!(ifmgd->flags & IEEE80211_STA_ASSOCIATED))
2579 return -1; 1963 return -ENOLINK;
2580 1964
2581 ieee80211_set_disassoc(sdata, ifsta, false, true, reason); 1965 ieee80211_set_disassoc(sdata, false, true, reason);
2582 return 0; 1966 return 0;
2583} 1967}
2584 1968
@@ -2586,15 +1970,6 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason)
2586void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) 1970void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
2587{ 1971{
2588 struct ieee80211_sub_if_data *sdata = local->scan_sdata; 1972 struct ieee80211_sub_if_data *sdata = local->scan_sdata;
2589 struct ieee80211_if_sta *ifsta;
2590
2591 if (sdata && sdata->vif.type == NL80211_IFTYPE_ADHOC) {
2592 ifsta = &sdata->u.sta;
2593 if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
2594 (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) &&
2595 !ieee80211_sta_active_ibss(sdata)))
2596 ieee80211_sta_find_ibss(sdata, ifsta);
2597 }
2598 1973
2599 /* Restart STA timers */ 1974 /* Restart STA timers */
2600 rcu_read_lock(); 1975 rcu_read_lock();
@@ -2623,12 +1998,15 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
2623 struct ieee80211_local *local = 1998 struct ieee80211_local *local =
2624 container_of(work, struct ieee80211_local, 1999 container_of(work, struct ieee80211_local,
2625 dynamic_ps_enable_work); 2000 dynamic_ps_enable_work);
2001 struct ieee80211_sub_if_data *sdata = local->scan_sdata;
2626 2002
2627 if (local->hw.conf.flags & IEEE80211_CONF_PS) 2003 if (local->hw.conf.flags & IEEE80211_CONF_PS)
2628 return; 2004 return;
2629 2005
2630 local->hw.conf.flags |= IEEE80211_CONF_PS; 2006 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
2007 ieee80211_send_nullfunc(local, sdata, 1);
2631 2008
2009 local->hw.conf.flags |= IEEE80211_CONF_PS;
2632 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); 2010 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
2633} 2011}
2634 2012
@@ -2638,3 +2016,36 @@ void ieee80211_dynamic_ps_timer(unsigned long data)
2638 2016
2639 queue_work(local->hw.workqueue, &local->dynamic_ps_enable_work); 2017 queue_work(local->hw.workqueue, &local->dynamic_ps_enable_work);
2640} 2018}
2019
2020void ieee80211_send_nullfunc(struct ieee80211_local *local,
2021 struct ieee80211_sub_if_data *sdata,
2022 int powersave)
2023{
2024 struct sk_buff *skb;
2025 struct ieee80211_hdr *nullfunc;
2026 __le16 fc;
2027
2028 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
2029 return;
2030
2031 skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24);
2032 if (!skb) {
2033 printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc "
2034 "frame\n", sdata->dev->name);
2035 return;
2036 }
2037 skb_reserve(skb, local->hw.extra_tx_headroom);
2038
2039 nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24);
2040 memset(nullfunc, 0, 24);
2041 fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC |
2042 IEEE80211_FCTL_TODS);
2043 if (powersave)
2044 fc |= cpu_to_le16(IEEE80211_FCTL_PM);
2045 nullfunc->frame_control = fc;
2046 memcpy(nullfunc->addr1, sdata->u.mgd.bssid, ETH_ALEN);
2047 memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN);
2048 memcpy(nullfunc->addr3, sdata->u.mgd.bssid, ETH_ALEN);
2049
2050 ieee80211_tx_skb(sdata, skb, 0);
2051}
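As an aside (not part of the patch), the nullfunc builder above reduces to a single frame-control value; the constants are the standard 802.11 FC bits from linux/ieee80211.h:

	/* worked value for the power-save case built above */
	fc = cpu_to_le16(IEEE80211_FTYPE_DATA |     /* 0x0008 */
			 IEEE80211_STYPE_NULLFUNC | /* 0x0040 */
			 IEEE80211_FCTL_TODS |      /* 0x0100 */
			 IEEE80211_FCTL_PM);        /* 0x1000 -> fc = 0x1148, stored LE */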
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
new file mode 100644
index 000000000000..44525f517077
--- /dev/null
+++ b/net/mac80211/pm.c
@@ -0,0 +1,117 @@
1#include <net/mac80211.h>
2#include <net/rtnetlink.h>
3
4#include "ieee80211_i.h"
5#include "led.h"
6
7int __ieee80211_suspend(struct ieee80211_hw *hw)
8{
9 struct ieee80211_local *local = hw_to_local(hw);
10 struct ieee80211_sub_if_data *sdata;
11 struct ieee80211_if_init_conf conf;
12 struct sta_info *sta;
13
14 flush_workqueue(local->hw.workqueue);
15
16 /* disable keys */
17 list_for_each_entry(sdata, &local->interfaces, list)
18 ieee80211_disable_keys(sdata);
19
20 /* remove STAs */
21 list_for_each_entry(sta, &local->sta_list, list) {
22
23 if (local->ops->sta_notify) {
24 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
25 sdata = container_of(sdata->bss,
26 struct ieee80211_sub_if_data,
27 u.ap);
28
29 local->ops->sta_notify(hw, &sdata->vif,
30 STA_NOTIFY_REMOVE, &sta->sta);
31 }
32 }
33
34 /* remove all interfaces */
35 list_for_each_entry(sdata, &local->interfaces, list) {
36
37 if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
38 sdata->vif.type != NL80211_IFTYPE_MONITOR &&
39 netif_running(sdata->dev)) {
40 conf.vif = &sdata->vif;
41 conf.type = sdata->vif.type;
42 conf.mac_addr = sdata->dev->dev_addr;
43 local->ops->remove_interface(hw, &conf);
44 }
45 }
46
47 /* flush again, in case driver queued work */
48 flush_workqueue(local->hw.workqueue);
49
50 /* stop hardware */
51 if (local->open_count) {
52 ieee80211_led_radio(local, false);
53 local->ops->stop(hw);
54 }
55 return 0;
56}
57
58int __ieee80211_resume(struct ieee80211_hw *hw)
59{
60 struct ieee80211_local *local = hw_to_local(hw);
61 struct ieee80211_sub_if_data *sdata;
62 struct ieee80211_if_init_conf conf;
63 struct sta_info *sta;
64 int res;
65
66 /* restart hardware */
67 if (local->open_count) {
68 res = local->ops->start(hw);
69
70 ieee80211_led_radio(local, hw->conf.radio_enabled);
71 }
72
73 /* add interfaces */
74 list_for_each_entry(sdata, &local->interfaces, list) {
75
76 if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
77 sdata->vif.type != NL80211_IFTYPE_MONITOR &&
78 netif_running(sdata->dev)) {
79 conf.vif = &sdata->vif;
80 conf.type = sdata->vif.type;
81 conf.mac_addr = sdata->dev->dev_addr;
82 res = local->ops->add_interface(hw, &conf);
83 }
84 }
85
86 /* add STAs back */
87 list_for_each_entry(sta, &local->sta_list, list) {
88
89 if (local->ops->sta_notify) {
90 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
91 sdata = container_of(sdata->bss,
92 struct ieee80211_sub_if_data,
93 u.ap);
94
95 local->ops->sta_notify(hw, &sdata->vif,
96 STA_NOTIFY_ADD, &sta->sta);
97 }
98 }
99
100 /* add back keys */
101 list_for_each_entry(sdata, &local->interfaces, list)
102 if (netif_running(sdata->dev))
103 ieee80211_enable_keys(sdata);
104
105 /* setup RTS threshold */
106 if (local->ops->set_rts_threshold)
107 local->ops->set_rts_threshold(hw, local->rts_threshold);
108
109 /* reconfigure hardware */
110 ieee80211_hw_config(local, ~0);
111
112 netif_addr_lock_bh(local->mdev);
113 ieee80211_configure_filter(local);
114 netif_addr_unlock_bh(local->mdev);
115
116 return 0;
117}
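A sketch, not part of this patch, of how the two helpers above might be reached from a wiphy-level suspend/resume hook; the wrapper names are hypothetical and the wiphy_priv() layout is an assumption:

	static int ieee80211_cfg_suspend(struct wiphy *wiphy)
	{
		struct ieee80211_local *local = wiphy_priv(wiphy);	/* assumption */

		return __ieee80211_suspend(&local->hw);
	}

	static int ieee80211_cfg_resume(struct wiphy *wiphy)
	{
		struct ieee80211_local *local = wiphy_priv(wiphy);	/* assumption */

		return __ieee80211_resume(&local->hw);
	}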
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 928da625e281..b9164c9a9563 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -62,6 +62,18 @@ static inline void rate_control_rate_init(struct sta_info *sta)
62 ref->ops->rate_init(ref->priv, sband, ista, priv_sta); 62 ref->ops->rate_init(ref->priv, sband, ista, priv_sta);
63} 63}
64 64
65static inline void rate_control_rate_update(struct ieee80211_local *local,
66 struct ieee80211_supported_band *sband,
67 struct sta_info *sta, u32 changed)
68{
69 struct rate_control_ref *ref = local->rate_ctrl;
70 struct ieee80211_sta *ista = &sta->sta;
71 void *priv_sta = sta->rate_ctrl_priv;
72
73 if (ref->ops->rate_update)
74 ref->ops->rate_update(ref->priv, sband, ista,
75 priv_sta, changed);
76}
65 77
66static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, 78static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
67 struct ieee80211_sta *sta, 79 struct ieee80211_sta *sta,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 7175ae80c36a..66f7ecf51b92 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -86,8 +86,7 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
86 86
87 if (status->flag & RX_FLAG_TSFT) 87 if (status->flag & RX_FLAG_TSFT)
88 len += 8; 88 len += 8;
89 if (local->hw.flags & IEEE80211_HW_SIGNAL_DB || 89 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
90 local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
91 len += 1; 90 len += 1;
92 if (local->hw.flags & IEEE80211_HW_NOISE_DBM) 91 if (local->hw.flags & IEEE80211_HW_NOISE_DBM)
93 len += 1; 92 len += 1;
@@ -102,7 +101,7 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
102 return len; 101 return len;
103} 102}
104 103
105/** 104/*
106 * ieee80211_add_rx_radiotap_header - add radiotap header 105 * ieee80211_add_rx_radiotap_header - add radiotap header
107 * 106 *
108 * add a radiotap header containing all the fields which the hardware provided. 107 * add a radiotap header containing all the fields which the hardware provided.
@@ -158,7 +157,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
158 */ 157 */
159 *pos = 0; 158 *pos = 0;
160 } else { 159 } else {
161 rthdr->it_present |= (1 << IEEE80211_RADIOTAP_RATE); 160 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
162 *pos = rate->bitrate / 5; 161 *pos = rate->bitrate / 5;
163 } 162 }
164 pos++; 163 pos++;
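The cpu_to_le32() wrapping added above matters because it_present in the radiotap header is declared as a little-endian (__le32) field; an illustrative note:

	/*
	 * On a big-endian host,
	 *	rthdr->it_present |= (1 << IEEE80211_RADIOTAP_RATE);
	 * sets the wrong bit in the little-endian on-air representation;
	 * cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE) is a no-op on
	 * little-endian machines and correct on big-endian ones.
	 */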
@@ -199,14 +198,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
199 *pos = status->antenna; 198 *pos = status->antenna;
200 pos++; 199 pos++;
201 200
202 /* IEEE80211_RADIOTAP_DB_ANTSIGNAL */
203 if (local->hw.flags & IEEE80211_HW_SIGNAL_DB) {
204 *pos = status->signal;
205 rthdr->it_present |=
206 cpu_to_le32(1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL);
207 pos++;
208 }
209
210 /* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */ 201 /* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */
211 202
212 /* IEEE80211_RADIOTAP_RX_FLAGS */ 203 /* IEEE80211_RADIOTAP_RX_FLAGS */
@@ -371,39 +362,50 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
371 rx->skb->priority = (tid > 7) ? 0 : tid; 362 rx->skb->priority = (tid > 7) ? 0 : tid;
372} 363}
373 364
374static void ieee80211_verify_ip_alignment(struct ieee80211_rx_data *rx) 365/**
366 * DOC: Packet alignment
367 *
368 * Drivers always need to pass packets that are aligned to two-byte boundaries
369 * to the stack.
370 *
371 * Additionally, drivers should, if possible, align the payload data in a way that
372 * guarantees that the contained IP header is aligned to a four-byte
373 * boundary. In the case of regular frames, this simply means aligning the
374 * payload to a four-byte boundary (because either the IP header is directly
375 * contained, or IV/RFC1042 headers that have a length divisible by four are
376 * in front of it).
377 *
378 * With A-MSDU frames, however, the payload data address must be two modulo
379 * four because there are 14-byte 802.3 headers within the A-MSDU frames that
380 * push the IP header further back to a multiple of four again. Thankfully, the
381 * specs were sane enough this time around to require padding each A-MSDU
382 * subframe to a length that is a multiple of four.
383 *
384 * Padding such as Atheros hardware inserts in between the 802.11 header and
385 * the payload is not supported; the driver is required to move the 802.11
386 * header to be directly in front of the payload in that case.
387 */
388static void ieee80211_verify_alignment(struct ieee80211_rx_data *rx)
375{ 389{
376#ifdef CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT
377 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 390 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
378 int hdrlen; 391 int hdrlen;
379 392
393#ifndef CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT
394 return;
395#endif
396
397 if (WARN_ONCE((unsigned long)rx->skb->data & 1,
398 "unaligned packet at 0x%p\n", rx->skb->data))
399 return;
400
380 if (!ieee80211_is_data_present(hdr->frame_control)) 401 if (!ieee80211_is_data_present(hdr->frame_control))
381 return; 402 return;
382 403
383 /*
384 * Drivers are required to align the payload data in a way that
385 * guarantees that the contained IP header is aligned to a four-
386 * byte boundary. In the case of regular frames, this simply means
387 * aligning the payload to a four-byte boundary (because either
388 * the IP header is directly contained, or IV/RFC1042 headers that
389 * have a length divisible by four are in front of it.
390 *
391 * With A-MSDU frames, however, the payload data address must
392 * yield two modulo four because there are 14-byte 802.3 headers
393 * within the A-MSDU frames that push the IP header further back
394 * to a multiple of four again. Thankfully, the specs were sane
395 * enough this time around to require padding each A-MSDU subframe
396 * to a length that is a multiple of four.
397 *
398 * Padding like atheros hardware adds which is inbetween the 802.11
399 * header and the payload is not supported, the driver is required
400 * to move the 802.11 header further back in that case.
401 */
402 hdrlen = ieee80211_hdrlen(hdr->frame_control); 404 hdrlen = ieee80211_hdrlen(hdr->frame_control);
403 if (rx->flags & IEEE80211_RX_AMSDU) 405 if (rx->flags & IEEE80211_RX_AMSDU)
404 hdrlen += ETH_HLEN; 406 hdrlen += ETH_HLEN;
405 WARN_ON_ONCE(((unsigned long)(rx->skb->data + hdrlen)) & 3); 407 WARN_ONCE(((unsigned long)(rx->skb->data + hdrlen)) & 3,
406#endif 408 "unaligned IP payload at 0x%p\n", rx->skb->data + hdrlen);
407} 409}
408 410
409 411
@@ -435,6 +437,52 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
435 return RX_CONTINUE; 437 return RX_CONTINUE;
436} 438}
437 439
440
441static int ieee80211_is_unicast_robust_mgmt_frame(struct sk_buff *skb)
442{
443 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
444
445 if (skb->len < 24 || is_multicast_ether_addr(hdr->addr1))
446 return 0;
447
448 return ieee80211_is_robust_mgmt_frame(hdr);
449}
450
451
452static int ieee80211_is_multicast_robust_mgmt_frame(struct sk_buff *skb)
453{
454 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
455
456 if (skb->len < 24 || !is_multicast_ether_addr(hdr->addr1))
457 return 0;
458
459 return ieee80211_is_robust_mgmt_frame(hdr);
460}
461
462
463/* Get the BIP key index from MMIE; return -1 if this is not a BIP frame */
464static int ieee80211_get_mmie_keyidx(struct sk_buff *skb)
465{
466 struct ieee80211_mgmt *hdr = (struct ieee80211_mgmt *) skb->data;
467 struct ieee80211_mmie *mmie;
468
469 if (skb->len < 24 + sizeof(*mmie) ||
470 !is_multicast_ether_addr(hdr->da))
471 return -1;
472
473 if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *) hdr))
474 return -1; /* not a robust management frame */
475
476 mmie = (struct ieee80211_mmie *)
477 (skb->data + skb->len - sizeof(*mmie));
478 if (mmie->element_id != WLAN_EID_MMIE ||
479 mmie->length != sizeof(*mmie) - 2)
480 return -1;
481
482 return le16_to_cpu(mmie->key_id);
483}
484
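For reference, the MMIE layout the helper above relies on, as assumed from linux/ieee80211.h (the IE sits at the very end of the frame, hence the skb->len - sizeof(*mmie) arithmetic):

	struct ieee80211_mmie {
		u8 element_id;			/* WLAN_EID_MMIE */
		u8 length;			/* sizeof(*mmie) - 2 == 16 */
		__le16 key_id;			/* BIP/IGTK key index */
		u8 sequence_number[6];		/* IPN, for replay protection */
		u8 mic[8];
	} __attribute__ ((packed));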
485
438static ieee80211_rx_result 486static ieee80211_rx_result
439ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) 487ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
440{ 488{
@@ -550,21 +598,23 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
550 int hdrlen; 598 int hdrlen;
551 ieee80211_rx_result result = RX_DROP_UNUSABLE; 599 ieee80211_rx_result result = RX_DROP_UNUSABLE;
552 struct ieee80211_key *stakey = NULL; 600 struct ieee80211_key *stakey = NULL;
601 int mmie_keyidx = -1;
553 602
554 /* 603 /*
555 * Key selection 101 604 * Key selection 101
556 * 605 *
557 * There are three types of keys: 606 * There are four types of keys:
558 * - GTK (group keys) 607 * - GTK (group keys)
608 * - IGTK (group keys for management frames)
559 * - PTK (pairwise keys) 609 * - PTK (pairwise keys)
560 * - STK (station-to-station pairwise keys) 610 * - STK (station-to-station pairwise keys)
561 * 611 *
562 * When selecting a key, we have to distinguish between multicast 612 * When selecting a key, we have to distinguish between multicast
563 * (including broadcast) and unicast frames, the latter can only 613 * (including broadcast) and unicast frames, the latter can only
564 * use PTKs and STKs while the former always use GTKs. Unless, of 614 * use PTKs and STKs while the former always use GTKs and IGTKs.
565 * course, actual WEP keys ("pre-RSNA") are used, then unicast 615 * Unless, of course, actual WEP keys ("pre-RSNA") are used, then
566 * frames can also use key indizes like GTKs. Hence, if we don't 616 * unicast frames can also use key indices like GTKs. Hence, if we
567 * have a PTK/STK we check the key index for a WEP key. 617 * don't have a PTK/STK we check the key index for a WEP key.
568 * 618 *
569 * Note that in a regular BSS, multicast frames are sent by the 619 * Note that in a regular BSS, multicast frames are sent by the
570 * AP only, associated stations unicast the frame to the AP first 620 * AP only, associated stations unicast the frame to the AP first
@@ -577,8 +627,14 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
577 * possible. 627 * possible.
578 */ 628 */
579 629
580 if (!ieee80211_has_protected(hdr->frame_control)) 630 if (!ieee80211_has_protected(hdr->frame_control)) {
581 return RX_CONTINUE; 631 if (!ieee80211_is_mgmt(hdr->frame_control) ||
632 rx->sta == NULL || !test_sta_flags(rx->sta, WLAN_STA_MFP))
633 return RX_CONTINUE;
634 mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb);
635 if (mmie_keyidx < 0)
636 return RX_CONTINUE;
637 }
582 638
583 /* 639 /*
584 * No point in finding a key and decrypting if the frame is neither 640 * No point in finding a key and decrypting if the frame is neither
@@ -592,6 +648,16 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
592 648
593 if (!is_multicast_ether_addr(hdr->addr1) && stakey) { 649 if (!is_multicast_ether_addr(hdr->addr1) && stakey) {
594 rx->key = stakey; 650 rx->key = stakey;
651 } else if (mmie_keyidx >= 0) {
652 /* Broadcast/multicast robust management frame / BIP */
653 if ((rx->status->flag & RX_FLAG_DECRYPTED) &&
654 (rx->status->flag & RX_FLAG_IV_STRIPPED))
655 return RX_CONTINUE;
656
657 if (mmie_keyidx < NUM_DEFAULT_KEYS ||
658 mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
659 return RX_DROP_MONITOR; /* unexpected BIP keyidx */
660 rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
595 } else { 661 } else {
596 /* 662 /*
597 * The device doesn't give us the IV so we won't be 663 * The device doesn't give us the IV so we won't be
@@ -654,6 +720,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
654 case ALG_CCMP: 720 case ALG_CCMP:
655 result = ieee80211_crypto_ccmp_decrypt(rx); 721 result = ieee80211_crypto_ccmp_decrypt(rx);
656 break; 722 break;
723 case ALG_AES_CMAC:
724 result = ieee80211_crypto_aes_cmac_decrypt(rx);
725 break;
657 } 726 }
658 727
659 /* either the frame has been decrypted or will be dropped */ 728 /* either the frame has been decrypted or will be dropped */
@@ -662,6 +731,39 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
662 return result; 731 return result;
663} 732}
664 733
734static ieee80211_rx_result debug_noinline
735ieee80211_rx_h_check_more_data(struct ieee80211_rx_data *rx)
736{
737 struct ieee80211_local *local;
738 struct ieee80211_hdr *hdr;
739 struct sk_buff *skb;
740
741 local = rx->local;
742 skb = rx->skb;
743 hdr = (struct ieee80211_hdr *) skb->data;
744
745 if (!local->pspolling)
746 return RX_CONTINUE;
747
748 if (!ieee80211_has_fromds(hdr->frame_control))
749 /* this is not from AP */
750 return RX_CONTINUE;
751
752 if (!ieee80211_is_data(hdr->frame_control))
753 return RX_CONTINUE;
754
755 if (!ieee80211_has_moredata(hdr->frame_control)) {
756 /* AP has no more frames buffered for us */
757 local->pspolling = false;
758 return RX_CONTINUE;
759 }
760
761 /* more data bit is set, let's request a new frame from the AP */
762 ieee80211_send_pspoll(local, rx->sdata);
763
764 return RX_CONTINUE;
765}
766
665static void ap_sta_ps_start(struct sta_info *sta) 767static void ap_sta_ps_start(struct sta_info *sta)
666{ 768{
667 struct ieee80211_sub_if_data *sdata = sta->sdata; 769 struct ieee80211_sub_if_data *sdata = sta->sdata;
@@ -736,7 +838,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
736 if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) { 838 if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) {
737 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, 839 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len,
738 NL80211_IFTYPE_ADHOC); 840 NL80211_IFTYPE_ADHOC);
739 if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) 841 if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0)
740 sta->last_rx = jiffies; 842 sta->last_rx = jiffies;
741 } else 843 } else
742 if (!is_multicast_ether_addr(hdr->addr1) || 844 if (!is_multicast_ether_addr(hdr->addr1) ||
@@ -1101,6 +1203,15 @@ ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
1101 /* Drop unencrypted frames if key is set. */ 1203 /* Drop unencrypted frames if key is set. */
1102 if (unlikely(!ieee80211_has_protected(fc) && 1204 if (unlikely(!ieee80211_has_protected(fc) &&
1103 !ieee80211_is_nullfunc(fc) && 1205 !ieee80211_is_nullfunc(fc) &&
1206 (!ieee80211_is_mgmt(fc) ||
1207 (ieee80211_is_unicast_robust_mgmt_frame(rx->skb) &&
1208 rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP))) &&
1209 (rx->key || rx->sdata->drop_unencrypted)))
1210 return -EACCES;
1211 /* BIP does not use Protected field, so need to check MMIE */
1212 if (unlikely(rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP) &&
1213 ieee80211_is_multicast_robust_mgmt_frame(rx->skb) &&
1214 ieee80211_get_mmie_keyidx(rx->skb) < 0 &&
1104 (rx->key || rx->sdata->drop_unencrypted))) 1215 (rx->key || rx->sdata->drop_unencrypted)))
1105 return -EACCES; 1216 return -EACCES;
1106 1217
@@ -1138,12 +1249,12 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
1138 1249
1139 switch (hdr->frame_control & 1250 switch (hdr->frame_control &
1140 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { 1251 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
1141 case __constant_cpu_to_le16(IEEE80211_FCTL_TODS): 1252 case cpu_to_le16(IEEE80211_FCTL_TODS):
1142 if (unlikely(sdata->vif.type != NL80211_IFTYPE_AP && 1253 if (unlikely(sdata->vif.type != NL80211_IFTYPE_AP &&
1143 sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) 1254 sdata->vif.type != NL80211_IFTYPE_AP_VLAN))
1144 return -1; 1255 return -1;
1145 break; 1256 break;
1146 case __constant_cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): 1257 case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
1147 if (unlikely(sdata->vif.type != NL80211_IFTYPE_WDS && 1258 if (unlikely(sdata->vif.type != NL80211_IFTYPE_WDS &&
1148 sdata->vif.type != NL80211_IFTYPE_MESH_POINT)) 1259 sdata->vif.type != NL80211_IFTYPE_MESH_POINT))
1149 return -1; 1260 return -1;
@@ -1157,13 +1268,13 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
1157 } 1268 }
1158 } 1269 }
1159 break; 1270 break;
1160 case __constant_cpu_to_le16(IEEE80211_FCTL_FROMDS): 1271 case cpu_to_le16(IEEE80211_FCTL_FROMDS):
1161 if (sdata->vif.type != NL80211_IFTYPE_STATION || 1272 if (sdata->vif.type != NL80211_IFTYPE_STATION ||
1162 (is_multicast_ether_addr(dst) && 1273 (is_multicast_ether_addr(dst) &&
1163 !compare_ether_addr(src, dev->dev_addr))) 1274 !compare_ether_addr(src, dev->dev_addr)))
1164 return -1; 1275 return -1;
1165 break; 1276 break;
1166 case __constant_cpu_to_le16(0): 1277 case cpu_to_le16(0):
1167 if (sdata->vif.type != NL80211_IFTYPE_ADHOC) 1278 if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
1168 return -1; 1279 return -1;
1169 break; 1280 break;
@@ -1267,10 +1378,37 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
1267 } 1378 }
1268 1379
1269 if (skb) { 1380 if (skb) {
1270 /* deliver to local stack */ 1381 int align __maybe_unused;
1271 skb->protocol = eth_type_trans(skb, dev); 1382
1272 memset(skb->cb, 0, sizeof(skb->cb)); 1383#if defined(CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT) || !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
1273 netif_rx(skb); 1384 /*
1385 * 'align' will only take the values 0 or 2 here
1386 * since all frames are required to be aligned
1387 * to 2-byte boundaries when being passed to
1388 * mac80211. That also explains the __skb_push()
1389 * below.
1390 */
1391 align = (unsigned long)skb->data & 3;
1392 if (align) {
1393 if (WARN_ON(skb_headroom(skb) < 3)) {
1394 dev_kfree_skb(skb);
1395 skb = NULL;
1396 } else {
1397 u8 *data = skb->data;
1398 size_t len = skb->len;
1399 u8 *new = __skb_push(skb, align);
1400 memmove(new, data, len);
1401 __skb_trim(skb, len);
1402 }
1403 }
1404#endif
1405
1406 if (skb) {
1407 /* deliver to local stack */
1408 skb->protocol = eth_type_trans(skb, dev);
1409 memset(skb->cb, 0, sizeof(skb->cb));
1410 netif_rx(skb);
1411 }
1274 } 1412 }
1275 1413
1276 if (xmit_skb) { 1414 if (xmit_skb) {
@@ -1339,14 +1477,20 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
1339 if (remaining <= subframe_len + padding) 1477 if (remaining <= subframe_len + padding)
1340 frame = skb; 1478 frame = skb;
1341 else { 1479 else {
1342 frame = dev_alloc_skb(local->hw.extra_tx_headroom + 1480 /*
1343 subframe_len); 1481 * Allocate and reserve two bytes more for payload
1482 * alignment since sizeof(struct ethhdr) is 14.
1483 */
1484 frame = dev_alloc_skb(
1485 ALIGN(local->hw.extra_tx_headroom, 4) +
1486 subframe_len + 2);
1344 1487
1345 if (frame == NULL) 1488 if (frame == NULL)
1346 return RX_DROP_UNUSABLE; 1489 return RX_DROP_UNUSABLE;
1347 1490
1348 skb_reserve(frame, local->hw.extra_tx_headroom + 1491 skb_reserve(frame,
1349 sizeof(struct ethhdr)); 1492 ALIGN(local->hw.extra_tx_headroom, 4) +
1493 sizeof(struct ethhdr) + 2);
1350 memcpy(skb_put(frame, ntohs(len)), skb->data, 1494 memcpy(skb_put(frame, ntohs(len)), skb->data,
1351 ntohs(len)); 1495 ntohs(len));
1352 1496
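A short worked example of the reservation arithmetic above, assuming the underlying skb buffer itself starts four-byte aligned:

	/*
	 * reserve = ALIGN(extra_tx_headroom, 4) + sizeof(struct ethhdr) + 2
	 *         = (multiple of 4) + 14 + 2
	 *         = multiple of 4
	 *
	 * so the subframe payload copied in via skb_put() starts on a
	 * four-byte boundary; pushing the 14-byte Ethernet header back in
	 * front later leaves that header at offset 2 (mod 4), which is the
	 * A-MSDU alignment case described earlier in this patch.
	 */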
@@ -1529,11 +1673,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx)
1529 start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4; 1673 start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4;
1530 1674
1531 /* reset session timer */ 1675 /* reset session timer */
1532 if (tid_agg_rx->timeout) { 1676 if (tid_agg_rx->timeout)
1533 unsigned long expires = 1677 mod_timer(&tid_agg_rx->session_timer,
1534 jiffies + (tid_agg_rx->timeout / 1000) * HZ; 1678 TU_TO_EXP_TIME(tid_agg_rx->timeout));
1535 mod_timer(&tid_agg_rx->session_timer, expires);
1536 }
1537 1679
1538 /* manage reordering buffer according to requested */ 1680 /* manage reordering buffer according to requested */
1539 /* sequence number */ 1681 /* sequence number */
@@ -1547,12 +1689,64 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx)
1547 return RX_CONTINUE; 1689 return RX_CONTINUE;
1548} 1690}
1549 1691
1692static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
1693 struct ieee80211_mgmt *mgmt,
1694 size_t len)
1695{
1696 struct ieee80211_local *local = sdata->local;
1697 struct sk_buff *skb;
1698 struct ieee80211_mgmt *resp;
1699
1700 if (compare_ether_addr(mgmt->da, sdata->dev->dev_addr) != 0) {
1701 /* Not to own unicast address */
1702 return;
1703 }
1704
1705 if (compare_ether_addr(mgmt->sa, sdata->u.mgd.bssid) != 0 ||
1706 compare_ether_addr(mgmt->bssid, sdata->u.mgd.bssid) != 0) {
1707 /* Not from the current AP. */
1708 return;
1709 }
1710
1711 if (sdata->u.mgd.state == IEEE80211_STA_MLME_ASSOCIATE) {
1712 /* Association in progress; ignore SA Query */
1713 return;
1714 }
1715
1716 if (len < 24 + 1 + sizeof(resp->u.action.u.sa_query)) {
1717 /* Too short SA Query request frame */
1718 return;
1719 }
1720
1721 skb = dev_alloc_skb(sizeof(*resp) + local->hw.extra_tx_headroom);
1722 if (skb == NULL)
1723 return;
1724
1725 skb_reserve(skb, local->hw.extra_tx_headroom);
1726 resp = (struct ieee80211_mgmt *) skb_put(skb, 24);
1727 memset(resp, 0, 24);
1728 memcpy(resp->da, mgmt->sa, ETH_ALEN);
1729 memcpy(resp->sa, sdata->dev->dev_addr, ETH_ALEN);
1730 memcpy(resp->bssid, sdata->u.mgd.bssid, ETH_ALEN);
1731 resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
1732 IEEE80211_STYPE_ACTION);
1733 skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query));
1734 resp->u.action.category = WLAN_CATEGORY_SA_QUERY;
1735 resp->u.action.u.sa_query.action = WLAN_ACTION_SA_QUERY_RESPONSE;
1736 memcpy(resp->u.action.u.sa_query.trans_id,
1737 mgmt->u.action.u.sa_query.trans_id,
1738 WLAN_SA_QUERY_TR_ID_LEN);
1739
1740 ieee80211_tx_skb(sdata, skb, 1);
1741}
1742
1550static ieee80211_rx_result debug_noinline 1743static ieee80211_rx_result debug_noinline
1551ieee80211_rx_h_action(struct ieee80211_rx_data *rx) 1744ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1552{ 1745{
1553 struct ieee80211_local *local = rx->local; 1746 struct ieee80211_local *local = rx->local;
1554 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); 1747 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
1555 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; 1748 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1749 struct ieee80211_bss *bss;
1556 int len = rx->skb->len; 1750 int len = rx->skb->len;
1557 1751
1558 if (!ieee80211_is_action(mgmt->frame_control)) 1752 if (!ieee80211_is_action(mgmt->frame_control))
@@ -1564,12 +1758,26 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1564 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 1758 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
1565 return RX_DROP_MONITOR; 1759 return RX_DROP_MONITOR;
1566 1760
1761 if (ieee80211_drop_unencrypted(rx, mgmt->frame_control))
1762 return RX_DROP_MONITOR;
1763
1567 /* all categories we currently handle have action_code */ 1764 /* all categories we currently handle have action_code */
1568 if (len < IEEE80211_MIN_ACTION_SIZE + 1) 1765 if (len < IEEE80211_MIN_ACTION_SIZE + 1)
1569 return RX_DROP_MONITOR; 1766 return RX_DROP_MONITOR;
1570 1767
1571 switch (mgmt->u.action.category) { 1768 switch (mgmt->u.action.category) {
1572 case WLAN_CATEGORY_BACK: 1769 case WLAN_CATEGORY_BACK:
1770 /*
1771 * The aggregation code is not prepared to handle
1772 * anything but STA/AP due to the BSSID handling;
1773 * IBSS could work in the code but isn't supported
1774 * by drivers or the standard.
1775 */
1776 if (sdata->vif.type != NL80211_IFTYPE_STATION &&
1777 sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
1778 sdata->vif.type != NL80211_IFTYPE_AP)
1779 return RX_DROP_MONITOR;
1780
1573 switch (mgmt->u.action.u.addba_req.action_code) { 1781 switch (mgmt->u.action.u.addba_req.action_code) {
1574 case WLAN_ACTION_ADDBA_REQ: 1782 case WLAN_ACTION_ADDBA_REQ:
1575 if (len < (IEEE80211_MIN_ACTION_SIZE + 1783 if (len < (IEEE80211_MIN_ACTION_SIZE +
@@ -1594,6 +1802,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1594 case WLAN_CATEGORY_SPECTRUM_MGMT: 1802 case WLAN_CATEGORY_SPECTRUM_MGMT:
1595 if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) 1803 if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ)
1596 return RX_DROP_MONITOR; 1804 return RX_DROP_MONITOR;
1805
1806 if (sdata->vif.type != NL80211_IFTYPE_STATION)
1807 return RX_DROP_MONITOR;
1808
1597 switch (mgmt->u.action.u.measurement.action_code) { 1809 switch (mgmt->u.action.u.measurement.action_code) {
1598 case WLAN_ACTION_SPCT_MSR_REQ: 1810 case WLAN_ACTION_SPCT_MSR_REQ:
1599 if (len < (IEEE80211_MIN_ACTION_SIZE + 1811 if (len < (IEEE80211_MIN_ACTION_SIZE +
@@ -1601,6 +1813,43 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1601 return RX_DROP_MONITOR; 1813 return RX_DROP_MONITOR;
1602 ieee80211_process_measurement_req(sdata, mgmt, len); 1814 ieee80211_process_measurement_req(sdata, mgmt, len);
1603 break; 1815 break;
1816 case WLAN_ACTION_SPCT_CHL_SWITCH:
1817 if (len < (IEEE80211_MIN_ACTION_SIZE +
1818 sizeof(mgmt->u.action.u.chan_switch)))
1819 return RX_DROP_MONITOR;
1820
1821 if (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN))
1822 return RX_DROP_MONITOR;
1823
1824 bss = ieee80211_rx_bss_get(local, sdata->u.mgd.bssid,
1825 local->hw.conf.channel->center_freq,
1826 sdata->u.mgd.ssid,
1827 sdata->u.mgd.ssid_len);
1828 if (!bss)
1829 return RX_DROP_MONITOR;
1830
1831 ieee80211_process_chanswitch(sdata,
1832 &mgmt->u.action.u.chan_switch.sw_elem, bss);
1833 ieee80211_rx_bss_put(local, bss);
1834 break;
1835 }
1836 break;
1837 case WLAN_CATEGORY_SA_QUERY:
1838 if (len < (IEEE80211_MIN_ACTION_SIZE +
1839 sizeof(mgmt->u.action.u.sa_query)))
1840 return RX_DROP_MONITOR;
1841 switch (mgmt->u.action.u.sa_query.action) {
1842 case WLAN_ACTION_SA_QUERY_REQUEST:
1843 if (sdata->vif.type != NL80211_IFTYPE_STATION)
1844 return RX_DROP_MONITOR;
1845 ieee80211_process_sa_query_req(sdata, mgmt, len);
1846 break;
1847 case WLAN_ACTION_SA_QUERY_RESPONSE:
1848 /*
1849 * SA Query response is currently only used in AP mode
1850 * and it is processed in user space.
1851 */
1852 return RX_CONTINUE;
1604 } 1853 }
1605 break; 1854 break;
1606 default: 1855 default:
@@ -1616,10 +1865,14 @@ static ieee80211_rx_result debug_noinline
1616ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) 1865ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
1617{ 1866{
1618 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); 1867 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
1868 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1619 1869
1620 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 1870 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
1621 return RX_DROP_MONITOR; 1871 return RX_DROP_MONITOR;
1622 1872
1873 if (ieee80211_drop_unencrypted(rx, mgmt->frame_control))
1874 return RX_DROP_MONITOR;
1875
1623 if (ieee80211_vif_is_mesh(&sdata->vif)) 1876 if (ieee80211_vif_is_mesh(&sdata->vif))
1624 return ieee80211_mesh_rx_mgmt(sdata, rx->skb, rx->status); 1877 return ieee80211_mesh_rx_mgmt(sdata, rx->skb, rx->status);
1625 1878
@@ -1627,11 +1880,14 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
1627 sdata->vif.type != NL80211_IFTYPE_ADHOC) 1880 sdata->vif.type != NL80211_IFTYPE_ADHOC)
1628 return RX_DROP_MONITOR; 1881 return RX_DROP_MONITOR;
1629 1882
1630 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)
1631 return RX_DROP_MONITOR;
1632 1883
1633 ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status); 1884 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
1634 return RX_QUEUED; 1885 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)
1886 return RX_DROP_MONITOR;
1887 return ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status);
1888 }
1889
1890 return ieee80211_ibss_rx_mgmt(sdata, rx->skb, rx->status);
1635} 1891}
1636 1892
1637static void ieee80211_rx_michael_mic_report(struct net_device *dev, 1893static void ieee80211_rx_michael_mic_report(struct net_device *dev,
@@ -1780,6 +2036,7 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
1780 CALL_RXH(ieee80211_rx_h_passive_scan) 2036 CALL_RXH(ieee80211_rx_h_passive_scan)
1781 CALL_RXH(ieee80211_rx_h_check) 2037 CALL_RXH(ieee80211_rx_h_check)
1782 CALL_RXH(ieee80211_rx_h_decrypt) 2038 CALL_RXH(ieee80211_rx_h_decrypt)
2039 CALL_RXH(ieee80211_rx_h_check_more_data)
1783 CALL_RXH(ieee80211_rx_h_sta_process) 2040 CALL_RXH(ieee80211_rx_h_sta_process)
1784 CALL_RXH(ieee80211_rx_h_defragment) 2041 CALL_RXH(ieee80211_rx_h_defragment)
1785 CALL_RXH(ieee80211_rx_h_ps_poll) 2042 CALL_RXH(ieee80211_rx_h_ps_poll)
@@ -1823,16 +2080,17 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
1823/* main receive path */ 2080/* main receive path */
1824 2081
1825static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, 2082static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
1826 u8 *bssid, struct ieee80211_rx_data *rx, 2083 struct ieee80211_rx_data *rx,
1827 struct ieee80211_hdr *hdr) 2084 struct ieee80211_hdr *hdr)
1828{ 2085{
2086 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, sdata->vif.type);
1829 int multicast = is_multicast_ether_addr(hdr->addr1); 2087 int multicast = is_multicast_ether_addr(hdr->addr1);
1830 2088
1831 switch (sdata->vif.type) { 2089 switch (sdata->vif.type) {
1832 case NL80211_IFTYPE_STATION: 2090 case NL80211_IFTYPE_STATION:
1833 if (!bssid) 2091 if (!bssid)
1834 return 0; 2092 return 0;
1835 if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { 2093 if (!ieee80211_bssid_match(bssid, sdata->u.mgd.bssid)) {
1836 if (!(rx->flags & IEEE80211_RX_IN_SCAN)) 2094 if (!(rx->flags & IEEE80211_RX_IN_SCAN))
1837 return 0; 2095 return 0;
1838 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2096 rx->flags &= ~IEEE80211_RX_RA_MATCH;
@@ -1850,7 +2108,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
1850 if (ieee80211_is_beacon(hdr->frame_control)) { 2108 if (ieee80211_is_beacon(hdr->frame_control)) {
1851 return 1; 2109 return 1;
1852 } 2110 }
1853 else if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { 2111 else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) {
1854 if (!(rx->flags & IEEE80211_RX_IN_SCAN)) 2112 if (!(rx->flags & IEEE80211_RX_IN_SCAN))
1855 return 0; 2113 return 0;
1856 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2114 rx->flags &= ~IEEE80211_RX_RA_MATCH;
@@ -1928,7 +2186,6 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
1928 int prepares; 2186 int prepares;
1929 struct ieee80211_sub_if_data *prev = NULL; 2187 struct ieee80211_sub_if_data *prev = NULL;
1930 struct sk_buff *skb_new; 2188 struct sk_buff *skb_new;
1931 u8 *bssid;
1932 2189
1933 hdr = (struct ieee80211_hdr *)skb->data; 2190 hdr = (struct ieee80211_hdr *)skb->data;
1934 memset(&rx, 0, sizeof(rx)); 2191 memset(&rx, 0, sizeof(rx));
@@ -1956,7 +2213,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
1956 rx.flags |= IEEE80211_RX_IN_SCAN; 2213 rx.flags |= IEEE80211_RX_IN_SCAN;
1957 2214
1958 ieee80211_parse_qos(&rx); 2215 ieee80211_parse_qos(&rx);
1959 ieee80211_verify_ip_alignment(&rx); 2216 ieee80211_verify_alignment(&rx);
1960 2217
1961 skb = rx.skb; 2218 skb = rx.skb;
1962 2219
@@ -1967,9 +2224,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
1967 if (sdata->vif.type == NL80211_IFTYPE_MONITOR) 2224 if (sdata->vif.type == NL80211_IFTYPE_MONITOR)
1968 continue; 2225 continue;
1969 2226
1970 bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type);
1971 rx.flags |= IEEE80211_RX_RA_MATCH; 2227 rx.flags |= IEEE80211_RX_RA_MATCH;
1972 prepares = prepare_for_handlers(sdata, bssid, &rx, hdr); 2228 prepares = prepare_for_handlers(sdata, &rx, hdr);
1973 2229
1974 if (!prepares) 2230 if (!prepares)
1975 continue; 2231 continue;
@@ -2174,11 +2430,9 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
2174 /* new un-ordered ampdu frame - process it */ 2430 /* new un-ordered ampdu frame - process it */
2175 2431
2176 /* reset session timer */ 2432 /* reset session timer */
2177 if (tid_agg_rx->timeout) { 2433 if (tid_agg_rx->timeout)
2178 unsigned long expires = 2434 mod_timer(&tid_agg_rx->session_timer,
2179 jiffies + (tid_agg_rx->timeout / 1000) * HZ; 2435 TU_TO_EXP_TIME(tid_agg_rx->timeout));
2180 mod_timer(&tid_agg_rx->session_timer, expires);
2181 }
2182 2436
2183 /* if this mpdu is fragmented - terminate rx aggregation session */ 2437 /* if this mpdu is fragmented - terminate rx aggregation session */
2184 sc = le16_to_cpu(hdr->seq_ctrl); 2438 sc = le16_to_cpu(hdr->seq_ctrl);
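Both session-timer hunks above switch from the open-coded expression to TU_TO_EXP_TIME(); presumably the macro (in ieee80211_i.h) is along these lines, treating the BlockAck timeout as 802.11 time units of 1024 us rather than the millisecond approximation used before:

	#define TU_TO_EXP_TIME(x)	(jiffies + usecs_to_jiffies((x) * 1024))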
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index f5c7c3371929..0e81e1633a66 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -12,14 +12,11 @@
12 * published by the Free Software Foundation. 12 * published by the Free Software Foundation.
13 */ 13 */
14 14
15/* TODO: 15/* TODO: figure out how to avoid letting the "current BSS" expire */
16 * order BSS list by RSSI(?) ("quality of AP")
17 * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE,
18 * SSID)
19 */
20 16
21#include <linux/wireless.h> 17#include <linux/wireless.h>
22#include <linux/if_arp.h> 18#include <linux/if_arp.h>
19#include <linux/rtnetlink.h>
23#include <net/mac80211.h> 20#include <net/mac80211.h>
24#include <net/iw_handler.h> 21#include <net/iw_handler.h>
25 22
@@ -30,192 +27,29 @@
30#define IEEE80211_CHANNEL_TIME (HZ / 33) 27#define IEEE80211_CHANNEL_TIME (HZ / 33)
31#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5) 28#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5)
32 29
33void ieee80211_rx_bss_list_init(struct ieee80211_local *local)
34{
35 spin_lock_init(&local->bss_lock);
36 INIT_LIST_HEAD(&local->bss_list);
37}
38
39void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local)
40{
41 struct ieee80211_bss *bss, *tmp;
42
43 list_for_each_entry_safe(bss, tmp, &local->bss_list, list)
44 ieee80211_rx_bss_put(local, bss);
45}
46
47struct ieee80211_bss * 30struct ieee80211_bss *
48ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq, 31ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
49 u8 *ssid, u8 ssid_len) 32 u8 *ssid, u8 ssid_len)
50{ 33{
51 struct ieee80211_bss *bss; 34 return (void *)cfg80211_get_bss(local->hw.wiphy,
52 35 ieee80211_get_channel(local->hw.wiphy,
53 spin_lock_bh(&local->bss_lock); 36 freq),
54 bss = local->bss_hash[STA_HASH(bssid)]; 37 bssid, ssid, ssid_len,
55 while (bss) { 38 0, 0);
56 if (!bss_mesh_cfg(bss) &&
57 !memcmp(bss->bssid, bssid, ETH_ALEN) &&
58 bss->freq == freq &&
59 bss->ssid_len == ssid_len &&
60 (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) {
61 atomic_inc(&bss->users);
62 break;
63 }
64 bss = bss->hnext;
65 }
66 spin_unlock_bh(&local->bss_lock);
67 return bss;
68}
69
70/* Caller must hold local->bss_lock */
71static void __ieee80211_rx_bss_hash_add(struct ieee80211_local *local,
72 struct ieee80211_bss *bss)
73{
74 u8 hash_idx;
75
76 if (bss_mesh_cfg(bss))
77 hash_idx = mesh_id_hash(bss_mesh_id(bss),
78 bss_mesh_id_len(bss));
79 else
80 hash_idx = STA_HASH(bss->bssid);
81
82 bss->hnext = local->bss_hash[hash_idx];
83 local->bss_hash[hash_idx] = bss;
84}
85
86/* Caller must hold local->bss_lock */
87static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local,
88 struct ieee80211_bss *bss)
89{
90 struct ieee80211_bss *b, *prev = NULL;
91 b = local->bss_hash[STA_HASH(bss->bssid)];
92 while (b) {
93 if (b == bss) {
94 if (!prev)
95 local->bss_hash[STA_HASH(bss->bssid)] =
96 bss->hnext;
97 else
98 prev->hnext = bss->hnext;
99 break;
100 }
101 prev = b;
102 b = b->hnext;
103 }
104}
105
106struct ieee80211_bss *
107ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq,
108 u8 *ssid, u8 ssid_len)
109{
110 struct ieee80211_bss *bss;
111
112 bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
113 if (!bss)
114 return NULL;
115 atomic_set(&bss->users, 2);
116 memcpy(bss->bssid, bssid, ETH_ALEN);
117 bss->freq = freq;
118 if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) {
119 memcpy(bss->ssid, ssid, ssid_len);
120 bss->ssid_len = ssid_len;
121 }
122
123 spin_lock_bh(&local->bss_lock);
124 /* TODO: order by RSSI? */
125 list_add_tail(&bss->list, &local->bss_list);
126 __ieee80211_rx_bss_hash_add(local, bss);
127 spin_unlock_bh(&local->bss_lock);
128 return bss;
129}
130
131#ifdef CONFIG_MAC80211_MESH
132static struct ieee80211_bss *
133ieee80211_rx_mesh_bss_get(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
134 u8 *mesh_cfg, int freq)
135{
136 struct ieee80211_bss *bss;
137
138 spin_lock_bh(&local->bss_lock);
139 bss = local->bss_hash[mesh_id_hash(mesh_id, mesh_id_len)];
140 while (bss) {
141 if (bss_mesh_cfg(bss) &&
142 !memcmp(bss_mesh_cfg(bss), mesh_cfg, MESH_CFG_CMP_LEN) &&
143 bss->freq == freq &&
144 mesh_id_len == bss->mesh_id_len &&
145 (mesh_id_len == 0 || !memcmp(bss->mesh_id, mesh_id,
146 mesh_id_len))) {
147 atomic_inc(&bss->users);
148 break;
149 }
150 bss = bss->hnext;
151 }
152 spin_unlock_bh(&local->bss_lock);
153 return bss;
154} 39}
155 40
156static struct ieee80211_bss * 41static void ieee80211_rx_bss_free(struct cfg80211_bss *cbss)
157ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
158 u8 *mesh_cfg, int mesh_config_len, int freq)
159{ 42{
160 struct ieee80211_bss *bss; 43 struct ieee80211_bss *bss = (void *)cbss;
161
162 if (mesh_config_len != IEEE80211_MESH_CONFIG_LEN)
163 return NULL;
164
165 bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
166 if (!bss)
167 return NULL;
168
169 bss->mesh_cfg = kmalloc(MESH_CFG_CMP_LEN, GFP_ATOMIC);
170 if (!bss->mesh_cfg) {
171 kfree(bss);
172 return NULL;
173 }
174
175 if (mesh_id_len && mesh_id_len <= IEEE80211_MAX_MESH_ID_LEN) {
176 bss->mesh_id = kmalloc(mesh_id_len, GFP_ATOMIC);
177 if (!bss->mesh_id) {
178 kfree(bss->mesh_cfg);
179 kfree(bss);
180 return NULL;
181 }
182 memcpy(bss->mesh_id, mesh_id, mesh_id_len);
183 }
184
185 atomic_set(&bss->users, 2);
186 memcpy(bss->mesh_cfg, mesh_cfg, MESH_CFG_CMP_LEN);
187 bss->mesh_id_len = mesh_id_len;
188 bss->freq = freq;
189 spin_lock_bh(&local->bss_lock);
190 /* TODO: order by RSSI? */
191 list_add_tail(&bss->list, &local->bss_list);
192 __ieee80211_rx_bss_hash_add(local, bss);
193 spin_unlock_bh(&local->bss_lock);
194 return bss;
195}
196#endif
197 44
198static void ieee80211_rx_bss_free(struct ieee80211_bss *bss)
199{
200 kfree(bss->ies);
201 kfree(bss_mesh_id(bss)); 45 kfree(bss_mesh_id(bss));
202 kfree(bss_mesh_cfg(bss)); 46 kfree(bss_mesh_cfg(bss));
203 kfree(bss);
204} 47}
205 48
206void ieee80211_rx_bss_put(struct ieee80211_local *local, 49void ieee80211_rx_bss_put(struct ieee80211_local *local,
207 struct ieee80211_bss *bss) 50 struct ieee80211_bss *bss)
208{ 51{
209 local_bh_disable(); 52 cfg80211_put_bss((struct cfg80211_bss *)bss);
210 if (!atomic_dec_and_lock(&bss->users, &local->bss_lock)) {
211 local_bh_enable();
212 return;
213 }
214
215 __ieee80211_rx_bss_hash_del(local, bss);
216 list_del(&bss->list);
217 spin_unlock_bh(&local->bss_lock);
218 ieee80211_rx_bss_free(bss);
219} 53}
220 54
221struct ieee80211_bss * 55struct ieee80211_bss *
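mac80211's private BSS hash table and list are gone here: lookup, refcounting and expiry are delegated to cfg80211's shared BSS table (cfg80211_get_bss / cfg80211_put_bss / cfg80211_inform_bss_frame), with the free_priv hook used to release the mesh extras when cfg80211 drops an entry. The direct (void *) casts between the two types only work on the assumption that the cfg80211 entry is embedded as the first member of the mac80211 structure, roughly:

    /* assumed layout (ieee80211_i.h); cbss must stay the first member so
     * struct ieee80211_bss * and struct cfg80211_bss * cast freely */
    struct ieee80211_bss {
            struct cfg80211_bss cbss;

            /* mac80211-private extras kept next to the cfg80211 entry */
            size_t ssid_len;
            u8 ssid[IEEE80211_MAX_SSID_LEN];
            bool wmm_used;
            /* ... */
    };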
@@ -224,39 +58,25 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
224 struct ieee80211_mgmt *mgmt, 58 struct ieee80211_mgmt *mgmt,
225 size_t len, 59 size_t len,
226 struct ieee802_11_elems *elems, 60 struct ieee802_11_elems *elems,
227 int freq, bool beacon) 61 struct ieee80211_channel *channel,
62 bool beacon)
228{ 63{
229 struct ieee80211_bss *bss; 64 struct ieee80211_bss *bss;
230 int clen; 65 int clen;
66 s32 signal = 0;
231 67
232#ifdef CONFIG_MAC80211_MESH 68 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
233 if (elems->mesh_config) 69 signal = rx_status->signal * 100;
234 bss = ieee80211_rx_mesh_bss_get(local, elems->mesh_id, 70 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
235 elems->mesh_id_len, elems->mesh_config, freq); 71 signal = (rx_status->signal * 100) / local->hw.max_signal;
236 else 72
237#endif 73 bss = (void *)cfg80211_inform_bss_frame(local->hw.wiphy, channel,
238 bss = ieee80211_rx_bss_get(local, mgmt->bssid, freq, 74 mgmt, len, signal, GFP_ATOMIC);
239 elems->ssid, elems->ssid_len); 75
240 if (!bss) { 76 if (!bss)
241#ifdef CONFIG_MAC80211_MESH 77 return NULL;
242 if (elems->mesh_config) 78
243 bss = ieee80211_rx_mesh_bss_add(local, elems->mesh_id, 79 bss->cbss.free_priv = ieee80211_rx_bss_free;
244 elems->mesh_id_len, elems->mesh_config,
245 elems->mesh_config_len, freq);
246 else
247#endif
248 bss = ieee80211_rx_bss_add(local, mgmt->bssid, freq,
249 elems->ssid, elems->ssid_len);
250 if (!bss)
251 return NULL;
252 } else {
253#if 0
254 /* TODO: order by RSSI? */
255 spin_lock_bh(&local->bss_lock);
256 list_move_tail(&bss->list, &local->bss_list);
257 spin_unlock_bh(&local->bss_lock);
258#endif
259 }
260 80
261 /* save the ERP value so that it is available at association time */ 81 /* save the ERP value so that it is available at association time */
262 if (elems->erp_info && elems->erp_info_len >= 1) { 82 if (elems->erp_info && elems->erp_info_len >= 1) {
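The signal handed to cfg80211_inform_bss_frame() above is scaled to match what cfg80211 expects for the wiphy's signal type: hardware reporting dBm passes hundredths of a dBm (mBm), so a -63 dBm beacon becomes -6300, while hardware reporting an unspecified 0..max_signal quality is normalised to 0..100, e.g. a reading of 45 with max_signal 60 becomes (45 * 100) / 60 = 75.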
@@ -264,9 +84,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
264 bss->has_erp_value = 1; 84 bss->has_erp_value = 1;
265 } 85 }
266 86
267 bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int);
268 bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info);
269
270 if (elems->tim) { 87 if (elems->tim) {
271 struct ieee80211_tim_ie *tim_ie = 88 struct ieee80211_tim_ie *tim_ie =
272 (struct ieee80211_tim_ie *)elems->tim; 89 (struct ieee80211_tim_ie *)elems->tim;
@@ -295,37 +112,27 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
295 bss->supp_rates_len += clen; 112 bss->supp_rates_len += clen;
296 } 113 }
297 114
298 bss->band = rx_status->band;
299
300 bss->timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
301 bss->last_update = jiffies;
302 bss->signal = rx_status->signal;
303 bss->noise = rx_status->noise;
304 bss->qual = rx_status->qual;
305 bss->wmm_used = elems->wmm_param || elems->wmm_info; 115 bss->wmm_used = elems->wmm_param || elems->wmm_info;
306 116
307 if (!beacon) 117 if (!beacon)
308 bss->last_probe_resp = jiffies; 118 bss->last_probe_resp = jiffies;
309 119
310 /*
311 * For probe responses, or if we don't have any information yet,
312 * use the IEs from the beacon.
313 */
314 if (!bss->ies || !beacon) {
315 if (bss->ies == NULL || bss->ies_len < elems->total_len) {
316 kfree(bss->ies);
317 bss->ies = kmalloc(elems->total_len, GFP_ATOMIC);
318 }
319 if (bss->ies) {
320 memcpy(bss->ies, elems->ie_start, elems->total_len);
321 bss->ies_len = elems->total_len;
322 } else
323 bss->ies_len = 0;
324 }
325
326 return bss; 120 return bss;
327} 121}
328 122
123void ieee80211_rx_bss_remove(struct ieee80211_sub_if_data *sdata, u8 *bssid,
124 int freq, u8 *ssid, u8 ssid_len)
125{
126 struct ieee80211_bss *bss;
127 struct ieee80211_local *local = sdata->local;
128
129 bss = ieee80211_rx_bss_get(local, bssid, freq, ssid, ssid_len);
130 if (bss) {
131 cfg80211_unlink_bss(local->hw.wiphy, (void *)bss);
132 ieee80211_rx_bss_put(local, bss);
133 }
134}
135
329ieee80211_rx_result 136ieee80211_rx_result
330ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, 137ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
331 struct ieee80211_rx_status *rx_status) 138 struct ieee80211_rx_status *rx_status)
@@ -387,7 +194,7 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
387 194
388 bss = ieee80211_bss_info_update(sdata->local, rx_status, 195 bss = ieee80211_bss_info_update(sdata->local, rx_status,
389 mgmt, skb->len, &elems, 196 mgmt, skb->len, &elems,
390 freq, beacon); 197 channel, beacon);
391 if (bss) 198 if (bss)
392 ieee80211_rx_bss_put(sdata->local, bss); 199 ieee80211_rx_bss_put(sdata->local, bss);
393 200
@@ -395,56 +202,22 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
395 return RX_QUEUED; 202 return RX_QUEUED;
396} 203}
397 204
398static void ieee80211_send_nullfunc(struct ieee80211_local *local, 205void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
399 struct ieee80211_sub_if_data *sdata,
400 int powersave)
401{
402 struct sk_buff *skb;
403 struct ieee80211_hdr *nullfunc;
404 __le16 fc;
405
406 skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24);
407 if (!skb) {
408 printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc "
409 "frame\n", sdata->dev->name);
410 return;
411 }
412 skb_reserve(skb, local->hw.extra_tx_headroom);
413
414 nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24);
415 memset(nullfunc, 0, 24);
416 fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC |
417 IEEE80211_FCTL_TODS);
418 if (powersave)
419 fc |= cpu_to_le16(IEEE80211_FCTL_PM);
420 nullfunc->frame_control = fc;
421 memcpy(nullfunc->addr1, sdata->u.sta.bssid, ETH_ALEN);
422 memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN);
423 memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN);
424
425 ieee80211_tx_skb(sdata, skb, 0);
426}
427
428void ieee80211_scan_completed(struct ieee80211_hw *hw)
429{ 206{
430 struct ieee80211_local *local = hw_to_local(hw); 207 struct ieee80211_local *local = hw_to_local(hw);
431 struct ieee80211_sub_if_data *sdata; 208 struct ieee80211_sub_if_data *sdata;
432 union iwreq_data wrqu;
433 209
434 if (WARN_ON(!local->hw_scanning && !local->sw_scanning)) 210 if (WARN_ON(!local->hw_scanning && !local->sw_scanning))
435 return; 211 return;
436 212
437 local->last_scan_completed = jiffies; 213 if (WARN_ON(!local->scan_req))
438 memset(&wrqu, 0, sizeof(wrqu)); 214 return;
439 215
440 /* 216 if (local->scan_req != &local->int_scan_req)
441 * local->scan_sdata could have been NULLed by the interface 217 cfg80211_scan_done(local->scan_req, aborted);
442 * down code in case we were scanning on an interface that is 218 local->scan_req = NULL;
443 * being taken down. 219
444 */ 220 local->last_scan_completed = jiffies;
445 sdata = local->scan_sdata;
446 if (sdata)
447 wireless_send_event(sdata->dev, SIOCGIWSCAN, &wrqu, NULL);
448 221
449 if (local->hw_scanning) { 222 if (local->hw_scanning) {
450 local->hw_scanning = false; 223 local->hw_scanning = false;
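ieee80211_scan_completed() now carries an aborted flag through to cfg80211_scan_done(), and only externally submitted requests are reported back (the internal int_scan_req is not). For a driver implementing hw_scan, the completion path is assumed to look roughly like this (mydrv_* names are placeholders):

    /* hypothetical driver completion handler */
    static void mydrv_fw_scan_done(struct mydrv_priv *priv, bool was_aborted)
    {
            /* lets mac80211 finish the cfg80211 request and restart queues */
            ieee80211_scan_completed(priv->hw, was_aborted);
    }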
@@ -472,34 +245,46 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
472 netif_addr_unlock(local->mdev); 245 netif_addr_unlock(local->mdev);
473 netif_tx_unlock_bh(local->mdev); 246 netif_tx_unlock_bh(local->mdev);
474 247
475 rcu_read_lock(); 248 if (local->ops->sw_scan_complete)
476 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 249 local->ops->sw_scan_complete(local_to_hw(local));
250
251 mutex_lock(&local->iflist_mtx);
252 list_for_each_entry(sdata, &local->interfaces, list) {
253 if (!netif_running(sdata->dev))
254 continue;
255
477 /* Tell AP we're back */ 256 /* Tell AP we're back */
478 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 257 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
479 if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { 258 if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) {
480 ieee80211_send_nullfunc(local, sdata, 0); 259 ieee80211_send_nullfunc(local, sdata, 0);
481 netif_tx_wake_all_queues(sdata->dev); 260 netif_tx_wake_all_queues(sdata->dev);
482 } 261 }
483 } else 262 } else
484 netif_tx_wake_all_queues(sdata->dev); 263 netif_tx_wake_all_queues(sdata->dev);
264
265 /* re-enable beaconing */
266 if (sdata->vif.type == NL80211_IFTYPE_AP ||
267 sdata->vif.type == NL80211_IFTYPE_ADHOC ||
268 sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
269 ieee80211_if_config(sdata,
270 IEEE80211_IFCC_BEACON_ENABLED);
485 } 271 }
486 rcu_read_unlock(); 272 mutex_unlock(&local->iflist_mtx);
487 273
488 done: 274 done:
489 ieee80211_mlme_notify_scan_completed(local); 275 ieee80211_mlme_notify_scan_completed(local);
276 ieee80211_ibss_notify_scan_completed(local);
490 ieee80211_mesh_notify_scan_completed(local); 277 ieee80211_mesh_notify_scan_completed(local);
491} 278}
492EXPORT_SYMBOL(ieee80211_scan_completed); 279EXPORT_SYMBOL(ieee80211_scan_completed);
493 280
494
495void ieee80211_scan_work(struct work_struct *work) 281void ieee80211_scan_work(struct work_struct *work)
496{ 282{
497 struct ieee80211_local *local = 283 struct ieee80211_local *local =
498 container_of(work, struct ieee80211_local, scan_work.work); 284 container_of(work, struct ieee80211_local, scan_work.work);
499 struct ieee80211_sub_if_data *sdata = local->scan_sdata; 285 struct ieee80211_sub_if_data *sdata = local->scan_sdata;
500 struct ieee80211_supported_band *sband;
501 struct ieee80211_channel *chan; 286 struct ieee80211_channel *chan;
502 int skip; 287 int skip, i;
503 unsigned long next_delay = 0; 288 unsigned long next_delay = 0;
504 289
505 /* 290 /*
@@ -510,33 +295,13 @@ void ieee80211_scan_work(struct work_struct *work)
510 295
511 switch (local->scan_state) { 296 switch (local->scan_state) {
512 case SCAN_SET_CHANNEL: 297 case SCAN_SET_CHANNEL:
513 /*
514 * Get current scan band. scan_band may be IEEE80211_NUM_BANDS
515 * after we successfully scanned the last channel of the last
516 * band (and the last band is supported by the hw)
517 */
518 if (local->scan_band < IEEE80211_NUM_BANDS)
519 sband = local->hw.wiphy->bands[local->scan_band];
520 else
521 sband = NULL;
522
523 /*
524 * If we are at an unsupported band and have more bands
525 * left to scan, advance to the next supported one.
526 */
527 while (!sband && local->scan_band < IEEE80211_NUM_BANDS - 1) {
528 local->scan_band++;
529 sband = local->hw.wiphy->bands[local->scan_band];
530 local->scan_channel_idx = 0;
531 }
532
533 /* if no more bands/channels left, complete scan */ 298 /* if no more bands/channels left, complete scan */
534 if (!sband || local->scan_channel_idx >= sband->n_channels) { 299 if (local->scan_channel_idx >= local->scan_req->n_channels) {
535 ieee80211_scan_completed(local_to_hw(local)); 300 ieee80211_scan_completed(local_to_hw(local), false);
536 return; 301 return;
537 } 302 }
538 skip = 0; 303 skip = 0;
539 chan = &sband->channels[local->scan_channel_idx]; 304 chan = local->scan_req->channels[local->scan_channel_idx];
540 305
541 if (chan->flags & IEEE80211_CHAN_DISABLED || 306 if (chan->flags & IEEE80211_CHAN_DISABLED ||
542 (sdata->vif.type == NL80211_IFTYPE_ADHOC && 307 (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
@@ -552,15 +317,6 @@ void ieee80211_scan_work(struct work_struct *work)
552 317
553 /* advance state machine to next channel/band */ 318 /* advance state machine to next channel/band */
554 local->scan_channel_idx++; 319 local->scan_channel_idx++;
555 if (local->scan_channel_idx >= sband->n_channels) {
556 /*
557 * scan_band may end up == IEEE80211_NUM_BANDS, but
558 * we'll catch that case above and complete the scan
559 * if that is the case.
560 */
561 local->scan_band++;
562 local->scan_channel_idx = 0;
563 }
564 320
565 if (skip) 321 if (skip)
566 break; 322 break;
@@ -573,10 +329,15 @@ void ieee80211_scan_work(struct work_struct *work)
573 next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; 329 next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
574 local->scan_state = SCAN_SET_CHANNEL; 330 local->scan_state = SCAN_SET_CHANNEL;
575 331
576 if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN) 332 if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN ||
333 !local->scan_req->n_ssids)
577 break; 334 break;
578 ieee80211_send_probe_req(sdata, NULL, local->scan_ssid, 335 for (i = 0; i < local->scan_req->n_ssids; i++)
579 local->scan_ssid_len); 336 ieee80211_send_probe_req(
337 sdata, NULL,
338 local->scan_req->ssids[i].ssid,
339 local->scan_req->ssids[i].ssid_len,
340 local->scan_req->ie, local->scan_req->ie_len);
580 next_delay = IEEE80211_CHANNEL_TIME; 341 next_delay = IEEE80211_CHANNEL_TIME;
581 break; 342 break;
582 } 343 }
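Active scanning now walks the SSID list from the cfg80211 scan request and sends one probe request per entry, appending any extra IEs from the request; passive channels, or a request with n_ssids == 0, skip probing entirely. A hypothetical caller-side sketch of filling such a request (allocation and channel setup elided):

    /* hypothetical example; cfg80211_ssid fields as used in the loop above */
    static void fill_ssids(struct cfg80211_scan_request *req)
    {
            static struct cfg80211_ssid two_ssids[2] = {
                    { .ssid = "guest", .ssid_len = 5 },
                    { .ssid_len = 0 },      /* wildcard (broadcast SSID) probe */
            };

            req->ssids = two_ssids;
            req->n_ssids = 2;
            req->ie = NULL;                 /* no extra IEs in the probes */
            req->ie_len = 0;
    }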
@@ -587,14 +348,19 @@ void ieee80211_scan_work(struct work_struct *work)
587 348
588 349
589int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, 350int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
590 u8 *ssid, size_t ssid_len) 351 struct cfg80211_scan_request *req)
591{ 352{
592 struct ieee80211_local *local = scan_sdata->local; 353 struct ieee80211_local *local = scan_sdata->local;
593 struct ieee80211_sub_if_data *sdata; 354 struct ieee80211_sub_if_data *sdata;
594 355
595 if (ssid_len > IEEE80211_MAX_SSID_LEN) 356 if (!req)
596 return -EINVAL; 357 return -EINVAL;
597 358
359 if (local->scan_req && local->scan_req != req)
360 return -EBUSY;
361
362 local->scan_req = req;
363
598 /* MLME-SCAN.request (page 118) page 144 (11.1.3.1) 364 /* MLME-SCAN.request (page 118) page 144 (11.1.3.1)
599 * BSSType: INFRASTRUCTURE, INDEPENDENT, ANY_BSS 365 * BSSType: INFRASTRUCTURE, INDEPENDENT, ANY_BSS
600 * BSSID: MACAddress 366 * BSSID: MACAddress
@@ -622,7 +388,7 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
622 int rc; 388 int rc;
623 389
624 local->hw_scanning = true; 390 local->hw_scanning = true;
625 rc = local->ops->hw_scan(local_to_hw(local), ssid, ssid_len); 391 rc = local->ops->hw_scan(local_to_hw(local), req);
626 if (rc) { 392 if (rc) {
627 local->hw_scanning = false; 393 local->hw_scanning = false;
628 return rc; 394 return rc;
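The hw_scan driver callback changes from taking a single SSID to taking the whole cfg80211_scan_request, so hardware-offloaded scans can honour multiple SSIDs, the channel list and extra IEs. A hypothetical driver stub under that assumption (mydrv_* names are placeholders):

    static int mydrv_hw_scan(struct ieee80211_hw *hw,
                             struct cfg80211_scan_request *req)
    {
            struct mydrv_priv *priv = hw->priv;
            int i;

            for (i = 0; i < req->n_channels; i++)
                    mydrv_fw_add_channel(priv, req->channels[i]->center_freq);

            /* firmware signals completion; the driver then calls
             * ieee80211_scan_completed(hw, aborted) */
            return mydrv_fw_start_scan(priv, req->ssids, req->n_ssids,
                                       req->ie, req->ie_len);
    }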
@@ -632,28 +398,35 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
632 } 398 }
633 399
634 local->sw_scanning = true; 400 local->sw_scanning = true;
401 if (local->ops->sw_scan_start)
402 local->ops->sw_scan_start(local_to_hw(local));
403
404 mutex_lock(&local->iflist_mtx);
405 list_for_each_entry(sdata, &local->interfaces, list) {
406 if (!netif_running(sdata->dev))
407 continue;
408
409 /* disable beaconing */
410 if (sdata->vif.type == NL80211_IFTYPE_AP ||
411 sdata->vif.type == NL80211_IFTYPE_ADHOC ||
412 sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
413 ieee80211_if_config(sdata,
414 IEEE80211_IFCC_BEACON_ENABLED);
635 415
636 rcu_read_lock();
637 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
638 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 416 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
639 if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { 417 if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) {
640 netif_tx_stop_all_queues(sdata->dev); 418 netif_tx_stop_all_queues(sdata->dev);
641 ieee80211_send_nullfunc(local, sdata, 1); 419 ieee80211_send_nullfunc(local, sdata, 1);
642 } 420 }
643 } else 421 } else
644 netif_tx_stop_all_queues(sdata->dev); 422 netif_tx_stop_all_queues(sdata->dev);
645 } 423 }
646 rcu_read_unlock(); 424 mutex_unlock(&local->iflist_mtx);
647 425
648 if (ssid) {
649 local->scan_ssid_len = ssid_len;
650 memcpy(local->scan_ssid, ssid, ssid_len);
651 } else
652 local->scan_ssid_len = 0;
653 local->scan_state = SCAN_SET_CHANNEL; 426 local->scan_state = SCAN_SET_CHANNEL;
654 local->scan_channel_idx = 0; 427 local->scan_channel_idx = 0;
655 local->scan_band = IEEE80211_BAND_2GHZ;
656 local->scan_sdata = scan_sdata; 428 local->scan_sdata = scan_sdata;
429 local->scan_req = req;
657 430
658 netif_addr_lock_bh(local->mdev); 431 netif_addr_lock_bh(local->mdev);
659 local->filter_flags |= FIF_BCN_PRBRESP_PROMISC; 432 local->filter_flags |= FIF_BCN_PRBRESP_PROMISC;
@@ -673,13 +446,21 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
673 446
674 447
675int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, 448int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
676 u8 *ssid, size_t ssid_len) 449 struct cfg80211_scan_request *req)
677{ 450{
678 struct ieee80211_local *local = sdata->local; 451 struct ieee80211_local *local = sdata->local;
679 struct ieee80211_if_sta *ifsta; 452 struct ieee80211_if_managed *ifmgd;
453
454 if (!req)
455 return -EINVAL;
456
457 if (local->scan_req && local->scan_req != req)
458 return -EBUSY;
459
460 local->scan_req = req;
680 461
681 if (sdata->vif.type != NL80211_IFTYPE_STATION) 462 if (sdata->vif.type != NL80211_IFTYPE_STATION)
682 return ieee80211_start_scan(sdata, ssid, ssid_len); 463 return ieee80211_start_scan(sdata, req);
683 464
684 /* 465 /*
685 * STA has a state machine that might need to defer scanning 466 * STA has a state machine that might need to defer scanning
@@ -693,242 +474,9 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
693 return -EBUSY; 474 return -EBUSY;
694 } 475 }
695 476
696 ifsta = &sdata->u.sta; 477 ifmgd = &sdata->u.mgd;
697 478 set_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request);
698 ifsta->scan_ssid_len = ssid_len; 479 queue_work(local->hw.workqueue, &ifmgd->work);
699 if (ssid_len)
700 memcpy(ifsta->scan_ssid, ssid, ssid_len);
701 set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request);
702 queue_work(local->hw.workqueue, &ifsta->work);
703 480
704 return 0; 481 return 0;
705} 482}
706
707
708static void ieee80211_scan_add_ies(struct iw_request_info *info,
709 struct ieee80211_bss *bss,
710 char **current_ev, char *end_buf)
711{
712 u8 *pos, *end, *next;
713 struct iw_event iwe;
714
715 if (bss == NULL || bss->ies == NULL)
716 return;
717
718 /*
719 * If needed, fragment the IEs buffer (at IE boundaries) into short
720 * enough fragments to fit into IW_GENERIC_IE_MAX octet messages.
721 */
722 pos = bss->ies;
723 end = pos + bss->ies_len;
724
725 while (end - pos > IW_GENERIC_IE_MAX) {
726 next = pos + 2 + pos[1];
727 while (next + 2 + next[1] - pos < IW_GENERIC_IE_MAX)
728 next = next + 2 + next[1];
729
730 memset(&iwe, 0, sizeof(iwe));
731 iwe.cmd = IWEVGENIE;
732 iwe.u.data.length = next - pos;
733 *current_ev = iwe_stream_add_point(info, *current_ev,
734 end_buf, &iwe, pos);
735
736 pos = next;
737 }
738
739 if (end > pos) {
740 memset(&iwe, 0, sizeof(iwe));
741 iwe.cmd = IWEVGENIE;
742 iwe.u.data.length = end - pos;
743 *current_ev = iwe_stream_add_point(info, *current_ev,
744 end_buf, &iwe, pos);
745 }
746}
747
748
749static char *
750ieee80211_scan_result(struct ieee80211_local *local,
751 struct iw_request_info *info,
752 struct ieee80211_bss *bss,
753 char *current_ev, char *end_buf)
754{
755 struct iw_event iwe;
756 char *buf;
757
758 if (time_after(jiffies,
759 bss->last_update + IEEE80211_SCAN_RESULT_EXPIRE))
760 return current_ev;
761
762 memset(&iwe, 0, sizeof(iwe));
763 iwe.cmd = SIOCGIWAP;
764 iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
765 memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN);
766 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
767 IW_EV_ADDR_LEN);
768
769 memset(&iwe, 0, sizeof(iwe));
770 iwe.cmd = SIOCGIWESSID;
771 if (bss_mesh_cfg(bss)) {
772 iwe.u.data.length = bss_mesh_id_len(bss);
773 iwe.u.data.flags = 1;
774 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
775 &iwe, bss_mesh_id(bss));
776 } else {
777 iwe.u.data.length = bss->ssid_len;
778 iwe.u.data.flags = 1;
779 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
780 &iwe, bss->ssid);
781 }
782
783 if (bss->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)
784 || bss_mesh_cfg(bss)) {
785 memset(&iwe, 0, sizeof(iwe));
786 iwe.cmd = SIOCGIWMODE;
787 if (bss_mesh_cfg(bss))
788 iwe.u.mode = IW_MODE_MESH;
789 else if (bss->capability & WLAN_CAPABILITY_ESS)
790 iwe.u.mode = IW_MODE_MASTER;
791 else
792 iwe.u.mode = IW_MODE_ADHOC;
793 current_ev = iwe_stream_add_event(info, current_ev, end_buf,
794 &iwe, IW_EV_UINT_LEN);
795 }
796
797 memset(&iwe, 0, sizeof(iwe));
798 iwe.cmd = SIOCGIWFREQ;
799 iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq);
800 iwe.u.freq.e = 0;
801 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
802 IW_EV_FREQ_LEN);
803
804 memset(&iwe, 0, sizeof(iwe));
805 iwe.cmd = SIOCGIWFREQ;
806 iwe.u.freq.m = bss->freq;
807 iwe.u.freq.e = 6;
808 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
809 IW_EV_FREQ_LEN);
810 memset(&iwe, 0, sizeof(iwe));
811 iwe.cmd = IWEVQUAL;
812 iwe.u.qual.qual = bss->qual;
813 iwe.u.qual.level = bss->signal;
814 iwe.u.qual.noise = bss->noise;
815 iwe.u.qual.updated = local->wstats_flags;
816 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
817 IW_EV_QUAL_LEN);
818
819 memset(&iwe, 0, sizeof(iwe));
820 iwe.cmd = SIOCGIWENCODE;
821 if (bss->capability & WLAN_CAPABILITY_PRIVACY)
822 iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
823 else
824 iwe.u.data.flags = IW_ENCODE_DISABLED;
825 iwe.u.data.length = 0;
826 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
827 &iwe, "");
828
829 ieee80211_scan_add_ies(info, bss, &current_ev, end_buf);
830
831 if (bss->supp_rates_len > 0) {
832 /* display all supported rates in readable format */
833 char *p = current_ev + iwe_stream_lcp_len(info);
834 int i;
835
836 memset(&iwe, 0, sizeof(iwe));
837 iwe.cmd = SIOCGIWRATE;
838 /* Those two flags are ignored... */
839 iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
840
841 for (i = 0; i < bss->supp_rates_len; i++) {
842 iwe.u.bitrate.value = ((bss->supp_rates[i] &
843 0x7f) * 500000);
844 p = iwe_stream_add_value(info, current_ev, p,
845 end_buf, &iwe, IW_EV_PARAM_LEN);
846 }
847 current_ev = p;
848 }
849
850 buf = kmalloc(30, GFP_ATOMIC);
851 if (buf) {
852 memset(&iwe, 0, sizeof(iwe));
853 iwe.cmd = IWEVCUSTOM;
854 sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp));
855 iwe.u.data.length = strlen(buf);
856 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
857 &iwe, buf);
858 memset(&iwe, 0, sizeof(iwe));
859 iwe.cmd = IWEVCUSTOM;
860 sprintf(buf, " Last beacon: %dms ago",
861 jiffies_to_msecs(jiffies - bss->last_update));
862 iwe.u.data.length = strlen(buf);
863 current_ev = iwe_stream_add_point(info, current_ev,
864 end_buf, &iwe, buf);
865 kfree(buf);
866 }
867
868 if (bss_mesh_cfg(bss)) {
869 u8 *cfg = bss_mesh_cfg(bss);
870 buf = kmalloc(50, GFP_ATOMIC);
871 if (buf) {
872 memset(&iwe, 0, sizeof(iwe));
873 iwe.cmd = IWEVCUSTOM;
874 sprintf(buf, "Mesh network (version %d)", cfg[0]);
875 iwe.u.data.length = strlen(buf);
876 current_ev = iwe_stream_add_point(info, current_ev,
877 end_buf,
878 &iwe, buf);
879 sprintf(buf, "Path Selection Protocol ID: "
880 "0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3],
881 cfg[4]);
882 iwe.u.data.length = strlen(buf);
883 current_ev = iwe_stream_add_point(info, current_ev,
884 end_buf,
885 &iwe, buf);
886 sprintf(buf, "Path Selection Metric ID: "
887 "0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7],
888 cfg[8]);
889 iwe.u.data.length = strlen(buf);
890 current_ev = iwe_stream_add_point(info, current_ev,
891 end_buf,
892 &iwe, buf);
893 sprintf(buf, "Congestion Control Mode ID: "
894 "0x%02X%02X%02X%02X", cfg[9], cfg[10],
895 cfg[11], cfg[12]);
896 iwe.u.data.length = strlen(buf);
897 current_ev = iwe_stream_add_point(info, current_ev,
898 end_buf,
899 &iwe, buf);
900 sprintf(buf, "Channel Precedence: "
901 "0x%02X%02X%02X%02X", cfg[13], cfg[14],
902 cfg[15], cfg[16]);
903 iwe.u.data.length = strlen(buf);
904 current_ev = iwe_stream_add_point(info, current_ev,
905 end_buf,
906 &iwe, buf);
907 kfree(buf);
908 }
909 }
910
911 return current_ev;
912}
913
914
915int ieee80211_scan_results(struct ieee80211_local *local,
916 struct iw_request_info *info,
917 char *buf, size_t len)
918{
919 char *current_ev = buf;
920 char *end_buf = buf + len;
921 struct ieee80211_bss *bss;
922
923 spin_lock_bh(&local->bss_lock);
924 list_for_each_entry(bss, &local->bss_list, list) {
925 if (buf + len - current_ev <= IW_EV_ADDR_LEN) {
926 spin_unlock_bh(&local->bss_lock);
927 return -E2BIG;
928 }
929 current_ev = ieee80211_scan_result(local, info, bss,
930 current_ev, end_buf);
931 }
932 spin_unlock_bh(&local->bss_lock);
933 return current_ev - buf;
934}
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index f72bad636d8e..5f7a2624ed74 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -65,7 +65,7 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da
65 IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED; 65 IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED;
66 msr_report->u.action.u.measurement.msr_elem.type = request_ie->type; 66 msr_report->u.action.u.measurement.msr_elem.type = request_ie->type;
67 67
68 ieee80211_tx_skb(sdata, skb, 0); 68 ieee80211_tx_skb(sdata, skb, 1);
69} 69}
70 70
71void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, 71void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -84,3 +84,104 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
84 mgmt->sa, mgmt->bssid, 84 mgmt->sa, mgmt->bssid,
85 mgmt->u.action.u.measurement.dialog_token); 85 mgmt->u.action.u.measurement.dialog_token);
86} 86}
87
88void ieee80211_chswitch_work(struct work_struct *work)
89{
90 struct ieee80211_sub_if_data *sdata =
91 container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work);
92 struct ieee80211_bss *bss;
93 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
94
95 if (!netif_running(sdata->dev))
96 return;
97
98 bss = ieee80211_rx_bss_get(sdata->local, ifmgd->bssid,
99 sdata->local->hw.conf.channel->center_freq,
100 ifmgd->ssid, ifmgd->ssid_len);
101 if (!bss)
102 goto exit;
103
104 sdata->local->oper_channel = sdata->local->csa_channel;
105 /* XXX: shouldn't really modify cfg80211-owned data! */
106 if (!ieee80211_hw_config(sdata->local, IEEE80211_CONF_CHANGE_CHANNEL))
107 bss->cbss.channel = sdata->local->oper_channel;
108
109 ieee80211_rx_bss_put(sdata->local, bss);
110exit:
111 ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
112 ieee80211_wake_queues_by_reason(&sdata->local->hw,
113 IEEE80211_QUEUE_STOP_REASON_CSA);
114}
115
116void ieee80211_chswitch_timer(unsigned long data)
117{
118 struct ieee80211_sub_if_data *sdata =
119 (struct ieee80211_sub_if_data *) data;
120 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
121
122 queue_work(sdata->local->hw.workqueue, &ifmgd->chswitch_work);
123}
124
125void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata,
126 struct ieee80211_channel_sw_ie *sw_elem,
127 struct ieee80211_bss *bss)
128{
129 struct ieee80211_channel *new_ch;
130 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
131 int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num);
132
133 /* FIXME: Handle ADHOC later */
134 if (sdata->vif.type != NL80211_IFTYPE_STATION)
135 return;
136
137 if (ifmgd->state != IEEE80211_STA_MLME_ASSOCIATED)
138 return;
139
140 if (sdata->local->sw_scanning || sdata->local->hw_scanning)
141 return;
142
143 /* Disregard subsequent beacons if we are already running a timer
144 processing a CSA */
145
146 if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED)
147 return;
148
149 new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq);
150 if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED)
151 return;
152
153 sdata->local->csa_channel = new_ch;
154
155 if (sw_elem->count <= 1) {
156 queue_work(sdata->local->hw.workqueue, &ifmgd->chswitch_work);
157 } else {
158 ieee80211_stop_queues_by_reason(&sdata->local->hw,
159 IEEE80211_QUEUE_STOP_REASON_CSA);
160 ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
161 mod_timer(&ifmgd->chswitch_timer,
162 jiffies +
163 msecs_to_jiffies(sw_elem->count *
164 bss->cbss.beacon_interval));
165 }
166}
167
168void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
169 u16 capab_info, u8 *pwr_constr_elem,
170 u8 pwr_constr_elem_len)
171{
172 struct ieee80211_conf *conf = &sdata->local->hw.conf;
173
174 if (!(capab_info & WLAN_CAPABILITY_SPECTRUM_MGMT))
175 return;
176
177 /* Power constraint IE length should be 1 octet */
178 if (pwr_constr_elem_len != 1)
179 return;
180
181 if ((*pwr_constr_elem <= conf->channel->max_power) &&
182 (*pwr_constr_elem != sdata->local->power_constr_level)) {
183 sdata->local->power_constr_level = *pwr_constr_elem;
184 ieee80211_hw_config(sdata->local, 0);
185 }
186}
187
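The channel-switch timer above is armed for count * beacon_interval milliseconds once the first countdown beacon is seen. The beacon interval stored in the cfg80211 BSS entry is in TUs of 1.024 ms, so this is a slight under-estimate: with count = 5 and a 100 TU beacon interval the timer runs for 500 ms, while the exact switch time is 5 * 100 * 1.024 ms = 512 ms, so the switch work runs marginally early.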
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 10c5539c20ab..4ba3c540fcf3 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -194,12 +194,53 @@ void sta_info_destroy(struct sta_info *sta)
194 dev_kfree_skb_any(skb); 194 dev_kfree_skb_any(skb);
195 195
196 for (i = 0; i < STA_TID_NUM; i++) { 196 for (i = 0; i < STA_TID_NUM; i++) {
197 struct tid_ampdu_rx *tid_rx;
198 struct tid_ampdu_tx *tid_tx;
199
197 spin_lock_bh(&sta->lock); 200 spin_lock_bh(&sta->lock);
198 if (sta->ampdu_mlme.tid_rx[i]) 201 tid_rx = sta->ampdu_mlme.tid_rx[i];
199 del_timer_sync(&sta->ampdu_mlme.tid_rx[i]->session_timer); 202 /* Make sure timer won't free the tid_rx struct, see below */
200 if (sta->ampdu_mlme.tid_tx[i]) 203 if (tid_rx)
201 del_timer_sync(&sta->ampdu_mlme.tid_tx[i]->addba_resp_timer); 204 tid_rx->shutdown = true;
205
206 /*
207 * The stop callback cannot find this station any more, but
208 * it didn't complete its work -- start the queue if necessary
209 */
210 if (sta->ampdu_mlme.tid_state_tx[i] & HT_AGG_STATE_INITIATOR_MSK &&
211 sta->ampdu_mlme.tid_state_tx[i] & HT_AGG_STATE_REQ_STOP_BA_MSK &&
212 local->hw.ampdu_queues)
213 ieee80211_wake_queue_by_reason(&local->hw,
214 local->hw.queues + sta->tid_to_tx_q[i],
215 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
216
202 spin_unlock_bh(&sta->lock); 217 spin_unlock_bh(&sta->lock);
218
219 /*
220 * Outside spinlock - shutdown is true now so that the timer
221 * won't free tid_rx, we have to do that now. Can't let the
222 * timer do it because we have to sync the timer outside the
223 * lock that it takes itself.
224 */
225 if (tid_rx) {
226 del_timer_sync(&tid_rx->session_timer);
227 kfree(tid_rx);
228 }
229
230 /*
231 * No need to do such complications for TX agg sessions, the
232 * path leading to freeing the tid_tx struct goes via a call
233 * from the driver, and thus needs to look up the sta struct
234 * again, which cannot be found when we get here. Hence, we
235 * just need to delete the timer and free the aggregation
236 * info; we won't be telling the peer about it then but that
237 * doesn't matter if we're not talking to it again anyway.
238 */
239 tid_tx = sta->ampdu_mlme.tid_tx[i];
240 if (tid_tx) {
241 del_timer_sync(&tid_tx->addba_resp_timer);
242 kfree(tid_tx);
243 }
203 } 244 }
204 245
205 __sta_info_free(local, sta); 246 __sta_info_free(local, sta);
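The shutdown flag added here lets sta_info_destroy() take over ownership of tid_rx: the flag is set under sta->lock, the timer is synchronized outside the lock (avoiding a deadlock with the lock the timer callback itself takes), and the teardown path that the RX session timer normally drives is expected to skip its own kfree() once shutdown is set. A sketch of that assumed guard on the timer-driven side:

    /* sketch (assumption): in the timer-driven teardown path, only free
     * tid_rx when sta_info_destroy() has not already claimed it */
    if (!sta->ampdu_mlme.tid_rx[tid]->shutdown) {
            kfree(sta->ampdu_mlme.tid_rx[tid]);
            sta->ampdu_mlme.tid_rx[tid] = NULL;
    }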
@@ -246,8 +287,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
246 * enable session_timer's data differentiation. refer to 287 * enable session_timer's data differentiation. refer to
 247 * sta_rx_agg_session_timer_expired for usage */ 288 * sta_rx_agg_session_timer_expired for usage */
248 sta->timer_to_tid[i] = i; 289 sta->timer_to_tid[i] = i;
249 /* tid to tx queue: initialize according to HW (0 is valid) */ 290 sta->tid_to_tx_q[i] = -1;
250 sta->tid_to_tx_q[i] = ieee80211_num_queues(&local->hw);
251 /* rx */ 291 /* rx */
252 sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE; 292 sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE;
253 sta->ampdu_mlme.tid_rx[i] = NULL; 293 sta->ampdu_mlme.tid_rx[i] = NULL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index e49a5b99cf10..1f45573c580c 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -34,6 +34,7 @@
34 * @WLAN_STA_CLEAR_PS_FILT: Clear PS filter in hardware (using the 34 * @WLAN_STA_CLEAR_PS_FILT: Clear PS filter in hardware (using the
35 * IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next 35 * IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next
36 * frame to this station is transmitted. 36 * frame to this station is transmitted.
37 * @WLAN_STA_MFP: Management frame protection is used with this STA.
37 */ 38 */
38enum ieee80211_sta_info_flags { 39enum ieee80211_sta_info_flags {
39 WLAN_STA_AUTH = 1<<0, 40 WLAN_STA_AUTH = 1<<0,
@@ -46,6 +47,7 @@ enum ieee80211_sta_info_flags {
46 WLAN_STA_WDS = 1<<7, 47 WLAN_STA_WDS = 1<<7,
47 WLAN_STA_PSPOLL = 1<<8, 48 WLAN_STA_PSPOLL = 1<<8,
48 WLAN_STA_CLEAR_PS_FILT = 1<<9, 49 WLAN_STA_CLEAR_PS_FILT = 1<<9,
50 WLAN_STA_MFP = 1<<10,
49}; 51};
50 52
51#define STA_TID_NUM 16 53#define STA_TID_NUM 16
@@ -63,7 +65,6 @@ enum ieee80211_sta_info_flags {
63#define HT_AGG_STATE_OPERATIONAL (HT_ADDBA_REQUESTED_MSK | \ 65#define HT_AGG_STATE_OPERATIONAL (HT_ADDBA_REQUESTED_MSK | \
64 HT_ADDBA_DRV_READY_MSK | \ 66 HT_ADDBA_DRV_READY_MSK | \
65 HT_ADDBA_RECEIVED_MSK) 67 HT_ADDBA_RECEIVED_MSK)
66#define HT_AGG_STATE_DEBUGFS_CTL BIT(7)
67 68
68/** 69/**
69 * struct tid_ampdu_tx - TID aggregation information (Tx). 70 * struct tid_ampdu_tx - TID aggregation information (Tx).
@@ -87,8 +88,9 @@ struct tid_ampdu_tx {
87 * @stored_mpdu_num: number of MPDUs in reordering buffer 88 * @stored_mpdu_num: number of MPDUs in reordering buffer
88 * @ssn: Starting Sequence Number expected to be aggregated. 89 * @ssn: Starting Sequence Number expected to be aggregated.
89 * @buf_size: buffer size for incoming A-MPDUs 90 * @buf_size: buffer size for incoming A-MPDUs
90 * @timeout: reset timer value. 91 * @timeout: reset timer value (in TUs).
91 * @dialog_token: dialog token for aggregation session 92 * @dialog_token: dialog token for aggregation session
93 * @shutdown: this session is being shut down due to STA removal
92 */ 94 */
93struct tid_ampdu_rx { 95struct tid_ampdu_rx {
94 struct sk_buff **reorder_buf; 96 struct sk_buff **reorder_buf;
@@ -99,6 +101,7 @@ struct tid_ampdu_rx {
99 u16 buf_size; 101 u16 buf_size;
100 u16 timeout; 102 u16 timeout;
101 u8 dialog_token; 103 u8 dialog_token;
104 bool shutdown;
102}; 105};
103 106
104/** 107/**
@@ -198,7 +201,7 @@ struct sta_ampdu_mlme {
198 * @tid_seq: per-TID sequence numbers for sending to this STA 201 * @tid_seq: per-TID sequence numbers for sending to this STA
199 * @ampdu_mlme: A-MPDU state machine state 202 * @ampdu_mlme: A-MPDU state machine state
200 * @timer_to_tid: identity mapping to ID timers 203 * @timer_to_tid: identity mapping to ID timers
201 * @tid_to_tx_q: map tid to tx queue 204 * @tid_to_tx_q: map tid to tx queue (invalid == negative values)
202 * @llid: Local link ID 205 * @llid: Local link ID
203 * @plid: Peer link ID 206 * @plid: Peer link ID
204 * @reason: Cancel reason on PLINK_HOLDING state 207 * @reason: Cancel reason on PLINK_HOLDING state
@@ -273,7 +276,7 @@ struct sta_info {
273 */ 276 */
274 struct sta_ampdu_mlme ampdu_mlme; 277 struct sta_ampdu_mlme ampdu_mlme;
275 u8 timer_to_tid[STA_TID_NUM]; 278 u8 timer_to_tid[STA_TID_NUM];
276 u8 tid_to_tx_q[STA_TID_NUM]; 279 s8 tid_to_tx_q[STA_TID_NUM];
277 280
278#ifdef CONFIG_MAC80211_MESH 281#ifdef CONFIG_MAC80211_MESH
279 /* 282 /*
@@ -382,8 +385,6 @@ static inline u32 get_sta_flags(struct sta_info *sta)
382} 385}
383 386
384 387
385/* Maximum number of concurrently registered stations */
386#define MAX_STA_COUNT 2007
387 388
388#define STA_HASH_SIZE 256 389#define STA_HASH_SIZE 256
389#define STA_HASH(sta) (sta[5]) 390#define STA_HASH(sta) (sta[5])
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 94de5033f0b6..c3f0e950125b 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -35,6 +35,7 @@
35#define IEEE80211_TX_OK 0 35#define IEEE80211_TX_OK 0
36#define IEEE80211_TX_AGAIN 1 36#define IEEE80211_TX_AGAIN 1
37#define IEEE80211_TX_FRAG_AGAIN 2 37#define IEEE80211_TX_FRAG_AGAIN 2
38#define IEEE80211_TX_PENDING 3
38 39
39/* misc utils */ 40/* misc utils */
40 41
@@ -330,6 +331,22 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
330 return TX_CONTINUE; 331 return TX_CONTINUE;
331} 332}
332 333
334static int ieee80211_use_mfp(__le16 fc, struct sta_info *sta,
335 struct sk_buff *skb)
336{
337 if (!ieee80211_is_mgmt(fc))
338 return 0;
339
340 if (sta == NULL || !test_sta_flags(sta, WLAN_STA_MFP))
341 return 0;
342
343 if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *)
344 skb->data))
345 return 0;
346
347 return 1;
348}
349
333static ieee80211_tx_result 350static ieee80211_tx_result
334ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) 351ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
335{ 352{
@@ -409,11 +426,17 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
409 tx->key = NULL; 426 tx->key = NULL;
410 else if (tx->sta && (key = rcu_dereference(tx->sta->key))) 427 else if (tx->sta && (key = rcu_dereference(tx->sta->key)))
411 tx->key = key; 428 tx->key = key;
429 else if (ieee80211_is_mgmt(hdr->frame_control) &&
430 (key = rcu_dereference(tx->sdata->default_mgmt_key)))
431 tx->key = key;
412 else if ((key = rcu_dereference(tx->sdata->default_key))) 432 else if ((key = rcu_dereference(tx->sdata->default_key)))
413 tx->key = key; 433 tx->key = key;
414 else if (tx->sdata->drop_unencrypted && 434 else if (tx->sdata->drop_unencrypted &&
415 (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) && 435 (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) &&
416 !(info->flags & IEEE80211_TX_CTL_INJECTED)) { 436 !(info->flags & IEEE80211_TX_CTL_INJECTED) &&
437 (!ieee80211_is_robust_mgmt_frame(hdr) ||
438 (ieee80211_is_action(hdr->frame_control) &&
439 tx->sta && test_sta_flags(tx->sta, WLAN_STA_MFP)))) {
417 I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); 440 I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted);
418 return TX_DROP; 441 return TX_DROP;
419 } else 442 } else
@@ -428,10 +451,19 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
428 if (ieee80211_is_auth(hdr->frame_control)) 451 if (ieee80211_is_auth(hdr->frame_control))
429 break; 452 break;
430 case ALG_TKIP: 453 case ALG_TKIP:
431 case ALG_CCMP:
432 if (!ieee80211_is_data_present(hdr->frame_control)) 454 if (!ieee80211_is_data_present(hdr->frame_control))
433 tx->key = NULL; 455 tx->key = NULL;
434 break; 456 break;
457 case ALG_CCMP:
458 if (!ieee80211_is_data_present(hdr->frame_control) &&
459 !ieee80211_use_mfp(hdr->frame_control, tx->sta,
460 tx->skb))
461 tx->key = NULL;
462 break;
463 case ALG_AES_CMAC:
464 if (!ieee80211_is_mgmt(hdr->frame_control))
465 tx->key = NULL;
466 break;
435 } 467 }
436 } 468 }
437 469
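With management frame protection in the picture, key selection becomes frame-type aware. Condensing the lookup order and per-algorithm restrictions that the two hunks above establish:

    /*
     * Key lookup order for an outgoing frame after this change:
     *   1. tx->sta->key            - peer's pairwise key
     *   2. sdata->default_mgmt_key - management frames only (IGTK for BIP)
     *   3. sdata->default_key      - default WEP/group key
     * Then, per algorithm:
     *   TKIP     - data frames only
     *   CCMP     - data frames, plus robust management frames to MFP peers
     *   AES_CMAC - management frames only
     */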
@@ -787,6 +819,8 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
787 return ieee80211_crypto_tkip_encrypt(tx); 819 return ieee80211_crypto_tkip_encrypt(tx);
788 case ALG_CCMP: 820 case ALG_CCMP:
789 return ieee80211_crypto_ccmp_encrypt(tx); 821 return ieee80211_crypto_ccmp_encrypt(tx);
822 case ALG_AES_CMAC:
823 return ieee80211_crypto_aes_cmac_encrypt(tx);
790 } 824 }
791 825
792 /* not reached */ 826 /* not reached */
@@ -842,7 +876,6 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
842 return TX_CONTINUE; 876 return TX_CONTINUE;
843} 877}
844 878
845
846/* actual transmit path */ 879/* actual transmit path */
847 880
848/* 881/*
@@ -982,12 +1015,20 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
982 tx->sta = sta_info_get(local, hdr->addr1); 1015 tx->sta = sta_info_get(local, hdr->addr1);
983 1016
984 if (tx->sta && ieee80211_is_data_qos(hdr->frame_control)) { 1017 if (tx->sta && ieee80211_is_data_qos(hdr->frame_control)) {
1018 unsigned long flags;
985 qc = ieee80211_get_qos_ctl(hdr); 1019 qc = ieee80211_get_qos_ctl(hdr);
986 tid = *qc & IEEE80211_QOS_CTL_TID_MASK; 1020 tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
987 1021
1022 spin_lock_irqsave(&tx->sta->lock, flags);
988 state = &tx->sta->ampdu_mlme.tid_state_tx[tid]; 1023 state = &tx->sta->ampdu_mlme.tid_state_tx[tid];
989 if (*state == HT_AGG_STATE_OPERATIONAL) 1024 if (*state == HT_AGG_STATE_OPERATIONAL) {
990 info->flags |= IEEE80211_TX_CTL_AMPDU; 1025 info->flags |= IEEE80211_TX_CTL_AMPDU;
1026 if (local->hw.ampdu_queues)
1027 skb_set_queue_mapping(
1028 skb, tx->local->hw.queues +
1029 tx->sta->tid_to_tx_q[tid]);
1030 }
1031 spin_unlock_irqrestore(&tx->sta->lock, flags);
991 } 1032 }
992 1033
993 if (is_multicast_ether_addr(hdr->addr1)) { 1034 if (is_multicast_ether_addr(hdr->addr1)) {
@@ -1051,9 +1092,9 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
1051 int ret, i; 1092 int ret, i;
1052 1093
1053 if (skb) { 1094 if (skb) {
1054 if (netif_subqueue_stopped(local->mdev, skb)) 1095 if (ieee80211_queue_stopped(&local->hw,
1055 return IEEE80211_TX_AGAIN; 1096 skb_get_queue_mapping(skb)))
1056 info = IEEE80211_SKB_CB(skb); 1097 return IEEE80211_TX_PENDING;
1057 1098
1058 ret = local->ops->tx(local_to_hw(local), skb); 1099 ret = local->ops->tx(local_to_hw(local), skb);
1059 if (ret) 1100 if (ret)
@@ -1068,8 +1109,8 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb,
1068 info = IEEE80211_SKB_CB(tx->extra_frag[i]); 1109 info = IEEE80211_SKB_CB(tx->extra_frag[i]);
1069 info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT | 1110 info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT |
1070 IEEE80211_TX_CTL_FIRST_FRAGMENT); 1111 IEEE80211_TX_CTL_FIRST_FRAGMENT);
1071 if (netif_subqueue_stopped(local->mdev, 1112 if (ieee80211_queue_stopped(&local->hw,
1072 tx->extra_frag[i])) 1113 skb_get_queue_mapping(tx->extra_frag[i])))
1073 return IEEE80211_TX_FRAG_AGAIN; 1114 return IEEE80211_TX_FRAG_AGAIN;
1074 1115
1075 ret = local->ops->tx(local_to_hw(local), 1116 ret = local->ops->tx(local_to_hw(local),
@@ -1179,8 +1220,9 @@ retry:
1179 * queues, there's no reason for a driver to reject 1220 * queues, there's no reason for a driver to reject
1180 * a frame there, warn and drop it. 1221 * a frame there, warn and drop it.
1181 */ 1222 */
1182 if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) 1223 if (ret != IEEE80211_TX_PENDING)
1183 goto drop; 1224 if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU))
1225 goto drop;
1184 1226
1185 store = &local->pending_packet[queue]; 1227 store = &local->pending_packet[queue];
1186 1228
@@ -1296,6 +1338,19 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev)
1296 return 0; 1338 return 0;
1297 } 1339 }
1298 1340
1341 if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) &&
1342 local->hw.conf.dynamic_ps_timeout > 0) {
1343 if (local->hw.conf.flags & IEEE80211_CONF_PS) {
1344 ieee80211_stop_queues_by_reason(&local->hw,
1345 IEEE80211_QUEUE_STOP_REASON_PS);
1346 queue_work(local->hw.workqueue,
1347 &local->dynamic_ps_disable_work);
1348 }
1349
1350 mod_timer(&local->dynamic_ps_timer, jiffies +
1351 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
1352 }
1353
1299 memset(info, 0, sizeof(*info)); 1354 memset(info, 0, sizeof(*info));
1300 1355
1301 info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; 1356 info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
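Dynamic power save handling now lives on the master-device TX path (the copy in ieee80211_subif_start_xmit is removed further down): if the hardware is currently in PS, transmitting first stops the queues and schedules dynamic_ps_disable_work, and every frame re-arms the inactivity timer for hw.conf.dynamic_ps_timeout milliseconds, e.g. a timeout of 100 lets the device drop back into power save 100 ms after the last transmission.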
@@ -1390,10 +1445,31 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb,
1390 struct net_device *dev) 1445 struct net_device *dev)
1391{ 1446{
1392 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 1447 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1448 struct ieee80211_channel *chan = local->hw.conf.channel;
1393 struct ieee80211_radiotap_header *prthdr = 1449 struct ieee80211_radiotap_header *prthdr =
1394 (struct ieee80211_radiotap_header *)skb->data; 1450 (struct ieee80211_radiotap_header *)skb->data;
1395 u16 len_rthdr; 1451 u16 len_rthdr;
1396 1452
1453 /*
1454 * Frame injection is not allowed if beaconing is not allowed
1455 * or if we need radar detection. Beaconing is usually not allowed when
1456 * the mode or operation (Adhoc, AP, Mesh) does not support DFS.
1457 * Passive scan is also used in world regulatory domains where
1458 * your country is not known and as such it should be treated as
1459 * NO TX unless the channel is explicitly allowed in which case
1460 * your current regulatory domain would not have the passive scan
1461 * flag.
1462 *
1463 * Since AP mode uses monitor interfaces to inject/TX management
1464 * frames we can make AP mode the exception to this rule once it
1465 * supports radar detection as its implementation can deal with
1466 * radar detection by itself. We can do that later by adding a
1467 * monitor flag interfaces used for AP support.
1468 */
1469 if ((chan->flags & (IEEE80211_CHAN_NO_IBSS | IEEE80211_CHAN_RADAR |
1470 IEEE80211_CHAN_PASSIVE_SCAN)))
1471 goto fail;
1472
1397 /* check for not even having the fixed radiotap header part */ 1473 /* check for not even having the fixed radiotap header part */
1398 if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) 1474 if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
1399 goto fail; /* too short to be possibly valid */ 1475 goto fail; /* too short to be possibly valid */
@@ -1477,19 +1553,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1477 goto fail; 1553 goto fail;
1478 } 1554 }
1479 1555
1480 if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) &&
1481 local->dynamic_ps_timeout > 0) {
1482 if (local->hw.conf.flags & IEEE80211_CONF_PS) {
1483 ieee80211_stop_queues_by_reason(&local->hw,
1484 IEEE80211_QUEUE_STOP_REASON_PS);
1485 queue_work(local->hw.workqueue,
1486 &local->dynamic_ps_disable_work);
1487 }
1488
1489 mod_timer(&local->dynamic_ps_timer, jiffies +
1490 msecs_to_jiffies(local->dynamic_ps_timeout));
1491 }
1492
1493 nh_pos = skb_network_header(skb) - skb->data; 1556 nh_pos = skb_network_header(skb) - skb->data;
1494 h_pos = skb_transport_header(skb) - skb->data; 1557 h_pos = skb_transport_header(skb) - skb->data;
1495 1558
@@ -1570,7 +1633,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1570 case NL80211_IFTYPE_STATION: 1633 case NL80211_IFTYPE_STATION:
1571 fc |= cpu_to_le16(IEEE80211_FCTL_TODS); 1634 fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
1572 /* BSSID SA DA */ 1635 /* BSSID SA DA */
1573 memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); 1636 memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN);
1574 memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); 1637 memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
1575 memcpy(hdr.addr3, skb->data, ETH_ALEN); 1638 memcpy(hdr.addr3, skb->data, ETH_ALEN);
1576 hdrlen = 24; 1639 hdrlen = 24;
@@ -1579,7 +1642,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1579 /* DA SA BSSID */ 1642 /* DA SA BSSID */
1580 memcpy(hdr.addr1, skb->data, ETH_ALEN); 1643 memcpy(hdr.addr1, skb->data, ETH_ALEN);
1581 memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); 1644 memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
1582 memcpy(hdr.addr3, sdata->u.sta.bssid, ETH_ALEN); 1645 memcpy(hdr.addr3, sdata->u.ibss.bssid, ETH_ALEN);
1583 hdrlen = 24; 1646 hdrlen = 24;
1584 break; 1647 break;
1585 default: 1648 default:
@@ -1865,7 +1928,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
1865 struct ieee80211_tx_info *info; 1928 struct ieee80211_tx_info *info;
1866 struct ieee80211_sub_if_data *sdata = NULL; 1929 struct ieee80211_sub_if_data *sdata = NULL;
1867 struct ieee80211_if_ap *ap = NULL; 1930 struct ieee80211_if_ap *ap = NULL;
1868 struct ieee80211_if_sta *ifsta = NULL;
1869 struct beacon_data *beacon; 1931 struct beacon_data *beacon;
1870 struct ieee80211_supported_band *sband; 1932 struct ieee80211_supported_band *sband;
1871 enum ieee80211_band band = local->hw.conf.channel->band; 1933 enum ieee80211_band band = local->hw.conf.channel->band;
@@ -1917,13 +1979,13 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
1917 } else 1979 } else
1918 goto out; 1980 goto out;
1919 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { 1981 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
1982 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
1920 struct ieee80211_hdr *hdr; 1983 struct ieee80211_hdr *hdr;
1921 ifsta = &sdata->u.sta;
1922 1984
1923 if (!ifsta->probe_resp) 1985 if (!ifibss->probe_resp)
1924 goto out; 1986 goto out;
1925 1987
1926 skb = skb_copy(ifsta->probe_resp, GFP_ATOMIC); 1988 skb = skb_copy(ifibss->probe_resp, GFP_ATOMIC);
1927 if (!skb) 1989 if (!skb)
1928 goto out; 1990 goto out;
1929 1991
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index fb89e1d0aa03..e0431a1d218b 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -41,6 +41,15 @@ const unsigned char rfc1042_header[] __aligned(2) =
41const unsigned char bridge_tunnel_header[] __aligned(2) = 41const unsigned char bridge_tunnel_header[] __aligned(2) =
42 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; 42 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
43 43
44struct ieee80211_hw *wiphy_to_ieee80211_hw(struct wiphy *wiphy)
45{
46 struct ieee80211_local *local;
47 BUG_ON(!wiphy);
48
49 local = wiphy_priv(wiphy);
50 return &local->hw;
51}
52EXPORT_SYMBOL(wiphy_to_ieee80211_hw);
44 53
45u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, 54u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
46 enum nl80211_iftype type) 55 enum nl80211_iftype type)
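wiphy_to_ieee80211_hw() gives code that is handed only a struct wiphy (for instance cfg80211 callbacks) a way back to the mac80211 hardware struct. A hypothetical driver-side helper under that assumption:

    /* hypothetical helper in a driver that stores only the wiphy */
    static struct mydrv_priv *wiphy_to_priv(struct wiphy *wiphy)
    {
            struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy);

            return hw->priv;
    }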
@@ -335,15 +344,36 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
335{ 344{
336 struct ieee80211_local *local = hw_to_local(hw); 345 struct ieee80211_local *local = hw_to_local(hw);
337 346
338 /* we don't need to track ampdu queues */ 347 if (queue >= hw->queues) {
339 if (queue < ieee80211_num_regular_queues(hw)) { 348 if (local->ampdu_ac_queue[queue - hw->queues] < 0)
340 __clear_bit(reason, &local->queue_stop_reasons[queue]); 349 return;
350
351 /*
352 * for virtual aggregation queues, we need to refcount the
353 * internal mac80211 disable (multiple times!), keep track of
354 * driver disable _and_ make sure the regular queue is
355 * actually enabled.
356 */
357 if (reason == IEEE80211_QUEUE_STOP_REASON_AGGREGATION)
358 local->amdpu_ac_stop_refcnt[queue - hw->queues]--;
359 else
360 __clear_bit(reason, &local->queue_stop_reasons[queue]);
341 361
342 if (local->queue_stop_reasons[queue] != 0) 362 if (local->queue_stop_reasons[queue] ||
343 /* someone still has this queue stopped */ 363 local->amdpu_ac_stop_refcnt[queue - hw->queues])
344 return; 364 return;
365
366 /* now go on to treat the corresponding regular queue */
367 queue = local->ampdu_ac_queue[queue - hw->queues];
368 reason = IEEE80211_QUEUE_STOP_REASON_AGGREGATION;
345 } 369 }
346 370
371 __clear_bit(reason, &local->queue_stop_reasons[queue]);
372
373 if (local->queue_stop_reasons[queue] != 0)
374 /* someone still has this queue stopped */
375 return;
376
347 if (test_bit(queue, local->queues_pending)) { 377 if (test_bit(queue, local->queues_pending)) {
348 set_bit(queue, local->queues_pending_run); 378 set_bit(queue, local->queues_pending_run);
349 tasklet_schedule(&local->tx_pending_tasklet); 379 tasklet_schedule(&local->tx_pending_tasklet);
@@ -375,9 +405,27 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
375{ 405{
376 struct ieee80211_local *local = hw_to_local(hw); 406 struct ieee80211_local *local = hw_to_local(hw);
377 407
378 /* we don't need to track ampdu queues */ 408 if (queue >= hw->queues) {
379 if (queue < ieee80211_num_regular_queues(hw)) 409 if (local->ampdu_ac_queue[queue - hw->queues] < 0)
380 __set_bit(reason, &local->queue_stop_reasons[queue]); 410 return;
411
412 /*
413 * for virtual aggregation queues, we need to refcount the
414 * internal mac80211 disable (multiple times!), keep track of
415 * driver disable _and_ make sure the regular queue is
416 * actually enabled.
417 */
418 if (reason == IEEE80211_QUEUE_STOP_REASON_AGGREGATION)
419 local->amdpu_ac_stop_refcnt[queue - hw->queues]++;
420 else
421 __set_bit(reason, &local->queue_stop_reasons[queue]);
422
423 /* now go on to treat the corresponding regular queue */
424 queue = local->ampdu_ac_queue[queue - hw->queues];
425 reason = IEEE80211_QUEUE_STOP_REASON_AGGREGATION;
426 }
427
428 __set_bit(reason, &local->queue_stop_reasons[queue]);
381 429
382 netif_stop_subqueue(local->mdev, queue); 430 netif_stop_subqueue(local->mdev, queue);
383} 431}
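Queues numbered at or above hw->queues are the virtual aggregation queues: ampdu_ac_queue[] maps each back to its real access-category queue (a negative entry means unused), stops for the AGGREGATION reason are reference-counted per virtual queue (in the array spelled amdpu_ac_stop_refcnt in this tree), other reasons go into the virtual queue's own reason bitmap, and in either case the underlying real queue is then stopped or re-checked under the AGGREGATION reason. A sketch of the assumed bookkeeping when a TX BA session is granted virtual queue i:

    /* sketch (assumption): hypothetical helper tying virtual aggregation
     * queue i to the TID's access-category queue ac */
    static void agg_queue_assign(struct ieee80211_local *local,
                                 struct sta_info *sta, int tid, int i, int ac)
    {
            local->ampdu_ac_queue[i] = ac;  /* -1 while the slot is unused */
            sta->tid_to_tx_q[tid] = i;      /* s8; -1 means no aggregation queue */
    }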
@@ -409,7 +457,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
409 457
410 spin_lock_irqsave(&local->queue_stop_reason_lock, flags); 458 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
411 459
412 for (i = 0; i < ieee80211_num_queues(hw); i++) 460 for (i = 0; i < hw->queues; i++)
413 __ieee80211_stop_queue(hw, i, reason); 461 __ieee80211_stop_queue(hw, i, reason);
414 462
415 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 463 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
@@ -425,6 +473,16 @@ EXPORT_SYMBOL(ieee80211_stop_queues);
425int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) 473int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue)
426{ 474{
427 struct ieee80211_local *local = hw_to_local(hw); 475 struct ieee80211_local *local = hw_to_local(hw);
476 unsigned long flags;
477
478 if (queue >= hw->queues) {
479 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
480 queue = local->ampdu_ac_queue[queue - hw->queues];
481 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
482 if (queue < 0)
483 return true;
484 }
485
428 return __netif_subqueue_stopped(local->mdev, queue); 486 return __netif_subqueue_stopped(local->mdev, queue);
429} 487}
430EXPORT_SYMBOL(ieee80211_queue_stopped); 488EXPORT_SYMBOL(ieee80211_queue_stopped);
@@ -459,7 +517,7 @@ void ieee80211_iterate_active_interfaces(
459 struct ieee80211_local *local = hw_to_local(hw); 517 struct ieee80211_local *local = hw_to_local(hw);
460 struct ieee80211_sub_if_data *sdata; 518 struct ieee80211_sub_if_data *sdata;
461 519
462 rtnl_lock(); 520 mutex_lock(&local->iflist_mtx);
463 521
464 list_for_each_entry(sdata, &local->interfaces, list) { 522 list_for_each_entry(sdata, &local->interfaces, list) {
465 switch (sdata->vif.type) { 523 switch (sdata->vif.type) {
@@ -480,7 +538,7 @@ void ieee80211_iterate_active_interfaces(
480 &sdata->vif); 538 &sdata->vif);
481 } 539 }
482 540
483 rtnl_unlock(); 541 mutex_unlock(&local->iflist_mtx);
484} 542}
485EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces); 543EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
486 544
@@ -653,6 +711,10 @@ void ieee802_11_parse_elems(u8 *start, size_t len,
653 elems->pwr_constr_elem = pos; 711 elems->pwr_constr_elem = pos;
654 elems->pwr_constr_elem_len = elen; 712 elems->pwr_constr_elem_len = elen;
655 break; 713 break;
714 case WLAN_EID_TIMEOUT_INTERVAL:
715 elems->timeout_int = pos;
716 elems->timeout_int_len = elen;
717 break;
656 default: 718 default:
657 break; 719 break;
658 } 720 }
@@ -688,6 +750,27 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata)
688 local->ops->conf_tx(local_to_hw(local), i, &qparam); 750 local->ops->conf_tx(local_to_hw(local), i, &qparam);
689} 751}
690 752
753void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
754 const size_t supp_rates_len,
755 const u8 *supp_rates)
756{
757 struct ieee80211_local *local = sdata->local;
758 int i, have_higher_than_11mbit = 0;
759
760 /* cf. IEEE 802.11 9.2.12 */
761 for (i = 0; i < supp_rates_len; i++)
762 if ((supp_rates[i] & 0x7f) * 5 > 110)
763 have_higher_than_11mbit = 1;
764
765 if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ &&
766 have_higher_than_11mbit)
767 sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
768 else
769 sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
770
771 ieee80211_set_wmm_default(sdata);
772}
773
691void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, 774void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
692 int encrypt) 775 int encrypt)
693{ 776{
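
ieee80211_sta_def_wmm_params() above derives "G mode" from the peer's supported rates: Supported Rates IE entries are in units of 500 kb/s with the basic-rate flag in bit 7, so (rate & 0x7f) * 5 > 110 is true exactly when the rate exceeds 11 Mb/s. The same check as a stand-alone helper; the function name and the sample rate sets are made up:

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* Entries encode the rate in units of 500 kb/s in the low 7 bits, with
 * bit 7 marking a basic rate.  Multiplying by 5 gives units of 100 kb/s,
 * so "> 110" means "faster than 11 Mb/s", i.e. an ERP/802.11g rate. */
bool rates_require_g_mode(const uint8_t *supp_rates, size_t len)
{
	size_t i;

	for (i = 0; i < len; i++)
		if ((supp_rates[i] & 0x7f) * 5 > 110)
			return true;
	return false;
}

/* Example data: 1/2/5.5/11 Mb/s (11b only) vs. the same set plus 54 Mb/s. */
const uint8_t rates_11b[] = { 0x82, 0x84, 0x8b, 0x96 };
const uint8_t rates_11g[] = { 0x82, 0x84, 0x8b, 0x96, 0x6c };
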
@@ -727,12 +810,12 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz)
727 return ret; 810 return ret;
728} 811}
729 812
730u64 ieee80211_mandatory_rates(struct ieee80211_local *local, 813u32 ieee80211_mandatory_rates(struct ieee80211_local *local,
731 enum ieee80211_band band) 814 enum ieee80211_band band)
732{ 815{
733 struct ieee80211_supported_band *sband; 816 struct ieee80211_supported_band *sband;
734 struct ieee80211_rate *bitrates; 817 struct ieee80211_rate *bitrates;
735 u64 mandatory_rates; 818 u32 mandatory_rates;
736 enum ieee80211_rate_flags mandatory_flag; 819 enum ieee80211_rate_flags mandatory_flag;
737 int i; 820 int i;
738 821
@@ -754,3 +837,161 @@ u64 ieee80211_mandatory_rates(struct ieee80211_local *local,
754 mandatory_rates |= BIT(i); 837 mandatory_rates |= BIT(i);
755 return mandatory_rates; 838 return mandatory_rates;
756} 839}
840
841void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
842 u16 transaction, u16 auth_alg,
843 u8 *extra, size_t extra_len,
844 const u8 *bssid, int encrypt)
845{
846 struct ieee80211_local *local = sdata->local;
847 struct sk_buff *skb;
848 struct ieee80211_mgmt *mgmt;
849 const u8 *ie_auth = NULL;
850 int ie_auth_len = 0;
851
852 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
853 ie_auth_len = sdata->u.mgd.ie_auth_len;
854 ie_auth = sdata->u.mgd.ie_auth;
855 }
856
857 skb = dev_alloc_skb(local->hw.extra_tx_headroom +
858 sizeof(*mgmt) + 6 + extra_len + ie_auth_len);
859 if (!skb) {
860 printk(KERN_DEBUG "%s: failed to allocate buffer for auth "
861 "frame\n", sdata->dev->name);
862 return;
863 }
864 skb_reserve(skb, local->hw.extra_tx_headroom);
865
866 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
867 memset(mgmt, 0, 24 + 6);
868 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
869 IEEE80211_STYPE_AUTH);
870 if (encrypt)
871 mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
872 memcpy(mgmt->da, bssid, ETH_ALEN);
873 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
874 memcpy(mgmt->bssid, bssid, ETH_ALEN);
875 mgmt->u.auth.auth_alg = cpu_to_le16(auth_alg);
876 mgmt->u.auth.auth_transaction = cpu_to_le16(transaction);
877 mgmt->u.auth.status_code = cpu_to_le16(0);
878 if (extra)
879 memcpy(skb_put(skb, extra_len), extra, extra_len);
880 if (ie_auth)
881 memcpy(skb_put(skb, ie_auth_len), ie_auth, ie_auth_len);
882
883 ieee80211_tx_skb(sdata, skb, encrypt);
884}
885
886void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
887 u8 *ssid, size_t ssid_len,
888 u8 *ie, size_t ie_len)
889{
890 struct ieee80211_local *local = sdata->local;
891 struct ieee80211_supported_band *sband;
892 struct sk_buff *skb;
893 struct ieee80211_mgmt *mgmt;
894 u8 *pos, *supp_rates, *esupp_rates = NULL, *extra_preq_ie = NULL;
895 int i, extra_preq_ie_len = 0;
896
897 switch (sdata->vif.type) {
898 case NL80211_IFTYPE_STATION:
899 extra_preq_ie_len = sdata->u.mgd.ie_probereq_len;
900 extra_preq_ie = sdata->u.mgd.ie_probereq;
901 break;
902 default:
903 break;
904 }
905
906 skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 +
907 ie_len + extra_preq_ie_len);
908 if (!skb) {
909 printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
910 "request\n", sdata->dev->name);
911 return;
912 }
913 skb_reserve(skb, local->hw.extra_tx_headroom);
914
915 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
916 memset(mgmt, 0, 24);
917 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
918 IEEE80211_STYPE_PROBE_REQ);
919 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
920 if (dst) {
921 memcpy(mgmt->da, dst, ETH_ALEN);
922 memcpy(mgmt->bssid, dst, ETH_ALEN);
923 } else {
924 memset(mgmt->da, 0xff, ETH_ALEN);
925 memset(mgmt->bssid, 0xff, ETH_ALEN);
926 }
927 pos = skb_put(skb, 2 + ssid_len);
928 *pos++ = WLAN_EID_SSID;
929 *pos++ = ssid_len;
930 memcpy(pos, ssid, ssid_len);
931
932 supp_rates = skb_put(skb, 2);
933 supp_rates[0] = WLAN_EID_SUPP_RATES;
934 supp_rates[1] = 0;
935 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
936
937 for (i = 0; i < sband->n_bitrates; i++) {
938 struct ieee80211_rate *rate = &sband->bitrates[i];
939 if (esupp_rates) {
940 pos = skb_put(skb, 1);
941 esupp_rates[1]++;
942 } else if (supp_rates[1] == 8) {
943 esupp_rates = skb_put(skb, 3);
944 esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES;
945 esupp_rates[1] = 1;
946 pos = &esupp_rates[2];
947 } else {
948 pos = skb_put(skb, 1);
949 supp_rates[1]++;
950 }
951 *pos = rate->bitrate / 5;
952 }
953
954 if (ie)
955 memcpy(skb_put(skb, ie_len), ie, ie_len);
956 if (extra_preq_ie)
957 memcpy(skb_put(skb, extra_preq_ie_len), extra_preq_ie,
958 extra_preq_ie_len);
959
960 ieee80211_tx_skb(sdata, skb, 0);
961}
962
963u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
964 struct ieee802_11_elems *elems,
965 enum ieee80211_band band)
966{
967 struct ieee80211_supported_band *sband;
968 struct ieee80211_rate *bitrates;
969 size_t num_rates;
970 u32 supp_rates;
971 int i, j;
972 sband = local->hw.wiphy->bands[band];
973
974 if (!sband) {
975 WARN_ON(1);
976 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
977 }
978
979 bitrates = sband->bitrates;
980 num_rates = sband->n_bitrates;
981 supp_rates = 0;
982 for (i = 0; i < elems->supp_rates_len +
983 elems->ext_supp_rates_len; i++) {
984 u8 rate = 0;
985 int own_rate;
986 if (i < elems->supp_rates_len)
987 rate = elems->supp_rates[i];
988 else if (elems->ext_supp_rates)
989 rate = elems->ext_supp_rates
990 [i - elems->supp_rates_len];
991 own_rate = 5 * (rate & 0x7f);
992 for (j = 0; j < num_rates; j++)
993 if (bitrates[j].bitrate == own_rate)
994 supp_rates |= BIT(j);
995 }
996 return supp_rates;
997}
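
The two rate paths in the hunk above are mirror images: ieee80211_send_probe_req() caps the Supported Rates element at eight entries and spills the rest into an Extended Supported Rates element, while ieee80211_sta_get_rates() maps received IE bytes (units of 500 kb/s, bit 7 = basic-rate flag) onto a bitmap of the local band's bitrates, which are kept in units of 100 kb/s. Both directions in a stand-alone sketch; the element IDs and the rate table below are assumptions, not the wiphy band data:

#include <stddef.h>
#include <stdint.h>

#define EID_SUPP_RATES      1
#define EID_EXT_SUPP_RATES 50

/* Append a Supported Rates IE (max. 8 entries) plus, if needed, an Extended
 * Supported Rates IE with the remainder; returns bytes written.  'rates'
 * are IE-encoded (units of 500 kb/s); 'buf' must have room for the result. */
size_t build_rate_ies(uint8_t *buf, const uint8_t *rates, size_t n)
{
	uint8_t *pos = buf;
	size_t first = n > 8 ? 8 : n;
	size_t i;

	*pos++ = EID_SUPP_RATES;
	*pos++ = first;
	for (i = 0; i < first; i++)
		*pos++ = rates[i];

	if (n > 8) {
		*pos++ = EID_EXT_SUPP_RATES;
		*pos++ = n - 8;
		for (i = 8; i < n; i++)
			*pos++ = rates[i];
	}
	return pos - buf;
}

/* Hypothetical local rate table in units of 100 kb/s; the bit position in
 * the returned mask is the index into this table. */
const uint16_t local_bitrates[] = { 10, 20, 55, 110, 60, 120, 240, 540 };

uint32_t rates_ie_to_mask(const uint8_t *ie, size_t len)
{
	uint32_t mask = 0;
	size_t i, j;

	for (i = 0; i < len; i++) {
		unsigned int rate = (ie[i] & 0x7f) * 5;	/* -> 100 kb/s units */

		for (j = 0; j < sizeof(local_bitrates) / sizeof(local_bitrates[0]); j++)
			if (local_bitrates[j] == rate)
				mask |= 1u << j;
	}
	return mask;
}
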
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 7162d5816f39..f6924fc065d3 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -37,7 +37,14 @@ static int ieee80211_set_encryption(struct ieee80211_sub_if_data *sdata, u8 *sta
37 struct ieee80211_key *key; 37 struct ieee80211_key *key;
38 int err; 38 int err;
39 39
40 if (idx < 0 || idx >= NUM_DEFAULT_KEYS) { 40 if (alg == ALG_AES_CMAC) {
41 if (idx < NUM_DEFAULT_KEYS ||
42 idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) {
43 printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d "
44 "(BIP)\n", sdata->dev->name, idx);
45 return -EINVAL;
46 }
47 } else if (idx < 0 || idx >= NUM_DEFAULT_KEYS) {
41 printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n", 48 printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n",
42 sdata->dev->name, idx); 49 sdata->dev->name, idx);
43 return -EINVAL; 50 return -EINVAL;
@@ -103,6 +110,9 @@ static int ieee80211_set_encryption(struct ieee80211_sub_if_data *sdata, u8 *sta
103 110
104 if (set_tx_key || (!sta && !sdata->default_key && key)) 111 if (set_tx_key || (!sta && !sdata->default_key && key))
105 ieee80211_set_default_key(sdata, idx); 112 ieee80211_set_default_key(sdata, idx);
113 if (alg == ALG_AES_CMAC &&
114 (set_tx_key || (!sta && !sdata->default_mgmt_key && key)))
115 ieee80211_set_default_mgmt_key(sdata, idx);
106 } 116 }
107 117
108 out_unlock: 118 out_unlock:
@@ -122,122 +132,37 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev,
122 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) 132 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)
123 return -EOPNOTSUPP; 133 return -EOPNOTSUPP;
124 134
125 if (sdata->vif.type == NL80211_IFTYPE_STATION || 135 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
126 sdata->vif.type == NL80211_IFTYPE_ADHOC) {
127 int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length); 136 int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length);
128 if (ret) 137 if (ret)
129 return ret; 138 return ret;
130 sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; 139 sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL;
131 ieee80211_sta_req_auth(sdata, &sdata->u.sta); 140 ieee80211_sta_req_auth(sdata);
132 return 0; 141 return 0;
133 } 142 }
134 143
135 return -EOPNOTSUPP; 144 return -EOPNOTSUPP;
136} 145}
137 146
138static int ieee80211_ioctl_giwrange(struct net_device *dev,
139 struct iw_request_info *info,
140 struct iw_point *data, char *extra)
141{
142 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
143 struct iw_range *range = (struct iw_range *) extra;
144 enum ieee80211_band band;
145 int c = 0;
146
147 data->length = sizeof(struct iw_range);
148 memset(range, 0, sizeof(struct iw_range));
149
150 range->we_version_compiled = WIRELESS_EXT;
151 range->we_version_source = 21;
152 range->retry_capa = IW_RETRY_LIMIT;
153 range->retry_flags = IW_RETRY_LIMIT;
154 range->min_retry = 0;
155 range->max_retry = 255;
156 range->min_rts = 0;
157 range->max_rts = 2347;
158 range->min_frag = 256;
159 range->max_frag = 2346;
160
161 range->encoding_size[0] = 5;
162 range->encoding_size[1] = 13;
163 range->num_encoding_sizes = 2;
164 range->max_encoding_tokens = NUM_DEFAULT_KEYS;
165
166 if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC ||
167 local->hw.flags & IEEE80211_HW_SIGNAL_DB)
168 range->max_qual.level = local->hw.max_signal;
169 else if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
170 range->max_qual.level = -110;
171 else
172 range->max_qual.level = 0;
173
174 if (local->hw.flags & IEEE80211_HW_NOISE_DBM)
175 range->max_qual.noise = -110;
176 else
177 range->max_qual.noise = 0;
178
179 range->max_qual.qual = 100;
180 range->max_qual.updated = local->wstats_flags;
181
182 range->avg_qual.qual = 50;
183 /* not always true but better than nothing */
184 range->avg_qual.level = range->max_qual.level / 2;
185 range->avg_qual.noise = range->max_qual.noise / 2;
186 range->avg_qual.updated = local->wstats_flags;
187
188 range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 |
189 IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP;
190
191
192 for (band = 0; band < IEEE80211_NUM_BANDS; band ++) {
193 int i;
194 struct ieee80211_supported_band *sband;
195
196 sband = local->hw.wiphy->bands[band];
197
198 if (!sband)
199 continue;
200
201 for (i = 0; i < sband->n_channels && c < IW_MAX_FREQUENCIES; i++) {
202 struct ieee80211_channel *chan = &sband->channels[i];
203
204 if (!(chan->flags & IEEE80211_CHAN_DISABLED)) {
205 range->freq[c].i =
206 ieee80211_frequency_to_channel(
207 chan->center_freq);
208 range->freq[c].m = chan->center_freq;
209 range->freq[c].e = 6;
210 c++;
211 }
212 }
213 }
214 range->num_channels = c;
215 range->num_frequency = c;
216
217 IW_EVENT_CAPA_SET_KERNEL(range->event_capa);
218 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP);
219 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN);
220
221 range->scan_capa |= IW_SCAN_CAPA_ESSID;
222
223 return 0;
224}
225
226
227static int ieee80211_ioctl_siwfreq(struct net_device *dev, 147static int ieee80211_ioctl_siwfreq(struct net_device *dev,
228 struct iw_request_info *info, 148 struct iw_request_info *info,
229 struct iw_freq *freq, char *extra) 149 struct iw_freq *freq, char *extra)
230{ 150{
231 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 151 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
232 152
233 if (sdata->vif.type == NL80211_IFTYPE_STATION) 153 if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
234 sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; 154 sdata->u.ibss.flags &= ~IEEE80211_IBSS_AUTO_CHANNEL_SEL;
155 else if (sdata->vif.type == NL80211_IFTYPE_STATION)
156 sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL;
235 157
236 /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ 158 /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */
237 if (freq->e == 0) { 159 if (freq->e == 0) {
238 if (freq->m < 0) { 160 if (freq->m < 0) {
239 if (sdata->vif.type == NL80211_IFTYPE_STATION) 161 if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
240 sdata->u.sta.flags |= 162 sdata->u.ibss.flags |=
163 IEEE80211_IBSS_AUTO_CHANNEL_SEL;
164 else if (sdata->vif.type == NL80211_IFTYPE_STATION)
165 sdata->u.mgd.flags |=
241 IEEE80211_STA_AUTO_CHANNEL_SEL; 166 IEEE80211_STA_AUTO_CHANNEL_SEL;
242 return 0; 167 return 0;
243 } else 168 } else
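
The siwfreq handler above interprets struct iw_freq the usual wireless-extensions way: with e == 0, m is a channel number (negative means automatic selection), otherwise the frequency is m * 10^e Hz. A throwaway conversion helper along those lines, not a mac80211 function:

/* Convert the m/e pair to MHz; the e == 0 "m is a channel number" case and
 * the channel-to-frequency mapping are left to the caller.  Illustrative. */
long iw_freq_to_mhz(long m, int e)
{
	long long hz = m;

	while (e-- > 0)
		hz *= 10;
	return (long)(hz / 1000000);
}

/* Example: iw_freq_to_mhz(2412, 6) == 2412, i.e. 2.412 GHz (channel 1). */
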
@@ -274,32 +199,35 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
274{ 199{
275 struct ieee80211_sub_if_data *sdata; 200 struct ieee80211_sub_if_data *sdata;
276 size_t len = data->length; 201 size_t len = data->length;
202 int ret;
277 203
278 /* iwconfig uses nul termination in SSID.. */ 204 /* iwconfig uses nul termination in SSID.. */
279 if (len > 0 && ssid[len - 1] == '\0') 205 if (len > 0 && ssid[len - 1] == '\0')
280 len--; 206 len--;
281 207
282 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 208 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
283 if (sdata->vif.type == NL80211_IFTYPE_STATION || 209 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
284 sdata->vif.type == NL80211_IFTYPE_ADHOC) {
285 int ret;
286 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { 210 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) {
287 if (len > IEEE80211_MAX_SSID_LEN) 211 if (len > IEEE80211_MAX_SSID_LEN)
288 return -EINVAL; 212 return -EINVAL;
289 memcpy(sdata->u.sta.ssid, ssid, len); 213 memcpy(sdata->u.mgd.ssid, ssid, len);
290 sdata->u.sta.ssid_len = len; 214 sdata->u.mgd.ssid_len = len;
291 return 0; 215 return 0;
292 } 216 }
217
293 if (data->flags) 218 if (data->flags)
294 sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; 219 sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_SSID_SEL;
295 else 220 else
296 sdata->u.sta.flags |= IEEE80211_STA_AUTO_SSID_SEL; 221 sdata->u.mgd.flags |= IEEE80211_STA_AUTO_SSID_SEL;
222
297 ret = ieee80211_sta_set_ssid(sdata, ssid, len); 223 ret = ieee80211_sta_set_ssid(sdata, ssid, len);
298 if (ret) 224 if (ret)
299 return ret; 225 return ret;
300 ieee80211_sta_req_auth(sdata, &sdata->u.sta); 226
227 ieee80211_sta_req_auth(sdata);
301 return 0; 228 return 0;
302 } 229 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
230 return ieee80211_ibss_set_ssid(sdata, ssid, len);
303 231
304 return -EOPNOTSUPP; 232 return -EOPNOTSUPP;
305} 233}
@@ -313,8 +241,7 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
313 241
314 struct ieee80211_sub_if_data *sdata; 242 struct ieee80211_sub_if_data *sdata;
315 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 243 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
316 if (sdata->vif.type == NL80211_IFTYPE_STATION || 244 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
317 sdata->vif.type == NL80211_IFTYPE_ADHOC) {
318 int res = ieee80211_sta_get_ssid(sdata, ssid, &len); 245 int res = ieee80211_sta_get_ssid(sdata, ssid, &len);
319 if (res == 0) { 246 if (res == 0) {
320 data->length = len; 247 data->length = len;
@@ -322,6 +249,14 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
322 } else 249 } else
323 data->flags = 0; 250 data->flags = 0;
324 return res; 251 return res;
252 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
253 int res = ieee80211_ibss_get_ssid(sdata, ssid, &len);
254 if (res == 0) {
255 data->length = len;
256 data->flags = 1;
257 } else
258 data->flags = 0;
259 return res;
325 } 260 }
326 261
327 return -EOPNOTSUPP; 262 return -EOPNOTSUPP;
@@ -335,26 +270,35 @@ static int ieee80211_ioctl_siwap(struct net_device *dev,
335 struct ieee80211_sub_if_data *sdata; 270 struct ieee80211_sub_if_data *sdata;
336 271
337 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 272 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
338 if (sdata->vif.type == NL80211_IFTYPE_STATION || 273 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
339 sdata->vif.type == NL80211_IFTYPE_ADHOC) {
340 int ret; 274 int ret;
341 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { 275 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) {
342 memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data, 276 memcpy(sdata->u.mgd.bssid, (u8 *) &ap_addr->sa_data,
343 ETH_ALEN); 277 ETH_ALEN);
344 return 0; 278 return 0;
345 } 279 }
346 if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) 280 if (is_zero_ether_addr((u8 *) &ap_addr->sa_data))
347 sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL | 281 sdata->u.mgd.flags |= IEEE80211_STA_AUTO_BSSID_SEL |
348 IEEE80211_STA_AUTO_CHANNEL_SEL; 282 IEEE80211_STA_AUTO_CHANNEL_SEL;
349 else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) 283 else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data))
350 sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL; 284 sdata->u.mgd.flags |= IEEE80211_STA_AUTO_BSSID_SEL;
351 else 285 else
352 sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; 286 sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL;
353 ret = ieee80211_sta_set_bssid(sdata, (u8 *) &ap_addr->sa_data); 287 ret = ieee80211_sta_set_bssid(sdata, (u8 *) &ap_addr->sa_data);
354 if (ret) 288 if (ret)
355 return ret; 289 return ret;
356 ieee80211_sta_req_auth(sdata, &sdata->u.sta); 290 ieee80211_sta_req_auth(sdata);
357 return 0; 291 return 0;
292 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
293 if (is_zero_ether_addr((u8 *) &ap_addr->sa_data))
294 sdata->u.ibss.flags |= IEEE80211_IBSS_AUTO_BSSID_SEL |
295 IEEE80211_IBSS_AUTO_CHANNEL_SEL;
296 else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data))
297 sdata->u.ibss.flags |= IEEE80211_IBSS_AUTO_BSSID_SEL;
298 else
299 sdata->u.ibss.flags &= ~IEEE80211_IBSS_AUTO_BSSID_SEL;
300
301 return ieee80211_ibss_set_bssid(sdata, (u8 *) &ap_addr->sa_data);
358 } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { 302 } else if (sdata->vif.type == NL80211_IFTYPE_WDS) {
359 /* 303 /*
360 * If it is necessary to update the WDS peer address 304 * If it is necessary to update the WDS peer address
@@ -383,17 +327,20 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
383 struct ieee80211_sub_if_data *sdata; 327 struct ieee80211_sub_if_data *sdata;
384 328
385 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 329 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
386 if (sdata->vif.type == NL80211_IFTYPE_STATION || 330 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
387 sdata->vif.type == NL80211_IFTYPE_ADHOC) { 331 if (sdata->u.mgd.state == IEEE80211_STA_MLME_ASSOCIATED) {
388 if (sdata->u.sta.state == IEEE80211_STA_MLME_ASSOCIATED ||
389 sdata->u.sta.state == IEEE80211_STA_MLME_IBSS_JOINED) {
390 ap_addr->sa_family = ARPHRD_ETHER; 332 ap_addr->sa_family = ARPHRD_ETHER;
391 memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); 333 memcpy(&ap_addr->sa_data, sdata->u.mgd.bssid, ETH_ALEN);
392 return 0; 334 } else
393 } else {
394 memset(&ap_addr->sa_data, 0, ETH_ALEN); 335 memset(&ap_addr->sa_data, 0, ETH_ALEN);
395 return 0; 336 return 0;
396 } 337 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
338 if (sdata->u.ibss.state == IEEE80211_IBSS_MLME_JOINED) {
339 ap_addr->sa_family = ARPHRD_ETHER;
340 memcpy(&ap_addr->sa_data, sdata->u.ibss.bssid, ETH_ALEN);
341 } else
342 memset(&ap_addr->sa_data, 0, ETH_ALEN);
343 return 0;
397 } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { 344 } else if (sdata->vif.type == NL80211_IFTYPE_WDS) {
398 ap_addr->sa_family = ARPHRD_ETHER; 345 ap_addr->sa_family = ARPHRD_ETHER;
399 memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); 346 memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN);
@@ -404,58 +351,6 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
404} 351}
405 352
406 353
407static int ieee80211_ioctl_siwscan(struct net_device *dev,
408 struct iw_request_info *info,
409 union iwreq_data *wrqu, char *extra)
410{
411 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
412 struct iw_scan_req *req = NULL;
413 u8 *ssid = NULL;
414 size_t ssid_len = 0;
415
416 if (!netif_running(dev))
417 return -ENETDOWN;
418
419 if (sdata->vif.type != NL80211_IFTYPE_STATION &&
420 sdata->vif.type != NL80211_IFTYPE_ADHOC &&
421 sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
422 return -EOPNOTSUPP;
423
424 /* if SSID was specified explicitly then use that */
425 if (wrqu->data.length == sizeof(struct iw_scan_req) &&
426 wrqu->data.flags & IW_SCAN_THIS_ESSID) {
427 req = (struct iw_scan_req *)extra;
428 ssid = req->essid;
429 ssid_len = req->essid_len;
430 }
431
432 return ieee80211_request_scan(sdata, ssid, ssid_len);
433}
434
435
436static int ieee80211_ioctl_giwscan(struct net_device *dev,
437 struct iw_request_info *info,
438 struct iw_point *data, char *extra)
439{
440 int res;
441 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
442 struct ieee80211_sub_if_data *sdata;
443
444 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
445
446 if (local->sw_scanning || local->hw_scanning)
447 return -EAGAIN;
448
449 res = ieee80211_scan_results(local, info, extra, data->length);
450 if (res >= 0) {
451 data->length = res;
452 return 0;
453 }
454 data->length = 0;
455 return res;
456}
457
458
459static int ieee80211_ioctl_siwrate(struct net_device *dev, 354static int ieee80211_ioctl_siwrate(struct net_device *dev,
460 struct iw_request_info *info, 355 struct iw_request_info *info,
461 struct iw_param *rate, char *extra) 356 struct iw_param *rate, char *extra)
@@ -511,7 +406,7 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev,
511 406
512 rcu_read_lock(); 407 rcu_read_lock();
513 408
514 sta = sta_info_get(local, sdata->u.sta.bssid); 409 sta = sta_info_get(local, sdata->u.mgd.bssid);
515 410
516 if (sta && !(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) 411 if (sta && !(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS))
517 rate->value = sband->bitrates[sta->last_tx_rate.idx].bitrate; 412 rate->value = sband->bitrates[sta->last_tx_rate.idx].bitrate;
@@ -549,10 +444,9 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev,
549 else /* Automatic power level setting */ 444 else /* Automatic power level setting */
550 new_power_level = chan->max_power; 445 new_power_level = chan->max_power;
551 446
552 if (local->hw.conf.power_level != new_power_level) { 447 local->user_power_level = new_power_level;
553 local->hw.conf.power_level = new_power_level; 448 if (local->hw.conf.power_level != new_power_level)
554 reconf_flags |= IEEE80211_CONF_CHANGE_POWER; 449 reconf_flags |= IEEE80211_CONF_CHANGE_POWER;
555 }
556 450
557 if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { 451 if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) {
558 local->hw.conf.radio_enabled = !(data->txpower.disabled); 452 local->hw.conf.radio_enabled = !(data->txpower.disabled);
@@ -713,8 +607,7 @@ static int ieee80211_ioctl_siwmlme(struct net_device *dev,
713 struct iw_mlme *mlme = (struct iw_mlme *) extra; 607 struct iw_mlme *mlme = (struct iw_mlme *) extra;
714 608
715 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 609 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
716 if (sdata->vif.type != NL80211_IFTYPE_STATION && 610 if (!(sdata->vif.type == NL80211_IFTYPE_STATION))
717 sdata->vif.type != NL80211_IFTYPE_ADHOC)
718 return -EINVAL; 611 return -EINVAL;
719 612
720 switch (mlme->cmd) { 613 switch (mlme->cmd) {
@@ -810,8 +703,7 @@ static int ieee80211_ioctl_giwencode(struct net_device *dev,
810 erq->flags |= IW_ENCODE_ENABLED; 703 erq->flags |= IW_ENCODE_ENABLED;
811 704
812 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 705 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
813 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 706 switch (sdata->u.mgd.auth_alg) {
814 switch (ifsta->auth_alg) {
815 case WLAN_AUTH_OPEN: 707 case WLAN_AUTH_OPEN:
816 case WLAN_AUTH_LEAP: 708 case WLAN_AUTH_LEAP:
817 erq->flags |= IW_ENCODE_OPEN; 709 erq->flags |= IW_ENCODE_OPEN;
@@ -836,6 +728,9 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev,
836 int ret = 0, timeout = 0; 728 int ret = 0, timeout = 0;
837 bool ps; 729 bool ps;
838 730
731 if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS))
732 return -EOPNOTSUPP;
733
839 if (sdata->vif.type != NL80211_IFTYPE_STATION) 734 if (sdata->vif.type != NL80211_IFTYPE_STATION)
840 return -EINVAL; 735 return -EINVAL;
841 736
@@ -852,31 +747,49 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev,
852 ps = true; 747 ps = true;
853 break; 748 break;
854 default: /* Otherwise we ignore */ 749 default: /* Otherwise we ignore */
855 break; 750 return -EINVAL;
856 } 751 }
857 752
753 if (wrq->flags & ~(IW_POWER_MODE | IW_POWER_TIMEOUT))
754 return -EINVAL;
755
858 if (wrq->flags & IW_POWER_TIMEOUT) 756 if (wrq->flags & IW_POWER_TIMEOUT)
859 timeout = wrq->value / 1000; 757 timeout = wrq->value / 1000;
860 758
861set: 759 set:
862 if (ps == local->powersave && timeout == local->dynamic_ps_timeout) 760 if (ps == local->powersave && timeout == conf->dynamic_ps_timeout)
863 return ret; 761 return ret;
864 762
865 local->powersave = ps; 763 local->powersave = ps;
866 local->dynamic_ps_timeout = timeout; 764 conf->dynamic_ps_timeout = timeout;
867 765
868 if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { 766 if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)
869 if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) && 767 ret = ieee80211_hw_config(local,
870 local->dynamic_ps_timeout > 0) 768 IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT);
871 mod_timer(&local->dynamic_ps_timer, jiffies + 769
872 msecs_to_jiffies(local->dynamic_ps_timeout)); 770 if (!(sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED))
873 else { 771 return ret;
874 if (local->powersave) 772
875 conf->flags |= IEEE80211_CONF_PS; 773 if (conf->dynamic_ps_timeout > 0 &&
876 else 774 !(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)) {
877 conf->flags &= ~IEEE80211_CONF_PS; 775 mod_timer(&local->dynamic_ps_timer, jiffies +
776 msecs_to_jiffies(conf->dynamic_ps_timeout));
777 } else {
778 if (local->powersave) {
779 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
780 ieee80211_send_nullfunc(local, sdata, 1);
781 conf->flags |= IEEE80211_CONF_PS;
782 ret = ieee80211_hw_config(local,
783 IEEE80211_CONF_CHANGE_PS);
784 } else {
785 conf->flags &= ~IEEE80211_CONF_PS;
786 ret = ieee80211_hw_config(local,
787 IEEE80211_CONF_CHANGE_PS);
788 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
789 ieee80211_send_nullfunc(local, sdata, 0);
790 del_timer_sync(&local->dynamic_ps_timer);
791 cancel_work_sync(&local->dynamic_ps_enable_work);
878 } 792 }
879 ret = ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
880 } 793 }
881 794
882 return ret; 795 return ret;
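
The reworked siwpower handler above splits power save three ways: if the hardware supports dynamic PS, the timeout is handed to the driver via IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT; otherwise a non-zero timeout arms mac80211's dynamic_ps_timer; otherwise IEEE80211_CONF_PS is toggled immediately, bracketed by nullfunc frames when the hardware expects the stack to send them. The branch that picks between the software timer and the immediate toggle, reduced to a sketch with stand-in names:

#include <stdbool.h>

enum ps_action {
	PS_ARM_SW_TIMER,	/* mod_timer(&local->dynamic_ps_timer, ...) */
	PS_ENABLE_NOW,		/* set IEEE80211_CONF_PS (nullfunc first if needed) */
	PS_DISABLE_NOW,		/* clear IEEE80211_CONF_PS (nullfunc after if needed) */
};

/* 'hw_dynamic_ps' stands for IEEE80211_HW_SUPPORTS_DYNAMIC_PS, 'timeout'
 * for conf->dynamic_ps_timeout; illustrative, not the kernel code path. */
enum ps_action pick_ps_action(bool powersave, int timeout, bool hw_dynamic_ps)
{
	if (timeout > 0 && !hw_dynamic_ps)
		return PS_ARM_SW_TIMER;

	return powersave ? PS_ENABLE_NOW : PS_DISABLE_NOW;
}
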
@@ -903,11 +816,22 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
903 816
904 switch (data->flags & IW_AUTH_INDEX) { 817 switch (data->flags & IW_AUTH_INDEX) {
905 case IW_AUTH_WPA_VERSION: 818 case IW_AUTH_WPA_VERSION:
906 case IW_AUTH_CIPHER_PAIRWISE:
907 case IW_AUTH_CIPHER_GROUP: 819 case IW_AUTH_CIPHER_GROUP:
908 case IW_AUTH_WPA_ENABLED: 820 case IW_AUTH_WPA_ENABLED:
909 case IW_AUTH_RX_UNENCRYPTED_EAPOL: 821 case IW_AUTH_RX_UNENCRYPTED_EAPOL:
910 case IW_AUTH_KEY_MGMT: 822 case IW_AUTH_KEY_MGMT:
823 case IW_AUTH_CIPHER_GROUP_MGMT:
824 break;
825 case IW_AUTH_CIPHER_PAIRWISE:
826 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
827 if (data->value & (IW_AUTH_CIPHER_WEP40 |
828 IW_AUTH_CIPHER_WEP104 | IW_AUTH_CIPHER_TKIP))
829 sdata->u.mgd.flags |=
830 IEEE80211_STA_TKIP_WEP_USED;
831 else
832 sdata->u.mgd.flags &=
833 ~IEEE80211_STA_TKIP_WEP_USED;
834 }
911 break; 835 break;
912 case IW_AUTH_DROP_UNENCRYPTED: 836 case IW_AUTH_DROP_UNENCRYPTED:
913 sdata->drop_unencrypted = !!data->value; 837 sdata->drop_unencrypted = !!data->value;
@@ -916,24 +840,45 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
916 if (sdata->vif.type != NL80211_IFTYPE_STATION) 840 if (sdata->vif.type != NL80211_IFTYPE_STATION)
917 ret = -EINVAL; 841 ret = -EINVAL;
918 else { 842 else {
919 sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; 843 sdata->u.mgd.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
920 /* 844 /*
921 * Privacy invoked by wpa_supplicant, store the 845 * Privacy invoked by wpa_supplicant, store the
922 * value and allow associating to a protected 846 * value and allow associating to a protected
923 * network without having a key up front. 847 * network without having a key up front.
924 */ 848 */
925 if (data->value) 849 if (data->value)
926 sdata->u.sta.flags |= 850 sdata->u.mgd.flags |=
927 IEEE80211_STA_PRIVACY_INVOKED; 851 IEEE80211_STA_PRIVACY_INVOKED;
928 } 852 }
929 break; 853 break;
930 case IW_AUTH_80211_AUTH_ALG: 854 case IW_AUTH_80211_AUTH_ALG:
931 if (sdata->vif.type == NL80211_IFTYPE_STATION || 855 if (sdata->vif.type == NL80211_IFTYPE_STATION)
932 sdata->vif.type == NL80211_IFTYPE_ADHOC) 856 sdata->u.mgd.auth_algs = data->value;
933 sdata->u.sta.auth_algs = data->value;
934 else 857 else
935 ret = -EOPNOTSUPP; 858 ret = -EOPNOTSUPP;
936 break; 859 break;
860 case IW_AUTH_MFP:
861 if (!(sdata->local->hw.flags & IEEE80211_HW_MFP_CAPABLE)) {
862 ret = -EOPNOTSUPP;
863 break;
864 }
865 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
866 switch (data->value) {
867 case IW_AUTH_MFP_DISABLED:
868 sdata->u.mgd.mfp = IEEE80211_MFP_DISABLED;
869 break;
870 case IW_AUTH_MFP_OPTIONAL:
871 sdata->u.mgd.mfp = IEEE80211_MFP_OPTIONAL;
872 break;
873 case IW_AUTH_MFP_REQUIRED:
874 sdata->u.mgd.mfp = IEEE80211_MFP_REQUIRED;
875 break;
876 default:
877 ret = -EINVAL;
878 }
879 } else
880 ret = -EOPNOTSUPP;
881 break;
937 default: 882 default:
938 ret = -EOPNOTSUPP; 883 ret = -EOPNOTSUPP;
939 break; 884 break;
@@ -941,6 +886,21 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
941 return ret; 886 return ret;
942} 887}
943 888
889static u8 ieee80211_get_wstats_flags(struct ieee80211_local *local)
890{
891 u8 wstats_flags = 0;
892
893 wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC |
894 IEEE80211_HW_SIGNAL_DBM) ?
895 IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID;
896 wstats_flags |= local->hw.flags & IEEE80211_HW_NOISE_DBM ?
897 IW_QUAL_NOISE_UPDATED : IW_QUAL_NOISE_INVALID;
898 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
899 wstats_flags |= IW_QUAL_DBM;
900
901 return wstats_flags;
902}
903
944/* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */ 904/* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */
945static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev) 905static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev)
946{ 906{
@@ -951,9 +911,9 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev
951 911
952 rcu_read_lock(); 912 rcu_read_lock();
953 913
954 if (sdata->vif.type == NL80211_IFTYPE_STATION || 914 if (sdata->vif.type == NL80211_IFTYPE_STATION)
955 sdata->vif.type == NL80211_IFTYPE_ADHOC) 915 sta = sta_info_get(local, sdata->u.mgd.bssid);
956 sta = sta_info_get(local, sdata->u.sta.bssid); 916
957 if (!sta) { 917 if (!sta) {
958 wstats->discard.fragment = 0; 918 wstats->discard.fragment = 0;
959 wstats->discard.misc = 0; 919 wstats->discard.misc = 0;
@@ -965,7 +925,7 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev
965 wstats->qual.level = sta->last_signal; 925 wstats->qual.level = sta->last_signal;
966 wstats->qual.qual = sta->last_qual; 926 wstats->qual.qual = sta->last_qual;
967 wstats->qual.noise = sta->last_noise; 927 wstats->qual.noise = sta->last_noise;
968 wstats->qual.updated = local->wstats_flags; 928 wstats->qual.updated = ieee80211_get_wstats_flags(local);
969 } 929 }
970 930
971 rcu_read_unlock(); 931 rcu_read_unlock();
@@ -982,9 +942,8 @@ static int ieee80211_ioctl_giwauth(struct net_device *dev,
982 942
983 switch (data->flags & IW_AUTH_INDEX) { 943 switch (data->flags & IW_AUTH_INDEX) {
984 case IW_AUTH_80211_AUTH_ALG: 944 case IW_AUTH_80211_AUTH_ALG:
985 if (sdata->vif.type == NL80211_IFTYPE_STATION || 945 if (sdata->vif.type == NL80211_IFTYPE_STATION)
986 sdata->vif.type == NL80211_IFTYPE_ADHOC) 946 data->value = sdata->u.mgd.auth_algs;
987 data->value = sdata->u.sta.auth_algs;
988 else 947 else
989 ret = -EOPNOTSUPP; 948 ret = -EOPNOTSUPP;
990 break; 949 break;
@@ -1017,6 +976,9 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev,
1017 case IW_ENCODE_ALG_CCMP: 976 case IW_ENCODE_ALG_CCMP:
1018 alg = ALG_CCMP; 977 alg = ALG_CCMP;
1019 break; 978 break;
979 case IW_ENCODE_ALG_AES_CMAC:
980 alg = ALG_AES_CMAC;
981 break;
1020 default: 982 default:
1021 return -EOPNOTSUPP; 983 return -EOPNOTSUPP;
1022 } 984 }
@@ -1025,20 +987,41 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev,
1025 remove = 1; 987 remove = 1;
1026 988
1027 idx = erq->flags & IW_ENCODE_INDEX; 989 idx = erq->flags & IW_ENCODE_INDEX;
1028 if (idx < 1 || idx > 4) { 990 if (alg == ALG_AES_CMAC) {
1029 idx = -1; 991 if (idx < NUM_DEFAULT_KEYS + 1 ||
1030 if (!sdata->default_key) 992 idx > NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) {
1031 idx = 0; 993 idx = -1;
1032 else for (i = 0; i < NUM_DEFAULT_KEYS; i++) { 994 if (!sdata->default_mgmt_key)
1033 if (sdata->default_key == sdata->keys[i]) { 995 idx = 0;
1034 idx = i; 996 else for (i = NUM_DEFAULT_KEYS;
1035 break; 997 i < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS;
998 i++) {
999 if (sdata->default_mgmt_key == sdata->keys[i])
1000 {
1001 idx = i;
1002 break;
1003 }
1036 } 1004 }
1037 } 1005 if (idx < 0)
1038 if (idx < 0) 1006 return -EINVAL;
1039 return -EINVAL; 1007 } else
1040 } else 1008 idx--;
1041 idx--; 1009 } else {
1010 if (idx < 1 || idx > 4) {
1011 idx = -1;
1012 if (!sdata->default_key)
1013 idx = 0;
1014 else for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
1015 if (sdata->default_key == sdata->keys[i]) {
1016 idx = i;
1017 break;
1018 }
1019 }
1020 if (idx < 0)
1021 return -EINVAL;
1022 } else
1023 idx--;
1024 }
1042 1025
1043 return ieee80211_set_encryption(sdata, ext->addr.sa_data, idx, alg, 1026 return ieee80211_set_encryption(sdata, ext->addr.sa_data, idx, alg,
1044 remove, 1027 remove,
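
The index handling above accepts user indices 1..NUM_DEFAULT_KEYS for ordinary keys and NUM_DEFAULT_KEYS+1..NUM_DEFAULT_KEYS+NUM_DEFAULT_MGMT_KEYS for BIP (AES-CMAC) management keys, converting them to 0-based key slots; when no index is given, it falls back to the current default (management) key. A boiled-down validation sketch, assuming the mac80211 values of 4 and 2 for the two constants and leaving the default-key fallback out:

#include <stdbool.h>

#define NUM_DEFAULT_KEYS      4
#define NUM_DEFAULT_MGMT_KEYS 2

/* Convert a 1-based wext key index to a 0-based key slot.  Management
 * (AES-CMAC) keys occupy the slots directly after the ordinary ones.
 * Returns the slot or -1 for an out-of-range index. */
int wext_idx_to_slot(int idx, bool mgmt_key)
{
	if (mgmt_key) {
		if (idx < NUM_DEFAULT_KEYS + 1 ||
		    idx > NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
			return -1;
	} else {
		if (idx < 1 || idx > NUM_DEFAULT_KEYS)
			return -1;
	}
	return idx - 1;
}
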
@@ -1063,7 +1046,7 @@ static const iw_handler ieee80211_handler[] =
1063 (iw_handler) NULL, /* SIOCSIWSENS */ 1046 (iw_handler) NULL, /* SIOCSIWSENS */
1064 (iw_handler) NULL, /* SIOCGIWSENS */ 1047 (iw_handler) NULL, /* SIOCGIWSENS */
1065 (iw_handler) NULL /* not used */, /* SIOCSIWRANGE */ 1048 (iw_handler) NULL /* not used */, /* SIOCSIWRANGE */
1066 (iw_handler) ieee80211_ioctl_giwrange, /* SIOCGIWRANGE */ 1049 (iw_handler) cfg80211_wext_giwrange, /* SIOCGIWRANGE */
1067 (iw_handler) NULL /* not used */, /* SIOCSIWPRIV */ 1050 (iw_handler) NULL /* not used */, /* SIOCSIWPRIV */
1068 (iw_handler) NULL /* kernel code */, /* SIOCGIWPRIV */ 1051 (iw_handler) NULL /* kernel code */, /* SIOCGIWPRIV */
1069 (iw_handler) NULL /* not used */, /* SIOCSIWSTATS */ 1052 (iw_handler) NULL /* not used */, /* SIOCSIWSTATS */
@@ -1076,8 +1059,8 @@ static const iw_handler ieee80211_handler[] =
1076 (iw_handler) ieee80211_ioctl_giwap, /* SIOCGIWAP */ 1059 (iw_handler) ieee80211_ioctl_giwap, /* SIOCGIWAP */
1077 (iw_handler) ieee80211_ioctl_siwmlme, /* SIOCSIWMLME */ 1060 (iw_handler) ieee80211_ioctl_siwmlme, /* SIOCSIWMLME */
1078 (iw_handler) NULL, /* SIOCGIWAPLIST */ 1061 (iw_handler) NULL, /* SIOCGIWAPLIST */
1079 (iw_handler) ieee80211_ioctl_siwscan, /* SIOCSIWSCAN */ 1062 (iw_handler) cfg80211_wext_siwscan, /* SIOCSIWSCAN */
1080 (iw_handler) ieee80211_ioctl_giwscan, /* SIOCGIWSCAN */ 1063 (iw_handler) cfg80211_wext_giwscan, /* SIOCGIWSCAN */
1081 (iw_handler) ieee80211_ioctl_siwessid, /* SIOCSIWESSID */ 1064 (iw_handler) ieee80211_ioctl_siwessid, /* SIOCSIWESSID */
1082 (iw_handler) ieee80211_ioctl_giwessid, /* SIOCGIWESSID */ 1065 (iw_handler) ieee80211_ioctl_giwessid, /* SIOCGIWESSID */
1083 (iw_handler) NULL, /* SIOCSIWNICKN */ 1066 (iw_handler) NULL, /* SIOCSIWNICKN */
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index ac71b38f7cb5..093a4ab7f28b 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -114,9 +114,7 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb)
114{ 114{
115 struct ieee80211_master_priv *mpriv = netdev_priv(dev); 115 struct ieee80211_master_priv *mpriv = netdev_priv(dev);
116 struct ieee80211_local *local = mpriv->local; 116 struct ieee80211_local *local = mpriv->local;
117 struct ieee80211_hw *hw = &local->hw;
118 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 117 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
119 struct sta_info *sta;
120 u16 queue; 118 u16 queue;
121 u8 tid; 119 u8 tid;
122 120
@@ -124,29 +122,11 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb)
124 if (unlikely(queue >= local->hw.queues)) 122 if (unlikely(queue >= local->hw.queues))
125 queue = local->hw.queues - 1; 123 queue = local->hw.queues - 1;
126 124
127 if (skb->requeue) { 125 /*
128 if (!hw->ampdu_queues) 126 * Now we know the 1d priority, fill in the QoS header if
129 return queue; 127 * there is one (and we haven't done this before).
130
131 rcu_read_lock();
132 sta = sta_info_get(local, hdr->addr1);
133 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
134 if (sta) {
135 int ampdu_queue = sta->tid_to_tx_q[tid];
136
137 if ((ampdu_queue < ieee80211_num_queues(hw)) &&
138 test_bit(ampdu_queue, local->queue_pool))
139 queue = ampdu_queue;
140 }
141 rcu_read_unlock();
142
143 return queue;
144 }
145
146 /* Now we know the 1d priority, fill in the QoS header if
147 * there is one.
148 */ 128 */
149 if (ieee80211_is_data_qos(hdr->frame_control)) { 129 if (!skb->requeue && ieee80211_is_data_qos(hdr->frame_control)) {
150 u8 *p = ieee80211_get_qos_ctl(hdr); 130 u8 *p = ieee80211_get_qos_ctl(hdr);
151 u8 ack_policy = 0; 131 u8 ack_policy = 0;
152 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; 132 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
@@ -156,140 +136,7 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb)
156 /* qos header is 2 bytes, second reserved */ 136 /* qos header is 2 bytes, second reserved */
157 *p++ = ack_policy | tid; 137 *p++ = ack_policy | tid;
158 *p = 0; 138 *p = 0;
159
160 if (!hw->ampdu_queues)
161 return queue;
162
163 rcu_read_lock();
164
165 sta = sta_info_get(local, hdr->addr1);
166 if (sta) {
167 int ampdu_queue = sta->tid_to_tx_q[tid];
168
169 if ((ampdu_queue < ieee80211_num_queues(hw)) &&
170 test_bit(ampdu_queue, local->queue_pool))
171 queue = ampdu_queue;
172 }
173
174 rcu_read_unlock();
175 } 139 }
176 140
177 return queue; 141 return queue;
178} 142}
179
180int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
181 struct sta_info *sta, u16 tid)
182{
183 int i;
184
185 /* XXX: currently broken due to cb/requeue use */
186 return -EPERM;
187
188 /* prepare the filter and save it for the SW queue
189 * matching the received HW queue */
190
191 if (!local->hw.ampdu_queues)
192 return -EPERM;
193
194 /* try to get a Qdisc from the pool */
195 for (i = local->hw.queues; i < ieee80211_num_queues(&local->hw); i++)
196 if (!test_and_set_bit(i, local->queue_pool)) {
197 ieee80211_stop_queue(local_to_hw(local), i);
198 sta->tid_to_tx_q[tid] = i;
199
200 /* IF there are already pending packets
201 * on this tid first we need to drain them
202 * on the previous queue
203 * since HT is strict in order */
204#ifdef CONFIG_MAC80211_HT_DEBUG
205 if (net_ratelimit())
206 printk(KERN_DEBUG "allocated aggregation queue"
207 " %d tid %d addr %pM pool=0x%lX\n",
208 i, tid, sta->sta.addr,
209 local->queue_pool[0]);
210#endif /* CONFIG_MAC80211_HT_DEBUG */
211 return 0;
212 }
213
214 return -EAGAIN;
215}
216
217/**
218 * the caller needs to hold netdev_get_tx_queue(local->mdev, X)->lock
219 */
220void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
221 struct sta_info *sta, u16 tid,
222 u8 requeue)
223{
224 int agg_queue = sta->tid_to_tx_q[tid];
225 struct ieee80211_hw *hw = &local->hw;
226
227 /* return the qdisc to the pool */
228 clear_bit(agg_queue, local->queue_pool);
229 sta->tid_to_tx_q[tid] = ieee80211_num_queues(hw);
230
231 if (requeue) {
232 ieee80211_requeue(local, agg_queue);
233 } else {
234 struct netdev_queue *txq;
235 spinlock_t *root_lock;
236 struct Qdisc *q;
237
238 txq = netdev_get_tx_queue(local->mdev, agg_queue);
239 q = rcu_dereference(txq->qdisc);
240 root_lock = qdisc_lock(q);
241
242 spin_lock_bh(root_lock);
243 qdisc_reset(q);
244 spin_unlock_bh(root_lock);
245 }
246}
247
248void ieee80211_requeue(struct ieee80211_local *local, int queue)
249{
250 struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, queue);
251 struct sk_buff_head list;
252 spinlock_t *root_lock;
253 struct Qdisc *qdisc;
254 u32 len;
255
256 rcu_read_lock_bh();
257
258 qdisc = rcu_dereference(txq->qdisc);
259 if (!qdisc || !qdisc->dequeue)
260 goto out_unlock;
261
262 skb_queue_head_init(&list);
263
264 root_lock = qdisc_root_lock(qdisc);
265 spin_lock(root_lock);
266 for (len = qdisc->q.qlen; len > 0; len--) {
267 struct sk_buff *skb = qdisc->dequeue(qdisc);
268
269 if (skb)
270 __skb_queue_tail(&list, skb);
271 }
272 spin_unlock(root_lock);
273
274 for (len = list.qlen; len > 0; len--) {
275 struct sk_buff *skb = __skb_dequeue(&list);
276 u16 new_queue;
277
278 BUG_ON(!skb);
279 new_queue = ieee80211_select_queue(local->mdev, skb);
280 skb_set_queue_mapping(skb, new_queue);
281
282 txq = netdev_get_tx_queue(local->mdev, new_queue);
283
284
285 qdisc = rcu_dereference(txq->qdisc);
286 root_lock = qdisc_root_lock(qdisc);
287
288 spin_lock(root_lock);
289 qdisc_enqueue_root(skb, qdisc);
290 spin_unlock(root_lock);
291 }
292
293out_unlock:
294 rcu_read_unlock_bh();
295}
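
With the aggregation-queue bookkeeping removed, ieee80211_select_queue() above keeps only the step that fills the 2-byte QoS Control field from the skb priority for QoS data frames that haven't been requeued. That step in isolation; the helper name is made up and the mask mirrors IEEE80211_QOS_CTL_TAG1D_MASK:

#include <stdint.h>

#define QOS_CTL_TAG1D_MASK 0x07

/* Write the TID (low 3 priority bits) and ack policy into the first QoS
 * Control octet; the second octet is reserved and cleared. */
void fill_qos_ctl(uint8_t qos_ctl[2], uint32_t priority, uint8_t ack_policy)
{
	qos_ctl[0] = ack_policy | (priority & QOS_CTL_TAG1D_MASK);
	qos_ctl[1] = 0;
}
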
diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h
index bc62f28a4d3d..7520d2e014dc 100644
--- a/net/mac80211/wme.h
+++ b/net/mac80211/wme.h
@@ -21,11 +21,5 @@
21extern const int ieee802_1d_to_ac[8]; 21extern const int ieee802_1d_to_ac[8];
22 22
23u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb); 23u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb);
24int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
25 struct sta_info *sta, u16 tid);
26void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
27 struct sta_info *sta, u16 tid,
28 u8 requeue);
29void ieee80211_requeue(struct ieee80211_local *local, int queue);
30 24
31#endif /* _WME_H */ 25#endif /* _WME_H */
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 7aa63caf8d50..9101b48ec2ae 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright 2002-2004, Instant802 Networks, Inc. 2 * Copyright 2002-2004, Instant802 Networks, Inc.
3 * Copyright 2008, Jouni Malinen <j@w1.fi>
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
@@ -19,6 +20,7 @@
19#include "michael.h" 20#include "michael.h"
20#include "tkip.h" 21#include "tkip.h"
21#include "aes_ccm.h" 22#include "aes_ccm.h"
23#include "aes_cmac.h"
22#include "wpa.h" 24#include "wpa.h"
23 25
24ieee80211_tx_result 26ieee80211_tx_result
@@ -266,7 +268,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
266 int encrypted) 268 int encrypted)
267{ 269{
268 __le16 mask_fc; 270 __le16 mask_fc;
269 int a4_included; 271 int a4_included, mgmt;
270 u8 qos_tid; 272 u8 qos_tid;
271 u8 *b_0, *aad; 273 u8 *b_0, *aad;
272 u16 data_len, len_a; 274 u16 data_len, len_a;
@@ -277,12 +279,15 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
277 aad = scratch + 4 * AES_BLOCK_LEN; 279 aad = scratch + 4 * AES_BLOCK_LEN;
278 280
279 /* 281 /*
280 * Mask FC: zero subtype b4 b5 b6 282 * Mask FC: zero subtype b4 b5 b6 (if not mgmt)
281 * Retry, PwrMgt, MoreData; set Protected 283 * Retry, PwrMgt, MoreData; set Protected
282 */ 284 */
285 mgmt = ieee80211_is_mgmt(hdr->frame_control);
283 mask_fc = hdr->frame_control; 286 mask_fc = hdr->frame_control;
284 mask_fc &= ~cpu_to_le16(0x0070 | IEEE80211_FCTL_RETRY | 287 mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_RETRY |
285 IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA); 288 IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA);
289 if (!mgmt)
290 mask_fc &= ~cpu_to_le16(0x0070);
286 mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); 291 mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
287 292
288 hdrlen = ieee80211_hdrlen(hdr->frame_control); 293 hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -300,8 +305,10 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
300 305
301 /* First block, b_0 */ 306 /* First block, b_0 */
302 b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */ 307 b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */
303 /* Nonce: QoS Priority | A2 | PN */ 308 /* Nonce: Nonce Flags | A2 | PN
304 b_0[1] = qos_tid; 309 * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7)
310 */
311 b_0[1] = qos_tid | (mgmt << 4);
305 memcpy(&b_0[2], hdr->addr2, ETH_ALEN); 312 memcpy(&b_0[2], hdr->addr2, ETH_ALEN);
306 memcpy(&b_0[8], pn, CCMP_PN_LEN); 313 memcpy(&b_0[8], pn, CCMP_PN_LEN);
307 /* l(m) */ 314 /* l(m) */
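
The CCMP changes above extend the AAD/nonce construction to robust management frames: the frame-control subtype bits are only masked out for data frames, and the nonce flags octet now carries the QoS priority in bits 0..3 plus a management bit in bit 4. Building the 13-byte nonce in isolation, as a sketch rather than the kernel function:

#include <stdint.h>
#include <string.h>

#define ETH_ALEN     6
#define CCMP_PN_LEN  6

/* CCMP nonce = Nonce Flags (1) || A2 (6) || PN (6), 13 bytes total.
 * Nonce Flags: priority in bits 0..3, "management frame" in bit 4,
 * bits 5..7 reserved. */
void ccmp_build_nonce(uint8_t nonce[13], uint8_t qos_tid, int mgmt,
		      const uint8_t a2[ETH_ALEN], const uint8_t pn[CCMP_PN_LEN])
{
	nonce[0] = (qos_tid & 0x0f) | ((mgmt ? 1 : 0) << 4);
	memcpy(&nonce[1], a2, ETH_ALEN);
	memcpy(&nonce[7], pn, CCMP_PN_LEN);
}
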
@@ -360,9 +367,14 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
360 int hdrlen, len, tail; 367 int hdrlen, len, tail;
361 u8 *pos, *pn; 368 u8 *pos, *pn;
362 int i; 369 int i;
370 bool skip_hw;
371
372 skip_hw = (tx->key->conf.flags & IEEE80211_KEY_FLAG_SW_MGMT) &&
373 ieee80211_is_mgmt(hdr->frame_control);
363 374
364 if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && 375 if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) &&
365 !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { 376 !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) &&
377 !skip_hw) {
366 /* hwaccel - with no need for preallocated room for CCMP 378 /* hwaccel - with no need for preallocated room for CCMP
367 * header or MIC fields */ 379 * header or MIC fields */
368 info->control.hw_key = &tx->key->conf; 380 info->control.hw_key = &tx->key->conf;
@@ -397,7 +409,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
397 409
398 ccmp_pn2hdr(pos, pn, key->conf.keyidx); 410 ccmp_pn2hdr(pos, pn, key->conf.keyidx);
399 411
400 if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { 412 if ((key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !skip_hw) {
401 /* hwaccel - with preallocated room for CCMP header */ 413 /* hwaccel - with preallocated room for CCMP header */
402 info->control.hw_key = &tx->key->conf; 414 info->control.hw_key = &tx->key->conf;
403 return 0; 415 return 0;
@@ -446,7 +458,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
446 458
447 hdrlen = ieee80211_hdrlen(hdr->frame_control); 459 hdrlen = ieee80211_hdrlen(hdr->frame_control);
448 460
449 if (!ieee80211_is_data(hdr->frame_control)) 461 if (!ieee80211_is_data(hdr->frame_control) &&
462 !ieee80211_is_robust_mgmt_frame(hdr))
450 return RX_CONTINUE; 463 return RX_CONTINUE;
451 464
452 data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN; 465 data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN;
@@ -485,3 +498,126 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
485 498
486 return RX_CONTINUE; 499 return RX_CONTINUE;
487} 500}
501
502
503static void bip_aad(struct sk_buff *skb, u8 *aad)
504{
505 /* BIP AAD: FC(masked) || A1 || A2 || A3 */
506
507 /* FC type/subtype */
508 aad[0] = skb->data[0];
509 /* Mask FC Retry, PwrMgt, MoreData flags to zero */
510 aad[1] = skb->data[1] & ~(BIT(4) | BIT(5) | BIT(6));
511 /* A1 || A2 || A3 */
512 memcpy(aad + 2, skb->data + 4, 3 * ETH_ALEN);
513}
514
515
516static inline void bip_ipn_swap(u8 *d, const u8 *s)
517{
518 *d++ = s[5];
519 *d++ = s[4];
520 *d++ = s[3];
521 *d++ = s[2];
522 *d++ = s[1];
523 *d = s[0];
524}
525
526
527ieee80211_tx_result
528ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx)
529{
530 struct sk_buff *skb = tx->skb;
531 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
532 struct ieee80211_key *key = tx->key;
533 struct ieee80211_mmie *mmie;
534 u8 *pn, aad[20];
535 int i;
536
537 if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
538 /* hwaccel */
539 info->control.hw_key = &tx->key->conf;
540 return 0;
541 }
542
543 if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
544 return TX_DROP;
545
546 mmie = (struct ieee80211_mmie *) skb_put(skb, sizeof(*mmie));
547 mmie->element_id = WLAN_EID_MMIE;
548 mmie->length = sizeof(*mmie) - 2;
549 mmie->key_id = cpu_to_le16(key->conf.keyidx);
550
551 /* PN = PN + 1 */
552 pn = key->u.aes_cmac.tx_pn;
553
554 for (i = sizeof(key->u.aes_cmac.tx_pn) - 1; i >= 0; i--) {
555 pn[i]++;
556 if (pn[i])
557 break;
558 }
559 bip_ipn_swap(mmie->sequence_number, pn);
560
561 bip_aad(skb, aad);
562
563 /*
564 * MIC = AES-128-CMAC(IGTK, AAD || Management Frame Body || MMIE, 64)
565 */
566 ieee80211_aes_cmac(key->u.aes_cmac.tfm, key->u.aes_cmac.tx_crypto_buf,
567 aad, skb->data + 24, skb->len - 24, mmie->mic);
568
569 return TX_CONTINUE;
570}
571
572
573ieee80211_rx_result
574ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
575{
576 struct sk_buff *skb = rx->skb;
577 struct ieee80211_key *key = rx->key;
578 struct ieee80211_mmie *mmie;
579 u8 aad[20], mic[8], ipn[6];
580 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
581
582 if (!ieee80211_is_mgmt(hdr->frame_control))
583 return RX_CONTINUE;
584
585 if ((rx->status->flag & RX_FLAG_DECRYPTED) &&
586 (rx->status->flag & RX_FLAG_IV_STRIPPED))
587 return RX_CONTINUE;
588
589 if (skb->len < 24 + sizeof(*mmie))
590 return RX_DROP_UNUSABLE;
591
592 mmie = (struct ieee80211_mmie *)
593 (skb->data + skb->len - sizeof(*mmie));
594 if (mmie->element_id != WLAN_EID_MMIE ||
595 mmie->length != sizeof(*mmie) - 2)
596 return RX_DROP_UNUSABLE; /* Invalid MMIE */
597
598 bip_ipn_swap(ipn, mmie->sequence_number);
599
600 if (memcmp(ipn, key->u.aes_cmac.rx_pn, 6) <= 0) {
601 key->u.aes_cmac.replays++;
602 return RX_DROP_UNUSABLE;
603 }
604
605 if (!(rx->status->flag & RX_FLAG_DECRYPTED)) {
606 /* hardware didn't decrypt/verify MIC */
607 bip_aad(skb, aad);
608 ieee80211_aes_cmac(key->u.aes_cmac.tfm,
609 key->u.aes_cmac.rx_crypto_buf, aad,
610 skb->data + 24, skb->len - 24, mic);
611 if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
612 key->u.aes_cmac.icverrors++;
613 return RX_DROP_UNUSABLE;
614 }
615 }
616
617 memcpy(key->u.aes_cmac.rx_pn, ipn, 6);
618
619 /* Remove MMIE */
620 skb_trim(skb, skb->len - sizeof(*mmie));
621
622 return RX_CONTINUE;
623}
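
The BIP receive path above swaps the IPN from the little-endian on-air order in the MMIE into big-endian so a plain memcmp() orders packet numbers correctly, then rejects anything not strictly greater than the last accepted value. The swap and the replay comparison as stand-alone sketches of the same idea, not the kernel helpers:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* The MMIE carries the 6-byte IPN least-significant byte first; reverse it
 * so that lexicographic comparison matches numeric comparison. */
void bip_ipn_to_be(uint8_t *d, const uint8_t *s)
{
	int i;

	for (i = 0; i < 6; i++)
		d[i] = s[5 - i];
}

/* Replay check: the received IPN must be strictly greater than the last
 * accepted one (both already in big-endian byte order). */
bool bip_ipn_is_replay(const uint8_t rx_ipn[6], const uint8_t last_ipn[6])
{
	return memcmp(rx_ipn, last_ipn, 6) <= 0;
}
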
diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h
index d42d221d8a1d..baba0608313e 100644
--- a/net/mac80211/wpa.h
+++ b/net/mac80211/wpa.h
@@ -28,4 +28,9 @@ ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx);
28ieee80211_rx_result 28ieee80211_rx_result
29ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx); 29ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx);
30 30
31ieee80211_tx_result
32ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx);
33ieee80211_rx_result
34ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx);
35
31#endif /* WPA_H */ 36#endif /* WPA_H */
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 6be5d4efa51b..5c48378a852f 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -149,8 +149,8 @@ static struct task_struct *sync_backup_thread;
149/* multicast addr */ 149/* multicast addr */
150static struct sockaddr_in mcast_addr = { 150static struct sockaddr_in mcast_addr = {
151 .sin_family = AF_INET, 151 .sin_family = AF_INET,
152 .sin_port = __constant_htons(IP_VS_SYNC_PORT), 152 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT),
153 .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP), 153 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
154}; 154};
155 155
156 156
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 4f8fcf498545..07d9d8857e5d 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -177,7 +177,7 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = {
177 .me = THIS_MODULE, 177 .me = THIS_MODULE,
178 .help = amanda_help, 178 .help = amanda_help,
179 .tuple.src.l3num = AF_INET, 179 .tuple.src.l3num = AF_INET,
180 .tuple.src.u.udp.port = __constant_htons(10080), 180 .tuple.src.u.udp.port = cpu_to_be16(10080),
181 .tuple.dst.protonum = IPPROTO_UDP, 181 .tuple.dst.protonum = IPPROTO_UDP,
182 .expect_policy = &amanda_exp_policy, 182 .expect_policy = &amanda_exp_policy,
183 }, 183 },
@@ -186,7 +186,7 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = {
186 .me = THIS_MODULE, 186 .me = THIS_MODULE,
187 .help = amanda_help, 187 .help = amanda_help,
188 .tuple.src.l3num = AF_INET6, 188 .tuple.src.l3num = AF_INET6,
189 .tuple.src.u.udp.port = __constant_htons(10080), 189 .tuple.src.u.udp.port = cpu_to_be16(10080),
190 .tuple.dst.protonum = IPPROTO_UDP, 190 .tuple.dst.protonum = IPPROTO_UDP,
191 .expect_policy = &amanda_exp_policy, 191 .expect_policy = &amanda_exp_policy,
192 }, 192 },
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 687bd633c3d7..66369490230e 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1167,7 +1167,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
1167 .name = "Q.931", 1167 .name = "Q.931",
1168 .me = THIS_MODULE, 1168 .me = THIS_MODULE,
1169 .tuple.src.l3num = AF_INET, 1169 .tuple.src.l3num = AF_INET,
1170 .tuple.src.u.tcp.port = __constant_htons(Q931_PORT), 1170 .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT),
1171 .tuple.dst.protonum = IPPROTO_TCP, 1171 .tuple.dst.protonum = IPPROTO_TCP,
1172 .help = q931_help, 1172 .help = q931_help,
1173 .expect_policy = &q931_exp_policy, 1173 .expect_policy = &q931_exp_policy,
@@ -1176,7 +1176,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
1176 .name = "Q.931", 1176 .name = "Q.931",
1177 .me = THIS_MODULE, 1177 .me = THIS_MODULE,
1178 .tuple.src.l3num = AF_INET6, 1178 .tuple.src.l3num = AF_INET6,
1179 .tuple.src.u.tcp.port = __constant_htons(Q931_PORT), 1179 .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT),
1180 .tuple.dst.protonum = IPPROTO_TCP, 1180 .tuple.dst.protonum = IPPROTO_TCP,
1181 .help = q931_help, 1181 .help = q931_help,
1182 .expect_policy = &q931_exp_policy, 1182 .expect_policy = &q931_exp_policy,
@@ -1741,7 +1741,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
1741 .name = "RAS", 1741 .name = "RAS",
1742 .me = THIS_MODULE, 1742 .me = THIS_MODULE,
1743 .tuple.src.l3num = AF_INET, 1743 .tuple.src.l3num = AF_INET,
1744 .tuple.src.u.udp.port = __constant_htons(RAS_PORT), 1744 .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT),
1745 .tuple.dst.protonum = IPPROTO_UDP, 1745 .tuple.dst.protonum = IPPROTO_UDP,
1746 .help = ras_help, 1746 .help = ras_help,
1747 .expect_policy = &ras_exp_policy, 1747 .expect_policy = &ras_exp_policy,
@@ -1750,7 +1750,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
1750 .name = "RAS", 1750 .name = "RAS",
1751 .me = THIS_MODULE, 1751 .me = THIS_MODULE,
1752 .tuple.src.l3num = AF_INET6, 1752 .tuple.src.l3num = AF_INET6,
1753 .tuple.src.u.udp.port = __constant_htons(RAS_PORT), 1753 .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT),
1754 .tuple.dst.protonum = IPPROTO_UDP, 1754 .tuple.dst.protonum = IPPROTO_UDP,
1755 .help = ras_help, 1755 .help = ras_help,
1756 .expect_policy = &ras_exp_policy, 1756 .expect_policy = &ras_exp_policy,
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 5af4273b4668..8a3875e36ec2 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -105,7 +105,7 @@ static struct nf_conntrack_expect_policy exp_policy = {
105static struct nf_conntrack_helper helper __read_mostly = { 105static struct nf_conntrack_helper helper __read_mostly = {
106 .name = "netbios-ns", 106 .name = "netbios-ns",
107 .tuple.src.l3num = AF_INET, 107 .tuple.src.l3num = AF_INET,
108 .tuple.src.u.udp.port = __constant_htons(NMBD_PORT), 108 .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT),
109 .tuple.dst.protonum = IPPROTO_UDP, 109 .tuple.dst.protonum = IPPROTO_UDP,
110 .me = THIS_MODULE, 110 .me = THIS_MODULE,
111 .help = help, 111 .help = help,
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 9e169ef2e854..72cca638a82d 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -591,7 +591,7 @@ static struct nf_conntrack_helper pptp __read_mostly = {
591 .name = "pptp", 591 .name = "pptp",
592 .me = THIS_MODULE, 592 .me = THIS_MODULE,
593 .tuple.src.l3num = AF_INET, 593 .tuple.src.l3num = AF_INET,
594 .tuple.src.u.tcp.port = __constant_htons(PPTP_CONTROL_PORT), 594 .tuple.src.u.tcp.port = cpu_to_be16(PPTP_CONTROL_PORT),
595 .tuple.dst.protonum = IPPROTO_TCP, 595 .tuple.dst.protonum = IPPROTO_TCP,
596 .help = conntrack_pptp_help, 596 .help = conntrack_pptp_help,
597 .destroy = pptp_destroy_siblings, 597 .destroy = pptp_destroy_siblings,
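
The helper structures converted above all share the same shape: a statically initialized struct nf_conntrack_helper whose tuple carries the well-known port in network byte order. A hedged kernel-side sketch with hypothetical "foo" names and a hypothetical port; the field names and cpu_to_be16() usage follow the hunks above, and real helpers register the structure with nf_conntrack_helper_register() at module init:

#include <linux/module.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>

static int foo_help(struct sk_buff *skb, unsigned int protoff,
		    struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
	/* real helpers parse the payload and register expectations here */
	return NF_ACCEPT;
}

static struct nf_conntrack_expect_policy foo_exp_policy = {
	.max_expected	= 1,
	.timeout	= 300,
};

static struct nf_conntrack_helper foo_helper __read_mostly = {
	.name			= "foo",
	.me			= THIS_MODULE,
	.tuple.src.l3num	= AF_INET,
	.tuple.src.u.tcp.port	= cpu_to_be16(12345),	/* hypothetical well-known port */
	.tuple.dst.protonum	= IPPROTO_TCP,
	.help			= foo_help,
	.expect_policy		= &foo_exp_policy,
};

/* at module init: nf_conntrack_helper_register(&foo_helper); */
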
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index cdc97f3105a3..5490fc37c92d 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -71,6 +71,7 @@ int
71nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) 71nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
72{ 72{
73 if (inet_sk(sk)->transparent) { 73 if (inet_sk(sk)->transparent) {
74 skb_orphan(skb);
74 skb->sk = sk; 75 skb->sk = sk;
75 skb->destructor = nf_tproxy_destructor; 76 skb->destructor = nf_tproxy_destructor;
76 return 1; 77 return 1;
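
The one-line addition above orphans the skb before it is charged to the transparent-proxy socket: skb_orphan() runs and clears any destructor installed by a previous owner, so that owner's memory accounting is released before skb->sk is repointed. A sketch of the resulting pattern, kernel context assumed and the destructor passed in rather than named:

#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/inet_sock.h>

static int assign_sock_sketch(struct sk_buff *skb, struct sock *sk,
			      void (*destructor)(struct sk_buff *))
{
	if (inet_sk(sk)->transparent) {
		skb_orphan(skb);		/* release any previous owner first */
		skb->sk = sk;			/* charge the skb to the tproxy socket */
		skb->destructor = destructor;	/* the module's own destructor */
		return 1;
	}
	return 0;
}
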
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3ae3cb816563..a007dbb4c9f1 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -85,6 +85,7 @@ struct netlink_sock {
85 85
86#define NETLINK_KERNEL_SOCKET 0x1 86#define NETLINK_KERNEL_SOCKET 0x1
87#define NETLINK_RECV_PKTINFO 0x2 87#define NETLINK_RECV_PKTINFO 0x2
88#define NETLINK_BROADCAST_SEND_ERROR 0x4
88 89
89static inline struct netlink_sock *nlk_sk(struct sock *sk) 90static inline struct netlink_sock *nlk_sk(struct sock *sk)
90{ 91{
@@ -950,6 +951,7 @@ struct netlink_broadcast_data {
950 u32 pid; 951 u32 pid;
951 u32 group; 952 u32 group;
952 int failure; 953 int failure;
954 int delivery_failure;
953 int congested; 955 int congested;
954 int delivered; 956 int delivered;
955 gfp_t allocation; 957 gfp_t allocation;
@@ -994,11 +996,15 @@ static inline int do_one_broadcast(struct sock *sk,
994 netlink_overrun(sk); 996 netlink_overrun(sk);
995 /* Clone failed. Notify ALL listeners. */ 997 /* Clone failed. Notify ALL listeners. */
996 p->failure = 1; 998 p->failure = 1;
999 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1000 p->delivery_failure = 1;
997 } else if (sk_filter(sk, p->skb2)) { 1001 } else if (sk_filter(sk, p->skb2)) {
998 kfree_skb(p->skb2); 1002 kfree_skb(p->skb2);
999 p->skb2 = NULL; 1003 p->skb2 = NULL;
1000 } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { 1004 } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
1001 netlink_overrun(sk); 1005 netlink_overrun(sk);
1006 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1007 p->delivery_failure = 1;
1002 } else { 1008 } else {
1003 p->congested |= val; 1009 p->congested |= val;
1004 p->delivered = 1; 1010 p->delivered = 1;
@@ -1025,6 +1031,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
1025 info.pid = pid; 1031 info.pid = pid;
1026 info.group = group; 1032 info.group = group;
1027 info.failure = 0; 1033 info.failure = 0;
1034 info.delivery_failure = 0;
1028 info.congested = 0; 1035 info.congested = 0;
1029 info.delivered = 0; 1036 info.delivered = 0;
1030 info.allocation = allocation; 1037 info.allocation = allocation;
@@ -1042,16 +1049,16 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
1042 1049
1043 netlink_unlock_table(); 1050 netlink_unlock_table();
1044 1051
1045 if (info.skb2) 1052 kfree_skb(info.skb2);
1046 kfree_skb(info.skb2); 1053
1054 if (info.delivery_failure)
1055 return -ENOBUFS;
1047 1056
1048 if (info.delivered) { 1057 if (info.delivered) {
1049 if (info.congested && (allocation & __GFP_WAIT)) 1058 if (info.congested && (allocation & __GFP_WAIT))
1050 yield(); 1059 yield();
1051 return 0; 1060 return 0;
1052 } 1061 }
1053 if (info.failure)
1054 return -ENOBUFS;
1055 return -ESRCH; 1062 return -ESRCH;
1056} 1063}
1057EXPORT_SYMBOL(netlink_broadcast); 1064EXPORT_SYMBOL(netlink_broadcast);
@@ -1167,6 +1174,13 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
1167 err = 0; 1174 err = 0;
1168 break; 1175 break;
1169 } 1176 }
1177 case NETLINK_BROADCAST_ERROR:
1178 if (val)
1179 nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
1180 else
1181 nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
1182 err = 0;
1183 break;
1170 default: 1184 default:
1171 err = -ENOPROTOOPT; 1185 err = -ENOPROTOOPT;
1172 } 1186 }
@@ -1199,6 +1213,16 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
1199 return -EFAULT; 1213 return -EFAULT;
1200 err = 0; 1214 err = 0;
1201 break; 1215 break;
1216 case NETLINK_BROADCAST_ERROR:
1217 if (len < sizeof(int))
1218 return -EINVAL;
1219 len = sizeof(int);
1220 val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
1221 if (put_user(len, optlen) ||
1222 put_user(val, optval))
1223 return -EFAULT;
1224 err = 0;
1225 break;
1202 default: 1226 default:
1203 err = -ENOPROTOOPT; 1227 err = -ENOPROTOOPT;
1204 } 1228 }
@@ -1525,8 +1549,7 @@ EXPORT_SYMBOL(netlink_set_nonroot);
1525 1549
1526static void netlink_destroy_callback(struct netlink_callback *cb) 1550static void netlink_destroy_callback(struct netlink_callback *cb)
1527{ 1551{
1528 if (cb->skb) 1552 kfree_skb(cb->skb);
1529 kfree_skb(cb->skb);
1530 kfree(cb); 1553 kfree(cb);
1531} 1554}
1532 1555
@@ -1743,12 +1766,18 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
1743 exclude_pid = pid; 1766 exclude_pid = pid;
1744 } 1767 }
1745 1768
1746 /* errors reported via destination sk->sk_err */ 1769 /* errors reported via destination sk->sk_err, but propagate
1747 nlmsg_multicast(sk, skb, exclude_pid, group, flags); 1770 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
1771 err = nlmsg_multicast(sk, skb, exclude_pid, group, flags);
1748 } 1772 }
1749 1773
1750 if (report) 1774 if (report) {
1751 err = nlmsg_unicast(sk, skb, pid); 1775 int err2;
1776
1777 err2 = nlmsg_unicast(sk, skb, pid);
1778 if (!err || err == -ESRCH)
1779 err = err2;
1780 }
1752 1781
1753 return err; 1782 return err;
1754} 1783}
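
The NETLINK_BROADCAST_ERROR option added above is opt-in from userspace: a multicast listener that sets it asks the kernel to count failed deliveries to it, so netlink_broadcast() on the sending side returns -ENOBUFS instead of silently succeeding. A hedged userspace sketch of enabling it; NETLINK_ROUTE is just an example protocol, and the constants come from <linux/netlink.h> once the uapi counterpart of this patch defines NETLINK_BROADCAST_ERROR:

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

int main(void)
{
	int one = 1;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0) {
		perror("socket(AF_NETLINK)");
		return 1;
	}
	/* ask the kernel to report multicast delivery failures to senders */
	if (setsockopt(fd, SOL_NETLINK, NETLINK_BROADCAST_ERROR, &one, sizeof(one)) < 0)
		perror("setsockopt(NETLINK_BROADCAST_ERROR)");
	close(fd);
	return 0;
}
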
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index e9c05b8f4f45..cba7849de98e 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1432,7 +1432,7 @@ static int __init nr_proto_init(void)
1432 struct net_device *dev; 1432 struct net_device *dev;
1433 1433
1434 sprintf(name, "nr%d", i); 1434 sprintf(name, "nr%d", i);
1435 dev = alloc_netdev(sizeof(struct nr_private), name, nr_setup); 1435 dev = alloc_netdev(0, name, nr_setup);
1436 if (!dev) { 1436 if (!dev) {
1437 printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n"); 1437 printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n");
1438 goto fail; 1438 goto fail;
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 6caf459665f2..351372463fed 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -42,7 +42,7 @@
42 42
43int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) 43int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
44{ 44{
45 struct net_device_stats *stats = netdev_priv(dev); 45 struct net_device_stats *stats = &dev->stats;
46 46
47 if (!netif_running(dev)) { 47 if (!netif_running(dev)) {
48 stats->rx_dropped++; 48 stats->rx_dropped++;
@@ -171,8 +171,7 @@ static int nr_close(struct net_device *dev)
171 171
172static int nr_xmit(struct sk_buff *skb, struct net_device *dev) 172static int nr_xmit(struct sk_buff *skb, struct net_device *dev)
173{ 173{
174 struct nr_private *nr = netdev_priv(dev); 174 struct net_device_stats *stats = &dev->stats;
175 struct net_device_stats *stats = &nr->stats;
176 unsigned int len = skb->len; 175 unsigned int len = skb->len;
177 176
178 if (!nr_route_frame(skb, NULL)) { 177 if (!nr_route_frame(skb, NULL)) {
@@ -187,34 +186,27 @@ static int nr_xmit(struct sk_buff *skb, struct net_device *dev)
187 return 0; 186 return 0;
188} 187}
189 188
190static struct net_device_stats *nr_get_stats(struct net_device *dev)
191{
192 struct nr_private *nr = netdev_priv(dev);
193
194 return &nr->stats;
195}
196
197static const struct header_ops nr_header_ops = { 189static const struct header_ops nr_header_ops = {
198 .create = nr_header, 190 .create = nr_header,
199 .rebuild= nr_rebuild_header, 191 .rebuild= nr_rebuild_header,
200}; 192};
201 193
194static const struct net_device_ops nr_netdev_ops = {
195 .ndo_open = nr_open,
196 .ndo_stop = nr_close,
197 .ndo_start_xmit = nr_xmit,
198 .ndo_set_mac_address = nr_set_mac_address,
199};
202 200
203void nr_setup(struct net_device *dev) 201void nr_setup(struct net_device *dev)
204{ 202{
205 dev->mtu = NR_MAX_PACKET_SIZE; 203 dev->mtu = NR_MAX_PACKET_SIZE;
206 dev->hard_start_xmit = nr_xmit; 204 dev->netdev_ops = &nr_netdev_ops;
207 dev->open = nr_open;
208 dev->stop = nr_close;
209
210 dev->header_ops = &nr_header_ops; 205 dev->header_ops = &nr_header_ops;
211 dev->hard_header_len = NR_NETWORK_LEN + NR_TRANSPORT_LEN; 206 dev->hard_header_len = NR_NETWORK_LEN + NR_TRANSPORT_LEN;
212 dev->addr_len = AX25_ADDR_LEN; 207 dev->addr_len = AX25_ADDR_LEN;
213 dev->type = ARPHRD_NETROM; 208 dev->type = ARPHRD_NETROM;
214 dev->set_mac_address = nr_set_mac_address;
215 209
216 /* New-style flags. */ 210 /* New-style flags. */
217 dev->flags = IFF_NOARP; 211 dev->flags = IFF_NOARP;
218
219 dev->get_stats = nr_get_stats;
220} 212}
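
The NET/ROM hunks above convert the driver from the old per-device function pointers (dev->open, dev->stop, dev->hard_start_xmit, dev->set_mac_address, dev->get_stats) to a single const struct net_device_ops, and drop the private stats copy in favour of the shared dev->stats; that is also why nr_proto_init() can now pass a private size of 0 to alloc_netdev(). A minimal sketch of the new-style setup for a hypothetical "foo" device, kernel context assumed:

#include <linux/netdevice.h>

static int foo_open(struct net_device *dev)  { return 0; }
static int foo_stop(struct net_device *dev)  { return 0; }

static int foo_xmit(struct sk_buff *skb, struct net_device *dev)
{
	dev->stats.tx_packets++;		/* shared stats, no private copy needed */
	dev->stats.tx_bytes += skb->len;
	dev_kfree_skb(skb);
	return 0;
}

static const struct net_device_ops foo_netdev_ops = {
	.ndo_open	= foo_open,
	.ndo_stop	= foo_stop,
	.ndo_start_xmit	= foo_xmit,
};

static void foo_setup(struct net_device *dev)
{
	dev->netdev_ops = &foo_netdev_ops;	/* replaces dev->open/stop/hard_start_xmit */
	dev->flags      = IFF_NOARP;
}
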
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1fc4a7885c41..d8cc006fac45 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -756,8 +756,7 @@ ring_is_full:
756 spin_unlock(&sk->sk_receive_queue.lock); 756 spin_unlock(&sk->sk_receive_queue.lock);
757 757
758 sk->sk_data_ready(sk, 0); 758 sk->sk_data_ready(sk, 0);
759 if (copy_skb) 759 kfree_skb(copy_skb);
760 kfree_skb(copy_skb);
761 goto drop_n_restore; 760 goto drop_n_restore;
762} 761}
763 762
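
This hunk, like the netlink_destroy_callback() one above, drops an explicit NULL check because kfree_skb() already ignores a NULL pointer, mirroring kfree(). A small userspace sketch of the same idiom with hypothetical names:

#include <stdlib.h>

struct buf {
	void *data;
};

static void buf_free(struct buf *b)
{
	if (!b)			/* tolerate NULL, like kfree_skb()/kfree() */
		return;
	free(b->data);
	free(b);
}

int main(void)
{
	buf_free(NULL);		/* callers no longer need: if (b) buf_free(b); */
	return 0;
}
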
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 13cb323f8c38..81795ea87794 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -275,8 +275,6 @@ static inline int can_respond(struct sk_buff *skb)
275 return 0; 275 return 0;
276 276
277 ph = pn_hdr(skb); 277 ph = pn_hdr(skb);
278 if (phonet_address_get(skb->dev, ph->pn_rdev) != ph->pn_rdev)
279 return 0; /* we are not the destination */
280 if (ph->pn_res == PN_PREFIX && !pskb_may_pull(skb, 5)) 278 if (ph->pn_res == PN_PREFIX && !pskb_may_pull(skb, 5))
281 return 0; 279 return 0;
282 if (ph->pn_res == PN_COMMGR) /* indications */ 280 if (ph->pn_res == PN_COMMGR) /* indications */
@@ -344,8 +342,8 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
344 struct packet_type *pkttype, 342 struct packet_type *pkttype,
345 struct net_device *orig_dev) 343 struct net_device *orig_dev)
346{ 344{
345 struct net *net = dev_net(dev);
347 struct phonethdr *ph; 346 struct phonethdr *ph;
348 struct sock *sk;
349 struct sockaddr_pn sa; 347 struct sockaddr_pn sa;
350 u16 len; 348 u16 len;
351 349
@@ -364,28 +362,28 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
364 skb_reset_transport_header(skb); 362 skb_reset_transport_header(skb);
365 363
366 pn_skb_get_dst_sockaddr(skb, &sa); 364 pn_skb_get_dst_sockaddr(skb, &sa);
367 if (pn_sockaddr_get_addr(&sa) == 0)
368 goto out; /* currently, we cannot be device 0 */
369 365
370 sk = pn_find_sock_by_sa(dev_net(dev), &sa); 366 /* check if we are the destination */
371 if (sk == NULL) { 367 if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) {
368 /* Phonet packet input */
369 struct sock *sk = pn_find_sock_by_sa(net, &sa);
370
371 if (sk)
372 return sk_receive_skb(sk, skb, 0);
373
372 if (can_respond(skb)) { 374 if (can_respond(skb)) {
373 send_obj_unreachable(skb); 375 send_obj_unreachable(skb);
374 send_reset_indications(skb); 376 send_reset_indications(skb);
375 } 377 }
376 goto out;
377 } 378 }
378 379
379 /* Push data to the socket (or other sockets connected to it). */
380 return sk_receive_skb(sk, skb, 0);
381
382out: 380out:
383 kfree_skb(skb); 381 kfree_skb(skb);
384 return NET_RX_DROP; 382 return NET_RX_DROP;
385} 383}
386 384
387static struct packet_type phonet_packet_type = { 385static struct packet_type phonet_packet_type = {
388 .type = __constant_htons(ETH_P_PHONET), 386 .type = cpu_to_be16(ETH_P_PHONET),
389 .dev = NULL, 387 .dev = NULL,
390 .func = phonet_rcv, 388 .func = phonet_rcv,
391}; 389};
@@ -428,16 +426,18 @@ static int __init phonet_init(void)
428{ 426{
429 int err; 427 int err;
430 428
429 err = phonet_device_init();
430 if (err)
431 return err;
432
431 err = sock_register(&phonet_proto_family); 433 err = sock_register(&phonet_proto_family);
432 if (err) { 434 if (err) {
433 printk(KERN_ALERT 435 printk(KERN_ALERT
434 "phonet protocol family initialization failed\n"); 436 "phonet protocol family initialization failed\n");
435 return err; 437 goto err_sock;
436 } 438 }
437 439
438 phonet_device_init();
439 dev_add_pack(&phonet_packet_type); 440 dev_add_pack(&phonet_packet_type);
440 phonet_netlink_register();
441 phonet_sysctl_init(); 441 phonet_sysctl_init();
442 442
443 err = isi_register(); 443 err = isi_register();
@@ -449,6 +449,7 @@ err:
449 phonet_sysctl_exit(); 449 phonet_sysctl_exit();
450 sock_unregister(PF_PHONET); 450 sock_unregister(PF_PHONET);
451 dev_remove_pack(&phonet_packet_type); 451 dev_remove_pack(&phonet_packet_type);
452err_sock:
452 phonet_device_exit(); 453 phonet_device_exit();
453 return err; 454 return err;
454} 455}
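
phonet_init() now brings up the per-device state first and, when sock_register() fails, unwinds it through the new err_sock label; as in the rest of the kernel, teardown runs in the reverse order of setup via a goto ladder. A minimal, self-contained sketch of that pattern, with hypothetical step_*() names standing in for phonet_device_init(), sock_register() and isi_register():

static int step_a_init(void) { return 0; }
static void step_a_exit(void) { }
static int step_b_init(void) { return 0; }
static void step_b_exit(void) { }
static int step_c_init(void) { return 0; }

static int example_init(void)
{
	int err;

	err = step_a_init();
	if (err)
		return err;		/* nothing to undo yet */

	err = step_b_init();
	if (err)
		goto err_a;

	err = step_c_init();
	if (err)
		goto err_b;
	return 0;

err_b:
	step_b_exit();			/* undo in reverse order of setup */
err_a:
	step_a_exit();
	return err;
}
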
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 5491bf5e354b..80a322d77909 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -28,32 +28,41 @@
28#include <linux/netdevice.h> 28#include <linux/netdevice.h>
29#include <linux/phonet.h> 29#include <linux/phonet.h>
30#include <net/sock.h> 30#include <net/sock.h>
31#include <net/netns/generic.h>
31#include <net/phonet/pn_dev.h> 32#include <net/phonet/pn_dev.h>
32 33
33/* when accessing, remember to lock with spin_lock(&pndevs.lock); */ 34struct phonet_net {
34struct phonet_device_list pndevs = { 35 struct phonet_device_list pndevs;
35 .list = LIST_HEAD_INIT(pndevs.list),
36 .lock = __SPIN_LOCK_UNLOCKED(pndevs.lock),
37}; 36};
38 37
38int phonet_net_id;
39
40struct phonet_device_list *phonet_device_list(struct net *net)
41{
42 struct phonet_net *pnn = net_generic(net, phonet_net_id);
43 return &pnn->pndevs;
44}
45
39/* Allocate new Phonet device. */ 46/* Allocate new Phonet device. */
40static struct phonet_device *__phonet_device_alloc(struct net_device *dev) 47static struct phonet_device *__phonet_device_alloc(struct net_device *dev)
41{ 48{
49 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
42 struct phonet_device *pnd = kmalloc(sizeof(*pnd), GFP_ATOMIC); 50 struct phonet_device *pnd = kmalloc(sizeof(*pnd), GFP_ATOMIC);
43 if (pnd == NULL) 51 if (pnd == NULL)
44 return NULL; 52 return NULL;
45 pnd->netdev = dev; 53 pnd->netdev = dev;
46 bitmap_zero(pnd->addrs, 64); 54 bitmap_zero(pnd->addrs, 64);
47 55
48 list_add(&pnd->list, &pndevs.list); 56 list_add(&pnd->list, &pndevs->list);
49 return pnd; 57 return pnd;
50} 58}
51 59
52static struct phonet_device *__phonet_get(struct net_device *dev) 60static struct phonet_device *__phonet_get(struct net_device *dev)
53{ 61{
62 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
54 struct phonet_device *pnd; 63 struct phonet_device *pnd;
55 64
56 list_for_each_entry(pnd, &pndevs.list, list) { 65 list_for_each_entry(pnd, &pndevs->list, list) {
57 if (pnd->netdev == dev) 66 if (pnd->netdev == dev)
58 return pnd; 67 return pnd;
59 } 68 }
@@ -68,32 +77,33 @@ static void __phonet_device_free(struct phonet_device *pnd)
68 77
69struct net_device *phonet_device_get(struct net *net) 78struct net_device *phonet_device_get(struct net *net)
70{ 79{
80 struct phonet_device_list *pndevs = phonet_device_list(net);
71 struct phonet_device *pnd; 81 struct phonet_device *pnd;
72 struct net_device *dev; 82 struct net_device *dev;
73 83
74 spin_lock_bh(&pndevs.lock); 84 spin_lock_bh(&pndevs->lock);
75 list_for_each_entry(pnd, &pndevs.list, list) { 85 list_for_each_entry(pnd, &pndevs->list, list) {
76 dev = pnd->netdev; 86 dev = pnd->netdev;
77 BUG_ON(!dev); 87 BUG_ON(!dev);
78 88
79 if (net_eq(dev_net(dev), net) && 89 if ((dev->reg_state == NETREG_REGISTERED) &&
80 (dev->reg_state == NETREG_REGISTERED) &&
81 ((pnd->netdev->flags & IFF_UP)) == IFF_UP) 90 ((pnd->netdev->flags & IFF_UP)) == IFF_UP)
82 break; 91 break;
83 dev = NULL; 92 dev = NULL;
84 } 93 }
85 if (dev) 94 if (dev)
86 dev_hold(dev); 95 dev_hold(dev);
87 spin_unlock_bh(&pndevs.lock); 96 spin_unlock_bh(&pndevs->lock);
88 return dev; 97 return dev;
89} 98}
90 99
91int phonet_address_add(struct net_device *dev, u8 addr) 100int phonet_address_add(struct net_device *dev, u8 addr)
92{ 101{
102 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
93 struct phonet_device *pnd; 103 struct phonet_device *pnd;
94 int err = 0; 104 int err = 0;
95 105
96 spin_lock_bh(&pndevs.lock); 106 spin_lock_bh(&pndevs->lock);
97 /* Find or create Phonet-specific device data */ 107 /* Find or create Phonet-specific device data */
98 pnd = __phonet_get(dev); 108 pnd = __phonet_get(dev);
99 if (pnd == NULL) 109 if (pnd == NULL)
@@ -102,31 +112,33 @@ int phonet_address_add(struct net_device *dev, u8 addr)
102 err = -ENOMEM; 112 err = -ENOMEM;
103 else if (test_and_set_bit(addr >> 2, pnd->addrs)) 113 else if (test_and_set_bit(addr >> 2, pnd->addrs))
104 err = -EEXIST; 114 err = -EEXIST;
105 spin_unlock_bh(&pndevs.lock); 115 spin_unlock_bh(&pndevs->lock);
106 return err; 116 return err;
107} 117}
108 118
109int phonet_address_del(struct net_device *dev, u8 addr) 119int phonet_address_del(struct net_device *dev, u8 addr)
110{ 120{
121 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
111 struct phonet_device *pnd; 122 struct phonet_device *pnd;
112 int err = 0; 123 int err = 0;
113 124
114 spin_lock_bh(&pndevs.lock); 125 spin_lock_bh(&pndevs->lock);
115 pnd = __phonet_get(dev); 126 pnd = __phonet_get(dev);
116 if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) 127 if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs))
117 err = -EADDRNOTAVAIL; 128 err = -EADDRNOTAVAIL;
118 else if (bitmap_empty(pnd->addrs, 64)) 129 else if (bitmap_empty(pnd->addrs, 64))
119 __phonet_device_free(pnd); 130 __phonet_device_free(pnd);
120 spin_unlock_bh(&pndevs.lock); 131 spin_unlock_bh(&pndevs->lock);
121 return err; 132 return err;
122} 133}
123 134
124/* Gets a source address toward a destination, through a interface. */ 135/* Gets a source address toward a destination, through a interface. */
125u8 phonet_address_get(struct net_device *dev, u8 addr) 136u8 phonet_address_get(struct net_device *dev, u8 addr)
126{ 137{
138 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
127 struct phonet_device *pnd; 139 struct phonet_device *pnd;
128 140
129 spin_lock_bh(&pndevs.lock); 141 spin_lock_bh(&pndevs->lock);
130 pnd = __phonet_get(dev); 142 pnd = __phonet_get(dev);
131 if (pnd) { 143 if (pnd) {
132 BUG_ON(bitmap_empty(pnd->addrs, 64)); 144 BUG_ON(bitmap_empty(pnd->addrs, 64));
@@ -136,30 +148,31 @@ u8 phonet_address_get(struct net_device *dev, u8 addr)
136 addr = find_first_bit(pnd->addrs, 64) << 2; 148 addr = find_first_bit(pnd->addrs, 64) << 2;
137 } else 149 } else
138 addr = PN_NO_ADDR; 150 addr = PN_NO_ADDR;
139 spin_unlock_bh(&pndevs.lock); 151 spin_unlock_bh(&pndevs->lock);
140 return addr; 152 return addr;
141} 153}
142 154
143int phonet_address_lookup(struct net *net, u8 addr) 155int phonet_address_lookup(struct net *net, u8 addr)
144{ 156{
157 struct phonet_device_list *pndevs = phonet_device_list(net);
145 struct phonet_device *pnd; 158 struct phonet_device *pnd;
159 int err = -EADDRNOTAVAIL;
146 160
147 spin_lock_bh(&pndevs.lock); 161 spin_lock_bh(&pndevs->lock);
148 list_for_each_entry(pnd, &pndevs.list, list) { 162 list_for_each_entry(pnd, &pndevs->list, list) {
149 if (!net_eq(dev_net(pnd->netdev), net))
150 continue;
151 /* Don't allow unregistering devices! */ 163 /* Don't allow unregistering devices! */
152 if ((pnd->netdev->reg_state != NETREG_REGISTERED) || 164 if ((pnd->netdev->reg_state != NETREG_REGISTERED) ||
153 ((pnd->netdev->flags & IFF_UP)) != IFF_UP) 165 ((pnd->netdev->flags & IFF_UP)) != IFF_UP)
154 continue; 166 continue;
155 167
156 if (test_bit(addr >> 2, pnd->addrs)) { 168 if (test_bit(addr >> 2, pnd->addrs)) {
157 spin_unlock_bh(&pndevs.lock); 169 err = 0;
158 return 0; 170 goto found;
159 } 171 }
160 } 172 }
161 spin_unlock_bh(&pndevs.lock); 173found:
162 return -EADDRNOTAVAIL; 174 spin_unlock_bh(&pndevs->lock);
175 return err;
163} 176}
164 177
165/* notify Phonet of device events */ 178/* notify Phonet of device events */
@@ -169,14 +182,16 @@ static int phonet_device_notify(struct notifier_block *me, unsigned long what,
169 struct net_device *dev = arg; 182 struct net_device *dev = arg;
170 183
171 if (what == NETDEV_UNREGISTER) { 184 if (what == NETDEV_UNREGISTER) {
185 struct phonet_device_list *pndevs;
172 struct phonet_device *pnd; 186 struct phonet_device *pnd;
173 187
174 /* Destroy phonet-specific device data */ 188 /* Destroy phonet-specific device data */
175 spin_lock_bh(&pndevs.lock); 189 pndevs = phonet_device_list(dev_net(dev));
190 spin_lock_bh(&pndevs->lock);
176 pnd = __phonet_get(dev); 191 pnd = __phonet_get(dev);
177 if (pnd) 192 if (pnd)
178 __phonet_device_free(pnd); 193 __phonet_device_free(pnd);
179 spin_unlock_bh(&pndevs.lock); 194 spin_unlock_bh(&pndevs->lock);
180 } 195 }
181 return 0; 196 return 0;
182 197
@@ -187,24 +202,52 @@ static struct notifier_block phonet_device_notifier = {
187 .priority = 0, 202 .priority = 0,
188}; 203};
189 204
190/* Initialize Phonet devices list */ 205/* Per-namespace Phonet devices handling */
191void phonet_device_init(void) 206static int phonet_init_net(struct net *net)
192{ 207{
193 register_netdevice_notifier(&phonet_device_notifier); 208 struct phonet_net *pnn = kmalloc(sizeof(*pnn), GFP_KERNEL);
209 if (!pnn)
210 return -ENOMEM;
211
212 INIT_LIST_HEAD(&pnn->pndevs.list);
213 spin_lock_init(&pnn->pndevs.lock);
214 net_assign_generic(net, phonet_net_id, pnn);
215 return 0;
194} 216}
195 217
196void phonet_device_exit(void) 218static void phonet_exit_net(struct net *net)
197{ 219{
220 struct phonet_net *pnn = net_generic(net, phonet_net_id);
198 struct phonet_device *pnd, *n; 221 struct phonet_device *pnd, *n;
199 222
200 rtnl_unregister_all(PF_PHONET); 223 list_for_each_entry_safe(pnd, n, &pnn->pndevs.list, list)
201 rtnl_lock();
202 spin_lock_bh(&pndevs.lock);
203
204 list_for_each_entry_safe(pnd, n, &pndevs.list, list)
205 __phonet_device_free(pnd); 224 __phonet_device_free(pnd);
206 225
207 spin_unlock_bh(&pndevs.lock); 226 kfree(pnn);
208 rtnl_unlock(); 227}
228
229static struct pernet_operations phonet_net_ops = {
230 .init = phonet_init_net,
231 .exit = phonet_exit_net,
232};
233
234/* Initialize Phonet devices list */
235int __init phonet_device_init(void)
236{
237 int err = register_pernet_gen_device(&phonet_net_id, &phonet_net_ops);
238 if (err)
239 return err;
240
241 register_netdevice_notifier(&phonet_device_notifier);
242 err = phonet_netlink_register();
243 if (err)
244 phonet_device_exit();
245 return err;
246}
247
248void phonet_device_exit(void)
249{
250 rtnl_unregister_all(PF_PHONET);
209 unregister_netdevice_notifier(&phonet_device_notifier); 251 unregister_netdevice_notifier(&phonet_device_notifier);
252 unregister_pernet_gen_device(phonet_net_id, &phonet_net_ops);
210} 253}
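
The Phonet device list stops being a single global here and becomes per network namespace: each struct net gets a struct phonet_net allocated in the pernet init hook and located again through net_generic(). A kernel-side sketch of the same pattern with hypothetical foo_* names, using the API spellings of this patch's era (register_pernet_gen_device()/net_assign_generic()):

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

struct foo_net {			/* hypothetical per-namespace state */
	struct list_head devices;
	spinlock_t lock;
};

static int foo_net_id;

static int foo_init_net(struct net *net)
{
	struct foo_net *fn = kmalloc(sizeof(*fn), GFP_KERNEL);

	if (!fn)
		return -ENOMEM;
	INIT_LIST_HEAD(&fn->devices);
	spin_lock_init(&fn->lock);
	net_assign_generic(net, foo_net_id, fn);	/* stash pointer in this netns */
	return 0;
}

static void foo_exit_net(struct net *net)
{
	kfree(net_generic(net, foo_net_id));
}

static struct pernet_operations foo_net_ops = {
	.init = foo_init_net,
	.exit = foo_exit_net,
};

/* registration: register_pernet_gen_device(&foo_net_id, &foo_net_ops); */
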
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 242fe8f8c322..cec4e5951681 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -47,8 +47,9 @@ static void rtmsg_notify(int event, struct net_device *dev, u8 addr)
47 kfree_skb(skb); 47 kfree_skb(skb);
48 goto errout; 48 goto errout;
49 } 49 }
50 err = rtnl_notify(skb, dev_net(dev), 0, 50 rtnl_notify(skb, dev_net(dev), 0,
51 RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); 51 RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL);
52 return;
52errout: 53errout:
53 if (err < 0) 54 if (err < 0)
54 rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); 55 rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err);
@@ -123,17 +124,16 @@ nla_put_failure:
123 124
124static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) 125static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
125{ 126{
126 struct net *net = sock_net(skb->sk); 127 struct phonet_device_list *pndevs;
127 struct phonet_device *pnd; 128 struct phonet_device *pnd;
128 int dev_idx = 0, dev_start_idx = cb->args[0]; 129 int dev_idx = 0, dev_start_idx = cb->args[0];
129 int addr_idx = 0, addr_start_idx = cb->args[1]; 130 int addr_idx = 0, addr_start_idx = cb->args[1];
130 131
131 spin_lock_bh(&pndevs.lock); 132 pndevs = phonet_device_list(sock_net(skb->sk));
132 list_for_each_entry(pnd, &pndevs.list, list) { 133 spin_lock_bh(&pndevs->lock);
134 list_for_each_entry(pnd, &pndevs->list, list) {
133 u8 addr; 135 u8 addr;
134 136
135 if (!net_eq(dev_net(pnd->netdev), net))
136 continue;
137 if (dev_idx > dev_start_idx) 137 if (dev_idx > dev_start_idx)
138 addr_start_idx = 0; 138 addr_start_idx = 0;
139 if (dev_idx++ < dev_start_idx) 139 if (dev_idx++ < dev_start_idx)
@@ -153,16 +153,21 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
153 } 153 }
154 154
155out: 155out:
156 spin_unlock_bh(&pndevs.lock); 156 spin_unlock_bh(&pndevs->lock);
157 cb->args[0] = dev_idx; 157 cb->args[0] = dev_idx;
158 cb->args[1] = addr_idx; 158 cb->args[1] = addr_idx;
159 159
160 return skb->len; 160 return skb->len;
161} 161}
162 162
163void __init phonet_netlink_register(void) 163int __init phonet_netlink_register(void)
164{ 164{
165 rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL); 165 int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL);
166 rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL); 166 if (err)
167 rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit); 167 return err;
168
169 /* Further __rtnl_register() cannot fail */
170 __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL);
171 __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit);
172 return 0;
168} 173}
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
new file mode 100644
index 000000000000..796773b5df9b
--- /dev/null
+++ b/net/rds/Kconfig
@@ -0,0 +1,14 @@
1
2config RDS
3 tristate "Reliable Datagram Sockets (RDS) (EXPERIMENTAL)"
4 depends on INET && INFINIBAND_IPOIB && EXPERIMENTAL
5 depends on INFINIBAND && INFINIBAND_ADDR_TRANS
6 ---help---
7 RDS provides reliable, sequenced delivery of datagrams
8 over Infiniband.
9
10config RDS_DEBUG
11 bool "Debugging messages"
12 depends on RDS
13 default n
14
diff --git a/net/rds/Makefile b/net/rds/Makefile
new file mode 100644
index 000000000000..51f27585fa08
--- /dev/null
+++ b/net/rds/Makefile
@@ -0,0 +1,14 @@
1obj-$(CONFIG_RDS) += rds.o
2rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \
3 recv.o send.o stats.o sysctl.o threads.o transport.o \
4 loop.o page.o rdma.o \
5 rdma_transport.o \
6 ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \
7 ib_sysctl.o ib_rdma.o \
8 iw.o iw_cm.o iw_recv.o iw_ring.o iw_send.o iw_stats.o \
9 iw_sysctl.o iw_rdma.o
10
11ifeq ($(CONFIG_RDS_DEBUG), y)
12EXTRA_CFLAGS += -DDEBUG
13endif
14
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
new file mode 100644
index 000000000000..20cf16fc572f
--- /dev/null
+++ b/net/rds/af_rds.c
@@ -0,0 +1,586 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/module.h>
34#include <linux/errno.h>
35#include <linux/kernel.h>
36#include <linux/in.h>
37#include <linux/poll.h>
38#include <linux/version.h>
39#include <net/sock.h>
40
41#include "rds.h"
42#include "rdma.h"
43#include "rdma_transport.h"
44
45/* this is just used for stats gathering :/ */
46static DEFINE_SPINLOCK(rds_sock_lock);
47static unsigned long rds_sock_count;
48static LIST_HEAD(rds_sock_list);
49DECLARE_WAIT_QUEUE_HEAD(rds_poll_waitq);
50
51/*
52 * This is called as the final descriptor referencing this socket is closed.
53 * We have to unbind the socket so that another socket can be bound to the
54 * address it was using.
55 *
56 * We have to be careful about racing with the incoming path. sock_orphan()
57 * sets SOCK_DEAD and we use that as an indicator to the rx path that new
58 * messages shouldn't be queued.
59 */
60static int rds_release(struct socket *sock)
61{
62 struct sock *sk = sock->sk;
63 struct rds_sock *rs;
64 unsigned long flags;
65
66 if (sk == NULL)
67 goto out;
68
69 rs = rds_sk_to_rs(sk);
70
71 sock_orphan(sk);
72 /* Note - rds_clear_recv_queue grabs rs_recv_lock, so
73 * that ensures the recv path has completed messing
74 * with the socket. */
75 rds_clear_recv_queue(rs);
76 rds_cong_remove_socket(rs);
77 rds_remove_bound(rs);
78 rds_send_drop_to(rs, NULL);
79 rds_rdma_drop_keys(rs);
80 rds_notify_queue_get(rs, NULL);
81
82 spin_lock_irqsave(&rds_sock_lock, flags);
83 list_del_init(&rs->rs_item);
84 rds_sock_count--;
85 spin_unlock_irqrestore(&rds_sock_lock, flags);
86
87 sock->sk = NULL;
88 sock_put(sk);
89out:
90 return 0;
91}
92
93/*
94 * Careful not to race with rds_release -> sock_orphan which clears sk_sleep.
95 * _bh() isn't OK here, we're called from interrupt handlers. It's probably OK
96 * to wake the waitqueue after sk_sleep is clear as we hold a sock ref, but
97 * this seems more conservative.
98 * NB - normally, one would use sk_callback_lock for this, but we can
99 * get here from interrupts, whereas the network code grabs sk_callback_lock
100 * with _lock_bh only - so relying on sk_callback_lock introduces livelocks.
101 */
102void rds_wake_sk_sleep(struct rds_sock *rs)
103{
104 unsigned long flags;
105
106 read_lock_irqsave(&rs->rs_recv_lock, flags);
107 __rds_wake_sk_sleep(rds_rs_to_sk(rs));
108 read_unlock_irqrestore(&rs->rs_recv_lock, flags);
109}
110
111static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
112 int *uaddr_len, int peer)
113{
114 struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
115 struct rds_sock *rs = rds_sk_to_rs(sock->sk);
116
117 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
118
119 /* racey, don't care */
120 if (peer) {
121 if (!rs->rs_conn_addr)
122 return -ENOTCONN;
123
124 sin->sin_port = rs->rs_conn_port;
125 sin->sin_addr.s_addr = rs->rs_conn_addr;
126 } else {
127 sin->sin_port = rs->rs_bound_port;
128 sin->sin_addr.s_addr = rs->rs_bound_addr;
129 }
130
131 sin->sin_family = AF_INET;
132
133 *uaddr_len = sizeof(*sin);
134 return 0;
135}
136
137/*
138 * RDS' poll is without a doubt the least intuitive part of the interface,
139 * as POLLIN and POLLOUT do not behave entirely as you would expect from
140 * a network protocol.
141 *
142 * POLLIN is asserted if
143 * - there is data on the receive queue.
144 * - to signal that a previously congested destination may have become
145 * uncongested
146 * - A notification has been queued to the socket (this can be a congestion
147 * update, or a RDMA completion).
148 *
149 * POLLOUT is asserted if there is room on the send queue. This does not mean
150 * however, that the next sendmsg() call will succeed. If the application tries
151 * to send to a congested destination, the system call may still fail (and
152 * return ENOBUFS).
153 */
154static unsigned int rds_poll(struct file *file, struct socket *sock,
155 poll_table *wait)
156{
157 struct sock *sk = sock->sk;
158 struct rds_sock *rs = rds_sk_to_rs(sk);
159 unsigned int mask = 0;
160 unsigned long flags;
161
162 poll_wait(file, sk->sk_sleep, wait);
163
164 poll_wait(file, &rds_poll_waitq, wait);
165
166 read_lock_irqsave(&rs->rs_recv_lock, flags);
167 if (!rs->rs_cong_monitor) {
168 /* When a congestion map was updated, we signal POLLIN for
169 * "historical" reasons. Applications can also poll for
170 * WRBAND instead. */
171 if (rds_cong_updated_since(&rs->rs_cong_track))
172 mask |= (POLLIN | POLLRDNORM | POLLWRBAND);
173 } else {
174 spin_lock(&rs->rs_lock);
175 if (rs->rs_cong_notify)
176 mask |= (POLLIN | POLLRDNORM);
177 spin_unlock(&rs->rs_lock);
178 }
179 if (!list_empty(&rs->rs_recv_queue)
180 || !list_empty(&rs->rs_notify_queue))
181 mask |= (POLLIN | POLLRDNORM);
182 if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
183 mask |= (POLLOUT | POLLWRNORM);
184 read_unlock_irqrestore(&rs->rs_recv_lock, flags);
185
186 return mask;
187}
188
189static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
190{
191 return -ENOIOCTLCMD;
192}
193
194static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
195 int len)
196{
197 struct sockaddr_in sin;
198 int ret = 0;
199
200 /* racing with another thread binding seems ok here */
201 if (rs->rs_bound_addr == 0) {
202 ret = -ENOTCONN; /* XXX not a great errno */
203 goto out;
204 }
205
206 if (len < sizeof(struct sockaddr_in)) {
207 ret = -EINVAL;
208 goto out;
209 }
210
211 if (copy_from_user(&sin, optval, sizeof(sin))) {
212 ret = -EFAULT;
213 goto out;
214 }
215
216 rds_send_drop_to(rs, &sin);
217out:
218 return ret;
219}
220
221static int rds_set_bool_option(unsigned char *optvar, char __user *optval,
222 int optlen)
223{
224 int value;
225
226 if (optlen < sizeof(int))
227 return -EINVAL;
228 if (get_user(value, (int __user *) optval))
229 return -EFAULT;
230 *optvar = !!value;
231 return 0;
232}
233
234static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
235 int optlen)
236{
237 int ret;
238
239 ret = rds_set_bool_option(&rs->rs_cong_monitor, optval, optlen);
240 if (ret == 0) {
241 if (rs->rs_cong_monitor) {
242 rds_cong_add_socket(rs);
243 } else {
244 rds_cong_remove_socket(rs);
245 rs->rs_cong_mask = 0;
246 rs->rs_cong_notify = 0;
247 }
248 }
249 return ret;
250}
251
252static int rds_setsockopt(struct socket *sock, int level, int optname,
253 char __user *optval, int optlen)
254{
255 struct rds_sock *rs = rds_sk_to_rs(sock->sk);
256 int ret;
257
258 if (level != SOL_RDS) {
259 ret = -ENOPROTOOPT;
260 goto out;
261 }
262
263 switch (optname) {
264 case RDS_CANCEL_SENT_TO:
265 ret = rds_cancel_sent_to(rs, optval, optlen);
266 break;
267 case RDS_GET_MR:
268 ret = rds_get_mr(rs, optval, optlen);
269 break;
270 case RDS_FREE_MR:
271 ret = rds_free_mr(rs, optval, optlen);
272 break;
273 case RDS_RECVERR:
274 ret = rds_set_bool_option(&rs->rs_recverr, optval, optlen);
275 break;
276 case RDS_CONG_MONITOR:
277 ret = rds_cong_monitor(rs, optval, optlen);
278 break;
279 default:
280 ret = -ENOPROTOOPT;
281 }
282out:
283 return ret;
284}
285
286static int rds_getsockopt(struct socket *sock, int level, int optname,
287 char __user *optval, int __user *optlen)
288{
289 struct rds_sock *rs = rds_sk_to_rs(sock->sk);
290 int ret = -ENOPROTOOPT, len;
291
292 if (level != SOL_RDS)
293 goto out;
294
295 if (get_user(len, optlen)) {
296 ret = -EFAULT;
297 goto out;
298 }
299
300 switch (optname) {
301 case RDS_INFO_FIRST ... RDS_INFO_LAST:
302 ret = rds_info_getsockopt(sock, optname, optval,
303 optlen);
304 break;
305
306 case RDS_RECVERR:
307 if (len < sizeof(int))
308 ret = -EINVAL;
309 else
310 if (put_user(rs->rs_recverr, (int __user *) optval)
311 || put_user(sizeof(int), optlen))
312 ret = -EFAULT;
313 else
314 ret = 0;
315 break;
316 default:
317 break;
318 }
319
320out:
321 return ret;
322
323}
324
325static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
326 int addr_len, int flags)
327{
328 struct sock *sk = sock->sk;
329 struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
330 struct rds_sock *rs = rds_sk_to_rs(sk);
331 int ret = 0;
332
333 lock_sock(sk);
334
335 if (addr_len != sizeof(struct sockaddr_in)) {
336 ret = -EINVAL;
337 goto out;
338 }
339
340 if (sin->sin_family != AF_INET) {
341 ret = -EAFNOSUPPORT;
342 goto out;
343 }
344
345 if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
346 ret = -EDESTADDRREQ;
347 goto out;
348 }
349
350 rs->rs_conn_addr = sin->sin_addr.s_addr;
351 rs->rs_conn_port = sin->sin_port;
352
353out:
354 release_sock(sk);
355 return ret;
356}
357
358static struct proto rds_proto = {
359 .name = "RDS",
360 .owner = THIS_MODULE,
361 .obj_size = sizeof(struct rds_sock),
362};
363
364static struct proto_ops rds_proto_ops = {
365 .family = AF_RDS,
366 .owner = THIS_MODULE,
367 .release = rds_release,
368 .bind = rds_bind,
369 .connect = rds_connect,
370 .socketpair = sock_no_socketpair,
371 .accept = sock_no_accept,
372 .getname = rds_getname,
373 .poll = rds_poll,
374 .ioctl = rds_ioctl,
375 .listen = sock_no_listen,
376 .shutdown = sock_no_shutdown,
377 .setsockopt = rds_setsockopt,
378 .getsockopt = rds_getsockopt,
379 .sendmsg = rds_sendmsg,
380 .recvmsg = rds_recvmsg,
381 .mmap = sock_no_mmap,
382 .sendpage = sock_no_sendpage,
383};
384
385static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
386{
387 unsigned long flags;
388 struct rds_sock *rs;
389
390 sock_init_data(sock, sk);
391 sock->ops = &rds_proto_ops;
392 sk->sk_protocol = protocol;
393
394 rs = rds_sk_to_rs(sk);
395 spin_lock_init(&rs->rs_lock);
396 rwlock_init(&rs->rs_recv_lock);
397 INIT_LIST_HEAD(&rs->rs_send_queue);
398 INIT_LIST_HEAD(&rs->rs_recv_queue);
399 INIT_LIST_HEAD(&rs->rs_notify_queue);
400 INIT_LIST_HEAD(&rs->rs_cong_list);
401 spin_lock_init(&rs->rs_rdma_lock);
402 rs->rs_rdma_keys = RB_ROOT;
403
404 spin_lock_irqsave(&rds_sock_lock, flags);
405 list_add_tail(&rs->rs_item, &rds_sock_list);
406 rds_sock_count++;
407 spin_unlock_irqrestore(&rds_sock_lock, flags);
408
409 return 0;
410}
411
412static int rds_create(struct net *net, struct socket *sock, int protocol)
413{
414 struct sock *sk;
415
416 if (sock->type != SOCK_SEQPACKET || protocol)
417 return -ESOCKTNOSUPPORT;
418
419 sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto);
420 if (!sk)
421 return -ENOMEM;
422
423 return __rds_create(sock, sk, protocol);
424}
425
426void rds_sock_addref(struct rds_sock *rs)
427{
428 sock_hold(rds_rs_to_sk(rs));
429}
430
431void rds_sock_put(struct rds_sock *rs)
432{
433 sock_put(rds_rs_to_sk(rs));
434}
435
436static struct net_proto_family rds_family_ops = {
437 .family = AF_RDS,
438 .create = rds_create,
439 .owner = THIS_MODULE,
440};
441
442static void rds_sock_inc_info(struct socket *sock, unsigned int len,
443 struct rds_info_iterator *iter,
444 struct rds_info_lengths *lens)
445{
446 struct rds_sock *rs;
447 struct sock *sk;
448 struct rds_incoming *inc;
449 unsigned long flags;
450 unsigned int total = 0;
451
452 len /= sizeof(struct rds_info_message);
453
454 spin_lock_irqsave(&rds_sock_lock, flags);
455
456 list_for_each_entry(rs, &rds_sock_list, rs_item) {
457 sk = rds_rs_to_sk(rs);
458 read_lock(&rs->rs_recv_lock);
459
460 /* XXX too lazy to maintain counts.. */
461 list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
462 total++;
463 if (total <= len)
464 rds_inc_info_copy(inc, iter, inc->i_saddr,
465 rs->rs_bound_addr, 1);
466 }
467
468 read_unlock(&rs->rs_recv_lock);
469 }
470
471 spin_unlock_irqrestore(&rds_sock_lock, flags);
472
473 lens->nr = total;
474 lens->each = sizeof(struct rds_info_message);
475}
476
477static void rds_sock_info(struct socket *sock, unsigned int len,
478 struct rds_info_iterator *iter,
479 struct rds_info_lengths *lens)
480{
481 struct rds_info_socket sinfo;
482 struct rds_sock *rs;
483 unsigned long flags;
484
485 len /= sizeof(struct rds_info_socket);
486
487 spin_lock_irqsave(&rds_sock_lock, flags);
488
489 if (len < rds_sock_count)
490 goto out;
491
492 list_for_each_entry(rs, &rds_sock_list, rs_item) {
493 sinfo.sndbuf = rds_sk_sndbuf(rs);
494 sinfo.rcvbuf = rds_sk_rcvbuf(rs);
495 sinfo.bound_addr = rs->rs_bound_addr;
496 sinfo.connected_addr = rs->rs_conn_addr;
497 sinfo.bound_port = rs->rs_bound_port;
498 sinfo.connected_port = rs->rs_conn_port;
499 sinfo.inum = sock_i_ino(rds_rs_to_sk(rs));
500
501 rds_info_copy(iter, &sinfo, sizeof(sinfo));
502 }
503
504out:
505 lens->nr = rds_sock_count;
506 lens->each = sizeof(struct rds_info_socket);
507
508 spin_unlock_irqrestore(&rds_sock_lock, flags);
509}
510
511static void __exit rds_exit(void)
512{
513 rds_rdma_exit();
514 sock_unregister(rds_family_ops.family);
515 proto_unregister(&rds_proto);
516 rds_conn_exit();
517 rds_cong_exit();
518 rds_sysctl_exit();
519 rds_threads_exit();
520 rds_stats_exit();
521 rds_page_exit();
522 rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info);
523 rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
524}
525module_exit(rds_exit);
526
527static int __init rds_init(void)
528{
529 int ret;
530
531 ret = rds_conn_init();
532 if (ret)
533 goto out;
534 ret = rds_threads_init();
535 if (ret)
536 goto out_conn;
537 ret = rds_sysctl_init();
538 if (ret)
539 goto out_threads;
540 ret = rds_stats_init();
541 if (ret)
542 goto out_sysctl;
543 ret = proto_register(&rds_proto, 1);
544 if (ret)
545 goto out_stats;
546 ret = sock_register(&rds_family_ops);
547 if (ret)
548 goto out_proto;
549
550 rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info);
551 rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
552
553 /* ib/iwarp transports currently compiled-in */
554 ret = rds_rdma_init();
555 if (ret)
556 goto out_sock;
557 goto out;
558
559out_sock:
560 sock_unregister(rds_family_ops.family);
561out_proto:
562 proto_unregister(&rds_proto);
563out_stats:
564 rds_stats_exit();
565out_sysctl:
566 rds_sysctl_exit();
567out_threads:
568 rds_threads_exit();
569out_conn:
570 rds_conn_exit();
571 rds_cong_exit();
572 rds_page_exit();
573out:
574 return ret;
575}
576module_init(rds_init);
577
578#define DRV_VERSION "4.0"
579#define DRV_RELDATE "Feb 12, 2009"
580
581MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>");
582MODULE_DESCRIPTION("RDS: Reliable Datagram Sockets"
583 " v" DRV_VERSION " (" DRV_RELDATE ")");
584MODULE_VERSION(DRV_VERSION);
585MODULE_LICENSE("Dual BSD/GPL");
586MODULE_ALIAS_NETPROTO(PF_RDS);
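
From userspace the new family is used like any other datagram socket: a SOCK_SEQPACKET socket is bound to a local IPv4 address and port, after which sendmsg()/recvmsg() and the poll() semantics documented in rds_poll() above apply. A hedged sketch; the address and port are examples only, and AF_RDS (21 on Linux) may need a local define if the libc headers of this era do not yet carry it:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#ifndef AF_RDS
#define AF_RDS 21
#endif

int main(void)
{
	struct sockaddr_in laddr;
	int fd = socket(AF_RDS, SOCK_SEQPACKET, 0);	/* rds_create() requires SOCK_SEQPACKET */

	if (fd < 0) {
		perror("socket(AF_RDS)");
		return 1;
	}

	memset(&laddr, 0, sizeof(laddr));
	laddr.sin_family      = AF_INET;			/* RDS binds to IPv4 addresses */
	laddr.sin_addr.s_addr = inet_addr("192.0.2.1");	/* example address; INADDR_ANY is rejected */
	laddr.sin_port        = htons(18634);		/* example port; 0 asks the kernel to pick one */

	if (bind(fd, (struct sockaddr *)&laddr, sizeof(laddr)) < 0)
		perror("bind");		/* -EADDRNOTAVAIL if no RDS transport covers the address */

	close(fd);
	return 0;
}
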
diff --git a/net/rds/bind.c b/net/rds/bind.c
new file mode 100644
index 000000000000..c17cc39160ce
--- /dev/null
+++ b/net/rds/bind.c
@@ -0,0 +1,199 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <net/sock.h>
35#include <linux/in.h>
36#include <linux/if_arp.h>
37#include "rds.h"
38
39/*
40 * XXX this probably still needs more work.. no INADDR_ANY, and rbtrees aren't
41 * particularly zippy.
42 *
43 * This is now called for every incoming frame so we arguably care much more
44 * about it than we used to.
45 */
46static DEFINE_SPINLOCK(rds_bind_lock);
47static struct rb_root rds_bind_tree = RB_ROOT;
48
49static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port,
50 struct rds_sock *insert)
51{
52 struct rb_node **p = &rds_bind_tree.rb_node;
53 struct rb_node *parent = NULL;
54 struct rds_sock *rs;
55 u64 cmp;
56 u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
57
58 while (*p) {
59 parent = *p;
60 rs = rb_entry(parent, struct rds_sock, rs_bound_node);
61
62 cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
63 be16_to_cpu(rs->rs_bound_port);
64
65 if (needle < cmp)
66 p = &(*p)->rb_left;
67 else if (needle > cmp)
68 p = &(*p)->rb_right;
69 else
70 return rs;
71 }
72
73 if (insert) {
74 rb_link_node(&insert->rs_bound_node, parent, p);
75 rb_insert_color(&insert->rs_bound_node, &rds_bind_tree);
76 }
77 return NULL;
78}
79
80/*
81 * Return the rds_sock bound at the given local address.
82 *
83 * The rx path can race with rds_release. We notice if rds_release() has
84 * marked this socket and don't return a rs ref to the rx path.
85 */
86struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
87{
88 struct rds_sock *rs;
89 unsigned long flags;
90
91 spin_lock_irqsave(&rds_bind_lock, flags);
92 rs = rds_bind_tree_walk(addr, port, NULL);
93 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
94 rds_sock_addref(rs);
95 else
96 rs = NULL;
97 spin_unlock_irqrestore(&rds_bind_lock, flags);
98
99 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
100 ntohs(port));
101 return rs;
102}
103
104/* returns -ve errno or +ve port */
105static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
106{
107 unsigned long flags;
108 int ret = -EADDRINUSE;
109 u16 rover, last;
110
111 if (*port != 0) {
112 rover = be16_to_cpu(*port);
113 last = rover;
114 } else {
115 rover = max_t(u16, net_random(), 2);
116 last = rover - 1;
117 }
118
119 spin_lock_irqsave(&rds_bind_lock, flags);
120
121 do {
122 if (rover == 0)
123 rover++;
124 if (rds_bind_tree_walk(addr, cpu_to_be16(rover), rs) == NULL) {
125 *port = cpu_to_be16(rover);
126 ret = 0;
127 break;
128 }
129 } while (rover++ != last);
130
131 if (ret == 0) {
132 rs->rs_bound_addr = addr;
133 rs->rs_bound_port = *port;
134 rds_sock_addref(rs);
135
136 rdsdebug("rs %p binding to %pI4:%d\n",
137 rs, &addr, (int)ntohs(*port));
138 }
139
140 spin_unlock_irqrestore(&rds_bind_lock, flags);
141
142 return ret;
143}
144
145void rds_remove_bound(struct rds_sock *rs)
146{
147 unsigned long flags;
148
149 spin_lock_irqsave(&rds_bind_lock, flags);
150
151 if (rs->rs_bound_addr) {
152 rdsdebug("rs %p unbinding from %pI4:%d\n",
153 rs, &rs->rs_bound_addr,
154 ntohs(rs->rs_bound_port));
155
156 rb_erase(&rs->rs_bound_node, &rds_bind_tree);
157 rds_sock_put(rs);
158 rs->rs_bound_addr = 0;
159 }
160
161 spin_unlock_irqrestore(&rds_bind_lock, flags);
162}
163
164int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
165{
166 struct sock *sk = sock->sk;
167 struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
168 struct rds_sock *rs = rds_sk_to_rs(sk);
169 struct rds_transport *trans;
170 int ret = 0;
171
172 lock_sock(sk);
173
174 if (addr_len != sizeof(struct sockaddr_in) ||
175 sin->sin_family != AF_INET ||
176 rs->rs_bound_addr ||
177 sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
178 ret = -EINVAL;
179 goto out;
180 }
181
182 ret = rds_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port);
183 if (ret)
184 goto out;
185
186 trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
187 if (trans == NULL) {
188 ret = -EADDRNOTAVAIL;
189 rds_remove_bound(rs);
190 goto out;
191 }
192
193 rs->rs_transport = trans;
194 ret = 0;
195
196out:
197 release_sock(sk);
198 return ret;
199}
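
rds_bind_tree_walk() above keys the bind rbtree on a single 64-bit value, host-order address in the high 32 bits and host-order port in the low 16, so each lookup step is one integer comparison. A small userspace sketch of that key construction in isolation, with example address and port:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

static uint64_t rds_bind_key(uint32_t addr_be, uint16_t port_be)
{
	return ((uint64_t)ntohl(addr_be) << 32) | ntohs(port_be);
}

int main(void)
{
	uint32_t addr = inet_addr("192.0.2.1");	/* example address, network byte order */
	uint16_t port = htons(4000);		/* example port, network byte order */

	printf("key = 0x%016llx\n",
	       (unsigned long long)rds_bind_key(addr, port));
	return 0;
}
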
diff --git a/net/rds/cong.c b/net/rds/cong.c
new file mode 100644
index 000000000000..710e4599d76c
--- /dev/null
+++ b/net/rds/cong.c
@@ -0,0 +1,404 @@
1/*
2 * Copyright (c) 2007 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/types.h>
34#include <linux/rbtree.h>
35
36#include <asm-generic/bitops/le.h>
37
38#include "rds.h"
39
40/*
41 * This file implements the receive side of the unconventional congestion
42 * management in RDS.
43 *
44 * Messages waiting in the receive queue on the receiving socket are accounted
45 * against the sockets SO_RCVBUF option value. Only the payload bytes in the
46 * message are accounted for. If the number of bytes queued equals or exceeds
47 * rcvbuf then the socket is congested. All sends attempted to this socket's
48 * address should return block or return -EWOULDBLOCK.
49 *
50 * Applications are expected to be reasonably tuned such that this situation
51 * very rarely occurs. An application encountering this "back-pressure" is
52 * considered a bug.
53 *
54 * This is implemented by having each node maintain bitmaps which indicate
55 * which ports on bound addresses are congested. As the bitmap changes it is
56 * sent through all the connections which terminate in the local address of the
57 * bitmap which changed.
58 *
59 * The bitmaps are allocated as connections are brought up. This avoids
60 * allocation in the interrupt handling path which queues messages on sockets.
61 * The dense bitmaps let transports send the entire bitmap on any bitmap change
62 * reasonably efficiently. This is much easier to implement than some
63 * finer-grained communication of per-port congestion. The sender does a very
64 * inexpensive bit test to test if the port it's about to send to is congested
65 * or not.
66 */
67
68/*
69 * Interaction with poll is a tad tricky. We want all processes stuck in
70 * poll to wake up and check whether a congested destination became uncongested.
71 * The really sad thing is we have no idea which destinations the application
72 * wants to send to - we don't even know which rds_connections are involved.
73 * So until we implement a more flexible rds poll interface, we have to make
74 * do with this:
75 * We maintain a global counter that is incremented each time a congestion map
76 * update is received. Each rds socket tracks this value, and if rds_poll
77 * finds that the saved generation number is smaller than the global generation
78 * number, it wakes up the process.
79 */
80static atomic_t rds_cong_generation = ATOMIC_INIT(0);
81
82/*
83 * Congestion monitoring
84 */
85static LIST_HEAD(rds_cong_monitor);
86static DEFINE_RWLOCK(rds_cong_monitor_lock);
87
88/*
89 * Yes, a global lock. It's used so infrequently that it's worth keeping it
90 * global to simplify the locking. It's only used in the following
91 * circumstances:
92 *
93 * - on connection buildup to associate a conn with its maps
94 * - on map changes to inform conns of a new map to send
95 *
96 * It's sadly ordered under the socket callback lock and the connection lock.
97 * Receive paths can mark ports congested from interrupt context so the
98 * lock masks interrupts.
99 */
100static DEFINE_SPINLOCK(rds_cong_lock);
101static struct rb_root rds_cong_tree = RB_ROOT;
102
103static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
104 struct rds_cong_map *insert)
105{
106 struct rb_node **p = &rds_cong_tree.rb_node;
107 struct rb_node *parent = NULL;
108 struct rds_cong_map *map;
109
110 while (*p) {
111 parent = *p;
112 map = rb_entry(parent, struct rds_cong_map, m_rb_node);
113
114 if (addr < map->m_addr)
115 p = &(*p)->rb_left;
116 else if (addr > map->m_addr)
117 p = &(*p)->rb_right;
118 else
119 return map;
120 }
121
122 if (insert) {
123 rb_link_node(&insert->m_rb_node, parent, p);
124 rb_insert_color(&insert->m_rb_node, &rds_cong_tree);
125 }
126 return NULL;
127}
128
129/*
130 * There is only ever one bitmap for any address. Connections try and allocate
131 * these bitmaps in the process getting pointers to them. The bitmaps are only
132 * ever freed as the module is removed after all connections have been freed.
133 */
134static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
135{
136 struct rds_cong_map *map;
137 struct rds_cong_map *ret = NULL;
138 unsigned long zp;
139 unsigned long i;
140 unsigned long flags;
141
142 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL);
143 if (map == NULL)
144 return NULL;
145
146 map->m_addr = addr;
147 init_waitqueue_head(&map->m_waitq);
148 INIT_LIST_HEAD(&map->m_conn_list);
149
150 for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
151 zp = get_zeroed_page(GFP_KERNEL);
152 if (zp == 0)
153 goto out;
154 map->m_page_addrs[i] = zp;
155 }
156
157 spin_lock_irqsave(&rds_cong_lock, flags);
158 ret = rds_cong_tree_walk(addr, map);
159 spin_unlock_irqrestore(&rds_cong_lock, flags);
160
161 if (ret == NULL) {
162 ret = map;
163 map = NULL;
164 }
165
166out:
167 if (map) {
168 for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
169 free_page(map->m_page_addrs[i]);
170 kfree(map);
171 }
172
173 rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr));
174
175 return ret;
176}
177
178/*
179 * Put the conn on its local map's list. This is called when the conn is
180 * really added to the hash. It's nested under the rds_conn_lock, sadly.
181 */
182void rds_cong_add_conn(struct rds_connection *conn)
183{
184 unsigned long flags;
185
186 rdsdebug("conn %p now on map %p\n", conn, conn->c_lcong);
187 spin_lock_irqsave(&rds_cong_lock, flags);
188 list_add_tail(&conn->c_map_item, &conn->c_lcong->m_conn_list);
189 spin_unlock_irqrestore(&rds_cong_lock, flags);
190}
191
192void rds_cong_remove_conn(struct rds_connection *conn)
193{
194 unsigned long flags;
195
196 rdsdebug("removing conn %p from map %p\n", conn, conn->c_lcong);
197 spin_lock_irqsave(&rds_cong_lock, flags);
198 list_del_init(&conn->c_map_item);
199 spin_unlock_irqrestore(&rds_cong_lock, flags);
200}
201
202int rds_cong_get_maps(struct rds_connection *conn)
203{
204 conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
205 conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
206
207 if (conn->c_lcong == NULL || conn->c_fcong == NULL)
208 return -ENOMEM;
209
210 return 0;
211}
212
213void rds_cong_queue_updates(struct rds_cong_map *map)
214{
215 struct rds_connection *conn;
216 unsigned long flags;
217
218 spin_lock_irqsave(&rds_cong_lock, flags);
219
220 list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
221 if (!test_and_set_bit(0, &conn->c_map_queued)) {
222 rds_stats_inc(s_cong_update_queued);
223 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
224 }
225 }
226
227 spin_unlock_irqrestore(&rds_cong_lock, flags);
228}
229
230void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask)
231{
232 rdsdebug("waking map %p for %pI4\n",
233 map, &map->m_addr);
234 rds_stats_inc(s_cong_update_received);
235 atomic_inc(&rds_cong_generation);
236 if (waitqueue_active(&map->m_waitq))
237 wake_up(&map->m_waitq);
238 if (waitqueue_active(&rds_poll_waitq))
239 wake_up_all(&rds_poll_waitq);
240
241 if (portmask && !list_empty(&rds_cong_monitor)) {
242 unsigned long flags;
243 struct rds_sock *rs;
244
245 read_lock_irqsave(&rds_cong_monitor_lock, flags);
246 list_for_each_entry(rs, &rds_cong_monitor, rs_cong_list) {
247 spin_lock(&rs->rs_lock);
248 rs->rs_cong_notify |= (rs->rs_cong_mask & portmask);
249 rs->rs_cong_mask &= ~portmask;
250 spin_unlock(&rs->rs_lock);
251 if (rs->rs_cong_notify)
252 rds_wake_sk_sleep(rs);
253 }
254 read_unlock_irqrestore(&rds_cong_monitor_lock, flags);
255 }
256}
257
258int rds_cong_updated_since(unsigned long *recent)
259{
260 unsigned long gen = atomic_read(&rds_cong_generation);
261
262 if (likely(*recent == gen))
263 return 0;
264 *recent = gen;
265 return 1;
266}
267
268/*
269 * We're called under the locking that protects the socket's receive buffer
270 * consumption. This makes it a lot easier for the caller to only call us
271 * when it knows that an existing set bit needs to be cleared, and vice versa.
272 * We can't block and we need to deal with concurrent sockets working against
273 * the same per-address map.
274 */
275void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
276{
277 unsigned long i;
278 unsigned long off;
279
280 rdsdebug("setting congestion for %pI4:%u in map %p\n",
281 &map->m_addr, ntohs(port), map);
282
283 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
284 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
285
286 generic___set_le_bit(off, (void *)map->m_page_addrs[i]);
287}
288
289void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
290{
291 unsigned long i;
292 unsigned long off;
293
294 rdsdebug("clearing congestion for %pI4:%u in map %p\n",
295 &map->m_addr, ntohs(port), map);
296
297 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
298 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
299
300 generic___clear_le_bit(off, (void *)map->m_page_addrs[i]);
301}
302
303static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
304{
305 unsigned long i;
306 unsigned long off;
307
308 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
309 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
310
311 return generic_test_le_bit(off, (void *)map->m_page_addrs[i]);
312}
313
314void rds_cong_add_socket(struct rds_sock *rs)
315{
316 unsigned long flags;
317
318 write_lock_irqsave(&rds_cong_monitor_lock, flags);
319 if (list_empty(&rs->rs_cong_list))
320 list_add(&rs->rs_cong_list, &rds_cong_monitor);
321 write_unlock_irqrestore(&rds_cong_monitor_lock, flags);
322}
323
324void rds_cong_remove_socket(struct rds_sock *rs)
325{
326 unsigned long flags;
327 struct rds_cong_map *map;
328
329 write_lock_irqsave(&rds_cong_monitor_lock, flags);
330 list_del_init(&rs->rs_cong_list);
331 write_unlock_irqrestore(&rds_cong_monitor_lock, flags);
332
333 /* update congestion map for now-closed port */
334 spin_lock_irqsave(&rds_cong_lock, flags);
335 map = rds_cong_tree_walk(rs->rs_bound_addr, NULL);
336 spin_unlock_irqrestore(&rds_cong_lock, flags);
337
338 if (map && rds_cong_test_bit(map, rs->rs_bound_port)) {
339 rds_cong_clear_bit(map, rs->rs_bound_port);
340 rds_cong_queue_updates(map);
341 }
342}
343
344int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock,
345 struct rds_sock *rs)
346{
347 if (!rds_cong_test_bit(map, port))
348 return 0;
349 if (nonblock) {
350 if (rs && rs->rs_cong_monitor) {
351 unsigned long flags;
352
353 /* It would have been nice to have an atomic set_bit on
354 * a uint64_t. */
355 spin_lock_irqsave(&rs->rs_lock, flags);
356 rs->rs_cong_mask |= RDS_CONG_MONITOR_MASK(ntohs(port));
357 spin_unlock_irqrestore(&rs->rs_lock, flags);
358
359 /* Test again - a congestion update may have arrived in
360 * the meantime. */
361 if (!rds_cong_test_bit(map, port))
362 return 0;
363 }
364 rds_stats_inc(s_cong_send_error);
365 return -ENOBUFS;
366 }
367
368 rds_stats_inc(s_cong_send_blocked);
369 rdsdebug("waiting on map %p for port %u\n", map, be16_to_cpu(port));
370
371 return wait_event_interruptible(map->m_waitq,
372 !rds_cong_test_bit(map, port));
373}
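The function above is the sender-side entry point; its caller lives in net/rds/send.c, which is not part of this hunk. The snippet below is only an illustrative caller under that assumption, showing that it is the remote (c_fcong) map and the destination port that get checked.

/* Illustrative caller only - the real send path is in net/rds/send.c.
 * Before queueing a message, the sender tests the peer's map for the
 * destination port and either blocks, or fails fast when nonblocking. */
static int ex_send_check_congestion(struct rds_connection *conn,
				    __be16 dport, int nonblock,
				    struct rds_sock *rs)
{
	return rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
}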
374
375void rds_cong_exit(void)
376{
377 struct rb_node *node;
378 struct rds_cong_map *map;
379 unsigned long i;
380
381 while ((node = rb_first(&rds_cong_tree))) {
382 map = rb_entry(node, struct rds_cong_map, m_rb_node);
383 rdsdebug("freeing map %p\n", map);
384 rb_erase(&map->m_rb_node, &rds_cong_tree);
385 for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
386 free_page(map->m_page_addrs[i]);
387 kfree(map);
388 }
389}
390
391/*
392 * Allocate a RDS message containing a congestion update.
393 */
394struct rds_message *rds_cong_update_alloc(struct rds_connection *conn)
395{
396 struct rds_cong_map *map = conn->c_lcong;
397 struct rds_message *rm;
398
399 rm = rds_message_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES);
400 if (!IS_ERR(rm))
401 rm->m_inc.i_hdr.h_flags = RDS_FLAG_CONG_BITMAP;
402
403 return rm;
404}
diff --git a/net/rds/connection.c b/net/rds/connection.c
new file mode 100644
index 000000000000..273f064930a8
--- /dev/null
+++ b/net/rds/connection.c
@@ -0,0 +1,487 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/list.h>
35#include <net/inet_hashtables.h>
36
37#include "rds.h"
38#include "loop.h"
39#include "rdma.h"
40
41#define RDS_CONNECTION_HASH_BITS 12
42#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS)
43#define RDS_CONNECTION_HASH_MASK (RDS_CONNECTION_HASH_ENTRIES - 1)
44
45/* converting this to RCU is a chore for another day.. */
46static DEFINE_SPINLOCK(rds_conn_lock);
47static unsigned long rds_conn_count;
48static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES];
49static struct kmem_cache *rds_conn_slab;
50
51static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
52{
53 /* Pass NULL, don't need struct net for hash */
54 unsigned long hash = inet_ehashfn(NULL,
55 be32_to_cpu(laddr), 0,
56 be32_to_cpu(faddr), 0);
57 return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK];
58}
59
60#define rds_conn_info_set(var, test, suffix) do { \
61 if (test) \
62 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \
63} while (0)
64
65static inline int rds_conn_is_sending(struct rds_connection *conn)
66{
67 int ret = 0;
68
69 if (!mutex_trylock(&conn->c_send_lock))
70 ret = 1;
71 else
72 mutex_unlock(&conn->c_send_lock);
73
74 return ret;
75}
76
77static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
78 __be32 laddr, __be32 faddr,
79 struct rds_transport *trans)
80{
81 struct rds_connection *conn, *ret = NULL;
82 struct hlist_node *pos;
83
84 hlist_for_each_entry(conn, pos, head, c_hash_node) {
85 if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
86 conn->c_trans == trans) {
87 ret = conn;
88 break;
89 }
90 }
91 rdsdebug("returning conn %p for %pI4 -> %pI4\n", ret,
92 &laddr, &faddr);
93 return ret;
94}
95
96/*
97 * This is called by transports as they're bringing down a connection.
98 * It clears partial message state so that the transport can start sending
99 * and receiving over this connection again in the future. It is up to
100 * the transport to have serialized this call with its send and recv.
101 */
102void rds_conn_reset(struct rds_connection *conn)
103{
104 rdsdebug("connection %pI4 to %pI4 reset\n",
105 &conn->c_laddr, &conn->c_faddr);
106
107 rds_stats_inc(s_conn_reset);
108 rds_send_reset(conn);
109 conn->c_flags = 0;
110
111 /* Do not clear next_rx_seq here, else we cannot distinguish
112 * retransmitted packets from new packets, and will hand all
113 * of them to the application. That is not consistent with the
114 * reliability guarantees of RDS. */
115}
116
117/*
118 * There is only ever one 'conn' for a given pair of addresses in the
119 * system at a time. They contain messages to be retransmitted and so
120 * span the lifetime of the actual underlying transport connections.
121 *
122 * For now they are not garbage collected once they're created. They
123 * are torn down as the module is removed, if ever.
124 */
125static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
126 struct rds_transport *trans, gfp_t gfp,
127 int is_outgoing)
128{
129 struct rds_connection *conn, *tmp, *parent = NULL;
130 struct hlist_head *head = rds_conn_bucket(laddr, faddr);
131 unsigned long flags;
132 int ret;
133
134 spin_lock_irqsave(&rds_conn_lock, flags);
135 conn = rds_conn_lookup(head, laddr, faddr, trans);
136 if (conn
137 && conn->c_loopback
138 && conn->c_trans != &rds_loop_transport
139 && !is_outgoing) {
140 /* This is a looped back IB connection, and we're
141 * called by the code handling the incoming connect.
142 * We need a second connection object into which we
143 * can stick the other QP. */
144 parent = conn;
145 conn = parent->c_passive;
146 }
147 spin_unlock_irqrestore(&rds_conn_lock, flags);
148 if (conn)
149 goto out;
150
151 conn = kmem_cache_alloc(rds_conn_slab, gfp);
152 if (conn == NULL) {
153 conn = ERR_PTR(-ENOMEM);
154 goto out;
155 }
156
157 memset(conn, 0, sizeof(*conn));
158
159 INIT_HLIST_NODE(&conn->c_hash_node);
160 conn->c_version = RDS_PROTOCOL_3_0;
161 conn->c_laddr = laddr;
162 conn->c_faddr = faddr;
163 spin_lock_init(&conn->c_lock);
164 conn->c_next_tx_seq = 1;
165
166 mutex_init(&conn->c_send_lock);
167 INIT_LIST_HEAD(&conn->c_send_queue);
168 INIT_LIST_HEAD(&conn->c_retrans);
169
170 ret = rds_cong_get_maps(conn);
171 if (ret) {
172 kmem_cache_free(rds_conn_slab, conn);
173 conn = ERR_PTR(ret);
174 goto out;
175 }
176
177 /*
178 * This is where a connection becomes loopback. If *any* RDS sockets
179 * can bind to the destination address then we'd rather the messages
180 * flow through loopback than over either transport.
181 */
182 if (rds_trans_get_preferred(faddr)) {
183 conn->c_loopback = 1;
184 if (is_outgoing && trans->t_prefer_loopback) {
185 /* "outgoing" connection - and the transport
186 * says it wants the connection handled by the
187 * loopback transport. This is what TCP does.
188 */
189 trans = &rds_loop_transport;
190 }
191 }
192
193 conn->c_trans = trans;
194
195 ret = trans->conn_alloc(conn, gfp);
196 if (ret) {
197 kmem_cache_free(rds_conn_slab, conn);
198 conn = ERR_PTR(ret);
199 goto out;
200 }
201
202 atomic_set(&conn->c_state, RDS_CONN_DOWN);
203 conn->c_reconnect_jiffies = 0;
204 INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
205 INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
206 INIT_DELAYED_WORK(&conn->c_conn_w, rds_connect_worker);
207 INIT_WORK(&conn->c_down_w, rds_shutdown_worker);
208 mutex_init(&conn->c_cm_lock);
209 conn->c_flags = 0;
210
211 rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
212 conn, &laddr, &faddr,
213 trans->t_name ? trans->t_name : "[unknown]",
214 is_outgoing ? "(outgoing)" : "");
215
216 spin_lock_irqsave(&rds_conn_lock, flags);
217 if (parent == NULL) {
218 tmp = rds_conn_lookup(head, laddr, faddr, trans);
219 if (tmp == NULL)
220 hlist_add_head(&conn->c_hash_node, head);
221 } else {
222 tmp = parent->c_passive;
223 if (!tmp)
224 parent->c_passive = conn;
225 }
226
227 if (tmp) {
228 trans->conn_free(conn->c_transport_data);
229 kmem_cache_free(rds_conn_slab, conn);
230 conn = tmp;
231 } else {
232 rds_cong_add_conn(conn);
233 rds_conn_count++;
234 }
235
236 spin_unlock_irqrestore(&rds_conn_lock, flags);
237
238out:
239 return conn;
240}
241
242struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
243 struct rds_transport *trans, gfp_t gfp)
244{
245 return __rds_conn_create(laddr, faddr, trans, gfp, 0);
246}
247
248struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
249 struct rds_transport *trans, gfp_t gfp)
250{
251 return __rds_conn_create(laddr, faddr, trans, gfp, 1);
252}
253
254void rds_conn_destroy(struct rds_connection *conn)
255{
256 struct rds_message *rm, *rtmp;
257
258 rdsdebug("freeing conn %p for %pI4 -> "
259 "%pI4\n", conn, &conn->c_laddr,
260 &conn->c_faddr);
261
262 hlist_del_init(&conn->c_hash_node);
263
264 /* wait for the rds thread to shut it down */
265 atomic_set(&conn->c_state, RDS_CONN_ERROR);
266 cancel_delayed_work(&conn->c_conn_w);
267 queue_work(rds_wq, &conn->c_down_w);
268 flush_workqueue(rds_wq);
269
270 /* tear down queued messages */
271 list_for_each_entry_safe(rm, rtmp,
272 &conn->c_send_queue,
273 m_conn_item) {
274 list_del_init(&rm->m_conn_item);
275 BUG_ON(!list_empty(&rm->m_sock_item));
276 rds_message_put(rm);
277 }
278 if (conn->c_xmit_rm)
279 rds_message_put(conn->c_xmit_rm);
280
281 conn->c_trans->conn_free(conn->c_transport_data);
282
283 /*
284 * The congestion maps aren't freed up here. They're
285 * freed by rds_cong_exit() after all the connections
286 * have been freed.
287 */
288 rds_cong_remove_conn(conn);
289
290 BUG_ON(!list_empty(&conn->c_retrans));
291 kmem_cache_free(rds_conn_slab, conn);
292
293 rds_conn_count--;
294}
295
296static void rds_conn_message_info(struct socket *sock, unsigned int len,
297 struct rds_info_iterator *iter,
298 struct rds_info_lengths *lens,
299 int want_send)
300{
301 struct hlist_head *head;
302 struct hlist_node *pos;
303 struct list_head *list;
304 struct rds_connection *conn;
305 struct rds_message *rm;
306 unsigned long flags;
307 unsigned int total = 0;
308 size_t i;
309
310 len /= sizeof(struct rds_info_message);
311
312 spin_lock_irqsave(&rds_conn_lock, flags);
313
314 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
315 i++, head++) {
316 hlist_for_each_entry(conn, pos, head, c_hash_node) {
317 if (want_send)
318 list = &conn->c_send_queue;
319 else
320 list = &conn->c_retrans;
321
322 spin_lock(&conn->c_lock);
323
324 /* XXX too lazy to maintain counts.. */
325 list_for_each_entry(rm, list, m_conn_item) {
326 total++;
327 if (total <= len)
328 rds_inc_info_copy(&rm->m_inc, iter,
329 conn->c_laddr,
330 conn->c_faddr, 0);
331 }
332
333 spin_unlock(&conn->c_lock);
334 }
335 }
336
337 spin_unlock_irqrestore(&rds_conn_lock, flags);
338
339 lens->nr = total;
340 lens->each = sizeof(struct rds_info_message);
341}
342
343static void rds_conn_message_info_send(struct socket *sock, unsigned int len,
344 struct rds_info_iterator *iter,
345 struct rds_info_lengths *lens)
346{
347 rds_conn_message_info(sock, len, iter, lens, 1);
348}
349
350static void rds_conn_message_info_retrans(struct socket *sock,
351 unsigned int len,
352 struct rds_info_iterator *iter,
353 struct rds_info_lengths *lens)
354{
355 rds_conn_message_info(sock, len, iter, lens, 0);
356}
357
358void rds_for_each_conn_info(struct socket *sock, unsigned int len,
359 struct rds_info_iterator *iter,
360 struct rds_info_lengths *lens,
361 int (*visitor)(struct rds_connection *, void *),
362 size_t item_len)
363{
364 uint64_t buffer[(item_len + 7) / 8];
365 struct hlist_head *head;
366 struct hlist_node *pos;
367 struct hlist_node *tmp;
368 struct rds_connection *conn;
369 unsigned long flags;
370 size_t i;
371
372 spin_lock_irqsave(&rds_conn_lock, flags);
373
374 lens->nr = 0;
375 lens->each = item_len;
376
377 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
378 i++, head++) {
379 hlist_for_each_entry_safe(conn, pos, tmp, head, c_hash_node) {
380
381 /* XXX no c_lock usage.. */
382 if (!visitor(conn, buffer))
383 continue;
384
385 /* We copy as much as we can fit in the buffer,
386 * but we count all items so that the caller
387 * can resize the buffer. */
388 if (len >= item_len) {
389 rds_info_copy(iter, buffer, item_len);
390 len -= item_len;
391 }
392 lens->nr++;
393 }
394 }
395
396 spin_unlock_irqrestore(&rds_conn_lock, flags);
397}
398
399static int rds_conn_info_visitor(struct rds_connection *conn,
400 void *buffer)
401{
402 struct rds_info_connection *cinfo = buffer;
403
404 cinfo->next_tx_seq = conn->c_next_tx_seq;
405 cinfo->next_rx_seq = conn->c_next_rx_seq;
406 cinfo->laddr = conn->c_laddr;
407 cinfo->faddr = conn->c_faddr;
408 strncpy(cinfo->transport, conn->c_trans->t_name,
409 sizeof(cinfo->transport));
410 cinfo->flags = 0;
411
412 rds_conn_info_set(cinfo->flags,
413 rds_conn_is_sending(conn), SENDING);
414 /* XXX Future: return the state rather than these funky bits */
415 rds_conn_info_set(cinfo->flags,
416 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING,
417 CONNECTING);
418 rds_conn_info_set(cinfo->flags,
419 atomic_read(&conn->c_state) == RDS_CONN_UP,
420 CONNECTED);
421 return 1;
422}
423
424static void rds_conn_info(struct socket *sock, unsigned int len,
425 struct rds_info_iterator *iter,
426 struct rds_info_lengths *lens)
427{
428 rds_for_each_conn_info(sock, len, iter, lens,
429 rds_conn_info_visitor,
430 sizeof(struct rds_info_connection));
431}
432
433int __init rds_conn_init(void)
434{
435 rds_conn_slab = kmem_cache_create("rds_connection",
436 sizeof(struct rds_connection),
437 0, 0, NULL);
438 if (rds_conn_slab == NULL)
439 return -ENOMEM;
440
441 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info);
442 rds_info_register_func(RDS_INFO_SEND_MESSAGES,
443 rds_conn_message_info_send);
444 rds_info_register_func(RDS_INFO_RETRANS_MESSAGES,
445 rds_conn_message_info_retrans);
446
447 return 0;
448}
449
450void rds_conn_exit(void)
451{
452 rds_loop_exit();
453
454 WARN_ON(!hlist_empty(rds_conn_hash));
455
456 kmem_cache_destroy(rds_conn_slab);
457
458 rds_info_deregister_func(RDS_INFO_CONNECTIONS, rds_conn_info);
459 rds_info_deregister_func(RDS_INFO_SEND_MESSAGES,
460 rds_conn_message_info_send);
461 rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES,
462 rds_conn_message_info_retrans);
463}
464
465/*
466 * Force a disconnect
467 */
468void rds_conn_drop(struct rds_connection *conn)
469{
470 atomic_set(&conn->c_state, RDS_CONN_ERROR);
471 queue_work(rds_wq, &conn->c_down_w);
472}
473
474/*
475 * An error occurred on the connection
476 */
477void
478__rds_conn_error(struct rds_connection *conn, const char *fmt, ...)
479{
480 va_list ap;
481
482 va_start(ap, fmt);
483 vprintk(fmt, ap);
484 va_end(ap);
485
486 rds_conn_drop(conn);
487}
diff --git a/net/rds/ib.c b/net/rds/ib.c
new file mode 100644
index 000000000000..06a7b798d9a7
--- /dev/null
+++ b/net/rds/ib.c
@@ -0,0 +1,323 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/if.h>
36#include <linux/netdevice.h>
37#include <linux/inetdevice.h>
38#include <linux/if_arp.h>
39#include <linux/delay.h>
40
41#include "rds.h"
42#include "ib.h"
43
44unsigned int fmr_pool_size = RDS_FMR_POOL_SIZE;
45unsigned int fmr_message_size = RDS_FMR_SIZE + 1; /* +1 allows for unaligned MRs */
46
47module_param(fmr_pool_size, int, 0444);
48MODULE_PARM_DESC(fmr_pool_size, " Max number of fmr per HCA");
49module_param(fmr_message_size, int, 0444);
50MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
51
52struct list_head rds_ib_devices;
53
54DEFINE_SPINLOCK(ib_nodev_conns_lock);
55LIST_HEAD(ib_nodev_conns);
56
57void rds_ib_add_one(struct ib_device *device)
58{
59 struct rds_ib_device *rds_ibdev;
60 struct ib_device_attr *dev_attr;
61
62 /* Only handle IB (no iWARP) devices */
63 if (device->node_type != RDMA_NODE_IB_CA)
64 return;
65
66 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
67 if (!dev_attr)
68 return;
69
70 if (ib_query_device(device, dev_attr)) {
71 rdsdebug("Query device failed for %s\n", device->name);
72 goto free_attr;
73 }
74
75 rds_ibdev = kmalloc(sizeof *rds_ibdev, GFP_KERNEL);
76 if (!rds_ibdev)
77 goto free_attr;
78
79 spin_lock_init(&rds_ibdev->spinlock);
80
81 rds_ibdev->max_wrs = dev_attr->max_qp_wr;
82 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
83
84 rds_ibdev->fmr_page_shift = max(9, ffs(dev_attr->page_size_cap) - 1);
85 rds_ibdev->fmr_page_size = 1 << rds_ibdev->fmr_page_shift;
86 rds_ibdev->fmr_page_mask = ~((u64) rds_ibdev->fmr_page_size - 1);
87 rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32;
88 rds_ibdev->max_fmrs = dev_attr->max_fmr ?
89 min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) :
90 fmr_pool_size;
91
92 rds_ibdev->dev = device;
93 rds_ibdev->pd = ib_alloc_pd(device);
94 if (IS_ERR(rds_ibdev->pd))
95 goto free_dev;
96
97 rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd,
98 IB_ACCESS_LOCAL_WRITE);
99 if (IS_ERR(rds_ibdev->mr))
100 goto err_pd;
101
102 rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev);
103 if (IS_ERR(rds_ibdev->mr_pool)) {
104 rds_ibdev->mr_pool = NULL;
105 goto err_mr;
106 }
107
108 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
109 INIT_LIST_HEAD(&rds_ibdev->conn_list);
110 list_add_tail(&rds_ibdev->list, &rds_ib_devices);
111
112 ib_set_client_data(device, &rds_ib_client, rds_ibdev);
113
114 goto free_attr;
115
116err_mr:
117 ib_dereg_mr(rds_ibdev->mr);
118err_pd:
119 ib_dealloc_pd(rds_ibdev->pd);
120free_dev:
121 kfree(rds_ibdev);
122free_attr:
123 kfree(dev_attr);
124}
125
126void rds_ib_remove_one(struct ib_device *device)
127{
128 struct rds_ib_device *rds_ibdev;
129 struct rds_ib_ipaddr *i_ipaddr, *i_next;
130
131 rds_ibdev = ib_get_client_data(device, &rds_ib_client);
132 if (!rds_ibdev)
133 return;
134
135 list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
136 list_del(&i_ipaddr->list);
137 kfree(i_ipaddr);
138 }
139
140 rds_ib_remove_conns(rds_ibdev);
141
142 if (rds_ibdev->mr_pool)
143 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
144
145 ib_dereg_mr(rds_ibdev->mr);
146
147 while (ib_dealloc_pd(rds_ibdev->pd)) {
148 rdsdebug("Failed to dealloc pd %p\n", rds_ibdev->pd);
149 msleep(1);
150 }
151
152 list_del(&rds_ibdev->list);
153 kfree(rds_ibdev);
154}
155
156struct ib_client rds_ib_client = {
157 .name = "rds_ib",
158 .add = rds_ib_add_one,
159 .remove = rds_ib_remove_one
160};
161
162static int rds_ib_conn_info_visitor(struct rds_connection *conn,
163 void *buffer)
164{
165 struct rds_info_rdma_connection *iinfo = buffer;
166 struct rds_ib_connection *ic;
167
168 /* We will only ever look at IB transports */
169 if (conn->c_trans != &rds_ib_transport)
170 return 0;
171
172 iinfo->src_addr = conn->c_laddr;
173 iinfo->dst_addr = conn->c_faddr;
174
175 memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
176 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
177 if (rds_conn_state(conn) == RDS_CONN_UP) {
178 struct rds_ib_device *rds_ibdev;
179 struct rdma_dev_addr *dev_addr;
180
181 ic = conn->c_transport_data;
182 dev_addr = &ic->i_cm_id->route.addr.dev_addr;
183
184 ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
185 ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
186
187 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
188 iinfo->max_send_wr = ic->i_send_ring.w_nr;
189 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
190 iinfo->max_send_sge = rds_ibdev->max_sge;
191 rds_ib_get_mr_info(rds_ibdev, iinfo);
192 }
193 return 1;
194}
195
196static void rds_ib_ic_info(struct socket *sock, unsigned int len,
197 struct rds_info_iterator *iter,
198 struct rds_info_lengths *lens)
199{
200 rds_for_each_conn_info(sock, len, iter, lens,
201 rds_ib_conn_info_visitor,
202 sizeof(struct rds_info_rdma_connection));
203}
204
205
206/*
207 * Early RDS/IB was built to only bind to an address if there is an IPoIB
208 * device with that address set.
209 *
210 * If it were me, I'd advocate for something more flexible. Sending and
211 * receiving should be device-agnostic. Transports would try and maintain
212 * connections between peers who have messages queued. Userspace would be
213 * allowed to influence which paths have priority. We could call userspace
214 * asserting this policy "routing".
215 */
216static int rds_ib_laddr_check(__be32 addr)
217{
218 int ret;
219 struct rdma_cm_id *cm_id;
220 struct sockaddr_in sin;
221
222 /* Create a CMA ID and try to bind it. This catches both
223 * IB and iWARP capable NICs.
224 */
225 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP);
226 if (!cm_id)
227 return -EADDRNOTAVAIL;
228
229 memset(&sin, 0, sizeof(sin));
230 sin.sin_family = AF_INET;
231 sin.sin_addr.s_addr = addr;
232
233 /* rdma_bind_addr will only succeed for IB & iWARP devices */
234 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
235 /* due to this, we will claim to support iWARP devices unless we
236 check node_type. */
237 if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA)
238 ret = -EADDRNOTAVAIL;
239
240 rdsdebug("addr %pI4 ret %d node type %d\n",
241 &addr, ret,
242 cm_id->device ? cm_id->device->node_type : -1);
243
244 rdma_destroy_id(cm_id);
245
246 return ret;
247}
248
249void rds_ib_exit(void)
250{
251 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
252 rds_ib_remove_nodev_conns();
253 ib_unregister_client(&rds_ib_client);
254 rds_ib_sysctl_exit();
255 rds_ib_recv_exit();
256 rds_trans_unregister(&rds_ib_transport);
257}
258
259struct rds_transport rds_ib_transport = {
260 .laddr_check = rds_ib_laddr_check,
261 .xmit_complete = rds_ib_xmit_complete,
262 .xmit = rds_ib_xmit,
263 .xmit_cong_map = NULL,
264 .xmit_rdma = rds_ib_xmit_rdma,
265 .recv = rds_ib_recv,
266 .conn_alloc = rds_ib_conn_alloc,
267 .conn_free = rds_ib_conn_free,
268 .conn_connect = rds_ib_conn_connect,
269 .conn_shutdown = rds_ib_conn_shutdown,
270 .inc_copy_to_user = rds_ib_inc_copy_to_user,
271 .inc_purge = rds_ib_inc_purge,
272 .inc_free = rds_ib_inc_free,
273 .cm_initiate_connect = rds_ib_cm_initiate_connect,
274 .cm_handle_connect = rds_ib_cm_handle_connect,
275 .cm_connect_complete = rds_ib_cm_connect_complete,
276 .stats_info_copy = rds_ib_stats_info_copy,
277 .exit = rds_ib_exit,
278 .get_mr = rds_ib_get_mr,
279 .sync_mr = rds_ib_sync_mr,
280 .free_mr = rds_ib_free_mr,
281 .flush_mrs = rds_ib_flush_mrs,
282 .t_owner = THIS_MODULE,
283 .t_name = "infiniband",
284};
285
286int __init rds_ib_init(void)
287{
288 int ret;
289
290 INIT_LIST_HEAD(&rds_ib_devices);
291
292 ret = ib_register_client(&rds_ib_client);
293 if (ret)
294 goto out;
295
296 ret = rds_ib_sysctl_init();
297 if (ret)
298 goto out_ibreg;
299
300 ret = rds_ib_recv_init();
301 if (ret)
302 goto out_sysctl;
303
304 ret = rds_trans_register(&rds_ib_transport);
305 if (ret)
306 goto out_recv;
307
308 rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
309
310 goto out;
311
312out_recv:
313 rds_ib_recv_exit();
314out_sysctl:
315 rds_ib_sysctl_exit();
316out_ibreg:
317 ib_unregister_client(&rds_ib_client);
318out:
319 return ret;
320}
321
322MODULE_LICENSE("GPL");
323
diff --git a/net/rds/ib.h b/net/rds/ib.h
new file mode 100644
index 000000000000..8be563a1363a
--- /dev/null
+++ b/net/rds/ib.h
@@ -0,0 +1,367 @@
1#ifndef _RDS_IB_H
2#define _RDS_IB_H
3
4#include <rdma/ib_verbs.h>
5#include <rdma/rdma_cm.h>
6#include "rds.h"
7#include "rdma_transport.h"
8
9#define RDS_FMR_SIZE 256
10#define RDS_FMR_POOL_SIZE 4096
11
12#define RDS_IB_MAX_SGE 8
13#define RDS_IB_RECV_SGE 2
14
15#define RDS_IB_DEFAULT_RECV_WR 1024
16#define RDS_IB_DEFAULT_SEND_WR 256
17
18#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
19
20extern struct list_head rds_ib_devices;
21
22/*
23 * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to
24 * try to minimize the amount of memory tied up in both the device and
25 * socket receive queues.
26 */
27/* page offset of the final full frag that fits in the page */
28#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE)
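RDS_FRAG_SIZE is defined in rds.h rather than in this header, so the numbers here are only an assumed illustration of the macro above: with 4K pages and a hypothetical 2K fragment size, PAGE_SIZE / RDS_FRAG_SIZE is 2, so RDS_PAGE_LAST_OFF works out to (2 - 1) * 2048 = 2048, the offset of the last whole fragment that still fits in the page.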
29struct rds_page_frag {
30 struct list_head f_item;
31 struct page *f_page;
32 unsigned long f_offset;
33 dma_addr_t f_mapped;
34};
35
36struct rds_ib_incoming {
37 struct list_head ii_frags;
38 struct rds_incoming ii_inc;
39};
40
41struct rds_ib_connect_private {
42 /* Add new fields at the end, and don't permute existing fields. */
43 __be32 dp_saddr;
44 __be32 dp_daddr;
45 u8 dp_protocol_major;
46 u8 dp_protocol_minor;
47 __be16 dp_protocol_minor_mask; /* bitmask */
48 __be32 dp_reserved1;
49 __be64 dp_ack_seq;
50 __be32 dp_credit; /* non-zero enables flow ctl */
51};
52
53struct rds_ib_send_work {
54 struct rds_message *s_rm;
55 struct rds_rdma_op *s_op;
56 struct ib_send_wr s_wr;
57 struct ib_sge s_sge[RDS_IB_MAX_SGE];
58 unsigned long s_queued;
59};
60
61struct rds_ib_recv_work {
62 struct rds_ib_incoming *r_ibinc;
63 struct rds_page_frag *r_frag;
64 struct ib_recv_wr r_wr;
65 struct ib_sge r_sge[2];
66};
67
68struct rds_ib_work_ring {
69 u32 w_nr;
70 u32 w_alloc_ptr;
71 u32 w_alloc_ctr;
72 u32 w_free_ptr;
73 atomic_t w_free_ctr;
74};
75
76struct rds_ib_device;
77
78struct rds_ib_connection {
79
80 struct list_head ib_node;
81 struct rds_ib_device *rds_ibdev;
82 struct rds_connection *conn;
83
84 /* alphabet soup, IBTA style */
85 struct rdma_cm_id *i_cm_id;
86 struct ib_pd *i_pd;
87 struct ib_mr *i_mr;
88 struct ib_cq *i_send_cq;
89 struct ib_cq *i_recv_cq;
90
91 /* tx */
92 struct rds_ib_work_ring i_send_ring;
93 struct rds_message *i_rm;
94 struct rds_header *i_send_hdrs;
95 u64 i_send_hdrs_dma;
96 struct rds_ib_send_work *i_sends;
97
98 /* rx */
99 struct mutex i_recv_mutex;
100 struct rds_ib_work_ring i_recv_ring;
101 struct rds_ib_incoming *i_ibinc;
102 u32 i_recv_data_rem;
103 struct rds_header *i_recv_hdrs;
104 u64 i_recv_hdrs_dma;
105 struct rds_ib_recv_work *i_recvs;
106 struct rds_page_frag i_frag;
107 u64 i_ack_recv; /* last ACK received */
108
109 /* sending acks */
110 unsigned long i_ack_flags;
111 u64 i_ack_next; /* next ACK to send */
112 struct rds_header *i_ack;
113 struct ib_send_wr i_ack_wr;
114 struct ib_sge i_ack_sge;
115 u64 i_ack_dma;
116 unsigned long i_ack_queued;
117
118 /* Flow control related information
119 *
120 * Our algorithm uses a pair of variables that we need to access
121 * atomically - one for the send credits, and one for the posted
122 * recv credits we need to transfer to the remote.
123 * Rather than protect them using a slow spinlock, we put both into
124 * a single atomic_t and update it using cmpxchg
125 */
126 atomic_t i_credits;
127
128 /* Protocol version specific information */
129 unsigned int i_flowctl:1; /* enable/disable flow ctl */
130
131 /* Batched completions */
132 unsigned int i_unsignaled_wrs;
133 long i_unsignaled_bytes;
134};
135
136/* This assumes that atomic_t is at least 32 bits */
137#define IB_GET_SEND_CREDITS(v) ((v) & 0xffff)
138#define IB_GET_POST_CREDITS(v) ((v) >> 16)
139#define IB_SET_SEND_CREDITS(v) ((v) & 0xffff)
140#define IB_SET_POST_CREDITS(v) ((v) << 16)
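The comment on i_credits above says the two counters are packed into one atomic_t and updated with cmpxchg; the real accounting lives in ib_send.c (rds_ib_send_grab_credits and friends), which is not in this hunk. The sketch below only illustrates that packing, using the IB_*_CREDITS macros just defined; the ex_ helper itself is hypothetical.

/* Illustrative only: take one send credit from the packed counter
 * without disturbing the posted-recv half, retrying on contention. */
static int ex_grab_one_send_credit(atomic_t *credits)
{
	int oldval, newval;

	do {
		oldval = atomic_read(credits);
		if (IB_GET_SEND_CREDITS(oldval) == 0)
			return 0;		/* no credit available */
		newval = oldval - IB_SET_SEND_CREDITS(1);
	} while (atomic_cmpxchg(credits, oldval, newval) != oldval);

	return 1;				/* took one send credit */
}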
141
142struct rds_ib_ipaddr {
143 struct list_head list;
144 __be32 ipaddr;
145};
146
147struct rds_ib_device {
148 struct list_head list;
149 struct list_head ipaddr_list;
150 struct list_head conn_list;
151 struct ib_device *dev;
152 struct ib_pd *pd;
153 struct ib_mr *mr;
154 struct rds_ib_mr_pool *mr_pool;
155 int fmr_page_shift;
156 int fmr_page_size;
157 u64 fmr_page_mask;
158 unsigned int fmr_max_remaps;
159 unsigned int max_fmrs;
160 int max_sge;
161 unsigned int max_wrs;
162 spinlock_t spinlock; /* protect the above */
163};
164
165/* bits for i_ack_flags */
166#define IB_ACK_IN_FLIGHT 0
167#define IB_ACK_REQUESTED 1
168
169/* Magic WR_ID for ACKs */
170#define RDS_IB_ACK_WR_ID (~(u64) 0)
171
172struct rds_ib_statistics {
173 uint64_t s_ib_connect_raced;
174 uint64_t s_ib_listen_closed_stale;
175 uint64_t s_ib_tx_cq_call;
176 uint64_t s_ib_tx_cq_event;
177 uint64_t s_ib_tx_ring_full;
178 uint64_t s_ib_tx_throttle;
179 uint64_t s_ib_tx_sg_mapping_failure;
180 uint64_t s_ib_tx_stalled;
181 uint64_t s_ib_tx_credit_updates;
182 uint64_t s_ib_rx_cq_call;
183 uint64_t s_ib_rx_cq_event;
184 uint64_t s_ib_rx_ring_empty;
185 uint64_t s_ib_rx_refill_from_cq;
186 uint64_t s_ib_rx_refill_from_thread;
187 uint64_t s_ib_rx_alloc_limit;
188 uint64_t s_ib_rx_credit_updates;
189 uint64_t s_ib_ack_sent;
190 uint64_t s_ib_ack_send_failure;
191 uint64_t s_ib_ack_send_delayed;
192 uint64_t s_ib_ack_send_piggybacked;
193 uint64_t s_ib_ack_received;
194 uint64_t s_ib_rdma_mr_alloc;
195 uint64_t s_ib_rdma_mr_free;
196 uint64_t s_ib_rdma_mr_used;
197 uint64_t s_ib_rdma_mr_pool_flush;
198 uint64_t s_ib_rdma_mr_pool_wait;
199 uint64_t s_ib_rdma_mr_pool_depleted;
200};
201
202extern struct workqueue_struct *rds_ib_wq;
203
204/*
205 * Fake ib_dma_sync_sg_for_{cpu,device} as long as ib_verbs.h
206 * doesn't define it.
207 */
208static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev,
209 struct scatterlist *sg, unsigned int sg_dma_len, int direction)
210{
211 unsigned int i;
212
213 for (i = 0; i < sg_dma_len; ++i) {
214 ib_dma_sync_single_for_cpu(dev,
215 ib_sg_dma_address(dev, &sg[i]),
216 ib_sg_dma_len(dev, &sg[i]),
217 direction);
218 }
219}
220#define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu
221
222static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
223 struct scatterlist *sg, unsigned int sg_dma_len, int direction)
224{
225 unsigned int i;
226
227 for (i = 0; i < sg_dma_len; ++i) {
228 ib_dma_sync_single_for_device(dev,
229 ib_sg_dma_address(dev, &sg[i]),
230 ib_sg_dma_len(dev, &sg[i]),
231 direction);
232 }
233}
234#define ib_dma_sync_sg_for_device rds_ib_dma_sync_sg_for_device
235
236
237/* ib.c */
238extern struct rds_transport rds_ib_transport;
239extern void rds_ib_add_one(struct ib_device *device);
240extern void rds_ib_remove_one(struct ib_device *device);
241extern struct ib_client rds_ib_client;
242
243extern unsigned int fmr_pool_size;
244extern unsigned int fmr_message_size;
245
246extern spinlock_t ib_nodev_conns_lock;
247extern struct list_head ib_nodev_conns;
248
249/* ib_cm.c */
250int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp);
251void rds_ib_conn_free(void *arg);
252int rds_ib_conn_connect(struct rds_connection *conn);
253void rds_ib_conn_shutdown(struct rds_connection *conn);
254void rds_ib_state_change(struct sock *sk);
255int __init rds_ib_listen_init(void);
256void rds_ib_listen_stop(void);
257void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
258int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
259 struct rdma_cm_event *event);
260int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id);
261void rds_ib_cm_connect_complete(struct rds_connection *conn,
262 struct rdma_cm_event *event);
263
264
265#define rds_ib_conn_error(conn, fmt...) \
266 __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)
267
268/* ib_rdma.c */
269int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
270int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
271void rds_ib_remove_nodev_conns(void);
272void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev);
273struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
274void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
275void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
276void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
277 struct rds_sock *rs, u32 *key_ret);
278void rds_ib_sync_mr(void *trans_private, int dir);
279void rds_ib_free_mr(void *trans_private, int invalidate);
280void rds_ib_flush_mrs(void);
281
282/* ib_recv.c */
283int __init rds_ib_recv_init(void);
284void rds_ib_recv_exit(void);
285int rds_ib_recv(struct rds_connection *conn);
286int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
287 gfp_t page_gfp, int prefill);
288void rds_ib_inc_purge(struct rds_incoming *inc);
289void rds_ib_inc_free(struct rds_incoming *inc);
290int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
291 size_t size);
292void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context);
293void rds_ib_recv_init_ring(struct rds_ib_connection *ic);
294void rds_ib_recv_clear_ring(struct rds_ib_connection *ic);
295void rds_ib_recv_init_ack(struct rds_ib_connection *ic);
296void rds_ib_attempt_ack(struct rds_ib_connection *ic);
297void rds_ib_ack_send_complete(struct rds_ib_connection *ic);
298u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic);
299
300/* ib_ring.c */
301void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr);
302void rds_ib_ring_resize(struct rds_ib_work_ring *ring, u32 nr);
303u32 rds_ib_ring_alloc(struct rds_ib_work_ring *ring, u32 val, u32 *pos);
304void rds_ib_ring_free(struct rds_ib_work_ring *ring, u32 val);
305void rds_ib_ring_unalloc(struct rds_ib_work_ring *ring, u32 val);
306int rds_ib_ring_empty(struct rds_ib_work_ring *ring);
307int rds_ib_ring_low(struct rds_ib_work_ring *ring);
308u32 rds_ib_ring_oldest(struct rds_ib_work_ring *ring);
309u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);
310extern wait_queue_head_t rds_ib_ring_empty_wait;
311
312/* ib_send.c */
313void rds_ib_xmit_complete(struct rds_connection *conn);
314int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
315 unsigned int hdr_off, unsigned int sg, unsigned int off);
316void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context);
317void rds_ib_send_init_ring(struct rds_ib_connection *ic);
318void rds_ib_send_clear_ring(struct rds_ib_connection *ic);
319int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op);
320void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits);
321void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted);
322int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted,
323 u32 *adv_credits, int need_posted);
324
325/* ib_stats.c */
326DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
327#define rds_ib_stats_inc(member) rds_stats_inc_which(rds_ib_stats, member)
328unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
329 unsigned int avail);
330
331/* ib_sysctl.c */
332int __init rds_ib_sysctl_init(void);
333void rds_ib_sysctl_exit(void);
334extern unsigned long rds_ib_sysctl_max_send_wr;
335extern unsigned long rds_ib_sysctl_max_recv_wr;
336extern unsigned long rds_ib_sysctl_max_unsig_wrs;
337extern unsigned long rds_ib_sysctl_max_unsig_bytes;
338extern unsigned long rds_ib_sysctl_max_recv_allocation;
339extern unsigned int rds_ib_sysctl_flow_control;
340extern ctl_table rds_ib_sysctl_table[];
341
342/*
343 * Helper functions for getting/setting the header and data SGEs in
344 * RDS packets (not RDMA)
345 */
346static inline struct ib_sge *
347rds_ib_header_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
348{
349 return &sge[0];
350}
351
352static inline struct ib_sge *
353rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
354{
355 return &sge[1];
356}
357
358static inline void rds_ib_set_64bit(u64 *ptr, u64 val)
359{
360#if BITS_PER_LONG == 64
361 *ptr = val;
362#else
363 set_64bit(ptr, val);
364#endif
365}
366
367#endif
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
new file mode 100644
index 000000000000..0532237bd128
--- /dev/null
+++ b/net/rds/ib_cm.c
@@ -0,0 +1,726 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/vmalloc.h>
36
37#include "rds.h"
38#include "ib.h"
39
40/*
41 * Set the selected protocol version
42 */
43static void rds_ib_set_protocol(struct rds_connection *conn, unsigned int version)
44{
45 conn->c_version = version;
46}
47
48/*
49 * Set up flow control
50 */
51static void rds_ib_set_flow_control(struct rds_connection *conn, u32 credits)
52{
53 struct rds_ib_connection *ic = conn->c_transport_data;
54
55 if (rds_ib_sysctl_flow_control && credits != 0) {
56 /* We're doing flow control */
57 ic->i_flowctl = 1;
58 rds_ib_send_add_credits(conn, credits);
59 } else {
60 ic->i_flowctl = 0;
61 }
62}
63
64/*
65 * Tune RNR behavior. Without flow control, we use a rather
66 * low timeout, but not the absolute minimum - this should
67 * be tunable.
68 *
69 * We already set the RNR retry count to 7 (which is the
70 * smallest infinite number :-) above.
71 * If flow control is off, we want to change this back to 0
72 * so that we learn quickly when our credit accounting is
73 * buggy.
74 *
75 * Caller passes in a qp_attr pointer - don't waste stack space
76 * by allocating this twice.
77 */
78static void
79rds_ib_tune_rnr(struct rds_ib_connection *ic, struct ib_qp_attr *attr)
80{
81 int ret;
82
83 attr->min_rnr_timer = IB_RNR_TIMER_000_32;
84 ret = ib_modify_qp(ic->i_cm_id->qp, attr, IB_QP_MIN_RNR_TIMER);
85 if (ret)
86 printk(KERN_NOTICE "ib_modify_qp(IB_QP_MIN_RNR_TIMER): err=%d\n", -ret);
87}
88
89/*
90 * Connection established.
91 * We get here for both outgoing and incoming connection.
92 */
93void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
94{
95 const struct rds_ib_connect_private *dp = NULL;
96 struct rds_ib_connection *ic = conn->c_transport_data;
97 struct rds_ib_device *rds_ibdev;
98 struct ib_qp_attr qp_attr;
99 int err;
100
101 if (event->param.conn.private_data_len) {
102 dp = event->param.conn.private_data;
103
104 rds_ib_set_protocol(conn,
105 RDS_PROTOCOL(dp->dp_protocol_major,
106 dp->dp_protocol_minor));
107 rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
108 }
109
110 printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
111 &conn->c_laddr,
112 RDS_PROTOCOL_MAJOR(conn->c_version),
113 RDS_PROTOCOL_MINOR(conn->c_version),
114 ic->i_flowctl ? ", flow control" : "");
115
116 /* Tune RNR behavior */
117 rds_ib_tune_rnr(ic, &qp_attr);
118
119 qp_attr.qp_state = IB_QPS_RTS;
120 err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
121 if (err)
122 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
123
124 /* update ib_device with this local ipaddr & conn */
125 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
126 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
127 if (err)
128 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err);
129 err = rds_ib_add_conn(rds_ibdev, conn);
130 if (err)
131 printk(KERN_ERR "rds_ib_add_conn failed (%d)\n", err);
132
133 /* If the peer gave us the last packet it saw, process this as if
134 * we had received a regular ACK. */
135 if (dp && dp->dp_ack_seq)
136 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
137
138 rds_connect_complete(conn);
139}
140
141static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
142 struct rdma_conn_param *conn_param,
143 struct rds_ib_connect_private *dp,
144 u32 protocol_version)
145{
146 memset(conn_param, 0, sizeof(struct rdma_conn_param));
147 /* XXX tune these? */
148 conn_param->responder_resources = 1;
149 conn_param->initiator_depth = 1;
150 conn_param->retry_count = 7;
151 conn_param->rnr_retry_count = 7;
152
153 if (dp) {
154 struct rds_ib_connection *ic = conn->c_transport_data;
155
156 memset(dp, 0, sizeof(*dp));
157 dp->dp_saddr = conn->c_laddr;
158 dp->dp_daddr = conn->c_faddr;
159 dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
160 dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
161 dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
162 dp->dp_ack_seq = rds_ib_piggyb_ack(ic);
163
164 /* Advertise flow control */
165 if (ic->i_flowctl) {
166 unsigned int credits;
167
168 credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
169 dp->dp_credit = cpu_to_be32(credits);
170 atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits);
171 }
172
173 conn_param->private_data = dp;
174 conn_param->private_data_len = sizeof(*dp);
175 }
176}
177
178static void rds_ib_cq_event_handler(struct ib_event *event, void *data)
179{
180 rdsdebug("event %u data %p\n", event->event, data);
181}
182
183static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
184{
185 struct rds_connection *conn = data;
186 struct rds_ib_connection *ic = conn->c_transport_data;
187
188 rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event);
189
190 switch (event->event) {
191 case IB_EVENT_COMM_EST:
192 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
193 break;
194 default:
195 printk(KERN_WARNING "RDS/ib: unhandled QP event %u "
196 "on connection to %pI4\n", event->event,
197 &conn->c_faddr);
198 break;
199 }
200}
201
202/*
203 * This needs to be very careful to not leave IS_ERR pointers around for
204 * cleanup to trip over.
205 */
206static int rds_ib_setup_qp(struct rds_connection *conn)
207{
208 struct rds_ib_connection *ic = conn->c_transport_data;
209 struct ib_device *dev = ic->i_cm_id->device;
210 struct ib_qp_init_attr attr;
211 struct rds_ib_device *rds_ibdev;
212 int ret;
213
214 /* rds_ib_add_one creates a rds_ib_device object per IB device,
215 * and allocates a protection domain, memory range and FMR pool
216 * for each. If that fails for any reason, it will not register
217 * the rds_ibdev at all.
218 */
219 rds_ibdev = ib_get_client_data(dev, &rds_ib_client);
220 if (rds_ibdev == NULL) {
221 if (printk_ratelimit())
222 printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n",
223 dev->name);
224 return -EOPNOTSUPP;
225 }
226
227 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
228 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
229 if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1)
230 rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1);
231
232 /* Protection domain and memory range */
233 ic->i_pd = rds_ibdev->pd;
234 ic->i_mr = rds_ibdev->mr;
235
236 ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler,
237 rds_ib_cq_event_handler, conn,
238 ic->i_send_ring.w_nr + 1, 0);
239 if (IS_ERR(ic->i_send_cq)) {
240 ret = PTR_ERR(ic->i_send_cq);
241 ic->i_send_cq = NULL;
242 rdsdebug("ib_create_cq send failed: %d\n", ret);
243 goto out;
244 }
245
246 ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler,
247 rds_ib_cq_event_handler, conn,
248 ic->i_recv_ring.w_nr, 0);
249 if (IS_ERR(ic->i_recv_cq)) {
250 ret = PTR_ERR(ic->i_recv_cq);
251 ic->i_recv_cq = NULL;
252 rdsdebug("ib_create_cq recv failed: %d\n", ret);
253 goto out;
254 }
255
256 ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
257 if (ret) {
258 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
259 goto out;
260 }
261
262 ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
263 if (ret) {
264 rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
265 goto out;
266 }
267
268 /* XXX negotiate max send/recv with remote? */
269 memset(&attr, 0, sizeof(attr));
270 attr.event_handler = rds_ib_qp_event_handler;
271 attr.qp_context = conn;
272 /* + 1 to allow for the single ack message */
273 attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1;
274 attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1;
275 attr.cap.max_send_sge = rds_ibdev->max_sge;
276 attr.cap.max_recv_sge = RDS_IB_RECV_SGE;
277 attr.sq_sig_type = IB_SIGNAL_REQ_WR;
278 attr.qp_type = IB_QPT_RC;
279 attr.send_cq = ic->i_send_cq;
280 attr.recv_cq = ic->i_recv_cq;
281
282 /*
283 * XXX this can fail if max_*_wr is too large? Are we supposed
284 * to back off until we get a value that the hardware can support?
285 */
286 ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
287 if (ret) {
288 rdsdebug("rdma_create_qp failed: %d\n", ret);
289 goto out;
290 }
291
292 ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
293 ic->i_send_ring.w_nr *
294 sizeof(struct rds_header),
295 &ic->i_send_hdrs_dma, GFP_KERNEL);
296 if (ic->i_send_hdrs == NULL) {
297 ret = -ENOMEM;
298 rdsdebug("ib_dma_alloc_coherent send failed\n");
299 goto out;
300 }
301
302 ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
303 ic->i_recv_ring.w_nr *
304 sizeof(struct rds_header),
305 &ic->i_recv_hdrs_dma, GFP_KERNEL);
306 if (ic->i_recv_hdrs == NULL) {
307 ret = -ENOMEM;
308 rdsdebug("ib_dma_alloc_coherent recv failed\n");
309 goto out;
310 }
311
312 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
313 &ic->i_ack_dma, GFP_KERNEL);
314 if (ic->i_ack == NULL) {
315 ret = -ENOMEM;
316 rdsdebug("ib_dma_alloc_coherent ack failed\n");
317 goto out;
318 }
319
320 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work));
321 if (ic->i_sends == NULL) {
322 ret = -ENOMEM;
323 rdsdebug("send allocation failed\n");
324 goto out;
325 }
326 rds_ib_send_init_ring(ic);
327
328 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work));
329 if (ic->i_recvs == NULL) {
330 ret = -ENOMEM;
331 rdsdebug("recv allocation failed\n");
332 goto out;
333 }
334
335 rds_ib_recv_init_ring(ic);
336 rds_ib_recv_init_ack(ic);
337
338 /* Post receive buffers - as a side effect, this will update
339 * the posted credit count. */
340 rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);
341
342 rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr,
343 ic->i_send_cq, ic->i_recv_cq);
344
345out:
346 return ret;
347}
348
349static u32 rds_ib_protocol_compatible(const struct rds_ib_connect_private *dp)
350{
351 u16 common;
352 u32 version = 0;
353
354 /* rdma_cm private data is odd - when there is any private data in the
355 * request, we will be given a pretty large buffer without being told the
356 * original size. The only way to tell the difference is by looking at
357 * the contents, which are initialized to zero.
358 * If the protocol version fields aren't set, this is a connection attempt
359 * from an older version. This could be 3.0 or 2.0 - we can't tell.
360 * We really should have changed this for OFED 1.3 :-( */
361 if (dp->dp_protocol_major == 0)
362 return RDS_PROTOCOL_3_0;
363
364 common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IB_SUPPORTED_PROTOCOLS;
365 if (dp->dp_protocol_major == 3 && common) {
366 version = RDS_PROTOCOL_3_0;
367 while ((common >>= 1) != 0)
368 version++;
369 } else if (printk_ratelimit()) {
370 printk(KERN_NOTICE "RDS: Connection from %pI4 using "
371 "incompatible protocol version %u.%u\n",
372 &dp->dp_saddr,
373 dp->dp_protocol_major,
374 dp->dp_protocol_minor);
375 }
376 return version;
377}
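To make the minor-version arithmetic above concrete: with RDS_IB_SUPPORTED_PROTOCOLS set to 0x0003 and a peer advertising the same mask, common starts at 0x0003; the loop shifts it once (still non-zero, so version is bumped past RDS_PROTOCOL_3_0) and the second shift empties it, so the negotiated version ends up one minor revision above the 3.0 base - i.e. the highest minor version both sides have in common.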
378
379int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
380 struct rdma_cm_event *event)
381{
382 __be64 lguid = cm_id->route.path_rec->sgid.global.interface_id;
383 __be64 fguid = cm_id->route.path_rec->dgid.global.interface_id;
384 const struct rds_ib_connect_private *dp = event->param.conn.private_data;
385 struct rds_ib_connect_private dp_rep;
386 struct rds_connection *conn = NULL;
387 struct rds_ib_connection *ic = NULL;
388 struct rdma_conn_param conn_param;
389 u32 version;
390 int err, destroy = 1;
391
392 /* Check whether the remote protocol version matches ours. */
393 version = rds_ib_protocol_compatible(dp);
394 if (!version)
395 goto out;
396
397 rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u lguid 0x%llx fguid "
398 "0x%llx\n", &dp->dp_saddr, &dp->dp_daddr,
399 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version),
400 (unsigned long long)be64_to_cpu(lguid),
401 (unsigned long long)be64_to_cpu(fguid));
402
403 conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport,
404 GFP_KERNEL);
405 if (IS_ERR(conn)) {
406 rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
407 conn = NULL;
408 goto out;
409 }
410
411 /*
412 * The connection request may occur while the
413 * previous connection exists, e.g. in case of failover.
414 * But as connections may be initiated simultaneously
415 * by both hosts, we have a random backoff mechanism -
416 * see the comment above rds_queue_reconnect()
417 */
418 mutex_lock(&conn->c_cm_lock);
419 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
420 if (rds_conn_state(conn) == RDS_CONN_UP) {
421 rdsdebug("incoming connect while connecting\n");
422 rds_conn_drop(conn);
423 rds_ib_stats_inc(s_ib_listen_closed_stale);
424 } else
425 if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
426 /* Wait and see - our connect may still be succeeding */
427 rds_ib_stats_inc(s_ib_connect_raced);
428 }
429 mutex_unlock(&conn->c_cm_lock);
430 goto out;
431 }
432
433 ic = conn->c_transport_data;
434
435 rds_ib_set_protocol(conn, version);
436 rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
437
438 /* If the peer gave us the last packet it saw, process this as if
439 * we had received a regular ACK. */
440 if (dp->dp_ack_seq)
441 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
442
443 BUG_ON(cm_id->context);
444 BUG_ON(ic->i_cm_id);
445
446 ic->i_cm_id = cm_id;
447 cm_id->context = conn;
448
449 /* We got halfway through setting up the ib_connection; if we
450 * fail now, we have to take the long route out of this mess. */
451 destroy = 0;
452
453 err = rds_ib_setup_qp(conn);
454 if (err) {
455 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
456 goto out;
457 }
458
459 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version);
460
461 /* rdma_accept() calls rdma_reject() internally if it fails */
462 err = rdma_accept(cm_id, &conn_param);
463 mutex_unlock(&conn->c_cm_lock);
464 if (err) {
465 rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
466 goto out;
467 }
468
469 return 0;
470
471out:
472 rdma_reject(cm_id, NULL, 0);
473 return destroy;
474}
475
476
477int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
478{
479 struct rds_connection *conn = cm_id->context;
480 struct rds_ib_connection *ic = conn->c_transport_data;
481 struct rdma_conn_param conn_param;
482 struct rds_ib_connect_private dp;
483 int ret;
484
485 /* If the peer doesn't do protocol negotiation, we must
486 * default to RDSv3.0 */
487 rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0);
488 ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */
489
490 ret = rds_ib_setup_qp(conn);
491 if (ret) {
492 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", ret);
493 goto out;
494 }
495
496 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION);
497
498 ret = rdma_connect(cm_id, &conn_param);
499 if (ret)
500 rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
501
502out:
503 /* Beware - returning non-zero tells the rdma_cm to destroy
504 * the cm_id. We should certainly not do it as long as we still
505 * "own" the cm_id. */
506 if (ret) {
507 if (ic->i_cm_id == cm_id)
508 ret = 0;
509 }
510 return ret;
511}
512
513int rds_ib_conn_connect(struct rds_connection *conn)
514{
515 struct rds_ib_connection *ic = conn->c_transport_data;
516 struct sockaddr_in src, dest;
517 int ret;
518
519 /* XXX I wonder what effect the port space has */
520 /* delegate cm event handler to rdma_transport */
521 ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
522 RDMA_PS_TCP);
523 if (IS_ERR(ic->i_cm_id)) {
524 ret = PTR_ERR(ic->i_cm_id);
525 ic->i_cm_id = NULL;
526 rdsdebug("rdma_create_id() failed: %d\n", ret);
527 goto out;
528 }
529
530 rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn);
531
532 src.sin_family = AF_INET;
533 src.sin_addr.s_addr = (__force u32)conn->c_laddr;
534 src.sin_port = (__force u16)htons(0);
535
536 dest.sin_family = AF_INET;
537 dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
538 dest.sin_port = (__force u16)htons(RDS_PORT);
539
540 ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
541 (struct sockaddr *)&dest,
542 RDS_RDMA_RESOLVE_TIMEOUT_MS);
543 if (ret) {
544 rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id,
545 ret);
546 rdma_destroy_id(ic->i_cm_id);
547 ic->i_cm_id = NULL;
548 }
549
550out:
551 return ret;
552}
553
554/*
555 * This is careful to clean up only the resources that were actually built
556 * up, so that it can be called at any point during startup. In fact it
557 * can be called multiple times for a given connection.
558 */
559void rds_ib_conn_shutdown(struct rds_connection *conn)
560{
561 struct rds_ib_connection *ic = conn->c_transport_data;
562 int err = 0;
563
564 rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id,
565 ic->i_pd, ic->i_send_cq, ic->i_recv_cq,
566 ic->i_cm_id ? ic->i_cm_id->qp : NULL);
567
568 if (ic->i_cm_id) {
569 struct ib_device *dev = ic->i_cm_id->device;
570
571 rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
572 err = rdma_disconnect(ic->i_cm_id);
573 if (err) {
574 /* Actually this may happen quite frequently, when
575 * an outgoing connect raced with an incoming connect.
576 */
577 rdsdebug("failed to disconnect, cm: %p err %d\n",
578 ic->i_cm_id, err);
579 }
580
581 wait_event(rds_ib_ring_empty_wait,
582 rds_ib_ring_empty(&ic->i_send_ring) &&
583 rds_ib_ring_empty(&ic->i_recv_ring));
584
585 if (ic->i_send_hdrs)
586 ib_dma_free_coherent(dev,
587 ic->i_send_ring.w_nr *
588 sizeof(struct rds_header),
589 ic->i_send_hdrs,
590 ic->i_send_hdrs_dma);
591
592 if (ic->i_recv_hdrs)
593 ib_dma_free_coherent(dev,
594 ic->i_recv_ring.w_nr *
595 sizeof(struct rds_header),
596 ic->i_recv_hdrs,
597 ic->i_recv_hdrs_dma);
598
599 if (ic->i_ack)
600 ib_dma_free_coherent(dev, sizeof(struct rds_header),
601 ic->i_ack, ic->i_ack_dma);
602
603 if (ic->i_sends)
604 rds_ib_send_clear_ring(ic);
605 if (ic->i_recvs)
606 rds_ib_recv_clear_ring(ic);
607
608 if (ic->i_cm_id->qp)
609 rdma_destroy_qp(ic->i_cm_id);
610 if (ic->i_send_cq)
611 ib_destroy_cq(ic->i_send_cq);
612 if (ic->i_recv_cq)
613 ib_destroy_cq(ic->i_recv_cq);
614 rdma_destroy_id(ic->i_cm_id);
615
616 /*
617 * Move connection back to the nodev list.
618 */
619 if (ic->rds_ibdev) {
620
621 spin_lock_irq(&ic->rds_ibdev->spinlock);
622 BUG_ON(list_empty(&ic->ib_node));
623 list_del(&ic->ib_node);
624 spin_unlock_irq(&ic->rds_ibdev->spinlock);
625
626 spin_lock_irq(&ib_nodev_conns_lock);
627 list_add_tail(&ic->ib_node, &ib_nodev_conns);
628 spin_unlock_irq(&ib_nodev_conns_lock);
629 ic->rds_ibdev = NULL;
630 }
631
632 ic->i_cm_id = NULL;
633 ic->i_pd = NULL;
634 ic->i_mr = NULL;
635 ic->i_send_cq = NULL;
636 ic->i_recv_cq = NULL;
637 ic->i_send_hdrs = NULL;
638 ic->i_recv_hdrs = NULL;
639 ic->i_ack = NULL;
640 }
641 BUG_ON(ic->rds_ibdev);
642
643 /* Clear pending transmit */
644 if (ic->i_rm) {
645 rds_message_put(ic->i_rm);
646 ic->i_rm = NULL;
647 }
648
649 /* Clear the ACK state */
650 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
651 rds_ib_set_64bit(&ic->i_ack_next, 0);
652 ic->i_ack_recv = 0;
653
654 /* Clear flow control state */
655 ic->i_flowctl = 0;
656 atomic_set(&ic->i_credits, 0);
657
658 rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
659 rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
660
661 if (ic->i_ibinc) {
662 rds_inc_put(&ic->i_ibinc->ii_inc);
663 ic->i_ibinc = NULL;
664 }
665
666 vfree(ic->i_sends);
667 ic->i_sends = NULL;
668 vfree(ic->i_recvs);
669 ic->i_recvs = NULL;
670}
671
672int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
673{
674 struct rds_ib_connection *ic;
675 unsigned long flags;
676
677 /* XXX too lazy? */
678 ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL);
679 if (ic == NULL)
680 return -ENOMEM;
681
682 INIT_LIST_HEAD(&ic->ib_node);
683 mutex_init(&ic->i_recv_mutex);
684
685 /*
686 * rds_ib_conn_shutdown() waits for these to be emptied so they
687 * must be initialized before it can be called.
688 */
689 rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
690 rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
691
692 ic->conn = conn;
693 conn->c_transport_data = ic;
694
695 spin_lock_irqsave(&ib_nodev_conns_lock, flags);
696 list_add_tail(&ic->ib_node, &ib_nodev_conns);
697 spin_unlock_irqrestore(&ib_nodev_conns_lock, flags);
698
699
700 rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data);
701 return 0;
702}
703
704void rds_ib_conn_free(void *arg)
705{
706 struct rds_ib_connection *ic = arg;
707 rdsdebug("ic %p\n", ic);
708 list_del(&ic->ib_node);
709 kfree(ic);
710}
711
712
713/*
714 * An error occurred on the connection
715 */
716void
717__rds_ib_conn_error(struct rds_connection *conn, const char *fmt, ...)
718{
719 va_list ap;
720
721 rds_conn_drop(conn);
722
723 va_start(ap, fmt);
724 vprintk(fmt, ap);
725 va_end(ap);
726}
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
new file mode 100644
index 000000000000..69a6289ed672
--- /dev/null
+++ b/net/rds/ib_rdma.c
@@ -0,0 +1,641 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34
35#include "rds.h"
36#include "rdma.h"
37#include "ib.h"
38
39
40/*
41 * This is stored as mr->r_trans_private.
42 */
43struct rds_ib_mr {
44 struct rds_ib_device *device;
45 struct rds_ib_mr_pool *pool;
46 struct ib_fmr *fmr;
47 struct list_head list;
48 unsigned int remap_count;
49
50 struct scatterlist *sg;
51 unsigned int sg_len;
52 u64 *dma;
53 int sg_dma_len;
54};
55
56/*
57 * Our own little FMR pool
58 */
59struct rds_ib_mr_pool {
60 struct mutex flush_lock; /* serialize fmr invalidate */
61 struct work_struct flush_worker; /* flush worker */
62
63 spinlock_t list_lock; /* protect variables below */
64 atomic_t item_count; /* total # of MRs */
65 atomic_t dirty_count; /* # of dirty MRs */
66 struct list_head drop_list; /* MRs that have reached their max_maps limit */
67 struct list_head free_list; /* unused MRs */
68 struct list_head clean_list; /* unused & unmapped MRs */
69 atomic_t free_pinned; /* memory pinned by free MRs */
70 unsigned long max_items;
71 unsigned long max_items_soft;
72 unsigned long max_free_pinned;
73 struct ib_fmr_attr fmr_attr;
74};
75
76static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all);
77static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr);
78static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
79
80static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
81{
82 struct rds_ib_device *rds_ibdev;
83 struct rds_ib_ipaddr *i_ipaddr;
84
85 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
86 spin_lock_irq(&rds_ibdev->spinlock);
87 list_for_each_entry(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
88 if (i_ipaddr->ipaddr == ipaddr) {
89 spin_unlock_irq(&rds_ibdev->spinlock);
90 return rds_ibdev;
91 }
92 }
93 spin_unlock_irq(&rds_ibdev->spinlock);
94 }
95
96 return NULL;
97}
98
99static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
100{
101 struct rds_ib_ipaddr *i_ipaddr;
102
103 i_ipaddr = kmalloc(sizeof *i_ipaddr, GFP_KERNEL);
104 if (!i_ipaddr)
105 return -ENOMEM;
106
107 i_ipaddr->ipaddr = ipaddr;
108
109 spin_lock_irq(&rds_ibdev->spinlock);
110 list_add_tail(&i_ipaddr->list, &rds_ibdev->ipaddr_list);
111 spin_unlock_irq(&rds_ibdev->spinlock);
112
113 return 0;
114}
115
116static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
117{
118 struct rds_ib_ipaddr *i_ipaddr, *next;
119
120 spin_lock_irq(&rds_ibdev->spinlock);
121 list_for_each_entry_safe(i_ipaddr, next, &rds_ibdev->ipaddr_list, list) {
122 if (i_ipaddr->ipaddr == ipaddr) {
123 list_del(&i_ipaddr->list);
124 kfree(i_ipaddr);
125 break;
126 }
127 }
128 spin_unlock_irq(&rds_ibdev->spinlock);
129}
130
131int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
132{
133 struct rds_ib_device *rds_ibdev_old;
134
135 rds_ibdev_old = rds_ib_get_device(ipaddr);
136 if (rds_ibdev_old)
137 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
138
139 return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
140}
141
142int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
143{
144 struct rds_ib_connection *ic = conn->c_transport_data;
145
146 /* conn was previously on the nodev_conns_list */
147 spin_lock_irq(&ib_nodev_conns_lock);
148 BUG_ON(list_empty(&ib_nodev_conns));
149 BUG_ON(list_empty(&ic->ib_node));
150 list_del(&ic->ib_node);
151 spin_unlock_irq(&ib_nodev_conns_lock);
152
153 spin_lock_irq(&rds_ibdev->spinlock);
154 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
155 spin_unlock_irq(&rds_ibdev->spinlock);
156
157 ic->rds_ibdev = rds_ibdev;
158
159 return 0;
160}
161
162void rds_ib_remove_nodev_conns(void)
163{
164 struct rds_ib_connection *ic, *_ic;
165 LIST_HEAD(tmp_list);
166
167 /* avoid calling conn_destroy with irqs off */
168 spin_lock_irq(&ib_nodev_conns_lock);
169 list_splice(&ib_nodev_conns, &tmp_list);
170 INIT_LIST_HEAD(&ib_nodev_conns);
171 spin_unlock_irq(&ib_nodev_conns_lock);
172
173 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) {
174 if (ic->conn->c_passive)
175 rds_conn_destroy(ic->conn->c_passive);
176 rds_conn_destroy(ic->conn);
177 }
178}
179
180void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev)
181{
182 struct rds_ib_connection *ic, *_ic;
183 LIST_HEAD(tmp_list);
184
185 /* avoid calling conn_destroy with irqs off */
186 spin_lock_irq(&rds_ibdev->spinlock);
187 list_splice(&rds_ibdev->conn_list, &tmp_list);
188 INIT_LIST_HEAD(&rds_ibdev->conn_list);
189 spin_unlock_irq(&rds_ibdev->spinlock);
190
191 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) {
192 if (ic->conn->c_passive)
193 rds_conn_destroy(ic->conn->c_passive);
194 rds_conn_destroy(ic->conn);
195 }
196}
197
198struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
199{
200 struct rds_ib_mr_pool *pool;
201
202 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
203 if (!pool)
204 return ERR_PTR(-ENOMEM);
205
206 INIT_LIST_HEAD(&pool->free_list);
207 INIT_LIST_HEAD(&pool->drop_list);
208 INIT_LIST_HEAD(&pool->clean_list);
209 mutex_init(&pool->flush_lock);
210 spin_lock_init(&pool->list_lock);
211 INIT_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
212
213 pool->fmr_attr.max_pages = fmr_message_size;
214 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
215 pool->fmr_attr.page_shift = rds_ibdev->fmr_page_shift;
216 pool->max_free_pinned = rds_ibdev->max_fmrs * fmr_message_size / 4;
217
218 /* We never allow more than max_items MRs to be allocated.
219 * When we exceed max_items_soft, we start freeing
220 * items more aggressively.
221 * Make sure that max_items > max_items_soft > max_items / 2
222 */
223 pool->max_items_soft = rds_ibdev->max_fmrs * 3 / 4;
224 pool->max_items = rds_ibdev->max_fmrs;
225
226 return pool;
227}
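To make the limits above concrete, a tiny sketch with made-up device values (max_fmrs and fmr_message_size here are assumptions, not values from the patch) showing how the hard cap, the soft cap and the pinned-page ceiling relate:

#include <stdio.h>

int main(void)
{
	unsigned long max_fmrs = 512, fmr_message_size = 256;
	unsigned long max_items = max_fmrs;			/* hard cap: 512 */
	unsigned long max_items_soft = max_fmrs * 3 / 4;	/* 384 */
	unsigned long max_free_pinned = max_fmrs * fmr_message_size / 4;

	/* 512 > 384 > 256 satisfies max_items > max_items_soft > max_items / 2 */
	printf("%lu %lu %lu\n", max_items, max_items_soft, max_free_pinned);
	return 0;
}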
228
229void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo)
230{
231 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
232
233 iinfo->rdma_mr_max = pool->max_items;
234 iinfo->rdma_mr_size = pool->fmr_attr.max_pages;
235}
236
237void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
238{
239 flush_workqueue(rds_wq);
240 rds_ib_flush_mr_pool(pool, 1);
241 BUG_ON(atomic_read(&pool->item_count));
242 BUG_ON(atomic_read(&pool->free_pinned));
243 kfree(pool);
244}
245
246static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
247{
248 struct rds_ib_mr *ibmr = NULL;
249 unsigned long flags;
250
251 spin_lock_irqsave(&pool->list_lock, flags);
252 if (!list_empty(&pool->clean_list)) {
253 ibmr = list_entry(pool->clean_list.next, struct rds_ib_mr, list);
254 list_del_init(&ibmr->list);
255 }
256 spin_unlock_irqrestore(&pool->list_lock, flags);
257
258 return ibmr;
259}
260
261static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
262{
263 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
264 struct rds_ib_mr *ibmr = NULL;
265 int err = 0, iter = 0;
266
267 while (1) {
268 ibmr = rds_ib_reuse_fmr(pool);
269 if (ibmr)
270 return ibmr;
271
272 /* No clean MRs - now we have the choice of either
273 * allocating a fresh MR up to the limit imposed by the
274 * driver, or flushing any dirty unused MRs.
275 * We try to avoid stalling in the send path if possible,
276 * so we allocate as long as we're allowed to.
277 *
278 * We're fussy with enforcing the FMR limit, though. If the driver
279 * tells us we can't use more than N fmrs, we shouldn't start
280 * arguing with it */
281 if (atomic_inc_return(&pool->item_count) <= pool->max_items)
282 break;
283
284 atomic_dec(&pool->item_count);
285
286 if (++iter > 2) {
287 rds_ib_stats_inc(s_ib_rdma_mr_pool_depleted);
288 return ERR_PTR(-EAGAIN);
289 }
290
291 /* We do have some empty MRs. Flush them out. */
292 rds_ib_stats_inc(s_ib_rdma_mr_pool_wait);
293 rds_ib_flush_mr_pool(pool, 0);
294 }
295
296 ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL);
297 if (!ibmr) {
298 err = -ENOMEM;
299 goto out_no_cigar;
300 }
301
302 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
303 (IB_ACCESS_LOCAL_WRITE |
304 IB_ACCESS_REMOTE_READ |
305 IB_ACCESS_REMOTE_WRITE),
306 &pool->fmr_attr);
307 if (IS_ERR(ibmr->fmr)) {
308 err = PTR_ERR(ibmr->fmr);
309 ibmr->fmr = NULL;
310 printk(KERN_WARNING "RDS/IB: ib_alloc_fmr failed (err=%d)\n", err);
311 goto out_no_cigar;
312 }
313
314 rds_ib_stats_inc(s_ib_rdma_mr_alloc);
315 return ibmr;
316
317out_no_cigar:
318 if (ibmr) {
319 if (ibmr->fmr)
320 ib_dealloc_fmr(ibmr->fmr);
321 kfree(ibmr);
322 }
323 atomic_dec(&pool->item_count);
324 return ERR_PTR(err);
325}
326
327static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
328 struct scatterlist *sg, unsigned int nents)
329{
330 struct ib_device *dev = rds_ibdev->dev;
331 struct scatterlist *scat = sg;
332 u64 io_addr = 0;
333 u64 *dma_pages;
334 u32 len;
335 int page_cnt, sg_dma_len;
336 int i, j;
337 int ret;
338
339 sg_dma_len = ib_dma_map_sg(dev, sg, nents,
340 DMA_BIDIRECTIONAL);
341 if (unlikely(!sg_dma_len)) {
342 printk(KERN_WARNING "RDS/IB: dma_map_sg failed!\n");
343 return -EBUSY;
344 }
345
346 len = 0;
347 page_cnt = 0;
348
349 for (i = 0; i < sg_dma_len; ++i) {
350 unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
351 u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
352
353 if (dma_addr & ~rds_ibdev->fmr_page_mask) {
354 if (i > 0)
355 return -EINVAL;
356 else
357 ++page_cnt;
358 }
359 if ((dma_addr + dma_len) & ~rds_ibdev->fmr_page_mask) {
360 if (i < sg_dma_len - 1)
361 return -EINVAL;
362 else
363 ++page_cnt;
364 }
365
366 len += dma_len;
367 }
368
369 page_cnt += len >> rds_ibdev->fmr_page_shift;
370 if (page_cnt > fmr_message_size)
371 return -EINVAL;
372
373 dma_pages = kmalloc(sizeof(u64) * page_cnt, GFP_ATOMIC);
374 if (!dma_pages)
375 return -ENOMEM;
376
377 page_cnt = 0;
378 for (i = 0; i < sg_dma_len; ++i) {
379 unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
380 u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
381
382 for (j = 0; j < dma_len; j += rds_ibdev->fmr_page_size)
383 dma_pages[page_cnt++] =
384 (dma_addr & rds_ibdev->fmr_page_mask) + j;
385 }
386
387 ret = ib_map_phys_fmr(ibmr->fmr,
388 dma_pages, page_cnt, io_addr);
389 if (ret)
390 goto out;
391
392 /* Success - we remapped the MR, so we can
393 * safely tear down the old mapping. */
394 rds_ib_teardown_mr(ibmr);
395
396 ibmr->sg = scat;
397 ibmr->sg_len = nents;
398 ibmr->sg_dma_len = sg_dma_len;
399 ibmr->remap_count++;
400
401 rds_ib_stats_inc(s_ib_rdma_mr_used);
402 ret = 0;
403
404out:
405 kfree(dma_pages);
406
407 return ret;
408}
409
410void rds_ib_sync_mr(void *trans_private, int direction)
411{
412 struct rds_ib_mr *ibmr = trans_private;
413 struct rds_ib_device *rds_ibdev = ibmr->device;
414
415 switch (direction) {
416 case DMA_FROM_DEVICE:
417 ib_dma_sync_sg_for_cpu(rds_ibdev->dev, ibmr->sg,
418 ibmr->sg_dma_len, DMA_BIDIRECTIONAL);
419 break;
420 case DMA_TO_DEVICE:
421 ib_dma_sync_sg_for_device(rds_ibdev->dev, ibmr->sg,
422 ibmr->sg_dma_len, DMA_BIDIRECTIONAL);
423 break;
424 }
425}
426
427static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
428{
429 struct rds_ib_device *rds_ibdev = ibmr->device;
430
431 if (ibmr->sg_dma_len) {
432 ib_dma_unmap_sg(rds_ibdev->dev,
433 ibmr->sg, ibmr->sg_len,
434 DMA_BIDIRECTIONAL);
435 ibmr->sg_dma_len = 0;
436 }
437
438 /* Release the s/g list */
439 if (ibmr->sg_len) {
440 unsigned int i;
441
442 for (i = 0; i < ibmr->sg_len; ++i) {
443 struct page *page = sg_page(&ibmr->sg[i]);
444
445 /* FIXME we need a way to tell a r/w MR
446 * from a r/o MR */
447 set_page_dirty(page);
448 put_page(page);
449 }
450 kfree(ibmr->sg);
451
452 ibmr->sg = NULL;
453 ibmr->sg_len = 0;
454 }
455}
456
457static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
458{
459 unsigned int pinned = ibmr->sg_len;
460
461 __rds_ib_teardown_mr(ibmr);
462 if (pinned) {
463 struct rds_ib_device *rds_ibdev = ibmr->device;
464 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
465
466 atomic_sub(pinned, &pool->free_pinned);
467 }
468}
469
470static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int free_all)
471{
472 unsigned int item_count;
473
474 item_count = atomic_read(&pool->item_count);
475 if (free_all)
476 return item_count;
477
478 return 0;
479}
480
481/*
482 * Flush our pool of MRs.
483 * At a minimum, all currently unused MRs are unmapped.
484 * If the number of MRs allocated exceeds the limit, we also try
485 * to free as many MRs as needed to get back to this limit.
486 */
487static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
488{
489 struct rds_ib_mr *ibmr, *next;
490 LIST_HEAD(unmap_list);
491 LIST_HEAD(fmr_list);
492 unsigned long unpinned = 0;
493 unsigned long flags;
494 unsigned int nfreed = 0, ncleaned = 0, free_goal;
495 int ret = 0;
496
497 rds_ib_stats_inc(s_ib_rdma_mr_pool_flush);
498
499 mutex_lock(&pool->flush_lock);
500
501 spin_lock_irqsave(&pool->list_lock, flags);
502 /* Get the list of all MRs to be dropped. Ordering matters -
503 * we want to put drop_list ahead of free_list. */
504 list_splice_init(&pool->free_list, &unmap_list);
505 list_splice_init(&pool->drop_list, &unmap_list);
506 if (free_all)
507 list_splice_init(&pool->clean_list, &unmap_list);
508 spin_unlock_irqrestore(&pool->list_lock, flags);
509
510 free_goal = rds_ib_flush_goal(pool, free_all);
511
512 if (list_empty(&unmap_list))
513 goto out;
514
515 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
516 list_for_each_entry(ibmr, &unmap_list, list)
517 list_add(&ibmr->fmr->list, &fmr_list);
518 ret = ib_unmap_fmr(&fmr_list);
519 if (ret)
520 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret);
521
522 /* Now we can destroy the DMA mapping and unpin any pages */
523 list_for_each_entry_safe(ibmr, next, &unmap_list, list) {
524 unpinned += ibmr->sg_len;
525 __rds_ib_teardown_mr(ibmr);
526 if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) {
527 rds_ib_stats_inc(s_ib_rdma_mr_free);
528 list_del(&ibmr->list);
529 ib_dealloc_fmr(ibmr->fmr);
530 kfree(ibmr);
531 nfreed++;
532 }
533 ncleaned++;
534 }
535
536 spin_lock_irqsave(&pool->list_lock, flags);
537 list_splice(&unmap_list, &pool->clean_list);
538 spin_unlock_irqrestore(&pool->list_lock, flags);
539
540 atomic_sub(unpinned, &pool->free_pinned);
541 atomic_sub(ncleaned, &pool->dirty_count);
542 atomic_sub(nfreed, &pool->item_count);
543
544out:
545 mutex_unlock(&pool->flush_lock);
546 return ret;
547}
548
549static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
550{
551 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker);
552
553 rds_ib_flush_mr_pool(pool, 0);
554}
555
556void rds_ib_free_mr(void *trans_private, int invalidate)
557{
558 struct rds_ib_mr *ibmr = trans_private;
559 struct rds_ib_device *rds_ibdev = ibmr->device;
560 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
561 unsigned long flags;
562
563 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
564
565 /* Return it to the pool's free list */
566 spin_lock_irqsave(&pool->list_lock, flags);
567 if (ibmr->remap_count >= pool->fmr_attr.max_maps)
568 list_add(&ibmr->list, &pool->drop_list);
569 else
570 list_add(&ibmr->list, &pool->free_list);
571
572 atomic_add(ibmr->sg_len, &pool->free_pinned);
573 atomic_inc(&pool->dirty_count);
574 spin_unlock_irqrestore(&pool->list_lock, flags);
575
576 /* If we've pinned too many pages, request a flush */
577 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned
578 || atomic_read(&pool->dirty_count) >= pool->max_items / 10)
579 queue_work(rds_wq, &pool->flush_worker);
580
581 if (invalidate) {
582 if (likely(!in_interrupt())) {
583 rds_ib_flush_mr_pool(pool, 0);
584 } else {
585 /* We get here if the user created a MR marked
586 * as use_once and invalidate at the same time. */
587 queue_work(rds_wq, &pool->flush_worker);
588 }
589 }
590}
591
592void rds_ib_flush_mrs(void)
593{
594 struct rds_ib_device *rds_ibdev;
595
596 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
597 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
598
599 if (pool)
600 rds_ib_flush_mr_pool(pool, 0);
601 }
602}
603
604void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
605 struct rds_sock *rs, u32 *key_ret)
606{
607 struct rds_ib_device *rds_ibdev;
608 struct rds_ib_mr *ibmr = NULL;
609 int ret;
610
611 rds_ibdev = rds_ib_get_device(rs->rs_bound_addr);
612 if (!rds_ibdev) {
613 ret = -ENODEV;
614 goto out;
615 }
616
617 if (!rds_ibdev->mr_pool) {
618 ret = -ENODEV;
619 goto out;
620 }
621
622 ibmr = rds_ib_alloc_fmr(rds_ibdev);
623 if (IS_ERR(ibmr))
624 return ibmr;
625
626 ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
627 if (ret == 0)
628 *key_ret = ibmr->fmr->rkey;
629 else
630 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret);
631
632 ibmr->device = rds_ibdev;
633
634 out:
635 if (ret) {
636 if (ibmr)
637 rds_ib_free_mr(ibmr, 0);
638 ibmr = ERR_PTR(ret);
639 }
640 return ibmr;
641}
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
new file mode 100644
index 000000000000..5061b5502162
--- /dev/null
+++ b/net/rds/ib_recv.c
@@ -0,0 +1,869 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/pci.h>
35#include <linux/dma-mapping.h>
36#include <rdma/rdma_cm.h>
37
38#include "rds.h"
39#include "ib.h"
40
41static struct kmem_cache *rds_ib_incoming_slab;
42static struct kmem_cache *rds_ib_frag_slab;
43static atomic_t rds_ib_allocation = ATOMIC_INIT(0);
44
45static void rds_ib_frag_drop_page(struct rds_page_frag *frag)
46{
47 rdsdebug("frag %p page %p\n", frag, frag->f_page);
48 __free_page(frag->f_page);
49 frag->f_page = NULL;
50}
51
52static void rds_ib_frag_free(struct rds_page_frag *frag)
53{
54 rdsdebug("frag %p page %p\n", frag, frag->f_page);
55 BUG_ON(frag->f_page != NULL);
56 kmem_cache_free(rds_ib_frag_slab, frag);
57}
58
59/*
60 * We map a page at a time. Its fragments are posted in order. This
61 * is called in fragment order as the fragments get send completion events.
62 * Only the last frag in the page performs the unmapping.
63 *
64 * It's OK for ring cleanup to call this in whatever order it likes because
65 * DMA is not in flight and so we can unmap while other ring entries still
66 * hold page references in their frags.
67 */
68static void rds_ib_recv_unmap_page(struct rds_ib_connection *ic,
69 struct rds_ib_recv_work *recv)
70{
71 struct rds_page_frag *frag = recv->r_frag;
72
73 rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page);
74 if (frag->f_mapped)
75 ib_dma_unmap_page(ic->i_cm_id->device,
76 frag->f_mapped,
77 RDS_FRAG_SIZE, DMA_FROM_DEVICE);
78 frag->f_mapped = 0;
79}
80
81void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
82{
83 struct rds_ib_recv_work *recv;
84 u32 i;
85
86 for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
87 struct ib_sge *sge;
88
89 recv->r_ibinc = NULL;
90 recv->r_frag = NULL;
91
92 recv->r_wr.next = NULL;
93 recv->r_wr.wr_id = i;
94 recv->r_wr.sg_list = recv->r_sge;
95 recv->r_wr.num_sge = RDS_IB_RECV_SGE;
96
97 sge = rds_ib_data_sge(ic, recv->r_sge);
98 sge->addr = 0;
99 sge->length = RDS_FRAG_SIZE;
100 sge->lkey = ic->i_mr->lkey;
101
102 sge = rds_ib_header_sge(ic, recv->r_sge);
103 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
104 sge->length = sizeof(struct rds_header);
105 sge->lkey = ic->i_mr->lkey;
106 }
107}
108
109static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
110 struct rds_ib_recv_work *recv)
111{
112 if (recv->r_ibinc) {
113 rds_inc_put(&recv->r_ibinc->ii_inc);
114 recv->r_ibinc = NULL;
115 }
116 if (recv->r_frag) {
117 rds_ib_recv_unmap_page(ic, recv);
118 if (recv->r_frag->f_page)
119 rds_ib_frag_drop_page(recv->r_frag);
120 rds_ib_frag_free(recv->r_frag);
121 recv->r_frag = NULL;
122 }
123}
124
125void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
126{
127 u32 i;
128
129 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
130 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
131
132 if (ic->i_frag.f_page)
133 rds_ib_frag_drop_page(&ic->i_frag);
134}
135
136static int rds_ib_recv_refill_one(struct rds_connection *conn,
137 struct rds_ib_recv_work *recv,
138 gfp_t kptr_gfp, gfp_t page_gfp)
139{
140 struct rds_ib_connection *ic = conn->c_transport_data;
141 dma_addr_t dma_addr;
142 struct ib_sge *sge;
143 int ret = -ENOMEM;
144
145 if (recv->r_ibinc == NULL) {
146 if (atomic_read(&rds_ib_allocation) >= rds_ib_sysctl_max_recv_allocation) {
147 rds_ib_stats_inc(s_ib_rx_alloc_limit);
148 goto out;
149 }
150 recv->r_ibinc = kmem_cache_alloc(rds_ib_incoming_slab,
151 kptr_gfp);
152 if (recv->r_ibinc == NULL)
153 goto out;
154 atomic_inc(&rds_ib_allocation);
155 INIT_LIST_HEAD(&recv->r_ibinc->ii_frags);
156 rds_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr);
157 }
158
159 if (recv->r_frag == NULL) {
160 recv->r_frag = kmem_cache_alloc(rds_ib_frag_slab, kptr_gfp);
161 if (recv->r_frag == NULL)
162 goto out;
163 INIT_LIST_HEAD(&recv->r_frag->f_item);
164 recv->r_frag->f_page = NULL;
165 }
166
167 if (ic->i_frag.f_page == NULL) {
168 ic->i_frag.f_page = alloc_page(page_gfp);
169 if (ic->i_frag.f_page == NULL)
170 goto out;
171 ic->i_frag.f_offset = 0;
172 }
173
174 dma_addr = ib_dma_map_page(ic->i_cm_id->device,
175 ic->i_frag.f_page,
176 ic->i_frag.f_offset,
177 RDS_FRAG_SIZE,
178 DMA_FROM_DEVICE);
179 if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr))
180 goto out;
181
182 /*
183 * Once we get the RDS_PAGE_LAST_OFF frag then rds_ib_frag_unmap()
184 * must be called on this recv. This happens as completions hit
185 * in order or on connection shutdown.
186 */
187 recv->r_frag->f_page = ic->i_frag.f_page;
188 recv->r_frag->f_offset = ic->i_frag.f_offset;
189 recv->r_frag->f_mapped = dma_addr;
190
191 sge = rds_ib_data_sge(ic, recv->r_sge);
192 sge->addr = dma_addr;
193 sge->length = RDS_FRAG_SIZE;
194
195 sge = rds_ib_header_sge(ic, recv->r_sge);
196 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
197 sge->length = sizeof(struct rds_header);
198
199 get_page(recv->r_frag->f_page);
200
201 if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) {
202 ic->i_frag.f_offset += RDS_FRAG_SIZE;
203 } else {
204 put_page(ic->i_frag.f_page);
205 ic->i_frag.f_page = NULL;
206 ic->i_frag.f_offset = 0;
207 }
208
209 ret = 0;
210out:
211 return ret;
212}
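The page-sharing scheme above hands out one RDS_FRAG_SIZE slice of ic->i_frag.f_page per posted recv, taking a page reference for each slice and dropping the allocator's own reference once the last slice is used. A standalone sketch of that bookkeeping (illustrative only; the page and fragment sizes are made-up example values):

#include <stdio.h>

int main(void)
{
	const unsigned int page_size = 16384, frag_size = 4096;
	const unsigned int last_off = page_size - frag_size;
	unsigned int offset = 0;
	unsigned int refs = 1;			/* alloc_page() reference */

	for (;;) {
		refs++;				/* get_page() for this recv */
		printf("frag at offset %u, page refs %u\n", offset, refs);
		if (offset < last_off) {
			offset += frag_size;	/* advance to the next slice */
		} else {
			refs--;			/* put_page(): allocator lets go */
			break;
		}
	}
	printf("page fully handed out, %u recv references remain\n", refs);
	return 0;
}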
213
214/*
215 * This tries to allocate and post unused work requests after making sure that
216 * they have all the allocations they need to queue received fragments into
217 * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc
218 * pairs don't go unmatched.
219 *
220 * -1 is returned if posting fails due to temporary resource exhaustion.
221 */
222int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
223 gfp_t page_gfp, int prefill)
224{
225 struct rds_ib_connection *ic = conn->c_transport_data;
226 struct rds_ib_recv_work *recv;
227 struct ib_recv_wr *failed_wr;
228 unsigned int posted = 0;
229 int ret = 0;
230 u32 pos;
231
232 while ((prefill || rds_conn_up(conn))
233 && rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
234 if (pos >= ic->i_recv_ring.w_nr) {
235 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
236 pos);
237 ret = -EINVAL;
238 break;
239 }
240
241 recv = &ic->i_recvs[pos];
242 ret = rds_ib_recv_refill_one(conn, recv, kptr_gfp, page_gfp);
243 if (ret) {
244 ret = -1;
245 break;
246 }
247
248 /* XXX when can this fail? */
249 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
250 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
251 recv->r_ibinc, recv->r_frag->f_page,
252 (long) recv->r_frag->f_mapped, ret);
253 if (ret) {
254 rds_ib_conn_error(conn, "recv post on "
255 "%pI4 returned %d, disconnecting and "
256 "reconnecting\n", &conn->c_faddr,
257 ret);
258 ret = -1;
259 break;
260 }
261
262 posted++;
263 }
264
265 /* We're doing flow control - update the window. */
266 if (ic->i_flowctl && posted)
267 rds_ib_advertise_credits(conn, posted);
268
269 if (ret)
270 rds_ib_ring_unalloc(&ic->i_recv_ring, 1);
271 return ret;
272}
273
274void rds_ib_inc_purge(struct rds_incoming *inc)
275{
276 struct rds_ib_incoming *ibinc;
277 struct rds_page_frag *frag;
278 struct rds_page_frag *pos;
279
280 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
281 rdsdebug("purging ibinc %p inc %p\n", ibinc, inc);
282
283 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
284 list_del_init(&frag->f_item);
285 rds_ib_frag_drop_page(frag);
286 rds_ib_frag_free(frag);
287 }
288}
289
290void rds_ib_inc_free(struct rds_incoming *inc)
291{
292 struct rds_ib_incoming *ibinc;
293
294 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
295
296 rds_ib_inc_purge(inc);
297 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
298 BUG_ON(!list_empty(&ibinc->ii_frags));
299 kmem_cache_free(rds_ib_incoming_slab, ibinc);
300 atomic_dec(&rds_ib_allocation);
301 BUG_ON(atomic_read(&rds_ib_allocation) < 0);
302}
303
304int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
305 size_t size)
306{
307 struct rds_ib_incoming *ibinc;
308 struct rds_page_frag *frag;
309 struct iovec *iov = first_iov;
310 unsigned long to_copy;
311 unsigned long frag_off = 0;
312 unsigned long iov_off = 0;
313 int copied = 0;
314 int ret;
315 u32 len;
316
317 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
318 frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
319 len = be32_to_cpu(inc->i_hdr.h_len);
320
321 while (copied < size && copied < len) {
322 if (frag_off == RDS_FRAG_SIZE) {
323 frag = list_entry(frag->f_item.next,
324 struct rds_page_frag, f_item);
325 frag_off = 0;
326 }
327 while (iov_off == iov->iov_len) {
328 iov_off = 0;
329 iov++;
330 }
331
332 to_copy = min(iov->iov_len - iov_off, RDS_FRAG_SIZE - frag_off);
333 to_copy = min_t(size_t, to_copy, size - copied);
334 to_copy = min_t(unsigned long, to_copy, len - copied);
335
336 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag "
337 "[%p, %lu] + %lu\n",
338 to_copy, iov->iov_base, iov->iov_len, iov_off,
339 frag->f_page, frag->f_offset, frag_off);
340
341 /* XXX needs + offset for multiple recvs per page */
342 ret = rds_page_copy_to_user(frag->f_page,
343 frag->f_offset + frag_off,
344 iov->iov_base + iov_off,
345 to_copy);
346 if (ret) {
347 copied = ret;
348 break;
349 }
350
351 iov_off += to_copy;
352 frag_off += to_copy;
353 copied += to_copy;
354 }
355
356 return copied;
357}
358
359/* ic starts out kzalloc()ed */
360void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
361{
362 struct ib_send_wr *wr = &ic->i_ack_wr;
363 struct ib_sge *sge = &ic->i_ack_sge;
364
365 sge->addr = ic->i_ack_dma;
366 sge->length = sizeof(struct rds_header);
367 sge->lkey = ic->i_mr->lkey;
368
369 wr->sg_list = sge;
370 wr->num_sge = 1;
371 wr->opcode = IB_WR_SEND;
372 wr->wr_id = RDS_IB_ACK_WR_ID;
373 wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
374}
375
376/*
377 * You'd think that with reliable IB connections you wouldn't need to ack
378 * messages that have been received. The problem is that IB hardware generates
379 * an ack message before it has DMAed the message into memory. This creates a
380 * potential message loss if the HCA is disabled for any reason between when it
381 * sends the ack and before the message is DMAed and processed. This is only a
382 * potential issue if another HCA is available for fail-over.
383 *
384 * When the remote host receives our ack they'll free the sent message from
385 * their send queue. To decrease the latency of this we always send an ack
386 * immediately after we've received messages.
387 *
388 * For simplicity, we only have one ack in flight at a time. This puts
389 * pressure on senders to have deep enough send queues to absorb the latency of
390 * a single ack frame being in flight. This might not be good enough.
391 *
392 * This is implemented by having a long-lived send_wr and sge which point to a
393 * statically allocated ack frame. This ack wr does not fall under the ring
394 * accounting that the tx and rx wrs do. The QP attribute specifically makes
395 * room for it beyond the ring size. Send completion notices its special
396 * wr_id and avoids working with the ring in that case.
397 */
398static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
399 int ack_required)
400{
401 rds_ib_set_64bit(&ic->i_ack_next, seq);
402 if (ack_required) {
403 smp_mb__before_clear_bit();
404 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
405 }
406}
407
408static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
409{
410 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
411 smp_mb__after_clear_bit();
412
413 return ic->i_ack_next;
414}
415
416static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
417{
418 struct rds_header *hdr = ic->i_ack;
419 struct ib_send_wr *failed_wr;
420 u64 seq;
421 int ret;
422
423 seq = rds_ib_get_ack(ic);
424
425 rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
426 rds_message_populate_header(hdr, 0, 0, 0);
427 hdr->h_ack = cpu_to_be64(seq);
428 hdr->h_credit = adv_credits;
429 rds_message_make_checksum(hdr);
430 ic->i_ack_queued = jiffies;
431
432 ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr);
433 if (unlikely(ret)) {
434 /* Failed to send. Release the WR, and
435 * force another ACK.
436 */
437 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
438 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
439
440 rds_ib_stats_inc(s_ib_ack_send_failure);
441 /* Need to finesse this later. */
442 BUG();
443 } else
444 rds_ib_stats_inc(s_ib_ack_sent);
445}
446
447/*
448 * There are 3 ways of getting acknowledgements to the peer:
449 * 1. We call rds_ib_attempt_ack from the recv completion handler
450 * to send an ACK-only frame.
451 * However, there can be only one such frame in the send queue
452 * at any time, so we may have to postpone it.
453 * 2. When another (data) packet is transmitted while there's
454 * an ACK in the queue, we piggyback the ACK sequence number
455 * on the data packet.
456 * 3. If the ACK WR is done sending, we get called from the
457 * send queue completion handler, and check whether there's
458 * another ACK pending (postponed because the WR was on the
459 * queue). If so, we transmit it.
460 *
461 * We maintain 2 variables:
462 * - i_ack_flags, which keeps track of whether the ACK WR
463 * is currently in the send queue or not (IB_ACK_IN_FLIGHT)
464 * - i_ack_next, which is the last sequence number we received
465 *
466 * Potentially, send queue and receive queue handlers can run concurrently.
467 *
468 * Reconnecting complicates this picture just slightly. When we
469 * reconnect, we may be seeing duplicate packets. The peer
470 * is retransmitting them, because it hasn't seen an ACK for
471 * them. It is important that we ACK these.
472 *
473 * ACK mitigation adds a header flag "ACK_REQUIRED"; any packet with
474 * this flag set *MUST* be acknowledged immediately.
475 */
476
477/*
478 * When we get here, we're called from the recv queue handler.
479 * Check whether we ought to transmit an ACK.
480 */
481void rds_ib_attempt_ack(struct rds_ib_connection *ic)
482{
483 unsigned int adv_credits;
484
485 if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
486 return;
487
488 if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
489 rds_ib_stats_inc(s_ib_ack_send_delayed);
490 return;
491 }
492
493 /* Can we get a send credit? */
494 if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0)) {
495 rds_ib_stats_inc(s_ib_tx_throttle);
496 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
497 return;
498 }
499
500 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
501 rds_ib_send_ack(ic, adv_credits);
502}
503
504/*
505 * We get here from the send completion handler, when the
506 * adapter tells us the ACK frame was sent.
507 */
508void rds_ib_ack_send_complete(struct rds_ib_connection *ic)
509{
510 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
511 rds_ib_attempt_ack(ic);
512}
513
514/*
515 * This is called by the regular xmit code when it wants to piggyback
516 * an ACK on an outgoing frame.
517 */
518u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
519{
520 if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
521 rds_ib_stats_inc(s_ib_ack_send_piggybacked);
522 return rds_ib_get_ack(ic);
523}
524
525/*
526 * It's kind of lame that we're copying from the posted receive pages into
527 * long-lived bitmaps. We could have posted the bitmaps and rdma written into
528 * them. But receiving new congestion bitmaps should be a *rare* event, so
529 * hopefully we won't need to invest that complexity in making it more
530 * efficient. By copying we can share a simpler core with TCP which has to
531 * copy.
532 */
533static void rds_ib_cong_recv(struct rds_connection *conn,
534 struct rds_ib_incoming *ibinc)
535{
536 struct rds_cong_map *map;
537 unsigned int map_off;
538 unsigned int map_page;
539 struct rds_page_frag *frag;
540 unsigned long frag_off;
541 unsigned long to_copy;
542 unsigned long copied;
543 uint64_t uncongested = 0;
544 void *addr;
545
546 /* catch completely corrupt packets */
547 if (be32_to_cpu(ibinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES)
548 return;
549
550 map = conn->c_fcong;
551 map_page = 0;
552 map_off = 0;
553
554 frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
555 frag_off = 0;
556
557 copied = 0;
558
559 while (copied < RDS_CONG_MAP_BYTES) {
560 uint64_t *src, *dst;
561 unsigned int k;
562
563 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
564 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
565
566 addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0);
567
568 src = addr + frag_off;
569 dst = (void *)map->m_page_addrs[map_page] + map_off;
570 for (k = 0; k < to_copy; k += 8) {
571 /* Record ports that became uncongested, i.e.
572 * bits that changed from 0 to 1. */
573 uncongested |= ~(*src) & *dst;
574 *dst++ = *src++;
575 }
576 kunmap_atomic(addr, KM_SOFTIRQ0);
577
578 copied += to_copy;
579
580 map_off += to_copy;
581 if (map_off == PAGE_SIZE) {
582 map_off = 0;
583 map_page++;
584 }
585
586 frag_off += to_copy;
587 if (frag_off == RDS_FRAG_SIZE) {
588 frag = list_entry(frag->f_item.next,
589 struct rds_page_frag, f_item);
590 frag_off = 0;
591 }
592 }
593
594 /* the congestion map is in little endian order */
595 uncongested = le64_to_cpu(uncongested);
596
597 rds_cong_map_updated(map, uncongested);
598}
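The update loop above relies on a single bit expression: ~incoming & stored keeps exactly the bits that are set in the currently stored map word but cleared in the word just received, i.e. the bits the update turns off. A small sketch (illustrative only; the word values are made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t stored   = 0xF0F0;	/* word already in the local map */
	uint64_t incoming = 0x00FF;	/* same word from the received map */
	uint64_t turned_off = ~incoming & stored;

	printf("bits cleared by the update: 0x%llx\n",
	       (unsigned long long)turned_off);	/* prints 0xf000 */
	return 0;
}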
599
600/*
601 * Rings are posted with all the allocations they'll need to queue the
602 * incoming message to the receiving socket so this can't fail.
603 * All fragments start with a header, so we can make sure we're not receiving
604 * garbage, and we can tell a small 8 byte fragment from an ACK frame.
605 */
606struct rds_ib_ack_state {
607 u64 ack_next;
608 u64 ack_recv;
609 unsigned int ack_required:1;
610 unsigned int ack_next_valid:1;
611 unsigned int ack_recv_valid:1;
612};
613
614static void rds_ib_process_recv(struct rds_connection *conn,
615 struct rds_ib_recv_work *recv, u32 byte_len,
616 struct rds_ib_ack_state *state)
617{
618 struct rds_ib_connection *ic = conn->c_transport_data;
619 struct rds_ib_incoming *ibinc = ic->i_ibinc;
620 struct rds_header *ihdr, *hdr;
621
622 /* XXX shut down the connection if port 0,0 are seen? */
623
624 rdsdebug("ic %p ibinc %p recv %p byte len %u\n", ic, ibinc, recv,
625 byte_len);
626
627 if (byte_len < sizeof(struct rds_header)) {
628 rds_ib_conn_error(conn, "incoming message "
629 "from %pI4 didn't include a "
630 "header, disconnecting and "
631 "reconnecting\n",
632 &conn->c_faddr);
633 return;
634 }
635 byte_len -= sizeof(struct rds_header);
636
637 ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
638
639 /* Validate the checksum. */
640 if (!rds_message_verify_checksum(ihdr)) {
641 rds_ib_conn_error(conn, "incoming message "
642 "from %pI4 has corrupted header - "
643 "forcing a reconnect\n",
644 &conn->c_faddr);
645 rds_stats_inc(s_recv_drop_bad_checksum);
646 return;
647 }
648
649 /* Process the ACK sequence which comes with every packet */
650 state->ack_recv = be64_to_cpu(ihdr->h_ack);
651 state->ack_recv_valid = 1;
652
653 /* Process the credits update if there was one */
654 if (ihdr->h_credit)
655 rds_ib_send_add_credits(conn, ihdr->h_credit);
656
657 if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && byte_len == 0) {
658 /* This is an ACK-only packet. It gets special
659 * treatment here because, historically, ACKs
660 * were rather special beasts.
661 */
662 rds_ib_stats_inc(s_ib_ack_received);
663
664 /*
665 * Usually the frags make their way on to incs and are then freed as
666 * the inc is freed. We don't go that route, so we have to drop the
667 * page ref ourselves. We can't just leave the page on the recv
668 * because that confuses the dma mapping of pages and each recv's use
669 * of a partial page. We can leave the frag, though, it will be
670 * reused.
671 *
672 * FIXME: Fold this into the code path below.
673 */
674 rds_ib_frag_drop_page(recv->r_frag);
675 return;
676 }
677
678 /*
679 * If we don't already have an inc on the connection then this
680 * fragment has a header and starts a message; copy its header
681 * into the inc and save the inc so we can hang upcoming fragments
682 * off its list.
683 */
684 if (ibinc == NULL) {
685 ibinc = recv->r_ibinc;
686 recv->r_ibinc = NULL;
687 ic->i_ibinc = ibinc;
688
689 hdr = &ibinc->ii_inc.i_hdr;
690 memcpy(hdr, ihdr, sizeof(*hdr));
691 ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
692
693 rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
694 ic->i_recv_data_rem, hdr->h_flags);
695 } else {
696 hdr = &ibinc->ii_inc.i_hdr;
697 /* We can't just use memcmp here; fragments of a
698 * single message may carry different ACKs */
699 if (hdr->h_sequence != ihdr->h_sequence
700 || hdr->h_len != ihdr->h_len
701 || hdr->h_sport != ihdr->h_sport
702 || hdr->h_dport != ihdr->h_dport) {
703 rds_ib_conn_error(conn,
704 "fragment header mismatch; forcing reconnect\n");
705 return;
706 }
707 }
708
709 list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags);
710 recv->r_frag = NULL;
711
712 if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
713 ic->i_recv_data_rem -= RDS_FRAG_SIZE;
714 else {
715 ic->i_recv_data_rem = 0;
716 ic->i_ibinc = NULL;
717
718 if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
719 rds_ib_cong_recv(conn, ibinc);
720 else {
721 rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
722 &ibinc->ii_inc, GFP_ATOMIC,
723 KM_SOFTIRQ0);
724 state->ack_next = be64_to_cpu(hdr->h_sequence);
725 state->ack_next_valid = 1;
726 }
727
728 /* Evaluate the ACK_REQUIRED flag *after* we received
729 * the complete frame, and after bumping the next_rx
730 * sequence. */
731 if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) {
732 rds_stats_inc(s_recv_ack_required);
733 state->ack_required = 1;
734 }
735
736 rds_inc_put(&ibinc->ii_inc);
737 }
738}
739
740/*
741 * Plucking the oldest entry from the ring can be done concurrently with
742 * the thread refilling the ring. Each ring operation is protected by
743 * spinlocks and the transient state of refilling doesn't change the
744 * recording of which entry is oldest.
745 *
746 * This relies on IB only calling one cq comp_handler for each cq so that
747 * there will only be one caller of rds_recv_incoming() per RDS connection.
748 */
749void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context)
750{
751 struct rds_connection *conn = context;
752 struct rds_ib_connection *ic = conn->c_transport_data;
753 struct ib_wc wc;
754 struct rds_ib_ack_state state = { 0, };
755 struct rds_ib_recv_work *recv;
756
757 rdsdebug("conn %p cq %p\n", conn, cq);
758
759 rds_ib_stats_inc(s_ib_rx_cq_call);
760
761 ib_req_notify_cq(cq, IB_CQ_SOLICITED);
762
763 while (ib_poll_cq(cq, 1, &wc) > 0) {
764 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
765 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
766 be32_to_cpu(wc.ex.imm_data));
767 rds_ib_stats_inc(s_ib_rx_cq_event);
768
769 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
770
771 rds_ib_recv_unmap_page(ic, recv);
772
773 /*
774 * Also process recvs in connecting state because it is possible
775 * to get a recv completion _before_ the rdmacm ESTABLISHED
776 * event is processed.
777 */
778 if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
779 /* We expect errors as the qp is drained during shutdown */
780 if (wc.status == IB_WC_SUCCESS) {
781 rds_ib_process_recv(conn, recv, wc.byte_len, &state);
782 } else {
783 rds_ib_conn_error(conn, "recv completion on "
784 "%pI4 had status %u, disconnecting and "
785 "reconnecting\n", &conn->c_faddr,
786 wc.status);
787 }
788 }
789
790 rds_ib_ring_free(&ic->i_recv_ring, 1);
791 }
792
793 if (state.ack_next_valid)
794 rds_ib_set_ack(ic, state.ack_next, state.ack_required);
795 if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
796 rds_send_drop_acked(conn, state.ack_recv, NULL);
797 ic->i_ack_recv = state.ack_recv;
798 }
799 if (rds_conn_up(conn))
800 rds_ib_attempt_ack(ic);
801
802 /* If we ever end up with a really empty receive ring, we're
803 * in deep trouble, as the sender will definitely see RNR
804 * timeouts. */
805 if (rds_ib_ring_empty(&ic->i_recv_ring))
806 rds_ib_stats_inc(s_ib_rx_ring_empty);
807
808 /*
809 * If the ring is running low, then schedule the thread to refill.
810 */
811 if (rds_ib_ring_low(&ic->i_recv_ring))
812 queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
813}
814
815int rds_ib_recv(struct rds_connection *conn)
816{
817 struct rds_ib_connection *ic = conn->c_transport_data;
818 int ret = 0;
819
820 rdsdebug("conn %p\n", conn);
821
822 /*
823 * If we get a temporary posting failure in this context then
824 * we're really low and we want the caller to back off for a bit.
825 */
826 mutex_lock(&ic->i_recv_mutex);
827 if (rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0))
828 ret = -ENOMEM;
829 else
830 rds_ib_stats_inc(s_ib_rx_refill_from_thread);
831 mutex_unlock(&ic->i_recv_mutex);
832
833 if (rds_conn_up(conn))
834 rds_ib_attempt_ack(ic);
835
836 return ret;
837}
838
839int __init rds_ib_recv_init(void)
840{
841 struct sysinfo si;
842 int ret = -ENOMEM;
843
844 /* Default to roughly a third of all available RAM for recv memory */
845 si_meminfo(&si);
846 rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
847
848 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
849 sizeof(struct rds_ib_incoming),
850 0, 0, NULL);
851 if (rds_ib_incoming_slab == NULL)
852 goto out;
853
854 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
855 sizeof(struct rds_page_frag),
856 0, 0, NULL);
857 if (rds_ib_frag_slab == NULL)
858 kmem_cache_destroy(rds_ib_incoming_slab);
859 else
860 ret = 0;
861out:
862 return ret;
863}
864
865void rds_ib_recv_exit(void)
866{
867 kmem_cache_destroy(rds_ib_incoming_slab);
868 kmem_cache_destroy(rds_ib_frag_slab);
869}
diff --git a/net/rds/ib_ring.c b/net/rds/ib_ring.c
new file mode 100644
index 000000000000..99a6ccae964c
--- /dev/null
+++ b/net/rds/ib_ring.c
@@ -0,0 +1,168 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34
35#include "rds.h"
36#include "ib.h"
37
38/*
39 * Locking for IB rings.
40 * We assume that allocation is always protected by a mutex
41 * in the caller (this is a valid assumption for the current
42 * implementation).
43 *
44 * Freeing always happens in an interrupt, and hence only
45 * races with allocations, but not with other free()s.
46 *
47 * The interaction between allocation and freeing is that
48 * the alloc code has to determine the number of free entries.
49 * To this end, we maintain two counters: an allocation counter
50 * and a free counter. Both are allowed to run freely, and wrap
51 * around.
52 * The number of used entries is always (alloc_ctr - free_ctr) % NR.
53 *
54 * The current implementation makes free_ctr atomic. When the
55 * caller finds an allocation fails, it should set an "alloc fail"
56 * bit and retry the allocation. The "alloc fail" bit essentially tells
57 * the CQ completion handlers to wake it up after freeing some
58 * more entries.
59 */
60
61/*
62 * This only happens on shutdown.
63 */
64DECLARE_WAIT_QUEUE_HEAD(rds_ib_ring_empty_wait);
65
66void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr)
67{
68 memset(ring, 0, sizeof(*ring));
69 ring->w_nr = nr;
70 rdsdebug("ring %p nr %u\n", ring, ring->w_nr);
71}
72
73static inline u32 __rds_ib_ring_used(struct rds_ib_work_ring *ring)
74{
75 u32 diff;
76
77 /* This assumes that atomic_t has at least as many bits as u32 */
78 diff = ring->w_alloc_ctr - (u32) atomic_read(&ring->w_free_ctr);
79 BUG_ON(diff > ring->w_nr);
80
81 return diff;
82}
83
84void rds_ib_ring_resize(struct rds_ib_work_ring *ring, u32 nr)
85{
86 /* We only ever get called from the connection setup code,
87 * prior to creating the QP. */
88 BUG_ON(__rds_ib_ring_used(ring));
89 ring->w_nr = nr;
90}
91
92static int __rds_ib_ring_empty(struct rds_ib_work_ring *ring)
93{
94 return __rds_ib_ring_used(ring) == 0;
95}
96
97u32 rds_ib_ring_alloc(struct rds_ib_work_ring *ring, u32 val, u32 *pos)
98{
99 u32 ret = 0, avail;
100
101 avail = ring->w_nr - __rds_ib_ring_used(ring);
102
103 rdsdebug("ring %p val %u next %u free %u\n", ring, val,
104 ring->w_alloc_ptr, avail);
105
106 if (val && avail) {
107 ret = min(val, avail);
108 *pos = ring->w_alloc_ptr;
109
110 ring->w_alloc_ptr = (ring->w_alloc_ptr + ret) % ring->w_nr;
111 ring->w_alloc_ctr += ret;
112 }
113
114 return ret;
115}
116
117void rds_ib_ring_free(struct rds_ib_work_ring *ring, u32 val)
118{
119 ring->w_free_ptr = (ring->w_free_ptr + val) % ring->w_nr;
120 atomic_add(val, &ring->w_free_ctr);
121
122 if (__rds_ib_ring_empty(ring) &&
123 waitqueue_active(&rds_ib_ring_empty_wait))
124 wake_up(&rds_ib_ring_empty_wait);
125}
126
127void rds_ib_ring_unalloc(struct rds_ib_work_ring *ring, u32 val)
128{
129 ring->w_alloc_ptr = (ring->w_alloc_ptr - val) % ring->w_nr;
130 ring->w_alloc_ctr -= val;
131}
132
133int rds_ib_ring_empty(struct rds_ib_work_ring *ring)
134{
135 return __rds_ib_ring_empty(ring);
136}
137
138int rds_ib_ring_low(struct rds_ib_work_ring *ring)
139{
140 return __rds_ib_ring_used(ring) <= (ring->w_nr >> 2);
141}
142
143/*
144 * returns the oldest alloced ring entry. This will be the next one
145 * freed. This can't be called if there are none allocated.
146 */
147u32 rds_ib_ring_oldest(struct rds_ib_work_ring *ring)
148{
149 return ring->w_free_ptr;
150}
151
152/*
153 * returns the number of completed work requests.
154 */
155
156u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest)
157{
158 u32 ret;
159
160 if (oldest <= (unsigned long long)wr_id)
161 ret = (unsigned long long)wr_id - oldest + 1;
162 else
163 ret = ring->w_nr - oldest + (unsigned long long)wr_id + 1;
164
165 rdsdebug("ring %p ret %u wr_id %u oldest %u\n", ring, ret,
166 wr_id, oldest);
167 return ret;
168}
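To make the wrap-around arithmetic in rds_ib_ring_completed() above concrete, the same computation can be pulled out into a standalone sketch; the function name and parameters below are hypothetical and not part of the patch:

static u32 ex_ring_completed(u32 ring_nr, u32 wr_id, u32 oldest)
{
	/* Completions form one contiguous run when the ids have not wrapped. */
	if (oldest <= wr_id)
		return wr_id - oldest + 1;
	/* Otherwise the run wraps past the end of the ring. */
	return ring_nr - oldest + wr_id + 1;
}

Worked example: in a ring of 8 entries, with oldest == 6 and wr_id == 1, the run has wrapped, so 8 - 6 + 1 + 1 = 4 entries completed (indices 6, 7, 0 and 1).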
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
new file mode 100644
index 000000000000..cb6c52cb1c4c
--- /dev/null
+++ b/net/rds/ib_send.c
@@ -0,0 +1,874 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/device.h>
36#include <linux/dmapool.h>
37
38#include "rds.h"
39#include "rdma.h"
40#include "ib.h"
41
42static void rds_ib_send_rdma_complete(struct rds_message *rm,
43 int wc_status)
44{
45 int notify_status;
46
47 switch (wc_status) {
48 case IB_WC_WR_FLUSH_ERR:
49 return;
50
51 case IB_WC_SUCCESS:
52 notify_status = RDS_RDMA_SUCCESS;
53 break;
54
55 case IB_WC_REM_ACCESS_ERR:
56 notify_status = RDS_RDMA_REMOTE_ERROR;
57 break;
58
59 default:
60 notify_status = RDS_RDMA_OTHER_ERROR;
61 break;
62 }
63 rds_rdma_send_complete(rm, notify_status);
64}
65
66static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
67 struct rds_rdma_op *op)
68{
69 if (op->r_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->r_sg, op->r_nents,
72 op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->r_mapped = 0;
74 }
75}
76
77static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
78 struct rds_ib_send_work *send,
79 int wc_status)
80{
81 struct rds_message *rm = send->s_rm;
82
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
84
85 ib_dma_unmap_sg(ic->i_cm_id->device,
86 rm->m_sg, rm->m_nents,
87 DMA_TO_DEVICE);
88
89 if (rm->m_rdma_op != NULL) {
90 rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
91
92 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics:
94 * 1. Notify when we receive the ACK on the RDS message
95 * that was queued with the RDMA. This provides reliable
96 * notification of RDMA status at the expense of a one-way
97 * packet delay.
98 * 2. Notify when the IB stack gives us the completion event for
99 * the RDMA operation.
100 * 3. Notify when the IB stack gives us the completion event for
101 * the accompanying RDS messages.
102 * Here, we implement approach #3. To implement approach #2,
103 * call rds_rdma_send_complete from the cq_handler. To implement #1,
104 * don't call rds_rdma_send_complete at all, and fall back to the notify
105 * handling in the ACK processing code.
106 *
107 * Note: There's no need to explicitly sync any RDMA buffers using
108 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
109 * operation itself unmapped the RDMA buffers, which takes care
110 * of synching.
111 */
112 rds_ib_send_rdma_complete(rm, wc_status);
113
114 if (rm->m_rdma_op->r_write)
115 rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes);
116 else
117 rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes);
118 }
119
120 /* If anyone waited for this message to get flushed out, wake
121 * them up now */
122 rds_message_unmapped(rm);
123
124 rds_message_put(rm);
125 send->s_rm = NULL;
126}
127
128void rds_ib_send_init_ring(struct rds_ib_connection *ic)
129{
130 struct rds_ib_send_work *send;
131 u32 i;
132
133 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
134 struct ib_sge *sge;
135
136 send->s_rm = NULL;
137 send->s_op = NULL;
138
139 send->s_wr.wr_id = i;
140 send->s_wr.sg_list = send->s_sge;
141 send->s_wr.num_sge = 1;
142 send->s_wr.opcode = IB_WR_SEND;
143 send->s_wr.send_flags = 0;
144 send->s_wr.ex.imm_data = 0;
145
146 sge = rds_ib_data_sge(ic, send->s_sge);
147 sge->lkey = ic->i_mr->lkey;
148
149 sge = rds_ib_header_sge(ic, send->s_sge);
150 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
151 sge->length = sizeof(struct rds_header);
152 sge->lkey = ic->i_mr->lkey;
153 }
154}
155
156void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
157{
158 struct rds_ib_send_work *send;
159 u32 i;
160
161 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
162 if (send->s_wr.opcode == 0xdead)
163 continue;
164 if (send->s_rm)
165 rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
166 if (send->s_op)
167 rds_ib_send_unmap_rdma(ic, send->s_op);
168 }
169}
170
171/*
172 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
173 * operations performed in the send path. As the sender allocs and potentially
174 * unallocs the next free entry in the ring it doesn't alter which is
175 * the next to be freed, which is what this is concerned with.
176 */
177void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
178{
179 struct rds_connection *conn = context;
180 struct rds_ib_connection *ic = conn->c_transport_data;
181 struct ib_wc wc;
182 struct rds_ib_send_work *send;
183 u32 completed;
184 u32 oldest;
185 u32 i = 0;
186 int ret;
187
188 rdsdebug("cq %p conn %p\n", cq, conn);
189 rds_ib_stats_inc(s_ib_tx_cq_call);
190 ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
191 if (ret)
192 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
193
194 while (ib_poll_cq(cq, 1, &wc) > 0) {
195 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
196 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
197 be32_to_cpu(wc.ex.imm_data));
198 rds_ib_stats_inc(s_ib_tx_cq_event);
199
200 if (wc.wr_id == RDS_IB_ACK_WR_ID) {
201 if (ic->i_ack_queued + HZ/2 < jiffies)
202 rds_ib_stats_inc(s_ib_tx_stalled);
203 rds_ib_ack_send_complete(ic);
204 continue;
205 }
206
207 oldest = rds_ib_ring_oldest(&ic->i_send_ring);
208
209 completed = rds_ib_ring_completed(&ic->i_send_ring, wc.wr_id, oldest);
210
211 for (i = 0; i < completed; i++) {
212 send = &ic->i_sends[oldest];
213
214 /* In the error case, wc.opcode sometimes contains garbage */
215 switch (send->s_wr.opcode) {
216 case IB_WR_SEND:
217 if (send->s_rm)
218 rds_ib_send_unmap_rm(ic, send, wc.status);
219 break;
220 case IB_WR_RDMA_WRITE:
221 case IB_WR_RDMA_READ:
222 /* Nothing to be done - the SG list will be unmapped
223 * when the SEND completes. */
224 break;
225 default:
226 if (printk_ratelimit())
227 printk(KERN_NOTICE
228 "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
229 __func__, send->s_wr.opcode);
230 break;
231 }
232
233 send->s_wr.opcode = 0xdead;
234 send->s_wr.num_sge = 1;
235 if (send->s_queued + HZ/2 < jiffies)
236 rds_ib_stats_inc(s_ib_tx_stalled);
237
238 /* If a RDMA operation produced an error, signal this right
239 * away. If we don't, the subsequent SEND that goes with this
240 * RDMA will be canceled with ERR_WFLUSH, and the application
241 * will never learn that the RDMA failed. */
242 if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) {
243 struct rds_message *rm;
244
245 rm = rds_send_get_message(conn, send->s_op);
246 if (rm)
247 rds_ib_send_rdma_complete(rm, wc.status);
248 }
249
250 oldest = (oldest + 1) % ic->i_send_ring.w_nr;
251 }
252
253 rds_ib_ring_free(&ic->i_send_ring, completed);
254
255 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags)
256 || test_bit(0, &conn->c_map_queued))
257 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
258
259 /* We expect errors as the qp is drained during shutdown */
260 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
261 rds_ib_conn_error(conn,
262 "send completion on %pI4 "
263 "had status %u, disconnecting and reconnecting\n",
264 &conn->c_faddr, wc.status);
265 }
266 }
267}
268
269/*
270 * This is the main function for allocating credits when sending
271 * messages.
272 *
273 * Conceptually, we have two counters:
274 * - send credits: this tells us how many WRs we're allowed
275 * to submit without overrunning the receiver's queue. For
276 * each SEND WR we post, we decrement this by one.
277 *
278 * - posted credits: this tells us how many WRs we recently
279 * posted to the receive queue. This value is transferred
280 * to the peer as a "credit update" in a RDS header field.
281 * Every time we transmit credits to the peer, we subtract
282 * the amount of transferred credits from this counter.
283 *
284 * It is essential that we avoid situations where both sides have
285 * exhausted their send credits, and are unable to send new credits
286 * to the peer. We achieve this by requiring that we send at least
287 * one credit update to the peer before exhausting our credits.
288 * When new credits arrive, we subtract one credit that is withheld
289 * until we've posted new buffers and are ready to transmit these
290 * credits (see rds_ib_send_add_credits below).
291 *
292 * The RDS send code is essentially single-threaded; rds_send_xmit
293 * grabs c_send_lock to ensure exclusive access to the send ring.
294 * However, the ACK sending code is independent and can race with
295 * message SENDs.
296 *
297 * In the send path, we need to update the counters for send credits
298 * and the counter of posted buffers atomically - when we use the
299 * last available credit, we cannot allow another thread to race us
300 * and grab the posted credits counter. Hence, we have to use a
301 * spinlock to protect the credit counter, or use atomics.
302 *
303 * Spinlocks shared between the send and the receive path are bad,
304 * because they create unnecessary delays. An early implementation
305 * using a spinlock showed a 5% degradation in throughput at some
306 * loads.
307 *
308 * This implementation avoids spinlocks completely, putting both
309 * counters into a single atomic, and updating that atomic using
310 * atomic_add (in the receive path, when receiving fresh credits),
311 * and using atomic_cmpxchg when updating the two counters.
312 */
313int rds_ib_send_grab_credits(struct rds_ib_connection *ic,
314 u32 wanted, u32 *adv_credits, int need_posted)
315{
316 unsigned int avail, posted, got = 0, advertise;
317 long oldval, newval;
318
319 *adv_credits = 0;
320 if (!ic->i_flowctl)
321 return wanted;
322
323try_again:
324 advertise = 0;
325 oldval = newval = atomic_read(&ic->i_credits);
326 posted = IB_GET_POST_CREDITS(oldval);
327 avail = IB_GET_SEND_CREDITS(oldval);
328
329 rdsdebug("rds_ib_send_grab_credits(%u): credits=%u posted=%u\n",
330 wanted, avail, posted);
331
332 /* The last credit must be used to send a credit update. */
333 if (avail && !posted)
334 avail--;
335
336 if (avail < wanted) {
337 struct rds_connection *conn = ic->i_cm_id->context;
338
339 /* Oops, there aren't that many credits left! */
340 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
341 got = avail;
342 } else {
343 /* Sometimes you get what you want, lalala. */
344 got = wanted;
345 }
346 newval -= IB_SET_SEND_CREDITS(got);
347
348 /*
349 * If need_posted is non-zero, then the caller wants
350 * the posted credits regardless of whether any send credits are
351 * available.
352 */
353 if (posted && (got || need_posted)) {
354 advertise = min_t(unsigned int, posted, RDS_MAX_ADV_CREDIT);
355 newval -= IB_SET_POST_CREDITS(advertise);
356 }
357
358 /* Finally bill everything */
359 if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval)
360 goto try_again;
361
362 *adv_credits = advertise;
363 return got;
364}
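/*
 * Editor's sketch, not part of the patch: the IB_GET_SEND_CREDITS(),
 * IB_GET_POST_CREDITS() and IB_SET_*() helpers used above (presumably
 * defined in ib.h, which is not shown here) pack both counters into the
 * single atomic_t ic->i_credits, which is what lets one atomic_cmpxchg()
 * update them together. One plausible packing, with hypothetical EX_*
 * names and an assumed 16 bits per counter:
 */
#define EX_CREDIT_MASK		0x0000ffffU
#define EX_POST_SHIFT		16

static inline unsigned int ex_get_send_credits(unsigned int v)
{
	return v & EX_CREDIT_MASK;			/* low 16 bits */
}

static inline unsigned int ex_get_post_credits(unsigned int v)
{
	return (v >> EX_POST_SHIFT) & EX_CREDIT_MASK;	/* high 16 bits */
}

static inline unsigned int ex_set_post_credits(unsigned int n)
{
	return (n & EX_CREDIT_MASK) << EX_POST_SHIFT;	/* shift into the high half */
}
/*
 * With such an encoding, atomic_add(ex_set_post_credits(n), &ic->i_credits)
 * bumps only the posted half, while the cmpxchg loop above can debit send
 * credits and consume posted credits in one shot.
 */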
365
366void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits)
367{
368 struct rds_ib_connection *ic = conn->c_transport_data;
369
370 if (credits == 0)
371 return;
372
373 rdsdebug("rds_ib_send_add_credits(%u): current=%u%s\n",
374 credits,
375 IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)),
376 test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : "");
377
378 atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits);
379 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
380 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
381
382 WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384);
383
384 rds_ib_stats_inc(s_ib_rx_credit_updates);
385}
386
387void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
388{
389 struct rds_ib_connection *ic = conn->c_transport_data;
390
391 if (posted == 0)
392 return;
393
394 atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits);
395
396 /* Decide whether to send an update to the peer now.
397 * If we would send a credit update for every single buffer we
398 * post, we would end up with an ACK storm (ACK arrives,
399 * consumes buffer, we refill the ring, send ACK to remote
400 * advertising the newly posted buffer... ad inf)
401 *
402 * Performance pretty much depends on how often we send
403 * credit updates - too frequent updates mean lots of ACKs.
404 * Too infrequent updates, and the peer will run out of
405 * credits and has to throttle.
406 * For the time being, 16 seems to be a good compromise.
407 */
408 if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16)
409 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
410}
411
412static inline void
413rds_ib_xmit_populate_wr(struct rds_ib_connection *ic,
414 struct rds_ib_send_work *send, unsigned int pos,
415 unsigned long buffer, unsigned int length,
416 int send_flags)
417{
418 struct ib_sge *sge;
419
420 WARN_ON(pos != send - ic->i_sends);
421
422 send->s_wr.send_flags = send_flags;
423 send->s_wr.opcode = IB_WR_SEND;
424 send->s_wr.num_sge = 2;
425 send->s_wr.next = NULL;
426 send->s_queued = jiffies;
427 send->s_op = NULL;
428
429 if (length != 0) {
430 sge = rds_ib_data_sge(ic, send->s_sge);
431 sge->addr = buffer;
432 sge->length = length;
433 sge->lkey = ic->i_mr->lkey;
434
435 sge = rds_ib_header_sge(ic, send->s_sge);
436 } else {
437 /* We're sending a packet with no payload. There is only
438 * one SGE */
439 send->s_wr.num_sge = 1;
440 sge = &send->s_sge[0];
441 }
442
443 sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header));
444 sge->length = sizeof(struct rds_header);
445 sge->lkey = ic->i_mr->lkey;
446}
447
448/*
449 * This can be called multiple times for a given message. The first time
450 * we see a message we map its scatterlist into the IB device so that
451 * we can provide that mapped address to the IB scatter gather entries
452 * in the IB work requests. We translate the scatterlist into a series
453 * of work requests that fragment the message. These work requests complete
454 * in order so we pass ownership of the message to the completion handler
455 * once we send the final fragment.
456 *
457 * The RDS core uses the c_send_lock to only enter this function once
458 * per connection. This makes sure that the tx ring alloc/unalloc pairs
459 * don't get out of sync and confuse the ring.
460 */
461int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
462 unsigned int hdr_off, unsigned int sg, unsigned int off)
463{
464 struct rds_ib_connection *ic = conn->c_transport_data;
465 struct ib_device *dev = ic->i_cm_id->device;
466 struct rds_ib_send_work *send = NULL;
467 struct rds_ib_send_work *first;
468 struct rds_ib_send_work *prev;
469 struct ib_send_wr *failed_wr;
470 struct scatterlist *scat;
471 u32 pos;
472 u32 i;
473 u32 work_alloc;
474 u32 credit_alloc;
475 u32 posted;
476 u32 adv_credits = 0;
477 int send_flags = 0;
478 int sent;
479 int ret;
480 int flow_controlled = 0;
481
482 BUG_ON(off % RDS_FRAG_SIZE);
483 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
484
485 /* FIXME we may overallocate here */
486 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
487 i = 1;
488 else
489 i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
490
491 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
492 if (work_alloc == 0) {
493 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
494 rds_ib_stats_inc(s_ib_tx_ring_full);
495 ret = -ENOMEM;
496 goto out;
497 }
498
499 credit_alloc = work_alloc;
500 if (ic->i_flowctl) {
501 credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0);
502 adv_credits += posted;
503 if (credit_alloc < work_alloc) {
504 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
505 work_alloc = credit_alloc;
506 flow_controlled++;
507 }
508 if (work_alloc == 0) {
509 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
510 rds_ib_stats_inc(s_ib_tx_throttle);
511 ret = -ENOMEM;
512 goto out;
513 }
514 }
515
516 /* map the message the first time we see it */
517 if (ic->i_rm == NULL) {
518 /*
519 printk(KERN_NOTICE "rds_ib_xmit prep msg dport=%u flags=0x%x len=%d\n",
520 be16_to_cpu(rm->m_inc.i_hdr.h_dport),
521 rm->m_inc.i_hdr.h_flags,
522 be32_to_cpu(rm->m_inc.i_hdr.h_len));
523 */
524 if (rm->m_nents) {
525 rm->m_count = ib_dma_map_sg(dev,
526 rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
527 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
528 if (rm->m_count == 0) {
529 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
530 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
531 ret = -ENOMEM; /* XXX ? */
532 goto out;
533 }
534 } else {
535 rm->m_count = 0;
536 }
537
538 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
539 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes;
540 rds_message_addref(rm);
541 ic->i_rm = rm;
542
543 /* Finalize the header */
544 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
545 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED;
546 if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
547 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
548
549 /* If it has a RDMA op, tell the peer we did it. This is
550 * used by the peer to release use-once RDMA MRs. */
551 if (rm->m_rdma_op) {
552 struct rds_ext_header_rdma ext_hdr;
553
554 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key);
555 rds_message_add_extension(&rm->m_inc.i_hdr,
556 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
557 }
558 if (rm->m_rdma_cookie) {
559 rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
560 rds_rdma_cookie_key(rm->m_rdma_cookie),
561 rds_rdma_cookie_offset(rm->m_rdma_cookie));
562 }
563
564 /* Note - rds_ib_piggyb_ack clears the ACK_REQUIRED bit, so
565 * we should not do this unless we have a chance of at least
566 * sticking the header into the send ring. Which is why we
567 * should call rds_ib_ring_alloc first. */
568 rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_ib_piggyb_ack(ic));
569 rds_message_make_checksum(&rm->m_inc.i_hdr);
570
571 /*
572 * Update adv_credits since we reset the ACK_REQUIRED bit.
573 */
574 rds_ib_send_grab_credits(ic, 0, &posted, 1);
575 adv_credits += posted;
576 BUG_ON(adv_credits > 255);
577 } else if (ic->i_rm != rm)
578 BUG();
579
580 send = &ic->i_sends[pos];
581 first = send;
582 prev = NULL;
583 scat = &rm->m_sg[sg];
584 sent = 0;
585 i = 0;
586
587 /* Sometimes you want to put a fence between an RDMA
588 * READ and the following SEND.
589 * We could either do this all the time
590 * or when requested by the user. Right now, we let
591 * the application choose.
592 */
593 if (rm->m_rdma_op && rm->m_rdma_op->r_fence)
594 send_flags = IB_SEND_FENCE;
595
596 /*
597 * We could be copying the header into the unused tail of the page.
598 * That would need to be changed in the future when those pages might
599 * be mapped userspace pages or page cache pages. So instead we always
600 * use a second sge and our long-lived ring of mapped headers. We send
601 * the header after the data so that the data payload can be aligned on
602 * the receiver.
603 */
604
605 /* handle a 0-len message */
606 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) {
607 rds_ib_xmit_populate_wr(ic, send, pos, 0, 0, send_flags);
608 goto add_header;
609 }
610
611 /* if there's data reference it with a chain of work reqs */
612 for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
613 unsigned int len;
614
615 send = &ic->i_sends[pos];
616
617 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
618 rds_ib_xmit_populate_wr(ic, send, pos,
619 ib_sg_dma_address(dev, scat) + off, len,
620 send_flags);
621
622 /*
623 * We want to delay signaling completions just enough to get
624 * the batching benefits but not so much that we create dead time
625 * on the wire.
626 */
627 if (ic->i_unsignaled_wrs-- == 0) {
628 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
629 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
630 }
631
632 ic->i_unsignaled_bytes -= len;
633 if (ic->i_unsignaled_bytes <= 0) {
634 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes;
635 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
636 }
637
638 /*
639 * Always signal the last one if we're stopping due to flow control.
640 */
641 if (flow_controlled && i == (work_alloc-1))
642 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
643
644 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
645 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
646
647 sent += len;
648 off += len;
649 if (off == ib_sg_dma_len(dev, scat)) {
650 scat++;
651 off = 0;
652 }
653
654add_header:
655 /* Tack on the header after the data. The header SGE should already
656 * have been set up to point to the right header buffer. */
657 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
658
659 if (0) {
660 struct rds_header *hdr = &ic->i_send_hdrs[pos];
661
662 printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n",
663 be16_to_cpu(hdr->h_dport),
664 hdr->h_flags,
665 be32_to_cpu(hdr->h_len));
666 }
667 if (adv_credits) {
668 struct rds_header *hdr = &ic->i_send_hdrs[pos];
669
670 /* add credit and redo the header checksum */
671 hdr->h_credit = adv_credits;
672 rds_message_make_checksum(hdr);
673 adv_credits = 0;
674 rds_ib_stats_inc(s_ib_tx_credit_updates);
675 }
676
677 if (prev)
678 prev->s_wr.next = &send->s_wr;
679 prev = send;
680
681 pos = (pos + 1) % ic->i_send_ring.w_nr;
682 }
683
684 /* Account the RDS header in the number of bytes we sent, but just once.
685 * The caller has no concept of fragmentation. */
686 if (hdr_off == 0)
687 sent += sizeof(struct rds_header);
688
689 /* if we finished the message then send completion owns it */
690 if (scat == &rm->m_sg[rm->m_count]) {
691 prev->s_rm = ic->i_rm;
692 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
693 ic->i_rm = NULL;
694 }
695
696 if (i < work_alloc) {
697 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
698 work_alloc = i;
699 }
700 if (ic->i_flowctl && i < credit_alloc)
701 rds_ib_send_add_credits(conn, credit_alloc - i);
702
703 /* XXX need to worry about failed_wr and partial sends. */
704 failed_wr = &first->s_wr;
705 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
706 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
707 first, &first->s_wr, ret, failed_wr);
708 BUG_ON(failed_wr != &first->s_wr);
709 if (ret) {
710 printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 "
711 "returned %d\n", &conn->c_faddr, ret);
712 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
713 if (prev->s_rm) {
714 ic->i_rm = prev->s_rm;
715 prev->s_rm = NULL;
716 }
717 /* Finesse this later */
718 BUG();
719 goto out;
720 }
721
722 ret = sent;
723out:
724 BUG_ON(adv_credits);
725 return ret;
726}
727
728int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
729{
730 struct rds_ib_connection *ic = conn->c_transport_data;
731 struct rds_ib_send_work *send = NULL;
732 struct rds_ib_send_work *first;
733 struct rds_ib_send_work *prev;
734 struct ib_send_wr *failed_wr;
735 struct rds_ib_device *rds_ibdev;
736 struct scatterlist *scat;
737 unsigned long len;
738 u64 remote_addr = op->r_remote_addr;
739 u32 pos;
740 u32 work_alloc;
741 u32 i;
742 u32 j;
743 int sent;
744 int ret;
745 int num_sge;
746
747 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
748
749 /* map the message the first time we see it */
750 if (!op->r_mapped) {
751 op->r_count = ib_dma_map_sg(ic->i_cm_id->device,
752 op->r_sg, op->r_nents, (op->r_write) ?
753 DMA_TO_DEVICE : DMA_FROM_DEVICE);
754 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count);
755 if (op->r_count == 0) {
756 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
757 ret = -ENOMEM; /* XXX ? */
758 goto out;
759 }
760
761 op->r_mapped = 1;
762 }
763
764 /*
765 * Instead of knowing how to return a partial rdma read/write, we insist that there
766 * be enough work requests to send the entire message.
767 */
768 i = ceil(op->r_count, rds_ibdev->max_sge);
769
770 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
771 if (work_alloc != i) {
772 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
773 rds_ib_stats_inc(s_ib_tx_ring_full);
774 ret = -ENOMEM;
775 goto out;
776 }
777
778 send = &ic->i_sends[pos];
779 first = send;
780 prev = NULL;
781 scat = &op->r_sg[0];
782 sent = 0;
783 num_sge = op->r_count;
784
785 for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) {
786 send->s_wr.send_flags = 0;
787 send->s_queued = jiffies;
788 /*
789 * We want to delay signaling completions just enough to get
790 * the batching benefits but not so much that we create dead time on the wire.
791 */
792 if (ic->i_unsignaled_wrs-- == 0) {
793 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
794 send->s_wr.send_flags = IB_SEND_SIGNALED;
795 }
796
797 send->s_wr.opcode = op->r_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
798 send->s_wr.wr.rdma.remote_addr = remote_addr;
799 send->s_wr.wr.rdma.rkey = op->r_key;
800 send->s_op = op;
801
802 if (num_sge > rds_ibdev->max_sge) {
803 send->s_wr.num_sge = rds_ibdev->max_sge;
804 num_sge -= rds_ibdev->max_sge;
805 } else {
806 send->s_wr.num_sge = num_sge;
807 }
808
809 send->s_wr.next = NULL;
810
811 if (prev)
812 prev->s_wr.next = &send->s_wr;
813
814 for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) {
815 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
816 send->s_sge[j].addr =
817 ib_sg_dma_address(ic->i_cm_id->device, scat);
818 send->s_sge[j].length = len;
819 send->s_sge[j].lkey = ic->i_mr->lkey;
820
821 sent += len;
822 rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);
823
824 remote_addr += len;
825 scat++;
826 }
827
828 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
829 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
830
831 prev = send;
832 if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
833 send = ic->i_sends;
834 }
835
836 /* if we finished the message then send completion owns it */
837 if (scat == &op->r_sg[op->r_count])
838 prev->s_wr.send_flags = IB_SEND_SIGNALED;
839
840 if (i < work_alloc) {
841 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
842 work_alloc = i;
843 }
844
845 failed_wr = &first->s_wr;
846 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
847 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
848 first, &first->s_wr, ret, failed_wr);
849 BUG_ON(failed_wr != &first->s_wr);
850 if (ret) {
851 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
852 "returned %d\n", &conn->c_faddr, ret);
853 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
854 goto out;
855 }
856
857 if (unlikely(failed_wr != &first->s_wr)) {
858 printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
859 BUG_ON(failed_wr != &first->s_wr);
860 }
861
862
863out:
864 return ret;
865}
866
867void rds_ib_xmit_complete(struct rds_connection *conn)
868{
869 struct rds_ib_connection *ic = conn->c_transport_data;
870
871 /* We may have a pending ACK or window update we were unable
872 * to send previously (due to flow control). Try again. */
873 rds_ib_attempt_ack(ic);
874}
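One idiom worth spelling out from the send path above: a completion is requested only for every rds_ib_sysctl_max_unsig_wrs-th work request (or once rds_ib_sysctl_max_unsig_bytes of payload has gone out), so most sends complete silently and the completion handler runs far less often. A minimal standalone sketch of that countdown, using a hypothetical helper name and assuming the usual <rdma/ib_verbs.h> definitions:

#include <rdma/ib_verbs.h>

/* Request a signaled completion only every 'interval'-th work request;
 * the rest are coalesced into the next signaled one. */
static void ex_maybe_signal(struct ib_send_wr *wr, long *countdown, long interval)
{
	if ((*countdown)-- == 0) {
		*countdown = interval;
		wr->send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
	}
}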
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
new file mode 100644
index 000000000000..02e3e3d50d4a
--- /dev/null
+++ b/net/rds/ib_stats.c
@@ -0,0 +1,95 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/percpu.h>
34#include <linux/seq_file.h>
35#include <linux/proc_fs.h>
36
37#include "rds.h"
38#include "ib.h"
39
40DEFINE_PER_CPU(struct rds_ib_statistics, rds_ib_stats) ____cacheline_aligned;
41
42static char *rds_ib_stat_names[] = {
43 "ib_connect_raced",
44 "ib_listen_closed_stale",
45 "ib_tx_cq_call",
46 "ib_tx_cq_event",
47 "ib_tx_ring_full",
48 "ib_tx_throttle",
49 "ib_tx_sg_mapping_failure",
50 "ib_tx_stalled",
51 "ib_tx_credit_updates",
52 "ib_rx_cq_call",
53 "ib_rx_cq_event",
54 "ib_rx_ring_empty",
55 "ib_rx_refill_from_cq",
56 "ib_rx_refill_from_thread",
57 "ib_rx_alloc_limit",
58 "ib_rx_credit_updates",
59 "ib_ack_sent",
60 "ib_ack_send_failure",
61 "ib_ack_send_delayed",
62 "ib_ack_send_piggybacked",
63 "ib_ack_received",
64 "ib_rdma_mr_alloc",
65 "ib_rdma_mr_free",
66 "ib_rdma_mr_used",
67 "ib_rdma_mr_pool_flush",
68 "ib_rdma_mr_pool_wait",
69 "ib_rdma_mr_pool_depleted",
70};
71
72unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
73 unsigned int avail)
74{
75 struct rds_ib_statistics stats = {0, };
76 uint64_t *src;
77 uint64_t *sum;
78 size_t i;
79 int cpu;
80
81 if (avail < ARRAY_SIZE(rds_ib_stat_names))
82 goto out;
83
84 for_each_online_cpu(cpu) {
85 src = (uint64_t *)&(per_cpu(rds_ib_stats, cpu));
86 sum = (uint64_t *)&stats;
87 for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++)
88 *(sum++) += *(src++);
89 }
90
91 rds_stats_info_copy(iter, (uint64_t *)&stats, rds_ib_stat_names,
92 ARRAY_SIZE(rds_ib_stat_names));
93out:
94 return ARRAY_SIZE(rds_ib_stat_names);
95}
diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c
new file mode 100644
index 000000000000..d87830db93a0
--- /dev/null
+++ b/net/rds/ib_sysctl.c
@@ -0,0 +1,137 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/sysctl.h>
35#include <linux/proc_fs.h>
36
37#include "ib.h"
38
39static struct ctl_table_header *rds_ib_sysctl_hdr;
40
41unsigned long rds_ib_sysctl_max_send_wr = RDS_IB_DEFAULT_SEND_WR;
42unsigned long rds_ib_sysctl_max_recv_wr = RDS_IB_DEFAULT_RECV_WR;
43unsigned long rds_ib_sysctl_max_recv_allocation = (128 * 1024 * 1024) / RDS_FRAG_SIZE;
44static unsigned long rds_ib_sysctl_max_wr_min = 1;
45/* hardware will fail CQ creation long before this */
46static unsigned long rds_ib_sysctl_max_wr_max = (u32)~0;
47
48unsigned long rds_ib_sysctl_max_unsig_wrs = 16;
49static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1;
50static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64;
51
52unsigned long rds_ib_sysctl_max_unsig_bytes = (16 << 20);
53static unsigned long rds_ib_sysctl_max_unsig_bytes_min = 1;
54static unsigned long rds_ib_sysctl_max_unsig_bytes_max = ~0UL;
55
56unsigned int rds_ib_sysctl_flow_control = 1;
57
58ctl_table rds_ib_sysctl_table[] = {
59 {
60 .ctl_name = CTL_UNNUMBERED,
61 .procname = "max_send_wr",
62 .data = &rds_ib_sysctl_max_send_wr,
63 .maxlen = sizeof(unsigned long),
64 .mode = 0644,
65 .proc_handler = &proc_doulongvec_minmax,
66 .extra1 = &rds_ib_sysctl_max_wr_min,
67 .extra2 = &rds_ib_sysctl_max_wr_max,
68 },
69 {
70 .ctl_name = CTL_UNNUMBERED,
71 .procname = "max_recv_wr",
72 .data = &rds_ib_sysctl_max_recv_wr,
73 .maxlen = sizeof(unsigned long),
74 .mode = 0644,
75 .proc_handler = &proc_doulongvec_minmax,
76 .extra1 = &rds_ib_sysctl_max_wr_min,
77 .extra2 = &rds_ib_sysctl_max_wr_max,
78 },
79 {
80 .ctl_name = CTL_UNNUMBERED,
81 .procname = "max_unsignaled_wr",
82 .data = &rds_ib_sysctl_max_unsig_wrs,
83 .maxlen = sizeof(unsigned long),
84 .mode = 0644,
85 .proc_handler = &proc_doulongvec_minmax,
86 .extra1 = &rds_ib_sysctl_max_unsig_wr_min,
87 .extra2 = &rds_ib_sysctl_max_unsig_wr_max,
88 },
89 {
90 .ctl_name = CTL_UNNUMBERED,
91 .procname = "max_unsignaled_bytes",
92 .data = &rds_ib_sysctl_max_unsig_bytes,
93 .maxlen = sizeof(unsigned long),
94 .mode = 0644,
95 .proc_handler = &proc_doulongvec_minmax,
96 .extra1 = &rds_ib_sysctl_max_unsig_bytes_min,
97 .extra2 = &rds_ib_sysctl_max_unsig_bytes_max,
98 },
99 {
100 .ctl_name = CTL_UNNUMBERED,
101 .procname = "max_recv_allocation",
102 .data = &rds_ib_sysctl_max_recv_allocation,
103 .maxlen = sizeof(unsigned long),
104 .mode = 0644,
105 .proc_handler = &proc_doulongvec_minmax,
106 },
107 {
108 .ctl_name = CTL_UNNUMBERED,
109 .procname = "flow_control",
110 .data = &rds_ib_sysctl_flow_control,
111 .maxlen = sizeof(rds_ib_sysctl_flow_control),
112 .mode = 0644,
113 .proc_handler = &proc_dointvec,
114 },
115 { .ctl_name = 0}
116};
117
118static struct ctl_path rds_ib_sysctl_path[] = {
119 { .procname = "net", .ctl_name = CTL_NET, },
120 { .procname = "rds", .ctl_name = CTL_UNNUMBERED, },
121 { .procname = "ib", .ctl_name = CTL_UNNUMBERED, },
122 { }
123};
124
125void rds_ib_sysctl_exit(void)
126{
127 if (rds_ib_sysctl_hdr)
128 unregister_sysctl_table(rds_ib_sysctl_hdr);
129}
130
131int __init rds_ib_sysctl_init(void)
132{
133 rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table);
134 if (rds_ib_sysctl_hdr == NULL)
135 return -ENOMEM;
136 return 0;
137}
diff --git a/net/rds/info.c b/net/rds/info.c
new file mode 100644
index 000000000000..1d885535214d
--- /dev/null
+++ b/net/rds/info.c
@@ -0,0 +1,241 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/percpu.h>
34#include <linux/seq_file.h>
35#include <linux/proc_fs.h>
36
37#include "rds.h"
38
39/*
40 * This file implements a getsockopt() call which copies a set of fixed
41 * sized structs into a user-specified buffer as a means of providing
42 * read-only information about RDS.
43 *
44 * For a given information source there are a given number of fixed sized
45 * structs at a given time. The structs are only copied if the user-specified
46 * buffer is big enough. The destination pages that make up the buffer
47 * are pinned for the duration of the copy.
48 *
49 * This gives us the following benefits:
50 *
51 * - simple implementation, no copy "position" across multiple calls
52 * - consistent snapshot of an info source
53 * - atomic copy works well with whatever locking info source has
54 * - one portable tool to get rds info across implementations
55 * - long-lived tool can get info without allocating
56 *
57 * at the following costs:
58 *
59 * - info source copy must be pinned, may be "large"
60 */
61
62struct rds_info_iterator {
63 struct page **pages;
64 void *addr;
65 unsigned long offset;
66};
67
68static DEFINE_SPINLOCK(rds_info_lock);
69static rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1];
70
71void rds_info_register_func(int optname, rds_info_func func)
72{
73 int offset = optname - RDS_INFO_FIRST;
74
75 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST);
76
77 spin_lock(&rds_info_lock);
78 BUG_ON(rds_info_funcs[offset] != NULL);
79 rds_info_funcs[offset] = func;
80 spin_unlock(&rds_info_lock);
81}
82
83void rds_info_deregister_func(int optname, rds_info_func func)
84{
85 int offset = optname - RDS_INFO_FIRST;
86
87 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST);
88
89 spin_lock(&rds_info_lock);
90 BUG_ON(rds_info_funcs[offset] != func);
91 rds_info_funcs[offset] = NULL;
92 spin_unlock(&rds_info_lock);
93}
94
95/*
96 * Typically we hold an atomic kmap across multiple rds_info_copy() calls
97 * because the kmap is so expensive. This must be called before using blocking
98 * operations while holding the mapping and as the iterator is torn down.
99 */
100void rds_info_iter_unmap(struct rds_info_iterator *iter)
101{
102 if (iter->addr != NULL) {
103 kunmap_atomic(iter->addr, KM_USER0);
104 iter->addr = NULL;
105 }
106}
107
108/*
109 * get_user_pages() called flush_dcache_page() on the pages for us.
110 */
111void rds_info_copy(struct rds_info_iterator *iter, void *data,
112 unsigned long bytes)
113{
114 unsigned long this;
115
116 while (bytes) {
117 if (iter->addr == NULL)
118 iter->addr = kmap_atomic(*iter->pages, KM_USER0);
119
120 this = min(bytes, PAGE_SIZE - iter->offset);
121
122 rdsdebug("page %p addr %p offset %lu this %lu data %p "
123 "bytes %lu\n", *iter->pages, iter->addr,
124 iter->offset, this, data, bytes);
125
126 memcpy(iter->addr + iter->offset, data, this);
127
128 data += this;
129 bytes -= this;
130 iter->offset += this;
131
132 if (iter->offset == PAGE_SIZE) {
133 kunmap_atomic(iter->addr, KM_USER0);
134 iter->addr = NULL;
135 iter->offset = 0;
136 iter->pages++;
137 }
138 }
139}
140
141/*
142 * @optval points to the userspace buffer that the information snapshot
143 * will be copied into.
144 *
145 * @optlen on input is the size of the buffer in userspace. @optlen
146 * on output is the size of the requested snapshot in bytes.
147 *
148 * This function returns -errno if there is a failure, particularly -ENOSPC
149 * if the given userspace buffer was not large enough to fit the snapshot.
150 * On success it returns the positive number of bytes of each array element
151 * in the snapshot.
152 */
153int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
154 int __user *optlen)
155{
156 struct rds_info_iterator iter;
157 struct rds_info_lengths lens;
158 unsigned long nr_pages = 0;
159 unsigned long start;
160 unsigned long i;
161 rds_info_func func;
162 struct page **pages = NULL;
163 int ret;
164 int len;
165 int total;
166
167 if (get_user(len, optlen)) {
168 ret = -EFAULT;
169 goto out;
170 }
171
172 /* check for all kinds of wrapping and the like */
173 start = (unsigned long)optval;
174 if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) {
175 ret = -EINVAL;
176 goto out;
177 }
178
179 /* a 0 len call is just trying to probe its length */
180 if (len == 0)
181 goto call_func;
182
183 nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK))
184 >> PAGE_SHIFT;
185
186 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
187 if (pages == NULL) {
188 ret = -ENOMEM;
189 goto out;
190 }
191 down_read(&current->mm->mmap_sem);
192 ret = get_user_pages(current, current->mm, start, nr_pages, 1, 0,
193 pages, NULL);
194 up_read(&current->mm->mmap_sem);
195 if (ret != nr_pages) {
196 if (ret > 0)
197 nr_pages = ret;
198 else
199 nr_pages = 0;
200 ret = -EAGAIN; /* XXX ? */
201 goto out;
202 }
203
204 rdsdebug("len %d nr_pages %lu\n", len, nr_pages);
205
206call_func:
207 func = rds_info_funcs[optname - RDS_INFO_FIRST];
208 if (func == NULL) {
209 ret = -ENOPROTOOPT;
210 goto out;
211 }
212
213 iter.pages = pages;
214 iter.addr = NULL;
215 iter.offset = start & (PAGE_SIZE - 1);
216
217 func(sock, len, &iter, &lens);
218 BUG_ON(lens.each == 0);
219
220 total = lens.nr * lens.each;
221
222 rds_info_iter_unmap(&iter);
223
224 if (total > len) {
225 len = total;
226 ret = -ENOSPC;
227 } else {
228 len = total;
229 ret = lens.each;
230 }
231
232 if (put_user(len, optlen))
233 ret = -EFAULT;
234
235out:
236 for (i = 0; pages != NULL && i < nr_pages; i++)
237 put_page(pages[i]);
238 kfree(pages);
239
240 return ret;
241}
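The probe-then-fetch pattern described in the comment above rds_info_getsockopt() (a zero-length call reports the required size, a second call copies the snapshot) would look roughly like this from userspace. This is only a sketch: SOL_RDS and the RDS_INFO_* optnames are assumptions taken from the RDS socket interface rather than from this file, and error handling is minimal.

#include <sys/socket.h>
#include <stdlib.h>

#ifndef SOL_RDS
#define SOL_RDS 276	/* assumed value; normally provided by the RDS headers */
#endif

/* Hypothetical helper: fetch one RDS info snapshot from an RDS socket.
 * 'optname' would be one of the RDS_INFO_* values this file dispatches on. */
static void *ex_fetch_rds_info(int fd, int optname, socklen_t *len_out)
{
	socklen_t len = 0;
	void *buf;

	/* Zero-length probe: the kernel fails with ENOSPC but writes the
	 * required snapshot size back through *optlen. */
	getsockopt(fd, SOL_RDS, optname, NULL, &len);

	buf = malloc(len);
	if (buf && getsockopt(fd, SOL_RDS, optname, buf, &len) < 0) {
		free(buf);	/* snapshot may have grown; caller can retry */
		buf = NULL;
	}
	*len_out = len;
	return buf;
}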
diff --git a/net/rds/info.h b/net/rds/info.h
new file mode 100644
index 000000000000..b6c052ca7d22
--- /dev/null
+++ b/net/rds/info.h
@@ -0,0 +1,30 @@
1#ifndef _RDS_INFO_H
2#define _RDS_INFO_H
3
4struct rds_info_lengths {
5 unsigned int nr;
6 unsigned int each;
7};
8
9struct rds_info_iterator;
10
11/*
12 * These functions must fill in the fields of @lens to reflect the size
13 * of the available info source. If the snapshot fits in @len then it
14 * should be copied using @iter. The caller will deduce if it was copied
15 * or not by comparing the lengths.
16 */
17typedef void (*rds_info_func)(struct socket *sock, unsigned int len,
18 struct rds_info_iterator *iter,
19 struct rds_info_lengths *lens);
20
21void rds_info_register_func(int optname, rds_info_func func);
22void rds_info_deregister_func(int optname, rds_info_func func);
23int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
24 int __user *optlen);
25void rds_info_copy(struct rds_info_iterator *iter, void *data,
26 unsigned long bytes);
27void rds_info_iter_unmap(struct rds_info_iterator *iter);
28
29
30#endif
diff --git a/net/rds/iw.c b/net/rds/iw.c
new file mode 100644
index 000000000000..1b56905c4c08
--- /dev/null
+++ b/net/rds/iw.c
@@ -0,0 +1,333 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/if.h>
36#include <linux/netdevice.h>
37#include <linux/inetdevice.h>
38#include <linux/if_arp.h>
39#include <linux/delay.h>
40
41#include "rds.h"
42#include "iw.h"
43
44unsigned int fastreg_pool_size = RDS_FASTREG_POOL_SIZE;
45unsigned int fastreg_message_size = RDS_FASTREG_SIZE + 1; /* +1 allows for unaligned MRs */
46
47module_param(fastreg_pool_size, int, 0444);
48MODULE_PARM_DESC(fastreg_pool_size, " Max number of fastreg MRs per device");
49module_param(fastreg_message_size, int, 0444);
50MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MRs)");
51
52struct list_head rds_iw_devices;
53
54DEFINE_SPINLOCK(iw_nodev_conns_lock);
55LIST_HEAD(iw_nodev_conns);
56
57void rds_iw_add_one(struct ib_device *device)
58{
59 struct rds_iw_device *rds_iwdev;
60 struct ib_device_attr *dev_attr;
61
62 /* Only handle iwarp devices */
63 if (device->node_type != RDMA_NODE_RNIC)
64 return;
65
66 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
67 if (!dev_attr)
68 return;
69
70 if (ib_query_device(device, dev_attr)) {
71 rdsdebug("Query device failed for %s\n", device->name);
72 goto free_attr;
73 }
74
75 rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL);
76 if (!rds_iwdev)
77 goto free_attr;
78
79 spin_lock_init(&rds_iwdev->spinlock);
80
81 rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
82 rds_iwdev->max_wrs = dev_attr->max_qp_wr;
83 rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE);
84
85 rds_iwdev->page_shift = max(PAGE_SHIFT, ffs(dev_attr->page_size_cap) - 1);
86
87 rds_iwdev->dev = device;
88 rds_iwdev->pd = ib_alloc_pd(device);
89 if (IS_ERR(rds_iwdev->pd))
90 goto free_dev;
91
92 if (!rds_iwdev->dma_local_lkey) {
93 if (device->node_type != RDMA_NODE_RNIC) {
94 rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd,
95 IB_ACCESS_LOCAL_WRITE);
96 } else {
97 rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd,
98 IB_ACCESS_REMOTE_READ |
99 IB_ACCESS_REMOTE_WRITE |
100 IB_ACCESS_LOCAL_WRITE);
101 }
102 if (IS_ERR(rds_iwdev->mr))
103 goto err_pd;
104 } else
105 rds_iwdev->mr = NULL;
106
107 rds_iwdev->mr_pool = rds_iw_create_mr_pool(rds_iwdev);
108 if (IS_ERR(rds_iwdev->mr_pool)) {
109 rds_iwdev->mr_pool = NULL;
110 goto err_mr;
111 }
112
113 INIT_LIST_HEAD(&rds_iwdev->cm_id_list);
114 INIT_LIST_HEAD(&rds_iwdev->conn_list);
115 list_add_tail(&rds_iwdev->list, &rds_iw_devices);
116
117 ib_set_client_data(device, &rds_iw_client, rds_iwdev);
118
119 goto free_attr;
120
121err_mr:
122 if (rds_iwdev->mr)
123 ib_dereg_mr(rds_iwdev->mr);
124err_pd:
125 ib_dealloc_pd(rds_iwdev->pd);
126free_dev:
127 kfree(rds_iwdev);
128free_attr:
129 kfree(dev_attr);
130}
131
132void rds_iw_remove_one(struct ib_device *device)
133{
134 struct rds_iw_device *rds_iwdev;
135 struct rds_iw_cm_id *i_cm_id, *next;
136
137 rds_iwdev = ib_get_client_data(device, &rds_iw_client);
138 if (!rds_iwdev)
139 return;
140
141 spin_lock_irq(&rds_iwdev->spinlock);
142 list_for_each_entry_safe(i_cm_id, next, &rds_iwdev->cm_id_list, list) {
143 list_del(&i_cm_id->list);
144 kfree(i_cm_id);
145 }
146 spin_unlock_irq(&rds_iwdev->spinlock);
147
148 rds_iw_remove_conns(rds_iwdev);
149
150 if (rds_iwdev->mr_pool)
151 rds_iw_destroy_mr_pool(rds_iwdev->mr_pool);
152
153 if (rds_iwdev->mr)
154 ib_dereg_mr(rds_iwdev->mr);
155
156 while (ib_dealloc_pd(rds_iwdev->pd)) {
157 rdsdebug("Failed to dealloc pd %p\n", rds_iwdev->pd);
158 msleep(1);
159 }
160
161 list_del(&rds_iwdev->list);
162 kfree(rds_iwdev);
163}
164
165struct ib_client rds_iw_client = {
166 .name = "rds_iw",
167 .add = rds_iw_add_one,
168 .remove = rds_iw_remove_one
169};
170
171static int rds_iw_conn_info_visitor(struct rds_connection *conn,
172 void *buffer)
173{
174 struct rds_info_rdma_connection *iinfo = buffer;
175 struct rds_iw_connection *ic;
176
177 /* We will only ever look at iWARP transports */
178 if (conn->c_trans != &rds_iw_transport)
179 return 0;
180
181 iinfo->src_addr = conn->c_laddr;
182 iinfo->dst_addr = conn->c_faddr;
183
184 memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
185 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
186 if (rds_conn_state(conn) == RDS_CONN_UP) {
187 struct rds_iw_device *rds_iwdev;
188 struct rdma_dev_addr *dev_addr;
189
190 ic = conn->c_transport_data;
191 dev_addr = &ic->i_cm_id->route.addr.dev_addr;
192
193 ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
194 ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
195
196 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
197 iinfo->max_send_wr = ic->i_send_ring.w_nr;
198 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
199 iinfo->max_send_sge = rds_iwdev->max_sge;
200 rds_iw_get_mr_info(rds_iwdev, iinfo);
201 }
202 return 1;
203}
204
205static void rds_iw_ic_info(struct socket *sock, unsigned int len,
206 struct rds_info_iterator *iter,
207 struct rds_info_lengths *lens)
208{
209 rds_for_each_conn_info(sock, len, iter, lens,
210 rds_iw_conn_info_visitor,
211 sizeof(struct rds_info_rdma_connection));
212}
213
214
215/*
216 * Early RDS/IB was built to only bind to an address if there is an IPoIB
217 * device with that address set.
218 *
219 * If it were me, I'd advocate for something more flexible. Sending and
220 * receiving should be device-agnostic. Transports would try and maintain
221 * connections between peers who have messages queued. Userspace would be
222 * allowed to influence which paths have priority. We could call userspace
223 * asserting this policy "routing".
224 */
225static int rds_iw_laddr_check(__be32 addr)
226{
227 int ret;
228 struct rdma_cm_id *cm_id;
229 struct sockaddr_in sin;
230
231 /* Create a CMA ID and try to bind it. This catches both
232 * IB and iWARP capable NICs.
233 */
234 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP);
235 if (!cm_id)
236 return -EADDRNOTAVAIL;
237
238 memset(&sin, 0, sizeof(sin));
239 sin.sin_family = AF_INET;
240 sin.sin_addr.s_addr = addr;
241
242 /* rdma_bind_addr will only succeed for IB & iWARP devices */
243 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
244 /* due to this, we will claim to support IB devices unless we
245 check node_type. */
246 if (ret || cm_id->device->node_type != RDMA_NODE_RNIC)
247 ret = -EADDRNOTAVAIL;
248
249 rdsdebug("addr %pI4 ret %d node type %d\n",
250 &addr, ret,
251 cm_id->device ? cm_id->device->node_type : -1);
252
253 rdma_destroy_id(cm_id);
254
255 return ret;
256}
257
258void rds_iw_exit(void)
259{
260 rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
261 rds_iw_remove_nodev_conns();
262 ib_unregister_client(&rds_iw_client);
263 rds_iw_sysctl_exit();
264 rds_iw_recv_exit();
265 rds_trans_unregister(&rds_iw_transport);
266}
267
268struct rds_transport rds_iw_transport = {
269 .laddr_check = rds_iw_laddr_check,
270 .xmit_complete = rds_iw_xmit_complete,
271 .xmit = rds_iw_xmit,
272 .xmit_cong_map = NULL,
273 .xmit_rdma = rds_iw_xmit_rdma,
274 .recv = rds_iw_recv,
275 .conn_alloc = rds_iw_conn_alloc,
276 .conn_free = rds_iw_conn_free,
277 .conn_connect = rds_iw_conn_connect,
278 .conn_shutdown = rds_iw_conn_shutdown,
279 .inc_copy_to_user = rds_iw_inc_copy_to_user,
280 .inc_purge = rds_iw_inc_purge,
281 .inc_free = rds_iw_inc_free,
282 .cm_initiate_connect = rds_iw_cm_initiate_connect,
283 .cm_handle_connect = rds_iw_cm_handle_connect,
284 .cm_connect_complete = rds_iw_cm_connect_complete,
285 .stats_info_copy = rds_iw_stats_info_copy,
286 .exit = rds_iw_exit,
287 .get_mr = rds_iw_get_mr,
288 .sync_mr = rds_iw_sync_mr,
289 .free_mr = rds_iw_free_mr,
290 .flush_mrs = rds_iw_flush_mrs,
291 .t_owner = THIS_MODULE,
292 .t_name = "iwarp",
293 .t_prefer_loopback = 1,
294};
295
296int __init rds_iw_init(void)
297{
298 int ret;
299
300 INIT_LIST_HEAD(&rds_iw_devices);
301
302 ret = ib_register_client(&rds_iw_client);
303 if (ret)
304 goto out;
305
306 ret = rds_iw_sysctl_init();
307 if (ret)
308 goto out_ibreg;
309
310 ret = rds_iw_recv_init();
311 if (ret)
312 goto out_sysctl;
313
314 ret = rds_trans_register(&rds_iw_transport);
315 if (ret)
316 goto out_recv;
317
318 rds_info_register_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
319
320 goto out;
321
322out_recv:
323 rds_iw_recv_exit();
324out_sysctl:
325 rds_iw_sysctl_exit();
326out_ibreg:
327 ib_unregister_client(&rds_iw_client);
328out:
329 return ret;
330}
331
332MODULE_LICENSE("GPL");
333
diff --git a/net/rds/iw.h b/net/rds/iw.h
new file mode 100644
index 000000000000..0ddda34f2a1c
--- /dev/null
+++ b/net/rds/iw.h
@@ -0,0 +1,395 @@
1#ifndef _RDS_IW_H
2#define _RDS_IW_H
3
4#include <rdma/ib_verbs.h>
5#include <rdma/rdma_cm.h>
6#include "rds.h"
7#include "rdma_transport.h"
8
9#define RDS_FASTREG_SIZE 20
10#define RDS_FASTREG_POOL_SIZE 2048
11
12#define RDS_IW_MAX_SGE 8
13#define RDS_IW_RECV_SGE 2
14
15#define RDS_IW_DEFAULT_RECV_WR 1024
16#define RDS_IW_DEFAULT_SEND_WR 256
17
18#define RDS_IW_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
19
20extern struct list_head rds_iw_devices;
21
22/*
23 * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to
 24 * try and minimize the amount of memory tied up in both the device and
25 * socket receive queues.
26 */
27/* page offset of the final full frag that fits in the page */
28#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE)
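/* Illustrative example (assuming a 4 KiB RDS_FRAG_SIZE, which is not defined
 * in this file): with 64 KiB pages RDS_PAGE_LAST_OFF is 15 * 4096 = 61440,
 * the offset of the 16th and last frag; with 4 KiB pages it is 0, since a
 * page then holds exactly one frag. */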
29struct rds_page_frag {
30 struct list_head f_item;
31 struct page *f_page;
32 unsigned long f_offset;
33 dma_addr_t f_mapped;
34};
35
36struct rds_iw_incoming {
37 struct list_head ii_frags;
38 struct rds_incoming ii_inc;
39};
40
41struct rds_iw_connect_private {
42 /* Add new fields at the end, and don't permute existing fields. */
43 __be32 dp_saddr;
44 __be32 dp_daddr;
45 u8 dp_protocol_major;
46 u8 dp_protocol_minor;
47 __be16 dp_protocol_minor_mask; /* bitmask */
48 __be32 dp_reserved1;
49 __be64 dp_ack_seq;
50 __be32 dp_credit; /* non-zero enables flow ctl */
51};
52
53struct rds_iw_scatterlist {
54 struct scatterlist *list;
55 unsigned int len;
56 int dma_len;
57 unsigned int dma_npages;
58 unsigned int bytes;
59};
60
61struct rds_iw_mapping {
62 spinlock_t m_lock; /* protect the mapping struct */
63 struct list_head m_list;
64 struct rds_iw_mr *m_mr;
65 uint32_t m_rkey;
66 struct rds_iw_scatterlist m_sg;
67};
68
69struct rds_iw_send_work {
70 struct rds_message *s_rm;
71
72 /* We should really put these into a union: */
73 struct rds_rdma_op *s_op;
74 struct rds_iw_mapping *s_mapping;
75 struct ib_mr *s_mr;
76 struct ib_fast_reg_page_list *s_page_list;
77 unsigned char s_remap_count;
78
79 struct ib_send_wr s_wr;
80 struct ib_sge s_sge[RDS_IW_MAX_SGE];
81 unsigned long s_queued;
82};
83
84struct rds_iw_recv_work {
85 struct rds_iw_incoming *r_iwinc;
86 struct rds_page_frag *r_frag;
87 struct ib_recv_wr r_wr;
88 struct ib_sge r_sge[2];
89};
90
91struct rds_iw_work_ring {
92 u32 w_nr;
93 u32 w_alloc_ptr;
94 u32 w_alloc_ctr;
95 u32 w_free_ptr;
96 atomic_t w_free_ctr;
97};
98
99struct rds_iw_device;
100
101struct rds_iw_connection {
102
103 struct list_head iw_node;
104 struct rds_iw_device *rds_iwdev;
105 struct rds_connection *conn;
106
107 /* alphabet soup, IBTA style */
108 struct rdma_cm_id *i_cm_id;
109 struct ib_pd *i_pd;
110 struct ib_mr *i_mr;
111 struct ib_cq *i_send_cq;
112 struct ib_cq *i_recv_cq;
113
114 /* tx */
115 struct rds_iw_work_ring i_send_ring;
116 struct rds_message *i_rm;
117 struct rds_header *i_send_hdrs;
118 u64 i_send_hdrs_dma;
119 struct rds_iw_send_work *i_sends;
120
121 /* rx */
122 struct mutex i_recv_mutex;
123 struct rds_iw_work_ring i_recv_ring;
124 struct rds_iw_incoming *i_iwinc;
125 u32 i_recv_data_rem;
126 struct rds_header *i_recv_hdrs;
127 u64 i_recv_hdrs_dma;
128 struct rds_iw_recv_work *i_recvs;
129 struct rds_page_frag i_frag;
130 u64 i_ack_recv; /* last ACK received */
131
132 /* sending acks */
133 unsigned long i_ack_flags;
134 u64 i_ack_next; /* next ACK to send */
135 struct rds_header *i_ack;
136 struct ib_send_wr i_ack_wr;
137 struct ib_sge i_ack_sge;
138 u64 i_ack_dma;
139 unsigned long i_ack_queued;
140
141 /* Flow control related information
142 *
 143	 * Our algorithm uses a pair of variables that we need to access
 144	 * atomically - one for the send credits, and one for the posted
 145	 * recv credits we need to transfer to the remote.
146 * Rather than protect them using a slow spinlock, we put both into
147 * a single atomic_t and update it using cmpxchg
148 */
149 atomic_t i_credits;
150
151 /* Protocol version specific information */
152 unsigned int i_flowctl:1; /* enable/disable flow ctl */
153 unsigned int i_dma_local_lkey:1;
154 unsigned int i_fastreg_posted:1; /* fastreg posted on this connection */
155 /* Batched completions */
156 unsigned int i_unsignaled_wrs;
157 long i_unsignaled_bytes;
158};
159
160/* This assumes that atomic_t is at least 32 bits */
161#define IB_GET_SEND_CREDITS(v) ((v) & 0xffff)
162#define IB_GET_POST_CREDITS(v) ((v) >> 16)
163#define IB_SET_SEND_CREDITS(v) ((v) & 0xffff)
164#define IB_SET_POST_CREDITS(v) ((v) << 16)
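/* Worked example of the credit encoding (illustrative values): with 3 send
 * credits and 10 posted recv credits outstanding, the packed word is
 *	IB_SET_SEND_CREDITS(3) | IB_SET_POST_CREDITS(10) == 0x000a0003,
 * and IB_GET_SEND_CREDITS()/IB_GET_POST_CREDITS() recover 3 and 10 again.
 * Send credits live in the low 16 bits, posted recv credits in the high 16. */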
165
166struct rds_iw_cm_id {
167 struct list_head list;
168 struct rdma_cm_id *cm_id;
169};
170
171struct rds_iw_device {
172 struct list_head list;
173 struct list_head cm_id_list;
174 struct list_head conn_list;
175 struct ib_device *dev;
176 struct ib_pd *pd;
177 struct ib_mr *mr;
178 struct rds_iw_mr_pool *mr_pool;
179 int page_shift;
180 int max_sge;
181 unsigned int max_wrs;
182 unsigned int dma_local_lkey:1;
183 spinlock_t spinlock; /* protect the above */
184};
185
186/* bits for i_ack_flags */
187#define IB_ACK_IN_FLIGHT 0
188#define IB_ACK_REQUESTED 1
189
190/* Magic WR_ID for ACKs */
191#define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL)
192#define RDS_IW_FAST_REG_WR_ID ((u64)0xefefefefefefefefULL)
193#define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL)
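/* These sentinel wr_ids presumably let the send completion handler tell ACK,
 * fastreg and local-invalidate completions apart from completions for
 * ordinary send ring entries. */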
194
195struct rds_iw_statistics {
196 uint64_t s_iw_connect_raced;
197 uint64_t s_iw_listen_closed_stale;
198 uint64_t s_iw_tx_cq_call;
199 uint64_t s_iw_tx_cq_event;
200 uint64_t s_iw_tx_ring_full;
201 uint64_t s_iw_tx_throttle;
202 uint64_t s_iw_tx_sg_mapping_failure;
203 uint64_t s_iw_tx_stalled;
204 uint64_t s_iw_tx_credit_updates;
205 uint64_t s_iw_rx_cq_call;
206 uint64_t s_iw_rx_cq_event;
207 uint64_t s_iw_rx_ring_empty;
208 uint64_t s_iw_rx_refill_from_cq;
209 uint64_t s_iw_rx_refill_from_thread;
210 uint64_t s_iw_rx_alloc_limit;
211 uint64_t s_iw_rx_credit_updates;
212 uint64_t s_iw_ack_sent;
213 uint64_t s_iw_ack_send_failure;
214 uint64_t s_iw_ack_send_delayed;
215 uint64_t s_iw_ack_send_piggybacked;
216 uint64_t s_iw_ack_received;
217 uint64_t s_iw_rdma_mr_alloc;
218 uint64_t s_iw_rdma_mr_free;
219 uint64_t s_iw_rdma_mr_used;
220 uint64_t s_iw_rdma_mr_pool_flush;
221 uint64_t s_iw_rdma_mr_pool_wait;
222 uint64_t s_iw_rdma_mr_pool_depleted;
223};
224
225extern struct workqueue_struct *rds_iw_wq;
226
227/*
228 * Fake ib_dma_sync_sg_for_{cpu,device} as long as ib_verbs.h
229 * doesn't define it.
230 */
231static inline void rds_iw_dma_sync_sg_for_cpu(struct ib_device *dev,
232 struct scatterlist *sg, unsigned int sg_dma_len, int direction)
233{
234 unsigned int i;
235
236 for (i = 0; i < sg_dma_len; ++i) {
237 ib_dma_sync_single_for_cpu(dev,
238 ib_sg_dma_address(dev, &sg[i]),
239 ib_sg_dma_len(dev, &sg[i]),
240 direction);
241 }
242}
243#define ib_dma_sync_sg_for_cpu rds_iw_dma_sync_sg_for_cpu
244
245static inline void rds_iw_dma_sync_sg_for_device(struct ib_device *dev,
246 struct scatterlist *sg, unsigned int sg_dma_len, int direction)
247{
248 unsigned int i;
249
250 for (i = 0; i < sg_dma_len; ++i) {
251 ib_dma_sync_single_for_device(dev,
252 ib_sg_dma_address(dev, &sg[i]),
253 ib_sg_dma_len(dev, &sg[i]),
254 direction);
255 }
256}
257#define ib_dma_sync_sg_for_device rds_iw_dma_sync_sg_for_device
258
259static inline u32 rds_iw_local_dma_lkey(struct rds_iw_connection *ic)
260{
261 return ic->i_dma_local_lkey ? ic->i_cm_id->device->local_dma_lkey : ic->i_mr->lkey;
262}
263
264/* ib.c */
265extern struct rds_transport rds_iw_transport;
266extern void rds_iw_add_one(struct ib_device *device);
267extern void rds_iw_remove_one(struct ib_device *device);
268extern struct ib_client rds_iw_client;
269
270extern unsigned int fastreg_pool_size;
271extern unsigned int fastreg_message_size;
272
273extern spinlock_t iw_nodev_conns_lock;
274extern struct list_head iw_nodev_conns;
275
276/* ib_cm.c */
277int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp);
278void rds_iw_conn_free(void *arg);
279int rds_iw_conn_connect(struct rds_connection *conn);
280void rds_iw_conn_shutdown(struct rds_connection *conn);
281void rds_iw_state_change(struct sock *sk);
282int __init rds_iw_listen_init(void);
283void rds_iw_listen_stop(void);
284void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...);
285int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
286 struct rdma_cm_event *event);
287int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id);
288void rds_iw_cm_connect_complete(struct rds_connection *conn,
289 struct rdma_cm_event *event);
290
291
292#define rds_iw_conn_error(conn, fmt...) \
293 __rds_iw_conn_error(conn, KERN_WARNING "RDS/IW: " fmt)
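/* Note: __rds_iw_conn_error() drops the connection before printing (see
 * iw_cm.c), so rds_iw_conn_error() means "log and tear down", not just a
 * printk. */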
294
295/* ib_rdma.c */
296int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
297int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
298void rds_iw_remove_nodev_conns(void);
299void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev);
300struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *);
301void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo);
302void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *);
303void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
304 struct rds_sock *rs, u32 *key_ret);
305void rds_iw_sync_mr(void *trans_private, int dir);
306void rds_iw_free_mr(void *trans_private, int invalidate);
307void rds_iw_flush_mrs(void);
308void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
309
310/* ib_recv.c */
311int __init rds_iw_recv_init(void);
312void rds_iw_recv_exit(void);
313int rds_iw_recv(struct rds_connection *conn);
314int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
315 gfp_t page_gfp, int prefill);
316void rds_iw_inc_purge(struct rds_incoming *inc);
317void rds_iw_inc_free(struct rds_incoming *inc);
318int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
319 size_t size);
320void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context);
321void rds_iw_recv_init_ring(struct rds_iw_connection *ic);
322void rds_iw_recv_clear_ring(struct rds_iw_connection *ic);
323void rds_iw_recv_init_ack(struct rds_iw_connection *ic);
324void rds_iw_attempt_ack(struct rds_iw_connection *ic);
325void rds_iw_ack_send_complete(struct rds_iw_connection *ic);
326u64 rds_iw_piggyb_ack(struct rds_iw_connection *ic);
327
328/* ib_ring.c */
329void rds_iw_ring_init(struct rds_iw_work_ring *ring, u32 nr);
330void rds_iw_ring_resize(struct rds_iw_work_ring *ring, u32 nr);
331u32 rds_iw_ring_alloc(struct rds_iw_work_ring *ring, u32 val, u32 *pos);
332void rds_iw_ring_free(struct rds_iw_work_ring *ring, u32 val);
333void rds_iw_ring_unalloc(struct rds_iw_work_ring *ring, u32 val);
334int rds_iw_ring_empty(struct rds_iw_work_ring *ring);
335int rds_iw_ring_low(struct rds_iw_work_ring *ring);
336u32 rds_iw_ring_oldest(struct rds_iw_work_ring *ring);
337u32 rds_iw_ring_completed(struct rds_iw_work_ring *ring, u32 wr_id, u32 oldest);
338extern wait_queue_head_t rds_iw_ring_empty_wait;
339
340/* ib_send.c */
341void rds_iw_xmit_complete(struct rds_connection *conn);
342int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
343 unsigned int hdr_off, unsigned int sg, unsigned int off);
344void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context);
345void rds_iw_send_init_ring(struct rds_iw_connection *ic);
346void rds_iw_send_clear_ring(struct rds_iw_connection *ic);
347int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op);
348void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits);
349void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted);
350int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted,
351 u32 *adv_credits, int need_posted);
352
353/* ib_stats.c */
354DECLARE_PER_CPU(struct rds_iw_statistics, rds_iw_stats);
355#define rds_iw_stats_inc(member) rds_stats_inc_which(rds_iw_stats, member)
356unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter,
357 unsigned int avail);
358
359/* ib_sysctl.c */
360int __init rds_iw_sysctl_init(void);
361void rds_iw_sysctl_exit(void);
362extern unsigned long rds_iw_sysctl_max_send_wr;
363extern unsigned long rds_iw_sysctl_max_recv_wr;
364extern unsigned long rds_iw_sysctl_max_unsig_wrs;
365extern unsigned long rds_iw_sysctl_max_unsig_bytes;
366extern unsigned long rds_iw_sysctl_max_recv_allocation;
367extern unsigned int rds_iw_sysctl_flow_control;
368extern ctl_table rds_iw_sysctl_table[];
369
370/*
371 * Helper functions for getting/setting the header and data SGEs in
372 * RDS packets (not RDMA)
373 */
374static inline struct ib_sge *
375rds_iw_header_sge(struct rds_iw_connection *ic, struct ib_sge *sge)
376{
377 return &sge[0];
378}
379
380static inline struct ib_sge *
381rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge)
382{
383 return &sge[1];
384}
385
386static inline void rds_iw_set_64bit(u64 *ptr, u64 val)
387{
388#if BITS_PER_LONG == 64
389 *ptr = val;
390#else
391 set_64bit(ptr, val);
392#endif
393}
394
395#endif
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
new file mode 100644
index 000000000000..57ecb3d4b8a5
--- /dev/null
+++ b/net/rds/iw_cm.c
@@ -0,0 +1,750 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/vmalloc.h>
36
37#include "rds.h"
38#include "iw.h"
39
40/*
41 * Set the selected protocol version
42 */
43static void rds_iw_set_protocol(struct rds_connection *conn, unsigned int version)
44{
45 conn->c_version = version;
46}
47
48/*
49 * Set up flow control
50 */
51static void rds_iw_set_flow_control(struct rds_connection *conn, u32 credits)
52{
53 struct rds_iw_connection *ic = conn->c_transport_data;
54
55 if (rds_iw_sysctl_flow_control && credits != 0) {
56 /* We're doing flow control */
57 ic->i_flowctl = 1;
58 rds_iw_send_add_credits(conn, credits);
59 } else {
60 ic->i_flowctl = 0;
61 }
62}
63
64/*
65 * Connection established.
66 * We get here for both outgoing and incoming connection.
67 */
68void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
69{
70 const struct rds_iw_connect_private *dp = NULL;
71 struct rds_iw_connection *ic = conn->c_transport_data;
72 struct rds_iw_device *rds_iwdev;
73 int err;
74
75 if (event->param.conn.private_data_len) {
76 dp = event->param.conn.private_data;
77
78 rds_iw_set_protocol(conn,
79 RDS_PROTOCOL(dp->dp_protocol_major,
80 dp->dp_protocol_minor));
81 rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
82 }
83
84 /* update ib_device with this local ipaddr & conn */
85 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
86 err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id);
87 if (err)
 88		printk(KERN_ERR "rds_iw_update_cm_id failed (%d)\n", err);
89 err = rds_iw_add_conn(rds_iwdev, conn);
90 if (err)
91 printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err);
92
93 /* If the peer gave us the last packet it saw, process this as if
94 * we had received a regular ACK. */
95 if (dp && dp->dp_ack_seq)
96 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
97
98 printk(KERN_NOTICE "RDS/IW: connected to %pI4<->%pI4 version %u.%u%s\n",
99 &conn->c_laddr, &conn->c_faddr,
100 RDS_PROTOCOL_MAJOR(conn->c_version),
101 RDS_PROTOCOL_MINOR(conn->c_version),
102 ic->i_flowctl ? ", flow control" : "");
103
104 rds_connect_complete(conn);
105}
106
107static void rds_iw_cm_fill_conn_param(struct rds_connection *conn,
108 struct rdma_conn_param *conn_param,
109 struct rds_iw_connect_private *dp,
110 u32 protocol_version)
111{
112 struct rds_iw_connection *ic = conn->c_transport_data;
113
114 memset(conn_param, 0, sizeof(struct rdma_conn_param));
115 /* XXX tune these? */
116 conn_param->responder_resources = 1;
117 conn_param->initiator_depth = 1;
118
119 if (dp) {
120 memset(dp, 0, sizeof(*dp));
121 dp->dp_saddr = conn->c_laddr;
122 dp->dp_daddr = conn->c_faddr;
123 dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
124 dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
125 dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IW_SUPPORTED_PROTOCOLS);
126 dp->dp_ack_seq = rds_iw_piggyb_ack(ic);
127
128 /* Advertise flow control */
129 if (ic->i_flowctl) {
130 unsigned int credits;
131
132 credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
133 dp->dp_credit = cpu_to_be32(credits);
134 atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits);
135 }
136
137 conn_param->private_data = dp;
138 conn_param->private_data_len = sizeof(*dp);
139 }
140}
141
142static void rds_iw_cq_event_handler(struct ib_event *event, void *data)
143{
144 rdsdebug("event %u data %p\n", event->event, data);
145}
146
147static void rds_iw_qp_event_handler(struct ib_event *event, void *data)
148{
149 struct rds_connection *conn = data;
150 struct rds_iw_connection *ic = conn->c_transport_data;
151
152 rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event);
153
154 switch (event->event) {
155 case IB_EVENT_COMM_EST:
156 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
157 break;
158 case IB_EVENT_QP_REQ_ERR:
159 case IB_EVENT_QP_FATAL:
160 default:
 161		rds_iw_conn_error(conn, "Fatal QP Event %u - connection %pI4->%pI4...reconnecting\n",
162 event->event, &conn->c_laddr,
163 &conn->c_faddr);
164 break;
165 }
166}
167
168/*
169 * Create a QP
170 */
171static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr,
172 struct rds_iw_device *rds_iwdev,
173 struct rds_iw_work_ring *send_ring,
174 void (*send_cq_handler)(struct ib_cq *, void *),
175 struct rds_iw_work_ring *recv_ring,
176 void (*recv_cq_handler)(struct ib_cq *, void *),
177 void *context)
178{
179 struct ib_device *dev = rds_iwdev->dev;
180 unsigned int send_size, recv_size;
181 int ret;
182
 183	/* The offset of 1 is to accommodate the additional ACK WR. */
184 send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1);
185 recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1);
186 rds_iw_ring_resize(send_ring, send_size - 1);
187 rds_iw_ring_resize(recv_ring, recv_size - 1);
188
189 memset(attr, 0, sizeof(*attr));
190 attr->event_handler = rds_iw_qp_event_handler;
191 attr->qp_context = context;
192 attr->cap.max_send_wr = send_size;
193 attr->cap.max_recv_wr = recv_size;
194 attr->cap.max_send_sge = rds_iwdev->max_sge;
195 attr->cap.max_recv_sge = RDS_IW_RECV_SGE;
196 attr->sq_sig_type = IB_SIGNAL_REQ_WR;
197 attr->qp_type = IB_QPT_RC;
198
199 attr->send_cq = ib_create_cq(dev, send_cq_handler,
200 rds_iw_cq_event_handler,
201 context, send_size, 0);
202 if (IS_ERR(attr->send_cq)) {
203 ret = PTR_ERR(attr->send_cq);
204 attr->send_cq = NULL;
205 rdsdebug("ib_create_cq send failed: %d\n", ret);
206 goto out;
207 }
208
209 attr->recv_cq = ib_create_cq(dev, recv_cq_handler,
210 rds_iw_cq_event_handler,
211 context, recv_size, 0);
212 if (IS_ERR(attr->recv_cq)) {
213 ret = PTR_ERR(attr->recv_cq);
214 attr->recv_cq = NULL;
 215		rdsdebug("ib_create_cq recv failed: %d\n", ret);
216 goto out;
217 }
218
219 ret = ib_req_notify_cq(attr->send_cq, IB_CQ_NEXT_COMP);
220 if (ret) {
221 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
222 goto out;
223 }
224
225 ret = ib_req_notify_cq(attr->recv_cq, IB_CQ_SOLICITED);
226 if (ret) {
227 rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
228 goto out;
229 }
230
231out:
232 if (ret) {
233 if (attr->send_cq)
234 ib_destroy_cq(attr->send_cq);
235 if (attr->recv_cq)
236 ib_destroy_cq(attr->recv_cq);
237 }
238 return ret;
239}
240
241/*
242 * This needs to be very careful to not leave IS_ERR pointers around for
243 * cleanup to trip over.
244 */
245static int rds_iw_setup_qp(struct rds_connection *conn)
246{
247 struct rds_iw_connection *ic = conn->c_transport_data;
248 struct ib_device *dev = ic->i_cm_id->device;
249 struct ib_qp_init_attr attr;
250 struct rds_iw_device *rds_iwdev;
251 int ret;
252
253 /* rds_iw_add_one creates a rds_iw_device object per IB device,
254 * and allocates a protection domain, memory range and MR pool
255 * for each. If that fails for any reason, it will not register
256 * the rds_iwdev at all.
257 */
258 rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
259 if (rds_iwdev == NULL) {
260 if (printk_ratelimit())
261 printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
262 dev->name);
263 return -EOPNOTSUPP;
264 }
265
266 /* Protection domain and memory range */
267 ic->i_pd = rds_iwdev->pd;
268 ic->i_mr = rds_iwdev->mr;
269
270 ret = rds_iw_init_qp_attrs(&attr, rds_iwdev,
271 &ic->i_send_ring, rds_iw_send_cq_comp_handler,
272 &ic->i_recv_ring, rds_iw_recv_cq_comp_handler,
273 conn);
274 if (ret < 0)
275 goto out;
276
277 ic->i_send_cq = attr.send_cq;
278 ic->i_recv_cq = attr.recv_cq;
279
280 /*
281 * XXX this can fail if max_*_wr is too large? Are we supposed
282 * to back off until we get a value that the hardware can support?
283 */
284 ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
285 if (ret) {
286 rdsdebug("rdma_create_qp failed: %d\n", ret);
287 goto out;
288 }
289
290 ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
291 ic->i_send_ring.w_nr *
292 sizeof(struct rds_header),
293 &ic->i_send_hdrs_dma, GFP_KERNEL);
294 if (ic->i_send_hdrs == NULL) {
295 ret = -ENOMEM;
296 rdsdebug("ib_dma_alloc_coherent send failed\n");
297 goto out;
298 }
299
300 ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
301 ic->i_recv_ring.w_nr *
302 sizeof(struct rds_header),
303 &ic->i_recv_hdrs_dma, GFP_KERNEL);
304 if (ic->i_recv_hdrs == NULL) {
305 ret = -ENOMEM;
306 rdsdebug("ib_dma_alloc_coherent recv failed\n");
307 goto out;
308 }
309
310 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
311 &ic->i_ack_dma, GFP_KERNEL);
312 if (ic->i_ack == NULL) {
313 ret = -ENOMEM;
314 rdsdebug("ib_dma_alloc_coherent ack failed\n");
315 goto out;
316 }
317
318 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work));
319 if (ic->i_sends == NULL) {
320 ret = -ENOMEM;
321 rdsdebug("send allocation failed\n");
322 goto out;
323 }
324 rds_iw_send_init_ring(ic);
325
326 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work));
327 if (ic->i_recvs == NULL) {
328 ret = -ENOMEM;
329 rdsdebug("recv allocation failed\n");
330 goto out;
331 }
332
333 rds_iw_recv_init_ring(ic);
334 rds_iw_recv_init_ack(ic);
335
336 /* Post receive buffers - as a side effect, this will update
337 * the posted credit count. */
338 rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);
339
340 rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr,
341 ic->i_send_cq, ic->i_recv_cq);
342
343out:
344 return ret;
345}
346
347static u32 rds_iw_protocol_compatible(const struct rds_iw_connect_private *dp)
348{
349 u16 common;
350 u32 version = 0;
351
352 /* rdma_cm private data is odd - when there is any private data in the
 353	 * request, we will be given a pretty large buffer without being told the
 354	 * original size. The only way to tell the difference is by looking at
355 * the contents, which are initialized to zero.
356 * If the protocol version fields aren't set, this is a connection attempt
 357	 * from an older version. This could be 3.0 or 2.0 - we can't tell.
358 * We really should have changed this for OFED 1.3 :-( */
359 if (dp->dp_protocol_major == 0)
360 return RDS_PROTOCOL_3_0;
361
362 common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IW_SUPPORTED_PROTOCOLS;
363 if (dp->dp_protocol_major == 3 && common) {
364 version = RDS_PROTOCOL_3_0;
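		/* Each shift below corresponds to one minor version, so we end
		 * up at the highest minor version set in the common mask. */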
365 while ((common >>= 1) != 0)
366 version++;
367 } else if (printk_ratelimit()) {
368 printk(KERN_NOTICE "RDS: Connection from %pI4 using "
369 "incompatible protocol version %u.%u\n",
370 &dp->dp_saddr,
371 dp->dp_protocol_major,
372 dp->dp_protocol_minor);
373 }
374 return version;
375}
376
377int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
378 struct rdma_cm_event *event)
379{
380 const struct rds_iw_connect_private *dp = event->param.conn.private_data;
381 struct rds_iw_connect_private dp_rep;
382 struct rds_connection *conn = NULL;
383 struct rds_iw_connection *ic = NULL;
384 struct rdma_conn_param conn_param;
385 struct rds_iw_device *rds_iwdev;
386 u32 version;
387 int err, destroy = 1;
388
389 /* Check whether the remote protocol version matches ours. */
390 version = rds_iw_protocol_compatible(dp);
391 if (!version)
392 goto out;
393
394 rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u\n",
395 &dp->dp_saddr, &dp->dp_daddr,
396 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version));
397
398 conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_iw_transport,
399 GFP_KERNEL);
400 if (IS_ERR(conn)) {
401 rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
402 conn = NULL;
403 goto out;
404 }
405
406 /*
407 * The connection request may occur while the
 408	 * previous connection exists, e.g. in the case of failover.
409 * But as connections may be initiated simultaneously
410 * by both hosts, we have a random backoff mechanism -
411 * see the comment above rds_queue_reconnect()
412 */
413 mutex_lock(&conn->c_cm_lock);
414 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
415 if (rds_conn_state(conn) == RDS_CONN_UP) {
416 rdsdebug("incoming connect while connecting\n");
417 rds_conn_drop(conn);
418 rds_iw_stats_inc(s_iw_listen_closed_stale);
419 } else
420 if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
421 /* Wait and see - our connect may still be succeeding */
422 rds_iw_stats_inc(s_iw_connect_raced);
423 }
424 mutex_unlock(&conn->c_cm_lock);
425 goto out;
426 }
427
428 ic = conn->c_transport_data;
429
430 rds_iw_set_protocol(conn, version);
431 rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
432
433 /* If the peer gave us the last packet it saw, process this as if
434 * we had received a regular ACK. */
435 if (dp->dp_ack_seq)
436 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
437
438 BUG_ON(cm_id->context);
439 BUG_ON(ic->i_cm_id);
440
441 ic->i_cm_id = cm_id;
442 cm_id->context = conn;
443
444 rds_iwdev = ib_get_client_data(cm_id->device, &rds_iw_client);
445 ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey;
446
 447	/* We got halfway through setting up the connection; if we
 448	 * fail now, we have to take the long route out of this mess. */
449 destroy = 0;
450
451 err = rds_iw_setup_qp(conn);
452 if (err) {
453 rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err);
454 goto out;
455 }
456
457 rds_iw_cm_fill_conn_param(conn, &conn_param, &dp_rep, version);
458
459 /* rdma_accept() calls rdma_reject() internally if it fails */
460 err = rdma_accept(cm_id, &conn_param);
461 mutex_unlock(&conn->c_cm_lock);
462 if (err) {
463 rds_iw_conn_error(conn, "rdma_accept failed (%d)\n", err);
464 goto out;
465 }
466
467 return 0;
468
469out:
470 rdma_reject(cm_id, NULL, 0);
471 return destroy;
472}
473
474
475int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id)
476{
477 struct rds_connection *conn = cm_id->context;
478 struct rds_iw_connection *ic = conn->c_transport_data;
479 struct rdma_conn_param conn_param;
480 struct rds_iw_connect_private dp;
481 int ret;
482
483 /* If the peer doesn't do protocol negotiation, we must
484 * default to RDSv3.0 */
485 rds_iw_set_protocol(conn, RDS_PROTOCOL_3_0);
486 ic->i_flowctl = rds_iw_sysctl_flow_control; /* advertise flow control */
487
488 ret = rds_iw_setup_qp(conn);
489 if (ret) {
490 rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", ret);
491 goto out;
492 }
493
494 rds_iw_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION);
495
496 ret = rdma_connect(cm_id, &conn_param);
497 if (ret)
498 rds_iw_conn_error(conn, "rdma_connect failed (%d)\n", ret);
499
500out:
501 /* Beware - returning non-zero tells the rdma_cm to destroy
502 * the cm_id. We should certainly not do it as long as we still
503 * "own" the cm_id. */
504 if (ret) {
505 struct rds_iw_connection *ic = conn->c_transport_data;
506
507 if (ic->i_cm_id == cm_id)
508 ret = 0;
509 }
510 return ret;
511}
512
513int rds_iw_conn_connect(struct rds_connection *conn)
514{
515 struct rds_iw_connection *ic = conn->c_transport_data;
516 struct rds_iw_device *rds_iwdev;
517 struct sockaddr_in src, dest;
518 int ret;
519
 520	/* XXX I wonder what effect the port space has */
521 /* delegate cm event handler to rdma_transport */
522 ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
523 RDMA_PS_TCP);
524 if (IS_ERR(ic->i_cm_id)) {
525 ret = PTR_ERR(ic->i_cm_id);
526 ic->i_cm_id = NULL;
527 rdsdebug("rdma_create_id() failed: %d\n", ret);
528 goto out;
529 }
530
531 rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn);
532
533 src.sin_family = AF_INET;
534 src.sin_addr.s_addr = (__force u32)conn->c_laddr;
535 src.sin_port = (__force u16)htons(0);
536
537 /* First, bind to the local address and device. */
538 ret = rdma_bind_addr(ic->i_cm_id, (struct sockaddr *) &src);
539 if (ret) {
540 rdsdebug("rdma_bind_addr(%pI4) failed: %d\n",
541 &conn->c_laddr, ret);
542 rdma_destroy_id(ic->i_cm_id);
543 ic->i_cm_id = NULL;
544 goto out;
545 }
546
547 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
548 ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey;
549
550 dest.sin_family = AF_INET;
551 dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
552 dest.sin_port = (__force u16)htons(RDS_PORT);
553
554 ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
555 (struct sockaddr *)&dest,
556 RDS_RDMA_RESOLVE_TIMEOUT_MS);
557 if (ret) {
558 rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id,
559 ret);
560 rdma_destroy_id(ic->i_cm_id);
561 ic->i_cm_id = NULL;
562 }
563
564out:
565 return ret;
566}
567
568/*
569 * This is so careful about only cleaning up resources that were built up
570 * so that it can be called at any point during startup. In fact it
571 * can be called multiple times for a given connection.
572 */
573void rds_iw_conn_shutdown(struct rds_connection *conn)
574{
575 struct rds_iw_connection *ic = conn->c_transport_data;
576 int err = 0;
577 struct ib_qp_attr qp_attr;
578
579 rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id,
580 ic->i_pd, ic->i_send_cq, ic->i_recv_cq,
581 ic->i_cm_id ? ic->i_cm_id->qp : NULL);
582
583 if (ic->i_cm_id) {
584 struct ib_device *dev = ic->i_cm_id->device;
585
586 rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
587 err = rdma_disconnect(ic->i_cm_id);
588 if (err) {
589 /* Actually this may happen quite frequently, when
590 * an outgoing connect raced with an incoming connect.
591 */
592 rdsdebug("rds_iw_conn_shutdown: failed to disconnect,"
593 " cm: %p err %d\n", ic->i_cm_id, err);
594 }
595
596 if (ic->i_cm_id->qp) {
597 qp_attr.qp_state = IB_QPS_ERR;
598 ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
599 }
600
601 wait_event(rds_iw_ring_empty_wait,
602 rds_iw_ring_empty(&ic->i_send_ring) &&
603 rds_iw_ring_empty(&ic->i_recv_ring));
604
605 if (ic->i_send_hdrs)
606 ib_dma_free_coherent(dev,
607 ic->i_send_ring.w_nr *
608 sizeof(struct rds_header),
609 ic->i_send_hdrs,
610 ic->i_send_hdrs_dma);
611
612 if (ic->i_recv_hdrs)
613 ib_dma_free_coherent(dev,
614 ic->i_recv_ring.w_nr *
615 sizeof(struct rds_header),
616 ic->i_recv_hdrs,
617 ic->i_recv_hdrs_dma);
618
619 if (ic->i_ack)
620 ib_dma_free_coherent(dev, sizeof(struct rds_header),
621 ic->i_ack, ic->i_ack_dma);
622
623 if (ic->i_sends)
624 rds_iw_send_clear_ring(ic);
625 if (ic->i_recvs)
626 rds_iw_recv_clear_ring(ic);
627
628 if (ic->i_cm_id->qp)
629 rdma_destroy_qp(ic->i_cm_id);
630 if (ic->i_send_cq)
631 ib_destroy_cq(ic->i_send_cq);
632 if (ic->i_recv_cq)
633 ib_destroy_cq(ic->i_recv_cq);
634
635 /*
636 * If associated with an rds_iw_device:
637 * Move connection back to the nodev list.
638 * Remove cm_id from the device cm_id list.
639 */
640 if (ic->rds_iwdev) {
641
642 spin_lock_irq(&ic->rds_iwdev->spinlock);
643 BUG_ON(list_empty(&ic->iw_node));
644 list_del(&ic->iw_node);
645 spin_unlock_irq(&ic->rds_iwdev->spinlock);
646
647 spin_lock_irq(&iw_nodev_conns_lock);
648 list_add_tail(&ic->iw_node, &iw_nodev_conns);
649 spin_unlock_irq(&iw_nodev_conns_lock);
650 rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
651 ic->rds_iwdev = NULL;
652 }
653
654 rdma_destroy_id(ic->i_cm_id);
655
656 ic->i_cm_id = NULL;
657 ic->i_pd = NULL;
658 ic->i_mr = NULL;
659 ic->i_send_cq = NULL;
660 ic->i_recv_cq = NULL;
661 ic->i_send_hdrs = NULL;
662 ic->i_recv_hdrs = NULL;
663 ic->i_ack = NULL;
664 }
665 BUG_ON(ic->rds_iwdev);
666
667 /* Clear pending transmit */
668 if (ic->i_rm) {
669 rds_message_put(ic->i_rm);
670 ic->i_rm = NULL;
671 }
672
673 /* Clear the ACK state */
674 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
675 rds_iw_set_64bit(&ic->i_ack_next, 0);
676 ic->i_ack_recv = 0;
677
678 /* Clear flow control state */
679 ic->i_flowctl = 0;
680 atomic_set(&ic->i_credits, 0);
681
682 rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr);
683 rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr);
684
685 if (ic->i_iwinc) {
686 rds_inc_put(&ic->i_iwinc->ii_inc);
687 ic->i_iwinc = NULL;
688 }
689
690 vfree(ic->i_sends);
691 ic->i_sends = NULL;
692 vfree(ic->i_recvs);
693 ic->i_recvs = NULL;
694 rdsdebug("shutdown complete\n");
695}
696
697int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
698{
699 struct rds_iw_connection *ic;
700 unsigned long flags;
701
702 /* XXX too lazy? */
703 ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL);
704 if (ic == NULL)
705 return -ENOMEM;
706
707 INIT_LIST_HEAD(&ic->iw_node);
708 mutex_init(&ic->i_recv_mutex);
709
710 /*
711 * rds_iw_conn_shutdown() waits for these to be emptied so they
712 * must be initialized before it can be called.
713 */
714 rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr);
715 rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr);
716
717 ic->conn = conn;
718 conn->c_transport_data = ic;
719
720 spin_lock_irqsave(&iw_nodev_conns_lock, flags);
721 list_add_tail(&ic->iw_node, &iw_nodev_conns);
722 spin_unlock_irqrestore(&iw_nodev_conns_lock, flags);
723
724
725 rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data);
726 return 0;
727}
728
729void rds_iw_conn_free(void *arg)
730{
731 struct rds_iw_connection *ic = arg;
732 rdsdebug("ic %p\n", ic);
733 list_del(&ic->iw_node);
734 kfree(ic);
735}
736
737/*
738 * An error occurred on the connection
739 */
740void
741__rds_iw_conn_error(struct rds_connection *conn, const char *fmt, ...)
742{
743 va_list ap;
744
745 rds_conn_drop(conn);
746
747 va_start(ap, fmt);
748 vprintk(fmt, ap);
749 va_end(ap);
750}
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
new file mode 100644
index 000000000000..1c02a8f952d0
--- /dev/null
+++ b/net/rds/iw_rdma.c
@@ -0,0 +1,888 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34
35#include "rds.h"
36#include "rdma.h"
37#include "iw.h"
38
39
40/*
41 * This is stored as mr->r_trans_private.
42 */
43struct rds_iw_mr {
44 struct rds_iw_device *device;
45 struct rds_iw_mr_pool *pool;
46 struct rdma_cm_id *cm_id;
47
48 struct ib_mr *mr;
49 struct ib_fast_reg_page_list *page_list;
50
51 struct rds_iw_mapping mapping;
52 unsigned char remap_count;
53};
54
55/*
56 * Our own little MR pool
57 */
58struct rds_iw_mr_pool {
59 struct rds_iw_device *device; /* back ptr to the device that owns us */
60
61 struct mutex flush_lock; /* serialize fmr invalidate */
62 struct work_struct flush_worker; /* flush worker */
63
64 spinlock_t list_lock; /* protect variables below */
65 atomic_t item_count; /* total # of MRs */
 66	atomic_t		item_count;		/* total # of MRs */
 67	atomic_t		dirty_count;		/* # of dirty MRs */
67 struct list_head dirty_list; /* dirty mappings */
 68	struct list_head	clean_list;		/* unused & unmapped MRs */
69 atomic_t free_pinned; /* memory pinned by free MRs */
70 unsigned long max_message_size; /* in pages */
71 unsigned long max_items;
72 unsigned long max_items_soft;
73 unsigned long max_free_pinned;
74 int max_pages;
75};
76
77static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all);
78static void rds_iw_mr_pool_flush_worker(struct work_struct *work);
79static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
80static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
81 struct rds_iw_mr *ibmr,
82 struct scatterlist *sg, unsigned int nents);
83static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
84static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
85 struct list_head *unmap_list,
86 struct list_head *kill_list);
87static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
88
89static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id)
90{
91 struct rds_iw_device *iwdev;
92 struct rds_iw_cm_id *i_cm_id;
93
94 *rds_iwdev = NULL;
95 *cm_id = NULL;
96
97 list_for_each_entry(iwdev, &rds_iw_devices, list) {
98 spin_lock_irq(&iwdev->spinlock);
99 list_for_each_entry(i_cm_id, &iwdev->cm_id_list, list) {
100 struct sockaddr_in *src_addr, *dst_addr;
101
102 src_addr = (struct sockaddr_in *)&i_cm_id->cm_id->route.addr.src_addr;
103 dst_addr = (struct sockaddr_in *)&i_cm_id->cm_id->route.addr.dst_addr;
104
105 rdsdebug("local ipaddr = %x port %d, "
106 "remote ipaddr = %x port %d"
107 "..looking for %x port %d, "
108 "remote ipaddr = %x port %d\n",
109 src_addr->sin_addr.s_addr,
110 src_addr->sin_port,
111 dst_addr->sin_addr.s_addr,
112 dst_addr->sin_port,
113 rs->rs_bound_addr,
114 rs->rs_bound_port,
115 rs->rs_conn_addr,
116 rs->rs_conn_port);
117#ifdef WORKING_TUPLE_DETECTION
118 if (src_addr->sin_addr.s_addr == rs->rs_bound_addr &&
119 src_addr->sin_port == rs->rs_bound_port &&
120 dst_addr->sin_addr.s_addr == rs->rs_conn_addr &&
121 dst_addr->sin_port == rs->rs_conn_port) {
122#else
123 /* FIXME - needs to compare the local and remote
124 * ipaddr/port tuple, but the ipaddr is the only
 125			 * available information in the rds_sock (as the rest are
 126			 * zeroed). It doesn't appear to be properly populated
127 * during connection setup...
128 */
129 if (src_addr->sin_addr.s_addr == rs->rs_bound_addr) {
130#endif
131 spin_unlock_irq(&iwdev->spinlock);
132 *rds_iwdev = iwdev;
133 *cm_id = i_cm_id->cm_id;
134 return 0;
135 }
136 }
137 spin_unlock_irq(&iwdev->spinlock);
138 }
139
140 return 1;
141}
142
143static int rds_iw_add_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id)
144{
145 struct rds_iw_cm_id *i_cm_id;
146
147 i_cm_id = kmalloc(sizeof *i_cm_id, GFP_KERNEL);
148 if (!i_cm_id)
149 return -ENOMEM;
150
151 i_cm_id->cm_id = cm_id;
152
153 spin_lock_irq(&rds_iwdev->spinlock);
154 list_add_tail(&i_cm_id->list, &rds_iwdev->cm_id_list);
155 spin_unlock_irq(&rds_iwdev->spinlock);
156
157 return 0;
158}
159
160void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id)
161{
162 struct rds_iw_cm_id *i_cm_id;
163
164 spin_lock_irq(&rds_iwdev->spinlock);
165 list_for_each_entry(i_cm_id, &rds_iwdev->cm_id_list, list) {
166 if (i_cm_id->cm_id == cm_id) {
167 list_del(&i_cm_id->list);
168 kfree(i_cm_id);
169 break;
170 }
171 }
172 spin_unlock_irq(&rds_iwdev->spinlock);
173}
174
175
176int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id)
177{
178 struct sockaddr_in *src_addr, *dst_addr;
179 struct rds_iw_device *rds_iwdev_old;
180 struct rds_sock rs;
181 struct rdma_cm_id *pcm_id;
182 int rc;
183
184 src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
185 dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
186
187 rs.rs_bound_addr = src_addr->sin_addr.s_addr;
188 rs.rs_bound_port = src_addr->sin_port;
189 rs.rs_conn_addr = dst_addr->sin_addr.s_addr;
190 rs.rs_conn_port = dst_addr->sin_port;
191
192 rc = rds_iw_get_device(&rs, &rds_iwdev_old, &pcm_id);
193 if (rc)
194 rds_iw_remove_cm_id(rds_iwdev, cm_id);
195
196 return rds_iw_add_cm_id(rds_iwdev, cm_id);
197}
198
199int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
200{
201 struct rds_iw_connection *ic = conn->c_transport_data;
202
203 /* conn was previously on the nodev_conns_list */
204 spin_lock_irq(&iw_nodev_conns_lock);
205 BUG_ON(list_empty(&iw_nodev_conns));
206 BUG_ON(list_empty(&ic->iw_node));
207 list_del(&ic->iw_node);
208 spin_unlock_irq(&iw_nodev_conns_lock);
209
210 spin_lock_irq(&rds_iwdev->spinlock);
211 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
212 spin_unlock_irq(&rds_iwdev->spinlock);
213
214 ic->rds_iwdev = rds_iwdev;
215
216 return 0;
217}
218
219void rds_iw_remove_nodev_conns(void)
220{
221 struct rds_iw_connection *ic, *_ic;
222 LIST_HEAD(tmp_list);
223
224 /* avoid calling conn_destroy with irqs off */
225 spin_lock_irq(&iw_nodev_conns_lock);
226 list_splice(&iw_nodev_conns, &tmp_list);
227 INIT_LIST_HEAD(&iw_nodev_conns);
228 spin_unlock_irq(&iw_nodev_conns_lock);
229
230 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) {
231 if (ic->conn->c_passive)
232 rds_conn_destroy(ic->conn->c_passive);
233 rds_conn_destroy(ic->conn);
234 }
235}
236
237void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev)
238{
239 struct rds_iw_connection *ic, *_ic;
240 LIST_HEAD(tmp_list);
241
242 /* avoid calling conn_destroy with irqs off */
243 spin_lock_irq(&rds_iwdev->spinlock);
244 list_splice(&rds_iwdev->conn_list, &tmp_list);
245 INIT_LIST_HEAD(&rds_iwdev->conn_list);
246 spin_unlock_irq(&rds_iwdev->spinlock);
247
248 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) {
249 if (ic->conn->c_passive)
250 rds_conn_destroy(ic->conn->c_passive);
251 rds_conn_destroy(ic->conn);
252 }
253}
254
255static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg,
256 struct scatterlist *list, unsigned int sg_len)
257{
258 sg->list = list;
259 sg->len = sg_len;
260 sg->dma_len = 0;
261 sg->dma_npages = 0;
262 sg->bytes = 0;
263}
264
265static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
266 struct rds_iw_scatterlist *sg,
267 unsigned int dma_page_shift)
268{
269 struct ib_device *dev = rds_iwdev->dev;
270 u64 *dma_pages = NULL;
271 u64 dma_mask;
272 unsigned int dma_page_size;
273 int i, j, ret;
274
275 dma_page_size = 1 << dma_page_shift;
276 dma_mask = dma_page_size - 1;
277
278 WARN_ON(sg->dma_len);
279
280 sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
281 if (unlikely(!sg->dma_len)) {
282 printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n");
283 return ERR_PTR(-EBUSY);
284 }
285
286 sg->bytes = 0;
287 sg->dma_npages = 0;
288
289 ret = -EINVAL;
290 for (i = 0; i < sg->dma_len; ++i) {
291 unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]);
292 u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]);
293 u64 end_addr;
294
295 sg->bytes += dma_len;
296
297 end_addr = dma_addr + dma_len;
298 if (dma_addr & dma_mask) {
299 if (i > 0)
300 goto out_unmap;
301 dma_addr &= ~dma_mask;
302 }
303 if (end_addr & dma_mask) {
304 if (i < sg->dma_len - 1)
305 goto out_unmap;
306 end_addr = (end_addr + dma_mask) & ~dma_mask;
307 }
308
309 sg->dma_npages += (end_addr - dma_addr) >> dma_page_shift;
310 }
311
312 /* Now gather the dma addrs into one list */
313 if (sg->dma_npages > fastreg_message_size)
314 goto out_unmap;
315
316 dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC);
317 if (!dma_pages) {
318 ret = -ENOMEM;
319 goto out_unmap;
320 }
321
322 for (i = j = 0; i < sg->dma_len; ++i) {
323 unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]);
324 u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]);
325 u64 end_addr;
326
327 end_addr = dma_addr + dma_len;
328 dma_addr &= ~dma_mask;
329 for (; dma_addr < end_addr; dma_addr += dma_page_size)
330 dma_pages[j++] = dma_addr;
331 BUG_ON(j > sg->dma_npages);
332 }
333
334 return dma_pages;
335
336out_unmap:
337 ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
338 sg->dma_len = 0;
339 kfree(dma_pages);
340 return ERR_PTR(ret);
341}
342
343
344struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *rds_iwdev)
345{
346 struct rds_iw_mr_pool *pool;
347
348 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
349 if (!pool) {
350 printk(KERN_WARNING "RDS/IW: rds_iw_create_mr_pool alloc error\n");
351 return ERR_PTR(-ENOMEM);
352 }
353
354 pool->device = rds_iwdev;
355 INIT_LIST_HEAD(&pool->dirty_list);
356 INIT_LIST_HEAD(&pool->clean_list);
357 mutex_init(&pool->flush_lock);
358 spin_lock_init(&pool->list_lock);
359 INIT_WORK(&pool->flush_worker, rds_iw_mr_pool_flush_worker);
360
361 pool->max_message_size = fastreg_message_size;
362 pool->max_items = fastreg_pool_size;
363 pool->max_free_pinned = pool->max_items * pool->max_message_size / 4;
364 pool->max_pages = fastreg_message_size;
365
366 /* We never allow more than max_items MRs to be allocated.
 367	 * When we exceed max_items_soft, we start freeing
368 * items more aggressively.
369 * Make sure that max_items > max_items_soft > max_items / 2
370 */
371 pool->max_items_soft = pool->max_items * 3 / 4;
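	/* 3/4 of max_items satisfies the constraint above for any pool larger
	 * than a couple of MRs. */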
372
373 return pool;
374}
375
376void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo)
377{
378 struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool;
379
380 iinfo->rdma_mr_max = pool->max_items;
381 iinfo->rdma_mr_size = pool->max_pages;
382}
383
384void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *pool)
385{
386 flush_workqueue(rds_wq);
387 rds_iw_flush_mr_pool(pool, 1);
388 BUG_ON(atomic_read(&pool->item_count));
389 BUG_ON(atomic_read(&pool->free_pinned));
390 kfree(pool);
391}
392
393static inline struct rds_iw_mr *rds_iw_reuse_fmr(struct rds_iw_mr_pool *pool)
394{
395 struct rds_iw_mr *ibmr = NULL;
396 unsigned long flags;
397
398 spin_lock_irqsave(&pool->list_lock, flags);
399 if (!list_empty(&pool->clean_list)) {
400 ibmr = list_entry(pool->clean_list.next, struct rds_iw_mr, mapping.m_list);
401 list_del_init(&ibmr->mapping.m_list);
402 }
403 spin_unlock_irqrestore(&pool->list_lock, flags);
404
405 return ibmr;
406}
407
408static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev)
409{
410 struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool;
411 struct rds_iw_mr *ibmr = NULL;
412 int err = 0, iter = 0;
413
414 while (1) {
415 ibmr = rds_iw_reuse_fmr(pool);
416 if (ibmr)
417 return ibmr;
418
419 /* No clean MRs - now we have the choice of either
420 * allocating a fresh MR up to the limit imposed by the
421 * driver, or flush any dirty unused MRs.
422 * We try to avoid stalling in the send path if possible,
423 * so we allocate as long as we're allowed to.
424 *
425 * We're fussy with enforcing the FMR limit, though. If the driver
426 * tells us we can't use more than N fmrs, we shouldn't start
427 * arguing with it */
428 if (atomic_inc_return(&pool->item_count) <= pool->max_items)
429 break;
430
431 atomic_dec(&pool->item_count);
432
433 if (++iter > 2) {
434 rds_iw_stats_inc(s_iw_rdma_mr_pool_depleted);
435 return ERR_PTR(-EAGAIN);
436 }
437
438 /* We do have some empty MRs. Flush them out. */
439 rds_iw_stats_inc(s_iw_rdma_mr_pool_wait);
440 rds_iw_flush_mr_pool(pool, 0);
441 }
442
443 ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL);
444 if (!ibmr) {
445 err = -ENOMEM;
446 goto out_no_cigar;
447 }
448
449 spin_lock_init(&ibmr->mapping.m_lock);
450 INIT_LIST_HEAD(&ibmr->mapping.m_list);
451 ibmr->mapping.m_mr = ibmr;
452
453 err = rds_iw_init_fastreg(pool, ibmr);
454 if (err)
455 goto out_no_cigar;
456
457 rds_iw_stats_inc(s_iw_rdma_mr_alloc);
458 return ibmr;
459
460out_no_cigar:
461 if (ibmr) {
462 rds_iw_destroy_fastreg(pool, ibmr);
463 kfree(ibmr);
464 }
465 atomic_dec(&pool->item_count);
466 return ERR_PTR(err);
467}
468
469void rds_iw_sync_mr(void *trans_private, int direction)
470{
471 struct rds_iw_mr *ibmr = trans_private;
472 struct rds_iw_device *rds_iwdev = ibmr->device;
473
474 switch (direction) {
475 case DMA_FROM_DEVICE:
476 ib_dma_sync_sg_for_cpu(rds_iwdev->dev, ibmr->mapping.m_sg.list,
477 ibmr->mapping.m_sg.dma_len, DMA_BIDIRECTIONAL);
478 break;
479 case DMA_TO_DEVICE:
480 ib_dma_sync_sg_for_device(rds_iwdev->dev, ibmr->mapping.m_sg.list,
481 ibmr->mapping.m_sg.dma_len, DMA_BIDIRECTIONAL);
482 break;
483 }
484}
485
486static inline unsigned int rds_iw_flush_goal(struct rds_iw_mr_pool *pool, int free_all)
487{
488 unsigned int item_count;
489
490 item_count = atomic_read(&pool->item_count);
491 if (free_all)
492 return item_count;
493
494 return 0;
495}
496
497/*
498 * Flush our pool of MRs.
499 * At a minimum, all currently unused MRs are unmapped.
500 * If the number of MRs allocated exceeds the limit, we also try
501 * to free as many MRs as needed to get back to this limit.
502 */
503static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
504{
505 struct rds_iw_mr *ibmr, *next;
506 LIST_HEAD(unmap_list);
507 LIST_HEAD(kill_list);
508 unsigned long flags;
509 unsigned int nfreed = 0, ncleaned = 0, free_goal;
510 int ret = 0;
511
512 rds_iw_stats_inc(s_iw_rdma_mr_pool_flush);
513
514 mutex_lock(&pool->flush_lock);
515
516 spin_lock_irqsave(&pool->list_lock, flags);
517 /* Get the list of all mappings to be destroyed */
518 list_splice_init(&pool->dirty_list, &unmap_list);
519 if (free_all)
520 list_splice_init(&pool->clean_list, &kill_list);
521 spin_unlock_irqrestore(&pool->list_lock, flags);
522
523 free_goal = rds_iw_flush_goal(pool, free_all);
524
525 /* Batched invalidate of dirty MRs.
526 * For FMR based MRs, the mappings on the unmap list are
527 * actually members of an ibmr (ibmr->mapping). They either
528 * migrate to the kill_list, or have been cleaned and should be
529 * moved to the clean_list.
530 * For fastregs, they will be dynamically allocated, and
531 * will be destroyed by the unmap function.
532 */
533 if (!list_empty(&unmap_list)) {
534 ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, &kill_list);
535 /* If we've been asked to destroy all MRs, move those
536 * that were simply cleaned to the kill list */
537 if (free_all)
538 list_splice_init(&unmap_list, &kill_list);
539 }
540
541 /* Destroy any MRs that are past their best before date */
542 list_for_each_entry_safe(ibmr, next, &kill_list, mapping.m_list) {
543 rds_iw_stats_inc(s_iw_rdma_mr_free);
544 list_del(&ibmr->mapping.m_list);
545 rds_iw_destroy_fastreg(pool, ibmr);
546 kfree(ibmr);
547 nfreed++;
548 }
549
550 /* Anything that remains are laundered ibmrs, which we can add
551 * back to the clean list. */
552 if (!list_empty(&unmap_list)) {
553 spin_lock_irqsave(&pool->list_lock, flags);
554 list_splice(&unmap_list, &pool->clean_list);
555 spin_unlock_irqrestore(&pool->list_lock, flags);
556 }
557
558 atomic_sub(ncleaned, &pool->dirty_count);
559 atomic_sub(nfreed, &pool->item_count);
560
561 mutex_unlock(&pool->flush_lock);
562 return ret;
563}
564
565static void rds_iw_mr_pool_flush_worker(struct work_struct *work)
566{
567 struct rds_iw_mr_pool *pool = container_of(work, struct rds_iw_mr_pool, flush_worker);
568
569 rds_iw_flush_mr_pool(pool, 0);
570}
571
572void rds_iw_free_mr(void *trans_private, int invalidate)
573{
574 struct rds_iw_mr *ibmr = trans_private;
575 struct rds_iw_mr_pool *pool = ibmr->device->mr_pool;
576
577 rdsdebug("RDS/IW: free_mr nents %u\n", ibmr->mapping.m_sg.len);
578 if (!pool)
579 return;
580
581 /* Return it to the pool's free list */
582 rds_iw_free_fastreg(pool, ibmr);
583
584 /* If we've pinned too many pages, request a flush */
585 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned
586 || atomic_read(&pool->dirty_count) >= pool->max_items / 10)
587 queue_work(rds_wq, &pool->flush_worker);
588
589 if (invalidate) {
590 if (likely(!in_interrupt())) {
591 rds_iw_flush_mr_pool(pool, 0);
592 } else {
 593			/* We get here if the user created an MR marked
 594			 * as use_once and invalidate at the same time. */
595 queue_work(rds_wq, &pool->flush_worker);
596 }
597 }
598}
599
600void rds_iw_flush_mrs(void)
601{
602 struct rds_iw_device *rds_iwdev;
603
604 list_for_each_entry(rds_iwdev, &rds_iw_devices, list) {
605 struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool;
606
607 if (pool)
608 rds_iw_flush_mr_pool(pool, 0);
609 }
610}
611
612void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
613 struct rds_sock *rs, u32 *key_ret)
614{
615 struct rds_iw_device *rds_iwdev;
616 struct rds_iw_mr *ibmr = NULL;
617 struct rdma_cm_id *cm_id;
618 int ret;
619
620 ret = rds_iw_get_device(rs, &rds_iwdev, &cm_id);
621 if (ret || !cm_id) {
622 ret = -ENODEV;
623 goto out;
624 }
625
626 if (!rds_iwdev->mr_pool) {
627 ret = -ENODEV;
628 goto out;
629 }
630
631 ibmr = rds_iw_alloc_mr(rds_iwdev);
632 if (IS_ERR(ibmr))
633 return ibmr;
634
635 ibmr->cm_id = cm_id;
636 ibmr->device = rds_iwdev;
637
638 ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents);
639 if (ret == 0)
640 *key_ret = ibmr->mr->rkey;
641 else
642 printk(KERN_WARNING "RDS/IW: failed to map mr (errno=%d)\n", ret);
643
644out:
645 if (ret) {
646 if (ibmr)
647 rds_iw_free_mr(ibmr, 0);
648 ibmr = ERR_PTR(ret);
649 }
650 return ibmr;
651}
652
653/*
654 * iWARP fastreg handling
655 *
656 * The life cycle of a fastreg registration is a bit different from
657 * FMRs.
658 * The idea behind fastreg is to have one MR, to which we bind different
659 * mappings over time. To avoid stalling on the expensive map and invalidate
660 * operations, these operations are pipelined on the same send queue on
661 * which we want to send the message containing the r_key.
662 *
663 * This creates a bit of a problem for us, as we do not have the destination
664 * IP in GET_MR, so the connection must be set up prior to the GET_MR call for
665 * RDMA to be correctly set up. If a fastreg request is present, rds_iw_xmit
666 * will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request
667 * before queuing the SEND. When completions for these arrive, a bit is
668 * set in the MR showing that RDMA can be performed.
669 *
670 * There is another interesting aspect that's related to invalidation.
671 * The application can request that a mapping is invalidated in FREE_MR.
672 * The expectation there is that this invalidation step includes ALL
673 * PREVIOUSLY FREED MRs.
674 */
675static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
676 struct rds_iw_mr *ibmr)
677{
678 struct rds_iw_device *rds_iwdev = pool->device;
679 struct ib_fast_reg_page_list *page_list = NULL;
680 struct ib_mr *mr;
681 int err;
682
683 mr = ib_alloc_fast_reg_mr(rds_iwdev->pd, pool->max_message_size);
684 if (IS_ERR(mr)) {
685 err = PTR_ERR(mr);
686
687 printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed (err=%d)\n", err);
688 return err;
689 }
690
691 /* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages
692 * is not filled in.
693 */
694 page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size);
695 if (IS_ERR(page_list)) {
696 err = PTR_ERR(page_list);
697
698 printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err);
699 ib_dereg_mr(mr);
700 return err;
701 }
702
703 ibmr->page_list = page_list;
704 ibmr->mr = mr;
705 return 0;
706}
707
708static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping)
709{
710 struct rds_iw_mr *ibmr = mapping->m_mr;
711 struct ib_send_wr f_wr, *failed_wr;
712 int ret;
713
714 /*
715 * Perform a WR for the fast_reg_mr. Each individual page
716 * in the sg list is added to the fast reg page list and placed
717 * inside the fast_reg_mr WR. The key used is a rolling 8-bit
718 * counter, which should guarantee uniqueness.
719 */
720 ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++);
721 mapping->m_rkey = ibmr->mr->rkey;
722
723 memset(&f_wr, 0, sizeof(f_wr));
724 f_wr.wr_id = RDS_IW_FAST_REG_WR_ID;
725 f_wr.opcode = IB_WR_FAST_REG_MR;
726 f_wr.wr.fast_reg.length = mapping->m_sg.bytes;
727 f_wr.wr.fast_reg.rkey = mapping->m_rkey;
728 f_wr.wr.fast_reg.page_list = ibmr->page_list;
729 f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len;
730 f_wr.wr.fast_reg.page_shift = ibmr->device->page_shift;
731 f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE |
732 IB_ACCESS_REMOTE_READ |
733 IB_ACCESS_REMOTE_WRITE;
734 f_wr.wr.fast_reg.iova_start = 0;
735 f_wr.send_flags = IB_SEND_SIGNALED;
736
737 failed_wr = &f_wr;
738 ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr);
739 BUG_ON(failed_wr != &f_wr);
740 if (ret && printk_ratelimit())
741 printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
742 __func__, __LINE__, ret);
743 return ret;
744}
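
The rolling-key idea used in rds_iw_rdma_build_fastreg() above is easy to show in isolation. The following is a minimal, self-contained sketch; roll_key() and the key values are invented for illustration, and the real update is performed by ib_update_fast_reg_key().

/*
 * Sketch only: assumes the low 8 bits of the R_Key carry the wrapping
 * remap counter, so a stale key from an earlier mapping no longer matches.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t roll_key(uint32_t key, uint8_t counter)
{
	/* keep the upper 24 bits, replace the low 8 bits with the counter */
	return (key & 0xffffff00u) | counter;
}

int main(void)
{
	uint32_t rkey = 0x12345678;
	uint8_t remap_count = 0;

	rkey = roll_key(rkey, remap_count++);	/* 0x12345600 */
	rkey = roll_key(rkey, remap_count++);	/* 0x12345601 */
	printf("rkey after two remaps: 0x%08x\n", rkey);
	return 0;
}
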
745
746static int rds_iw_rdma_fastreg_inv(struct rds_iw_mr *ibmr)
747{
748 struct ib_send_wr s_wr, *failed_wr;
749 int ret = 0;
750
751 if (!ibmr->cm_id->qp || !ibmr->mr)
752 goto out;
753
754 memset(&s_wr, 0, sizeof(s_wr));
755 s_wr.wr_id = RDS_IW_LOCAL_INV_WR_ID;
756 s_wr.opcode = IB_WR_LOCAL_INV;
757 s_wr.ex.invalidate_rkey = ibmr->mr->rkey;
758 s_wr.send_flags = IB_SEND_SIGNALED;
759
760 failed_wr = &s_wr;
761 ret = ib_post_send(ibmr->cm_id->qp, &s_wr, &failed_wr);
762 if (ret && printk_ratelimit()) {
763 printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
764 __func__, __LINE__, ret);
765 goto out;
766 }
767out:
768 return ret;
769}
770
771static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
772 struct rds_iw_mr *ibmr,
773 struct scatterlist *sg,
774 unsigned int sg_len)
775{
776 struct rds_iw_device *rds_iwdev = pool->device;
777 struct rds_iw_mapping *mapping = &ibmr->mapping;
778 u64 *dma_pages;
779 int i, ret = 0;
780
781 rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len);
782
783 dma_pages = rds_iw_map_scatterlist(rds_iwdev,
784 &mapping->m_sg,
785 rds_iwdev->page_shift);
786 if (IS_ERR(dma_pages)) {
787 ret = PTR_ERR(dma_pages);
788 dma_pages = NULL;
789 goto out;
790 }
791
792 if (mapping->m_sg.dma_len > pool->max_message_size) {
793 ret = -EMSGSIZE;
794 goto out;
795 }
796
797 for (i = 0; i < mapping->m_sg.dma_npages; ++i)
798 ibmr->page_list->page_list[i] = dma_pages[i];
799
800 ret = rds_iw_rdma_build_fastreg(mapping);
801 if (ret)
802 goto out;
803
804 rds_iw_stats_inc(s_iw_rdma_mr_used);
805
806out:
807 kfree(dma_pages);
808
809 return ret;
810}
811
812/*
813 * "Free" a fastreg MR.
814 */
815static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool,
816 struct rds_iw_mr *ibmr)
817{
818 unsigned long flags;
819 int ret;
820
821 if (!ibmr->mapping.m_sg.dma_len)
822 return;
823
824 ret = rds_iw_rdma_fastreg_inv(ibmr);
825 if (ret)
826 return;
827
828 /* Try to post the LOCAL_INV WR to the queue. */
829 spin_lock_irqsave(&pool->list_lock, flags);
830
831 list_add_tail(&ibmr->mapping.m_list, &pool->dirty_list);
832 atomic_add(ibmr->mapping.m_sg.len, &pool->free_pinned);
833 atomic_inc(&pool->dirty_count);
834
835 spin_unlock_irqrestore(&pool->list_lock, flags);
836}
837
838static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
839 struct list_head *unmap_list,
840 struct list_head *kill_list)
841{
842 struct rds_iw_mapping *mapping, *next;
843 unsigned int ncleaned = 0;
844 LIST_HEAD(laundered);
845
846 /* Batched invalidation of fastreg MRs.
847 * Why do we do it this way, even though we could pipeline unmap
848 * and remap? The reason is the application semantics - when the
849 * application requests an invalidation of MRs, it expects all
850 * previously released R_Keys to become invalid.
851 *
852 * If we implement MR reuse naively, we risk memory corruption
853 * (this has actually been observed). So the default behavior
854 * requires that an MR goes through an explicit unmap operation before
855 * we can reuse it again.
856 *
857 * We could probably improve on this a little, by allowing immediate
858 * reuse of an MR on the same socket (e.g. you could add a small
859 * cache of unused MRs to struct rds_socket - GET_MR could grab one
860 * of these without requiring an explicit invalidate).
861 */
862 while (!list_empty(unmap_list)) {
863 unsigned long flags;
864
865 spin_lock_irqsave(&pool->list_lock, flags);
866 list_for_each_entry_safe(mapping, next, unmap_list, m_list) {
867 list_move(&mapping->m_list, &laundered);
868 ncleaned++;
869 }
870 spin_unlock_irqrestore(&pool->list_lock, flags);
871 }
872
873 /* Move all laundered mappings back to the unmap list.
874 * We do not kill any WRs right now - it doesn't seem the
875 * fastreg API has a max_remap limit. */
876 list_splice_init(&laundered, unmap_list);
877
878 return ncleaned;
879}
880
881static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool,
882 struct rds_iw_mr *ibmr)
883{
884 if (ibmr->page_list)
885 ib_free_fast_reg_page_list(ibmr->page_list);
886 if (ibmr->mr)
887 ib_dereg_mr(ibmr->mr);
888}
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
new file mode 100644
index 000000000000..a1931f0027a2
--- /dev/null
+++ b/net/rds/iw_recv.c
@@ -0,0 +1,869 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/pci.h>
35#include <linux/dma-mapping.h>
36#include <rdma/rdma_cm.h>
37
38#include "rds.h"
39#include "iw.h"
40
41static struct kmem_cache *rds_iw_incoming_slab;
42static struct kmem_cache *rds_iw_frag_slab;
43static atomic_t rds_iw_allocation = ATOMIC_INIT(0);
44
45static void rds_iw_frag_drop_page(struct rds_page_frag *frag)
46{
47 rdsdebug("frag %p page %p\n", frag, frag->f_page);
48 __free_page(frag->f_page);
49 frag->f_page = NULL;
50}
51
52static void rds_iw_frag_free(struct rds_page_frag *frag)
53{
54 rdsdebug("frag %p page %p\n", frag, frag->f_page);
55 BUG_ON(frag->f_page != NULL);
56 kmem_cache_free(rds_iw_frag_slab, frag);
57}
58
59/*
60 * We map a page at a time. Its fragments are posted in order. This
61 * is called in fragment order as the fragments get send completion events.
62 * Only the last frag in the page performs the unmapping.
63 *
64 * It's OK for ring cleanup to call this in whatever order it likes because
65 * DMA is not in flight and so we can unmap while other ring entries still
66 * hold page references in their frags.
67 */
68static void rds_iw_recv_unmap_page(struct rds_iw_connection *ic,
69 struct rds_iw_recv_work *recv)
70{
71 struct rds_page_frag *frag = recv->r_frag;
72
73 rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page);
74 if (frag->f_mapped)
75 ib_dma_unmap_page(ic->i_cm_id->device,
76 frag->f_mapped,
77 RDS_FRAG_SIZE, DMA_FROM_DEVICE);
78 frag->f_mapped = 0;
79}
80
81void rds_iw_recv_init_ring(struct rds_iw_connection *ic)
82{
83 struct rds_iw_recv_work *recv;
84 u32 i;
85
86 for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
87 struct ib_sge *sge;
88
89 recv->r_iwinc = NULL;
90 recv->r_frag = NULL;
91
92 recv->r_wr.next = NULL;
93 recv->r_wr.wr_id = i;
94 recv->r_wr.sg_list = recv->r_sge;
95 recv->r_wr.num_sge = RDS_IW_RECV_SGE;
96
97 sge = rds_iw_data_sge(ic, recv->r_sge);
98 sge->addr = 0;
99 sge->length = RDS_FRAG_SIZE;
100 sge->lkey = 0;
101
102 sge = rds_iw_header_sge(ic, recv->r_sge);
103 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
104 sge->length = sizeof(struct rds_header);
105 sge->lkey = 0;
106 }
107}
108
109static void rds_iw_recv_clear_one(struct rds_iw_connection *ic,
110 struct rds_iw_recv_work *recv)
111{
112 if (recv->r_iwinc) {
113 rds_inc_put(&recv->r_iwinc->ii_inc);
114 recv->r_iwinc = NULL;
115 }
116 if (recv->r_frag) {
117 rds_iw_recv_unmap_page(ic, recv);
118 if (recv->r_frag->f_page)
119 rds_iw_frag_drop_page(recv->r_frag);
120 rds_iw_frag_free(recv->r_frag);
121 recv->r_frag = NULL;
122 }
123}
124
125void rds_iw_recv_clear_ring(struct rds_iw_connection *ic)
126{
127 u32 i;
128
129 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
130 rds_iw_recv_clear_one(ic, &ic->i_recvs[i]);
131
132 if (ic->i_frag.f_page)
133 rds_iw_frag_drop_page(&ic->i_frag);
134}
135
136static int rds_iw_recv_refill_one(struct rds_connection *conn,
137 struct rds_iw_recv_work *recv,
138 gfp_t kptr_gfp, gfp_t page_gfp)
139{
140 struct rds_iw_connection *ic = conn->c_transport_data;
141 dma_addr_t dma_addr;
142 struct ib_sge *sge;
143 int ret = -ENOMEM;
144
145 if (recv->r_iwinc == NULL) {
146 if (atomic_read(&rds_iw_allocation) >= rds_iw_sysctl_max_recv_allocation) {
147 rds_iw_stats_inc(s_iw_rx_alloc_limit);
148 goto out;
149 }
150 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab,
151 kptr_gfp);
152 if (recv->r_iwinc == NULL)
153 goto out;
154 atomic_inc(&rds_iw_allocation);
155 INIT_LIST_HEAD(&recv->r_iwinc->ii_frags);
156 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr);
157 }
158
159 if (recv->r_frag == NULL) {
160 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp);
161 if (recv->r_frag == NULL)
162 goto out;
163 INIT_LIST_HEAD(&recv->r_frag->f_item);
164 recv->r_frag->f_page = NULL;
165 }
166
167 if (ic->i_frag.f_page == NULL) {
168 ic->i_frag.f_page = alloc_page(page_gfp);
169 if (ic->i_frag.f_page == NULL)
170 goto out;
171 ic->i_frag.f_offset = 0;
172 }
173
174 dma_addr = ib_dma_map_page(ic->i_cm_id->device,
175 ic->i_frag.f_page,
176 ic->i_frag.f_offset,
177 RDS_FRAG_SIZE,
178 DMA_FROM_DEVICE);
179 if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr))
180 goto out;
181
182 /*
183 * Once we get the RDS_PAGE_LAST_OFF frag then rds_iw_frag_unmap()
184 * must be called on this recv. This happens as completions hit
185 * in order or on connection shutdown.
186 */
187 recv->r_frag->f_page = ic->i_frag.f_page;
188 recv->r_frag->f_offset = ic->i_frag.f_offset;
189 recv->r_frag->f_mapped = dma_addr;
190
191 sge = rds_iw_data_sge(ic, recv->r_sge);
192 sge->addr = dma_addr;
193 sge->length = RDS_FRAG_SIZE;
194
195 sge = rds_iw_header_sge(ic, recv->r_sge);
196 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
197 sge->length = sizeof(struct rds_header);
198
199 get_page(recv->r_frag->f_page);
200
201 if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) {
202 ic->i_frag.f_offset += RDS_FRAG_SIZE;
203 } else {
204 put_page(ic->i_frag.f_page);
205 ic->i_frag.f_page = NULL;
206 ic->i_frag.f_offset = 0;
207 }
208
209 ret = 0;
210out:
211 return ret;
212}
213
214/*
215 * This tries to allocate and post unused work requests after making sure that
216 * they have all the allocations they need to queue received fragments into
217 * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc
218 * pairs don't go unmatched.
219 *
220 * -1 is returned if posting fails due to temporary resource exhaustion.
221 */
222int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
223 gfp_t page_gfp, int prefill)
224{
225 struct rds_iw_connection *ic = conn->c_transport_data;
226 struct rds_iw_recv_work *recv;
227 struct ib_recv_wr *failed_wr;
228 unsigned int posted = 0;
229 int ret = 0;
230 u32 pos;
231
232 while ((prefill || rds_conn_up(conn))
233 && rds_iw_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
234 if (pos >= ic->i_recv_ring.w_nr) {
235 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
236 pos);
237 ret = -EINVAL;
238 break;
239 }
240
241 recv = &ic->i_recvs[pos];
242 ret = rds_iw_recv_refill_one(conn, recv, kptr_gfp, page_gfp);
243 if (ret) {
244 ret = -1;
245 break;
246 }
247
248 /* XXX when can this fail? */
249 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
250 rdsdebug("recv %p iwinc %p page %p addr %lu ret %d\n", recv,
251 recv->r_iwinc, recv->r_frag->f_page,
252 (long) recv->r_frag->f_mapped, ret);
253 if (ret) {
254 rds_iw_conn_error(conn, "recv post on "
255 "%pI4 returned %d, disconnecting and "
256 "reconnecting\n", &conn->c_faddr,
257 ret);
258 ret = -1;
259 break;
260 }
261
262 posted++;
263 }
264
265 /* We're doing flow control - update the window. */
266 if (ic->i_flowctl && posted)
267 rds_iw_advertise_credits(conn, posted);
268
269 if (ret)
270 rds_iw_ring_unalloc(&ic->i_recv_ring, 1);
271 return ret;
272}
273
274void rds_iw_inc_purge(struct rds_incoming *inc)
275{
276 struct rds_iw_incoming *iwinc;
277 struct rds_page_frag *frag;
278 struct rds_page_frag *pos;
279
280 iwinc = container_of(inc, struct rds_iw_incoming, ii_inc);
281 rdsdebug("purging iwinc %p inc %p\n", iwinc, inc);
282
283 list_for_each_entry_safe(frag, pos, &iwinc->ii_frags, f_item) {
284 list_del_init(&frag->f_item);
285 rds_iw_frag_drop_page(frag);
286 rds_iw_frag_free(frag);
287 }
288}
289
290void rds_iw_inc_free(struct rds_incoming *inc)
291{
292 struct rds_iw_incoming *iwinc;
293
294 iwinc = container_of(inc, struct rds_iw_incoming, ii_inc);
295
296 rds_iw_inc_purge(inc);
297 rdsdebug("freeing iwinc %p inc %p\n", iwinc, inc);
298 BUG_ON(!list_empty(&iwinc->ii_frags));
299 kmem_cache_free(rds_iw_incoming_slab, iwinc);
300 atomic_dec(&rds_iw_allocation);
301 BUG_ON(atomic_read(&rds_iw_allocation) < 0);
302}
303
304int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
305 size_t size)
306{
307 struct rds_iw_incoming *iwinc;
308 struct rds_page_frag *frag;
309 struct iovec *iov = first_iov;
310 unsigned long to_copy;
311 unsigned long frag_off = 0;
312 unsigned long iov_off = 0;
313 int copied = 0;
314 int ret;
315 u32 len;
316
317 iwinc = container_of(inc, struct rds_iw_incoming, ii_inc);
318 frag = list_entry(iwinc->ii_frags.next, struct rds_page_frag, f_item);
319 len = be32_to_cpu(inc->i_hdr.h_len);
320
321 while (copied < size && copied < len) {
322 if (frag_off == RDS_FRAG_SIZE) {
323 frag = list_entry(frag->f_item.next,
324 struct rds_page_frag, f_item);
325 frag_off = 0;
326 }
327 while (iov_off == iov->iov_len) {
328 iov_off = 0;
329 iov++;
330 }
331
332 to_copy = min(iov->iov_len - iov_off, RDS_FRAG_SIZE - frag_off);
333 to_copy = min_t(size_t, to_copy, size - copied);
334 to_copy = min_t(unsigned long, to_copy, len - copied);
335
336 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag "
337 "[%p, %lu] + %lu\n",
338 to_copy, iov->iov_base, iov->iov_len, iov_off,
339 frag->f_page, frag->f_offset, frag_off);
340
341 /* XXX needs + offset for multiple recvs per page */
342 ret = rds_page_copy_to_user(frag->f_page,
343 frag->f_offset + frag_off,
344 iov->iov_base + iov_off,
345 to_copy);
346 if (ret) {
347 copied = ret;
348 break;
349 }
350
351 iov_off += to_copy;
352 frag_off += to_copy;
353 copied += to_copy;
354 }
355
356 return copied;
357}
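
The per-iteration copy length in the loop above is bounded by three successive min operations. The standalone sketch below (buffer sizes invented) shows how the copy size is clamped by the room left in the current iovec, the bytes left in the current fragment, and the caller's remaining request.

#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long iov_left  = 1500;	/* room left in the current iovec */
	unsigned long frag_left = 4096;	/* bytes left in the current fragment */
	unsigned long want_left = 700;	/* bytes the caller still asked for */
	unsigned long to_copy;

	to_copy = min_ul(iov_left, frag_left);
	to_copy = min_ul(to_copy, want_left);
	printf("copy %lu bytes this iteration\n", to_copy);	/* 700 */
	return 0;
}
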
358
359/* ic starts out kzalloc()ed */
360void rds_iw_recv_init_ack(struct rds_iw_connection *ic)
361{
362 struct ib_send_wr *wr = &ic->i_ack_wr;
363 struct ib_sge *sge = &ic->i_ack_sge;
364
365 sge->addr = ic->i_ack_dma;
366 sge->length = sizeof(struct rds_header);
367 sge->lkey = rds_iw_local_dma_lkey(ic);
368
369 wr->sg_list = sge;
370 wr->num_sge = 1;
371 wr->opcode = IB_WR_SEND;
372 wr->wr_id = RDS_IW_ACK_WR_ID;
373 wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
374}
375
376/*
377 * You'd think that with reliable IB connections you wouldn't need to ack
378 * messages that have been received. The problem is that IB hardware generates
379 * an ack message before it has DMAed the message into memory. This creates a
380 * potential message loss if the HCA is disabled for any reason between when it
381 * sends the ack and before the message is DMAed and processed. This is only a
382 * potential issue if another HCA is available for fail-over.
383 *
384 * When the remote host receives our ack they'll free the sent message from
385 * their send queue. To decrease the latency of this we always send an ack
386 * immediately after we've received messages.
387 *
388 * For simplicity, we only have one ack in flight at a time. This puts
389 * pressure on senders to have deep enough send queues to absorb the latency of
390 * a single ack frame being in flight. This might not be good enough.
391 *
392 * This is implemented by having a long-lived send_wr and sge which point to a
393 * statically allocated ack frame. This ack wr does not fall under the ring
394 * accounting that the tx and rx wrs do. The QP attribute specifically makes
395 * room for it beyond the ring size. Send completion notices its special
396 * wr_id and avoids working with the ring in that case.
397 */
398static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
399 int ack_required)
400{
401 rds_iw_set_64bit(&ic->i_ack_next, seq);
402 if (ack_required) {
403 smp_mb__before_clear_bit();
404 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
405 }
406}
407
408static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
409{
410 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
411 smp_mb__after_clear_bit();
412
413 return ic->i_ack_next;
414}
415
416static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits)
417{
418 struct rds_header *hdr = ic->i_ack;
419 struct ib_send_wr *failed_wr;
420 u64 seq;
421 int ret;
422
423 seq = rds_iw_get_ack(ic);
424
425 rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
426 rds_message_populate_header(hdr, 0, 0, 0);
427 hdr->h_ack = cpu_to_be64(seq);
428 hdr->h_credit = adv_credits;
429 rds_message_make_checksum(hdr);
430 ic->i_ack_queued = jiffies;
431
432 ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr);
433 if (unlikely(ret)) {
434 /* Failed to send. Release the WR, and
435 * force another ACK.
436 */
437 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
438 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
439
440 rds_iw_stats_inc(s_iw_ack_send_failure);
441 /* Need to finesse this later. */
442 BUG();
443 } else
444 rds_iw_stats_inc(s_iw_ack_sent);
445}
446
447/*
448 * There are 3 ways of getting acknowledgements to the peer:
449 * 1. We call rds_iw_attempt_ack from the recv completion handler
450 * to send an ACK-only frame.
451 * However, there can be only one such frame in the send queue
452 * at any time, so we may have to postpone it.
453 * 2. When another (data) packet is transmitted while there's
454 * an ACK in the queue, we piggyback the ACK sequence number
455 * on the data packet.
456 * 3. If the ACK WR is done sending, we get called from the
457 * send queue completion handler, and check whether there's
458 * another ACK pending (postponed because the WR was on the
459 * queue). If so, we transmit it.
460 *
461 * We maintain 2 variables:
462 * - i_ack_flags, which keeps track of whether the ACK WR
463 * is currently in the send queue or not (IB_ACK_IN_FLIGHT)
464 * - i_ack_next, which is the last sequence number we received
465 *
466 * Potentially, send queue and receive queue handlers can run concurrently.
467 *
468 * Reconnecting complicates this picture just slightly. When we
469 * reconnect, we may be seeing duplicate packets. The peer
470 * is retransmitting them, because it hasn't seen an ACK for
471 * them. It is important that we ACK these.
472 *
473 * ACK mitigation adds a header flag "ACK_REQUIRED"; any packet with
474 * this flag set *MUST* be acknowledged immediately.
475 */
476
477/*
478 * When we get here, we're called from the recv queue handler.
479 * Check whether we ought to transmit an ACK.
480 */
481void rds_iw_attempt_ack(struct rds_iw_connection *ic)
482{
483 unsigned int adv_credits;
484
485 if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
486 return;
487
488 if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
489 rds_iw_stats_inc(s_iw_ack_send_delayed);
490 return;
491 }
492
493 /* Can we get a send credit? */
494 if (!rds_iw_send_grab_credits(ic, 1, &adv_credits, 0)) {
495 rds_iw_stats_inc(s_iw_tx_throttle);
496 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
497 return;
498 }
499
500 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
501 rds_iw_send_ack(ic, adv_credits);
502}
503
504/*
505 * We get here from the send completion handler, when the
506 * adapter tells us the ACK frame was sent.
507 */
508void rds_iw_ack_send_complete(struct rds_iw_connection *ic)
509{
510 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
511 rds_iw_attempt_ack(ic);
512}
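
As a hedged illustration of the two-flag handshake implemented by rds_iw_attempt_ack() and rds_iw_ack_send_complete() above, the userspace sketch below uses invented names (ACK_REQUESTED, ACK_IN_FLIGHT, try_send_ack) and C11 atomics in place of the kernel bitops; it shows only that a single ACK is ever in flight and that a postponed ACK is picked up when the completion arrives, ignoring credits.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define ACK_REQUESTED	(1u << 0)	/* an ACK is owed to the peer */
#define ACK_IN_FLIGHT	(1u << 1)	/* an ACK WR is already queued */

static _Atomic unsigned int ack_flags;

static bool try_send_ack(void)
{
	if (!(atomic_load(&ack_flags) & ACK_REQUESTED))
		return false;				/* nothing owed */
	if (atomic_fetch_or(&ack_flags, ACK_IN_FLIGHT) & ACK_IN_FLIGHT)
		return false;				/* postpone: one ACK already in flight */
	atomic_fetch_and(&ack_flags, ~ACK_REQUESTED);
	printf("posting ACK-only frame\n");		/* stands in for ib_post_send() */
	return true;
}

static void ack_send_complete(void)
{
	atomic_fetch_and(&ack_flags, ~ACK_IN_FLIGHT);
	try_send_ack();					/* pick up any postponed ACK */
}

int main(void)
{
	atomic_fetch_or(&ack_flags, ACK_REQUESTED);
	try_send_ack();		/* sends immediately */
	atomic_fetch_or(&ack_flags, ACK_REQUESTED);
	try_send_ack();		/* postponed: previous ACK still in flight */
	ack_send_complete();	/* completion triggers the postponed ACK */
	return 0;
}
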
513
514/*
515 * This is called by the regular xmit code when it wants to piggyback
516 * an ACK on an outgoing frame.
517 */
518u64 rds_iw_piggyb_ack(struct rds_iw_connection *ic)
519{
520 if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
521 rds_iw_stats_inc(s_iw_ack_send_piggybacked);
522 return rds_iw_get_ack(ic);
523}
524
525/*
526 * It's kind of lame that we're copying from the posted receive pages into
527 * long-lived bitmaps. We could have posted the bitmaps and rdma written into
528 * them. But receiving new congestion bitmaps should be a *rare* event, so
529 * hopefully we won't need to invest that complexity in making it more
530 * efficient. By copying we can share a simpler core with TCP which has to
531 * copy.
532 */
533static void rds_iw_cong_recv(struct rds_connection *conn,
534 struct rds_iw_incoming *iwinc)
535{
536 struct rds_cong_map *map;
537 unsigned int map_off;
538 unsigned int map_page;
539 struct rds_page_frag *frag;
540 unsigned long frag_off;
541 unsigned long to_copy;
542 unsigned long copied;
543 uint64_t uncongested = 0;
544 void *addr;
545
546 /* catch completely corrupt packets */
547 if (be32_to_cpu(iwinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES)
548 return;
549
550 map = conn->c_fcong;
551 map_page = 0;
552 map_off = 0;
553
554 frag = list_entry(iwinc->ii_frags.next, struct rds_page_frag, f_item);
555 frag_off = 0;
556
557 copied = 0;
558
559 while (copied < RDS_CONG_MAP_BYTES) {
560 uint64_t *src, *dst;
561 unsigned int k;
562
563 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
564 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
565
566 addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0);
567
568 src = addr + frag_off;
569 dst = (void *)map->m_page_addrs[map_page] + map_off;
570 for (k = 0; k < to_copy; k += 8) {
571 /* Record ports that became uncongested, ie
572 * bits that changed from 0 to 1. */
573 uncongested |= ~(*src) & *dst;
574 *dst++ = *src++;
575 }
576 kunmap_atomic(addr, KM_SOFTIRQ0);
577
578 copied += to_copy;
579
580 map_off += to_copy;
581 if (map_off == PAGE_SIZE) {
582 map_off = 0;
583 map_page++;
584 }
585
586 frag_off += to_copy;
587 if (frag_off == RDS_FRAG_SIZE) {
588 frag = list_entry(frag->f_item.next,
589 struct rds_page_frag, f_item);
590 frag_off = 0;
591 }
592 }
593
594 /* the congestion map is in little endian order */
595 uncongested = le64_to_cpu(uncongested);
596
597 rds_cong_map_updated(map, uncongested);
598}
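
The bit-difference computed in the copy loop of rds_iw_cong_recv() above is plain arithmetic. A minimal standalone sketch (the two words below are made-up values) shows that ~incoming & stored isolates exactly the bit positions that were set in the stored word and are clear in the incoming one.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t stored   = 0xF0F0;			/* word currently held in the map */
	uint64_t incoming = 0x00F3;			/* word just received from the peer */
	uint64_t changed  = ~incoming & stored;		/* bits that went from set to clear */

	printf("set->clear bits: 0x%llx\n", (unsigned long long)changed);	/* 0xF000 */
	return 0;
}
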
599
600/*
601 * Rings are posted with all the allocations they'll need to queue the
602 * incoming message to the receiving socket so this can't fail.
603 * All fragments start with a header, so we can make sure we're not receiving
604 * garbage, and we can tell a small 8 byte fragment from an ACK frame.
605 */
606struct rds_iw_ack_state {
607 u64 ack_next;
608 u64 ack_recv;
609 unsigned int ack_required:1;
610 unsigned int ack_next_valid:1;
611 unsigned int ack_recv_valid:1;
612};
613
614static void rds_iw_process_recv(struct rds_connection *conn,
615 struct rds_iw_recv_work *recv, u32 byte_len,
616 struct rds_iw_ack_state *state)
617{
618 struct rds_iw_connection *ic = conn->c_transport_data;
619 struct rds_iw_incoming *iwinc = ic->i_iwinc;
620 struct rds_header *ihdr, *hdr;
621
622 /* XXX shut down the connection if port 0,0 are seen? */
623
624 rdsdebug("ic %p iwinc %p recv %p byte len %u\n", ic, iwinc, recv,
625 byte_len);
626
627 if (byte_len < sizeof(struct rds_header)) {
628 rds_iw_conn_error(conn, "incoming message "
629 "from %pI4 didn't inclue a "
630 "header, disconnecting and "
631 "reconnecting\n",
632 &conn->c_faddr);
633 return;
634 }
635 byte_len -= sizeof(struct rds_header);
636
637 ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
638
639 /* Validate the checksum. */
640 if (!rds_message_verify_checksum(ihdr)) {
641 rds_iw_conn_error(conn, "incoming message "
642 "from %pI4 has corrupted header - "
643 "forcing a reconnect\n",
644 &conn->c_faddr);
645 rds_stats_inc(s_recv_drop_bad_checksum);
646 return;
647 }
648
649 /* Process the ACK sequence which comes with every packet */
650 state->ack_recv = be64_to_cpu(ihdr->h_ack);
651 state->ack_recv_valid = 1;
652
653 /* Process the credits update if there was one */
654 if (ihdr->h_credit)
655 rds_iw_send_add_credits(conn, ihdr->h_credit);
656
657 if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && byte_len == 0) {
658		/* This is an ACK-only packet. The reason it gets
659 * special treatment here is that historically, ACKs
660 * were rather special beasts.
661 */
662 rds_iw_stats_inc(s_iw_ack_received);
663
664 /*
665 * Usually the frags make their way on to incs and are then freed as
666 * the inc is freed. We don't go that route, so we have to drop the
667 * page ref ourselves. We can't just leave the page on the recv
668 * because that confuses the dma mapping of pages and each recv's use
669		 * of a partial page. We can leave the frag, though; it will be
670 * reused.
671 *
672 * FIXME: Fold this into the code path below.
673 */
674 rds_iw_frag_drop_page(recv->r_frag);
675 return;
676 }
677
678 /*
679 * If we don't already have an inc on the connection then this
680	 * fragment has a header and starts a message; copy its header
681 * into the inc and save the inc so we can hang upcoming fragments
682 * off its list.
683 */
684 if (iwinc == NULL) {
685 iwinc = recv->r_iwinc;
686 recv->r_iwinc = NULL;
687 ic->i_iwinc = iwinc;
688
689 hdr = &iwinc->ii_inc.i_hdr;
690 memcpy(hdr, ihdr, sizeof(*hdr));
691 ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
692
693 rdsdebug("ic %p iwinc %p rem %u flag 0x%x\n", ic, iwinc,
694 ic->i_recv_data_rem, hdr->h_flags);
695 } else {
696 hdr = &iwinc->ii_inc.i_hdr;
697 /* We can't just use memcmp here; fragments of a
698 * single message may carry different ACKs */
699 if (hdr->h_sequence != ihdr->h_sequence
700 || hdr->h_len != ihdr->h_len
701 || hdr->h_sport != ihdr->h_sport
702 || hdr->h_dport != ihdr->h_dport) {
703 rds_iw_conn_error(conn,
704 "fragment header mismatch; forcing reconnect\n");
705 return;
706 }
707 }
708
709 list_add_tail(&recv->r_frag->f_item, &iwinc->ii_frags);
710 recv->r_frag = NULL;
711
712 if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
713 ic->i_recv_data_rem -= RDS_FRAG_SIZE;
714 else {
715 ic->i_recv_data_rem = 0;
716 ic->i_iwinc = NULL;
717
718 if (iwinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
719 rds_iw_cong_recv(conn, iwinc);
720 else {
721 rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
722 &iwinc->ii_inc, GFP_ATOMIC,
723 KM_SOFTIRQ0);
724 state->ack_next = be64_to_cpu(hdr->h_sequence);
725 state->ack_next_valid = 1;
726 }
727
728 /* Evaluate the ACK_REQUIRED flag *after* we received
729 * the complete frame, and after bumping the next_rx
730 * sequence. */
731 if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) {
732 rds_stats_inc(s_recv_ack_required);
733 state->ack_required = 1;
734 }
735
736 rds_inc_put(&iwinc->ii_inc);
737 }
738}
739
740/*
741 * Plucking the oldest entry from the ring can be done concurrently with
742 * the thread refilling the ring. Each ring operation is protected by
743 * spinlocks and the transient state of refilling doesn't change the
744 * recording of which entry is oldest.
745 *
746 * This relies on IB only calling one cq comp_handler for each cq so that
747 * there will only be one caller of rds_recv_incoming() per RDS connection.
748 */
749void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context)
750{
751 struct rds_connection *conn = context;
752 struct rds_iw_connection *ic = conn->c_transport_data;
753 struct ib_wc wc;
754 struct rds_iw_ack_state state = { 0, };
755 struct rds_iw_recv_work *recv;
756
757 rdsdebug("conn %p cq %p\n", conn, cq);
758
759 rds_iw_stats_inc(s_iw_rx_cq_call);
760
761 ib_req_notify_cq(cq, IB_CQ_SOLICITED);
762
763 while (ib_poll_cq(cq, 1, &wc) > 0) {
764 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
765 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
766 be32_to_cpu(wc.ex.imm_data));
767 rds_iw_stats_inc(s_iw_rx_cq_event);
768
769 recv = &ic->i_recvs[rds_iw_ring_oldest(&ic->i_recv_ring)];
770
771 rds_iw_recv_unmap_page(ic, recv);
772
773 /*
774 * Also process recvs in connecting state because it is possible
775 * to get a recv completion _before_ the rdmacm ESTABLISHED
776 * event is processed.
777 */
778 if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
779 /* We expect errors as the qp is drained during shutdown */
780 if (wc.status == IB_WC_SUCCESS) {
781 rds_iw_process_recv(conn, recv, wc.byte_len, &state);
782 } else {
783 rds_iw_conn_error(conn, "recv completion on "
784 "%pI4 had status %u, disconnecting and "
785 "reconnecting\n", &conn->c_faddr,
786 wc.status);
787 }
788 }
789
790 rds_iw_ring_free(&ic->i_recv_ring, 1);
791 }
792
793 if (state.ack_next_valid)
794 rds_iw_set_ack(ic, state.ack_next, state.ack_required);
795 if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
796 rds_send_drop_acked(conn, state.ack_recv, NULL);
797 ic->i_ack_recv = state.ack_recv;
798 }
799 if (rds_conn_up(conn))
800 rds_iw_attempt_ack(ic);
801
802 /* If we ever end up with a really empty receive ring, we're
803 * in deep trouble, as the sender will definitely see RNR
804 * timeouts. */
805 if (rds_iw_ring_empty(&ic->i_recv_ring))
806 rds_iw_stats_inc(s_iw_rx_ring_empty);
807
808 /*
809 * If the ring is running low, then schedule the thread to refill.
810 */
811 if (rds_iw_ring_low(&ic->i_recv_ring))
812 queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
813}
814
815int rds_iw_recv(struct rds_connection *conn)
816{
817 struct rds_iw_connection *ic = conn->c_transport_data;
818 int ret = 0;
819
820 rdsdebug("conn %p\n", conn);
821
822 /*
823 * If we get a temporary posting failure in this context then
824 * we're really low and we want the caller to back off for a bit.
825 */
826 mutex_lock(&ic->i_recv_mutex);
827 if (rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0))
828 ret = -ENOMEM;
829 else
830 rds_iw_stats_inc(s_iw_rx_refill_from_thread);
831 mutex_unlock(&ic->i_recv_mutex);
832
833 if (rds_conn_up(conn))
834 rds_iw_attempt_ack(ic);
835
836 return ret;
837}
838
839int __init rds_iw_recv_init(void)
840{
841 struct sysinfo si;
842 int ret = -ENOMEM;
843
844	/* Default to roughly a third of all available RAM for recv memory */
845 si_meminfo(&si);
846 rds_iw_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
847
848 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming",
849 sizeof(struct rds_iw_incoming),
850 0, 0, NULL);
851 if (rds_iw_incoming_slab == NULL)
852 goto out;
853
854 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag",
855 sizeof(struct rds_page_frag),
856 0, 0, NULL);
857 if (rds_iw_frag_slab == NULL)
858 kmem_cache_destroy(rds_iw_incoming_slab);
859 else
860 ret = 0;
861out:
862 return ret;
863}
864
865void rds_iw_recv_exit(void)
866{
867 kmem_cache_destroy(rds_iw_incoming_slab);
868 kmem_cache_destroy(rds_iw_frag_slab);
869}
diff --git a/net/rds/iw_ring.c b/net/rds/iw_ring.c
new file mode 100644
index 000000000000..d422d4b5deef
--- /dev/null
+++ b/net/rds/iw_ring.c
@@ -0,0 +1,169 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34
35#include "rds.h"
36#include "iw.h"
37
38/*
39 * Locking for IB rings.
40 * We assume that allocation is always protected by a mutex
41 * in the caller (this is a valid assumption for the current
42 * implementation).
43 *
44 * Freeing always happens in an interrupt, and hence only
45 * races with allocations, but not with other free()s.
46 *
47 * The interaction between allocation and freeing is that
48 * the alloc code has to determine the number of free entries.
 49 * To this end, we maintain two counters: an allocation counter
 50 * and a free counter. Both are allowed to run freely, and wrap
 51 * around.
 52 * The number of used entries is always (alloc_ctr - free_ctr), computed in wrapping u32 arithmetic.
53 *
54 * The current implementation makes free_ctr atomic. When the
55 * caller finds an allocation fails, it should set an "alloc fail"
56 * bit and retry the allocation. The "alloc fail" bit essentially tells
57 * the CQ completion handlers to wake it up after freeing some
58 * more entries.
59 */
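
A small standalone sketch of the free-running counter arithmetic described above (values invented): because both counters are plain u32s that are allowed to wrap, the subtraction still yields the number of in-use entries even after a counter passes 2^32.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t alloc_ctr = 5;			/* wrapped: has allocated 2^32 + 5 entries */
	uint32_t free_ctr  = 0xFFFFFFFDu;	/* has freed 2^32 - 3 entries */
	uint32_t used = alloc_ctr - free_ctr;	/* wrapping subtraction */

	printf("used entries: %u\n", used);	/* 8 */
	return 0;
}
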
60
61/*
62 * This only happens on shutdown.
63 */
64DECLARE_WAIT_QUEUE_HEAD(rds_iw_ring_empty_wait);
65
66void rds_iw_ring_init(struct rds_iw_work_ring *ring, u32 nr)
67{
68 memset(ring, 0, sizeof(*ring));
69 ring->w_nr = nr;
70 rdsdebug("ring %p nr %u\n", ring, ring->w_nr);
71}
72
73static inline u32 __rds_iw_ring_used(struct rds_iw_work_ring *ring)
74{
75 u32 diff;
76
77 /* This assumes that atomic_t has at least as many bits as u32 */
78 diff = ring->w_alloc_ctr - (u32) atomic_read(&ring->w_free_ctr);
79 BUG_ON(diff > ring->w_nr);
80
81 return diff;
82}
83
84void rds_iw_ring_resize(struct rds_iw_work_ring *ring, u32 nr)
85{
86 /* We only ever get called from the connection setup code,
87 * prior to creating the QP. */
88 BUG_ON(__rds_iw_ring_used(ring));
89 ring->w_nr = nr;
90}
91
92static int __rds_iw_ring_empty(struct rds_iw_work_ring *ring)
93{
94 return __rds_iw_ring_used(ring) == 0;
95}
96
97u32 rds_iw_ring_alloc(struct rds_iw_work_ring *ring, u32 val, u32 *pos)
98{
99 u32 ret = 0, avail;
100
101 avail = ring->w_nr - __rds_iw_ring_used(ring);
102
103 rdsdebug("ring %p val %u next %u free %u\n", ring, val,
104 ring->w_alloc_ptr, avail);
105
106 if (val && avail) {
107 ret = min(val, avail);
108 *pos = ring->w_alloc_ptr;
109
110 ring->w_alloc_ptr = (ring->w_alloc_ptr + ret) % ring->w_nr;
111 ring->w_alloc_ctr += ret;
112 }
113
114 return ret;
115}
116
117void rds_iw_ring_free(struct rds_iw_work_ring *ring, u32 val)
118{
119 ring->w_free_ptr = (ring->w_free_ptr + val) % ring->w_nr;
120 atomic_add(val, &ring->w_free_ctr);
121
122 if (__rds_iw_ring_empty(ring) &&
123 waitqueue_active(&rds_iw_ring_empty_wait))
124 wake_up(&rds_iw_ring_empty_wait);
125}
126
127void rds_iw_ring_unalloc(struct rds_iw_work_ring *ring, u32 val)
128{
129 ring->w_alloc_ptr = (ring->w_alloc_ptr - val) % ring->w_nr;
130 ring->w_alloc_ctr -= val;
131}
132
133int rds_iw_ring_empty(struct rds_iw_work_ring *ring)
134{
135 return __rds_iw_ring_empty(ring);
136}
137
138int rds_iw_ring_low(struct rds_iw_work_ring *ring)
139{
140 return __rds_iw_ring_used(ring) <= (ring->w_nr >> 2);
141}
142
143
144/*
145 * returns the oldest alloced ring entry. This will be the next one
146 * freed. This can't be called if there are none allocated.
147 */
148u32 rds_iw_ring_oldest(struct rds_iw_work_ring *ring)
149{
150 return ring->w_free_ptr;
151}
152
153/*
154 * returns the number of completed work requests.
155 */
156
157u32 rds_iw_ring_completed(struct rds_iw_work_ring *ring, u32 wr_id, u32 oldest)
158{
159 u32 ret;
160
161 if (oldest <= (unsigned long long)wr_id)
162 ret = (unsigned long long)wr_id - oldest + 1;
163 else
164 ret = ring->w_nr - oldest + (unsigned long long)wr_id + 1;
165
166 rdsdebug("ring %p ret %u wr_id %u oldest %u\n", ring, ret,
167 wr_id, oldest);
168 return ret;
169}
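
Worked example for rds_iw_ring_completed() above (numbers invented): with w_nr = 256, the oldest outstanding entry at slot 250 and a completion reported for wr_id 3, the ring has wrapped, so the second branch applies and completed = w_nr - oldest + wr_id + 1 = 256 - 250 + 3 + 1 = 10 entries are retired in one pass.
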
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
new file mode 100644
index 000000000000..22dd38ffd608
--- /dev/null
+++ b/net/rds/iw_send.c
@@ -0,0 +1,975 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/device.h>
36#include <linux/dmapool.h>
37
38#include "rds.h"
39#include "rdma.h"
40#include "iw.h"
41
42static void rds_iw_send_rdma_complete(struct rds_message *rm,
43 int wc_status)
44{
45 int notify_status;
46
47 switch (wc_status) {
48 case IB_WC_WR_FLUSH_ERR:
49 return;
50
51 case IB_WC_SUCCESS:
52 notify_status = RDS_RDMA_SUCCESS;
53 break;
54
55 case IB_WC_REM_ACCESS_ERR:
56 notify_status = RDS_RDMA_REMOTE_ERROR;
57 break;
58
59 default:
60 notify_status = RDS_RDMA_OTHER_ERROR;
61 break;
62 }
63 rds_rdma_send_complete(rm, notify_status);
64}
65
66static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic,
67 struct rds_rdma_op *op)
68{
69 if (op->r_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->r_sg, op->r_nents,
72 op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->r_mapped = 0;
74 }
75}
76
77static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
78 struct rds_iw_send_work *send,
79 int wc_status)
80{
81 struct rds_message *rm = send->s_rm;
82
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
84
85 ib_dma_unmap_sg(ic->i_cm_id->device,
86 rm->m_sg, rm->m_nents,
87 DMA_TO_DEVICE);
88
89 if (rm->m_rdma_op != NULL) {
90 rds_iw_send_unmap_rdma(ic, rm->m_rdma_op);
91
92 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics:
94 * 1. Notify when we received the ACK on the RDS message
95 * that was queued with the RDMA. This provides reliable
96 * notification of RDMA status at the expense of a one-way
97 * packet delay.
98 * 2. Notify when the IB stack gives us the completion event for
99 * the RDMA operation.
100 * 3. Notify when the IB stack gives us the completion event for
101 * the accompanying RDS messages.
102 * Here, we implement approach #3. To implement approach #2,
103 * call rds_rdma_send_complete from the cq_handler. To implement #1,
104 * don't call rds_rdma_send_complete at all, and fall back to the notify
105 * handling in the ACK processing code.
106 *
107 * Note: There's no need to explicitly sync any RDMA buffers using
108 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
109 * operation itself unmapped the RDMA buffers, which takes care
110 * of synching.
111 */
112 rds_iw_send_rdma_complete(rm, wc_status);
113
114 if (rm->m_rdma_op->r_write)
115 rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes);
116 else
117 rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes);
118 }
119
120 /* If anyone waited for this message to get flushed out, wake
121 * them up now */
122 rds_message_unmapped(rm);
123
124 rds_message_put(rm);
125 send->s_rm = NULL;
126}
127
128void rds_iw_send_init_ring(struct rds_iw_connection *ic)
129{
130 struct rds_iw_send_work *send;
131 u32 i;
132
133 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
134 struct ib_sge *sge;
135
136 send->s_rm = NULL;
137 send->s_op = NULL;
138 send->s_mapping = NULL;
139
140 send->s_wr.next = NULL;
141 send->s_wr.wr_id = i;
142 send->s_wr.sg_list = send->s_sge;
143 send->s_wr.num_sge = 1;
144 send->s_wr.opcode = IB_WR_SEND;
145 send->s_wr.send_flags = 0;
146 send->s_wr.ex.imm_data = 0;
147
148 sge = rds_iw_data_sge(ic, send->s_sge);
149 sge->lkey = 0;
150
151 sge = rds_iw_header_sge(ic, send->s_sge);
152 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
153 sge->length = sizeof(struct rds_header);
154 sge->lkey = 0;
155
156 send->s_mr = ib_alloc_fast_reg_mr(ic->i_pd, fastreg_message_size);
157 if (IS_ERR(send->s_mr)) {
158 printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed\n");
159 break;
160 }
161
162 send->s_page_list = ib_alloc_fast_reg_page_list(
163 ic->i_cm_id->device, fastreg_message_size);
164 if (IS_ERR(send->s_page_list)) {
165 printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n");
166 break;
167 }
168 }
169}
170
171void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
172{
173 struct rds_iw_send_work *send;
174 u32 i;
175
176 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
177 BUG_ON(!send->s_mr);
178 ib_dereg_mr(send->s_mr);
179 BUG_ON(!send->s_page_list);
180 ib_free_fast_reg_page_list(send->s_page_list);
181 if (send->s_wr.opcode == 0xdead)
182 continue;
183 if (send->s_rm)
184 rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
185 if (send->s_op)
186 rds_iw_send_unmap_rdma(ic, send->s_op);
187 }
188}
189
190/*
191 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
192 * operations performed in the send path. As the sender allocs and potentially
193 * unallocs the next free entry in the ring it doesn't alter which is
194 * the next to be freed, which is what this is concerned with.
195 */
196void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
197{
198 struct rds_connection *conn = context;
199 struct rds_iw_connection *ic = conn->c_transport_data;
200 struct ib_wc wc;
201 struct rds_iw_send_work *send;
202 u32 completed;
203 u32 oldest;
204 u32 i;
205 int ret;
206
207 rdsdebug("cq %p conn %p\n", cq, conn);
208 rds_iw_stats_inc(s_iw_tx_cq_call);
209 ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
210 if (ret)
211 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
212
213 while (ib_poll_cq(cq, 1, &wc) > 0) {
214 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
215 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
216 be32_to_cpu(wc.ex.imm_data));
217 rds_iw_stats_inc(s_iw_tx_cq_event);
218
219 if (wc.status != IB_WC_SUCCESS) {
220 printk(KERN_ERR "WC Error: status = %d opcode = %d\n", wc.status, wc.opcode);
221 break;
222 }
223
224 if (wc.opcode == IB_WC_LOCAL_INV && wc.wr_id == RDS_IW_LOCAL_INV_WR_ID) {
225 ic->i_fastreg_posted = 0;
226 continue;
227 }
228
229 if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) {
230 ic->i_fastreg_posted = 1;
231 continue;
232 }
233
234 if (wc.wr_id == RDS_IW_ACK_WR_ID) {
235 if (ic->i_ack_queued + HZ/2 < jiffies)
236 rds_iw_stats_inc(s_iw_tx_stalled);
237 rds_iw_ack_send_complete(ic);
238 continue;
239 }
240
241 oldest = rds_iw_ring_oldest(&ic->i_send_ring);
242
243 completed = rds_iw_ring_completed(&ic->i_send_ring, wc.wr_id, oldest);
244
245 for (i = 0; i < completed; i++) {
246 send = &ic->i_sends[oldest];
247
248 /* In the error case, wc.opcode sometimes contains garbage */
249 switch (send->s_wr.opcode) {
250 case IB_WR_SEND:
251 if (send->s_rm)
252 rds_iw_send_unmap_rm(ic, send, wc.status);
253 break;
254 case IB_WR_FAST_REG_MR:
255 case IB_WR_RDMA_WRITE:
256 case IB_WR_RDMA_READ:
257 case IB_WR_RDMA_READ_WITH_INV:
258 /* Nothing to be done - the SG list will be unmapped
259 * when the SEND completes. */
260 break;
261 default:
262 if (printk_ratelimit())
263 printk(KERN_NOTICE
264 "RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
265 __func__, send->s_wr.opcode);
266 break;
267 }
268
269 send->s_wr.opcode = 0xdead;
270 send->s_wr.num_sge = 1;
271 if (send->s_queued + HZ/2 < jiffies)
272 rds_iw_stats_inc(s_iw_tx_stalled);
273
274			/* If an RDMA operation produced an error, signal this right
275			 * away. If we don't, the subsequent SEND that goes with this
276			 * RDMA will be canceled with ERR_WFLUSH, and the application
277			 * will never learn that the RDMA failed. */
278 if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) {
279 struct rds_message *rm;
280
281 rm = rds_send_get_message(conn, send->s_op);
282 if (rm)
283 rds_iw_send_rdma_complete(rm, wc.status);
284 }
285
286 oldest = (oldest + 1) % ic->i_send_ring.w_nr;
287 }
288
289 rds_iw_ring_free(&ic->i_send_ring, completed);
290
291 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags)
292 || test_bit(0, &conn->c_map_queued))
293 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
294
295 /* We expect errors as the qp is drained during shutdown */
296 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
297 rds_iw_conn_error(conn,
298 "send completion on %pI4 "
299 "had status %u, disconnecting and reconnecting\n",
300 &conn->c_faddr, wc.status);
301 }
302 }
303}
304
305/*
306 * This is the main function for allocating credits when sending
307 * messages.
308 *
309 * Conceptually, we have two counters:
310 * - send credits: this tells us how many WRs we're allowed
311 *	to submit without overrunning the receiver's queue. For
312 * each SEND WR we post, we decrement this by one.
313 *
314 * - posted credits: this tells us how many WRs we recently
315 * posted to the receive queue. This value is transferred
316 * to the peer as a "credit update" in a RDS header field.
317 * Every time we transmit credits to the peer, we subtract
318 * the amount of transferred credits from this counter.
319 *
320 * It is essential that we avoid situations where both sides have
321 * exhausted their send credits, and are unable to send new credits
322 * to the peer. We achieve this by requiring that we send at least
323 * one credit update to the peer before exhausting our credits.
324 * When new credits arrive, we subtract one credit that is withheld
325 * until we've posted new buffers and are ready to transmit these
326 * credits (see rds_iw_send_add_credits below).
327 *
328 * The RDS send code is essentially single-threaded; rds_send_xmit
329 * grabs c_send_lock to ensure exclusive access to the send ring.
330 * However, the ACK sending code is independent and can race with
331 * message SENDs.
332 *
333 * In the send path, we need to update the counters for send credits
334 * and the counter of posted buffers atomically - when we use the
335 * last available credit, we cannot allow another thread to race us
336 * and grab the posted credits counter. Hence, we have to use a
337 * spinlock to protect the credit counter, or use atomics.
338 *
339 * Spinlocks shared between the send and the receive path are bad,
340 * because they create unnecessary delays. An early implementation
341 * using a spinlock showed a 5% degradation in throughput at some
342 * loads.
343 *
344 * This implementation avoids spinlocks completely, putting both
345 * counters into a single atomic, and updating that atomic using
346 * atomic_add (in the receive path, when receiving fresh credits),
347 * and using atomic_cmpxchg when updating the two counters.
348 */
349int rds_iw_send_grab_credits(struct rds_iw_connection *ic,
350 u32 wanted, u32 *adv_credits, int need_posted)
351{
352 unsigned int avail, posted, got = 0, advertise;
353 long oldval, newval;
354
355 *adv_credits = 0;
356 if (!ic->i_flowctl)
357 return wanted;
358
359try_again:
360 advertise = 0;
361 oldval = newval = atomic_read(&ic->i_credits);
362 posted = IB_GET_POST_CREDITS(oldval);
363 avail = IB_GET_SEND_CREDITS(oldval);
364
365 rdsdebug("rds_iw_send_grab_credits(%u): credits=%u posted=%u\n",
366 wanted, avail, posted);
367
368 /* The last credit must be used to send a credit update. */
369 if (avail && !posted)
370 avail--;
371
372 if (avail < wanted) {
373 struct rds_connection *conn = ic->i_cm_id->context;
374
375 /* Oops, there aren't that many credits left! */
376 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
377 got = avail;
378 } else {
379 /* Sometimes you get what you want, lalala. */
380 got = wanted;
381 }
382 newval -= IB_SET_SEND_CREDITS(got);
383
384 /*
385 * If need_posted is non-zero, then the caller wants
386	 * the posted credits advertised regardless of whether any send credits are
387 * available.
388 */
389 if (posted && (got || need_posted)) {
390 advertise = min_t(unsigned int, posted, RDS_MAX_ADV_CREDIT);
391 newval -= IB_SET_POST_CREDITS(advertise);
392 }
393
394 /* Finally bill everything */
395 if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval)
396 goto try_again;
397
398 *adv_credits = advertise;
399 return got;
400}
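
To make the packed-atomic scheme above concrete, here is a hedged userspace sketch. The SET_/GET_ macro names, the 16/16 bit split and grab() are assumptions for illustration (the kernel code uses its own IB_SET_*/IB_GET_* helpers), but the compare-and-swap retry loop mirrors the structure of rds_iw_send_grab_credits(): both counters live in one word, so they can be read and updated atomically without a spinlock.

#include <stdatomic.h>
#include <stdio.h>

#define SET_SEND(v)	((v) & 0xffff)			/* send credits in the low 16 bits */
#define SET_POST(v)	((unsigned int)(v) << 16)	/* posted credits in the high 16 bits */
#define GET_SEND(v)	((v) & 0xffff)
#define GET_POST(v)	((v) >> 16)

static _Atomic unsigned int credits;

/* Consume up to 'wanted' send credits and drain the posted credits,
 * retrying if another thread updated the word in the meantime. */
static unsigned int grab(unsigned int wanted, unsigned int *advertise)
{
	unsigned int oldval, newval, got;

	do {
		oldval = newval = atomic_load(&credits);
		got = GET_SEND(oldval) < wanted ? GET_SEND(oldval) : wanted;
		*advertise = GET_POST(oldval);
		newval -= SET_SEND(got) + SET_POST(*advertise);
	} while (!atomic_compare_exchange_weak(&credits, &oldval, newval));

	return got;
}

int main(void)
{
	unsigned int adv, got;

	atomic_store(&credits, SET_SEND(10) + SET_POST(3));
	got = grab(4, &adv);
	printf("got %u send credits, advertising %u\n", got, adv);	/* got 4, advertising 3 */
	return 0;
}
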
401
402void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits)
403{
404 struct rds_iw_connection *ic = conn->c_transport_data;
405
406 if (credits == 0)
407 return;
408
409 rdsdebug("rds_iw_send_add_credits(%u): current=%u%s\n",
410 credits,
411 IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)),
412 test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : "");
413
414 atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits);
415 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
416 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
417
418 WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384);
419
420 rds_iw_stats_inc(s_iw_rx_credit_updates);
421}
422
423void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted)
424{
425 struct rds_iw_connection *ic = conn->c_transport_data;
426
427 if (posted == 0)
428 return;
429
430 atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits);
431
432 /* Decide whether to send an update to the peer now.
433 * If we would send a credit update for every single buffer we
434 * post, we would end up with an ACK storm (ACK arrives,
435 * consumes buffer, we refill the ring, send ACK to remote
436 * advertising the newly posted buffer... ad inf)
437 *
438 * Performance pretty much depends on how often we send
439 * credit updates - too frequent updates mean lots of ACKs.
440 * Too infrequent updates, and the peer will run out of
441	 * credits and have to throttle.
442 * For the time being, 16 seems to be a good compromise.
443 */
444 if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16)
445 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
446}
447
448static inline void
449rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
450 struct rds_iw_send_work *send, unsigned int pos,
451 unsigned long buffer, unsigned int length,
452 int send_flags)
453{
454 struct ib_sge *sge;
455
456 WARN_ON(pos != send - ic->i_sends);
457
458 send->s_wr.send_flags = send_flags;
459 send->s_wr.opcode = IB_WR_SEND;
460 send->s_wr.num_sge = 2;
461 send->s_wr.next = NULL;
462 send->s_queued = jiffies;
463 send->s_op = NULL;
464
465 if (length != 0) {
466 sge = rds_iw_data_sge(ic, send->s_sge);
467 sge->addr = buffer;
468 sge->length = length;
469 sge->lkey = rds_iw_local_dma_lkey(ic);
470
471 sge = rds_iw_header_sge(ic, send->s_sge);
472 } else {
473 /* We're sending a packet with no payload. There is only
474 * one SGE */
475 send->s_wr.num_sge = 1;
476 sge = &send->s_sge[0];
477 }
478
479 sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header));
480 sge->length = sizeof(struct rds_header);
481 sge->lkey = rds_iw_local_dma_lkey(ic);
482}
483
484/*
485 * This can be called multiple times for a given message. The first time
486 * we see a message we map its scatterlist into the IB device so that
487 * we can provide that mapped address to the IB scatter gather entries
488 * in the IB work requests. We translate the scatterlist into a series
489 * of work requests that fragment the message. These work requests complete
490 * in order so we pass ownership of the message to the completion handler
491 * once we send the final fragment.
492 *
493 * The RDS core uses the c_send_lock to only enter this function once
494 * per connection. This makes sure that the tx ring alloc/unalloc pairs
495 * don't get out of sync and confuse the ring.
496 */
497int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
498 unsigned int hdr_off, unsigned int sg, unsigned int off)
499{
500 struct rds_iw_connection *ic = conn->c_transport_data;
501 struct ib_device *dev = ic->i_cm_id->device;
502 struct rds_iw_send_work *send = NULL;
503 struct rds_iw_send_work *first;
504 struct rds_iw_send_work *prev;
505 struct ib_send_wr *failed_wr;
506 struct scatterlist *scat;
507 u32 pos;
508 u32 i;
509 u32 work_alloc;
510 u32 credit_alloc;
511 u32 posted;
512 u32 adv_credits = 0;
513 int send_flags = 0;
514 int sent;
515 int ret;
516 int flow_controlled = 0;
517
518 BUG_ON(off % RDS_FRAG_SIZE);
519 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
520
521 /* Fastreg support */
522 if (rds_rdma_cookie_key(rm->m_rdma_cookie)
523 && !ic->i_fastreg_posted) {
524 ret = -EAGAIN;
525 goto out;
526 }
527
528 /* FIXME we may overallocate here */
529 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
530 i = 1;
531 else
532 i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
533
534 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
535 if (work_alloc == 0) {
536 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
537 rds_iw_stats_inc(s_iw_tx_ring_full);
538 ret = -ENOMEM;
539 goto out;
540 }
541
542 credit_alloc = work_alloc;
543 if (ic->i_flowctl) {
544 credit_alloc = rds_iw_send_grab_credits(ic, work_alloc, &posted, 0);
545 adv_credits += posted;
546 if (credit_alloc < work_alloc) {
547 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
548 work_alloc = credit_alloc;
549 flow_controlled++;
550 }
551 if (work_alloc == 0) {
552 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
553 rds_iw_stats_inc(s_iw_tx_throttle);
554 ret = -ENOMEM;
555 goto out;
556 }
557 }
558
559 /* map the message the first time we see it */
560 if (ic->i_rm == NULL) {
561 /*
562 printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n",
563 be16_to_cpu(rm->m_inc.i_hdr.h_dport),
564 rm->m_inc.i_hdr.h_flags,
565 be32_to_cpu(rm->m_inc.i_hdr.h_len));
566 */
567 if (rm->m_nents) {
568 rm->m_count = ib_dma_map_sg(dev,
569 rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
570 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
571 if (rm->m_count == 0) {
572 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
573 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
574 ret = -ENOMEM; /* XXX ? */
575 goto out;
576 }
577 } else {
578 rm->m_count = 0;
579 }
580
581 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
582 ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
583 rds_message_addref(rm);
584 ic->i_rm = rm;
585
586 /* Finalize the header */
587 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
588 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED;
589 if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
590 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
591
592		/* If it has an RDMA op, tell the peer we did it. This is
593 * used by the peer to release use-once RDMA MRs. */
594 if (rm->m_rdma_op) {
595 struct rds_ext_header_rdma ext_hdr;
596
597 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key);
598 rds_message_add_extension(&rm->m_inc.i_hdr,
599 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
600 }
601 if (rm->m_rdma_cookie) {
602 rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
603 rds_rdma_cookie_key(rm->m_rdma_cookie),
604 rds_rdma_cookie_offset(rm->m_rdma_cookie));
605 }
606
607 /* Note - rds_iw_piggyb_ack clears the ACK_REQUIRED bit, so
608 * we should not do this unless we have a chance of at least
609 * sticking the header into the send ring. Which is why we
610 * should call rds_iw_ring_alloc first. */
611 rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_iw_piggyb_ack(ic));
612 rds_message_make_checksum(&rm->m_inc.i_hdr);
613
614 /*
615 * Update adv_credits since we reset the ACK_REQUIRED bit.
616 */
617 rds_iw_send_grab_credits(ic, 0, &posted, 1);
618 adv_credits += posted;
619 BUG_ON(adv_credits > 255);
620 } else if (ic->i_rm != rm)
621 BUG();
622
623 send = &ic->i_sends[pos];
624 first = send;
625 prev = NULL;
626 scat = &rm->m_sg[sg];
627 sent = 0;
628 i = 0;
629
630 /* Sometimes you want to put a fence between an RDMA
631 * READ and the following SEND.
632 * We could either do this all the time
633 * or when requested by the user. Right now, we let
634 * the application choose.
635 */
636 if (rm->m_rdma_op && rm->m_rdma_op->r_fence)
637 send_flags = IB_SEND_FENCE;
638
639 /*
640 * We could be copying the header into the unused tail of the page.
641 * That would need to be changed in the future when those pages might
642 * be mapped userspace pages or page cache pages. So instead we always
643 * use a second sge and our long-lived ring of mapped headers. We send
644 * the header after the data so that the data payload can be aligned on
645 * the receiver.
646 */
647
648 /* handle a 0-len message */
649 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) {
650 rds_iw_xmit_populate_wr(ic, send, pos, 0, 0, send_flags);
651 goto add_header;
652 }
653
654 /* if there's data reference it with a chain of work reqs */
655 for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
656 unsigned int len;
657
658 send = &ic->i_sends[pos];
659
660 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
661 rds_iw_xmit_populate_wr(ic, send, pos,
662 ib_sg_dma_address(dev, scat) + off, len,
663 send_flags);
664
665 /*
666 * We want to delay signaling completions just enough to get
667 * the batching benefits but not so much that we create dead time
668 * on the wire.
669 */
670 if (ic->i_unsignaled_wrs-- == 0) {
671 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
672 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
673 }
674
675 ic->i_unsignaled_bytes -= len;
676 if (ic->i_unsignaled_bytes <= 0) {
677 ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
678 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
679 }
680
681 /*
682 * Always signal the last one if we're stopping due to flow control.
683 */
684 if (flow_controlled && i == (work_alloc-1))
685 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
686
687 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
688 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
689
690 sent += len;
691 off += len;
692 if (off == ib_sg_dma_len(dev, scat)) {
693 scat++;
694 off = 0;
695 }
696
697add_header:
698 /* Tack on the header after the data. The header SGE should already
699 * have been set up to point to the right header buffer. */
700 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
701
702 if (0) {
703 struct rds_header *hdr = &ic->i_send_hdrs[pos];
704
705 printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n",
706 be16_to_cpu(hdr->h_dport),
707 hdr->h_flags,
708 be32_to_cpu(hdr->h_len));
709 }
710 if (adv_credits) {
711 struct rds_header *hdr = &ic->i_send_hdrs[pos];
712
713 /* add credit and redo the header checksum */
714 hdr->h_credit = adv_credits;
715 rds_message_make_checksum(hdr);
716 adv_credits = 0;
717 rds_iw_stats_inc(s_iw_tx_credit_updates);
718 }
719
720 if (prev)
721 prev->s_wr.next = &send->s_wr;
722 prev = send;
723
724 pos = (pos + 1) % ic->i_send_ring.w_nr;
725 }
726
727 /* Account the RDS header in the number of bytes we sent, but just once.
728 * The caller has no concept of fragmentation. */
729 if (hdr_off == 0)
730 sent += sizeof(struct rds_header);
731
732 /* if we finished the message then send completion owns it */
733 if (scat == &rm->m_sg[rm->m_count]) {
734 prev->s_rm = ic->i_rm;
735 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
736 ic->i_rm = NULL;
737 }
738
739 if (i < work_alloc) {
740 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
741 work_alloc = i;
742 }
743 if (ic->i_flowctl && i < credit_alloc)
744 rds_iw_send_add_credits(conn, credit_alloc - i);
745
746 /* XXX need to worry about failed_wr and partial sends. */
747 failed_wr = &first->s_wr;
748 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
749 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
750 first, &first->s_wr, ret, failed_wr);
751 BUG_ON(failed_wr != &first->s_wr);
752 if (ret) {
753 printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 "
754 "returned %d\n", &conn->c_faddr, ret);
755 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
756 if (prev->s_rm) {
757 ic->i_rm = prev->s_rm;
758 prev->s_rm = NULL;
759 }
760 goto out;
761 }
762
763 ret = sent;
764out:
765 BUG_ON(adv_credits);
766 return ret;
767}
768
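For reference, the ring allocation near the top of rds_iw_xmit() sizes the request as ceil(h_len, RDS_FRAG_SIZE). A worked example of that arithmetic, assuming a round-up ceil() helper and a 4 KiB fragment size (neither definition appears in this hunk):

	/* Assumed helpers, for illustration only. */
	#define RDS_FRAG_SIZE	4096U
	#define ceil(x, y)	(((x) + (y) - 1) / (y))

	/* A 10000-byte message needs ceil(10000, 4096) = 3 work requests,
	 * one per fragment; a 0-byte message still reserves one request so
	 * the header can be sent on its own. */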
769static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr)
770{
771 BUG_ON(nent > send->s_page_list->max_page_list_len);
772 /*
773 * Perform a WR for the fast_reg_mr. Each individual page
774 * in the sg list is added to the fast reg page list and placed
775 * inside the fast_reg_mr WR.
776 */
777 send->s_wr.opcode = IB_WR_FAST_REG_MR;
778 send->s_wr.wr.fast_reg.length = len;
779 send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey;
780 send->s_wr.wr.fast_reg.page_list = send->s_page_list;
781 send->s_wr.wr.fast_reg.page_list_len = nent;
782 send->s_wr.wr.fast_reg.page_shift = rds_iwdev->page_shift;
783 send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE;
784 send->s_wr.wr.fast_reg.iova_start = sg_addr;
785
786 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
787}
788
789int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
790{
791 struct rds_iw_connection *ic = conn->c_transport_data;
792 struct rds_iw_send_work *send = NULL;
793 struct rds_iw_send_work *first;
794 struct rds_iw_send_work *prev;
795 struct ib_send_wr *failed_wr;
796 struct rds_iw_device *rds_iwdev;
797 struct scatterlist *scat;
798 unsigned long len;
799 u64 remote_addr = op->r_remote_addr;
800 u32 pos, fr_pos;
801 u32 work_alloc;
802 u32 i;
803 u32 j;
804 int sent;
805 int ret;
806 int num_sge;
807
808 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
809
810 /* map the message the first time we see it */
811 if (!op->r_mapped) {
812 op->r_count = ib_dma_map_sg(ic->i_cm_id->device,
813 op->r_sg, op->r_nents, (op->r_write) ?
814 DMA_TO_DEVICE : DMA_FROM_DEVICE);
815 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count);
816 if (op->r_count == 0) {
817 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
818 ret = -ENOMEM; /* XXX ? */
819 goto out;
820 }
821
822 op->r_mapped = 1;
823 }
824
825 if (!op->r_write) {
826 /* Alloc space on the send queue for the fastreg */
827 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos);
828 if (work_alloc != 1) {
829 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
830 rds_iw_stats_inc(s_iw_tx_ring_full);
831 ret = -ENOMEM;
832 goto out;
833 }
834 }
835
836 /*
837 * Instead of knowing how to return a partial rdma read/write we insist that there
838 * be enough work requests to send the entire message.
839 */
840 i = ceil(op->r_count, rds_iwdev->max_sge);
841
842 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
843 if (work_alloc != i) {
844 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
845 rds_iw_stats_inc(s_iw_tx_ring_full);
846 ret = -ENOMEM;
847 goto out;
848 }
849
850 send = &ic->i_sends[pos];
851 if (!op->r_write) {
852 first = prev = &ic->i_sends[fr_pos];
853 } else {
854 first = send;
855 prev = NULL;
856 }
857 scat = &op->r_sg[0];
858 sent = 0;
859 num_sge = op->r_count;
860
861 for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) {
862 send->s_wr.send_flags = 0;
863 send->s_queued = jiffies;
864
865 /*
866 * We want to delay signaling completions just enough to get
867 * the batching benefits but not so much that we create dead time on the wire.
868 */
869 if (ic->i_unsignaled_wrs-- == 0) {
870 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
871 send->s_wr.send_flags = IB_SEND_SIGNALED;
872 }
873
874		/* Rather than adding plumbing to invalidate the fastreg_mr used for
875		 * local access once RDS is finished with it, we use
876		 * IB_WR_RDMA_READ_WITH_INV, which invalidates the MR after the read has completed.
877		 */
878 if (op->r_write)
879 send->s_wr.opcode = IB_WR_RDMA_WRITE;
880 else
881 send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
882
883 send->s_wr.wr.rdma.remote_addr = remote_addr;
884 send->s_wr.wr.rdma.rkey = op->r_key;
885 send->s_op = op;
886
887 if (num_sge > rds_iwdev->max_sge) {
888 send->s_wr.num_sge = rds_iwdev->max_sge;
889 num_sge -= rds_iwdev->max_sge;
890 } else
891 send->s_wr.num_sge = num_sge;
892
893 send->s_wr.next = NULL;
894
895 if (prev)
896 prev->s_wr.next = &send->s_wr;
897
898 for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) {
899 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
900
901 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
902 send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat);
903 else {
904 send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
905 send->s_sge[j].length = len;
906 send->s_sge[j].lkey = rds_iw_local_dma_lkey(ic);
907 }
908
909 sent += len;
910 rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);
911 remote_addr += len;
912
913 scat++;
914 }
915
916 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
917 send->s_wr.num_sge = 1;
918 send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
919 send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
920 send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey;
921 }
922
923 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
924 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
925
926 prev = send;
927 if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
928 send = ic->i_sends;
929 }
930
931 /* if we finished the message then send completion owns it */
932 if (scat == &op->r_sg[op->r_count])
933 first->s_wr.send_flags = IB_SEND_SIGNALED;
934
935 if (i < work_alloc) {
936 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
937 work_alloc = i;
938 }
939
940	/* On iWARP, local memory access by a remote system (i.e., an RDMA Read) is not
941	 * recommended. Putting the lkey on the wire is a security hole, as it can
942	 * allow access to all of the memory on the remote system. Some
943	 * adapters do not allow using the lkey for this at all. To bypass this, use a
944	 * fastreg_mr (or possibly a dma_mr).
945	 */
946 if (!op->r_write) {
947 rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
948 op->r_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
949 work_alloc++;
950 }
951
952 failed_wr = &first->s_wr;
953 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
954 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
955 first, &first->s_wr, ret, failed_wr);
956 BUG_ON(failed_wr != &first->s_wr);
957 if (ret) {
958 printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 "
959 "returned %d\n", &conn->c_faddr, ret);
960 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
961 goto out;
962 }
963
964out:
965 return ret;
966}
967
968void rds_iw_xmit_complete(struct rds_connection *conn)
969{
970 struct rds_iw_connection *ic = conn->c_transport_data;
971
972 /* We may have a pending ACK or window update we were unable
973 * to send previously (due to flow control). Try again. */
974 rds_iw_attempt_ack(ic);
975}
diff --git a/net/rds/iw_stats.c b/net/rds/iw_stats.c
new file mode 100644
index 000000000000..ccc7e8f0bf0e
--- /dev/null
+++ b/net/rds/iw_stats.c
@@ -0,0 +1,95 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/percpu.h>
34#include <linux/seq_file.h>
35#include <linux/proc_fs.h>
36
37#include "rds.h"
38#include "iw.h"
39
40DEFINE_PER_CPU(struct rds_iw_statistics, rds_iw_stats) ____cacheline_aligned;
41
42static char *rds_iw_stat_names[] = {
43 "iw_connect_raced",
44 "iw_listen_closed_stale",
45 "iw_tx_cq_call",
46 "iw_tx_cq_event",
47 "iw_tx_ring_full",
48 "iw_tx_throttle",
49 "iw_tx_sg_mapping_failure",
50 "iw_tx_stalled",
51 "iw_tx_credit_updates",
52 "iw_rx_cq_call",
53 "iw_rx_cq_event",
54 "iw_rx_ring_empty",
55 "iw_rx_refill_from_cq",
56 "iw_rx_refill_from_thread",
57 "iw_rx_alloc_limit",
58 "iw_rx_credit_updates",
59 "iw_ack_sent",
60 "iw_ack_send_failure",
61 "iw_ack_send_delayed",
62 "iw_ack_send_piggybacked",
63 "iw_ack_received",
64 "iw_rdma_mr_alloc",
65 "iw_rdma_mr_free",
66 "iw_rdma_mr_used",
67 "iw_rdma_mr_pool_flush",
68 "iw_rdma_mr_pool_wait",
69 "iw_rdma_mr_pool_depleted",
70};
71
72unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter,
73 unsigned int avail)
74{
75 struct rds_iw_statistics stats = {0, };
76 uint64_t *src;
77 uint64_t *sum;
78 size_t i;
79 int cpu;
80
81 if (avail < ARRAY_SIZE(rds_iw_stat_names))
82 goto out;
83
84 for_each_online_cpu(cpu) {
85 src = (uint64_t *)&(per_cpu(rds_iw_stats, cpu));
86 sum = (uint64_t *)&stats;
87 for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++)
88 *(sum++) += *(src++);
89 }
90
91 rds_stats_info_copy(iter, (uint64_t *)&stats, rds_iw_stat_names,
92 ARRAY_SIZE(rds_iw_stat_names));
93out:
94 return ARRAY_SIZE(rds_iw_stat_names);
95}
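The summing loop above treats struct rds_iw_statistics as a flat array of uint64_t counters in the same order as rds_iw_stat_names (the struct itself is defined in iw.h and not shown here). A hedged sketch of a compile-time guard that would catch the two drifting apart; its placement here is illustrative:

	static inline void rds_iw_stats_check_layout(void)
	{
		/* Every name must correspond to exactly one u64 counter, in
		 * the same order, or the per-cpu sums silently shift. */
		BUILD_BUG_ON(sizeof(struct rds_iw_statistics) / sizeof(uint64_t) !=
			     ARRAY_SIZE(rds_iw_stat_names));
	}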
diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c
new file mode 100644
index 000000000000..9590678cd616
--- /dev/null
+++ b/net/rds/iw_sysctl.c
@@ -0,0 +1,137 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/sysctl.h>
35#include <linux/proc_fs.h>
36
37#include "iw.h"
38
39static struct ctl_table_header *rds_iw_sysctl_hdr;
40
41unsigned long rds_iw_sysctl_max_send_wr = RDS_IW_DEFAULT_SEND_WR;
42unsigned long rds_iw_sysctl_max_recv_wr = RDS_IW_DEFAULT_RECV_WR;
43unsigned long rds_iw_sysctl_max_recv_allocation = (128 * 1024 * 1024) / RDS_FRAG_SIZE;
44static unsigned long rds_iw_sysctl_max_wr_min = 1;
45/* hardware will fail CQ creation long before this */
46static unsigned long rds_iw_sysctl_max_wr_max = (u32)~0;
47
48unsigned long rds_iw_sysctl_max_unsig_wrs = 16;
49static unsigned long rds_iw_sysctl_max_unsig_wr_min = 1;
50static unsigned long rds_iw_sysctl_max_unsig_wr_max = 64;
51
52unsigned long rds_iw_sysctl_max_unsig_bytes = (16 << 20);
53static unsigned long rds_iw_sysctl_max_unsig_bytes_min = 1;
54static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL;
55
56unsigned int rds_iw_sysctl_flow_control = 1;
57
58ctl_table rds_iw_sysctl_table[] = {
59 {
60 .ctl_name = CTL_UNNUMBERED,
61 .procname = "max_send_wr",
62 .data = &rds_iw_sysctl_max_send_wr,
63 .maxlen = sizeof(unsigned long),
64 .mode = 0644,
65 .proc_handler = &proc_doulongvec_minmax,
66 .extra1 = &rds_iw_sysctl_max_wr_min,
67 .extra2 = &rds_iw_sysctl_max_wr_max,
68 },
69 {
70 .ctl_name = CTL_UNNUMBERED,
71 .procname = "max_recv_wr",
72 .data = &rds_iw_sysctl_max_recv_wr,
73 .maxlen = sizeof(unsigned long),
74 .mode = 0644,
75 .proc_handler = &proc_doulongvec_minmax,
76 .extra1 = &rds_iw_sysctl_max_wr_min,
77 .extra2 = &rds_iw_sysctl_max_wr_max,
78 },
79 {
80 .ctl_name = CTL_UNNUMBERED,
81 .procname = "max_unsignaled_wr",
82 .data = &rds_iw_sysctl_max_unsig_wrs,
83 .maxlen = sizeof(unsigned long),
84 .mode = 0644,
85 .proc_handler = &proc_doulongvec_minmax,
86 .extra1 = &rds_iw_sysctl_max_unsig_wr_min,
87 .extra2 = &rds_iw_sysctl_max_unsig_wr_max,
88 },
89 {
90 .ctl_name = CTL_UNNUMBERED,
91 .procname = "max_unsignaled_bytes",
92 .data = &rds_iw_sysctl_max_unsig_bytes,
93 .maxlen = sizeof(unsigned long),
94 .mode = 0644,
95 .proc_handler = &proc_doulongvec_minmax,
96 .extra1 = &rds_iw_sysctl_max_unsig_bytes_min,
97 .extra2 = &rds_iw_sysctl_max_unsig_bytes_max,
98 },
99 {
100 .ctl_name = CTL_UNNUMBERED,
101 .procname = "max_recv_allocation",
102 .data = &rds_iw_sysctl_max_recv_allocation,
103 .maxlen = sizeof(unsigned long),
104 .mode = 0644,
105 .proc_handler = &proc_doulongvec_minmax,
106 },
107 {
108 .ctl_name = CTL_UNNUMBERED,
109 .procname = "flow_control",
110 .data = &rds_iw_sysctl_flow_control,
111 .maxlen = sizeof(rds_iw_sysctl_flow_control),
112 .mode = 0644,
113 .proc_handler = &proc_dointvec,
114 },
115 { .ctl_name = 0}
116};
117
118static struct ctl_path rds_iw_sysctl_path[] = {
119 { .procname = "net", .ctl_name = CTL_NET, },
120 { .procname = "rds", .ctl_name = CTL_UNNUMBERED, },
121 { .procname = "iw", .ctl_name = CTL_UNNUMBERED, },
122 { }
123};
124
125void rds_iw_sysctl_exit(void)
126{
127 if (rds_iw_sysctl_hdr)
128 unregister_sysctl_table(rds_iw_sysctl_hdr);
129}
130
131int __init rds_iw_sysctl_init(void)
132{
133 rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table);
134 if (rds_iw_sysctl_hdr == NULL)
135 return -ENOMEM;
136 return 0;
137}
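Given the ctl_path above, these tunables should appear under /proc/sys/net/rds/iw/ once the module is loaded; the exact path is inferred from the table rather than confirmed here. A small user-space sketch for reading one of them:

	#include <stdio.h>

	int main(void)
	{
		char buf[32];
		FILE *f = fopen("/proc/sys/net/rds/iw/flow_control", "r");

		if (f && fgets(buf, sizeof(buf), f))
			printf("RDS/IW flow control: %s", buf);
		if (f)
			fclose(f);
		return 0;
	}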
diff --git a/net/rds/loop.c b/net/rds/loop.c
new file mode 100644
index 000000000000..4a61997f554d
--- /dev/null
+++ b/net/rds/loop.c
@@ -0,0 +1,188 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35
36#include "rds.h"
37#include "loop.h"
38
39static DEFINE_SPINLOCK(loop_conns_lock);
40static LIST_HEAD(loop_conns);
41
42/*
43 * This 'loopback' transport is a special case for flows that originate
44 * and terminate on the same machine.
45 *
46 * Connection build-up notices when a transport considers the destination
47 * address to be local. At that point it decides to use the
48 * loopback transport instead of the bound transport of the sending socket.
49 *
50 * The loopback transport's sending path just hands the sent rds_message
51 * straight to the receiving path via an embedded rds_incoming.
52 */
53
54/*
55 * Usually a message transits both the sender and receiver's conns as it
56 * flows to the receiver. In the loopback case, though, the receive path
57 * is handed the sending conn so the sense of the addresses is reversed.
58 */
59static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
60 unsigned int hdr_off, unsigned int sg,
61 unsigned int off)
62{
63 BUG_ON(hdr_off || sg || off);
64
65 rds_inc_init(&rm->m_inc, conn, conn->c_laddr);
66 rds_message_addref(rm); /* for the inc */
67
68 rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc,
69 GFP_KERNEL, KM_USER0);
70
71 rds_send_drop_acked(conn, be64_to_cpu(rm->m_inc.i_hdr.h_sequence),
72 NULL);
73
74 rds_inc_put(&rm->m_inc);
75
76 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len);
77}
78
79static int rds_loop_xmit_cong_map(struct rds_connection *conn,
80 struct rds_cong_map *map,
81 unsigned long offset)
82{
83 unsigned long i;
84
85 BUG_ON(offset);
86 BUG_ON(map != conn->c_lcong);
87
88 for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
89 memcpy((void *)conn->c_fcong->m_page_addrs[i],
90 (void *)map->m_page_addrs[i], PAGE_SIZE);
91 }
92
93 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
94
95 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
96}
97
98/* we need to at least give the thread something that succeeds */
99static int rds_loop_recv(struct rds_connection *conn)
100{
101 return 0;
102}
103
104struct rds_loop_connection {
105 struct list_head loop_node;
106 struct rds_connection *conn;
107};
108
109/*
110 * Even the loopback transport needs to keep track of its connections,
111 * so it can call rds_conn_destroy() on them on exit. N.B. there are
112 * 1+ loopback addresses (127.*.*.*) so it's not a bug to have
113 * multiple loopback conns allocated, although rather useless.
114 */
115static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
116{
117 struct rds_loop_connection *lc;
118 unsigned long flags;
119
120 lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL);
121 if (lc == NULL)
122 return -ENOMEM;
123
124 INIT_LIST_HEAD(&lc->loop_node);
125 lc->conn = conn;
126 conn->c_transport_data = lc;
127
128 spin_lock_irqsave(&loop_conns_lock, flags);
129 list_add_tail(&lc->loop_node, &loop_conns);
130 spin_unlock_irqrestore(&loop_conns_lock, flags);
131
132 return 0;
133}
134
135static void rds_loop_conn_free(void *arg)
136{
137 struct rds_loop_connection *lc = arg;
138 rdsdebug("lc %p\n", lc);
139 list_del(&lc->loop_node);
140 kfree(lc);
141}
142
143static int rds_loop_conn_connect(struct rds_connection *conn)
144{
145 rds_connect_complete(conn);
146 return 0;
147}
148
149static void rds_loop_conn_shutdown(struct rds_connection *conn)
150{
151}
152
153void rds_loop_exit(void)
154{
155 struct rds_loop_connection *lc, *_lc;
156 LIST_HEAD(tmp_list);
157
158 /* avoid calling conn_destroy with irqs off */
159 spin_lock_irq(&loop_conns_lock);
160 list_splice(&loop_conns, &tmp_list);
161 INIT_LIST_HEAD(&loop_conns);
162 spin_unlock_irq(&loop_conns_lock);
163
164 list_for_each_entry_safe(lc, _lc, &tmp_list, loop_node) {
165 WARN_ON(lc->conn->c_passive);
166 rds_conn_destroy(lc->conn);
167 }
168}
169
170/*
171 * This is missing .xmit_* because loop doesn't go through generic
172 * rds_send_xmit() and doesn't call rds_recv_incoming(). .listen_stop and
173 * .laddr_check are missing because transport.c doesn't iterate over
174 * rds_loop_transport.
175 */
176struct rds_transport rds_loop_transport = {
177 .xmit = rds_loop_xmit,
178 .xmit_cong_map = rds_loop_xmit_cong_map,
179 .recv = rds_loop_recv,
180 .conn_alloc = rds_loop_conn_alloc,
181 .conn_free = rds_loop_conn_free,
182 .conn_connect = rds_loop_conn_connect,
183 .conn_shutdown = rds_loop_conn_shutdown,
184 .inc_copy_to_user = rds_message_inc_copy_to_user,
185 .inc_purge = rds_message_inc_purge,
186 .inc_free = rds_message_inc_free,
187 .t_name = "loopback",
188};
diff --git a/net/rds/loop.h b/net/rds/loop.h
new file mode 100644
index 000000000000..f32b0939a04d
--- /dev/null
+++ b/net/rds/loop.h
@@ -0,0 +1,9 @@
1#ifndef _RDS_LOOP_H
2#define _RDS_LOOP_H
3
4/* loop.c */
5extern struct rds_transport rds_loop_transport;
6
7void rds_loop_exit(void);
8
9#endif
diff --git a/net/rds/message.c b/net/rds/message.c
new file mode 100644
index 000000000000..5a15dc8d0cd7
--- /dev/null
+++ b/net/rds/message.c
@@ -0,0 +1,402 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34
35#include "rds.h"
36#include "rdma.h"
37
38static DECLARE_WAIT_QUEUE_HEAD(rds_message_flush_waitq);
39
40static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
41[RDS_EXTHDR_NONE] = 0,
42[RDS_EXTHDR_VERSION] = sizeof(struct rds_ext_header_version),
43[RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma),
44[RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest),
45};
46
47
48void rds_message_addref(struct rds_message *rm)
49{
50 rdsdebug("addref rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
51 atomic_inc(&rm->m_refcount);
52}
53
54/*
55 * This relies on dma_map_sg() not touching sg[].page during merging.
56 */
57static void rds_message_purge(struct rds_message *rm)
58{
59 unsigned long i;
60
61 if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
62 return;
63
64 for (i = 0; i < rm->m_nents; i++) {
65 rdsdebug("putting data page %p\n", (void *)sg_page(&rm->m_sg[i]));
66 /* XXX will have to put_page for page refs */
67 __free_page(sg_page(&rm->m_sg[i]));
68 }
69 rm->m_nents = 0;
70
71 if (rm->m_rdma_op)
72 rds_rdma_free_op(rm->m_rdma_op);
73 if (rm->m_rdma_mr)
74 rds_mr_put(rm->m_rdma_mr);
75}
76
77void rds_message_inc_purge(struct rds_incoming *inc)
78{
79 struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
80 rds_message_purge(rm);
81}
82
83void rds_message_put(struct rds_message *rm)
84{
85 rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
86
87 if (atomic_dec_and_test(&rm->m_refcount)) {
88 BUG_ON(!list_empty(&rm->m_sock_item));
89 BUG_ON(!list_empty(&rm->m_conn_item));
90 rds_message_purge(rm);
91
92 kfree(rm);
93 }
94}
95
96void rds_message_inc_free(struct rds_incoming *inc)
97{
98 struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
99 rds_message_put(rm);
100}
101
102void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
103 __be16 dport, u64 seq)
104{
105 hdr->h_flags = 0;
106 hdr->h_sport = sport;
107 hdr->h_dport = dport;
108 hdr->h_sequence = cpu_to_be64(seq);
109 hdr->h_exthdr[0] = RDS_EXTHDR_NONE;
110}
111
112int rds_message_add_extension(struct rds_header *hdr,
113 unsigned int type, const void *data, unsigned int len)
114{
115 unsigned int ext_len = sizeof(u8) + len;
116 unsigned char *dst;
117
118 /* For now, refuse to add more than one extension header */
119 if (hdr->h_exthdr[0] != RDS_EXTHDR_NONE)
120 return 0;
121
122 if (type >= __RDS_EXTHDR_MAX
123 || len != rds_exthdr_size[type])
124 return 0;
125
126 if (ext_len >= RDS_HEADER_EXT_SPACE)
127 return 0;
128 dst = hdr->h_exthdr;
129
130 *dst++ = type;
131 memcpy(dst, data, len);
132
133 dst[len] = RDS_EXTHDR_NONE;
134 return 1;
135}
136
137/*
138 * If a message has extension headers, retrieve them here.
139 * Call like this:
140 *
141 * unsigned int pos = 0;
142 *
143 * while (1) {
144 * buflen = sizeof(buffer);
145 * type = rds_message_next_extension(hdr, &pos, buffer, &buflen);
146 * if (type == RDS_EXTHDR_NONE)
147 * break;
148 * ...
149 * }
150 */
151int rds_message_next_extension(struct rds_header *hdr,
152 unsigned int *pos, void *buf, unsigned int *buflen)
153{
154 unsigned int offset, ext_type, ext_len;
155 u8 *src = hdr->h_exthdr;
156
157 offset = *pos;
158 if (offset >= RDS_HEADER_EXT_SPACE)
159 goto none;
160
161 /* Get the extension type and length. For now, the
162 * length is implied by the extension type. */
163 ext_type = src[offset++];
164
165 if (ext_type == RDS_EXTHDR_NONE || ext_type >= __RDS_EXTHDR_MAX)
166 goto none;
167 ext_len = rds_exthdr_size[ext_type];
168 if (offset + ext_len > RDS_HEADER_EXT_SPACE)
169 goto none;
170
171 *pos = offset + ext_len;
172 if (ext_len < *buflen)
173 *buflen = ext_len;
174 memcpy(buf, src + offset, *buflen);
175 return ext_type;
176
177none:
178 *pos = RDS_HEADER_EXT_SPACE;
179 *buflen = 0;
180 return RDS_EXTHDR_NONE;
181}
182
183int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version)
184{
185 struct rds_ext_header_version ext_hdr;
186
187 ext_hdr.h_version = cpu_to_be32(version);
188 return rds_message_add_extension(hdr, RDS_EXTHDR_VERSION, &ext_hdr, sizeof(ext_hdr));
189}
190
191int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version)
192{
193 struct rds_ext_header_version ext_hdr;
194 unsigned int pos = 0, len = sizeof(ext_hdr);
195
196 /* We assume the version extension is the only one present */
197 if (rds_message_next_extension(hdr, &pos, &ext_hdr, &len) != RDS_EXTHDR_VERSION)
198 return 0;
199 *version = be32_to_cpu(ext_hdr.h_version);
200 return 1;
201}
202
203int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset)
204{
205 struct rds_ext_header_rdma_dest ext_hdr;
206
207 ext_hdr.h_rdma_rkey = cpu_to_be32(r_key);
208 ext_hdr.h_rdma_offset = cpu_to_be32(offset);
209 return rds_message_add_extension(hdr, RDS_EXTHDR_RDMA_DEST, &ext_hdr, sizeof(ext_hdr));
210}
211
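A short round trip tying the helpers above together, following the calling pattern described in the comment before rds_message_next_extension(); this is a kernel-context sketch that assumes the declarations from rds.h used throughout this file:

	static void example_exthdr_roundtrip(void)
	{
		struct rds_header hdr;
		struct rds_ext_header_rdma_dest dest;
		unsigned int pos = 0, len = sizeof(dest);

		rds_message_populate_header(&hdr, cpu_to_be16(1), cpu_to_be16(2), 0);
		rds_message_add_rdma_dest_extension(&hdr, 0x1234, 0x10);

		/* Walk the extension space and print the one we just added. */
		if (rds_message_next_extension(&hdr, &pos, &dest, &len) ==
		    RDS_EXTHDR_RDMA_DEST)
			pr_info("rkey %u offset %u\n",
				be32_to_cpu(dest.h_rdma_rkey),
				be32_to_cpu(dest.h_rdma_offset));
	}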
212struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp)
213{
214 struct rds_message *rm;
215
216 rm = kzalloc(sizeof(struct rds_message) +
217 (nents * sizeof(struct scatterlist)), gfp);
218 if (!rm)
219 goto out;
220
221 if (nents)
222 sg_init_table(rm->m_sg, nents);
223 atomic_set(&rm->m_refcount, 1);
224 INIT_LIST_HEAD(&rm->m_sock_item);
225 INIT_LIST_HEAD(&rm->m_conn_item);
226 spin_lock_init(&rm->m_rs_lock);
227
228out:
229 return rm;
230}
231
232struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
233{
234 struct rds_message *rm;
235 unsigned int i;
236
237 rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL);
238 if (rm == NULL)
239 return ERR_PTR(-ENOMEM);
240
241 set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
242 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
243 rm->m_nents = ceil(total_len, PAGE_SIZE);
244
245 for (i = 0; i < rm->m_nents; ++i) {
246 sg_set_page(&rm->m_sg[i],
247 virt_to_page(page_addrs[i]),
248 PAGE_SIZE, 0);
249 }
250
251 return rm;
252}
253
254struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
255 size_t total_len)
256{
257 unsigned long to_copy;
258 unsigned long iov_off;
259 unsigned long sg_off;
260 struct rds_message *rm;
261 struct iovec *iov;
262 struct scatterlist *sg;
263 int ret;
264
265 rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL);
266 if (rm == NULL) {
267 ret = -ENOMEM;
268 goto out;
269 }
270
271 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
272
273 /*
274 * now allocate and copy in the data payload.
275 */
276 sg = rm->m_sg;
277 iov = first_iov;
278 iov_off = 0;
279 sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
280
281 while (total_len) {
282 if (sg_page(sg) == NULL) {
283 ret = rds_page_remainder_alloc(sg, total_len,
284 GFP_HIGHUSER);
285 if (ret)
286 goto out;
287 rm->m_nents++;
288 sg_off = 0;
289 }
290
291 while (iov_off == iov->iov_len) {
292 iov_off = 0;
293 iov++;
294 }
295
296 to_copy = min(iov->iov_len - iov_off, sg->length - sg_off);
297 to_copy = min_t(size_t, to_copy, total_len);
298
299 rdsdebug("copying %lu bytes from user iov [%p, %zu] + %lu to "
300 "sg [%p, %u, %u] + %lu\n",
301 to_copy, iov->iov_base, iov->iov_len, iov_off,
302 (void *)sg_page(sg), sg->offset, sg->length, sg_off);
303
304 ret = rds_page_copy_from_user(sg_page(sg), sg->offset + sg_off,
305 iov->iov_base + iov_off,
306 to_copy);
307 if (ret)
308 goto out;
309
310 iov_off += to_copy;
311 total_len -= to_copy;
312 sg_off += to_copy;
313
314 if (sg_off == sg->length)
315 sg++;
316 }
317
318 ret = 0;
319out:
320 if (ret) {
321 if (rm)
322 rds_message_put(rm);
323 rm = ERR_PTR(ret);
324 }
325 return rm;
326}
327
328int rds_message_inc_copy_to_user(struct rds_incoming *inc,
329 struct iovec *first_iov, size_t size)
330{
331 struct rds_message *rm;
332 struct iovec *iov;
333 struct scatterlist *sg;
334 unsigned long to_copy;
335 unsigned long iov_off;
336 unsigned long vec_off;
337 int copied;
338 int ret;
339 u32 len;
340
341 rm = container_of(inc, struct rds_message, m_inc);
342 len = be32_to_cpu(rm->m_inc.i_hdr.h_len);
343
344 iov = first_iov;
345 iov_off = 0;
346 sg = rm->m_sg;
347 vec_off = 0;
348 copied = 0;
349
350 while (copied < size && copied < len) {
351 while (iov_off == iov->iov_len) {
352 iov_off = 0;
353 iov++;
354 }
355
356 to_copy = min(iov->iov_len - iov_off, sg->length - vec_off);
357 to_copy = min_t(size_t, to_copy, size - copied);
358 to_copy = min_t(unsigned long, to_copy, len - copied);
359
360 rdsdebug("copying %lu bytes to user iov [%p, %zu] + %lu to "
361 "sg [%p, %u, %u] + %lu\n",
362 to_copy, iov->iov_base, iov->iov_len, iov_off,
363 sg_page(sg), sg->offset, sg->length, vec_off);
364
365 ret = rds_page_copy_to_user(sg_page(sg), sg->offset + vec_off,
366 iov->iov_base + iov_off,
367 to_copy);
368 if (ret) {
369 copied = ret;
370 break;
371 }
372
373 iov_off += to_copy;
374 vec_off += to_copy;
375 copied += to_copy;
376
377 if (vec_off == sg->length) {
378 vec_off = 0;
379 sg++;
380 }
381 }
382
383 return copied;
384}
385
386/*
387 * If the message is still on the send queue, wait until the transport
388 * is done with it. This is particularly important for RDMA operations.
389 */
390void rds_message_wait(struct rds_message *rm)
391{
392 wait_event(rds_message_flush_waitq,
393 !test_bit(RDS_MSG_MAPPED, &rm->m_flags));
394}
395
396void rds_message_unmapped(struct rds_message *rm)
397{
398 clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
399 if (waitqueue_active(&rds_message_flush_waitq))
400 wake_up(&rds_message_flush_waitq);
401}
402
diff --git a/net/rds/page.c b/net/rds/page.c
new file mode 100644
index 000000000000..c460743a89ad
--- /dev/null
+++ b/net/rds/page.c
@@ -0,0 +1,221 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/highmem.h>
34
35#include "rds.h"
36
37struct rds_page_remainder {
38 struct page *r_page;
39 unsigned long r_offset;
40};
41
42DEFINE_PER_CPU(struct rds_page_remainder, rds_page_remainders) ____cacheline_aligned;
43
44/*
45 * returns 0 on success or -errno on failure.
46 *
47 * We don't have to worry about flush_dcache_page() as this only works
48 * with private pages. If, say, we were to do directed receive to pinned
49 * user pages we'd have to worry more about cache coherence. (Though
50 * the flush_dcache_page() in get_user_pages() would probably be enough).
51 */
52int rds_page_copy_user(struct page *page, unsigned long offset,
53 void __user *ptr, unsigned long bytes,
54 int to_user)
55{
56 unsigned long ret;
57 void *addr;
58
59 if (to_user)
60 rds_stats_add(s_copy_to_user, bytes);
61 else
62 rds_stats_add(s_copy_from_user, bytes);
63
64 addr = kmap_atomic(page, KM_USER0);
65 if (to_user)
66 ret = __copy_to_user_inatomic(ptr, addr + offset, bytes);
67 else
68 ret = __copy_from_user_inatomic(addr + offset, ptr, bytes);
69 kunmap_atomic(addr, KM_USER0);
70
71 if (ret) {
72 addr = kmap(page);
73 if (to_user)
74 ret = copy_to_user(ptr, addr + offset, bytes);
75 else
76 ret = copy_from_user(addr + offset, ptr, bytes);
77 kunmap(page);
78 if (ret)
79 return -EFAULT;
80 }
81
82 return 0;
83}
84
85/*
86 * Message allocation uses this to build up regions of a message.
87 *
88 * @bytes - the number of bytes needed.
89 * @gfp - the waiting behaviour of the allocation
90 *
91 * @gfp is always ORed with __GFP_HIGHMEM. Callers must be prepared to
92 * kmap the pages, etc.
93 *
94 * If @bytes is at least a full page then this just returns a page from
95 * alloc_page().
96 *
97 * If @bytes is a partial page then this stores the unused region of the
98 * page in a per-cpu structure. Future partial-page allocations may be
99 * satisfied from that cached region. This lets us waste less memory on
100 * small allocations with minimal complexity. It works because the transmit
101 * path passes read-only page regions down to devices. They hold a page
102 * reference until they are done with the region.
103 */
104int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
105 gfp_t gfp)
106{
107 struct rds_page_remainder *rem;
108 unsigned long flags;
109 struct page *page;
110 int ret;
111
112 gfp |= __GFP_HIGHMEM;
113
114 /* jump straight to allocation if we're trying for a huge page */
115 if (bytes >= PAGE_SIZE) {
116 page = alloc_page(gfp);
117 if (page == NULL) {
118 ret = -ENOMEM;
119 } else {
120 sg_set_page(scat, page, PAGE_SIZE, 0);
121 ret = 0;
122 }
123 goto out;
124 }
125
126 rem = &per_cpu(rds_page_remainders, get_cpu());
127 local_irq_save(flags);
128
129 while (1) {
130 /* avoid a tiny region getting stuck by tossing it */
131 if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) {
132 rds_stats_inc(s_page_remainder_miss);
133 __free_page(rem->r_page);
134 rem->r_page = NULL;
135 }
136
137 /* hand out a fragment from the cached page */
138 if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) {
139 sg_set_page(scat, rem->r_page, bytes, rem->r_offset);
140 get_page(sg_page(scat));
141
142 if (rem->r_offset != 0)
143 rds_stats_inc(s_page_remainder_hit);
144
145 rem->r_offset += bytes;
146 if (rem->r_offset == PAGE_SIZE) {
147 __free_page(rem->r_page);
148 rem->r_page = NULL;
149 }
150 ret = 0;
151 break;
152 }
153
154 /* alloc if there is nothing for us to use */
155 local_irq_restore(flags);
156 put_cpu();
157
158 page = alloc_page(gfp);
159
160 rem = &per_cpu(rds_page_remainders, get_cpu());
161 local_irq_save(flags);
162
163 if (page == NULL) {
164 ret = -ENOMEM;
165 break;
166 }
167
168 /* did someone race to fill the remainder before us? */
169 if (rem->r_page) {
170 __free_page(page);
171 continue;
172 }
173
174 /* otherwise install our page and loop around to alloc */
175 rem->r_page = page;
176 rem->r_offset = 0;
177 }
178
179 local_irq_restore(flags);
180 put_cpu();
181out:
182 rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret,
183 ret ? NULL : sg_page(scat), ret ? 0 : scat->offset,
184 ret ? 0 : scat->length);
185 return ret;
186}
187
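A usage sketch of the remainder cache described above: two sub-page allocations made back to back on the same CPU will typically be carved from the same page, each holding its own page reference (kernel-context sketch, assuming the declarations from rds.h):

	static void example_partial_pages(void)
	{
		struct scatterlist sg[2];

		sg_init_table(sg, 2);

		if (rds_page_remainder_alloc(&sg[0], 100, GFP_KERNEL) == 0 &&
		    rds_page_remainder_alloc(&sg[1], 100, GFP_KERNEL) == 0)
			/* Typically the same page, with offsets 0 and 100. */
			rdsdebug("pages %p %p offsets %u %u\n",
				 sg_page(&sg[0]), sg_page(&sg[1]),
				 sg[0].offset, sg[1].offset);
	}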
188static int rds_page_remainder_cpu_notify(struct notifier_block *self,
189 unsigned long action, void *hcpu)
190{
191 struct rds_page_remainder *rem;
192 long cpu = (long)hcpu;
193
194 rem = &per_cpu(rds_page_remainders, cpu);
195
196 rdsdebug("cpu %ld action 0x%lx\n", cpu, action);
197
198 switch (action) {
199 case CPU_DEAD:
200 if (rem->r_page)
201 __free_page(rem->r_page);
202 rem->r_page = NULL;
203 break;
204 }
205
206 return 0;
207}
208
209static struct notifier_block rds_page_remainder_nb = {
210 .notifier_call = rds_page_remainder_cpu_notify,
211};
212
213void rds_page_exit(void)
214{
215 int i;
216
217 for_each_possible_cpu(i)
218 rds_page_remainder_cpu_notify(&rds_page_remainder_nb,
219 (unsigned long)CPU_DEAD,
220 (void *)(long)i);
221}
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
new file mode 100644
index 000000000000..eaeeb91e1119
--- /dev/null
+++ b/net/rds/rdma.c
@@ -0,0 +1,679 @@
1/*
2 * Copyright (c) 2007 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/pagemap.h>
34#include <linux/rbtree.h>
35#include <linux/dma-mapping.h> /* for DMA_*_DEVICE */
36
37#include "rdma.h"
38
39/*
40 * XXX
41 * - build with sparse
42 * - should we limit the size of a mr region? let transport return failure?
43 * - should we detect duplicate keys on a socket? hmm.
44 * - an rdma is an mlock, apply rlimit?
45 */
46
47/*
48 * get the number of pages by looking at the page indices that the start and
49 * end addresses fall in.
50 *
51 * Returns 0 if the vec is invalid. It is invalid if the number of bytes
52 * causes the address to wrap or overflows an unsigned int. The latter limit
53 * comes from the byte count being stored in the 'length' member of 'struct scatterlist'.
54 */
55static unsigned int rds_pages_in_vec(struct rds_iovec *vec)
56{
57 if ((vec->addr + vec->bytes <= vec->addr) ||
58 (vec->bytes > (u64)UINT_MAX))
59 return 0;
60
61 return ((vec->addr + vec->bytes + PAGE_SIZE - 1) >> PAGE_SHIFT) -
62 (vec->addr >> PAGE_SHIFT);
63}
64
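A worked example of the page-count arithmetic above, assuming 4 KiB pages and hypothetical values:

	/*
	 * vec->addr = 0x1ff0, vec->bytes = 0x20, PAGE_SIZE = 0x1000:
	 *
	 *   (0x1ff0 + 0x20 + 0xfff) >> PAGE_SHIFT = 0x300f >> 12 = 3
	 *    0x1ff0                 >> PAGE_SHIFT = 0x1ff0 >> 12 = 1
	 *
	 * so the vector spans 3 - 1 = 2 pages, because those 32 bytes
	 * straddle the page boundary at 0x2000.
	 */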
65static struct rds_mr *rds_mr_tree_walk(struct rb_root *root, u64 key,
66 struct rds_mr *insert)
67{
68 struct rb_node **p = &root->rb_node;
69 struct rb_node *parent = NULL;
70 struct rds_mr *mr;
71
72 while (*p) {
73 parent = *p;
74 mr = rb_entry(parent, struct rds_mr, r_rb_node);
75
76 if (key < mr->r_key)
77 p = &(*p)->rb_left;
78 else if (key > mr->r_key)
79 p = &(*p)->rb_right;
80 else
81 return mr;
82 }
83
84 if (insert) {
85 rb_link_node(&insert->r_rb_node, parent, p);
86 rb_insert_color(&insert->r_rb_node, root);
87 atomic_inc(&insert->r_refcount);
88 }
89 return NULL;
90}
91
92/*
93 * Destroy the transport-specific part of a MR.
94 */
95static void rds_destroy_mr(struct rds_mr *mr)
96{
97 struct rds_sock *rs = mr->r_sock;
98 void *trans_private = NULL;
99 unsigned long flags;
100
101 rdsdebug("RDS: destroy mr key is %x refcnt %u\n",
102 mr->r_key, atomic_read(&mr->r_refcount));
103
104 if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state))
105 return;
106
107 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
108 if (!RB_EMPTY_NODE(&mr->r_rb_node))
109 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
110 trans_private = mr->r_trans_private;
111 mr->r_trans_private = NULL;
112 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
113
114 if (trans_private)
115 mr->r_trans->free_mr(trans_private, mr->r_invalidate);
116}
117
118void __rds_put_mr_final(struct rds_mr *mr)
119{
120 rds_destroy_mr(mr);
121 kfree(mr);
122}
123
124/*
125 * By the time this is called we can't have any more ioctls called on
126 * the socket so we don't need to worry about racing with others.
127 */
128void rds_rdma_drop_keys(struct rds_sock *rs)
129{
130 struct rds_mr *mr;
131 struct rb_node *node;
132
133 /* Release any MRs associated with this socket */
134 while ((node = rb_first(&rs->rs_rdma_keys))) {
135 mr = container_of(node, struct rds_mr, r_rb_node);
136 if (mr->r_trans == rs->rs_transport)
137 mr->r_invalidate = 0;
138 rds_mr_put(mr);
139 }
140
141 if (rs->rs_transport && rs->rs_transport->flush_mrs)
142 rs->rs_transport->flush_mrs();
143}
144
145/*
146 * Helper function to pin user pages.
147 */
148static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
149 struct page **pages, int write)
150{
151 int ret;
152
153 down_read(&current->mm->mmap_sem);
154 ret = get_user_pages(current, current->mm, user_addr,
155 nr_pages, write, 0, pages, NULL);
156 up_read(&current->mm->mmap_sem);
157
158 if (0 <= ret && (unsigned) ret < nr_pages) {
159 while (ret--)
160 put_page(pages[ret]);
161 ret = -EFAULT;
162 }
163
164 return ret;
165}
166
167static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
168 u64 *cookie_ret, struct rds_mr **mr_ret)
169{
170 struct rds_mr *mr = NULL, *found;
171 unsigned int nr_pages;
172 struct page **pages = NULL;
173 struct scatterlist *sg;
174 void *trans_private;
175 unsigned long flags;
176 rds_rdma_cookie_t cookie;
177 unsigned int nents;
178 long i;
179 int ret;
180
181 if (rs->rs_bound_addr == 0) {
182 ret = -ENOTCONN; /* XXX not a great errno */
183 goto out;
184 }
185
186 if (rs->rs_transport->get_mr == NULL) {
187 ret = -EOPNOTSUPP;
188 goto out;
189 }
190
191 nr_pages = rds_pages_in_vec(&args->vec);
192 if (nr_pages == 0) {
193 ret = -EINVAL;
194 goto out;
195 }
196
197 rdsdebug("RDS: get_mr addr %llx len %llu nr_pages %u\n",
198 args->vec.addr, args->vec.bytes, nr_pages);
199
200 /* XXX clamp nr_pages to limit the size of this alloc? */
201 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
202 if (pages == NULL) {
203 ret = -ENOMEM;
204 goto out;
205 }
206
207 mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL);
208 if (mr == NULL) {
209 ret = -ENOMEM;
210 goto out;
211 }
212
213 atomic_set(&mr->r_refcount, 1);
214 RB_CLEAR_NODE(&mr->r_rb_node);
215 mr->r_trans = rs->rs_transport;
216 mr->r_sock = rs;
217
218 if (args->flags & RDS_RDMA_USE_ONCE)
219 mr->r_use_once = 1;
220 if (args->flags & RDS_RDMA_INVALIDATE)
221 mr->r_invalidate = 1;
222 if (args->flags & RDS_RDMA_READWRITE)
223 mr->r_write = 1;
224
225 /*
226 * Pin the pages that make up the user buffer and transfer the page
227 * pointers to the mr's sg array. We check to see if we've mapped
228 * the whole region after transferring the partial page references
229 * to the sg array so that we can have one page ref cleanup path.
230 *
231 * For now we have no flag that tells us whether the mapping is
232 * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to
233 * the zero page.
234 */
235 ret = rds_pin_pages(args->vec.addr & PAGE_MASK, nr_pages, pages, 1);
236 if (ret < 0)
237 goto out;
238
239 nents = ret;
240 sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
241 if (sg == NULL) {
242 ret = -ENOMEM;
243 goto out;
244 }
245 WARN_ON(!nents);
246 sg_init_table(sg, nents);
247
248 /* Stick all pages into the scatterlist */
249 for (i = 0 ; i < nents; i++)
250 sg_set_page(&sg[i], pages[i], PAGE_SIZE, 0);
251
252 rdsdebug("RDS: trans_private nents is %u\n", nents);
253
254 /* Obtain a transport specific MR. If this succeeds, the
255 * s/g list is now owned by the MR.
256 * Note that dma_map() implies that pending writes are
257 * flushed to RAM, so no dma_sync is needed here. */
258 trans_private = rs->rs_transport->get_mr(sg, nents, rs,
259 &mr->r_key);
260
261 if (IS_ERR(trans_private)) {
262 for (i = 0 ; i < nents; i++)
263 put_page(sg_page(&sg[i]));
264 kfree(sg);
265 ret = PTR_ERR(trans_private);
266 goto out;
267 }
268
269 mr->r_trans_private = trans_private;
270
271 rdsdebug("RDS: get_mr put_user key is %x cookie_addr %p\n",
272 mr->r_key, (void *)(unsigned long) args->cookie_addr);
273
274 /* The user may pass us an unaligned address, but we can only
275 * map page aligned regions. So we keep the offset, and build
276 * a 64bit cookie containing <R_Key, offset> and pass that
277 * around. */
278 cookie = rds_rdma_make_cookie(mr->r_key, args->vec.addr & ~PAGE_MASK);
279 if (cookie_ret)
280 *cookie_ret = cookie;
281
282 if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long) args->cookie_addr)) {
283 ret = -EFAULT;
284 goto out;
285 }
286
287 /* Inserting the new MR into the rbtree bumps its
288 * reference count. */
289 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
290 found = rds_mr_tree_walk(&rs->rs_rdma_keys, mr->r_key, mr);
291 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
292
293 BUG_ON(found && found != mr);
294
295 rdsdebug("RDS: get_mr key is %x\n", mr->r_key);
296 if (mr_ret) {
297 atomic_inc(&mr->r_refcount);
298 *mr_ret = mr;
299 }
300
301 ret = 0;
302out:
303 kfree(pages);
304 if (mr)
305 rds_mr_put(mr);
306 return ret;
307}
308
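The <R_Key, offset> cookie built above comes from rds_rdma_make_cookie(), which lives in rdma.h and is not part of this hunk; the layout below is therefore only a plausible sketch of the packing, labeled as an assumption:

	/* Assumed layout, for illustration only: r_key in the low 32 bits,
	 * the sub-page offset in the high 32 bits. */
	static inline u64 example_make_cookie(u32 r_key, u32 offset)
	{
		return (u64)offset << 32 | r_key;
	}

	static inline u32 example_cookie_key(u64 cookie)
	{
		return (u32)cookie;
	}

	static inline u32 example_cookie_offset(u64 cookie)
	{
		return cookie >> 32;
	}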
309int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen)
310{
311 struct rds_get_mr_args args;
312
313 if (optlen != sizeof(struct rds_get_mr_args))
314 return -EINVAL;
315
316 if (copy_from_user(&args, (struct rds_get_mr_args __user *)optval,
317 sizeof(struct rds_get_mr_args)))
318 return -EFAULT;
319
320 return __rds_rdma_map(rs, &args, NULL, NULL);
321}
322
323/*
324 * Free the MR indicated by the given R_Key
325 */
326int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen)
327{
328 struct rds_free_mr_args args;
329 struct rds_mr *mr;
330 unsigned long flags;
331
332 if (optlen != sizeof(struct rds_free_mr_args))
333 return -EINVAL;
334
335 if (copy_from_user(&args, (struct rds_free_mr_args __user *)optval,
336 sizeof(struct rds_free_mr_args)))
337 return -EFAULT;
338
339 /* Special case - a null cookie means flush all unused MRs */
340 if (args.cookie == 0) {
341 if (!rs->rs_transport || !rs->rs_transport->flush_mrs)
342 return -EINVAL;
343 rs->rs_transport->flush_mrs();
344 return 0;
345 }
346
347 /* Look up the MR given its R_key and remove it from the rbtree
348 * so nobody else finds it.
349 * This should also prevent races with rds_rdma_unuse.
350 */
351 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
352 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, rds_rdma_cookie_key(args.cookie), NULL);
353 if (mr) {
354 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
355 RB_CLEAR_NODE(&mr->r_rb_node);
356 if (args.flags & RDS_RDMA_INVALIDATE)
357 mr->r_invalidate = 1;
358 }
359 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
360
361 if (!mr)
362 return -EINVAL;
363
364 /*
365 * call rds_destroy_mr() ourselves so that we're sure it's done by the time
366 * we return. If we let rds_mr_put() do it, it might not happen until
367 * someone else drops their ref.
368 */
369 rds_destroy_mr(mr);
370 rds_mr_put(mr);
371 return 0;
372}
373
374/*
375 * This is called when we receive an extension header that
376 * tells us this MR was used. It allows us to implement
377 * use_once semantics
378 */
379void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
380{
381 struct rds_mr *mr;
382 unsigned long flags;
383 int zot_me = 0;
384
385 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
386 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
387 if (mr && (mr->r_use_once || force)) {
388 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
389 RB_CLEAR_NODE(&mr->r_rb_node);
390 zot_me = 1;
391 } else if (mr)
392 atomic_inc(&mr->r_refcount);
393 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
394
395 /* May have to issue a dma_sync on this memory region.
396 * Note we could avoid this if the operation was a RDMA READ,
397 * but at this point we can't tell. */
398 if (mr != NULL) {
399 if (mr->r_trans->sync_mr)
400 mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
401
402 /* If the MR was marked as invalidate, this will
403 * trigger an async flush. */
404 if (zot_me)
405 rds_destroy_mr(mr);
406 rds_mr_put(mr);
407 }
408}
409
410void rds_rdma_free_op(struct rds_rdma_op *ro)
411{
412 unsigned int i;
413
414 for (i = 0; i < ro->r_nents; i++) {
415 struct page *page = sg_page(&ro->r_sg[i]);
416
417 /* Mark page dirty if it was possibly modified, which
418 * is the case for an RDMA_READ, which copies from remote
419 * to local memory */
420 if (!ro->r_write)
421 set_page_dirty(page);
422 put_page(page);
423 }
424
425 kfree(ro->r_notifier);
426 kfree(ro);
427}
428
429/*
430 * args is a pointer to an in-kernel copy in the sendmsg cmsg.
431 */
432static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
433 struct rds_rdma_args *args)
434{
435 struct rds_iovec vec;
436 struct rds_rdma_op *op = NULL;
437 unsigned int nr_pages;
438 unsigned int max_pages;
439 unsigned int nr_bytes;
440 struct page **pages = NULL;
441 struct rds_iovec __user *local_vec;
442 struct scatterlist *sg;
443 unsigned int nr;
444 unsigned int i, j;
445 int ret;
446
447
448 if (rs->rs_bound_addr == 0) {
449 ret = -ENOTCONN; /* XXX not a great errno */
450 goto out;
451 }
452
453 if (args->nr_local > (u64)UINT_MAX) {
454 ret = -EMSGSIZE;
455 goto out;
456 }
457
458 nr_pages = 0;
459 max_pages = 0;
460
461 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
462
463 /* figure out the number of pages in the vector */
464 for (i = 0; i < args->nr_local; i++) {
465 if (copy_from_user(&vec, &local_vec[i],
466 sizeof(struct rds_iovec))) {
467 ret = -EFAULT;
468 goto out;
469 }
470
471 nr = rds_pages_in_vec(&vec);
472 if (nr == 0) {
473 ret = -EINVAL;
474 goto out;
475 }
476
477 max_pages = max(nr, max_pages);
478 nr_pages += nr;
479 }
480
481 pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL);
482 if (pages == NULL) {
483 ret = -ENOMEM;
484 goto out;
485 }
486
487 op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL);
488 if (op == NULL) {
489 ret = -ENOMEM;
490 goto out;
491 }
492
493 op->r_write = !!(args->flags & RDS_RDMA_READWRITE);
494 op->r_fence = !!(args->flags & RDS_RDMA_FENCE);
495 op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
496 op->r_recverr = rs->rs_recverr;
497 WARN_ON(!nr_pages);
498 sg_init_table(op->r_sg, nr_pages);
499
500 if (op->r_notify || op->r_recverr) {
501 /* We allocate an uninitialized notifier here, because
502 * we don't want to do that in the completion handler. We
503 * would have to use GFP_ATOMIC there, and don't want to deal
504 * with failed allocations.
505 */
506 op->r_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
507 if (!op->r_notifier) {
508 ret = -ENOMEM;
509 goto out;
510 }
511 op->r_notifier->n_user_token = args->user_token;
512 op->r_notifier->n_status = RDS_RDMA_SUCCESS;
513 }
514
515 /* The cookie contains the R_Key of the remote memory region, and
516 * optionally an offset into it. This is how we implement RDMA into
517 * unaligned memory.
518 * When setting up the RDMA, we need to add that offset to the
519 * destination address (which is really an offset into the MR)
520 * FIXME: We may want to move this into ib_rdma.c
521 */
522 op->r_key = rds_rdma_cookie_key(args->cookie);
523 op->r_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie);
524
525 nr_bytes = 0;
526
527 rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n",
528 (unsigned long long)args->nr_local,
529 (unsigned long long)args->remote_vec.addr,
530 op->r_key);
531
532 for (i = 0; i < args->nr_local; i++) {
533 if (copy_from_user(&vec, &local_vec[i],
534 sizeof(struct rds_iovec))) {
535 ret = -EFAULT;
536 goto out;
537 }
538
539 nr = rds_pages_in_vec(&vec);
540 if (nr == 0) {
541 ret = -EINVAL;
542 goto out;
543 }
544
545 rs->rs_user_addr = vec.addr;
546 rs->rs_user_bytes = vec.bytes;
547
548 /* did the user change the vec under us? */
549 if (nr > max_pages || op->r_nents + nr > nr_pages) {
550 ret = -EINVAL;
551 goto out;
552 }
553 /* If it's a WRITE operation, we want to pin the pages for reading.
554 * If it's a READ operation, we need to pin the pages for writing.
555 */
556 ret = rds_pin_pages(vec.addr & PAGE_MASK, nr, pages, !op->r_write);
557 if (ret < 0)
558 goto out;
559
560 rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n",
561 nr_bytes, nr, vec.bytes, vec.addr);
562
563 nr_bytes += vec.bytes;
564
565 for (j = 0; j < nr; j++) {
566 unsigned int offset = vec.addr & ~PAGE_MASK;
567
568 sg = &op->r_sg[op->r_nents + j];
569 sg_set_page(sg, pages[j],
570 min_t(unsigned int, vec.bytes, PAGE_SIZE - offset),
571 offset);
572
573 rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n",
574 sg->offset, sg->length, vec.addr, vec.bytes);
575
576 vec.addr += sg->length;
577 vec.bytes -= sg->length;
578 }
579
580 op->r_nents += nr;
581 }
582
583
584 if (nr_bytes > args->remote_vec.bytes) {
585 rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n",
586 nr_bytes,
587 (unsigned int) args->remote_vec.bytes);
588 ret = -EINVAL;
589 goto out;
590 }
591 op->r_bytes = nr_bytes;
592
593 ret = 0;
594out:
595 kfree(pages);
596 if (ret) {
597 if (op)
598 rds_rdma_free_op(op);
599 op = ERR_PTR(ret);
600 }
601 return op;
602}
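
A small userspace sketch (illustrative only; names are made up) of the guard behind the "did the user change the vec under us?" check above: the iovec is read twice, once to size the page array and scatterlist and once while pinning, so the second pass must refuse to exceed the budget computed by the first. The page arithmetic assumes rds_pages_in_vec() simply rounds the byte range out to whole pages.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096ULL
#define PAGE_MASK	(~(PAGE_SIZE - 1))

struct iov { uint64_t addr; uint64_t bytes; };

/* Pages spanned by [addr, addr + bytes) */
static unsigned int pages_in_vec(const struct iov *v)
{
	uint64_t first = v->addr & PAGE_MASK;
	uint64_t last = (v->addr + v->bytes - 1) & PAGE_MASK;

	return (unsigned int)((last - first) / PAGE_SIZE) + 1;
}

int main(void)
{
	/* Pass 1: size the allocation from a snapshot of the vector. */
	struct iov snap = { .addr = 0x100f00, .bytes = 10000 };
	unsigned int budget = pages_in_vec(&snap);

	/* Pass 2: the vector has grown before the pages were pinned. */
	struct iov now = { .addr = 0x100f00, .bytes = 100000 };
	unsigned int need = pages_in_vec(&now);

	if (need > budget) {
		/* rds_rdma_prepare() returns -EINVAL here instead of
		 * overrunning the scatterlist sized in pass 1. */
		fprintf(stderr, "vec grew (%u > %u pages), rejecting\n",
			need, budget);
		return 1;
	}
	return 0;
}
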
603
604/*
605 * The application asks for an RDMA transfer.
606 * Extract all arguments and set up the rdma_op
607 */
608int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
609 struct cmsghdr *cmsg)
610{
611 struct rds_rdma_op *op;
612
613 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))
614 || rm->m_rdma_op != NULL)
615 return -EINVAL;
616
617 op = rds_rdma_prepare(rs, CMSG_DATA(cmsg));
618 if (IS_ERR(op))
619 return PTR_ERR(op);
620 rds_stats_inc(s_send_rdma);
621 rm->m_rdma_op = op;
622 return 0;
623}
624
625/*
626 * The application wants us to pass an RDMA destination (aka MR)
627 * to the remote
628 */
629int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
630 struct cmsghdr *cmsg)
631{
632 unsigned long flags;
633 struct rds_mr *mr;
634 u32 r_key;
635 int err = 0;
636
637 if (cmsg->cmsg_len < CMSG_LEN(sizeof(rds_rdma_cookie_t))
638 || rm->m_rdma_cookie != 0)
639 return -EINVAL;
640
641 memcpy(&rm->m_rdma_cookie, CMSG_DATA(cmsg), sizeof(rm->m_rdma_cookie));
642
643 /* We are reusing a previously mapped MR here. Most likely, the
644 * application has written to the buffer, so we need to explicitly
645 * flush those writes to RAM. Otherwise the HCA may not see them
646 * when doing a DMA from that buffer.
647 */
648 r_key = rds_rdma_cookie_key(rm->m_rdma_cookie);
649
650 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
651 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
652 if (mr == NULL)
653 err = -EINVAL; /* invalid r_key */
654 else
655 atomic_inc(&mr->r_refcount);
656 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
657
658 if (mr) {
659 mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
660 rm->m_rdma_mr = mr;
661 }
662 return err;
663}
664
665/*
666 * The application passes us an address range it wants to enable RDMA
667 * to/from. We map the area, and save the <R_Key,offset> pair
668 * in rm->m_rdma_cookie. This causes it to be sent along to the peer
669 * in an extension header.
670 */
671int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
672 struct cmsghdr *cmsg)
673{
674 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_get_mr_args))
675 || rm->m_rdma_cookie != 0)
676 return -EINVAL;
677
678 return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr);
679}
diff --git a/net/rds/rdma.h b/net/rds/rdma.h
new file mode 100644
index 000000000000..425512098b0b
--- /dev/null
+++ b/net/rds/rdma.h
@@ -0,0 +1,84 @@
1#ifndef _RDS_RDMA_H
2#define _RDS_RDMA_H
3
4#include <linux/rbtree.h>
5#include <linux/spinlock.h>
6#include <linux/scatterlist.h>
7
8#include "rds.h"
9
10struct rds_mr {
11 struct rb_node r_rb_node;
12 atomic_t r_refcount;
13 u32 r_key;
14
15 /* A copy of the creation flags */
16 unsigned int r_use_once:1;
17 unsigned int r_invalidate:1;
18 unsigned int r_write:1;
19
20 /* This is for RDS_MR_DEAD.
21 * It would be nice & consistent to make this part of the above
22 * bit field here, but we need to use test_and_set_bit.
23 */
24 unsigned long r_state;
25 struct rds_sock *r_sock; /* back pointer to the socket that owns us */
26 struct rds_transport *r_trans;
27 void *r_trans_private;
28};
29
30/* Flags for mr->r_state */
31#define RDS_MR_DEAD 0
32
33struct rds_rdma_op {
34 u32 r_key;
35 u64 r_remote_addr;
36 unsigned int r_write:1;
37 unsigned int r_fence:1;
38 unsigned int r_notify:1;
39 unsigned int r_recverr:1;
40 unsigned int r_mapped:1;
41 struct rds_notifier *r_notifier;
42 unsigned int r_bytes;
43 unsigned int r_nents;
44 unsigned int r_count;
45 struct scatterlist r_sg[0];
46};
47
48static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
49{
50 return r_key | (((u64) offset) << 32);
51}
52
53static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
54{
55 return cookie;
56}
57
58static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
59{
60 return cookie >> 32;
61}
62
63int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
64int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
65void rds_rdma_drop_keys(struct rds_sock *rs);
66int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
67 struct cmsghdr *cmsg);
68int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
69 struct cmsghdr *cmsg);
72int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
73 struct cmsghdr *cmsg);
74void rds_rdma_free_op(struct rds_rdma_op *ro);
75void rds_rdma_send_complete(struct rds_message *rm, int);
76
77extern void __rds_put_mr_final(struct rds_mr *mr);
78static inline void rds_mr_put(struct rds_mr *mr)
79{
80 if (atomic_dec_and_test(&mr->r_refcount))
81 __rds_put_mr_final(mr);
82}
83
84#endif
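
The rds_mr_put() inline above is the usual "last reference frees" idiom: atomic_dec_and_test() returns true only for the caller that drops the count to zero, and only that caller runs __rds_put_mr_final(). A rough userspace equivalent using C11 atomics (illustration, not part of the patch):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct obj {
	atomic_int refcount;
};

static void obj_final_free(struct obj *o)
{
	printf("freeing %p\n", (void *)o);
	free(o);
}

static void obj_get(struct obj *o)
{
	atomic_fetch_add(&o->refcount, 1);
}

static void obj_put(struct obj *o)
{
	/* fetch_sub returns the previous value; seeing 1 means this call
	 * dropped the last reference, like atomic_dec_and_test(). */
	if (atomic_fetch_sub(&o->refcount, 1) == 1)
		obj_final_free(o);
}

int main(void)
{
	struct obj *o = malloc(sizeof(*o));

	atomic_init(&o->refcount, 1);
	obj_get(o);
	obj_put(o);	/* one reference still held */
	obj_put(o);	/* last reference, frees the object */
	return 0;
}
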
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
new file mode 100644
index 000000000000..7b19024f9706
--- /dev/null
+++ b/net/rds/rdma_transport.c
@@ -0,0 +1,214 @@
1/*
2 * Copyright (c) 2009 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <rdma/rdma_cm.h>
34
35#include "rdma_transport.h"
36
37static struct rdma_cm_id *rds_iw_listen_id;
38
39int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
40 struct rdma_cm_event *event)
41{
42 /* this can be null in the listening path */
43 struct rds_connection *conn = cm_id->context;
44 struct rds_transport *trans;
45 int ret = 0;
46
47 rdsdebug("conn %p id %p handling event %u\n", conn, cm_id,
48 event->event);
49
50 if (cm_id->device->node_type == RDMA_NODE_RNIC)
51 trans = &rds_iw_transport;
52 else
53 trans = &rds_ib_transport;
54
55 /* Prevent shutdown from tearing down the connection
56 * while we're executing. */
57 if (conn) {
58 mutex_lock(&conn->c_cm_lock);
59
60 /* If the connection is being shut down, bail out
61 * right away. We return 0 so cm_id doesn't get
62 * destroyed prematurely */
63 if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING) {
64 /* Reject incoming connections while we're tearing
65 * down an existing one. */
66 if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
67 ret = 1;
68 goto out;
69 }
70 }
71
72 switch (event->event) {
73 case RDMA_CM_EVENT_CONNECT_REQUEST:
74 ret = trans->cm_handle_connect(cm_id, event);
75 break;
76
77 case RDMA_CM_EVENT_ADDR_RESOLVED:
78 /* XXX do we need to clean up if this fails? */
79 ret = rdma_resolve_route(cm_id,
80 RDS_RDMA_RESOLVE_TIMEOUT_MS);
81 break;
82
83 case RDMA_CM_EVENT_ROUTE_RESOLVED:
84 /* XXX worry about racing with listen acceptance */
85 ret = trans->cm_initiate_connect(cm_id);
86 break;
87
88 case RDMA_CM_EVENT_ESTABLISHED:
89 trans->cm_connect_complete(conn, event);
90 break;
91
92 case RDMA_CM_EVENT_ADDR_ERROR:
93 case RDMA_CM_EVENT_ROUTE_ERROR:
94 case RDMA_CM_EVENT_CONNECT_ERROR:
95 case RDMA_CM_EVENT_UNREACHABLE:
96 case RDMA_CM_EVENT_REJECTED:
97 case RDMA_CM_EVENT_DEVICE_REMOVAL:
98 case RDMA_CM_EVENT_ADDR_CHANGE:
99 if (conn)
100 rds_conn_drop(conn);
101 break;
102
103 case RDMA_CM_EVENT_DISCONNECTED:
104 printk(KERN_WARNING "RDS/IW: DISCONNECT event - dropping connection "
105 "%pI4->%pI4\n", &conn->c_laddr,
106 &conn->c_faddr);
107 rds_conn_drop(conn);
108 break;
109
110 default:
111 /* things like device disconnect? */
112 printk(KERN_ERR "unknown event %u\n", event->event);
113 BUG();
114 break;
115 }
116
117out:
118 if (conn)
119 mutex_unlock(&conn->c_cm_lock);
120
121 rdsdebug("id %p event %u handling ret %d\n", cm_id, event->event, ret);
122
123 return ret;
124}
125
126static int __init rds_rdma_listen_init(void)
127{
128 struct sockaddr_in sin;
129 struct rdma_cm_id *cm_id;
130 int ret;
131
132 cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP);
133 if (IS_ERR(cm_id)) {
134 ret = PTR_ERR(cm_id);
135 printk(KERN_ERR "RDS/IW: failed to setup listener, "
136 "rdma_create_id() returned %d\n", ret);
137 goto out;
138 }
139
140	sin.sin_family = AF_INET;
141 sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
142 sin.sin_port = (__force u16)htons(RDS_PORT);
143
144 /*
145 * XXX I bet this binds the cm_id to a device. If we want to support
146 * fail-over we'll have to take this into consideration.
147 */
148 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
149 if (ret) {
150 printk(KERN_ERR "RDS/IW: failed to setup listener, "
151 "rdma_bind_addr() returned %d\n", ret);
152 goto out;
153 }
154
155 ret = rdma_listen(cm_id, 128);
156 if (ret) {
157 printk(KERN_ERR "RDS/IW: failed to setup listener, "
158 "rdma_listen() returned %d\n", ret);
159 goto out;
160 }
161
162 rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT);
163
164 rds_iw_listen_id = cm_id;
165 cm_id = NULL;
166out:
167 if (cm_id)
168 rdma_destroy_id(cm_id);
169 return ret;
170}
171
172static void rds_rdma_listen_stop(void)
173{
174 if (rds_iw_listen_id) {
175 rdsdebug("cm %p\n", rds_iw_listen_id);
176 rdma_destroy_id(rds_iw_listen_id);
177 rds_iw_listen_id = NULL;
178 }
179}
180
181int __init rds_rdma_init(void)
182{
183 int ret;
184
185 ret = rds_rdma_listen_init();
186 if (ret)
187 goto out;
188
189 ret = rds_iw_init();
190 if (ret)
191 goto err_iw_init;
192
193 ret = rds_ib_init();
194 if (ret)
195 goto err_ib_init;
196
197 goto out;
198
199err_ib_init:
200 rds_iw_exit();
201err_iw_init:
202 rds_rdma_listen_stop();
203out:
204 return ret;
205}
206
207void rds_rdma_exit(void)
208{
209 /* stop listening first to ensure no new connections are attempted */
210 rds_rdma_listen_stop();
211 rds_ib_exit();
212 rds_iw_exit();
213}
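
rds_rdma_init() above uses the usual kernel error-unwind style: bring subsystems up in order and, on failure, tear down only what already succeeded, in reverse; rds_rdma_exit() then stops the listener first so no new connections race with the transports going away. A compact userspace sketch of the same goto-unwind shape (function names invented for the example):

#include <stdio.h>

static int listen_init(void)	{ puts("listen up");	return 0; }
static void listen_stop(void)	{ puts("listen down");	}
static int iw_init(void)	{ puts("iw up");	return 0; }
static void iw_exit(void)	{ puts("iw down");	}
static int ib_init(void)	{ puts("ib up");	return -1; /* simulate failure */ }

static int demo_init(void)
{
	int ret;

	ret = listen_init();
	if (ret)
		goto out;
	ret = iw_init();
	if (ret)
		goto err_iw;
	ret = ib_init();
	if (ret)
		goto err_ib;
	return 0;

err_ib:
	iw_exit();	/* undo the second step */
err_iw:
	listen_stop();	/* undo the first step */
out:
	return ret;
}

int main(void)
{
	return demo_init() ? 1 : 0;
}
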
214
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
new file mode 100644
index 000000000000..2f2c7d976c21
--- /dev/null
+++ b/net/rds/rdma_transport.h
@@ -0,0 +1,28 @@
1#ifndef _RDMA_TRANSPORT_H
2#define _RDMA_TRANSPORT_H
3
4#include <rdma/ib_verbs.h>
5#include <rdma/rdma_cm.h>
6#include "rds.h"
7
8#define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000
9
10int rds_rdma_conn_connect(struct rds_connection *conn);
11int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
12 struct rdma_cm_event *event);
13
14/* from rdma_transport.c */
15int rds_rdma_init(void);
16void rds_rdma_exit(void);
17
18/* from ib.c */
19extern struct rds_transport rds_ib_transport;
20int rds_ib_init(void);
21void rds_ib_exit(void);
22
23/* from iw.c */
24extern struct rds_transport rds_iw_transport;
25int rds_iw_init(void);
26void rds_iw_exit(void);
27
28#endif
diff --git a/net/rds/rds.h b/net/rds/rds.h
new file mode 100644
index 000000000000..060400704979
--- /dev/null
+++ b/net/rds/rds.h
@@ -0,0 +1,686 @@
1#ifndef _RDS_RDS_H
2#define _RDS_RDS_H
3
4#include <net/sock.h>
5#include <linux/scatterlist.h>
6#include <linux/highmem.h>
7#include <rdma/rdma_cm.h>
8#include <linux/mutex.h>
9#include <linux/rds.h>
10
11#include "info.h"
12
13/*
14 * RDS Network protocol version
15 */
16#define RDS_PROTOCOL_3_0 0x0300
17#define RDS_PROTOCOL_3_1 0x0301
18#define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1
19#define RDS_PROTOCOL_MAJOR(v) ((v) >> 8)
20#define RDS_PROTOCOL_MINOR(v) ((v) & 255)
21#define RDS_PROTOCOL(maj, min) (((maj) << 8) | min)
22
23/*
24 * XXX randomly chosen, but at least seems to be unused:
25 * # 18464-18768 Unassigned
26 * We should do better. We want a reserved port to discourage unpriv'ed
27 * userspace from listening.
28 */
29#define RDS_PORT 18634
30
31#ifdef DEBUG
32#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
33#else
34/* sigh, pr_debug() causes unused variable warnings */
35static inline void __attribute__ ((format (printf, 1, 2)))
36rdsdebug(char *fmt, ...)
37{
38}
39#endif
40
41/* XXX is there one of these somewhere? */
42#define ceil(x, y) \
43 ({ unsigned long __x = (x), __y = (y); (__x + __y - 1) / __y; })
44
45#define RDS_FRAG_SHIFT 12
46#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT))
47
48#define RDS_CONG_MAP_BYTES (65536 / 8)
49#define RDS_CONG_MAP_LONGS (RDS_CONG_MAP_BYTES / sizeof(unsigned long))
50#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
51#define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8)
52
53struct rds_cong_map {
54 struct rb_node m_rb_node;
55 __be32 m_addr;
56 wait_queue_head_t m_waitq;
57 struct list_head m_conn_list;
58 unsigned long m_page_addrs[RDS_CONG_MAP_PAGES];
59};
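
The congestion map above is one bit per 16-bit port (65536 bits, 8 KB) split across RDS_CONG_MAP_PAGES pages. The bit twiddling itself lives in cong.c, which is not part of this hunk, so the following is only an assumed illustration of how a port number would index into the page array:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE		4096UL
#define RDS_CONG_MAP_BYTES	(65536 / 8)
#define RDS_CONG_MAP_PAGES	(RDS_CONG_MAP_BYTES / PAGE_SIZE)
#define RDS_CONG_MAP_PAGE_BITS	(PAGE_SIZE * 8)

int main(void)
{
	uint16_t port = 18634;		/* RDS_PORT, host byte order */
	unsigned long page = port / RDS_CONG_MAP_PAGE_BITS;
	unsigned long bit = port % RDS_CONG_MAP_PAGE_BITS;

	printf("%lu bytes over %lu pages; port %u -> page %lu, bit %lu\n",
	       (unsigned long)RDS_CONG_MAP_BYTES,
	       (unsigned long)RDS_CONG_MAP_PAGES, port, page, bit);
	return 0;
}
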
60
61
62/*
63 * This is how we will track the connection state:
64 * A connection is always in one of the following
65 * states. Updates to the state are atomic and imply
66 * a memory barrier.
67 */
68enum {
69 RDS_CONN_DOWN = 0,
70 RDS_CONN_CONNECTING,
71 RDS_CONN_DISCONNECTING,
72 RDS_CONN_UP,
73 RDS_CONN_ERROR,
74};
75
76/* Bits for c_flags */
77#define RDS_LL_SEND_FULL 0
78#define RDS_RECONNECT_PENDING 1
79
80struct rds_connection {
81 struct hlist_node c_hash_node;
82 __be32 c_laddr;
83 __be32 c_faddr;
84 unsigned int c_loopback:1;
85 struct rds_connection *c_passive;
86
87 struct rds_cong_map *c_lcong;
88 struct rds_cong_map *c_fcong;
89
90 struct mutex c_send_lock; /* protect send ring */
91 struct rds_message *c_xmit_rm;
92 unsigned long c_xmit_sg;
93 unsigned int c_xmit_hdr_off;
94 unsigned int c_xmit_data_off;
95 unsigned int c_xmit_rdma_sent;
96
97 spinlock_t c_lock; /* protect msg queues */
98 u64 c_next_tx_seq;
99 struct list_head c_send_queue;
100 struct list_head c_retrans;
101
102 u64 c_next_rx_seq;
103
104 struct rds_transport *c_trans;
105 void *c_transport_data;
106
107 atomic_t c_state;
108 unsigned long c_flags;
109 unsigned long c_reconnect_jiffies;
110 struct delayed_work c_send_w;
111 struct delayed_work c_recv_w;
112 struct delayed_work c_conn_w;
113 struct work_struct c_down_w;
114 struct mutex c_cm_lock; /* protect conn state & cm */
115
116 struct list_head c_map_item;
117 unsigned long c_map_queued;
118 unsigned long c_map_offset;
119 unsigned long c_map_bytes;
120
121 unsigned int c_unacked_packets;
122 unsigned int c_unacked_bytes;
123
124 /* Protocol version */
125 unsigned int c_version;
126};
127
128#define RDS_FLAG_CONG_BITMAP 0x01
129#define RDS_FLAG_ACK_REQUIRED 0x02
130#define RDS_FLAG_RETRANSMITTED 0x04
131#define RDS_MAX_ADV_CREDIT 127
132
133/*
134 * Maximum space available for extension headers.
135 */
136#define RDS_HEADER_EXT_SPACE 16
137
138struct rds_header {
139 __be64 h_sequence;
140 __be64 h_ack;
141 __be32 h_len;
142 __be16 h_sport;
143 __be16 h_dport;
144 u8 h_flags;
145 u8 h_credit;
146 u8 h_padding[4];
147 __sum16 h_csum;
148
149 u8 h_exthdr[RDS_HEADER_EXT_SPACE];
150};
151
152/*
153 * Reserved - indicates end of extensions
154 */
155#define RDS_EXTHDR_NONE 0
156
157/*
158 * This extension header is included in the very
159 * first message that is sent on a new connection,
160 * and identifies the protocol level. This will help
161 * rolling updates if a future change requires breaking
162 * the protocol.
163 * NB: This is no longer true for IB, where we do a version
164 * negotiation during the connection setup phase (protocol
165 * version information is included in the RDMA CM private data).
166 */
167#define RDS_EXTHDR_VERSION 1
168struct rds_ext_header_version {
169 __be32 h_version;
170};
171
172/*
173 * This extension header is included in the RDS message
174 * chasing an RDMA operation.
175 */
176#define RDS_EXTHDR_RDMA 2
177struct rds_ext_header_rdma {
178 __be32 h_rdma_rkey;
179};
180
181/*
182 * This extension header tells the peer about the
183 * destination <R_Key,offset> of the requested RDMA
184 * operation.
185 */
186#define RDS_EXTHDR_RDMA_DEST 3
187struct rds_ext_header_rdma_dest {
188 __be32 h_rdma_rkey;
189 __be32 h_rdma_offset;
190};
191
192#define __RDS_EXTHDR_MAX 16 /* for now */
193
194struct rds_incoming {
195 atomic_t i_refcount;
196 struct list_head i_item;
197 struct rds_connection *i_conn;
198 struct rds_header i_hdr;
199 unsigned long i_rx_jiffies;
200 __be32 i_saddr;
201
202 rds_rdma_cookie_t i_rdma_cookie;
203};
204
205/*
206 * m_sock_item and m_conn_item are on lists that are serialized under
207 * conn->c_lock. m_sock_item has additional meaning in that once it is empty
208 * the message will not be put back on the retransmit list after being sent.
209 * messages that are canceled while being sent rely on this.
210 *
211 * m_inc is used by loopback so that it can pass an incoming message straight
212 * back up into the rx path. It embeds a wire header which is also used by
213 * the send path, which is kind of awkward.
214 *
215 * m_sock_item indicates the message's presence on a socket's send or receive
216 * queue. m_rs will point to that socket.
217 *
218 * m_daddr is used by cancellation to prune messages to a given destination.
219 *
220 * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock
221 * nesting. As paths iterate over messages on a sock, or conn, they must
222 * also lock the conn, or sock, to remove the message from those lists too.
223 * Testing the flag to determine if the message is still on the lists lets
224 * us avoid testing the list_head directly. That means each path can use
225 * the message's list_head to keep it on a local list while juggling locks
226 * without confusing the other path.
227 *
228 * m_ack_seq is an optional field set by transports who need a different
229 * sequence number range to invalidate. They can use this in a callback
230 * that they pass to rds_send_drop_acked() to see if each message has been
231 * acked. The HAS_ACK_SEQ flag can be used to detect messages which haven't
232 * had ack_seq set yet.
233 */
234#define RDS_MSG_ON_SOCK 1
235#define RDS_MSG_ON_CONN 2
236#define RDS_MSG_HAS_ACK_SEQ 3
237#define RDS_MSG_ACK_REQUIRED 4
238#define RDS_MSG_RETRANSMITTED 5
239#define RDS_MSG_MAPPED 6
240#define RDS_MSG_PAGEVEC 7
241
242struct rds_message {
243 atomic_t m_refcount;
244 struct list_head m_sock_item;
245 struct list_head m_conn_item;
246 struct rds_incoming m_inc;
247 u64 m_ack_seq;
248 __be32 m_daddr;
249 unsigned long m_flags;
250
251 /* Never access m_rs without holding m_rs_lock.
252 * Lock nesting is
253 * rm->m_rs_lock
254 * -> rs->rs_lock
255 */
256 spinlock_t m_rs_lock;
257 struct rds_sock *m_rs;
258 struct rds_rdma_op *m_rdma_op;
259 rds_rdma_cookie_t m_rdma_cookie;
260 struct rds_mr *m_rdma_mr;
261 unsigned int m_nents;
262 unsigned int m_count;
263 struct scatterlist m_sg[0];
264};
265
266/*
267 * The RDS notifier is used (optionally) to tell the application about
268 * completed RDMA operations. Rather than keeping the whole rds message
269 * around on the queue, we allocate a small notifier that is put on the
270 * socket's notifier_list. Notifications are delivered to the application
271 * through control messages.
272 */
273struct rds_notifier {
274 struct list_head n_list;
275 uint64_t n_user_token;
276 int n_status;
277};
278
279/**
280 * struct rds_transport - transport specific behavioural hooks
281 *
282 * @xmit: .xmit is called by rds_send_xmit() to tell the transport to send
283 * part of a message. The caller serializes on the send_sem so this
284 * doesn't need to be reentrant for a given conn. The header must be
285 * sent before the data payload. .xmit must be prepared to send a
286 * message with no data payload. .xmit should return the number of
287 * bytes that were sent down the connection, including header bytes.
288 * Returning 0 tells the caller that it doesn't need to perform any
289 * additional work now. This is usually the case when the transport has
290 * filled the sending queue for its connection and will handle
291 * triggering the rds thread to continue the send when space becomes
292 * available. Returning -EAGAIN tells the caller to retry the send
293 * immediately. Returning -ENOMEM tells the caller to retry the send at
294 * some point in the future.
295 *
296 * @conn_shutdown: conn_shutdown stops traffic on the given connection. Once
297 * it returns the connection can not call rds_recv_incoming().
298 * This will only be called once after conn_connect returns
299 * non-zero success. The caller serializes this with
300 * the send and connecting paths (xmit_* and conn_*). The
301 * transport is responsible for other serialization, including
302 * rds_recv_incoming(). This is called in process context but
303 * should try hard not to block.
304 *
305 * @xmit_cong_map: This asks the transport to send the local bitmap down the
306 * given connection. XXX get a better story about the bitmap
307 * flag and header.
308 */
309
310struct rds_transport {
311 char t_name[TRANSNAMSIZ];
312 struct list_head t_item;
313 struct module *t_owner;
314 unsigned int t_prefer_loopback:1;
315
316 int (*laddr_check)(__be32 addr);
317 int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
318 void (*conn_free)(void *data);
319 int (*conn_connect)(struct rds_connection *conn);
320 void (*conn_shutdown)(struct rds_connection *conn);
321 void (*xmit_prepare)(struct rds_connection *conn);
322 void (*xmit_complete)(struct rds_connection *conn);
323 int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
324 unsigned int hdr_off, unsigned int sg, unsigned int off);
325 int (*xmit_cong_map)(struct rds_connection *conn,
326 struct rds_cong_map *map, unsigned long offset);
327 int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op);
328 int (*recv)(struct rds_connection *conn);
329 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov,
330 size_t size);
331 void (*inc_purge)(struct rds_incoming *inc);
332 void (*inc_free)(struct rds_incoming *inc);
333
334 int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
335 struct rdma_cm_event *event);
336 int (*cm_initiate_connect)(struct rdma_cm_id *cm_id);
337 void (*cm_connect_complete)(struct rds_connection *conn,
338 struct rdma_cm_event *event);
339
340 unsigned int (*stats_info_copy)(struct rds_info_iterator *iter,
341 unsigned int avail);
342 void (*exit)(void);
343 void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg,
344 struct rds_sock *rs, u32 *key_ret);
345 void (*sync_mr)(void *trans_private, int direction);
346 void (*free_mr)(void *trans_private, int invalidate);
347 void (*flush_mrs)(void);
348};
349
350struct rds_sock {
351 struct sock rs_sk;
352
353 u64 rs_user_addr;
354 u64 rs_user_bytes;
355
356 /*
357 * bound_addr used for both incoming and outgoing, no INADDR_ANY
358 * support.
359 */
360 struct rb_node rs_bound_node;
361 __be32 rs_bound_addr;
362 __be32 rs_conn_addr;
363 __be16 rs_bound_port;
364 __be16 rs_conn_port;
365
366 /*
367 * This is only used to communicate the transport between bind and
368 * initiating connections. All other trans use is referenced through
369 * the connection.
370 */
371 struct rds_transport *rs_transport;
372
373 /*
374 * rds_sendmsg caches the conn it used the last time around.
375 * This helps avoid costly lookups.
376 */
377 struct rds_connection *rs_conn;
378
379 /* flag indicating we were congested or not */
380 int rs_congested;
381
382 /* rs_lock protects all these adjacent members before the newline */
383 spinlock_t rs_lock;
384 struct list_head rs_send_queue;
385 u32 rs_snd_bytes;
386 int rs_rcv_bytes;
387 struct list_head rs_notify_queue; /* currently used for failed RDMAs */
388
389 /* Congestion wake_up. If rs_cong_monitor is set, we use cong_mask
390 * to decide whether the application should be woken up.
391 * If not set, we use rs_cong_track to find out whether a cong map
392 * update arrived.
393 */
394 uint64_t rs_cong_mask;
395 uint64_t rs_cong_notify;
396 struct list_head rs_cong_list;
397 unsigned long rs_cong_track;
398
399 /*
400 * rs_recv_lock protects the receive queue, and is
401 * used to serialize with rds_release.
402 */
403 rwlock_t rs_recv_lock;
404 struct list_head rs_recv_queue;
405
406 /* just for stats reporting */
407 struct list_head rs_item;
408
409 /* these have their own lock */
410 spinlock_t rs_rdma_lock;
411 struct rb_root rs_rdma_keys;
412
413 /* Socket options - in case there will be more */
414 unsigned char rs_recverr,
415 rs_cong_monitor;
416};
417
418static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
419{
420 return container_of(sk, struct rds_sock, rs_sk);
421}
422static inline struct sock *rds_rs_to_sk(struct rds_sock *rs)
423{
424 return &rs->rs_sk;
425}
426
427/*
428 * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value
429 * to account for overhead. We don't account for overhead here; we just
430 * count the number of queued payload bytes against the specified value.
431 */
432static inline int rds_sk_sndbuf(struct rds_sock *rs)
433{
434 return rds_rs_to_sk(rs)->sk_sndbuf / 2;
435}
436static inline int rds_sk_rcvbuf(struct rds_sock *rs)
437{
438 return rds_rs_to_sk(rs)->sk_rcvbuf / 2;
439}
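
The halving above undoes the doubling the socket core applies when SO_SNDBUF/SO_RCVBUF are set, so RDS ends up comparing queued payload bytes against the value the application actually asked for. A quick userspace check of that doubling (shown on a UDP socket, since it is core socket behaviour rather than anything RDS-specific):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int req = 65536, got = 0;
	socklen_t len = sizeof(got);

	if (fd < 0)
		return 1;
	setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &req, sizeof(req));
	getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &got, &len);

	/* The kernel stores roughly twice the requested value in sk_sndbuf;
	 * rds_sk_sndbuf() halves it again before comparing payload bytes. */
	printf("requested %d, sk_sndbuf %d, effective budget %d\n",
	       req, got, got / 2);
	close(fd);
	return 0;
}
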
440
441struct rds_statistics {
442 uint64_t s_conn_reset;
443 uint64_t s_recv_drop_bad_checksum;
444 uint64_t s_recv_drop_old_seq;
445 uint64_t s_recv_drop_no_sock;
446 uint64_t s_recv_drop_dead_sock;
447 uint64_t s_recv_deliver_raced;
448 uint64_t s_recv_delivered;
449 uint64_t s_recv_queued;
450 uint64_t s_recv_immediate_retry;
451 uint64_t s_recv_delayed_retry;
452 uint64_t s_recv_ack_required;
453 uint64_t s_recv_rdma_bytes;
454 uint64_t s_recv_ping;
455 uint64_t s_send_queue_empty;
456 uint64_t s_send_queue_full;
457 uint64_t s_send_sem_contention;
458 uint64_t s_send_sem_queue_raced;
459 uint64_t s_send_immediate_retry;
460 uint64_t s_send_delayed_retry;
461 uint64_t s_send_drop_acked;
462 uint64_t s_send_ack_required;
463 uint64_t s_send_queued;
464 uint64_t s_send_rdma;
465 uint64_t s_send_rdma_bytes;
466 uint64_t s_send_pong;
467 uint64_t s_page_remainder_hit;
468 uint64_t s_page_remainder_miss;
469 uint64_t s_copy_to_user;
470 uint64_t s_copy_from_user;
471 uint64_t s_cong_update_queued;
472 uint64_t s_cong_update_received;
473 uint64_t s_cong_send_error;
474 uint64_t s_cong_send_blocked;
475};
476
477/* af_rds.c */
478void rds_sock_addref(struct rds_sock *rs);
479void rds_sock_put(struct rds_sock *rs);
480void rds_wake_sk_sleep(struct rds_sock *rs);
481static inline void __rds_wake_sk_sleep(struct sock *sk)
482{
483 wait_queue_head_t *waitq = sk->sk_sleep;
484
485 if (!sock_flag(sk, SOCK_DEAD) && waitq)
486 wake_up(waitq);
487}
488extern wait_queue_head_t rds_poll_waitq;
489
490
491/* bind.c */
492int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
493void rds_remove_bound(struct rds_sock *rs);
494struct rds_sock *rds_find_bound(__be32 addr, __be16 port);
495
496/* cong.c */
497int rds_cong_get_maps(struct rds_connection *conn);
498void rds_cong_add_conn(struct rds_connection *conn);
499void rds_cong_remove_conn(struct rds_connection *conn);
500void rds_cong_set_bit(struct rds_cong_map *map, __be16 port);
501void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port);
502int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock, struct rds_sock *rs);
503void rds_cong_queue_updates(struct rds_cong_map *map);
504void rds_cong_map_updated(struct rds_cong_map *map, uint64_t);
505int rds_cong_updated_since(unsigned long *recent);
506void rds_cong_add_socket(struct rds_sock *);
507void rds_cong_remove_socket(struct rds_sock *);
508void rds_cong_exit(void);
509struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
510
511/* conn.c */
512int __init rds_conn_init(void);
513void rds_conn_exit(void);
514struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
515 struct rds_transport *trans, gfp_t gfp);
516struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
517 struct rds_transport *trans, gfp_t gfp);
518void rds_conn_destroy(struct rds_connection *conn);
519void rds_conn_reset(struct rds_connection *conn);
520void rds_conn_drop(struct rds_connection *conn);
521void rds_for_each_conn_info(struct socket *sock, unsigned int len,
522 struct rds_info_iterator *iter,
523 struct rds_info_lengths *lens,
524 int (*visitor)(struct rds_connection *, void *),
525 size_t item_len);
526void __rds_conn_error(struct rds_connection *conn, const char *, ...)
527 __attribute__ ((format (printf, 2, 3)));
528#define rds_conn_error(conn, fmt...) \
529 __rds_conn_error(conn, KERN_WARNING "RDS: " fmt)
530
531static inline int
532rds_conn_transition(struct rds_connection *conn, int old, int new)
533{
534 return atomic_cmpxchg(&conn->c_state, old, new) == old;
535}
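
rds_conn_transition() above is a compare-and-swap state machine step: the transition only happens if the connection is still in the expected old state, so concurrent callers cannot both move it. A C11 sketch of the same idea (illustration only):

#include <stdatomic.h>
#include <stdio.h>

enum { CONN_DOWN, CONN_CONNECTING, CONN_UP };

static int transition(atomic_int *state, int old, int new)
{
	/* Succeeds only if *state still equals old, like atomic_cmpxchg(). */
	return atomic_compare_exchange_strong(state, &old, new);
}

int main(void)
{
	atomic_int state = CONN_DOWN;

	printf("first DOWN->CONNECTING: %d\n",
	       transition(&state, CONN_DOWN, CONN_CONNECTING));
	printf("second DOWN->CONNECTING: %d\n",
	       transition(&state, CONN_DOWN, CONN_CONNECTING));
	return 0;
}
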
536
537static inline int
538rds_conn_state(struct rds_connection *conn)
539{
540 return atomic_read(&conn->c_state);
541}
542
543static inline int
544rds_conn_up(struct rds_connection *conn)
545{
546 return atomic_read(&conn->c_state) == RDS_CONN_UP;
547}
548
549static inline int
550rds_conn_connecting(struct rds_connection *conn)
551{
552 return atomic_read(&conn->c_state) == RDS_CONN_CONNECTING;
553}
554
555/* message.c */
556struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
557struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
558 size_t total_len);
559struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
560void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
561 __be16 dport, u64 seq);
562int rds_message_add_extension(struct rds_header *hdr,
563 unsigned int type, const void *data, unsigned int len);
564int rds_message_next_extension(struct rds_header *hdr,
565 unsigned int *pos, void *buf, unsigned int *buflen);
566int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version);
567int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version);
568int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
569int rds_message_inc_copy_to_user(struct rds_incoming *inc,
570 struct iovec *first_iov, size_t size);
571void rds_message_inc_purge(struct rds_incoming *inc);
572void rds_message_inc_free(struct rds_incoming *inc);
573void rds_message_addref(struct rds_message *rm);
574void rds_message_put(struct rds_message *rm);
575void rds_message_wait(struct rds_message *rm);
576void rds_message_unmapped(struct rds_message *rm);
577
578static inline void rds_message_make_checksum(struct rds_header *hdr)
579{
580 hdr->h_csum = 0;
581 hdr->h_csum = ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2);
582}
583
584static inline int rds_message_verify_checksum(const struct rds_header *hdr)
585{
586 return !hdr->h_csum || ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2) == 0;
587}
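
rds_message_make_checksum() computes the header checksum with h_csum zeroed and stores it; rds_message_verify_checksum() relies on the ones' complement property that re-summing the whole header, stored checksum included, folds to zero. ip_fast_csum() works on 32-bit word counts in the kernel; the sketch below uses a plain byte-wise Internet checksum just to show the property it relies on (illustration, not the kernel routine):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Standard Internet (ones' complement) checksum over a byte buffer. */
static uint16_t csum16(const void *buf, size_t len)
{
	const uint8_t *p = buf;
	uint32_t sum = 0;
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += (uint32_t)p[i] << 8 | p[i + 1];
	if (len & 1)
		sum += (uint32_t)p[len - 1] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

struct hdr {
	uint8_t payload[14];
	uint8_t csum[2];	/* stored big-endian, to match the sum above */
};

int main(void)
{
	struct hdr h;
	uint16_t c;

	memset(&h, 0xab, sizeof(h));
	h.csum[0] = h.csum[1] = 0;

	c = csum16(&h, sizeof(h));	/* "make": checksum with field zeroed */
	h.csum[0] = c >> 8;
	h.csum[1] = c & 0xff;

	/* "verify": summing the whole header, checksum included, gives 0 */
	printf("verify: %s\n", csum16(&h, sizeof(h)) == 0 ? "ok" : "bad");
	return 0;
}
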
588
589
590/* page.c */
591int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
592 gfp_t gfp);
593int rds_page_copy_user(struct page *page, unsigned long offset,
594 void __user *ptr, unsigned long bytes,
595 int to_user);
596#define rds_page_copy_to_user(page, offset, ptr, bytes) \
597 rds_page_copy_user(page, offset, ptr, bytes, 1)
598#define rds_page_copy_from_user(page, offset, ptr, bytes) \
599 rds_page_copy_user(page, offset, ptr, bytes, 0)
600void rds_page_exit(void);
601
602/* recv.c */
603void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
604 __be32 saddr);
605void rds_inc_addref(struct rds_incoming *inc);
606void rds_inc_put(struct rds_incoming *inc);
607void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
608 struct rds_incoming *inc, gfp_t gfp, enum km_type km);
609int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
610 size_t size, int msg_flags);
611void rds_clear_recv_queue(struct rds_sock *rs);
612int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg);
613void rds_inc_info_copy(struct rds_incoming *inc,
614 struct rds_info_iterator *iter,
615 __be32 saddr, __be32 daddr, int flip);
616
617/* send.c */
618int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
619 size_t payload_len);
620void rds_send_reset(struct rds_connection *conn);
621int rds_send_xmit(struct rds_connection *conn);
622struct sockaddr_in;
623void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
624typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
625void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
626 is_acked_func is_acked);
627int rds_send_acked_before(struct rds_connection *conn, u64 seq);
628void rds_send_remove_from_sock(struct list_head *messages, int status);
629int rds_send_pong(struct rds_connection *conn, __be16 dport);
630struct rds_message *rds_send_get_message(struct rds_connection *,
631 struct rds_rdma_op *);
632
633/* rdma.c */
634void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
635
636/* stats.c */
637DECLARE_PER_CPU(struct rds_statistics, rds_stats);
638#define rds_stats_inc_which(which, member) do { \
639 per_cpu(which, get_cpu()).member++; \
640 put_cpu(); \
641} while (0)
642#define rds_stats_inc(member) rds_stats_inc_which(rds_stats, member)
643#define rds_stats_add_which(which, member, count) do { \
644 per_cpu(which, get_cpu()).member += count; \
645 put_cpu(); \
646} while (0)
647#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count)
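
The rds_stats_inc()/rds_stats_add() macros above bump a per-CPU counter with preemption disabled (get_cpu()/put_cpu()), so the fast path needs no atomics or locks; readers later sum the per-CPU slots. A userspace analogue with one slot per thread (illustration only):

#include <pthread.h>
#include <stdio.h>

#define NR_SLOTS 4

static unsigned long long s_recv_queued[NR_SLOTS];

static void *worker(void *arg)
{
	int slot = (int)(long)arg;
	int i;

	/* Each thread only ever touches its own slot, so plain increments
	 * are race-free, like per_cpu(...).member++ under get_cpu(). */
	for (i = 0; i < 1000000; i++)
		s_recv_queued[slot]++;
	return NULL;
}

int main(void)
{
	pthread_t t[NR_SLOTS];
	unsigned long long total = 0;
	int i;

	for (i = 0; i < NR_SLOTS; i++)
		pthread_create(&t[i], NULL, worker, (void *)(long)i);
	for (i = 0; i < NR_SLOTS; i++) {
		pthread_join(t[i], NULL);
		total += s_recv_queued[i];
	}
	printf("s_recv_queued = %llu\n", total);
	return 0;
}
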
648int __init rds_stats_init(void);
649void rds_stats_exit(void);
650void rds_stats_info_copy(struct rds_info_iterator *iter,
651 uint64_t *values, char **names, size_t nr);
652
653/* sysctl.c */
654int __init rds_sysctl_init(void);
655void rds_sysctl_exit(void);
656extern unsigned long rds_sysctl_sndbuf_min;
657extern unsigned long rds_sysctl_sndbuf_default;
658extern unsigned long rds_sysctl_sndbuf_max;
659extern unsigned long rds_sysctl_reconnect_min_jiffies;
660extern unsigned long rds_sysctl_reconnect_max_jiffies;
661extern unsigned int rds_sysctl_max_unacked_packets;
662extern unsigned int rds_sysctl_max_unacked_bytes;
663extern unsigned int rds_sysctl_ping_enable;
664extern unsigned long rds_sysctl_trace_flags;
665extern unsigned int rds_sysctl_trace_level;
666
667/* threads.c */
668int __init rds_threads_init(void);
669void rds_threads_exit(void);
670extern struct workqueue_struct *rds_wq;
671void rds_connect_worker(struct work_struct *);
672void rds_shutdown_worker(struct work_struct *);
673void rds_send_worker(struct work_struct *);
674void rds_recv_worker(struct work_struct *);
675void rds_connect_complete(struct rds_connection *conn);
676
677/* transport.c */
678int rds_trans_register(struct rds_transport *trans);
679void rds_trans_unregister(struct rds_transport *trans);
680struct rds_transport *rds_trans_get_preferred(__be32 addr);
681unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
682 unsigned int avail);
683int __init rds_trans_init(void);
684void rds_trans_exit(void);
685
686#endif
diff --git a/net/rds/recv.c b/net/rds/recv.c
new file mode 100644
index 000000000000..f2118c51cfa3
--- /dev/null
+++ b/net/rds/recv.c
@@ -0,0 +1,542 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <net/sock.h>
35#include <linux/in.h>
36
37#include "rds.h"
38#include "rdma.h"
39
40void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
41 __be32 saddr)
42{
43 atomic_set(&inc->i_refcount, 1);
44 INIT_LIST_HEAD(&inc->i_item);
45 inc->i_conn = conn;
46 inc->i_saddr = saddr;
47 inc->i_rdma_cookie = 0;
48}
49
50void rds_inc_addref(struct rds_incoming *inc)
51{
52 rdsdebug("addref inc %p ref %d\n", inc, atomic_read(&inc->i_refcount));
53 atomic_inc(&inc->i_refcount);
54}
55
56void rds_inc_put(struct rds_incoming *inc)
57{
58 rdsdebug("put inc %p ref %d\n", inc, atomic_read(&inc->i_refcount));
59 if (atomic_dec_and_test(&inc->i_refcount)) {
60 BUG_ON(!list_empty(&inc->i_item));
61
62 inc->i_conn->c_trans->inc_free(inc);
63 }
64}
65
66static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
67 struct rds_cong_map *map,
68 int delta, __be16 port)
69{
70 int now_congested;
71
72 if (delta == 0)
73 return;
74
75 rs->rs_rcv_bytes += delta;
76 now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs);
77
78 rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d "
79 "now_cong %d delta %d\n",
80 rs, &rs->rs_bound_addr,
81 ntohs(rs->rs_bound_port), rs->rs_rcv_bytes,
82 rds_sk_rcvbuf(rs), now_congested, delta);
83
84 /* wasn't -> am congested */
85 if (!rs->rs_congested && now_congested) {
86 rs->rs_congested = 1;
87 rds_cong_set_bit(map, port);
88 rds_cong_queue_updates(map);
89 }
90 /* was -> aren't congested */
91 /* Require more free space before reporting uncongested to prevent
92 bouncing cong/uncong state too often */
93 else if (rs->rs_congested && (rs->rs_rcv_bytes < (rds_sk_rcvbuf(rs)/2))) {
94 rs->rs_congested = 0;
95 rds_cong_clear_bit(map, port);
96 rds_cong_queue_updates(map);
97 }
98
99 /* do nothing if no change in cong state */
100}
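
rds_recv_rcvbuf_delta() above applies hysteresis: the socket is marked congested as soon as queued bytes exceed its receive budget, but the congestion bit is only cleared once the queue drains below half the budget, which keeps the map from flapping on every receive. A toy model of the two thresholds (illustration only):

#include <stdio.h>

static int congested;

static void account(int *rcv_bytes, int delta, int rcvbuf)
{
	*rcv_bytes += delta;

	if (!congested && *rcv_bytes > rcvbuf) {
		congested = 1;			/* set the port's cong bit */
		printf("set cong bit at %d bytes\n", *rcv_bytes);
	} else if (congested && *rcv_bytes < rcvbuf / 2) {
		congested = 0;			/* clear it again */
		printf("clear cong bit at %d bytes\n", *rcv_bytes);
	}
}

int main(void)
{
	int rcv_bytes = 0, rcvbuf = 1000;

	account(&rcv_bytes, 1200, rcvbuf);	/* over budget: set */
	account(&rcv_bytes, -400, rcvbuf);	/* 800 bytes: still set */
	account(&rcv_bytes, -400, rcvbuf);	/* 400 bytes: below half, clear */
	return 0;
}
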
101
102/*
103 * Process all extension headers that come with this message.
104 */
105static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock *rs)
106{
107 struct rds_header *hdr = &inc->i_hdr;
108 unsigned int pos = 0, type, len;
109 union {
110 struct rds_ext_header_version version;
111 struct rds_ext_header_rdma rdma;
112 struct rds_ext_header_rdma_dest rdma_dest;
113 } buffer;
114
115 while (1) {
116 len = sizeof(buffer);
117 type = rds_message_next_extension(hdr, &pos, &buffer, &len);
118 if (type == RDS_EXTHDR_NONE)
119 break;
120 /* Process extension header here */
121 switch (type) {
122 case RDS_EXTHDR_RDMA:
123 rds_rdma_unuse(rs, be32_to_cpu(buffer.rdma.h_rdma_rkey), 0);
124 break;
125
126 case RDS_EXTHDR_RDMA_DEST:
127 /* We ignore the size for now. We could stash it
128 * somewhere and use it for error checking. */
129 inc->i_rdma_cookie = rds_rdma_make_cookie(
130 be32_to_cpu(buffer.rdma_dest.h_rdma_rkey),
131 be32_to_cpu(buffer.rdma_dest.h_rdma_offset));
132
133 break;
134 }
135 }
136}
137
138/*
139 * The transport must make sure that this is serialized against other
140 * rx and conn reset on this specific conn.
141 *
142 * We currently assert that only one fragmented message will be sent
143 * down a connection at a time. This lets us reassemble in the conn
144 * instead of per-flow which means that we don't have to go digging through
145 * flows to tear down partial reassembly progress on conn failure and
146 * we save flow lookup and locking for each frag arrival. It does mean
147 * that small messages will wait behind large ones. Fragmenting at all
148 * is only to reduce the memory consumption of pre-posted buffers.
149 *
150 * The caller passes in saddr and daddr instead of us getting it from the
151 * conn. This lets loopback, which only has one conn for both directions,
152 * tell us which roles the addrs in the conn are playing for this message.
153 */
154void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
155 struct rds_incoming *inc, gfp_t gfp, enum km_type km)
156{
157 struct rds_sock *rs = NULL;
158 struct sock *sk;
159 unsigned long flags;
160
161 inc->i_conn = conn;
162 inc->i_rx_jiffies = jiffies;
163
164 rdsdebug("conn %p next %llu inc %p seq %llu len %u sport %u dport %u "
165 "flags 0x%x rx_jiffies %lu\n", conn,
166 (unsigned long long)conn->c_next_rx_seq,
167 inc,
168 (unsigned long long)be64_to_cpu(inc->i_hdr.h_sequence),
169 be32_to_cpu(inc->i_hdr.h_len),
170 be16_to_cpu(inc->i_hdr.h_sport),
171 be16_to_cpu(inc->i_hdr.h_dport),
172 inc->i_hdr.h_flags,
173 inc->i_rx_jiffies);
174
175 /*
176 * Sequence numbers should only increase. Messages get their
177 * sequence number as they're queued in a sending conn. They
178 * can be dropped, though, if the sending socket is closed before
179 * they hit the wire. So sequence numbers can skip forward
180 * under normal operation. They can also drop back in the conn
181 * failover case as previously sent messages are resent down the
182 * new instance of a conn. We drop those; otherwise we have
183 * to assume that the next valid seq does not come after a
184 * hole in the fragment stream.
185 *
186 * The headers don't give us a way to realize if fragments of
187 * a message have been dropped. We assume that frags that arrive
188 * to a flow are part of the current message on the flow that is
189 * being reassembled. This means that senders can't drop messages
190 * from the sending conn until all their frags are sent.
191 *
192 * XXX we could spend more on the wire to get more robust failure
193 * detection, arguably worth it to avoid data corruption.
194 */
195 if (be64_to_cpu(inc->i_hdr.h_sequence) < conn->c_next_rx_seq
196 && (inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED)) {
197 rds_stats_inc(s_recv_drop_old_seq);
198 goto out;
199 }
200 conn->c_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1;
201
202 if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
203 rds_stats_inc(s_recv_ping);
204 rds_send_pong(conn, inc->i_hdr.h_sport);
205 goto out;
206 }
207
208 rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
209 if (rs == NULL) {
210 rds_stats_inc(s_recv_drop_no_sock);
211 goto out;
212 }
213
214 /* Process extension headers */
215 rds_recv_incoming_exthdrs(inc, rs);
216
217 /* We can be racing with rds_release() which marks the socket dead. */
218 sk = rds_rs_to_sk(rs);
219
220 /* serialize with rds_release -> sock_orphan */
221 write_lock_irqsave(&rs->rs_recv_lock, flags);
222 if (!sock_flag(sk, SOCK_DEAD)) {
223 rdsdebug("adding inc %p to rs %p's recv queue\n", inc, rs);
224 rds_stats_inc(s_recv_queued);
225 rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
226 be32_to_cpu(inc->i_hdr.h_len),
227 inc->i_hdr.h_dport);
228 rds_inc_addref(inc);
229 list_add_tail(&inc->i_item, &rs->rs_recv_queue);
230 __rds_wake_sk_sleep(sk);
231 } else {
232 rds_stats_inc(s_recv_drop_dead_sock);
233 }
234 write_unlock_irqrestore(&rs->rs_recv_lock, flags);
235
236out:
237 if (rs)
238 rds_sock_put(rs);
239}
240
241/*
242 * Be very careful here. This is called as the condition in
243 * wait_event_*(), so it needs to cope with being called many times.
244 */
245static int rds_next_incoming(struct rds_sock *rs, struct rds_incoming **inc)
246{
247 unsigned long flags;
248
249 if (*inc == NULL) {
250 read_lock_irqsave(&rs->rs_recv_lock, flags);
251 if (!list_empty(&rs->rs_recv_queue)) {
252 *inc = list_entry(rs->rs_recv_queue.next,
253 struct rds_incoming,
254 i_item);
255 rds_inc_addref(*inc);
256 }
257 read_unlock_irqrestore(&rs->rs_recv_lock, flags);
258 }
259
260 return *inc != NULL;
261}
262
263static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
264 int drop)
265{
266 struct sock *sk = rds_rs_to_sk(rs);
267 int ret = 0;
268 unsigned long flags;
269
270 write_lock_irqsave(&rs->rs_recv_lock, flags);
271 if (!list_empty(&inc->i_item)) {
272 ret = 1;
273 if (drop) {
274 /* XXX make sure this i_conn is reliable */
275 rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
276 -be32_to_cpu(inc->i_hdr.h_len),
277 inc->i_hdr.h_dport);
278 list_del_init(&inc->i_item);
279 rds_inc_put(inc);
280 }
281 }
282 write_unlock_irqrestore(&rs->rs_recv_lock, flags);
283
284 rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop);
285 return ret;
286}
287
288/*
289 * Pull errors off the error queue.
290 * If msghdr is NULL, we will just purge the error queue.
291 */
292int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr)
293{
294 struct rds_notifier *notifier;
295 struct rds_rdma_notify cmsg;
296 unsigned int count = 0, max_messages = ~0U;
297 unsigned long flags;
298 LIST_HEAD(copy);
299 int err = 0;
300
301
302 /* put_cmsg copies to user space and thus may sleep. We can't do this
303 * with rs_lock held, so first grab as many notifications as we can stuff
304 * in the user provided cmsg buffer. We don't try to copy more, to avoid
305 * losing notifications - except when the buffer is so small that it wouldn't
306 * even hold a single notification. Then we give him as much of this single
307 * msg as we can squeeze in, and set MSG_CTRUNC.
308 */
309 if (msghdr) {
310 max_messages = msghdr->msg_controllen / CMSG_SPACE(sizeof(cmsg));
311 if (!max_messages)
312 max_messages = 1;
313 }
314
315 spin_lock_irqsave(&rs->rs_lock, flags);
316 while (!list_empty(&rs->rs_notify_queue) && count < max_messages) {
317 notifier = list_entry(rs->rs_notify_queue.next,
318 struct rds_notifier, n_list);
319 list_move(&notifier->n_list, &copy);
320 count++;
321 }
322 spin_unlock_irqrestore(&rs->rs_lock, flags);
323
324 if (!count)
325 return 0;
326
327 while (!list_empty(&copy)) {
328 notifier = list_entry(copy.next, struct rds_notifier, n_list);
329
330 if (msghdr) {
331 cmsg.user_token = notifier->n_user_token;
332 cmsg.status = notifier->n_status;
333
334 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS,
335 sizeof(cmsg), &cmsg);
336 if (err)
337 break;
338 }
339
340 list_del_init(&notifier->n_list);
341 kfree(notifier);
342 }
343
344 /* If we bailed out because of an error in put_cmsg,
345 * we may be left with one or more notifications that we
346 * didn't process. Return them to the head of the list. */
347 if (!list_empty(&copy)) {
348 spin_lock_irqsave(&rs->rs_lock, flags);
349 list_splice(&copy, &rs->rs_notify_queue);
350 spin_unlock_irqrestore(&rs->rs_lock, flags);
351 }
352
353 return err;
354}
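
rds_notify_queue_get() above uses a common pattern: detach a batch of notifications while holding rs_lock, deliver them with the lock dropped (put_cmsg() may sleep), and splice whatever could not be delivered back onto the queue. A userspace sketch of that shape with a pthread mutex and a singly linked list (illustration only; the kernel code uses list_head):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct note {
	struct note *next;
	int token;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct note *queue;

/* Detach everything that is queued while holding the lock. */
static struct note *grab_batch(void)
{
	struct note *batch;

	pthread_mutex_lock(&lock);
	batch = queue;
	queue = NULL;
	pthread_mutex_unlock(&lock);
	return batch;
}

/* Stand-in for put_cmsg(): pretend delivery of token 2 fails. */
static int deliver(struct note *n)
{
	if (n->token == 2)
		return -1;
	printf("delivered %d\n", n->token);
	return 0;
}

int main(void)
{
	struct note *batch, *n;
	int i;

	for (i = 3; i >= 1; i--) {		/* queue now holds 1, 2, 3 */
		n = malloc(sizeof(*n));
		n->token = i;
		n->next = queue;
		queue = n;
	}

	batch = grab_batch();
	while ((n = batch) != NULL) {		/* deliver without the lock */
		if (deliver(n) < 0)
			break;
		batch = n->next;
		free(n);
	}

	if (batch) {				/* splice leftovers back */
		pthread_mutex_lock(&lock);
		for (n = batch; n->next; n = n->next)
			;
		n->next = queue;
		queue = batch;
		pthread_mutex_unlock(&lock);
		printf("requeued starting at token %d\n", batch->token);
	}
	return 0;
}
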
355
356/*
357 * Queue a congestion notification
358 */
359static int rds_notify_cong(struct rds_sock *rs, struct msghdr *msghdr)
360{
361 uint64_t notify = rs->rs_cong_notify;
362 unsigned long flags;
363 int err;
364
365 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_CONG_UPDATE,
366 sizeof(notify), &notify);
367 if (err)
368 return err;
369
370 spin_lock_irqsave(&rs->rs_lock, flags);
371 rs->rs_cong_notify &= ~notify;
372 spin_unlock_irqrestore(&rs->rs_lock, flags);
373
374 return 0;
375}
376
377/*
378 * Receive any control messages.
379 */
380static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg)
381{
382 int ret = 0;
383
384 if (inc->i_rdma_cookie) {
385 ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
386 sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie);
387 if (ret)
388 return ret;
389 }
390
391 return 0;
392}
393
394int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
395 size_t size, int msg_flags)
396{
397 struct sock *sk = sock->sk;
398 struct rds_sock *rs = rds_sk_to_rs(sk);
399 long timeo;
400 int ret = 0, nonblock = msg_flags & MSG_DONTWAIT;
401 struct sockaddr_in *sin;
402 struct rds_incoming *inc = NULL;
403
404 /* udp_recvmsg()->sock_recvtimeo() gets away without locking too.. */
405 timeo = sock_rcvtimeo(sk, nonblock);
406
407 rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo);
408
409 if (msg_flags & MSG_OOB)
410 goto out;
411
412 /* If there are pending notifications, do those - and nothing else */
413 if (!list_empty(&rs->rs_notify_queue)) {
414 ret = rds_notify_queue_get(rs, msg);
415 goto out;
416 }
417
418 if (rs->rs_cong_notify) {
419 ret = rds_notify_cong(rs, msg);
420 goto out;
421 }
422
423 while (1) {
424 if (!rds_next_incoming(rs, &inc)) {
425 if (nonblock) {
426 ret = -EAGAIN;
427 break;
428 }
429
430 timeo = wait_event_interruptible_timeout(*sk->sk_sleep,
431 rds_next_incoming(rs, &inc),
432 timeo);
433 rdsdebug("recvmsg woke inc %p timeo %ld\n", inc,
434 timeo);
435 if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT)
436 continue;
437
438 ret = timeo;
439 if (ret == 0)
440 ret = -ETIMEDOUT;
441 break;
442 }
443
444 rdsdebug("copying inc %p from %pI4:%u to user\n", inc,
445 &inc->i_conn->c_faddr,
446 ntohs(inc->i_hdr.h_sport));
447 ret = inc->i_conn->c_trans->inc_copy_to_user(inc, msg->msg_iov,
448 size);
449 if (ret < 0)
450 break;
451
452 /*
453 * if the message we just copied isn't at the head of the
454 * recv queue then someone else raced us to return it, try
455 * to get the next message.
456 */
457 if (!rds_still_queued(rs, inc, !(msg_flags & MSG_PEEK))) {
458 rds_inc_put(inc);
459 inc = NULL;
460 rds_stats_inc(s_recv_deliver_raced);
461 continue;
462 }
463
464 if (ret < be32_to_cpu(inc->i_hdr.h_len)) {
465 if (msg_flags & MSG_TRUNC)
466 ret = be32_to_cpu(inc->i_hdr.h_len);
467 msg->msg_flags |= MSG_TRUNC;
468 }
469
470 if (rds_cmsg_recv(inc, msg)) {
471 ret = -EFAULT;
472 goto out;
473 }
474
475 rds_stats_inc(s_recv_delivered);
476
477 sin = (struct sockaddr_in *)msg->msg_name;
478 if (sin) {
479 sin->sin_family = AF_INET;
480 sin->sin_port = inc->i_hdr.h_sport;
481 sin->sin_addr.s_addr = inc->i_saddr;
482 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
483 }
484 break;
485 }
486
487 if (inc)
488 rds_inc_put(inc);
489
490out:
491 return ret;
492}
493
494/*
495 * The socket is being shut down and we're asked to drop messages that were
496 * queued for recvmsg. The caller has unbound the socket so the receive path
497 * won't queue any more incoming fragments or messages on the socket.
498 */
499void rds_clear_recv_queue(struct rds_sock *rs)
500{
501 struct sock *sk = rds_rs_to_sk(rs);
502 struct rds_incoming *inc, *tmp;
503 unsigned long flags;
504
505 write_lock_irqsave(&rs->rs_recv_lock, flags);
506 list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) {
507 rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
508 -be32_to_cpu(inc->i_hdr.h_len),
509 inc->i_hdr.h_dport);
510 list_del_init(&inc->i_item);
511 rds_inc_put(inc);
512 }
513 write_unlock_irqrestore(&rs->rs_recv_lock, flags);
514}
515
516/*
517 * inc->i_saddr isn't used here because it is only set in the receive
518 * path.
519 */
520void rds_inc_info_copy(struct rds_incoming *inc,
521 struct rds_info_iterator *iter,
522 __be32 saddr, __be32 daddr, int flip)
523{
524 struct rds_info_message minfo;
525
526 minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence);
527 minfo.len = be32_to_cpu(inc->i_hdr.h_len);
528
529 if (flip) {
530 minfo.laddr = daddr;
531 minfo.faddr = saddr;
532 minfo.lport = inc->i_hdr.h_dport;
533 minfo.fport = inc->i_hdr.h_sport;
534 } else {
535 minfo.laddr = saddr;
536 minfo.faddr = daddr;
537 minfo.lport = inc->i_hdr.h_sport;
538 minfo.fport = inc->i_hdr.h_dport;
539 }
540
541 rds_info_copy(iter, &minfo, sizeof(minfo));
542}
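
The receive path above never mixes notifications into the data stream: rds_notify_queue_get() and rds_notify_cong() hand RDMA completion status and congestion updates to userspace as SOL_RDS control messages (RDS_CMSG_RDMA_STATUS, RDS_CMSG_CONG_UPDATE) on an otherwise ordinary recvmsg(). A minimal userspace sketch of draining those cmsgs follows; it assumes `fd` is an already-bound RDS socket, that the SOL_RDS/RDS_CMSG_* constants come from the RDS userspace header, and that the notification payload is the user_token/status pair copied out by rds_notify_queue_get() (the real struct definition lives in that header, not in this sketch).

/* Sketch only: fd is assumed to be a bound RDS socket, and SOL_RDS /
 * RDS_CMSG_RDMA_STATUS are assumed to be provided by the RDS userspace
 * header. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

static void drain_rds_notifications(int fd)
{
	char payload[4096], cbuf[1024];
	struct iovec iov = { .iov_base = payload, .iov_len = sizeof(payload) };
	struct msghdr msg = {
		.msg_iov = &iov, .msg_iovlen = 1,
		.msg_control = cbuf, .msg_controllen = sizeof(cbuf),
	};
	struct cmsghdr *cmsg;

	if (recvmsg(fd, &msg, 0) < 0)
		return;

	for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
		if (cmsg->cmsg_level != SOL_RDS)
			continue;
		if (cmsg->cmsg_type == RDS_CMSG_RDMA_STATUS) {
			/* payload layout mirrors what rds_notify_queue_get()
			 * copies out: a 64-bit user token followed by a
			 * 32-bit status (assumed; see the uapi header) */
			uint64_t token;
			int32_t status;

			memcpy(&token, CMSG_DATA(cmsg), sizeof(token));
			memcpy(&status, CMSG_DATA(cmsg) + sizeof(token),
			       sizeof(status));
			printf("RDMA op %llu completed, status %d\n",
			       (unsigned long long)token, status);
		}
	}
}
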
diff --git a/net/rds/send.c b/net/rds/send.c
new file mode 100644
index 000000000000..1b37364656f0
--- /dev/null
+++ b/net/rds/send.c
@@ -0,0 +1,1003 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <net/sock.h>
35#include <linux/in.h>
36#include <linux/list.h>
37
38#include "rds.h"
39#include "rdma.h"
40
41/* When transmitting messages in rds_send_xmit, we need to emerge from
42 * time to time and briefly release the CPU. Otherwise the softlockup watchdog
43 * will kick our shin.
44 * Also, it seems fairer to not let one busy connection stall all the
45 * others.
46 *
47 * send_batch_count is the number of times we'll loop in send_xmit. Setting
48 * it to 0 will restore the old behavior (where we looped until we had
49 * drained the queue).
50 */
51static int send_batch_count = 64;
52module_param(send_batch_count, int, 0444);
53MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue");
54
55/*
56 * Reset the send state. Caller must hold c_send_lock when calling here.
57 */
58void rds_send_reset(struct rds_connection *conn)
59{
60 struct rds_message *rm, *tmp;
61 unsigned long flags;
62
63 if (conn->c_xmit_rm) {
64 /* Tell the user the RDMA op is no longer mapped by the
65 * transport. This isn't entirely true (it's flushed out
66 * independently) but as the connection is down, there's
67 * no ongoing RDMA to/from that memory */
68 rds_message_unmapped(conn->c_xmit_rm);
69 rds_message_put(conn->c_xmit_rm);
70 conn->c_xmit_rm = NULL;
71 }
72 conn->c_xmit_sg = 0;
73 conn->c_xmit_hdr_off = 0;
74 conn->c_xmit_data_off = 0;
75 conn->c_xmit_rdma_sent = 0;
76
77 conn->c_map_queued = 0;
78
79 conn->c_unacked_packets = rds_sysctl_max_unacked_packets;
80 conn->c_unacked_bytes = rds_sysctl_max_unacked_bytes;
81
82 /* Mark messages as retransmissions, and move them to the send q */
83 spin_lock_irqsave(&conn->c_lock, flags);
84 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
85 set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
86 set_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags);
87 }
88 list_splice_init(&conn->c_retrans, &conn->c_send_queue);
89 spin_unlock_irqrestore(&conn->c_lock, flags);
90}
91
92/*
93 * We're making the conscious trade-off here to only send one message
94 * down the connection at a time.
95 * Pro:
96 * - tx queueing is a simple fifo list
97 * - reassembly is optional and easily done by transports per conn
98 * - no per flow rx lookup at all, straight to the socket
99 * - less per-frag memory and wire overhead
100 * Con:
101 * - queued acks can be delayed behind large messages
102 * Depends:
103 * - small message latency is higher behind queued large messages
104 * - large message latency isn't starved by intervening small sends
105 */
106int rds_send_xmit(struct rds_connection *conn)
107{
108 struct rds_message *rm;
109 unsigned long flags;
110 unsigned int tmp;
111 unsigned int send_quota = send_batch_count;
112 struct scatterlist *sg;
113 int ret = 0;
114 int was_empty = 0;
115 LIST_HEAD(to_be_dropped);
116
117 /*
118 * sendmsg calls here after having queued its message on the send
119 * queue. We only have one task feeding the connection at a time. If
120 * another thread is already feeding the queue then we back off. This
121 * avoids blocking the caller and trading per-connection data between
122 * caches per message.
123 *
124 * The sem holder will issue a retry if they notice that someone queued
125 * a message after they stopped walking the send queue but before they
126 * dropped the sem.
127 */
128 if (!mutex_trylock(&conn->c_send_lock)) {
129 rds_stats_inc(s_send_sem_contention);
130 ret = -ENOMEM;
131 goto out;
132 }
133
134 if (conn->c_trans->xmit_prepare)
135 conn->c_trans->xmit_prepare(conn);
136
137 /*
138 * spin trying to push headers and data down the connection until
139 * the connection doesn't make forward progress.
140 */
141 while (--send_quota) {
142 /*
143 * See if we need to send a congestion map update if we're
144 * between sending messages. The send_sem protects our sole
145 * use of c_map_offset and _bytes.
146 * Note this is used only by transports that define a special
147 * xmit_cong_map function. For all others, we allocate
148 * a cong_map message and treat it just like any other send.
149 */
150 if (conn->c_map_bytes) {
151 ret = conn->c_trans->xmit_cong_map(conn, conn->c_lcong,
152 conn->c_map_offset);
153 if (ret <= 0)
154 break;
155
156 conn->c_map_offset += ret;
157 conn->c_map_bytes -= ret;
158 if (conn->c_map_bytes)
159 continue;
160 }
161
162 /* If we're done sending the current message, clear the
163 * offset and S/G temporaries.
164 */
165 rm = conn->c_xmit_rm;
166 if (rm != NULL &&
167 conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
168 conn->c_xmit_sg == rm->m_nents) {
169 conn->c_xmit_rm = NULL;
170 conn->c_xmit_sg = 0;
171 conn->c_xmit_hdr_off = 0;
172 conn->c_xmit_data_off = 0;
173 conn->c_xmit_rdma_sent = 0;
174
175 /* Release the reference to the previous message. */
176 rds_message_put(rm);
177 rm = NULL;
178 }
179
180 /* If we're asked to send a cong map update, do so.
181 */
182 if (rm == NULL && test_and_clear_bit(0, &conn->c_map_queued)) {
183 if (conn->c_trans->xmit_cong_map != NULL) {
184 conn->c_map_offset = 0;
185 conn->c_map_bytes = sizeof(struct rds_header) +
186 RDS_CONG_MAP_BYTES;
187 continue;
188 }
189
190 rm = rds_cong_update_alloc(conn);
191 if (IS_ERR(rm)) {
192 ret = PTR_ERR(rm);
193 break;
194 }
195
196 conn->c_xmit_rm = rm;
197 }
198
199 /*
200 * Grab the next message from the send queue, if there is one.
201 *
202 * c_xmit_rm holds a ref while we're sending this message down
203 * the connection. We can use this ref while holding the
204 * send_sem.. rds_send_reset() is serialized with it.
205 */
206 if (rm == NULL) {
207 unsigned int len;
208
209 spin_lock_irqsave(&conn->c_lock, flags);
210
211 if (!list_empty(&conn->c_send_queue)) {
212 rm = list_entry(conn->c_send_queue.next,
213 struct rds_message,
214 m_conn_item);
215 rds_message_addref(rm);
216
217 /*
218 * Move the message from the send queue to the retransmit
219 * list right away.
220 */
221 list_move_tail(&rm->m_conn_item, &conn->c_retrans);
222 }
223
224 spin_unlock_irqrestore(&conn->c_lock, flags);
225
226 if (rm == NULL) {
227 was_empty = 1;
228 break;
229 }
230
231 /* Unfortunately, the way Infiniband deals with
232 * RDMA to a bad MR key is by moving the entire
233 * queue pair to error state. We could possibly
234 * recover from that, but right now we drop the
235 * connection.
236 * Therefore, we never retransmit messages with RDMA ops.
237 */
238 if (rm->m_rdma_op
239 && test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
240 spin_lock_irqsave(&conn->c_lock, flags);
241 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
242 list_move(&rm->m_conn_item, &to_be_dropped);
243 spin_unlock_irqrestore(&conn->c_lock, flags);
244 rds_message_put(rm);
245 continue;
246 }
247
248 /* Require an ACK every once in a while */
249 len = ntohl(rm->m_inc.i_hdr.h_len);
250 if (conn->c_unacked_packets == 0
251 || conn->c_unacked_bytes < len) {
252 __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
253
254 conn->c_unacked_packets = rds_sysctl_max_unacked_packets;
255 conn->c_unacked_bytes = rds_sysctl_max_unacked_bytes;
256 rds_stats_inc(s_send_ack_required);
257 } else {
258 conn->c_unacked_bytes -= len;
259 conn->c_unacked_packets--;
260 }
261
262 conn->c_xmit_rm = rm;
263 }
264
265 /*
266 * Try and send an rdma message. Let's see if we can
267 * keep this simple and require that the transport either
268 * send the whole rdma or none of it.
269 */
270 if (rm->m_rdma_op && !conn->c_xmit_rdma_sent) {
271 ret = conn->c_trans->xmit_rdma(conn, rm->m_rdma_op);
272 if (ret)
273 break;
274 conn->c_xmit_rdma_sent = 1;
275 /* The transport owns the mapped memory for now.
276 * You can't unmap it while it's on the send queue */
277 set_bit(RDS_MSG_MAPPED, &rm->m_flags);
278 }
279
280 if (conn->c_xmit_hdr_off < sizeof(struct rds_header) ||
281 conn->c_xmit_sg < rm->m_nents) {
282 ret = conn->c_trans->xmit(conn, rm,
283 conn->c_xmit_hdr_off,
284 conn->c_xmit_sg,
285 conn->c_xmit_data_off);
286 if (ret <= 0)
287 break;
288
289 if (conn->c_xmit_hdr_off < sizeof(struct rds_header)) {
290 tmp = min_t(int, ret,
291 sizeof(struct rds_header) -
292 conn->c_xmit_hdr_off);
293 conn->c_xmit_hdr_off += tmp;
294 ret -= tmp;
295 }
296
297 sg = &rm->m_sg[conn->c_xmit_sg];
298 while (ret) {
299 tmp = min_t(int, ret, sg->length -
300 conn->c_xmit_data_off);
301 conn->c_xmit_data_off += tmp;
302 ret -= tmp;
303 if (conn->c_xmit_data_off == sg->length) {
304 conn->c_xmit_data_off = 0;
305 sg++;
306 conn->c_xmit_sg++;
307 BUG_ON(ret != 0 &&
308 conn->c_xmit_sg == rm->m_nents);
309 }
310 }
311 }
312 }
313
314 /* Nuke any messages we decided not to retransmit. */
315 if (!list_empty(&to_be_dropped))
316 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED);
317
318 if (conn->c_trans->xmit_complete)
319 conn->c_trans->xmit_complete(conn);
320
321 /*
322 * We might be racing with another sender who queued a message but
323 * backed off on noticing that we held the c_send_lock. If we check
324 * for queued messages after dropping the sem then either we'll
325 * see the queued message or the queuer will get the sem. If we
326 * notice the queued message then we trigger an immediate retry.
327 *
328 * We need to be careful only to do this when we stopped processing
329 * the send queue because it was empty. It's the only way we
330 * stop processing the loop when the transport hasn't taken
331 * responsibility for forward progress.
332 */
333 mutex_unlock(&conn->c_send_lock);
334
335 if (conn->c_map_bytes || (send_quota == 0 && !was_empty)) {
336 /* We exhausted the send quota, but there's work left to
337 * do. Return and (re-)schedule the send worker.
338 */
339 ret = -EAGAIN;
340 }
341
342 if (ret == 0 && was_empty) {
343 /* A simple bit test would be way faster than taking the
344 * spin lock */
345 spin_lock_irqsave(&conn->c_lock, flags);
346 if (!list_empty(&conn->c_send_queue)) {
347 rds_stats_inc(s_send_sem_queue_raced);
348 ret = -EAGAIN;
349 }
350 spin_unlock_irqrestore(&conn->c_lock, flags);
351 }
352out:
353 return ret;
354}
355
356static void rds_send_sndbuf_remove(struct rds_sock *rs, struct rds_message *rm)
357{
358 u32 len = be32_to_cpu(rm->m_inc.i_hdr.h_len);
359
360 assert_spin_locked(&rs->rs_lock);
361
362 BUG_ON(rs->rs_snd_bytes < len);
363 rs->rs_snd_bytes -= len;
364
365 if (rs->rs_snd_bytes == 0)
366 rds_stats_inc(s_send_queue_empty);
367}
368
369static inline int rds_send_is_acked(struct rds_message *rm, u64 ack,
370 is_acked_func is_acked)
371{
372 if (is_acked)
373 return is_acked(rm, ack);
374 return be64_to_cpu(rm->m_inc.i_hdr.h_sequence) <= ack;
375}
376
377/*
378 * Returns true if there are no messages on the send and retransmit queues
379 * which have a sequence number greater than or equal to the given sequence
380 * number.
381 */
382int rds_send_acked_before(struct rds_connection *conn, u64 seq)
383{
384 struct rds_message *rm, *tmp;
385 int ret = 1;
386
387 spin_lock(&conn->c_lock);
388
389 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
390 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq)
391 ret = 0;
392 break;
393 }
394
395 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
396 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq)
397 ret = 0;
398 break;
399 }
400
401 spin_unlock(&conn->c_lock);
402
403 return ret;
404}
405
406/*
407 * This is pretty similar to what happens below in the ACK
408 * handling code - except that we call here as soon as we get
409 * the IB send completion on the RDMA op and the accompanying
410 * message.
411 */
412void rds_rdma_send_complete(struct rds_message *rm, int status)
413{
414 struct rds_sock *rs = NULL;
415 struct rds_rdma_op *ro;
416 struct rds_notifier *notifier;
417
418 spin_lock(&rm->m_rs_lock);
419
420 ro = rm->m_rdma_op;
421 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)
422 && ro && ro->r_notify && ro->r_notifier) {
423 notifier = ro->r_notifier;
424 rs = rm->m_rs;
425 sock_hold(rds_rs_to_sk(rs));
426
427 notifier->n_status = status;
428 spin_lock(&rs->rs_lock);
429 list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
430 spin_unlock(&rs->rs_lock);
431
432 ro->r_notifier = NULL;
433 }
434
435 spin_unlock(&rm->m_rs_lock);
436
437 if (rs) {
438 rds_wake_sk_sleep(rs);
439 sock_put(rds_rs_to_sk(rs));
440 }
441}
442
443/*
444 * This is the same as rds_rdma_send_complete except we
445 * don't do any locking - we have all the ingredients (message,
446 * socket, socket lock) and can just move the notifier.
447 */
448static inline void
449__rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status)
450{
451 struct rds_rdma_op *ro;
452
453 ro = rm->m_rdma_op;
454 if (ro && ro->r_notify && ro->r_notifier) {
455 ro->r_notifier->n_status = status;
456 list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue);
457 ro->r_notifier = NULL;
458 }
459
460 /* No need to wake the app - caller does this */
461}
462
463/*
464 * This is called from the IB send completion when we detect
465 * a RDMA operation that failed with remote access error.
466 * So speed is not an issue here.
467 */
468struct rds_message *rds_send_get_message(struct rds_connection *conn,
469 struct rds_rdma_op *op)
470{
471 struct rds_message *rm, *tmp, *found = NULL;
472 unsigned long flags;
473
474 spin_lock_irqsave(&conn->c_lock, flags);
475
476 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
477 if (rm->m_rdma_op == op) {
478 atomic_inc(&rm->m_refcount);
479 found = rm;
480 goto out;
481 }
482 }
483
484 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
485 if (rm->m_rdma_op == op) {
486 atomic_inc(&rm->m_refcount);
487 found = rm;
488 break;
489 }
490 }
491
492out:
493 spin_unlock_irqrestore(&conn->c_lock, flags);
494
495 return found;
496}
497
498/*
499 * This removes messages from the socket's list if they're on it. The list
500 * argument must be private to the caller, we must be able to modify it
501 * without locks. The messages must have a reference held for their
502 * position on the list. This function will drop that reference after
503 * removing the messages from the 'messages' list regardless of if it found
504 * the messages on the socket list or not.
505 */
506void rds_send_remove_from_sock(struct list_head *messages, int status)
507{
508 unsigned long flags = 0; /* silence gcc :P */
509 struct rds_sock *rs = NULL;
510 struct rds_message *rm;
511
512 local_irq_save(flags);
513 while (!list_empty(messages)) {
514 rm = list_entry(messages->next, struct rds_message,
515 m_conn_item);
516 list_del_init(&rm->m_conn_item);
517
518 /*
519 * If we see this flag cleared then we're *sure* that someone
520 * else beat us to removing it from the sock. If we race
521 * with their flag update we'll get the lock and then really
522 * see that the flag has been cleared.
523 *
524 * The message spinlock makes sure nobody clears rm->m_rs
525 * while we're messing with it. It does not prevent the
526 * message from being removed from the socket, though.
527 */
528 spin_lock(&rm->m_rs_lock);
529 if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags))
530 goto unlock_and_drop;
531
532 if (rs != rm->m_rs) {
533 if (rs) {
534 spin_unlock(&rs->rs_lock);
535 rds_wake_sk_sleep(rs);
536 sock_put(rds_rs_to_sk(rs));
537 }
538 rs = rm->m_rs;
539 spin_lock(&rs->rs_lock);
540 sock_hold(rds_rs_to_sk(rs));
541 }
542
543 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
544 struct rds_rdma_op *ro = rm->m_rdma_op;
545 struct rds_notifier *notifier;
546
547 list_del_init(&rm->m_sock_item);
548 rds_send_sndbuf_remove(rs, rm);
549
550 if (ro && ro->r_notifier
551 && (status || ro->r_notify)) {
552 notifier = ro->r_notifier;
553 list_add_tail(&notifier->n_list,
554 &rs->rs_notify_queue);
555 if (!notifier->n_status)
556 notifier->n_status = status;
557 rm->m_rdma_op->r_notifier = NULL;
558 }
559 rds_message_put(rm);
560 rm->m_rs = NULL;
561 }
562
563unlock_and_drop:
564 spin_unlock(&rm->m_rs_lock);
565 rds_message_put(rm);
566 }
567
568 if (rs) {
569 spin_unlock(&rs->rs_lock);
570 rds_wake_sk_sleep(rs);
571 sock_put(rds_rs_to_sk(rs));
572 }
573 local_irq_restore(flags);
574}
575
576/*
577 * Transports call here when they've determined that the receiver queued
578 * messages up to, and including, the given sequence number. Messages are
579 * moved to the retrans queue when rds_send_xmit picks them off the send
580 * queue. This means that in the TCP case, the message may not have been
581 * assigned the m_ack_seq yet - but that's fine as long as tcp_is_acked
582 * checks the RDS_MSG_HAS_ACK_SEQ bit.
583 *
584 * XXX It's not clear to me how this is safely serialized with socket
585 * destruction. Maybe it should bail if it sees SOCK_DEAD.
586 */
587void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
588 is_acked_func is_acked)
589{
590 struct rds_message *rm, *tmp;
591 unsigned long flags;
592 LIST_HEAD(list);
593
594 spin_lock_irqsave(&conn->c_lock, flags);
595
596 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
597 if (!rds_send_is_acked(rm, ack, is_acked))
598 break;
599
600 list_move(&rm->m_conn_item, &list);
601 clear_bit(RDS_MSG_ON_CONN, &rm->m_flags);
602 }
603
604 /* order flag updates with spin locks */
605 if (!list_empty(&list))
606 smp_mb__after_clear_bit();
607
608 spin_unlock_irqrestore(&conn->c_lock, flags);
609
610 /* now remove the messages from the sock list as needed */
611 rds_send_remove_from_sock(&list, RDS_RDMA_SUCCESS);
612}
613
614void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
615{
616 struct rds_message *rm, *tmp;
617 struct rds_connection *conn;
618 unsigned long flags;
619 LIST_HEAD(list);
620 int wake = 0;
621
622 /* get all the messages we're dropping under the rs lock */
623 spin_lock_irqsave(&rs->rs_lock, flags);
624
625 list_for_each_entry_safe(rm, tmp, &rs->rs_send_queue, m_sock_item) {
626 if (dest && (dest->sin_addr.s_addr != rm->m_daddr ||
627 dest->sin_port != rm->m_inc.i_hdr.h_dport))
628 continue;
629
630 wake = 1;
631 list_move(&rm->m_sock_item, &list);
632 rds_send_sndbuf_remove(rs, rm);
633 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
634
635 /* If this is a RDMA operation, notify the app. */
636 __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
637 }
638
639 /* order flag updates with the rs lock */
640 if (wake)
641 smp_mb__after_clear_bit();
642
643 spin_unlock_irqrestore(&rs->rs_lock, flags);
644
645 if (wake)
646 rds_wake_sk_sleep(rs);
647
648 conn = NULL;
649
650 /* now remove the messages from the conn list as needed */
651 list_for_each_entry(rm, &list, m_sock_item) {
652 /* We do this here rather than in the loop above, so that
653 * we don't have to nest m_rs_lock under rs->rs_lock */
654 spin_lock(&rm->m_rs_lock);
655 rm->m_rs = NULL;
656 spin_unlock(&rm->m_rs_lock);
657
658 /*
659 * If we see this flag cleared then we're *sure* that someone
660 * else beat us to removing it from the conn. If we race
661 * with their flag update we'll get the lock and then really
662 * see that the flag has been cleared.
663 */
664 if (!test_bit(RDS_MSG_ON_CONN, &rm->m_flags))
665 continue;
666
667 if (conn != rm->m_inc.i_conn) {
668 if (conn)
669 spin_unlock_irqrestore(&conn->c_lock, flags);
670 conn = rm->m_inc.i_conn;
671 spin_lock_irqsave(&conn->c_lock, flags);
672 }
673
674 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
675 list_del_init(&rm->m_conn_item);
676 rds_message_put(rm);
677 }
678 }
679
680 if (conn)
681 spin_unlock_irqrestore(&conn->c_lock, flags);
682
683 while (!list_empty(&list)) {
684 rm = list_entry(list.next, struct rds_message, m_sock_item);
685 list_del_init(&rm->m_sock_item);
686
687 rds_message_wait(rm);
688 rds_message_put(rm);
689 }
690}
691
692/*
693 * We only want this to fire once, so we use the caller's 'queued'. It's
694 * possible that another thread can race with us and remove the
695 * message from the flow with RDS_CANCEL_SENT_TO.
696 */
697static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
698 struct rds_message *rm, __be16 sport,
699 __be16 dport, int *queued)
700{
701 unsigned long flags;
702 u32 len;
703
704 if (*queued)
705 goto out;
706
707 len = be32_to_cpu(rm->m_inc.i_hdr.h_len);
708
709 /* this is the only place which holds both the socket's rs_lock
710 * and the connection's c_lock */
711 spin_lock_irqsave(&rs->rs_lock, flags);
712
713 /*
714 * If there is a little space in sndbuf, we don't queue anything,
715 * and userspace gets -EAGAIN. But poll() indicates there's send
716 * room. This can lead to bad behavior (spinning) if snd_bytes isn't
717 * freed up by incoming acks. So we check the *old* value of
718 * rs_snd_bytes here to allow the last msg to exceed the buffer,
719 * and poll() now knows no more data can be sent.
720 */
721 if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) {
722 rs->rs_snd_bytes += len;
723
724 /* let recv side know we are close to send space exhaustion.
725 * This is probably not the optimal way to do it, as this
726 * means we set the flag on *all* messages as soon as our
727 * throughput hits a certain threshold.
728 */
729 if (rs->rs_snd_bytes >= rds_sk_sndbuf(rs) / 2)
730 __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
731
732 list_add_tail(&rm->m_sock_item, &rs->rs_send_queue);
733 set_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
734 rds_message_addref(rm);
735 rm->m_rs = rs;
736
737 /* The code ordering is a little weird, but we're
738 trying to minimize the time we hold c_lock */
739 rds_message_populate_header(&rm->m_inc.i_hdr, sport, dport, 0);
740 rm->m_inc.i_conn = conn;
741 rds_message_addref(rm);
742
743 spin_lock(&conn->c_lock);
744 rm->m_inc.i_hdr.h_sequence = cpu_to_be64(conn->c_next_tx_seq++);
745 list_add_tail(&rm->m_conn_item, &conn->c_send_queue);
746 set_bit(RDS_MSG_ON_CONN, &rm->m_flags);
747 spin_unlock(&conn->c_lock);
748
749 rdsdebug("queued msg %p len %d, rs %p bytes %d seq %llu\n",
750 rm, len, rs, rs->rs_snd_bytes,
751 (unsigned long long)be64_to_cpu(rm->m_inc.i_hdr.h_sequence));
752
753 *queued = 1;
754 }
755
756 spin_unlock_irqrestore(&rs->rs_lock, flags);
757out:
758 return *queued;
759}
760
761static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
762 struct msghdr *msg, int *allocated_mr)
763{
764 struct cmsghdr *cmsg;
765 int ret = 0;
766
767 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
768 if (!CMSG_OK(msg, cmsg))
769 return -EINVAL;
770
771 if (cmsg->cmsg_level != SOL_RDS)
772 continue;
773
774 /* As a side effect, RDMA_DEST and RDMA_MAP will set
775 * rm->m_rdma_cookie and rm->m_rdma_mr.
776 */
777 switch (cmsg->cmsg_type) {
778 case RDS_CMSG_RDMA_ARGS:
779 ret = rds_cmsg_rdma_args(rs, rm, cmsg);
780 break;
781
782 case RDS_CMSG_RDMA_DEST:
783 ret = rds_cmsg_rdma_dest(rs, rm, cmsg);
784 break;
785
786 case RDS_CMSG_RDMA_MAP:
787 ret = rds_cmsg_rdma_map(rs, rm, cmsg);
788 if (!ret)
789 *allocated_mr = 1;
790 break;
791
792 default:
793 return -EINVAL;
794 }
795
796 if (ret)
797 break;
798 }
799
800 return ret;
801}
802
803int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
804 size_t payload_len)
805{
806 struct sock *sk = sock->sk;
807 struct rds_sock *rs = rds_sk_to_rs(sk);
808 struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
809 __be32 daddr;
810 __be16 dport;
811 struct rds_message *rm = NULL;
812 struct rds_connection *conn;
813 int ret = 0;
814 int queued = 0, allocated_mr = 0;
815 int nonblock = msg->msg_flags & MSG_DONTWAIT;
816 long timeo = sock_rcvtimeo(sk, nonblock);
817
818 /* Mirror what Linux UDP does: mirror BSD's error message compatibility */
819 /* XXX: Perhaps MSG_MORE someday */
820 if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
821 printk(KERN_INFO "msg_flags 0x%08X\n", msg->msg_flags);
822 ret = -EOPNOTSUPP;
823 goto out;
824 }
825
826 if (msg->msg_namelen) {
827 /* XXX fail non-unicast destination IPs? */
828 if (msg->msg_namelen < sizeof(*usin) || usin->sin_family != AF_INET) {
829 ret = -EINVAL;
830 goto out;
831 }
832 daddr = usin->sin_addr.s_addr;
833 dport = usin->sin_port;
834 } else {
835 /* We only care about consistency with ->connect() */
836 lock_sock(sk);
837 daddr = rs->rs_conn_addr;
838 dport = rs->rs_conn_port;
839 release_sock(sk);
840 }
841
842 /* racing with another thread binding seems ok here */
843 if (daddr == 0 || rs->rs_bound_addr == 0) {
844 ret = -ENOTCONN; /* XXX not a great errno */
845 goto out;
846 }
847
848 rm = rds_message_copy_from_user(msg->msg_iov, payload_len);
849 if (IS_ERR(rm)) {
850 ret = PTR_ERR(rm);
851 rm = NULL;
852 goto out;
853 }
854
855 rm->m_daddr = daddr;
856
857 /* Parse any control messages the user may have included. */
858 ret = rds_cmsg_send(rs, rm, msg, &allocated_mr);
859 if (ret)
860 goto out;
861
862 /* rds_conn_create has a spinlock that runs with IRQ off.
863 * Caching the conn in the socket helps a lot. */
864 if (rs->rs_conn && rs->rs_conn->c_faddr == daddr)
865 conn = rs->rs_conn;
866 else {
867 conn = rds_conn_create_outgoing(rs->rs_bound_addr, daddr,
868 rs->rs_transport,
869 sock->sk->sk_allocation);
870 if (IS_ERR(conn)) {
871 ret = PTR_ERR(conn);
872 goto out;
873 }
874 rs->rs_conn = conn;
875 }
876
877 if ((rm->m_rdma_cookie || rm->m_rdma_op)
878 && conn->c_trans->xmit_rdma == NULL) {
879 if (printk_ratelimit())
880 printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
881 rm->m_rdma_op, conn->c_trans->xmit_rdma);
882 ret = -EOPNOTSUPP;
883 goto out;
884 }
885
886 /* If the connection is down, trigger a connect. We may
887 * have scheduled a delayed reconnect however - in this case
888 * we should not interfere.
889 */
890 if (rds_conn_state(conn) == RDS_CONN_DOWN
891 && !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
892 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
893
894 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
895 if (ret)
896 goto out;
897
898 while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port,
899 dport, &queued)) {
900 rds_stats_inc(s_send_queue_full);
901 /* XXX make sure this is reasonable */
902 if (payload_len > rds_sk_sndbuf(rs)) {
903 ret = -EMSGSIZE;
904 goto out;
905 }
906 if (nonblock) {
907 ret = -EAGAIN;
908 goto out;
909 }
910
911 timeo = wait_event_interruptible_timeout(*sk->sk_sleep,
912 rds_send_queue_rm(rs, conn, rm,
913 rs->rs_bound_port,
914 dport,
915 &queued),
916 timeo);
917 rdsdebug("sendmsg woke queued %d timeo %ld\n", queued, timeo);
918 if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT)
919 continue;
920
921 ret = timeo;
922 if (ret == 0)
923 ret = -ETIMEDOUT;
924 goto out;
925 }
926
927 /*
928 * By now we've committed to the send. We reuse rds_send_worker()
929 * to retry sends in the rds thread if the transport asks us to.
930 */
931 rds_stats_inc(s_send_queued);
932
933 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
934 rds_send_worker(&conn->c_send_w.work);
935
936 rds_message_put(rm);
937 return payload_len;
938
939out:
940 /* If the user included a RDMA_MAP cmsg, we allocated a MR on the fly.
941 * If the sendmsg goes through, we keep the MR. If it fails with EAGAIN
942 * or in any other way, we need to destroy the MR again */
943 if (allocated_mr)
944 rds_rdma_unuse(rs, rds_rdma_cookie_key(rm->m_rdma_cookie), 1);
945
946 if (rm)
947 rds_message_put(rm);
948 return ret;
949}
950
951/*
952 * Reply to a ping packet.
953 */
954int
955rds_send_pong(struct rds_connection *conn, __be16 dport)
956{
957 struct rds_message *rm;
958 unsigned long flags;
959 int ret = 0;
960
961 rm = rds_message_alloc(0, GFP_ATOMIC);
962 if (rm == NULL) {
963 ret = -ENOMEM;
964 goto out;
965 }
966
967 rm->m_daddr = conn->c_faddr;
968
969 /* If the connection is down, trigger a connect. We may
970 * have scheduled a delayed reconnect however - in this case
971 * we should not interfere.
972 */
973 if (rds_conn_state(conn) == RDS_CONN_DOWN
974 && !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
975 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
976
977 ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL);
978 if (ret)
979 goto out;
980
981 spin_lock_irqsave(&conn->c_lock, flags);
982 list_add_tail(&rm->m_conn_item, &conn->c_send_queue);
983 set_bit(RDS_MSG_ON_CONN, &rm->m_flags);
984 rds_message_addref(rm);
985 rm->m_inc.i_conn = conn;
986
987 rds_message_populate_header(&rm->m_inc.i_hdr, 0, dport,
988 conn->c_next_tx_seq);
989 conn->c_next_tx_seq++;
990 spin_unlock_irqrestore(&conn->c_lock, flags);
991
992 rds_stats_inc(s_send_queued);
993 rds_stats_inc(s_send_pong);
994
995 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
996 rds_message_put(rm);
997 return 0;
998
999out:
1000 if (rm)
1001 rds_message_put(rm);
1002 return ret;
1003}
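
rds_sendmsg() above is strict about its inputs: anything in msg_flags other than MSG_DONTWAIT/MSG_CMSG_COMPAT is rejected with -EOPNOTSUPP, an explicit destination must be a full struct sockaddr_in with sin_family == AF_INET, a payload that can never fit the send buffer gets -EMSGSIZE, and a non-blocking send while rds_send_queue_rm() finds the queue full gets -EAGAIN. A hedged userspace sketch written against that contract (it assumes `fd` is an already-bound RDS socket; error handling is trimmed):

#include <errno.h>
#include <netinet/in.h>
#include <stdint.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/uio.h>

/* Sketch only: fd is assumed to be an already-bound RDS socket. */
static ssize_t rds_send_datagram(int fd, struct in_addr dst, uint16_t port,
				 const void *buf, size_t len)
{
	struct sockaddr_in sin = {
		.sin_family = AF_INET,		/* anything else is -EINVAL */
		.sin_port   = htons(port),
		.sin_addr   = dst,
	};
	struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
	struct msghdr msg = {
		.msg_name    = &sin,
		.msg_namelen = sizeof(sin),	/* a shorter namelen is rejected */
		.msg_iov     = &iov,
		.msg_iovlen  = 1,
	};
	ssize_t ret;

	do {
		ret = sendmsg(fd, &msg, MSG_DONTWAIT);
	} while (ret < 0 && errno == EAGAIN);	/* queue full: poll() or retry */

	return ret;				/* -EMSGSIZE means it can never fit */
}
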
diff --git a/net/rds/stats.c b/net/rds/stats.c
new file mode 100644
index 000000000000..637146893cf3
--- /dev/null
+++ b/net/rds/stats.c
@@ -0,0 +1,148 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/percpu.h>
34#include <linux/seq_file.h>
35#include <linux/proc_fs.h>
36
37#include "rds.h"
38
39DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
40
41/* :.,$s/unsigned long\>.*\<s_\(.*\);/"\1",/g */
42
43static char *rds_stat_names[] = {
44 "conn_reset",
45 "recv_drop_bad_checksum",
46 "recv_drop_old_seq",
47 "recv_drop_no_sock",
48 "recv_drop_dead_sock",
49 "recv_deliver_raced",
50 "recv_delivered",
51 "recv_queued",
52 "recv_immediate_retry",
53 "recv_delayed_retry",
54 "recv_ack_required",
55 "recv_rdma_bytes",
56 "recv_ping",
57 "send_queue_empty",
58 "send_queue_full",
59 "send_sem_contention",
60 "send_sem_queue_raced",
61 "send_immediate_retry",
62 "send_delayed_retry",
63 "send_drop_acked",
64 "send_ack_required",
65 "send_queued",
66 "send_rdma",
67 "send_rdma_bytes",
68 "send_pong",
69 "page_remainder_hit",
70 "page_remainder_miss",
71 "copy_to_user",
72 "copy_from_user",
73 "cong_update_queued",
74 "cong_update_received",
75 "cong_send_error",
76 "cong_send_blocked",
77};
78
79void rds_stats_info_copy(struct rds_info_iterator *iter,
80 uint64_t *values, char **names, size_t nr)
81{
82 struct rds_info_counter ctr;
83 size_t i;
84
85 for (i = 0; i < nr; i++) {
86 BUG_ON(strlen(names[i]) >= sizeof(ctr.name));
87 strncpy(ctr.name, names[i], sizeof(ctr.name) - 1);
88 ctr.value = values[i];
89
90 rds_info_copy(iter, &ctr, sizeof(ctr));
91 }
92}
93
94/*
95 * This gives global counters across all the transports. The strings
96 * are copied in so that the tool doesn't need knowledge of the specific
97 * stats that we're exporting. Some are pretty implementation dependent
98 * and may change over time. That doesn't stop them from being useful.
99 *
100 * This is the only function in the chain that knows about the byte granular
101 * length in userspace. It converts it to number of stat entries that the
102 * rest of the functions operate in.
103 */
104static void rds_stats_info(struct socket *sock, unsigned int len,
105 struct rds_info_iterator *iter,
106 struct rds_info_lengths *lens)
107{
108 struct rds_statistics stats = {0, };
109 uint64_t *src;
110 uint64_t *sum;
111 size_t i;
112 int cpu;
113 unsigned int avail;
114
115 avail = len / sizeof(struct rds_info_counter);
116
117 if (avail < ARRAY_SIZE(rds_stat_names)) {
118 avail = 0;
119 goto trans;
120 }
121
122 for_each_online_cpu(cpu) {
123 src = (uint64_t *)&(per_cpu(rds_stats, cpu));
124 sum = (uint64_t *)&stats;
125 for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++)
126 *(sum++) += *(src++);
127 }
128
129 rds_stats_info_copy(iter, (uint64_t *)&stats, rds_stat_names,
130 ARRAY_SIZE(rds_stat_names));
131 avail -= ARRAY_SIZE(rds_stat_names);
132
133trans:
134 lens->each = sizeof(struct rds_info_counter);
135 lens->nr = rds_trans_stats_info_copy(iter, avail) +
136 ARRAY_SIZE(rds_stat_names);
137}
138
139void rds_stats_exit(void)
140{
141 rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info);
142}
143
144int __init rds_stats_init(void)
145{
146 rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info);
147 return 0;
148}
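
rds_stats_inc(), used throughout this patch, is not defined in stats.c; it comes from rds.h. Given the DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats) above and the way rds_stats_info() sums the per-CPU copies as a flat uint64_t array, the increment is presumably an unlocked per-CPU bump along these lines (a sketch of the idea, not the actual rds.h definition). It also makes the ordering constraint explicit: rds_stat_names[] must stay in the same order as the struct's counter fields, which is what the substitution-command comment above the array is for.

/* Sketch of the per-CPU counter scheme implied by rds_stats_info(): each CPU
 * owns a struct rds_statistics of 64-bit counters, increments are unlocked,
 * and the info call sums the per-CPU copies.  The real rds_stats_inc() is
 * defined in net/rds/rds.h; this macro is only an illustration. */
#define rds_stats_inc_sketch(member)			\
do {							\
	per_cpu(rds_stats, get_cpu()).member++;		\
	put_cpu();					\
} while (0)
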
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
new file mode 100644
index 000000000000..307dc5c1be15
--- /dev/null
+++ b/net/rds/sysctl.c
@@ -0,0 +1,122 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/sysctl.h>
35#include <linux/proc_fs.h>
36
37#include "rds.h"
38
39static struct ctl_table_header *rds_sysctl_reg_table;
40
41static unsigned long rds_sysctl_reconnect_min = 1;
42static unsigned long rds_sysctl_reconnect_max = ~0UL;
43
44unsigned long rds_sysctl_reconnect_min_jiffies;
45unsigned long rds_sysctl_reconnect_max_jiffies = HZ;
46
47unsigned int rds_sysctl_max_unacked_packets = 8;
48unsigned int rds_sysctl_max_unacked_bytes = (16 << 20);
49
50unsigned int rds_sysctl_ping_enable = 1;
51
52static ctl_table rds_sysctl_rds_table[] = {
53 {
54 .ctl_name = CTL_UNNUMBERED,
55 .procname = "reconnect_min_delay_ms",
56 .data = &rds_sysctl_reconnect_min_jiffies,
57 .maxlen = sizeof(unsigned long),
58 .mode = 0644,
59 .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
60 .extra1 = &rds_sysctl_reconnect_min,
61 .extra2 = &rds_sysctl_reconnect_max_jiffies,
62 },
63 {
64 .ctl_name = CTL_UNNUMBERED,
65 .procname = "reconnect_max_delay_ms",
66 .data = &rds_sysctl_reconnect_max_jiffies,
67 .maxlen = sizeof(unsigned long),
68 .mode = 0644,
69 .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
70 .extra1 = &rds_sysctl_reconnect_min_jiffies,
71 .extra2 = &rds_sysctl_reconnect_max,
72 },
73 {
74 .ctl_name = CTL_UNNUMBERED,
75 .procname = "max_unacked_packets",
76 .data = &rds_sysctl_max_unacked_packets,
77 .maxlen = sizeof(int),
78 .mode = 0644,
79 .proc_handler = &proc_dointvec,
80 },
81 {
82 .ctl_name = CTL_UNNUMBERED,
83 .procname = "max_unacked_bytes",
84 .data = &rds_sysctl_max_unacked_bytes,
85 .maxlen = sizeof(int),
86 .mode = 0644,
87 .proc_handler = &proc_dointvec,
88 },
89 {
90 .ctl_name = CTL_UNNUMBERED,
91 .procname = "ping_enable",
92 .data = &rds_sysctl_ping_enable,
93 .maxlen = sizeof(int),
94 .mode = 0644,
95 .proc_handler = &proc_dointvec,
96 },
97 { .ctl_name = 0}
98};
99
100static struct ctl_path rds_sysctl_path[] = {
101 { .procname = "net", .ctl_name = CTL_NET, },
102 { .procname = "rds", .ctl_name = CTL_UNNUMBERED, },
103 { }
104};
105
106
107void rds_sysctl_exit(void)
108{
109 if (rds_sysctl_reg_table)
110 unregister_sysctl_table(rds_sysctl_reg_table);
111}
112
113int __init rds_sysctl_init(void)
114{
115 rds_sysctl_reconnect_min = msecs_to_jiffies(1);
116 rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min;
117
118 rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table);
119 if (rds_sysctl_reg_table == NULL)
120 return -ENOMEM;
121 return 0;
122}
diff --git a/net/rds/threads.c b/net/rds/threads.c
new file mode 100644
index 000000000000..828a1bf9ea92
--- /dev/null
+++ b/net/rds/threads.c
@@ -0,0 +1,265 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/random.h>
35
36#include "rds.h"
37
38/*
39 * All of connection management is simplified by serializing it through
40 * work queues that execute in a connection managing thread.
41 *
42 * TCP wants to send acks through sendpage() in response to data_ready(),
43 * but it needs a process context to do so.
44 *
45 * The receive paths need to allocate but can't drop packets (!) so we have
46 * a thread around to block allocating if the receive fast path sees an
47 * allocation failure.
48 */
49
50/* Grand Unified Theory of connection life cycle:
51 * At any point in time, the connection can be in one of these states:
52 * DOWN, CONNECTING, UP, DISCONNECTING, ERROR
53 *
54 * The following transitions are possible:
55 * ANY -> ERROR
56 * UP -> DISCONNECTING
57 * ERROR -> DISCONNECTING
58 * DISCONNECTING -> DOWN
59 * DOWN -> CONNECTING
60 * CONNECTING -> UP
61 *
62 * Transition to state DISCONNECTING/DOWN:
63 * - Inside the shutdown worker; synchronizes with xmit path
64 * through c_send_lock, and with connection management callbacks
65 * via c_cm_lock.
66 *
67 * For receive callbacks, we rely on the underlying transport
68 * (TCP, IB/RDMA) to provide the necessary synchronisation.
69 */
70struct workqueue_struct *rds_wq;
71
72void rds_connect_complete(struct rds_connection *conn)
73{
74 if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) {
75 printk(KERN_WARNING "%s: Cannot transition to state UP, "
76 "current state is %d\n",
77 __func__,
78 atomic_read(&conn->c_state));
79 atomic_set(&conn->c_state, RDS_CONN_ERROR);
80 queue_work(rds_wq, &conn->c_down_w);
81 return;
82 }
83
84 rdsdebug("conn %p for %pI4 to %pI4 complete\n",
85 conn, &conn->c_laddr, &conn->c_faddr);
86
87 conn->c_reconnect_jiffies = 0;
88 set_bit(0, &conn->c_map_queued);
89 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
90 queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
91}
92
93/*
94 * This random exponential backoff is relied on to eventually resolve racing
95 * connects.
96 *
97 * If connect attempts race then both parties drop both connections and come
98 * here to wait for a random amount of time before trying again. Eventually
99 * the backoff range will be so much greater than the time it takes to
100 * establish a connection that one of the pair will establish the connection
101 * before the other's random delay fires.
102 *
103 * Connection attempts that arrive while a connection is already established
104 * are also considered to be racing connects. This lets a connection from
105 * a rebooted machine replace an existing stale connection before the transport
106 * notices that the connection has failed.
107 *
108 * We should *always* start with a random backoff; otherwise a broken connection
109 * will always take several iterations to be re-established.
110 */
111static void rds_queue_reconnect(struct rds_connection *conn)
112{
113 unsigned long rand;
114
115 rdsdebug("conn %p for %pI4 to %pI4 reconnect jiffies %lu\n",
116 conn, &conn->c_laddr, &conn->c_faddr,
117 conn->c_reconnect_jiffies);
118
119 set_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
120 if (conn->c_reconnect_jiffies == 0) {
121 conn->c_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
122 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
123 return;
124 }
125
126 get_random_bytes(&rand, sizeof(rand));
127 rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
128 rand % conn->c_reconnect_jiffies, conn->c_reconnect_jiffies,
129 conn, &conn->c_laddr, &conn->c_faddr);
130 queue_delayed_work(rds_wq, &conn->c_conn_w,
131 rand % conn->c_reconnect_jiffies);
132
133 conn->c_reconnect_jiffies = min(conn->c_reconnect_jiffies * 2,
134 rds_sysctl_reconnect_max_jiffies);
135}
136
137void rds_connect_worker(struct work_struct *work)
138{
139 struct rds_connection *conn = container_of(work, struct rds_connection, c_conn_w.work);
140 int ret;
141
142 clear_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
143 if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
144 ret = conn->c_trans->conn_connect(conn);
145 rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n",
146 conn, &conn->c_laddr, &conn->c_faddr, ret);
147
148 if (ret) {
149 if (rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_DOWN))
150 rds_queue_reconnect(conn);
151 else
152 rds_conn_error(conn, "RDS: connect failed\n");
153 }
154 }
155}
156
157void rds_shutdown_worker(struct work_struct *work)
158{
159 struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
160
161 /* shut it down unless it's down already */
162 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
163 /*
164 * Quiesce the connection mgmt handlers before we start tearing
165 * things down. We don't hold the mutex for the entire
166 * duration of the shutdown operation, else we may be
167 * deadlocking with the CM handler. Instead, the CM event
168 * handler is supposed to check for state DISCONNECTING
169 */
170 mutex_lock(&conn->c_cm_lock);
171 if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING)
172 && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
173 rds_conn_error(conn, "shutdown called in state %d\n",
174 atomic_read(&conn->c_state));
175 mutex_unlock(&conn->c_cm_lock);
176 return;
177 }
178 mutex_unlock(&conn->c_cm_lock);
179
180 mutex_lock(&conn->c_send_lock);
181 conn->c_trans->conn_shutdown(conn);
182 rds_conn_reset(conn);
183 mutex_unlock(&conn->c_send_lock);
184
185 if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
186 /* This can happen - eg when we're in the middle of tearing
187 * down the connection, and someone unloads the rds module.
188 * Quite reproducible with loopback connections.
189 * Mostly harmless.
190 */
191 rds_conn_error(conn,
192 "%s: failed to transition to state DOWN, "
193 "current state is %d\n",
194 __func__,
195 atomic_read(&conn->c_state));
196 return;
197 }
198 }
199
200 /* Then reconnect if it's still live.
201 * The passive side of an IB loopback connection is never added
202 * to the conn hash, so we never trigger a reconnect on this
203 * conn - the reconnect is always triggered by the active peer. */
204 cancel_delayed_work(&conn->c_conn_w);
205 if (!hlist_unhashed(&conn->c_hash_node))
206 rds_queue_reconnect(conn);
207}
208
209void rds_send_worker(struct work_struct *work)
210{
211 struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work);
212 int ret;
213
214 if (rds_conn_state(conn) == RDS_CONN_UP) {
215 ret = rds_send_xmit(conn);
216 rdsdebug("conn %p ret %d\n", conn, ret);
217 switch (ret) {
218 case -EAGAIN:
219 rds_stats_inc(s_send_immediate_retry);
220 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
221 break;
222 case -ENOMEM:
223 rds_stats_inc(s_send_delayed_retry);
224 queue_delayed_work(rds_wq, &conn->c_send_w, 2);
225 default:
226 break;
227 }
228 }
229}
230
231void rds_recv_worker(struct work_struct *work)
232{
233 struct rds_connection *conn = container_of(work, struct rds_connection, c_recv_w.work);
234 int ret;
235
236 if (rds_conn_state(conn) == RDS_CONN_UP) {
237 ret = conn->c_trans->recv(conn);
238 rdsdebug("conn %p ret %d\n", conn, ret);
239 switch (ret) {
240 case -EAGAIN:
241 rds_stats_inc(s_recv_immediate_retry);
242 queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
243 break;
244 case -ENOMEM:
245 rds_stats_inc(s_recv_delayed_retry);
246 queue_delayed_work(rds_wq, &conn->c_recv_w, 2);
247 default:
248 break;
249 }
250 }
251}
252
253void rds_threads_exit(void)
254{
255 destroy_workqueue(rds_wq);
256}
257
258int __init rds_threads_init(void)
259{
260 rds_wq = create_singlethread_workqueue("krdsd");
261 if (rds_wq == NULL)
262 return -ENOMEM;
263
264 return 0;
265}
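
Every state change in the workers above goes through rds_conn_transition(), which this hunk only calls; it is declared elsewhere (rds.h). Since c_state is read with atomic_read() here, the helper is presumably a compare-and-swap that only succeeds while the connection is still in the expected old state, which is what keeps the connect, shutdown and CM callback paths from both winning a transition. A sketch of the idea (illustrative, not the real declaration):

/* Illustrative only: the real helper lives in net/rds/rds.h.  A transition
 * such as DOWN -> CONNECTING succeeds only if nobody moved the connection
 * out of DOWN first, so racing workers serialize on c_state itself. */
static inline int rds_conn_transition_sketch(struct rds_connection *conn,
					     int old_state, int new_state)
{
	return atomic_cmpxchg(&conn->c_state, old_state, new_state) == old_state;
}
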
diff --git a/net/rds/transport.c b/net/rds/transport.c
new file mode 100644
index 000000000000..767da61ad2f3
--- /dev/null
+++ b/net/rds/transport.c
@@ -0,0 +1,117 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/module.h>
35#include <linux/in.h>
36
37#include "rds.h"
38#include "loop.h"
39
40static LIST_HEAD(rds_transports);
41static DECLARE_RWSEM(rds_trans_sem);
42
43int rds_trans_register(struct rds_transport *trans)
44{
45 BUG_ON(strlen(trans->t_name) + 1 > TRANSNAMSIZ);
46
47 down_write(&rds_trans_sem);
48
49 list_add_tail(&trans->t_item, &rds_transports);
50 printk(KERN_INFO "Registered RDS/%s transport\n", trans->t_name);
51
52 up_write(&rds_trans_sem);
53
54 return 0;
55}
56
57void rds_trans_unregister(struct rds_transport *trans)
58{
59 down_write(&rds_trans_sem);
60
61 list_del_init(&trans->t_item);
62 printk(KERN_INFO "Unregistered RDS/%s transport\n", trans->t_name);
63
64 up_write(&rds_trans_sem);
65}
66
67struct rds_transport *rds_trans_get_preferred(__be32 addr)
68{
69 struct rds_transport *trans;
70 struct rds_transport *ret = NULL;
71
72 if (IN_LOOPBACK(ntohl(addr)))
73 return &rds_loop_transport;
74
75 down_read(&rds_trans_sem);
76 list_for_each_entry(trans, &rds_transports, t_item) {
77 if (trans->laddr_check(addr) == 0) {
78 ret = trans;
79 break;
80 }
81 }
82 up_read(&rds_trans_sem);
83
84 return ret;
85}
86
87/*
88 * This returns the number of stats entries in the snapshot and only
89 * copies them using the iter if there is enough space for them. The
90 * caller passes in the global stats so that we can size and copy while
91 * holding the lock.
92 */
93unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
94 unsigned int avail)
95
96{
97 struct rds_transport *trans;
98 unsigned int total = 0;
99 unsigned int part;
100
101 rds_info_iter_unmap(iter);
102 down_read(&rds_trans_sem);
103
104 list_for_each_entry(trans, &rds_transports, t_item) {
105 if (trans->stats_info_copy == NULL)
106 continue;
107
108 part = trans->stats_info_copy(iter, avail);
109 avail -= min(avail, part);
110 total += part;
111 }
112
113 up_read(&rds_trans_sem);
114
115 return total;
116}
117
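
transport.c above is the whole plug-in mechanism: a transport fills in a struct rds_transport, registers it with rds_trans_register(), and rds_trans_get_preferred() hands new sockets to the first registered transport whose laddr_check() accepts the local address (loopback addresses short-circuit to rds_loop_transport). A hedged skeleton of such a registration, limited to the members this patch actually touches; the full struct, defined in rds.h, has more callbacks:

/* Sketch only: struct rds_transport has additional members (conn_alloc,
 * inc_free, ...) beyond the callbacks exercised in this patch. */
static int example_laddr_check(__be32 addr)
{
	/* return 0 if this transport can carry traffic for addr */
	return -EADDRNOTAVAIL;
}

static struct rds_transport example_transport = {
	.t_name		= "example",
	.laddr_check	= example_laddr_check,
	/* .conn_connect, .conn_shutdown, .xmit, .recv,
	 * .inc_copy_to_user, .stats_info_copy, ... */
};

static int __init example_transport_init(void)
{
	return rds_trans_register(&example_transport);
}

static void __exit example_transport_exit(void)
{
	rds_trans_unregister(&example_transport);
}
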
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 01392649b462..650139626581 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1587,8 +1587,7 @@ static int __init rose_proto_init(void)
 		char name[IFNAMSIZ];
 
 		sprintf(name, "rose%d", i);
-		dev = alloc_netdev(sizeof(struct net_device_stats),
-				   name, rose_setup);
+		dev = alloc_netdev(0, name, rose_setup);
 		if (!dev) {
 			printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate memory\n");
 			rc = -ENOMEM;
diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c
index 12cfcf09556b..7dcf2569613b 100644
--- a/net/rose/rose_dev.c
+++ b/net/rose/rose_dev.c
@@ -57,7 +57,7 @@ static int rose_rebuild_header(struct sk_buff *skb)
 {
 #ifdef CONFIG_INET
 	struct net_device *dev = skb->dev;
-	struct net_device_stats *stats = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
 	unsigned char *bp = (unsigned char *)skb->data;
 	struct sk_buff *skbn;
 	unsigned int len;
@@ -133,7 +133,7 @@ static int rose_close(struct net_device *dev)
 
 static int rose_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	struct net_device_stats *stats = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
 
 	if (!netif_running(dev)) {
 		printk(KERN_ERR "ROSE: rose_xmit - called when iface is down\n");
@@ -144,30 +144,28 @@ static int rose_xmit(struct sk_buff *skb, struct net_device *dev)
144 return 0; 144 return 0;
145} 145}
146 146
147static struct net_device_stats *rose_get_stats(struct net_device *dev)
148{
149 return netdev_priv(dev);
150}
151
152static const struct header_ops rose_header_ops = { 147static const struct header_ops rose_header_ops = {
153 .create = rose_header, 148 .create = rose_header,
154 .rebuild= rose_rebuild_header, 149 .rebuild= rose_rebuild_header,
155}; 150};
156 151
152static const struct net_device_ops rose_netdev_ops = {
153 .ndo_open = rose_open,
154 .ndo_stop = rose_close,
155 .ndo_start_xmit = rose_xmit,
156 .ndo_set_mac_address = rose_set_mac_address,
157};
158
157void rose_setup(struct net_device *dev) 159void rose_setup(struct net_device *dev)
158{ 160{
159 dev->mtu = ROSE_MAX_PACKET_SIZE - 2; 161 dev->mtu = ROSE_MAX_PACKET_SIZE - 2;
160 dev->hard_start_xmit = rose_xmit; 162 dev->netdev_ops = &rose_netdev_ops;
161 dev->open = rose_open;
162 dev->stop = rose_close;
163 163
164 dev->header_ops = &rose_header_ops; 164 dev->header_ops = &rose_header_ops;
165 dev->hard_header_len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN; 165 dev->hard_header_len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN;
166 dev->addr_len = ROSE_ADDR_LEN; 166 dev->addr_len = ROSE_ADDR_LEN;
167 dev->type = ARPHRD_ROSE; 167 dev->type = ARPHRD_ROSE;
168 dev->set_mac_address = rose_set_mac_address;
169 168
170 /* New-style flags. */ 169 /* New-style flags. */
171 dev->flags = IFF_NOARP; 170 dev->flags = IFF_NOARP;
172 dev->get_stats = rose_get_stats;
173} 171}
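
The rose_dev.c hunk above is part of the tree-wide move to struct net_device_ops: per-driver callbacks leave struct net_device itself and live in a shared const ops table, and counters use the built-in dev->stats instead of a private copy. A minimal sketch of the resulting driver shape, using illustrative my_* names rather than the actual ROSE code:

    #include <linux/netdevice.h>
    #include <linux/skbuff.h>

    static int my_open(struct net_device *dev) { return 0; }
    static int my_stop(struct net_device *dev) { return 0; }

    static int my_xmit(struct sk_buff *skb, struct net_device *dev)
    {
            dev->stats.tx_packets++;        /* no private stats copy needed */
            dev_kfree_skb(skb);
            return 0;
    }

    static const struct net_device_ops my_netdev_ops = {
            .ndo_open       = my_open,
            .ndo_stop       = my_stop,
            .ndo_start_xmit = my_xmit,
    };

    static void my_setup(struct net_device *dev)
    {
            dev->netdev_ops = &my_netdev_ops;   /* replaces dev->open, dev->stop, ... */
    }

With the counters in dev->stats, the old rose_get_stats()/dev->get_stats indirection becomes unnecessary, which is why the hunk simply deletes it.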
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 0fc4a18fd96f..32009793307b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -444,6 +444,17 @@ out:
444} 444}
445EXPORT_SYMBOL(qdisc_calculate_pkt_len); 445EXPORT_SYMBOL(qdisc_calculate_pkt_len);
446 446
447void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
448{
449 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
450 printk(KERN_WARNING
451 "%s: %s qdisc %X: is non-work-conserving?\n",
452 txt, qdisc->ops->id, qdisc->handle >> 16);
453 qdisc->flags |= TCQ_F_WARN_NONWC;
454 }
455}
456EXPORT_SYMBOL(qdisc_warn_nonwc);
457
447static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) 458static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
448{ 459{
449 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, 460 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
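
qdisc_warn_nonwc() replaces the rate-limited printk()s removed from sch_hfsc.c and sch_htb.c below with a warn-once-per-qdisc message, keyed on a flag bit in qdisc->flags. A minimal user-space sketch of the same warn-once pattern, with illustrative names (my_object, MY_WARNED) that are not kernel API:

    #include <stdio.h>

    #define MY_WARNED 0x1

    struct my_object {
            unsigned int flags;
            unsigned int id;
    };

    static void warn_once(struct my_object *obj, const char *txt)
    {
            if (!(obj->flags & MY_WARNED)) {
                    fprintf(stderr, "%s: object %u is misbehaving\n", txt, obj->id);
                    obj->flags |= MY_WARNED;    /* suppress all later warnings */
            }
    }

The flag lives on the object being complained about, so each qdisc gets exactly one message regardless of how often the condition recurs.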
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 45c31b1a4e1d..74226b265528 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -887,8 +887,7 @@ qdisc_peek_len(struct Qdisc *sch)
887 887
888 skb = sch->ops->peek(sch); 888 skb = sch->ops->peek(sch);
889 if (skb == NULL) { 889 if (skb == NULL) {
890 if (net_ratelimit()) 890 qdisc_warn_nonwc("qdisc_peek_len", sch);
891 printk("qdisc_peek_len: non work-conserving qdisc ?\n");
892 return 0; 891 return 0;
893 } 892 }
894 len = qdisc_pkt_len(skb); 893 len = qdisc_pkt_len(skb);
@@ -1642,8 +1641,7 @@ hfsc_dequeue(struct Qdisc *sch)
1642 1641
1643 skb = qdisc_dequeue_peeked(cl->qdisc); 1642 skb = qdisc_dequeue_peeked(cl->qdisc);
1644 if (skb == NULL) { 1643 if (skb == NULL) {
1645 if (net_ratelimit()) 1644 qdisc_warn_nonwc("HFSC", cl->qdisc);
1646 printk("HFSC: Non-work-conserving qdisc ?\n");
1647 return NULL; 1645 return NULL;
1648 } 1646 }
1649 1647
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 2f0f0b04d3fb..355974f610c5 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -35,6 +35,7 @@
35#include <linux/list.h> 35#include <linux/list.h>
36#include <linux/compiler.h> 36#include <linux/compiler.h>
37#include <linux/rbtree.h> 37#include <linux/rbtree.h>
38#include <linux/workqueue.h>
38#include <net/netlink.h> 39#include <net/netlink.h>
39#include <net/pkt_sched.h> 40#include <net/pkt_sched.h>
40 41
@@ -114,8 +115,6 @@ struct htb_class {
114 struct tcf_proto *filter_list; 115 struct tcf_proto *filter_list;
115 int filter_cnt; 116 int filter_cnt;
116 117
117 int warned; /* only one warning about non work conserving .. */
118
119 /* token bucket parameters */ 118 /* token bucket parameters */
120 struct qdisc_rate_table *rate; /* rate table of the class itself */ 119 struct qdisc_rate_table *rate; /* rate table of the class itself */
121 struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ 120 struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */
@@ -155,6 +154,10 @@ struct htb_sched {
155 int direct_qlen; /* max qlen of above */ 154 int direct_qlen; /* max qlen of above */
156 155
157 long direct_pkts; 156 long direct_pkts;
157
158#define HTB_WARN_TOOMANYEVENTS 0x1
159 unsigned int warned; /* only one warning */
160 struct work_struct work;
158}; 161};
159 162
160/* find class in global hash table using given handle */ 163/* find class in global hash table using given handle */
@@ -658,7 +661,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
658 * htb_do_events - make mode changes to classes at the level 661 * htb_do_events - make mode changes to classes at the level
659 * 662 *
660 * Scans event queue for pending events and applies them. Returns time of 663 * Scans event queue for pending events and applies them. Returns time of
661 * next pending event (0 for no event in pq). 664 * next pending event (0 for no event in pq, q->now for too many events).
662 * Note: Applied are events whose have cl->pq_key <= q->now. 665 * Note: Applied are events whose have cl->pq_key <= q->now.
663 */ 666 */
664static psched_time_t htb_do_events(struct htb_sched *q, int level, 667static psched_time_t htb_do_events(struct htb_sched *q, int level,
@@ -686,8 +689,14 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
686 if (cl->cmode != HTB_CAN_SEND) 689 if (cl->cmode != HTB_CAN_SEND)
687 htb_add_to_wait_tree(q, cl, diff); 690 htb_add_to_wait_tree(q, cl, diff);
688 } 691 }
689 /* too much load - let's continue on next jiffie (including above) */ 692
690 return q->now + 2 * PSCHED_TICKS_PER_SEC / HZ; 693 /* too much load - let's continue after a break for scheduling */
694 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
695 printk(KERN_WARNING "htb: too many events!\n");
696 q->warned |= HTB_WARN_TOOMANYEVENTS;
697 }
698
699 return q->now;
691} 700}
692 701
693/* Returns class->node+prio from id-tree where classe's id is >= id. NULL 702/* Returns class->node+prio from id-tree where classe's id is >= id. NULL
@@ -809,13 +818,8 @@ next:
809 skb = cl->un.leaf.q->dequeue(cl->un.leaf.q); 818 skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
810 if (likely(skb != NULL)) 819 if (likely(skb != NULL))
811 break; 820 break;
812 if (!cl->warned) {
813 printk(KERN_WARNING
814 "htb: class %X isn't work conserving ?!\n",
815 cl->common.classid);
816 cl->warned = 1;
817 }
818 821
822 qdisc_warn_nonwc("htb", cl->un.leaf.q);
819 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> 823 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
820 ptr[0]) + prio); 824 ptr[0]) + prio);
821 cl = htb_lookup_leaf(q->row[level] + prio, prio, 825 cl = htb_lookup_leaf(q->row[level] + prio, prio,
@@ -892,7 +896,10 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
892 } 896 }
893 } 897 }
894 sch->qstats.overlimits++; 898 sch->qstats.overlimits++;
895 qdisc_watchdog_schedule(&q->watchdog, next_event); 899 if (likely(next_event > q->now))
900 qdisc_watchdog_schedule(&q->watchdog, next_event);
901 else
902 schedule_work(&q->work);
896fin: 903fin:
897 return skb; 904 return skb;
898} 905}
@@ -962,6 +969,14 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
962 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 969 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
963}; 970};
964 971
972static void htb_work_func(struct work_struct *work)
973{
974 struct htb_sched *q = container_of(work, struct htb_sched, work);
975 struct Qdisc *sch = q->watchdog.qdisc;
976
977 __netif_schedule(qdisc_root(sch));
978}
979
965static int htb_init(struct Qdisc *sch, struct nlattr *opt) 980static int htb_init(struct Qdisc *sch, struct nlattr *opt)
966{ 981{
967 struct htb_sched *q = qdisc_priv(sch); 982 struct htb_sched *q = qdisc_priv(sch);
@@ -996,6 +1011,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
996 INIT_LIST_HEAD(q->drops + i); 1011 INIT_LIST_HEAD(q->drops + i);
997 1012
998 qdisc_watchdog_init(&q->watchdog, sch); 1013 qdisc_watchdog_init(&q->watchdog, sch);
1014 INIT_WORK(&q->work, htb_work_func);
999 skb_queue_head_init(&q->direct_queue); 1015 skb_queue_head_init(&q->direct_queue);
1000 1016
1001 q->direct_qlen = qdisc_dev(sch)->tx_queue_len; 1017 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
@@ -1188,7 +1204,6 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1188 kfree(cl); 1204 kfree(cl);
1189} 1205}
1190 1206
1191/* always caled under BH & queue lock */
1192static void htb_destroy(struct Qdisc *sch) 1207static void htb_destroy(struct Qdisc *sch)
1193{ 1208{
1194 struct htb_sched *q = qdisc_priv(sch); 1209 struct htb_sched *q = qdisc_priv(sch);
@@ -1196,6 +1211,7 @@ static void htb_destroy(struct Qdisc *sch)
1196 struct htb_class *cl; 1211 struct htb_class *cl;
1197 unsigned int i; 1212 unsigned int i;
1198 1213
1214 cancel_work_sync(&q->work);
1199 qdisc_watchdog_cancel(&q->watchdog); 1215 qdisc_watchdog_cancel(&q->watchdog);
1200 /* This line used to be after htb_destroy_class call below 1216 /* This line used to be after htb_destroy_class call below
1201 and surprisingly it worked in 2.4. But it must precede it 1217 and surprisingly it worked in 2.4. But it must precede it
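
When htb_do_events() hits its per-call event budget it now returns q->now, and htb_dequeue() responds by queuing a work item instead of arming the watchdog hrtimer for a time that is already in the past. A sketch of that defer-to-workqueue pattern, assuming an illustrative my_sched private structure rather than the real HTB one:

    #include <linux/kernel.h>
    #include <linux/workqueue.h>

    struct my_sched {
            struct work_struct work;
            /* ... private qdisc state ... */
    };

    static void my_work_func(struct work_struct *work)
    {
            struct my_sched *q = container_of(work, struct my_sched, work);

            /* back in process context: restart whatever was throttled */
            pr_debug("restarting scheduler state %p\n", q);
    }

    static void my_init(struct my_sched *q)
    {
            INIT_WORK(&q->work, my_work_func);      /* once, at init time */
    }

    static void my_overloaded(struct my_sched *q)
    {
            schedule_work(&q->work);                /* from the hot path */
    }

    static void my_destroy(struct my_sched *q)
    {
            cancel_work_sync(&q->work);             /* before freeing q */
    }

The cancel_work_sync() call in the destroy path mirrors the new line added to htb_destroy(): the work item must not be left pending once the private data is freed.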
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 7e151861794b..912731203047 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -202,7 +202,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
202 int i; 202 int i;
203 203
204 if (!netif_is_multiqueue(qdisc_dev(sch))) 204 if (!netif_is_multiqueue(qdisc_dev(sch)))
205 return -EINVAL; 205 return -EOPNOTSUPP;
206 if (nla_len(opt) < sizeof(*qopt)) 206 if (nla_len(opt) < sizeof(*qopt))
207 return -EINVAL; 207 return -EINVAL;
208 208
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 67715f4eb849..7ff548a30cfb 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -86,6 +86,9 @@ const char *sctp_cname(const sctp_subtype_t cid)
86 case SCTP_CID_FWD_TSN: 86 case SCTP_CID_FWD_TSN:
87 return "FWD_TSN"; 87 return "FWD_TSN";
88 88
89 case SCTP_CID_AUTH:
90 return "AUTH";
91
89 default: 92 default:
90 break; 93 break;
91 } 94 }
@@ -135,6 +138,7 @@ static const char *sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = {
135 "PRIMITIVE_ABORT", 138 "PRIMITIVE_ABORT",
136 "PRIMITIVE_SEND", 139 "PRIMITIVE_SEND",
137 "PRIMITIVE_REQUESTHEARTBEAT", 140 "PRIMITIVE_REQUESTHEARTBEAT",
141 "PRIMITIVE_ASCONF",
138}; 142};
139 143
140/* Lookup primitive debug name. */ 144/* Lookup primitive debug name. */
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 2e4a8646dbc3..d2e98803ffe3 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -83,14 +83,15 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb)
83{ 83{
84 struct sk_buff *list = skb_shinfo(skb)->frag_list; 84 struct sk_buff *list = skb_shinfo(skb)->frag_list;
85 struct sctphdr *sh = sctp_hdr(skb); 85 struct sctphdr *sh = sctp_hdr(skb);
86 __be32 cmp = sh->checksum; 86 __le32 cmp = sh->checksum;
87 __be32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb)); 87 __le32 val;
88 __u32 tmp = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
88 89
89 for (; list; list = list->next) 90 for (; list; list = list->next)
90 val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list), 91 tmp = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
91 val); 92 tmp);
92 93
93 val = sctp_end_cksum(val); 94 val = sctp_end_cksum(tmp);
94 95
95 if (val != cmp) { 96 if (val != cmp) {
96 /* CRC failure, dump it. */ 97 /* CRC failure, dump it. */
@@ -142,7 +143,8 @@ int sctp_rcv(struct sk_buff *skb)
142 __skb_pull(skb, skb_transport_offset(skb)); 143 __skb_pull(skb, skb_transport_offset(skb));
143 if (skb->len < sizeof(struct sctphdr)) 144 if (skb->len < sizeof(struct sctphdr))
144 goto discard_it; 145 goto discard_it;
145 if (!skb_csum_unnecessary(skb) && sctp_rcv_checksum(skb) < 0) 146 if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) &&
147 sctp_rcv_checksum(skb) < 0)
146 goto discard_it; 148 goto discard_it;
147 149
148 skb_pull(skb, sizeof(struct sctphdr)); 150 skb_pull(skb, sizeof(struct sctphdr));
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ceaa4aa066ea..a63de3f7f185 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -97,8 +97,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
97 if (addr) { 97 if (addr) {
98 addr->a.v6.sin6_family = AF_INET6; 98 addr->a.v6.sin6_family = AF_INET6;
99 addr->a.v6.sin6_port = 0; 99 addr->a.v6.sin6_port = 0;
100 memcpy(&addr->a.v6.sin6_addr, &ifa->addr, 100 ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifa->addr);
101 sizeof(struct in6_addr));
102 addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; 101 addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
103 addr->valid = 1; 102 addr->valid = 1;
104 spin_lock_bh(&sctp_local_addr_lock); 103 spin_lock_bh(&sctp_local_addr_lock);
@@ -628,9 +627,7 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
628static struct sock *sctp_v6_create_accept_sk(struct sock *sk, 627static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
629 struct sctp_association *asoc) 628 struct sctp_association *asoc)
630{ 629{
631 struct inet_sock *inet = inet_sk(sk);
632 struct sock *newsk; 630 struct sock *newsk;
633 struct inet_sock *newinet;
634 struct ipv6_pinfo *newnp, *np = inet6_sk(sk); 631 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
635 struct sctp6_sock *newsctp6sk; 632 struct sctp6_sock *newsctp6sk;
636 633
@@ -640,17 +637,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
640 637
641 sock_init_data(NULL, newsk); 638 sock_init_data(NULL, newsk);
642 639
643 newsk->sk_type = SOCK_STREAM; 640 sctp_copy_sock(newsk, sk, asoc);
644
645 newsk->sk_prot = sk->sk_prot;
646 newsk->sk_no_check = sk->sk_no_check;
647 newsk->sk_reuse = sk->sk_reuse;
648
649 newsk->sk_destruct = inet_sock_destruct;
650 newsk->sk_family = PF_INET6;
651 newsk->sk_protocol = IPPROTO_SCTP;
652 newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
653 newsk->sk_shutdown = sk->sk_shutdown;
654 sock_reset_flag(sk, SOCK_ZAPPED); 641 sock_reset_flag(sk, SOCK_ZAPPED);
655 642
656 newsctp6sk = (struct sctp6_sock *)newsk; 643 newsctp6sk = (struct sctp6_sock *)newsk;
@@ -658,7 +645,6 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
658 645
659 sctp_sk(newsk)->v4mapped = sctp_sk(sk)->v4mapped; 646 sctp_sk(newsk)->v4mapped = sctp_sk(sk)->v4mapped;
660 647
661 newinet = inet_sk(newsk);
662 newnp = inet6_sk(newsk); 648 newnp = inet6_sk(newsk);
663 649
664 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 650 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
@@ -666,26 +652,8 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
666 /* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname() 652 /* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname()
667 * and getpeername(). 653 * and getpeername().
668 */ 654 */
669 newinet->sport = inet->sport;
670 newnp->saddr = np->saddr;
671 newnp->rcv_saddr = np->rcv_saddr;
672 newinet->dport = htons(asoc->peer.port);
673 sctp_v6_to_sk_daddr(&asoc->peer.primary_addr, newsk); 655 sctp_v6_to_sk_daddr(&asoc->peer.primary_addr, newsk);
674 656
675 /* Init the ipv4 part of the socket since we can have sockets
676 * using v6 API for ipv4.
677 */
678 newinet->uc_ttl = -1;
679 newinet->mc_loop = 1;
680 newinet->mc_ttl = 1;
681 newinet->mc_index = 0;
682 newinet->mc_list = NULL;
683
684 if (ipv4_config.no_pmtu_disc)
685 newinet->pmtudisc = IP_PMTUDISC_DONT;
686 else
687 newinet->pmtudisc = IP_PMTUDISC_WANT;
688
689 sk_refcnt_debug_inc(newsk); 657 sk_refcnt_debug_inc(newsk);
690 658
691 if (newsk->sk_prot->init(newsk)) { 659 if (newsk->sk_prot->init(newsk)) {
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 73639355157e..07d58903a746 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -367,7 +367,6 @@ int sctp_packet_transmit(struct sctp_packet *packet)
367 struct sctp_transport *tp = packet->transport; 367 struct sctp_transport *tp = packet->transport;
368 struct sctp_association *asoc = tp->asoc; 368 struct sctp_association *asoc = tp->asoc;
369 struct sctphdr *sh; 369 struct sctphdr *sh;
370 __be32 crc32 = __constant_cpu_to_be32(0);
371 struct sk_buff *nskb; 370 struct sk_buff *nskb;
372 struct sctp_chunk *chunk, *tmp; 371 struct sctp_chunk *chunk, *tmp;
373 struct sock *sk; 372 struct sock *sk;
@@ -531,17 +530,16 @@ int sctp_packet_transmit(struct sctp_packet *packet)
531 * Note: Adler-32 is no longer applicable, as has been replaced 530 * Note: Adler-32 is no longer applicable, as has been replaced
532 * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. 531 * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
533 */ 532 */
534 if (!(dst->dev->features & NETIF_F_NO_CSUM)) { 533 if (!sctp_checksum_disable && !(dst->dev->features & NETIF_F_NO_CSUM)) {
535 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); 534 __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
536 crc32 = sctp_end_cksum(crc32); 535
536 /* 3) Put the resultant value into the checksum field in the
537 * common header, and leave the rest of the bits unchanged.
538 */
539 sh->checksum = sctp_end_cksum(crc32);
537 } else 540 } else
538 nskb->ip_summed = CHECKSUM_UNNECESSARY; 541 nskb->ip_summed = CHECKSUM_UNNECESSARY;
539 542
540 /* 3) Put the resultant value into the checksum field in the
541 * common header, and leave the rest of the bits unchanged.
542 */
543 sh->checksum = crc32;
544
545 /* IP layer ECN support 543 /* IP layer ECN support
546 * From RFC 2481 544 * From RFC 2481
547 * "The ECN-Capable Transport (ECT) bit would be set by the 545 * "The ECN-Capable Transport (ECT) bit would be set by the
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index bc411c896216..a367d15a21aa 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -428,7 +428,8 @@ void sctp_retransmit_mark(struct sctp_outq *q,
428 * retransmitting due to T3 timeout. 428 * retransmitting due to T3 timeout.
429 */ 429 */
430 if (reason == SCTP_RTXR_T3_RTX && 430 if (reason == SCTP_RTXR_T3_RTX &&
431 (jiffies - chunk->sent_at) < transport->last_rto) 431 time_before(jiffies, chunk->sent_at +
432 transport->last_rto))
432 continue; 433 continue;
433 434
434 /* RFC 2960 6.2.1 Processing a Received SACK 435 /* RFC 2960 6.2.1 Processing a Received SACK
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index c4986d0f7419..cb198af8887c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -589,46 +589,21 @@ static int sctp_v4_is_ce(const struct sk_buff *skb)
589static struct sock *sctp_v4_create_accept_sk(struct sock *sk, 589static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
590 struct sctp_association *asoc) 590 struct sctp_association *asoc)
591{ 591{
592 struct inet_sock *inet = inet_sk(sk);
593 struct inet_sock *newinet;
594 struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, 592 struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL,
595 sk->sk_prot); 593 sk->sk_prot);
594 struct inet_sock *newinet;
596 595
597 if (!newsk) 596 if (!newsk)
598 goto out; 597 goto out;
599 598
600 sock_init_data(NULL, newsk); 599 sock_init_data(NULL, newsk);
601 600
602 newsk->sk_type = SOCK_STREAM; 601 sctp_copy_sock(newsk, sk, asoc);
603
604 newsk->sk_no_check = sk->sk_no_check;
605 newsk->sk_reuse = sk->sk_reuse;
606 newsk->sk_shutdown = sk->sk_shutdown;
607
608 newsk->sk_destruct = inet_sock_destruct;
609 newsk->sk_family = PF_INET;
610 newsk->sk_protocol = IPPROTO_SCTP;
611 newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
612 sock_reset_flag(newsk, SOCK_ZAPPED); 602 sock_reset_flag(newsk, SOCK_ZAPPED);
613 603
614 newinet = inet_sk(newsk); 604 newinet = inet_sk(newsk);
615 605
616 /* Initialize sk's sport, dport, rcv_saddr and daddr for
617 * getsockname() and getpeername()
618 */
619 newinet->sport = inet->sport;
620 newinet->saddr = inet->saddr;
621 newinet->rcv_saddr = inet->rcv_saddr;
622 newinet->dport = htons(asoc->peer.port);
623 newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; 606 newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr;
624 newinet->pmtudisc = inet->pmtudisc;
625 newinet->id = asoc->next_tsn ^ jiffies;
626
627 newinet->uc_ttl = -1;
628 newinet->mc_loop = 1;
629 newinet->mc_ttl = 1;
630 newinet->mc_index = 0;
631 newinet->mc_list = NULL;
632 607
633 sk_refcnt_debug_inc(newsk); 608 sk_refcnt_debug_inc(newsk);
634 609
@@ -1413,4 +1388,6 @@ MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132");
1413MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-132"); 1388MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-132");
1414MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>"); 1389MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>");
1415MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)"); 1390MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)");
1391module_param_named(no_checksums, sctp_checksum_disable, bool, 0644);
1392MODULE_PARM_DESC(no_checksums, "Disable checksums computing and verification");
1416MODULE_LICENSE("GPL"); 1393MODULE_LICENSE("GPL");
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index fd8acb48c3f2..b40e95f9851b 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -100,11 +100,11 @@ int sctp_chunk_iif(const struct sctp_chunk *chunk)
100 */ 100 */
101static const struct sctp_paramhdr ecap_param = { 101static const struct sctp_paramhdr ecap_param = {
102 SCTP_PARAM_ECN_CAPABLE, 102 SCTP_PARAM_ECN_CAPABLE,
103 __constant_htons(sizeof(struct sctp_paramhdr)), 103 cpu_to_be16(sizeof(struct sctp_paramhdr)),
104}; 104};
105static const struct sctp_paramhdr prsctp_param = { 105static const struct sctp_paramhdr prsctp_param = {
106 SCTP_PARAM_FWD_TSN_SUPPORT, 106 SCTP_PARAM_FWD_TSN_SUPPORT,
107 __constant_htons(sizeof(struct sctp_paramhdr)), 107 cpu_to_be16(sizeof(struct sctp_paramhdr)),
108}; 108};
109 109
110/* A helper to initialize to initialize an op error inside a 110/* A helper to initialize to initialize an op error inside a
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b5495aecab60..e2020eb2c8ca 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -434,7 +434,8 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
434 * 434 *
435 */ 435 */
436static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, 436static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
437 struct sctp_transport *transport) 437 struct sctp_transport *transport,
438 int is_hb)
438{ 439{
439 /* The check for association's overall error counter exceeding the 440 /* The check for association's overall error counter exceeding the
440 * threshold is done in the state function. 441 * threshold is done in the state function.
@@ -461,9 +462,15 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
461 * expires, set RTO <- RTO * 2 ("back off the timer"). The 462 * expires, set RTO <- RTO * 2 ("back off the timer"). The
462 * maximum value discussed in rule C7 above (RTO.max) may be 463 * maximum value discussed in rule C7 above (RTO.max) may be
463 * used to provide an upper bound to this doubling operation. 464 * used to provide an upper bound to this doubling operation.
465 *
466 * Special Case: the first HB doesn't trigger exponential backoff.
467 * The first unacknowleged HB triggers it. We do this with a flag
468 * that indicates that we have an outstanding HB.
464 */ 469 */
465 transport->last_rto = transport->rto; 470 if (!is_hb || transport->hb_sent) {
466 transport->rto = min((transport->rto * 2), transport->asoc->rto_max); 471 transport->last_rto = transport->rto;
472 transport->rto = min((transport->rto * 2), transport->asoc->rto_max);
473 }
467} 474}
468 475
469/* Worker routine to handle INIT command failure. */ 476/* Worker routine to handle INIT command failure. */
@@ -621,6 +628,11 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
621 t->error_count = 0; 628 t->error_count = 0;
622 t->asoc->overall_error_count = 0; 629 t->asoc->overall_error_count = 0;
623 630
631 /* Clear the hb_sent flag to signal that we had a good
632 * acknowledgement.
633 */
634 t->hb_sent = 0;
635
624 /* Mark the destination transport address as active if it is not so 636 /* Mark the destination transport address as active if it is not so
625 * marked. 637 * marked.
626 */ 638 */
@@ -646,18 +658,6 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
646 sctp_transport_hold(t); 658 sctp_transport_hold(t);
647} 659}
648 660
649/* Helper function to do a transport reset at the expiry of the hearbeat
650 * timer.
651 */
652static void sctp_cmd_transport_reset(sctp_cmd_seq_t *cmds,
653 struct sctp_association *asoc,
654 struct sctp_transport *t)
655{
656 sctp_transport_lower_cwnd(t, SCTP_LOWER_CWND_INACTIVE);
657
658 /* Mark one strike against a transport. */
659 sctp_do_8_2_transport_strike(asoc, t);
660}
661 661
662/* Helper function to process the process SACK command. */ 662/* Helper function to process the process SACK command. */
663static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, 663static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
@@ -1458,12 +1458,19 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1458 1458
1459 case SCTP_CMD_STRIKE: 1459 case SCTP_CMD_STRIKE:
1460 /* Mark one strike against a transport. */ 1460 /* Mark one strike against a transport. */
1461 sctp_do_8_2_transport_strike(asoc, cmd->obj.transport); 1461 sctp_do_8_2_transport_strike(asoc, cmd->obj.transport,
1462 0);
1463 break;
1464
1465 case SCTP_CMD_TRANSPORT_IDLE:
1466 t = cmd->obj.transport;
1467 sctp_transport_lower_cwnd(t, SCTP_LOWER_CWND_INACTIVE);
1462 break; 1468 break;
1463 1469
1464 case SCTP_CMD_TRANSPORT_RESET: 1470 case SCTP_CMD_TRANSPORT_HB_SENT:
1465 t = cmd->obj.transport; 1471 t = cmd->obj.transport;
1466 sctp_cmd_transport_reset(commands, asoc, t); 1472 sctp_do_8_2_transport_strike(asoc, t, 1);
1473 t->hb_sent = 1;
1467 break; 1474 break;
1468 1475
1469 case SCTP_CMD_TRANSPORT_ON: 1476 case SCTP_CMD_TRANSPORT_ON:
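
The hb_sent flag introduced here changes the strike handling so that a heartbeat only contributes to exponential RTO backoff when a previous heartbeat is still outstanding, while strikes caused by data loss keep backing off unconditionally. A plain-C sketch of that rule, with illustrative field names rather than the real struct sctp_transport:

    struct my_transport {
            unsigned long rto, rto_max;
            int hb_sent;            /* a heartbeat is outstanding */
    };

    static void strike(struct my_transport *t, int is_hb)
    {
            if (!is_hb || t->hb_sent) {
                    t->rto *= 2;                    /* "back off the timer" */
                    if (t->rto > t->rto_max)
                            t->rto = t->rto_max;    /* rule C7 upper bound */
            }
            if (is_hb)
                    t->hb_sent = 1;                 /* next HB strike backs off */
    }

The flag is cleared again in sctp_cmd_transport_on() above when an acknowledgement arrives, so a single lost heartbeat probes at the current RTO before the doubling starts.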
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index f88dfded0e3a..55a61aa69662 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -988,7 +988,9 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
988 /* Set transport error counter and association error counter 988 /* Set transport error counter and association error counter
989 * when sending heartbeat. 989 * when sending heartbeat.
990 */ 990 */
991 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_RESET, 991 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE,
992 SCTP_TRANSPORT(transport));
993 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT,
992 SCTP_TRANSPORT(transport)); 994 SCTP_TRANSPORT(transport));
993 } 995 }
994 sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE, 996 sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE,
@@ -4955,7 +4957,7 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
4955 * to that address and not acknowledged within one RTO. 4957 * to that address and not acknowledged within one RTO.
4956 * 4958 *
4957 */ 4959 */
4958 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_RESET, 4960 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT,
4959 SCTP_TRANSPORT(arg)); 4961 SCTP_TRANSPORT(arg));
4960 return SCTP_DISPOSITION_CONSUME; 4962 return SCTP_DISPOSITION_CONSUME;
4961} 4963}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ff0a8f88de04..bbd3cd238d7f 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3069,9 +3069,6 @@ static int sctp_setsockopt_maxburst(struct sock *sk,
3069 int val; 3069 int val;
3070 int assoc_id = 0; 3070 int assoc_id = 0;
3071 3071
3072 if (optlen < sizeof(int))
3073 return -EINVAL;
3074
3075 if (optlen == sizeof(int)) { 3072 if (optlen == sizeof(int)) {
3076 printk(KERN_WARNING 3073 printk(KERN_WARNING
3077 "SCTP: Use of int in max_burst socket option deprecated\n"); 3074 "SCTP: Use of int in max_burst socket option deprecated\n");
@@ -3939,7 +3936,6 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc,
3939{ 3936{
3940 struct sock *sk = asoc->base.sk; 3937 struct sock *sk = asoc->base.sk;
3941 struct socket *sock; 3938 struct socket *sock;
3942 struct inet_sock *inetsk;
3943 struct sctp_af *af; 3939 struct sctp_af *af;
3944 int err = 0; 3940 int err = 0;
3945 3941
@@ -3954,18 +3950,18 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc,
3954 if (err < 0) 3950 if (err < 0)
3955 return err; 3951 return err;
3956 3952
3957 /* Populate the fields of the newsk from the oldsk and migrate the 3953 sctp_copy_sock(sock->sk, sk, asoc);
3958 * asoc to the newsk.
3959 */
3960 sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
3961 3954
3962 /* Make peeled-off sockets more like 1-1 accepted sockets. 3955 /* Make peeled-off sockets more like 1-1 accepted sockets.
3963 * Set the daddr and initialize id to something more random 3956 * Set the daddr and initialize id to something more random
3964 */ 3957 */
3965 af = sctp_get_af_specific(asoc->peer.primary_addr.sa.sa_family); 3958 af = sctp_get_af_specific(asoc->peer.primary_addr.sa.sa_family);
3966 af->to_sk_daddr(&asoc->peer.primary_addr, sk); 3959 af->to_sk_daddr(&asoc->peer.primary_addr, sk);
3967 inetsk = inet_sk(sock->sk); 3960
3968 inetsk->id = asoc->next_tsn ^ jiffies; 3961 /* Populate the fields of the newsk from the oldsk and migrate the
3962 * asoc to the newsk.
3963 */
3964 sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
3969 3965
3970 *sockp = sock; 3966 *sockp = sock;
3971 3967
@@ -5284,16 +5280,14 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len,
5284 struct sctp_sock *sp; 5280 struct sctp_sock *sp;
5285 struct sctp_association *asoc; 5281 struct sctp_association *asoc;
5286 5282
5287 if (len < sizeof(int))
5288 return -EINVAL;
5289
5290 if (len == sizeof(int)) { 5283 if (len == sizeof(int)) {
5291 printk(KERN_WARNING 5284 printk(KERN_WARNING
5292 "SCTP: Use of int in max_burst socket option deprecated\n"); 5285 "SCTP: Use of int in max_burst socket option deprecated\n");
5293 printk(KERN_WARNING 5286 printk(KERN_WARNING
5294 "SCTP: Use struct sctp_assoc_value instead\n"); 5287 "SCTP: Use struct sctp_assoc_value instead\n");
5295 params.assoc_id = 0; 5288 params.assoc_id = 0;
5296 } else if (len == sizeof (struct sctp_assoc_value)) { 5289 } else if (len >= sizeof(struct sctp_assoc_value)) {
5290 len = sizeof(struct sctp_assoc_value);
5297 if (copy_from_user(&params, optval, len)) 5291 if (copy_from_user(&params, optval, len))
5298 return -EFAULT; 5292 return -EFAULT;
5299 } else 5293 } else
@@ -6700,6 +6694,48 @@ done:
6700 sctp_skb_set_owner_r(skb, sk); 6694 sctp_skb_set_owner_r(skb, sk);
6701} 6695}
6702 6696
6697void sctp_copy_sock(struct sock *newsk, struct sock *sk,
6698 struct sctp_association *asoc)
6699{
6700 struct inet_sock *inet = inet_sk(sk);
6701 struct inet_sock *newinet = inet_sk(newsk);
6702
6703 newsk->sk_type = sk->sk_type;
6704 newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
6705 newsk->sk_flags = sk->sk_flags;
6706 newsk->sk_no_check = sk->sk_no_check;
6707 newsk->sk_reuse = sk->sk_reuse;
6708
6709 newsk->sk_shutdown = sk->sk_shutdown;
6710 newsk->sk_destruct = inet_sock_destruct;
6711 newsk->sk_family = sk->sk_family;
6712 newsk->sk_protocol = IPPROTO_SCTP;
6713 newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
6714 newsk->sk_sndbuf = sk->sk_sndbuf;
6715 newsk->sk_rcvbuf = sk->sk_rcvbuf;
6716 newsk->sk_lingertime = sk->sk_lingertime;
6717 newsk->sk_rcvtimeo = sk->sk_rcvtimeo;
6718 newsk->sk_sndtimeo = sk->sk_sndtimeo;
6719
6720 newinet = inet_sk(newsk);
6721
6722 /* Initialize sk's sport, dport, rcv_saddr and daddr for
6723 * getsockname() and getpeername()
6724 */
6725 newinet->sport = inet->sport;
6726 newinet->saddr = inet->saddr;
6727 newinet->rcv_saddr = inet->rcv_saddr;
6728 newinet->dport = htons(asoc->peer.port);
6729 newinet->pmtudisc = inet->pmtudisc;
6730 newinet->id = asoc->next_tsn ^ jiffies;
6731
6732 newinet->uc_ttl = inet->uc_ttl;
6733 newinet->mc_loop = 1;
6734 newinet->mc_ttl = 1;
6735 newinet->mc_index = 0;
6736 newinet->mc_list = NULL;
6737}
6738
6703/* Populate the fields of the newsk from the oldsk and migrate the assoc 6739/* Populate the fields of the newsk from the oldsk and migrate the assoc
6704 * and its messages to the newsk. 6740 * and its messages to the newsk.
6705 */ 6741 */
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index e745c118f239..e5dde45c79d3 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -79,6 +79,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
79 peer->rttvar = 0; 79 peer->rttvar = 0;
80 peer->srtt = 0; 80 peer->srtt = 0;
81 peer->rto_pending = 0; 81 peer->rto_pending = 0;
82 peer->hb_sent = 0;
82 peer->fast_recovery = 0; 83 peer->fast_recovery = 0;
83 84
84 peer->last_time_heard = jiffies; 85 peer->last_time_heard = jiffies;
@@ -542,8 +543,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
542 * congestion indications more than once every window of 543 * congestion indications more than once every window of
543 * data (or more loosely more than once every round-trip time). 544 * data (or more loosely more than once every round-trip time).
544 */ 545 */
545 if ((jiffies - transport->last_time_ecne_reduced) > 546 if (time_after(jiffies, transport->last_time_ecne_reduced +
546 transport->rtt) { 547 transport->rtt)) {
547 transport->ssthresh = max(transport->cwnd/2, 548 transport->ssthresh = max(transport->cwnd/2,
548 4*transport->asoc->pathmtu); 549 4*transport->asoc->pathmtu);
549 transport->cwnd = transport->ssthresh; 550 transport->cwnd = transport->ssthresh;
@@ -560,7 +561,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
560 * to be done every RTO interval, we do it every hearbeat 561 * to be done every RTO interval, we do it every hearbeat
561 * interval. 562 * interval.
562 */ 563 */
563 if ((jiffies - transport->last_time_used) > transport->rto) 564 if (time_after(jiffies, transport->last_time_used +
565 transport->rto))
564 transport->cwnd = max(transport->cwnd/2, 566 transport->cwnd = max(transport->cwnd/2,
565 4*transport->asoc->pathmtu); 567 4*transport->asoc->pathmtu);
566 break; 568 break;
@@ -608,6 +610,7 @@ void sctp_transport_reset(struct sctp_transport *t)
608 t->flight_size = 0; 610 t->flight_size = 0;
609 t->error_count = 0; 611 t->error_count = 0;
610 t->rto_pending = 0; 612 t->rto_pending = 0;
613 t->hb_sent = 0;
611 t->fast_recovery = 0; 614 t->fast_recovery = 0;
612 615
613 /* Initialize the state information for SFR-CACC */ 616 /* Initialize the state information for SFR-CACC */
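
The transport.c and outqueue.c hunks replace open-coded "(jiffies - stamp) > interval" tests with time_after()/time_before(), which remain correct when the jiffies counter wraps around. A minimal sketch of the safe form:

    #include <linux/types.h>
    #include <linux/jiffies.h>

    static bool interval_elapsed(unsigned long stamp, unsigned long interval)
    {
            /* correct even if jiffies has wrapped since "stamp" was taken */
            return time_after(jiffies, stamp + interval);
    }

The macros do the comparison via a signed difference, so as long as the two timestamps are less than half the counter range apart the result is wrap-safe.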
diff --git a/net/socket.c b/net/socket.c
index 35dd7371752a..47a3dc074eb0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -545,6 +545,18 @@ void sock_release(struct socket *sock)
545 sock->file = NULL; 545 sock->file = NULL;
546} 546}
547 547
548int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
549 union skb_shared_tx *shtx)
550{
551 shtx->flags = 0;
552 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
553 shtx->hardware = 1;
554 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
555 shtx->software = 1;
556 return 0;
557}
558EXPORT_SYMBOL(sock_tx_timestamp);
559
548static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 560static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
549 struct msghdr *msg, size_t size) 561 struct msghdr *msg, size_t size)
550{ 562{
@@ -595,33 +607,65 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
595 return result; 607 return result;
596} 608}
597 609
610static int ktime2ts(ktime_t kt, struct timespec *ts)
611{
612 if (kt.tv64) {
613 *ts = ktime_to_timespec(kt);
614 return 1;
615 } else {
616 return 0;
617 }
618}
619
598/* 620/*
599 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) 621 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
600 */ 622 */
601void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, 623void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
602 struct sk_buff *skb) 624 struct sk_buff *skb)
603{ 625{
604 ktime_t kt = skb->tstamp; 626 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
605 627 struct timespec ts[3];
606 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { 628 int empty = 1;
607 struct timeval tv; 629 struct skb_shared_hwtstamps *shhwtstamps =
608 /* Race occurred between timestamp enabling and packet 630 skb_hwtstamps(skb);
609 receiving. Fill in the current time for now. */ 631
610 if (kt.tv64 == 0) 632 /* Race occurred between timestamp enabling and packet
611 kt = ktime_get_real(); 633 receiving. Fill in the current time for now. */
612 skb->tstamp = kt; 634 if (need_software_tstamp && skb->tstamp.tv64 == 0)
613 tv = ktime_to_timeval(kt); 635 __net_timestamp(skb);
614 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); 636
615 } else { 637 if (need_software_tstamp) {
616 struct timespec ts; 638 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
617 /* Race occurred between timestamp enabling and packet 639 struct timeval tv;
618 receiving. Fill in the current time for now. */ 640 skb_get_timestamp(skb, &tv);
619 if (kt.tv64 == 0) 641 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
620 kt = ktime_get_real(); 642 sizeof(tv), &tv);
621 skb->tstamp = kt; 643 } else {
622 ts = ktime_to_timespec(kt); 644 struct timespec ts;
623 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts); 645 skb_get_timestampns(skb, &ts);
646 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
647 sizeof(ts), &ts);
648 }
649 }
650
651
652 memset(ts, 0, sizeof(ts));
653 if (skb->tstamp.tv64 &&
654 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
655 skb_get_timestampns(skb, ts + 0);
656 empty = 0;
657 }
658 if (shhwtstamps) {
659 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
660 ktime2ts(shhwtstamps->syststamp, ts + 1))
661 empty = 0;
662 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
663 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
664 empty = 0;
624 } 665 }
666 if (!empty)
667 put_cmsg(msg, SOL_SOCKET,
668 SCM_TIMESTAMPING, sizeof(ts), &ts);
625} 669}
626 670
627EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 671EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
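
__sock_recv_timestamp() now emits, alongside the classic SCM_TIMESTAMP(NS) messages, an SCM_TIMESTAMPING control message carrying three struct timespec values: software, hardware transformed to system time, and raw hardware. A hedged user-space sketch of how a receiver might read them; socket setup with SO_TIMESTAMPING and error handling are omitted, and the fallback SCM_TIMESTAMPING value is an assumption (the real one comes from the system headers):

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <time.h>

    #ifndef SCM_TIMESTAMPING
    #define SCM_TIMESTAMPING 37     /* assumed value; normally from <asm/socket.h> */
    #endif

    static void print_timestamps(int sock)
    {
            char data[1500], ctrl[512];
            struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
            struct msghdr msg = {
                    .msg_iov = &iov, .msg_iovlen = 1,
                    .msg_control = ctrl, .msg_controllen = sizeof(ctrl),
            };
            struct cmsghdr *cm;

            if (recvmsg(sock, &msg, 0) < 0)
                    return;

            for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
                    if (cm->cmsg_level != SOL_SOCKET ||
                        cm->cmsg_type != SCM_TIMESTAMPING)
                            continue;

                    struct timespec ts[3];  /* 0 = sw, 1 = hw->sys, 2 = raw hw */
                    memcpy(ts, CMSG_DATA(cm), sizeof(ts));
                    printf("sw %ld.%09ld  raw hw %ld.%09ld\n",
                           (long)ts[0].tv_sec, ts[0].tv_nsec,
                           (long)ts[2].tv_sec, ts[2].tv_nsec);
            }
    }

Unused slots are left zeroed by the kernel side (the memset(ts, 0, ...) above), so a receiver can tell which of the three timestamp sources were actually populated.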
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 5cbb404c4cdf..b49e434c094f 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1215,6 +1215,23 @@ out:
1215 read_unlock(&sk->sk_callback_lock); 1215 read_unlock(&sk->sk_callback_lock);
1216} 1216}
1217 1217
1218static void xs_write_space(struct sock *sk)
1219{
1220 struct socket *sock;
1221 struct rpc_xprt *xprt;
1222
1223 if (unlikely(!(sock = sk->sk_socket)))
1224 return;
1225 clear_bit(SOCK_NOSPACE, &sock->flags);
1226
1227 if (unlikely(!(xprt = xprt_from_sock(sk))))
1228 return;
1229 if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
1230 return;
1231
1232 xprt_write_space(xprt);
1233}
1234
1218/** 1235/**
1219 * xs_udp_write_space - callback invoked when socket buffer space 1236 * xs_udp_write_space - callback invoked when socket buffer space
1220 * becomes available 1237 * becomes available
@@ -1230,23 +1247,9 @@ static void xs_udp_write_space(struct sock *sk)
1230 read_lock(&sk->sk_callback_lock); 1247 read_lock(&sk->sk_callback_lock);
1231 1248
1232 /* from net/core/sock.c:sock_def_write_space */ 1249 /* from net/core/sock.c:sock_def_write_space */
1233 if (sock_writeable(sk)) { 1250 if (sock_writeable(sk))
1234 struct socket *sock; 1251 xs_write_space(sk);
1235 struct rpc_xprt *xprt;
1236
1237 if (unlikely(!(sock = sk->sk_socket)))
1238 goto out;
1239 clear_bit(SOCK_NOSPACE, &sock->flags);
1240
1241 if (unlikely(!(xprt = xprt_from_sock(sk))))
1242 goto out;
1243 if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
1244 goto out;
1245
1246 xprt_write_space(xprt);
1247 }
1248 1252
1249 out:
1250 read_unlock(&sk->sk_callback_lock); 1253 read_unlock(&sk->sk_callback_lock);
1251} 1254}
1252 1255
@@ -1265,23 +1268,9 @@ static void xs_tcp_write_space(struct sock *sk)
1265 read_lock(&sk->sk_callback_lock); 1268 read_lock(&sk->sk_callback_lock);
1266 1269
1267 /* from net/core/stream.c:sk_stream_write_space */ 1270 /* from net/core/stream.c:sk_stream_write_space */
1268 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { 1271 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
1269 struct socket *sock; 1272 xs_write_space(sk);
1270 struct rpc_xprt *xprt;
1271
1272 if (unlikely(!(sock = sk->sk_socket)))
1273 goto out;
1274 clear_bit(SOCK_NOSPACE, &sock->flags);
1275 1273
1276 if (unlikely(!(xprt = xprt_from_sock(sk))))
1277 goto out;
1278 if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
1279 goto out;
1280
1281 xprt_write_space(xprt);
1282 }
1283
1284 out:
1285 read_unlock(&sk->sk_callback_lock); 1274 read_unlock(&sk->sk_callback_lock);
1286} 1275}
1287 1276
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d1b89820ab4f..baac91049b0e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1178,8 +1178,7 @@ out_unlock:
1178 unix_state_unlock(other); 1178 unix_state_unlock(other);
1179 1179
1180out: 1180out:
1181 if (skb) 1181 kfree_skb(skb);
1182 kfree_skb(skb);
1183 if (newsk) 1182 if (newsk)
1184 unix_release_sock(newsk, 0); 1183 unix_release_sock(newsk, 0);
1185 if (other) 1184 if (other)
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 39701dec1dba..466e2d22d256 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -86,8 +86,10 @@ static int wanrouter_device_del_if(struct wan_device *wandev,
86 86
87static struct wan_device *wanrouter_find_device(char *name); 87static struct wan_device *wanrouter_find_device(char *name);
88static int wanrouter_delete_interface(struct wan_device *wandev, char *name); 88static int wanrouter_delete_interface(struct wan_device *wandev, char *name);
89static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); 89static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
90static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); 90 __acquires(lock);
91static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
92 __releases(lock);
91 93
92 94
93 95
@@ -763,12 +765,14 @@ static int wanrouter_delete_interface(struct wan_device *wandev, char *name)
763} 765}
764 766
765static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) 767static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
768 __acquires(lock)
766{ 769{
767 spin_lock_irqsave(lock, *smp_flags); 770 spin_lock_irqsave(lock, *smp_flags);
768} 771}
769 772
770 773
771static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) 774static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
775 __releases(lock)
772{ 776{
773 spin_unlock_irqrestore(lock, *smp_flags); 777 spin_unlock_irqrestore(lock, *smp_flags);
774} 778}
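
The __acquires()/__releases() markers added to wanmain.c (and the kernel_lock ones in wanproc.c below) are sparse context annotations: when the tree is built with "make C=1", sparse checks that a function tagged __acquires() really returns with the lock held and one tagged __releases() really returns with it dropped, instead of flagging the imbalance as an error. A minimal sketch of an annotated lock/unlock pair, with illustrative names:

    #include <linux/spinlock.h>

    static void my_lock(spinlock_t *lock, unsigned long *flags)
            __acquires(lock)
    {
            spin_lock_irqsave(lock, *flags);        /* exits with lock held */
    }

    static void my_unlock(spinlock_t *lock, unsigned long *flags)
            __releases(lock)
    {
            spin_unlock_irqrestore(lock, *flags);   /* exits with lock dropped */
    }

With a normal compiler the annotations expand to nothing, so the change is purely for static analysis.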
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
index 267f7ff49827..c44d96b3a437 100644
--- a/net/wanrouter/wanproc.c
+++ b/net/wanrouter/wanproc.c
@@ -80,6 +80,7 @@ static struct proc_dir_entry *proc_router;
80 * Iterator 80 * Iterator
81 */ 81 */
82static void *r_start(struct seq_file *m, loff_t *pos) 82static void *r_start(struct seq_file *m, loff_t *pos)
83 __acquires(kernel_lock)
83{ 84{
84 struct wan_device *wandev; 85 struct wan_device *wandev;
85 loff_t l = *pos; 86 loff_t l = *pos;
@@ -101,6 +102,7 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos)
101} 102}
102 103
103static void r_stop(struct seq_file *m, void *v) 104static void r_stop(struct seq_file *m, void *v)
105 __releases(kernel_lock)
104{ 106{
105 unlock_kernel(); 107 unlock_kernel();
106} 108}
diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c
index cb3b4ad53683..5d149c1b5f0d 100644
--- a/net/wimax/op-msg.c
+++ b/net/wimax/op-msg.c
@@ -258,7 +258,6 @@ EXPORT_SYMBOL_GPL(wimax_msg_len);
258 */ 258 */
259int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb) 259int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb)
260{ 260{
261 int result;
262 struct device *dev = wimax_dev->net_dev->dev.parent; 261 struct device *dev = wimax_dev->net_dev->dev.parent;
263 void *msg = skb->data; 262 void *msg = skb->data;
264 size_t size = skb->len; 263 size_t size = skb->len;
@@ -266,11 +265,9 @@ int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb)
266 265
267 d_printf(1, dev, "CTX: wimax msg, %zu bytes\n", size); 266 d_printf(1, dev, "CTX: wimax msg, %zu bytes\n", size);
268 d_dump(2, dev, msg, size); 267 d_dump(2, dev, msg, size);
269 result = genlmsg_multicast(skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); 268 genlmsg_multicast(skb, 0, wimax_gnl_mcg.id, GFP_KERNEL);
270 d_printf(1, dev, "CTX: genl multicast result %d\n", result); 269 d_printf(1, dev, "CTX: genl multicast done\n");
271 if (result == -ESRCH) /* Nobody connected, ignore it */ 270 return 0;
272 result = 0; /* btw, the skb is freed already */
273 return result;
274} 271}
275EXPORT_SYMBOL_GPL(wimax_msg_send); 272EXPORT_SYMBOL_GPL(wimax_msg_send);
276 273
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 3869c0327882..a0ee76b52510 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -163,16 +163,12 @@ int wimax_gnl_re_state_change_send(
163 struct device *dev = wimax_dev_to_dev(wimax_dev); 163 struct device *dev = wimax_dev_to_dev(wimax_dev);
164 d_fnstart(3, dev, "(wimax_dev %p report_skb %p)\n", 164 d_fnstart(3, dev, "(wimax_dev %p report_skb %p)\n",
165 wimax_dev, report_skb); 165 wimax_dev, report_skb);
166 if (report_skb == NULL) 166 if (report_skb == NULL) {
167 result = -ENOMEM;
167 goto out; 168 goto out;
168 genlmsg_end(report_skb, header);
169 result = genlmsg_multicast(report_skb, 0, wimax_gnl_mcg.id, GFP_KERNEL);
170 if (result == -ESRCH) /* Nobody connected, ignore it */
171 result = 0; /* btw, the skb is freed already */
172 if (result < 0) {
173 dev_err(dev, "RE_STCH: Error sending: %d\n", result);
174 nlmsg_free(report_skb);
175 } 169 }
170 genlmsg_end(report_skb, header);
171 genlmsg_multicast(report_skb, 0, wimax_gnl_mcg.id, GFP_KERNEL);
176out: 172out:
177 d_fnend(3, dev, "(wimax_dev %p report_skb %p) = %d\n", 173 d_fnend(3, dev, "(wimax_dev %p report_skb %p) = %d\n",
178 wimax_dev, report_skb, result); 174 wimax_dev, report_skb, result);
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 938a334c8dbc..dad43c24f695 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -5,7 +5,7 @@ obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o
5obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o 5obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o
6obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o 6obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o
7 7
8cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o 8cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o
9cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o 9cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o
10cfg80211-$(CONFIG_NL80211) += nl80211.o 10cfg80211-$(CONFIG_NL80211) += nl80211.o
11 11
diff --git a/net/wireless/core.c b/net/wireless/core.c
index b96fc0c3f1c4..dd7f222919fe 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -7,7 +7,6 @@
7#include <linux/if.h> 7#include <linux/if.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/err.h> 9#include <linux/err.h>
10#include <linux/mutex.h>
11#include <linux/list.h> 10#include <linux/list.h>
12#include <linux/nl80211.h> 11#include <linux/nl80211.h>
13#include <linux/debugfs.h> 12#include <linux/debugfs.h>
@@ -31,18 +30,29 @@ MODULE_DESCRIPTION("wireless configuration support");
31 * only read the list, and that can happen quite 30 * only read the list, and that can happen quite
32 * often because we need to do it for each command */ 31 * often because we need to do it for each command */
33LIST_HEAD(cfg80211_drv_list); 32LIST_HEAD(cfg80211_drv_list);
34DEFINE_MUTEX(cfg80211_drv_mutex); 33
34/*
35 * This is used to protect the cfg80211_drv_list, cfg80211_regdomain,
36 * country_ie_regdomain, the reg_beacon_list and the the last regulatory
37 * request receipt (last_request).
38 */
39DEFINE_MUTEX(cfg80211_mutex);
35 40
36/* for debugfs */ 41/* for debugfs */
37static struct dentry *ieee80211_debugfs_dir; 42static struct dentry *ieee80211_debugfs_dir;
38 43
39/* requires cfg80211_drv_mutex to be held! */ 44/* requires cfg80211_mutex to be held! */
40static struct cfg80211_registered_device *cfg80211_drv_by_wiphy(int wiphy) 45struct cfg80211_registered_device *cfg80211_drv_by_wiphy_idx(int wiphy_idx)
41{ 46{
42 struct cfg80211_registered_device *result = NULL, *drv; 47 struct cfg80211_registered_device *result = NULL, *drv;
43 48
49 if (!wiphy_idx_valid(wiphy_idx))
50 return NULL;
51
52 assert_cfg80211_lock();
53
44 list_for_each_entry(drv, &cfg80211_drv_list, list) { 54 list_for_each_entry(drv, &cfg80211_drv_list, list) {
45 if (drv->idx == wiphy) { 55 if (drv->wiphy_idx == wiphy_idx) {
46 result = drv; 56 result = drv;
47 break; 57 break;
48 } 58 }
@@ -51,17 +61,44 @@ static struct cfg80211_registered_device *cfg80211_drv_by_wiphy(int wiphy)
51 return result; 61 return result;
52} 62}
53 63
64int get_wiphy_idx(struct wiphy *wiphy)
65{
66 struct cfg80211_registered_device *drv;
67 if (!wiphy)
68 return WIPHY_IDX_STALE;
69 drv = wiphy_to_dev(wiphy);
70 return drv->wiphy_idx;
71}
72
54/* requires cfg80211_drv_mutex to be held! */ 73/* requires cfg80211_drv_mutex to be held! */
74struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx)
75{
76 struct cfg80211_registered_device *drv;
77
78 if (!wiphy_idx_valid(wiphy_idx))
79 return NULL;
80
81 assert_cfg80211_lock();
82
83 drv = cfg80211_drv_by_wiphy_idx(wiphy_idx);
84 if (!drv)
85 return NULL;
86 return &drv->wiphy;
87}
88
89/* requires cfg80211_mutex to be held! */
55static struct cfg80211_registered_device * 90static struct cfg80211_registered_device *
56__cfg80211_drv_from_info(struct genl_info *info) 91__cfg80211_drv_from_info(struct genl_info *info)
57{ 92{
58 int ifindex; 93 int ifindex;
59 struct cfg80211_registered_device *bywiphy = NULL, *byifidx = NULL; 94 struct cfg80211_registered_device *bywiphyidx = NULL, *byifidx = NULL;
60 struct net_device *dev; 95 struct net_device *dev;
61 int err = -EINVAL; 96 int err = -EINVAL;
62 97
98 assert_cfg80211_lock();
99
63 if (info->attrs[NL80211_ATTR_WIPHY]) { 100 if (info->attrs[NL80211_ATTR_WIPHY]) {
64 bywiphy = cfg80211_drv_by_wiphy( 101 bywiphyidx = cfg80211_drv_by_wiphy_idx(
65 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY])); 102 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY]));
66 err = -ENODEV; 103 err = -ENODEV;
67 } 104 }
@@ -78,14 +115,14 @@ __cfg80211_drv_from_info(struct genl_info *info)
78 err = -ENODEV; 115 err = -ENODEV;
79 } 116 }
80 117
81 if (bywiphy && byifidx) { 118 if (bywiphyidx && byifidx) {
82 if (bywiphy != byifidx) 119 if (bywiphyidx != byifidx)
83 return ERR_PTR(-EINVAL); 120 return ERR_PTR(-EINVAL);
84 else 121 else
85 return bywiphy; /* == byifidx */ 122 return bywiphyidx; /* == byifidx */
86 } 123 }
87 if (bywiphy) 124 if (bywiphyidx)
88 return bywiphy; 125 return bywiphyidx;
89 126
90 if (byifidx) 127 if (byifidx)
91 return byifidx; 128 return byifidx;
@@ -98,7 +135,7 @@ cfg80211_get_dev_from_info(struct genl_info *info)
98{ 135{
99 struct cfg80211_registered_device *drv; 136 struct cfg80211_registered_device *drv;
100 137
101 mutex_lock(&cfg80211_drv_mutex); 138 mutex_lock(&cfg80211_mutex);
102 drv = __cfg80211_drv_from_info(info); 139 drv = __cfg80211_drv_from_info(info);
103 140
104 /* if it is not an error we grab the lock on 141 /* if it is not an error we grab the lock on
@@ -107,7 +144,7 @@ cfg80211_get_dev_from_info(struct genl_info *info)
107 if (!IS_ERR(drv)) 144 if (!IS_ERR(drv))
108 mutex_lock(&drv->mtx); 145 mutex_lock(&drv->mtx);
109 146
110 mutex_unlock(&cfg80211_drv_mutex); 147 mutex_unlock(&cfg80211_mutex);
111 148
112 return drv; 149 return drv;
113} 150}
@@ -118,7 +155,7 @@ cfg80211_get_dev_from_ifindex(int ifindex)
118 struct cfg80211_registered_device *drv = ERR_PTR(-ENODEV); 155 struct cfg80211_registered_device *drv = ERR_PTR(-ENODEV);
119 struct net_device *dev; 156 struct net_device *dev;
120 157
121 mutex_lock(&cfg80211_drv_mutex); 158 mutex_lock(&cfg80211_mutex);
122 dev = dev_get_by_index(&init_net, ifindex); 159 dev = dev_get_by_index(&init_net, ifindex);
123 if (!dev) 160 if (!dev)
124 goto out; 161 goto out;
@@ -129,7 +166,7 @@ cfg80211_get_dev_from_ifindex(int ifindex)
129 drv = ERR_PTR(-ENODEV); 166 drv = ERR_PTR(-ENODEV);
130 dev_put(dev); 167 dev_put(dev);
131 out: 168 out:
132 mutex_unlock(&cfg80211_drv_mutex); 169 mutex_unlock(&cfg80211_mutex);
133 return drv; 170 return drv;
134} 171}
135 172
@@ -143,16 +180,16 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
143 char *newname) 180 char *newname)
144{ 181{
145 struct cfg80211_registered_device *drv; 182 struct cfg80211_registered_device *drv;
146 int idx, taken = -1, result, digits; 183 int wiphy_idx, taken = -1, result, digits;
147 184
148 mutex_lock(&cfg80211_drv_mutex); 185 mutex_lock(&cfg80211_mutex);
149 186
150 /* prohibit calling the thing phy%d when %d is not its number */ 187 /* prohibit calling the thing phy%d when %d is not its number */
151 sscanf(newname, PHY_NAME "%d%n", &idx, &taken); 188 sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken);
152 if (taken == strlen(newname) && idx != rdev->idx) { 189 if (taken == strlen(newname) && wiphy_idx != rdev->wiphy_idx) {
153 /* count number of places needed to print idx */ 190 /* count number of places needed to print wiphy_idx */
154 digits = 1; 191 digits = 1;
155 while (idx /= 10) 192 while (wiphy_idx /= 10)
156 digits++; 193 digits++;
157 /* 194 /*
158 * deny the name if it is phy<idx> where <idx> is printed 195 * deny the name if it is phy<idx> where <idx> is printed
@@ -193,7 +230,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
193 230
194 result = 0; 231 result = 0;
195out_unlock: 232out_unlock:
196 mutex_unlock(&cfg80211_drv_mutex); 233 mutex_unlock(&cfg80211_mutex);
197 if (result == 0) 234 if (result == 0)
198 nl80211_notify_dev_rename(rdev); 235 nl80211_notify_dev_rename(rdev);
199 236
@@ -220,26 +257,28 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
220 257
221 drv->ops = ops; 258 drv->ops = ops;
222 259
223 mutex_lock(&cfg80211_drv_mutex); 260 mutex_lock(&cfg80211_mutex);
224 261
225 drv->idx = wiphy_counter++; 262 drv->wiphy_idx = wiphy_counter++;
226 263
227 if (unlikely(drv->idx < 0)) { 264 if (unlikely(!wiphy_idx_valid(drv->wiphy_idx))) {
228 wiphy_counter--; 265 wiphy_counter--;
229 mutex_unlock(&cfg80211_drv_mutex); 266 mutex_unlock(&cfg80211_mutex);
230 /* ugh, wrapped! */ 267 /* ugh, wrapped! */
231 kfree(drv); 268 kfree(drv);
232 return NULL; 269 return NULL;
233 } 270 }
234 271
235 mutex_unlock(&cfg80211_drv_mutex); 272 mutex_unlock(&cfg80211_mutex);
236 273
237 /* give it a proper name */ 274 /* give it a proper name */
238 dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->idx); 275 dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->wiphy_idx);
239 276
240 mutex_init(&drv->mtx); 277 mutex_init(&drv->mtx);
241 mutex_init(&drv->devlist_mtx); 278 mutex_init(&drv->devlist_mtx);
242 INIT_LIST_HEAD(&drv->netdev_list); 279 INIT_LIST_HEAD(&drv->netdev_list);
280 spin_lock_init(&drv->bss_lock);
281 INIT_LIST_HEAD(&drv->bss_list);
243 282
244 device_initialize(&drv->wiphy.dev); 283 device_initialize(&drv->wiphy.dev);
245 drv->wiphy.dev.class = &ieee80211_class; 284 drv->wiphy.dev.class = &ieee80211_class;
@@ -259,6 +298,9 @@ int wiphy_register(struct wiphy *wiphy)
259 int i; 298 int i;
260 u16 ifmodes = wiphy->interface_modes; 299 u16 ifmodes = wiphy->interface_modes;
261 300
301 if (WARN_ON(wiphy->max_scan_ssids < 1))
302 return -EINVAL;
303
262 /* sanity check ifmodes */ 304 /* sanity check ifmodes */
263 WARN_ON(!ifmodes); 305 WARN_ON(!ifmodes);
264 ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; 306 ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1;
@@ -273,10 +315,16 @@ int wiphy_register(struct wiphy *wiphy)
273 315
274 sband->band = band; 316 sband->band = band;
275 317
276 if (!sband->n_channels || !sband->n_bitrates) { 318 if (WARN_ON(!sband->n_channels || !sband->n_bitrates))
277 WARN_ON(1); 319 return -EINVAL;
320
321 /*
322 * Since we use a u32 for rate bitmaps in
323 * ieee80211_get_response_rate, we cannot
324 * have more than 32 legacy rates.
325 */
326 if (WARN_ON(sband->n_bitrates > 32))
278 return -EINVAL; 327 return -EINVAL;
279 }
280 328
281 for (i = 0; i < sband->n_channels; i++) { 329 for (i = 0; i < sband->n_channels; i++) {
282 sband->channels[i].orig_flags = 330 sband->channels[i].orig_flags =
@@ -299,7 +347,7 @@ int wiphy_register(struct wiphy *wiphy)
299 /* check and set up bitrates */ 347 /* check and set up bitrates */
300 ieee80211_set_bitrate_flags(wiphy); 348 ieee80211_set_bitrate_flags(wiphy);
301 349
302 mutex_lock(&cfg80211_drv_mutex); 350 mutex_lock(&cfg80211_mutex);
303 351
304 /* set up regulatory info */ 352 /* set up regulatory info */
305 wiphy_update_regulatory(wiphy, REGDOM_SET_BY_CORE); 353 wiphy_update_regulatory(wiphy, REGDOM_SET_BY_CORE);
@@ -319,7 +367,7 @@ int wiphy_register(struct wiphy *wiphy)
319 367
320 res = 0; 368 res = 0;
321out_unlock: 369out_unlock:
322 mutex_unlock(&cfg80211_drv_mutex); 370 mutex_unlock(&cfg80211_mutex);
323 return res; 371 return res;
324} 372}
325EXPORT_SYMBOL(wiphy_register); 373EXPORT_SYMBOL(wiphy_register);
@@ -329,7 +377,7 @@ void wiphy_unregister(struct wiphy *wiphy)
329 struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy); 377 struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
330 378
331 /* protect the device list */ 379 /* protect the device list */
332 mutex_lock(&cfg80211_drv_mutex); 380 mutex_lock(&cfg80211_mutex);
333 381
334 BUG_ON(!list_empty(&drv->netdev_list)); 382 BUG_ON(!list_empty(&drv->netdev_list));
335 383
@@ -355,14 +403,17 @@ void wiphy_unregister(struct wiphy *wiphy)
355 device_del(&drv->wiphy.dev); 403 device_del(&drv->wiphy.dev);
356 debugfs_remove(drv->wiphy.debugfsdir); 404 debugfs_remove(drv->wiphy.debugfsdir);
357 405
358 mutex_unlock(&cfg80211_drv_mutex); 406 mutex_unlock(&cfg80211_mutex);
359} 407}
360EXPORT_SYMBOL(wiphy_unregister); 408EXPORT_SYMBOL(wiphy_unregister);
361 409
362void cfg80211_dev_free(struct cfg80211_registered_device *drv) 410void cfg80211_dev_free(struct cfg80211_registered_device *drv)
363{ 411{
412 struct cfg80211_internal_bss *scan, *tmp;
364 mutex_destroy(&drv->mtx); 413 mutex_destroy(&drv->mtx);
365 mutex_destroy(&drv->devlist_mtx); 414 mutex_destroy(&drv->devlist_mtx);
415 list_for_each_entry_safe(scan, tmp, &drv->bss_list, list)
416 cfg80211_put_bss(&scan->pub);
366 kfree(drv); 417 kfree(drv);
367} 418}
368 419
diff --git a/net/wireless/core.h b/net/wireless/core.h
index f7fb9f413028..f6c53f5807f4 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -8,6 +8,9 @@
8#include <linux/mutex.h> 8#include <linux/mutex.h>
9#include <linux/list.h> 9#include <linux/list.h>
10#include <linux/netdevice.h> 10#include <linux/netdevice.h>
11#include <linux/kref.h>
12#include <linux/rbtree.h>
13#include <linux/mutex.h>
11#include <net/genetlink.h> 14#include <net/genetlink.h>
12#include <net/wireless.h> 15#include <net/wireless.h>
13#include <net/cfg80211.h> 16#include <net/cfg80211.h>
@@ -35,12 +38,20 @@ struct cfg80211_registered_device {
35 enum environment_cap env; 38 enum environment_cap env;
36 39
37 /* wiphy index, internal only */ 40 /* wiphy index, internal only */
38 int idx; 41 int wiphy_idx;
39 42
40 /* associate netdev list */ 43 /* associate netdev list */
41 struct mutex devlist_mtx; 44 struct mutex devlist_mtx;
42 struct list_head netdev_list; 45 struct list_head netdev_list;
43 46
47 /* BSSes/scanning */
48 spinlock_t bss_lock;
49 struct list_head bss_list;
50 struct rb_root bss_tree;
51 u32 bss_generation;
52 struct cfg80211_scan_request *scan_req; /* protected by RTNL */
53 unsigned long suspend_at;
54
44 /* must be last because of the way we do wiphy_priv(), 55 /* must be last because of the way we do wiphy_priv(),
45 * and it should at least be aligned to NETDEV_ALIGN */ 56 * and it should at least be aligned to NETDEV_ALIGN */
46 struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN))); 57 struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN)));
@@ -53,9 +64,39 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
53 return container_of(wiphy, struct cfg80211_registered_device, wiphy); 64 return container_of(wiphy, struct cfg80211_registered_device, wiphy);
54} 65}
55 66
56extern struct mutex cfg80211_drv_mutex; 67/* Note 0 is valid, hence phy0 */
68static inline
69bool wiphy_idx_valid(int wiphy_idx)
70{
71 return (wiphy_idx >= 0);
72}
73
74extern struct mutex cfg80211_mutex;
57extern struct list_head cfg80211_drv_list; 75extern struct list_head cfg80211_drv_list;
58 76
77static inline void assert_cfg80211_lock(void)
78{
79 WARN_ON(!mutex_is_locked(&cfg80211_mutex));
80}
81
82/*
83 * You can use this to mark a wiphy_idx as not having an associated wiphy.
84 * It guarantees cfg80211_drv_by_wiphy_idx(wiphy_idx) will return NULL
85 */
86#define WIPHY_IDX_STALE -1
87
88struct cfg80211_internal_bss {
89 struct list_head list;
90 struct rb_node rbn;
91 unsigned long ts;
92 struct kref ref;
93 /* must be last because of priv member */
94 struct cfg80211_bss pub;
95};
96
97struct cfg80211_registered_device *cfg80211_drv_by_wiphy_idx(int wiphy_idx);
98int get_wiphy_idx(struct wiphy *wiphy);
99
59/* 100/*
60 * This function returns a pointer to the driver 101 * This function returns a pointer to the driver
61 * that the genl_info item that is passed refers to. 102 * that the genl_info item that is passed refers to.
@@ -63,13 +104,13 @@ extern struct list_head cfg80211_drv_list;
63 * the driver's mutex! 104 * the driver's mutex!
64 * 105 *
65 * This means that you need to call cfg80211_put_dev() 106 * This means that you need to call cfg80211_put_dev()
66 * before being allowed to acquire &cfg80211_drv_mutex! 107 * before being allowed to acquire &cfg80211_mutex!
67 * 108 *
68 * This is necessary because we need to lock the global 109 * This is necessary because we need to lock the global
69 * mutex to get an item off the list safely, and then 110 * mutex to get an item off the list safely, and then
70 * we lock the drv mutex so it doesn't go away under us. 111 * we lock the drv mutex so it doesn't go away under us.
71 * 112 *
72 * We don't want to keep cfg80211_drv_mutex locked 113 * We don't want to keep cfg80211_mutex locked
73 * for all the time in order to allow requests on 114 * for all the time in order to allow requests on
74 * other interfaces to go through at the same time. 115 * other interfaces to go through at the same time.
75 * 116 *
@@ -79,6 +120,9 @@ extern struct list_head cfg80211_drv_list;
79extern struct cfg80211_registered_device * 120extern struct cfg80211_registered_device *
80cfg80211_get_dev_from_info(struct genl_info *info); 121cfg80211_get_dev_from_info(struct genl_info *info);
81 122
 123/* requires cfg80211_mutex to be held! */
124struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx);
125
82/* identical to cfg80211_get_dev_from_info but only operate on ifindex */ 126/* identical to cfg80211_get_dev_from_info but only operate on ifindex */
83extern struct cfg80211_registered_device * 127extern struct cfg80211_registered_device *
84cfg80211_get_dev_from_ifindex(int ifindex); 128cfg80211_get_dev_from_ifindex(int ifindex);
@@ -94,4 +138,8 @@ extern int cfg80211_dev_rename(struct cfg80211_registered_device *drv,
94void ieee80211_set_bitrate_flags(struct wiphy *wiphy); 138void ieee80211_set_bitrate_flags(struct wiphy *wiphy);
95void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby); 139void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby);
96 140
141void cfg80211_bss_expire(struct cfg80211_registered_device *dev);
142void cfg80211_bss_age(struct cfg80211_registered_device *dev,
143 unsigned long age_secs);
144
97#endif /* __NET_WIRELESS_CORE_H */ 145#endif /* __NET_WIRELESS_CORE_H */
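
The new struct cfg80211_internal_bss keeps the public struct cfg80211_bss as its last member (driver-private data follows it) and reference-counts it with a kref, which is why cfg80211_dev_free above can simply walk bss_list and drop one reference per entry. A minimal sketch of how such a wrapper is typically released -- illustrative helper names, not the actual cfg80211_put_bss() implementation -- looks like this:

static void bss_release(struct kref *ref)
{
	struct cfg80211_internal_bss *bss;

	/* recover the wrapper from its embedded kref */
	bss = container_of(ref, struct cfg80211_internal_bss, ref);
	kfree(bss);
}

static void internal_bss_put(struct cfg80211_bss *pub)
{
	struct cfg80211_internal_bss *bss;

	/* the public part is embedded, so container_of() gets us back */
	bss = container_of(pub, struct cfg80211_internal_bss, pub);
	kref_put(&bss->ref, bss_release);
}
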
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1e728fff474e..531bb67cf502 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7,13 +7,13 @@
7#include <linux/if.h> 7#include <linux/if.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/err.h> 9#include <linux/err.h>
10#include <linux/mutex.h>
11#include <linux/list.h> 10#include <linux/list.h>
12#include <linux/if_ether.h> 11#include <linux/if_ether.h>
13#include <linux/ieee80211.h> 12#include <linux/ieee80211.h>
14#include <linux/nl80211.h> 13#include <linux/nl80211.h>
15#include <linux/rtnetlink.h> 14#include <linux/rtnetlink.h>
16#include <linux/netlink.h> 15#include <linux/netlink.h>
16#include <linux/etherdevice.h>
17#include <net/genetlink.h> 17#include <net/genetlink.h>
18#include <net/cfg80211.h> 18#include <net/cfg80211.h>
19#include "core.h" 19#include "core.h"
@@ -105,6 +105,12 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
105 105
106 [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, 106 [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY,
107 .len = NL80211_HT_CAPABILITY_LEN }, 107 .len = NL80211_HT_CAPABILITY_LEN },
108
109 [NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 },
110 [NL80211_ATTR_IE] = { .type = NLA_BINARY,
111 .len = IEEE80211_MAX_DATA_LEN },
112 [NL80211_ATTR_SCAN_FREQUENCIES] = { .type = NLA_NESTED },
113 [NL80211_ATTR_SCAN_SSIDS] = { .type = NLA_NESTED },
108}; 114};
109 115
110/* message building helper */ 116/* message building helper */
@@ -135,8 +141,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
135 if (!hdr) 141 if (!hdr)
136 return -1; 142 return -1;
137 143
138 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->idx); 144 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx);
139 NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); 145 NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy));
146 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
147 dev->wiphy.max_scan_ssids);
140 148
141 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); 149 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
142 if (!nl_modes) 150 if (!nl_modes)
@@ -247,7 +255,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
247 int start = cb->args[0]; 255 int start = cb->args[0];
248 struct cfg80211_registered_device *dev; 256 struct cfg80211_registered_device *dev;
249 257
250 mutex_lock(&cfg80211_drv_mutex); 258 mutex_lock(&cfg80211_mutex);
251 list_for_each_entry(dev, &cfg80211_drv_list, list) { 259 list_for_each_entry(dev, &cfg80211_drv_list, list) {
252 if (++idx <= start) 260 if (++idx <= start)
253 continue; 261 continue;
@@ -258,7 +266,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
258 break; 266 break;
259 } 267 }
260 } 268 }
261 mutex_unlock(&cfg80211_drv_mutex); 269 mutex_unlock(&cfg80211_mutex);
262 270
263 cb->args[0] = idx; 271 cb->args[0] = idx;
264 272
@@ -461,7 +469,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
461 struct cfg80211_registered_device *dev; 469 struct cfg80211_registered_device *dev;
462 struct wireless_dev *wdev; 470 struct wireless_dev *wdev;
463 471
464 mutex_lock(&cfg80211_drv_mutex); 472 mutex_lock(&cfg80211_mutex);
465 list_for_each_entry(dev, &cfg80211_drv_list, list) { 473 list_for_each_entry(dev, &cfg80211_drv_list, list) {
466 if (wp_idx < wp_start) { 474 if (wp_idx < wp_start) {
467 wp_idx++; 475 wp_idx++;
@@ -488,7 +496,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
488 wp_idx++; 496 wp_idx++;
489 } 497 }
490 out: 498 out:
491 mutex_unlock(&cfg80211_drv_mutex); 499 mutex_unlock(&cfg80211_mutex);
492 500
493 cb->args[0] = wp_idx; 501 cb->args[0] = wp_idx;
494 cb->args[1] = if_idx; 502 cb->args[1] = if_idx;
@@ -738,7 +746,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
738 if (info->attrs[NL80211_ATTR_KEY_IDX]) 746 if (info->attrs[NL80211_ATTR_KEY_IDX])
739 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); 747 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
740 748
741 if (key_idx > 3) 749 if (key_idx > 5)
742 return -EINVAL; 750 return -EINVAL;
743 751
744 if (info->attrs[NL80211_ATTR_MAC]) 752 if (info->attrs[NL80211_ATTR_MAC])
@@ -804,30 +812,41 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
804 int err; 812 int err;
805 struct net_device *dev; 813 struct net_device *dev;
806 u8 key_idx; 814 u8 key_idx;
815 int (*func)(struct wiphy *wiphy, struct net_device *netdev,
816 u8 key_index);
807 817
808 if (!info->attrs[NL80211_ATTR_KEY_IDX]) 818 if (!info->attrs[NL80211_ATTR_KEY_IDX])
809 return -EINVAL; 819 return -EINVAL;
810 820
811 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); 821 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
812 822
813 if (key_idx > 3) 823 if (info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]) {
824 if (key_idx < 4 || key_idx > 5)
825 return -EINVAL;
826 } else if (key_idx > 3)
814 return -EINVAL; 827 return -EINVAL;
815 828
816 /* currently only support setting default key */ 829 /* currently only support setting default key */
817 if (!info->attrs[NL80211_ATTR_KEY_DEFAULT]) 830 if (!info->attrs[NL80211_ATTR_KEY_DEFAULT] &&
831 !info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT])
818 return -EINVAL; 832 return -EINVAL;
819 833
820 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 834 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
821 if (err) 835 if (err)
822 return err; 836 return err;
823 837
824 if (!drv->ops->set_default_key) { 838 if (info->attrs[NL80211_ATTR_KEY_DEFAULT])
839 func = drv->ops->set_default_key;
840 else
841 func = drv->ops->set_default_mgmt_key;
842
843 if (!func) {
825 err = -EOPNOTSUPP; 844 err = -EOPNOTSUPP;
826 goto out; 845 goto out;
827 } 846 }
828 847
829 rtnl_lock(); 848 rtnl_lock();
830 err = drv->ops->set_default_key(&drv->wiphy, dev, key_idx); 849 err = func(&drv->wiphy, dev, key_idx);
831 rtnl_unlock(); 850 rtnl_unlock();
832 851
833 out: 852 out:
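
The net effect of the index checks above: ordinary pairwise/group keys live at indices 0-3, while the new default-management-key path only accepts indices 4 and 5 (the AES-CMAC management group keys added further down), which is also why the other key handlers now allow key_idx up to 5. A condensed, purely illustrative restatement of that rule (not a helper that exists in nl80211.c):

static bool nl80211_key_idx_ok(u8 key_idx, bool mgmt_default)
{
	if (mgmt_default)
		return key_idx >= 4 && key_idx <= 5;	/* management group keys */
	return key_idx <= 3;				/* pairwise/group keys */
}
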
@@ -863,7 +882,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
863 if (info->attrs[NL80211_ATTR_MAC]) 882 if (info->attrs[NL80211_ATTR_MAC])
864 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 883 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
865 884
866 if (key_idx > 3) 885 if (key_idx > 5)
867 return -EINVAL; 886 return -EINVAL;
868 887
869 /* 888 /*
@@ -894,6 +913,10 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
894 if (params.key_len != 13) 913 if (params.key_len != 13)
895 return -EINVAL; 914 return -EINVAL;
896 break; 915 break;
916 case WLAN_CIPHER_SUITE_AES_CMAC:
917 if (params.key_len != 16)
918 return -EINVAL;
919 break;
897 default: 920 default:
898 return -EINVAL; 921 return -EINVAL;
899 } 922 }
@@ -928,7 +951,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
928 if (info->attrs[NL80211_ATTR_KEY_IDX]) 951 if (info->attrs[NL80211_ATTR_KEY_IDX])
929 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); 952 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
930 953
931 if (key_idx > 3) 954 if (key_idx > 5)
932 return -EINVAL; 955 return -EINVAL;
933 956
934 if (info->attrs[NL80211_ATTR_MAC]) 957 if (info->attrs[NL80211_ATTR_MAC])
@@ -1182,6 +1205,12 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
1182 1205
1183 nla_nest_end(msg, txrate); 1206 nla_nest_end(msg, txrate);
1184 } 1207 }
1208 if (sinfo->filled & STATION_INFO_RX_PACKETS)
1209 NLA_PUT_U32(msg, NL80211_STA_INFO_RX_PACKETS,
1210 sinfo->rx_packets);
1211 if (sinfo->filled & STATION_INFO_TX_PACKETS)
1212 NLA_PUT_U32(msg, NL80211_STA_INFO_TX_PACKETS,
1213 sinfo->tx_packets);
1185 nla_nest_end(msg, sinfoattr); 1214 nla_nest_end(msg, sinfoattr);
1186 1215
1187 return genlmsg_end(msg, hdr); 1216 return genlmsg_end(msg, hdr);
@@ -1876,6 +1905,19 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
1876 int r; 1905 int r;
1877 char *data = NULL; 1906 char *data = NULL;
1878 1907
1908 /*
 1909 * You should only get this when cfg80211 hasn't yet initialized
 1910 * completely, i.e. when built into the kernel, in the window
 1911 * between nl80211_init() and regulatory_init(), if that is
 1912 * even possible.
1913 */
1914 mutex_lock(&cfg80211_mutex);
1915 if (unlikely(!cfg80211_regdomain)) {
1916 mutex_unlock(&cfg80211_mutex);
1917 return -EINPROGRESS;
1918 }
1919 mutex_unlock(&cfg80211_mutex);
1920
1879 if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) 1921 if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
1880 return -EINVAL; 1922 return -EINVAL;
1881 1923
@@ -1886,9 +1928,9 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
1886 if (is_world_regdom(data)) 1928 if (is_world_regdom(data))
1887 return -EINVAL; 1929 return -EINVAL;
1888#endif 1930#endif
1889 mutex_lock(&cfg80211_drv_mutex); 1931
1890 r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, 0, ENVIRON_ANY); 1932 r = regulatory_hint_user(data);
1891 mutex_unlock(&cfg80211_drv_mutex); 1933
1892 return r; 1934 return r;
1893} 1935}
1894 1936
@@ -2069,6 +2111,81 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
2069 2111
2070#undef FILL_IN_MESH_PARAM_IF_SET 2112#undef FILL_IN_MESH_PARAM_IF_SET
2071 2113
2114static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
2115{
2116 struct sk_buff *msg;
2117 void *hdr = NULL;
2118 struct nlattr *nl_reg_rules;
2119 unsigned int i;
2120 int err = -EINVAL;
2121
2122 mutex_lock(&cfg80211_mutex);
2123
2124 if (!cfg80211_regdomain)
2125 goto out;
2126
2127 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
2128 if (!msg) {
2129 err = -ENOBUFS;
2130 goto out;
2131 }
2132
2133 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
2134 NL80211_CMD_GET_REG);
2135 if (!hdr)
2136 goto nla_put_failure;
2137
2138 NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2,
2139 cfg80211_regdomain->alpha2);
2140
2141 nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES);
2142 if (!nl_reg_rules)
2143 goto nla_put_failure;
2144
2145 for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) {
2146 struct nlattr *nl_reg_rule;
2147 const struct ieee80211_reg_rule *reg_rule;
2148 const struct ieee80211_freq_range *freq_range;
2149 const struct ieee80211_power_rule *power_rule;
2150
2151 reg_rule = &cfg80211_regdomain->reg_rules[i];
2152 freq_range = &reg_rule->freq_range;
2153 power_rule = &reg_rule->power_rule;
2154
2155 nl_reg_rule = nla_nest_start(msg, i);
2156 if (!nl_reg_rule)
2157 goto nla_put_failure;
2158
2159 NLA_PUT_U32(msg, NL80211_ATTR_REG_RULE_FLAGS,
2160 reg_rule->flags);
2161 NLA_PUT_U32(msg, NL80211_ATTR_FREQ_RANGE_START,
2162 freq_range->start_freq_khz);
2163 NLA_PUT_U32(msg, NL80211_ATTR_FREQ_RANGE_END,
2164 freq_range->end_freq_khz);
2165 NLA_PUT_U32(msg, NL80211_ATTR_FREQ_RANGE_MAX_BW,
2166 freq_range->max_bandwidth_khz);
2167 NLA_PUT_U32(msg, NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN,
2168 power_rule->max_antenna_gain);
2169 NLA_PUT_U32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP,
2170 power_rule->max_eirp);
2171
2172 nla_nest_end(msg, nl_reg_rule);
2173 }
2174
2175 nla_nest_end(msg, nl_reg_rules);
2176
2177 genlmsg_end(msg, hdr);
2178 err = genlmsg_unicast(msg, info->snd_pid);
2179 goto out;
2180
2181nla_put_failure:
2182 genlmsg_cancel(msg, hdr);
2183 err = -EMSGSIZE;
2184out:
2185 mutex_unlock(&cfg80211_mutex);
2186 return err;
2187}
2188
2072static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) 2189static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
2073{ 2190{
2074 struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; 2191 struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1];
@@ -2124,9 +2241,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
2124 2241
2125 BUG_ON(rule_idx != num_rules); 2242 BUG_ON(rule_idx != num_rules);
2126 2243
2127 mutex_lock(&cfg80211_drv_mutex); 2244 mutex_lock(&cfg80211_mutex);
2128 r = set_regdom(rd); 2245 r = set_regdom(rd);
2129 mutex_unlock(&cfg80211_drv_mutex); 2246 mutex_unlock(&cfg80211_mutex);
2130 return r; 2247 return r;
2131 2248
2132 bad_reg: 2249 bad_reg:
@@ -2134,6 +2251,302 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
2134 return -EINVAL; 2251 return -EINVAL;
2135} 2252}
2136 2253
2254static int nl80211_set_mgmt_extra_ie(struct sk_buff *skb,
2255 struct genl_info *info)
2256{
2257 struct cfg80211_registered_device *drv;
2258 int err;
2259 struct net_device *dev;
2260 struct mgmt_extra_ie_params params;
2261
2262 memset(&params, 0, sizeof(params));
2263
2264 if (!info->attrs[NL80211_ATTR_MGMT_SUBTYPE])
2265 return -EINVAL;
2266 params.subtype = nla_get_u8(info->attrs[NL80211_ATTR_MGMT_SUBTYPE]);
2267 if (params.subtype > 15)
2268 return -EINVAL; /* FC Subtype field is 4 bits (0..15) */
2269
2270 if (info->attrs[NL80211_ATTR_IE]) {
2271 params.ies = nla_data(info->attrs[NL80211_ATTR_IE]);
2272 params.ies_len = nla_len(info->attrs[NL80211_ATTR_IE]);
2273 }
2274
2275 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
2276 if (err)
2277 return err;
2278
2279 if (drv->ops->set_mgmt_extra_ie) {
2280 rtnl_lock();
2281 err = drv->ops->set_mgmt_extra_ie(&drv->wiphy, dev, &params);
2282 rtnl_unlock();
2283 } else
2284 err = -EOPNOTSUPP;
2285
2286 cfg80211_put_dev(drv);
2287 dev_put(dev);
2288 return err;
2289}
2290
2291static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
2292{
2293 struct cfg80211_registered_device *drv;
2294 struct net_device *dev;
2295 struct cfg80211_scan_request *request;
2296 struct cfg80211_ssid *ssid;
2297 struct ieee80211_channel *channel;
2298 struct nlattr *attr;
2299 struct wiphy *wiphy;
2300 int err, tmp, n_ssids = 0, n_channels = 0, i;
2301 enum ieee80211_band band;
2302 size_t ie_len;
2303
2304 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
2305 if (err)
2306 return err;
2307
2308 wiphy = &drv->wiphy;
2309
2310 if (!drv->ops->scan) {
2311 err = -EOPNOTSUPP;
2312 goto out;
2313 }
2314
2315 rtnl_lock();
2316
2317 if (drv->scan_req) {
2318 err = -EBUSY;
2319 goto out_unlock;
2320 }
2321
2322 if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
2323 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], tmp)
2324 n_channels++;
2325 if (!n_channels) {
2326 err = -EINVAL;
2327 goto out_unlock;
2328 }
2329 } else {
2330 for (band = 0; band < IEEE80211_NUM_BANDS; band++)
2331 if (wiphy->bands[band])
2332 n_channels += wiphy->bands[band]->n_channels;
2333 }
2334
2335 if (info->attrs[NL80211_ATTR_SCAN_SSIDS])
2336 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp)
2337 n_ssids++;
2338
2339 if (n_ssids > wiphy->max_scan_ssids) {
2340 err = -EINVAL;
2341 goto out_unlock;
2342 }
2343
2344 if (info->attrs[NL80211_ATTR_IE])
2345 ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
2346 else
2347 ie_len = 0;
2348
2349 request = kzalloc(sizeof(*request)
2350 + sizeof(*ssid) * n_ssids
2351 + sizeof(channel) * n_channels
2352 + ie_len, GFP_KERNEL);
2353 if (!request) {
2354 err = -ENOMEM;
2355 goto out_unlock;
2356 }
2357
2358 request->channels = (void *)((char *)request + sizeof(*request));
2359 request->n_channels = n_channels;
2360 if (n_ssids)
2361 request->ssids = (void *)(request->channels + n_channels);
2362 request->n_ssids = n_ssids;
2363 if (ie_len) {
2364 if (request->ssids)
2365 request->ie = (void *)(request->ssids + n_ssids);
2366 else
2367 request->ie = (void *)(request->channels + n_channels);
2368 }
2369
2370 if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
2371 /* user specified, bail out if channel not found */
2372 request->n_channels = n_channels;
2373 i = 0;
2374 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], tmp) {
2375 request->channels[i] = ieee80211_get_channel(wiphy, nla_get_u32(attr));
2376 if (!request->channels[i]) {
2377 err = -EINVAL;
2378 goto out_free;
2379 }
2380 i++;
2381 }
2382 } else {
2383 /* all channels */
2384 i = 0;
2385 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
2386 int j;
2387 if (!wiphy->bands[band])
2388 continue;
2389 for (j = 0; j < wiphy->bands[band]->n_channels; j++) {
2390 request->channels[i] = &wiphy->bands[band]->channels[j];
2391 i++;
2392 }
2393 }
2394 }
2395
2396 i = 0;
2397 if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) {
2398 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) {
 2399 			if (nla_len(attr) > IEEE80211_MAX_SSID_LEN) {
2400 err = -EINVAL;
2401 goto out_free;
2402 }
2403 memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr));
2404 request->ssids[i].ssid_len = nla_len(attr);
2405 i++;
2406 }
2407 }
2408
2409 if (info->attrs[NL80211_ATTR_IE]) {
2410 request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
2411 memcpy(request->ie, nla_data(info->attrs[NL80211_ATTR_IE]),
2412 request->ie_len);
2413 }
2414
2415 request->ifidx = dev->ifindex;
2416 request->wiphy = &drv->wiphy;
2417
2418 drv->scan_req = request;
2419 err = drv->ops->scan(&drv->wiphy, dev, request);
2420
2421 out_free:
2422 if (err) {
2423 drv->scan_req = NULL;
2424 kfree(request);
2425 }
2426 out_unlock:
2427 rtnl_unlock();
2428 out:
2429 cfg80211_put_dev(drv);
2430 dev_put(dev);
2431 return err;
2432}
2433
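
nl80211_trigger_scan packs the request header, the channel pointer array, the SSID array and the extra IEs into a single kzalloc(), then carves the trailing regions out with pointer arithmetic so one kfree() releases everything. The following is a stand-alone userspace toy of the same layout trick, with made-up type and function names rather than cfg80211's real structures:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct ssid { unsigned char ssid[32]; size_t ssid_len; };
struct chan { int center_freq; };

struct scan_req {
	struct chan **channels;		/* points into the same allocation */
	struct ssid *ssids;		/* ditto */
	unsigned char *ie;		/* ditto */
	int n_channels, n_ssids;
	size_t ie_len;
};

static struct scan_req *scan_req_alloc(int n_channels, int n_ssids, size_t ie_len)
{
	struct scan_req *req;

	/* one allocation: header, then channel pointers, then SSIDs, then IEs */
	req = calloc(1, sizeof(*req)
		      + n_channels * sizeof(struct chan *)
		      + n_ssids * sizeof(struct ssid)
		      + ie_len);
	if (!req)
		return NULL;

	req->channels = (void *)(req + 1);
	req->ssids = (void *)(req->channels + n_channels);
	req->ie = (void *)(req->ssids + n_ssids);
	req->n_channels = n_channels;
	req->n_ssids = n_ssids;
	req->ie_len = ie_len;
	return req;
}

int main(void)
{
	struct scan_req *req = scan_req_alloc(3, 1, 8);

	if (!req)
		return 1;
	memcpy(req->ssids[0].ssid, "example", 7);
	req->ssids[0].ssid_len = 7;
	printf("ssids at offset %zu, ie at offset %zu\n",
	       (size_t)((char *)req->ssids - (char *)req),
	       (size_t)((char *)req->ie - (char *)req));
	free(req);	/* the whole request goes away in one free() */
	return 0;
}
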
2434static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags,
2435 struct cfg80211_registered_device *rdev,
2436 struct net_device *dev,
2437 struct cfg80211_bss *res)
2438{
2439 void *hdr;
2440 struct nlattr *bss;
2441
2442 hdr = nl80211hdr_put(msg, pid, seq, flags,
2443 NL80211_CMD_NEW_SCAN_RESULTS);
2444 if (!hdr)
2445 return -1;
2446
2447 NLA_PUT_U32(msg, NL80211_ATTR_SCAN_GENERATION,
2448 rdev->bss_generation);
2449 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
2450
2451 bss = nla_nest_start(msg, NL80211_ATTR_BSS);
2452 if (!bss)
2453 goto nla_put_failure;
2454 if (!is_zero_ether_addr(res->bssid))
2455 NLA_PUT(msg, NL80211_BSS_BSSID, ETH_ALEN, res->bssid);
2456 if (res->information_elements && res->len_information_elements)
2457 NLA_PUT(msg, NL80211_BSS_INFORMATION_ELEMENTS,
2458 res->len_information_elements,
2459 res->information_elements);
2460 if (res->tsf)
2461 NLA_PUT_U64(msg, NL80211_BSS_TSF, res->tsf);
2462 if (res->beacon_interval)
2463 NLA_PUT_U16(msg, NL80211_BSS_BEACON_INTERVAL, res->beacon_interval);
2464 NLA_PUT_U16(msg, NL80211_BSS_CAPABILITY, res->capability);
2465 NLA_PUT_U32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq);
2466
2467 switch (rdev->wiphy.signal_type) {
2468 case CFG80211_SIGNAL_TYPE_MBM:
2469 NLA_PUT_U32(msg, NL80211_BSS_SIGNAL_MBM, res->signal);
2470 break;
2471 case CFG80211_SIGNAL_TYPE_UNSPEC:
2472 NLA_PUT_U8(msg, NL80211_BSS_SIGNAL_UNSPEC, res->signal);
2473 break;
2474 default:
2475 break;
2476 }
2477
2478 nla_nest_end(msg, bss);
2479
2480 return genlmsg_end(msg, hdr);
2481
2482 nla_put_failure:
2483 genlmsg_cancel(msg, hdr);
2484 return -EMSGSIZE;
2485}
2486
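
The signal attribute emitted above comes in two flavours: NL80211_BSS_SIGNAL_MBM carries an s32 in mBm (1/100 dBm), while NL80211_BSS_SIGNAL_UNSPEC is a u8 on an unspecified 0..100 scale. A tiny userspace example of converting the mBm form for display (the sample value is made up):

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int signal_mbm = -4250;	/* as received via NL80211_BSS_SIGNAL_MBM */

	/* mBm is 100 * dBm, so -4250 mBm prints as -42.50 dBm */
	printf("%d.%02d dBm\n", signal_mbm / 100, abs(signal_mbm) % 100);
	return 0;
}
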
2487static int nl80211_dump_scan(struct sk_buff *skb,
2488 struct netlink_callback *cb)
2489{
2490 struct cfg80211_registered_device *dev;
2491 struct net_device *netdev;
2492 struct cfg80211_internal_bss *scan;
2493 int ifidx = cb->args[0];
2494 int start = cb->args[1], idx = 0;
2495 int err;
2496
2497 if (!ifidx) {
2498 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
2499 nl80211_fam.attrbuf, nl80211_fam.maxattr,
2500 nl80211_policy);
2501 if (err)
2502 return err;
2503
2504 if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX])
2505 return -EINVAL;
2506
2507 ifidx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]);
2508 if (!ifidx)
2509 return -EINVAL;
2510 cb->args[0] = ifidx;
2511 }
2512
2513 netdev = dev_get_by_index(&init_net, ifidx);
2514 if (!netdev)
2515 return -ENODEV;
2516
2517 dev = cfg80211_get_dev_from_ifindex(ifidx);
2518 if (IS_ERR(dev)) {
2519 err = PTR_ERR(dev);
2520 goto out_put_netdev;
2521 }
2522
2523 spin_lock_bh(&dev->bss_lock);
2524 cfg80211_bss_expire(dev);
2525
2526 list_for_each_entry(scan, &dev->bss_list, list) {
2527 if (++idx <= start)
2528 continue;
2529 if (nl80211_send_bss(skb,
2530 NETLINK_CB(cb->skb).pid,
2531 cb->nlh->nlmsg_seq, NLM_F_MULTI,
2532 dev, netdev, &scan->pub) < 0) {
2533 idx--;
2534 goto out;
2535 }
2536 }
2537
2538 out:
2539 spin_unlock_bh(&dev->bss_lock);
2540
2541 cb->args[1] = idx;
2542 err = skb->len;
2543 cfg80211_put_dev(dev);
2544 out_put_netdev:
2545 dev_put(netdev);
2546
2547 return err;
2548}
2549
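
nl80211_dump_scan follows the usual generic-netlink dump convention: cb->args[] persists across invocations, so the handler skips the entries it already delivered, stops as soon as a BSS no longer fits in the message, and stores the new cursor so the next call resumes there. A self-contained userspace analogy of that cursor pattern (names and batch size are invented):

#include <stdio.h>

#define BATCH 4

static int dump_batch(const int *items, int n_items, int *cursor, int *out)
{
	int idx, filled = 0;

	for (idx = 0; idx < n_items; idx++) {
		if (idx < *cursor)
			continue;		/* already delivered earlier */
		if (filled == BATCH)
			break;			/* "skb" is full, stop here */
		out[filled++] = items[idx];
	}
	*cursor = idx;				/* resume point, like cb->args[1] */
	return filled;
}

int main(void)
{
	int items[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
	int out[BATCH], cursor = 0, n;

	while ((n = dump_batch(items, 10, &cursor, out)) > 0)
		printf("batch of %d, cursor now %d\n", n, cursor);
	return 0;
}
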
2137static struct genl_ops nl80211_ops[] = { 2550static struct genl_ops nl80211_ops[] = {
2138 { 2551 {
2139 .cmd = NL80211_CMD_GET_WIPHY, 2552 .cmd = NL80211_CMD_GET_WIPHY,
@@ -2220,7 +2633,6 @@ static struct genl_ops nl80211_ops[] = {
2220 .doit = nl80211_get_station, 2633 .doit = nl80211_get_station,
2221 .dumpit = nl80211_dump_station, 2634 .dumpit = nl80211_dump_station,
2222 .policy = nl80211_policy, 2635 .policy = nl80211_policy,
2223 .flags = GENL_ADMIN_PERM,
2224 }, 2636 },
2225 { 2637 {
2226 .cmd = NL80211_CMD_SET_STATION, 2638 .cmd = NL80211_CMD_SET_STATION,
@@ -2272,6 +2684,12 @@ static struct genl_ops nl80211_ops[] = {
2272 .flags = GENL_ADMIN_PERM, 2684 .flags = GENL_ADMIN_PERM,
2273 }, 2685 },
2274 { 2686 {
2687 .cmd = NL80211_CMD_GET_REG,
2688 .doit = nl80211_get_reg,
2689 .policy = nl80211_policy,
2690 /* can be retrieved by unprivileged users */
2691 },
2692 {
2275 .cmd = NL80211_CMD_SET_REG, 2693 .cmd = NL80211_CMD_SET_REG,
2276 .doit = nl80211_set_reg, 2694 .doit = nl80211_set_reg,
2277 .policy = nl80211_policy, 2695 .policy = nl80211_policy,
@@ -2295,12 +2713,32 @@ static struct genl_ops nl80211_ops[] = {
2295 .policy = nl80211_policy, 2713 .policy = nl80211_policy,
2296 .flags = GENL_ADMIN_PERM, 2714 .flags = GENL_ADMIN_PERM,
2297 }, 2715 },
2716 {
2717 .cmd = NL80211_CMD_SET_MGMT_EXTRA_IE,
2718 .doit = nl80211_set_mgmt_extra_ie,
2719 .policy = nl80211_policy,
2720 .flags = GENL_ADMIN_PERM,
2721 },
2722 {
2723 .cmd = NL80211_CMD_TRIGGER_SCAN,
2724 .doit = nl80211_trigger_scan,
2725 .policy = nl80211_policy,
2726 .flags = GENL_ADMIN_PERM,
2727 },
2728 {
2729 .cmd = NL80211_CMD_GET_SCAN,
2730 .policy = nl80211_policy,
2731 .dumpit = nl80211_dump_scan,
2732 },
2298}; 2733};
2299 2734
2300/* multicast groups */ 2735/* multicast groups */
2301static struct genl_multicast_group nl80211_config_mcgrp = { 2736static struct genl_multicast_group nl80211_config_mcgrp = {
2302 .name = "config", 2737 .name = "config",
2303}; 2738};
2739static struct genl_multicast_group nl80211_scan_mcgrp = {
2740 .name = "scan",
2741};
2304 2742
2305/* notification functions */ 2743/* notification functions */
2306 2744
@@ -2320,6 +2758,66 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
2320 genlmsg_multicast(msg, 0, nl80211_config_mcgrp.id, GFP_KERNEL); 2758 genlmsg_multicast(msg, 0, nl80211_config_mcgrp.id, GFP_KERNEL);
2321} 2759}
2322 2760
2761static int nl80211_send_scan_donemsg(struct sk_buff *msg,
2762 struct cfg80211_registered_device *rdev,
2763 struct net_device *netdev,
2764 u32 pid, u32 seq, int flags,
2765 u32 cmd)
2766{
2767 void *hdr;
2768
2769 hdr = nl80211hdr_put(msg, pid, seq, flags, cmd);
2770 if (!hdr)
2771 return -1;
2772
2773 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
2774 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
2775
2776 /* XXX: we should probably bounce back the request? */
2777
2778 return genlmsg_end(msg, hdr);
2779
2780 nla_put_failure:
2781 genlmsg_cancel(msg, hdr);
2782 return -EMSGSIZE;
2783}
2784
2785void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
2786 struct net_device *netdev)
2787{
2788 struct sk_buff *msg;
2789
2790 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
2791 if (!msg)
2792 return;
2793
2794 if (nl80211_send_scan_donemsg(msg, rdev, netdev, 0, 0, 0,
2795 NL80211_CMD_NEW_SCAN_RESULTS) < 0) {
2796 nlmsg_free(msg);
2797 return;
2798 }
2799
2800 genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL);
2801}
2802
2803void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
2804 struct net_device *netdev)
2805{
2806 struct sk_buff *msg;
2807
2808 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
2809 if (!msg)
2810 return;
2811
2812 if (nl80211_send_scan_donemsg(msg, rdev, netdev, 0, 0, 0,
2813 NL80211_CMD_SCAN_ABORTED) < 0) {
2814 nlmsg_free(msg);
2815 return;
2816 }
2817
2818 genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL);
2819}
2820
2323/* initialisation/exit functions */ 2821/* initialisation/exit functions */
2324 2822
2325int nl80211_init(void) 2823int nl80211_init(void)
@@ -2340,6 +2838,10 @@ int nl80211_init(void)
2340 if (err) 2838 if (err)
2341 goto err_out; 2839 goto err_out;
2342 2840
2841 err = genl_register_mc_group(&nl80211_fam, &nl80211_scan_mcgrp);
2842 if (err)
2843 goto err_out;
2844
2343 return 0; 2845 return 0;
2344 err_out: 2846 err_out:
2345 genl_unregister_family(&nl80211_fam); 2847 genl_unregister_family(&nl80211_fam);
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index f3ea5c029aee..69787b621365 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -7,6 +7,10 @@
7extern int nl80211_init(void); 7extern int nl80211_init(void);
8extern void nl80211_exit(void); 8extern void nl80211_exit(void);
9extern void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev); 9extern void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev);
10extern void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
11 struct net_device *netdev);
12extern void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
13 struct net_device *netdev);
10#else 14#else
11static inline int nl80211_init(void) 15static inline int nl80211_init(void)
12{ 16{
@@ -19,6 +23,14 @@ static inline void nl80211_notify_dev_rename(
19 struct cfg80211_registered_device *rdev) 23 struct cfg80211_registered_device *rdev)
20{ 24{
21} 25}
26static inline void
27nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
28 struct net_device *netdev)
29{}
30static inline void nl80211_send_scan_aborted(
31 struct cfg80211_registered_device *rdev,
32 struct net_device *netdev)
33{}
22#endif /* CONFIG_NL80211 */ 34#endif /* CONFIG_NL80211 */
23 35
24#endif /* __NET_WIRELESS_NL80211_H */ 36#endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 85c9034c59b2..ce66bfdf57ec 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -42,38 +42,6 @@
42#include "core.h" 42#include "core.h"
43#include "reg.h" 43#include "reg.h"
44 44
45/**
46 * struct regulatory_request - receipt of last regulatory request
47 *
48 * @wiphy: this is set if this request's initiator is
49 * %REGDOM_SET_BY_COUNTRY_IE or %REGDOM_SET_BY_DRIVER. This
50 * can be used by the wireless core to deal with conflicts
51 * and potentially inform users of which devices specifically
52 * cased the conflicts.
53 * @initiator: indicates who sent this request, could be any of
54 * of those set in reg_set_by, %REGDOM_SET_BY_*
55 * @alpha2: the ISO / IEC 3166 alpha2 country code of the requested
56 * regulatory domain. We have a few special codes:
57 * 00 - World regulatory domain
58 * 99 - built by driver but a specific alpha2 cannot be determined
59 * 98 - result of an intersection between two regulatory domains
60 * @intersect: indicates whether the wireless core should intersect
61 * the requested regulatory domain with the presently set regulatory
62 * domain.
63 * @country_ie_checksum: checksum of the last processed and accepted
64 * country IE
65 * @country_ie_env: lets us know if the AP is telling us we are outdoor,
66 * indoor, or if it doesn't matter
67 */
68struct regulatory_request {
69 struct wiphy *wiphy;
70 enum reg_set_by initiator;
71 char alpha2[2];
72 bool intersect;
73 u32 country_ie_checksum;
74 enum environment_cap country_ie_env;
75};
76
77/* Receipt of information from last regulatory request */ 45/* Receipt of information from last regulatory request */
78static struct regulatory_request *last_request; 46static struct regulatory_request *last_request;
79 47
@@ -86,22 +54,52 @@ static u32 supported_bandwidths[] = {
86 MHZ_TO_KHZ(20), 54 MHZ_TO_KHZ(20),
87}; 55};
88 56
89/* Central wireless core regulatory domains, we only need two, 57/*
58 * Central wireless core regulatory domains, we only need two,
90 * the current one and a world regulatory domain in case we have no 59 * the current one and a world regulatory domain in case we have no
91 * information to give us an alpha2 */ 60 * information to give us an alpha2
92static const struct ieee80211_regdomain *cfg80211_regdomain; 61 */
62const struct ieee80211_regdomain *cfg80211_regdomain;
93 63
94/* We use this as a place for the rd structure built from the 64/*
65 * We use this as a place for the rd structure built from the
95 * last parsed country IE to rest until CRDA gets back to us with 66 * last parsed country IE to rest until CRDA gets back to us with
96 * what it thinks should apply for the same country */ 67 * what it thinks should apply for the same country
68 */
97static const struct ieee80211_regdomain *country_ie_regdomain; 69static const struct ieee80211_regdomain *country_ie_regdomain;
98 70
71/* Used to queue up regulatory hints */
72static LIST_HEAD(reg_requests_list);
73static spinlock_t reg_requests_lock;
74
75/* Used to queue up beacon hints for review */
76static LIST_HEAD(reg_pending_beacons);
77static spinlock_t reg_pending_beacons_lock;
78
79/* Used to keep track of processed beacon hints */
80static LIST_HEAD(reg_beacon_list);
81
82struct reg_beacon {
83 struct list_head list;
84 struct ieee80211_channel chan;
85};
86
99/* We keep a static world regulatory domain in case of the absence of CRDA */ 87/* We keep a static world regulatory domain in case of the absence of CRDA */
100static const struct ieee80211_regdomain world_regdom = { 88static const struct ieee80211_regdomain world_regdom = {
101 .n_reg_rules = 1, 89 .n_reg_rules = 3,
102 .alpha2 = "00", 90 .alpha2 = "00",
103 .reg_rules = { 91 .reg_rules = {
104 REG_RULE(2412-10, 2462+10, 40, 6, 20, 92 /* IEEE 802.11b/g, channels 1..11 */
93 REG_RULE(2412-10, 2462+10, 40, 6, 20, 0),
94 /* IEEE 802.11a, channel 36..48 */
95 REG_RULE(5180-10, 5240+10, 40, 6, 23,
96 NL80211_RRF_PASSIVE_SCAN |
97 NL80211_RRF_NO_IBSS),
98
99 /* NB: 5260 MHz - 5700 MHz requies DFS */
100
101 /* IEEE 802.11a, channel 149..165 */
102 REG_RULE(5745-10, 5825+10, 40, 6, 23,
105 NL80211_RRF_PASSIVE_SCAN | 103 NL80211_RRF_PASSIVE_SCAN |
106 NL80211_RRF_NO_IBSS), 104 NL80211_RRF_NO_IBSS),
107 } 105 }
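
For readers unfamiliar with the macro, the REG_RULE() arguments above are, in order: start and end of the frequency range in MHz (hence the +/-10 to reach the channel edges), maximum bandwidth in MHz, maximum antenna gain in dBi, maximum EIRP in dBm, and the flags. Read that way, the first world-regdomain rule expands to roughly the following (annotated restatement, not new code):

/* IEEE 802.11b/g, channels 1..11 */
REG_RULE(2412 - 10,	/* 2402 MHz, lower edge of channel 1 */
	 2462 + 10,	/* 2472 MHz, upper edge of channel 11 */
	 40,		/* up to 40 MHz of bandwidth */
	 6,		/* max antenna gain: 6 dBi */
	 20,		/* max EIRP: 20 dBm */
	 0),		/* no PASSIVE_SCAN/NO_IBSS restrictions */
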
@@ -115,9 +113,11 @@ static char *ieee80211_regdom = "US";
115module_param(ieee80211_regdom, charp, 0444); 113module_param(ieee80211_regdom, charp, 0444);
116MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); 114MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code");
117 115
118/* We assume 40 MHz bandwidth for the old regulatory work. 116/*
117 * We assume 40 MHz bandwidth for the old regulatory work.
119 * We make emphasis we are using the exact same frequencies 118 * We make emphasis we are using the exact same frequencies
120 * as before */ 119 * as before
120 */
121 121
122static const struct ieee80211_regdomain us_regdom = { 122static const struct ieee80211_regdomain us_regdom = {
123 .n_reg_rules = 6, 123 .n_reg_rules = 6,
@@ -156,8 +156,10 @@ static const struct ieee80211_regdomain jp_regdom = {
156 156
157static const struct ieee80211_regdomain eu_regdom = { 157static const struct ieee80211_regdomain eu_regdom = {
158 .n_reg_rules = 6, 158 .n_reg_rules = 6,
159 /* This alpha2 is bogus, we leave it here just for stupid 159 /*
160 * backward compatibility */ 160 * This alpha2 is bogus, we leave it here just for stupid
161 * backward compatibility
162 */
161 .alpha2 = "EU", 163 .alpha2 = "EU",
162 .reg_rules = { 164 .reg_rules = {
163 /* IEEE 802.11b/g, channels 1..13 */ 165 /* IEEE 802.11b/g, channels 1..13 */
@@ -226,8 +228,10 @@ static void reset_regdomains(void)
226 cfg80211_regdomain = NULL; 228 cfg80211_regdomain = NULL;
227} 229}
228 230
229/* Dynamic world regulatory domain requested by the wireless 231/*
230 * core upon initialization */ 232 * Dynamic world regulatory domain requested by the wireless
233 * core upon initialization
234 */
231static void update_world_regdomain(const struct ieee80211_regdomain *rd) 235static void update_world_regdomain(const struct ieee80211_regdomain *rd)
232{ 236{
233 BUG_ON(!last_request); 237 BUG_ON(!last_request);
@@ -268,8 +272,10 @@ static bool is_unknown_alpha2(const char *alpha2)
268{ 272{
269 if (!alpha2) 273 if (!alpha2)
270 return false; 274 return false;
271 /* Special case where regulatory domain was built by driver 275 /*
272 * but a specific alpha2 cannot be determined */ 276 * Special case where regulatory domain was built by driver
277 * but a specific alpha2 cannot be determined
278 */
273 if (alpha2[0] == '9' && alpha2[1] == '9') 279 if (alpha2[0] == '9' && alpha2[1] == '9')
274 return true; 280 return true;
275 return false; 281 return false;
@@ -279,9 +285,11 @@ static bool is_intersected_alpha2(const char *alpha2)
279{ 285{
280 if (!alpha2) 286 if (!alpha2)
281 return false; 287 return false;
282 /* Special case where regulatory domain is the 288 /*
289 * Special case where regulatory domain is the
283 * result of an intersection between two regulatory domain 290 * result of an intersection between two regulatory domain
284 * structures */ 291 * structures
292 */
285 if (alpha2[0] == '9' && alpha2[1] == '8') 293 if (alpha2[0] == '9' && alpha2[1] == '8')
286 return true; 294 return true;
287 return false; 295 return false;
@@ -306,8 +314,10 @@ static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y)
306 return false; 314 return false;
307} 315}
308 316
309static bool regdom_changed(const char *alpha2) 317static bool regdom_changes(const char *alpha2)
310{ 318{
319 assert_cfg80211_lock();
320
311 if (!cfg80211_regdomain) 321 if (!cfg80211_regdomain)
312 return true; 322 return true;
313 if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) 323 if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2))
@@ -334,8 +344,10 @@ static bool country_ie_integrity_changes(u32 checksum)
334 return false; 344 return false;
335} 345}
336 346
337/* This lets us keep regulatory code which is updated on a regulatory 347/*
338 * basis in userspace. */ 348 * This lets us keep regulatory code which is updated on a regulatory
349 * basis in userspace.
350 */
339static int call_crda(const char *alpha2) 351static int call_crda(const char *alpha2)
340{ 352{
341 char country_env[9 + 2] = "COUNTRY="; 353 char country_env[9 + 2] = "COUNTRY=";
@@ -446,10 +458,12 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range,
446#undef ONE_GHZ_IN_KHZ 458#undef ONE_GHZ_IN_KHZ
447} 459}
448 460
449/* Converts a country IE to a regulatory domain. A regulatory domain 461/*
462 * Converts a country IE to a regulatory domain. A regulatory domain
450 * structure has a lot of information which the IE doesn't yet have, 463 * structure has a lot of information which the IE doesn't yet have,
451 * so for the other values we use upper max values as we will intersect 464 * so for the other values we use upper max values as we will intersect
452 * with our userspace regulatory agent to get lower bounds. */ 465 * with our userspace regulatory agent to get lower bounds.
466 */
453static struct ieee80211_regdomain *country_ie_2_rd( 467static struct ieee80211_regdomain *country_ie_2_rd(
454 u8 *country_ie, 468 u8 *country_ie,
455 u8 country_ie_len, 469 u8 country_ie_len,
@@ -494,9 +508,11 @@ static struct ieee80211_regdomain *country_ie_2_rd(
494 508
495 *checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8); 509 *checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8);
496 510
497 /* We need to build a reg rule for each triplet, but first we must 511 /*
512 * We need to build a reg rule for each triplet, but first we must
498 * calculate the number of reg rules we will need. We will need one 513 * calculate the number of reg rules we will need. We will need one
499 * for each channel subband */ 514 * for each channel subband
515 */
500 while (country_ie_len >= 3) { 516 while (country_ie_len >= 3) {
501 int end_channel = 0; 517 int end_channel = 0;
502 struct ieee80211_country_ie_triplet *triplet = 518 struct ieee80211_country_ie_triplet *triplet =
@@ -534,9 +550,11 @@ static struct ieee80211_regdomain *country_ie_2_rd(
534 if (cur_sub_max_channel < cur_channel) 550 if (cur_sub_max_channel < cur_channel)
535 return NULL; 551 return NULL;
536 552
537 /* Do not allow overlapping channels. Also channels 553 /*
554 * Do not allow overlapping channels. Also channels
538 * passed in each subband must be monotonically 555 * passed in each subband must be monotonically
539 * increasing */ 556 * increasing
557 */
540 if (last_sub_max_channel) { 558 if (last_sub_max_channel) {
541 if (cur_channel <= last_sub_max_channel) 559 if (cur_channel <= last_sub_max_channel)
542 return NULL; 560 return NULL;
@@ -544,10 +562,12 @@ static struct ieee80211_regdomain *country_ie_2_rd(
544 return NULL; 562 return NULL;
545 } 563 }
546 564
547 /* When dot11RegulatoryClassesRequired is supported 565 /*
566 * When dot11RegulatoryClassesRequired is supported
548 * we can throw ext triplets as part of this soup, 567 * we can throw ext triplets as part of this soup,
549 * for now we don't care when those change as we 568 * for now we don't care when those change as we
550 * don't support them */ 569 * don't support them
570 */
551 *checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) | 571 *checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) |
552 ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) | 572 ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) |
553 ((triplet->chans.max_power ^ cur_sub_max_channel) << 24); 573 ((triplet->chans.max_power ^ cur_sub_max_channel) << 24);
@@ -558,8 +578,10 @@ static struct ieee80211_regdomain *country_ie_2_rd(
558 country_ie_len -= 3; 578 country_ie_len -= 3;
559 num_rules++; 579 num_rules++;
560 580
561 /* Note: this is not a IEEE requirement but 581 /*
562 * simply a memory requirement */ 582 * Note: this is not a IEEE requirement but
583 * simply a memory requirement
584 */
563 if (num_rules > NL80211_MAX_SUPP_REG_RULES) 585 if (num_rules > NL80211_MAX_SUPP_REG_RULES)
564 return NULL; 586 return NULL;
565 } 587 }
@@ -587,8 +609,10 @@ static struct ieee80211_regdomain *country_ie_2_rd(
587 struct ieee80211_freq_range *freq_range = NULL; 609 struct ieee80211_freq_range *freq_range = NULL;
588 struct ieee80211_power_rule *power_rule = NULL; 610 struct ieee80211_power_rule *power_rule = NULL;
589 611
590 /* Must parse if dot11RegulatoryClassesRequired is true, 612 /*
591 * we don't support this yet */ 613 * Must parse if dot11RegulatoryClassesRequired is true,
614 * we don't support this yet
615 */
592 if (triplet->ext.reg_extension_id >= 616 if (triplet->ext.reg_extension_id >=
593 IEEE80211_COUNTRY_EXTENSION_ID) { 617 IEEE80211_COUNTRY_EXTENSION_ID) {
594 country_ie += 3; 618 country_ie += 3;
@@ -610,10 +634,12 @@ static struct ieee80211_regdomain *country_ie_2_rd(
610 end_channel = triplet->chans.first_channel + 634 end_channel = triplet->chans.first_channel +
611 (4 * (triplet->chans.num_channels - 1)); 635 (4 * (triplet->chans.num_channels - 1));
612 636
613 /* The +10 is since the regulatory domain expects 637 /*
638 * The +10 is since the regulatory domain expects
614 * the actual band edge, not the center of freq for 639 * the actual band edge, not the center of freq for
615 * its start and end freqs, assuming 20 MHz bandwidth on 640 * its start and end freqs, assuming 20 MHz bandwidth on
616 * the channels passed */ 641 * the channels passed
642 */
617 freq_range->start_freq_khz = 643 freq_range->start_freq_khz =
618 MHZ_TO_KHZ(ieee80211_channel_to_frequency( 644 MHZ_TO_KHZ(ieee80211_channel_to_frequency(
619 triplet->chans.first_channel) - 10); 645 triplet->chans.first_channel) - 10);
@@ -621,9 +647,11 @@ static struct ieee80211_regdomain *country_ie_2_rd(
621 MHZ_TO_KHZ(ieee80211_channel_to_frequency( 647 MHZ_TO_KHZ(ieee80211_channel_to_frequency(
622 end_channel) + 10); 648 end_channel) + 10);
623 649
624 /* Large arbitrary values, we intersect later */ 650 /*
625 /* Increment this if we ever support >= 40 MHz channels 651 * These are large arbitrary values we use to intersect later.
626 * in IEEE 802.11 */ 652 * Increment this if we ever support >= 40 MHz channels
653 * in IEEE 802.11
654 */
627 freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40); 655 freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40);
628 power_rule->max_antenna_gain = DBI_TO_MBI(100); 656 power_rule->max_antenna_gain = DBI_TO_MBI(100);
629 power_rule->max_eirp = DBM_TO_MBM(100); 657 power_rule->max_eirp = DBM_TO_MBM(100);
@@ -639,8 +667,10 @@ static struct ieee80211_regdomain *country_ie_2_rd(
639} 667}
640 668
641 669
642/* Helper for regdom_intersect(), this does the real 670/*
643 * mathematical intersection fun */ 671 * Helper for regdom_intersect(), this does the real
672 * mathematical intersection fun
673 */
644static int reg_rules_intersect( 674static int reg_rules_intersect(
645 const struct ieee80211_reg_rule *rule1, 675 const struct ieee80211_reg_rule *rule1,
646 const struct ieee80211_reg_rule *rule2, 676 const struct ieee80211_reg_rule *rule2,
@@ -718,11 +748,13 @@ static struct ieee80211_regdomain *regdom_intersect(
718 if (!rd1 || !rd2) 748 if (!rd1 || !rd2)
719 return NULL; 749 return NULL;
720 750
721 /* First we get a count of the rules we'll need, then we actually 751 /*
752 * First we get a count of the rules we'll need, then we actually
722 * build them. This is to so we can malloc() and free() a 753 * build them. This is to so we can malloc() and free() a
723 * regdomain once. The reason we use reg_rules_intersect() here 754 * regdomain once. The reason we use reg_rules_intersect() here
724 * is it will return -EINVAL if the rule computed makes no sense. 755 * is it will return -EINVAL if the rule computed makes no sense.
725 * All rules that do check out OK are valid. */ 756 * All rules that do check out OK are valid.
757 */
726 758
727 for (x = 0; x < rd1->n_reg_rules; x++) { 759 for (x = 0; x < rd1->n_reg_rules; x++) {
728 rule1 = &rd1->reg_rules[x]; 760 rule1 = &rd1->reg_rules[x];
@@ -750,14 +782,18 @@ static struct ieee80211_regdomain *regdom_intersect(
750 rule1 = &rd1->reg_rules[x]; 782 rule1 = &rd1->reg_rules[x];
751 for (y = 0; y < rd2->n_reg_rules; y++) { 783 for (y = 0; y < rd2->n_reg_rules; y++) {
752 rule2 = &rd2->reg_rules[y]; 784 rule2 = &rd2->reg_rules[y];
753 /* This time around instead of using the stack lets 785 /*
786 * This time around instead of using the stack lets
754 * write to the target rule directly saving ourselves 787 * write to the target rule directly saving ourselves
755 * a memcpy() */ 788 * a memcpy()
789 */
756 intersected_rule = &rd->reg_rules[rule_idx]; 790 intersected_rule = &rd->reg_rules[rule_idx];
757 r = reg_rules_intersect(rule1, rule2, 791 r = reg_rules_intersect(rule1, rule2,
758 intersected_rule); 792 intersected_rule);
759 /* No need to memset here the intersected rule here as 793 /*
760 * we're not using the stack anymore */ 794 * No need to memset here the intersected rule here as
795 * we're not using the stack anymore
796 */
761 if (r) 797 if (r)
762 continue; 798 continue;
763 rule_idx++; 799 rule_idx++;
@@ -776,8 +812,10 @@ static struct ieee80211_regdomain *regdom_intersect(
776 return rd; 812 return rd;
777} 813}
778 814
779/* XXX: add support for the rest of enum nl80211_reg_rule_flags, we may 815/*
780 * want to just have the channel structure use these */ 816 * XXX: add support for the rest of enum nl80211_reg_rule_flags, we may
817 * want to just have the channel structure use these
818 */
781static u32 map_regdom_flags(u32 rd_flags) 819static u32 map_regdom_flags(u32 rd_flags)
782{ 820{
783 u32 channel_flags = 0; 821 u32 channel_flags = 0;
@@ -790,48 +828,45 @@ static u32 map_regdom_flags(u32 rd_flags)
790 return channel_flags; 828 return channel_flags;
791} 829}
792 830
793/** 831static int freq_reg_info_regd(struct wiphy *wiphy,
794 * freq_reg_info - get regulatory information for the given frequency 832 u32 center_freq,
795 * @center_freq: Frequency in KHz for which we want regulatory information for 833 u32 *bandwidth,
796 * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one 834 const struct ieee80211_reg_rule **reg_rule,
797 * you can set this to 0. If this frequency is allowed we then set 835 const struct ieee80211_regdomain *custom_regd)
798 * this value to the maximum allowed bandwidth.
799 * @reg_rule: the regulatory rule which we have for this frequency
800 *
801 * Use this function to get the regulatory rule for a specific frequency on
802 * a given wireless device. If the device has a specific regulatory domain
803 * it wants to follow we respect that unless a country IE has been received
804 * and processed already.
805 *
806 * Returns 0 if it was able to find a valid regulatory rule which does
807 * apply to the given center_freq otherwise it returns non-zero. It will
808 * also return -ERANGE if we determine the given center_freq does not even have
809 * a regulatory rule for a frequency range in the center_freq's band. See
810 * freq_in_rule_band() for our current definition of a band -- this is purely
811 * subjective and right now its 802.11 specific.
812 */
813static int freq_reg_info(u32 center_freq, u32 *bandwidth,
814 const struct ieee80211_reg_rule **reg_rule)
815{ 836{
816 int i; 837 int i;
817 bool band_rule_found = false; 838 bool band_rule_found = false;
839 const struct ieee80211_regdomain *regd;
818 u32 max_bandwidth = 0; 840 u32 max_bandwidth = 0;
819 841
820 if (!cfg80211_regdomain) 842 regd = custom_regd ? custom_regd : cfg80211_regdomain;
843
844 /*
845 * Follow the driver's regulatory domain, if present, unless a country
 846 * IE has been processed or a user wants to help compliance further
847 */
848 if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE &&
849 last_request->initiator != REGDOM_SET_BY_USER &&
850 wiphy->regd)
851 regd = wiphy->regd;
852
853 if (!regd)
821 return -EINVAL; 854 return -EINVAL;
822 855
823 for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) { 856 for (i = 0; i < regd->n_reg_rules; i++) {
824 const struct ieee80211_reg_rule *rr; 857 const struct ieee80211_reg_rule *rr;
825 const struct ieee80211_freq_range *fr = NULL; 858 const struct ieee80211_freq_range *fr = NULL;
826 const struct ieee80211_power_rule *pr = NULL; 859 const struct ieee80211_power_rule *pr = NULL;
827 860
828 rr = &cfg80211_regdomain->reg_rules[i]; 861 rr = &regd->reg_rules[i];
829 fr = &rr->freq_range; 862 fr = &rr->freq_range;
830 pr = &rr->power_rule; 863 pr = &rr->power_rule;
831 864
832 /* We only need to know if one frequency rule was 865 /*
866 * We only need to know if one frequency rule was
833 * was in center_freq's band, that's enough, so lets 867 * was in center_freq's band, that's enough, so lets
834 * not overwrite it once found */ 868 * not overwrite it once found
869 */
835 if (!band_rule_found) 870 if (!band_rule_found)
836 band_rule_found = freq_in_rule_band(fr, center_freq); 871 band_rule_found = freq_in_rule_band(fr, center_freq);
837 872
@@ -849,6 +884,14 @@ static int freq_reg_info(u32 center_freq, u32 *bandwidth,
849 884
850 return !max_bandwidth; 885 return !max_bandwidth;
851} 886}
887EXPORT_SYMBOL(freq_reg_info);
888
889int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth,
890 const struct ieee80211_reg_rule **reg_rule)
891{
892 return freq_reg_info_regd(wiphy, center_freq,
893 bandwidth, reg_rule, NULL);
894}
852 895
853static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, 896static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
854 unsigned int chan_idx) 897 unsigned int chan_idx)
@@ -860,6 +903,11 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
860 const struct ieee80211_power_rule *power_rule = NULL; 903 const struct ieee80211_power_rule *power_rule = NULL;
861 struct ieee80211_supported_band *sband; 904 struct ieee80211_supported_band *sband;
862 struct ieee80211_channel *chan; 905 struct ieee80211_channel *chan;
906 struct wiphy *request_wiphy = NULL;
907
908 assert_cfg80211_lock();
909
910 request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
863 911
864 sband = wiphy->bands[band]; 912 sband = wiphy->bands[band];
865 BUG_ON(chan_idx >= sband->n_channels); 913 BUG_ON(chan_idx >= sband->n_channels);
@@ -867,11 +915,12 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
867 915
868 flags = chan->orig_flags; 916 flags = chan->orig_flags;
869 917
870 r = freq_reg_info(MHZ_TO_KHZ(chan->center_freq), 918 r = freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq),
871 &max_bandwidth, &reg_rule); 919 &max_bandwidth, &reg_rule);
872 920
873 if (r) { 921 if (r) {
874 /* This means no regulatory rule was found in the country IE 922 /*
923 * This means no regulatory rule was found in the country IE
875 * with a frequency range on the center_freq's band, since 924 * with a frequency range on the center_freq's band, since
876 * IEEE-802.11 allows for a country IE to have a subset of the 925 * IEEE-802.11 allows for a country IE to have a subset of the
877 * regulatory information provided in a country we ignore 926 * regulatory information provided in a country we ignore
@@ -890,8 +939,10 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
890 chan->center_freq, wiphy_name(wiphy)); 939 chan->center_freq, wiphy_name(wiphy));
891#endif 940#endif
892 } else { 941 } else {
893 /* In this case we know the country IE has at least one reg rule 942 /*
894 * for the band so we respect its band definitions */ 943 * In this case we know the country IE has at least one reg rule
944 * for the band so we respect its band definitions
945 */
895#ifdef CONFIG_CFG80211_REG_DEBUG 946#ifdef CONFIG_CFG80211_REG_DEBUG
896 if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) 947 if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE)
897 printk(KERN_DEBUG "cfg80211: Disabling " 948 printk(KERN_DEBUG "cfg80211: Disabling "
@@ -907,6 +958,24 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
907 958
908 power_rule = &reg_rule->power_rule; 959 power_rule = &reg_rule->power_rule;
909 960
961 if (last_request->initiator == REGDOM_SET_BY_DRIVER &&
962 request_wiphy && request_wiphy == wiphy &&
963 request_wiphy->strict_regulatory) {
964 /*
965 * This guarantees the driver's requested regulatory domain
966 * will always be used as a base for further regulatory
967 * settings
968 */
969 chan->flags = chan->orig_flags =
970 map_regdom_flags(reg_rule->flags);
971 chan->max_antenna_gain = chan->orig_mag =
972 (int) MBI_TO_DBI(power_rule->max_antenna_gain);
973 chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth);
974 chan->max_power = chan->orig_mpwr =
975 (int) MBM_TO_DBM(power_rule->max_eirp);
976 return;
977 }
978
910 chan->flags = flags | map_regdom_flags(reg_rule->flags); 979 chan->flags = flags | map_regdom_flags(reg_rule->flags);
911 chan->max_antenna_gain = min(chan->orig_mag, 980 chan->max_antenna_gain = min(chan->orig_mag,
912 (int) MBI_TO_DBI(power_rule->max_antenna_gain)); 981 (int) MBI_TO_DBI(power_rule->max_antenna_gain));
@@ -935,7 +1004,14 @@ static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby)
935 if (!last_request) 1004 if (!last_request)
936 return true; 1005 return true;
937 if (setby == REGDOM_SET_BY_CORE && 1006 if (setby == REGDOM_SET_BY_CORE &&
938 wiphy->fw_handles_regulatory) 1007 wiphy->custom_regulatory)
1008 return true;
1009 /*
1010 * wiphy->regd will be set once the device has its own
1011 * desired regulatory domain set
1012 */
1013 if (wiphy->strict_regulatory && !wiphy->regd &&
1014 !is_world_regdom(last_request->alpha2))
939 return true; 1015 return true;
940 return false; 1016 return false;
941} 1017}
@@ -945,117 +1021,374 @@ static void update_all_wiphy_regulatory(enum reg_set_by setby)
945 struct cfg80211_registered_device *drv; 1021 struct cfg80211_registered_device *drv;
946 1022
947 list_for_each_entry(drv, &cfg80211_drv_list, list) 1023 list_for_each_entry(drv, &cfg80211_drv_list, list)
948 if (!ignore_reg_update(&drv->wiphy, setby)) 1024 wiphy_update_regulatory(&drv->wiphy, setby);
949 wiphy_update_regulatory(&drv->wiphy, setby); 1025}
1026
1027static void handle_reg_beacon(struct wiphy *wiphy,
1028 unsigned int chan_idx,
1029 struct reg_beacon *reg_beacon)
1030{
1031#ifdef CONFIG_CFG80211_REG_DEBUG
1032#define REG_DEBUG_BEACON_FLAG(desc) \
1033 printk(KERN_DEBUG "cfg80211: Enabling " desc " on " \
1034 "frequency: %d MHz (Ch %d) on %s\n", \
1035 reg_beacon->chan.center_freq, \
1036 ieee80211_frequency_to_channel(reg_beacon->chan.center_freq), \
1037 wiphy_name(wiphy));
1038#else
1039#define REG_DEBUG_BEACON_FLAG(desc) do {} while (0)
1040#endif
1041 struct ieee80211_supported_band *sband;
1042 struct ieee80211_channel *chan;
1043
1044 assert_cfg80211_lock();
1045
1046 sband = wiphy->bands[reg_beacon->chan.band];
1047 chan = &sband->channels[chan_idx];
1048
1049 if (likely(chan->center_freq != reg_beacon->chan.center_freq))
1050 return;
1051
1052 if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) {
1053 chan->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN;
1054 REG_DEBUG_BEACON_FLAG("active scanning");
1055 }
1056
1057 if (chan->flags & IEEE80211_CHAN_NO_IBSS) {
1058 chan->flags &= ~IEEE80211_CHAN_NO_IBSS;
1059 REG_DEBUG_BEACON_FLAG("beaconing");
1060 }
1061
1062 chan->beacon_found = true;
1063#undef REG_DEBUG_BEACON_FLAG
1064}
1065
1066/*
1067 * Called when a scan on a wiphy finds a beacon on
1068 * new channel
1069 */
1070static void wiphy_update_new_beacon(struct wiphy *wiphy,
1071 struct reg_beacon *reg_beacon)
1072{
1073 unsigned int i;
1074 struct ieee80211_supported_band *sband;
1075
1076 assert_cfg80211_lock();
1077
1078 if (!wiphy->bands[reg_beacon->chan.band])
1079 return;
1080
1081 sband = wiphy->bands[reg_beacon->chan.band];
1082
1083 for (i = 0; i < sband->n_channels; i++)
1084 handle_reg_beacon(wiphy, i, reg_beacon);
1085}
1086
1087/*
1088 * Called upon reg changes or a new wiphy is added
1089 */
1090static void wiphy_update_beacon_reg(struct wiphy *wiphy)
1091{
1092 unsigned int i;
1093 struct ieee80211_supported_band *sband;
1094 struct reg_beacon *reg_beacon;
1095
1096 assert_cfg80211_lock();
1097
1098 if (list_empty(&reg_beacon_list))
1099 return;
1100
1101 list_for_each_entry(reg_beacon, &reg_beacon_list, list) {
1102 if (!wiphy->bands[reg_beacon->chan.band])
1103 continue;
1104 sband = wiphy->bands[reg_beacon->chan.band];
1105 for (i = 0; i < sband->n_channels; i++)
1106 handle_reg_beacon(wiphy, i, reg_beacon);
1107 }
1108}
1109
1110static bool reg_is_world_roaming(struct wiphy *wiphy)
1111{
1112 if (is_world_regdom(cfg80211_regdomain->alpha2) ||
1113 (wiphy->regd && is_world_regdom(wiphy->regd->alpha2)))
1114 return true;
1115 if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE &&
1116 wiphy->custom_regulatory)
1117 return true;
1118 return false;
1119}
1120
1121/* Reap the advantages of previously found beacons */
1122static void reg_process_beacons(struct wiphy *wiphy)
1123{
1124 if (!reg_is_world_roaming(wiphy))
1125 return;
1126 wiphy_update_beacon_reg(wiphy);
950} 1127}
951 1128
952void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) 1129void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby)
953{ 1130{
954 enum ieee80211_band band; 1131 enum ieee80211_band band;
1132
1133 if (ignore_reg_update(wiphy, setby))
1134 goto out;
955 for (band = 0; band < IEEE80211_NUM_BANDS; band++) { 1135 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
956 if (wiphy->bands[band]) 1136 if (wiphy->bands[band])
957 handle_band(wiphy, band); 1137 handle_band(wiphy, band);
958 if (wiphy->reg_notifier)
959 wiphy->reg_notifier(wiphy, setby);
960 } 1138 }
1139out:
1140 reg_process_beacons(wiphy);
1141 if (wiphy->reg_notifier)
1142 wiphy->reg_notifier(wiphy, last_request);
1143}
1144
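Note that the reg_notifier callback is now passed the whole last_request rather than just the initiator. A minimal sketch of a driver-side notifier under that assumption (the int return type matches this era's struct wiphy, and the example_* name is made up):

	static int example_reg_notifier(struct wiphy *wiphy,
					struct regulatory_request *request)
	{
		switch (request->initiator) {
		case REGDOM_SET_BY_COUNTRY_IE:
			/* e.g. reload EEPROM power limits for request->alpha2 */
			break;
		case REGDOM_SET_BY_USER:
		case REGDOM_SET_BY_DRIVER:
		case REGDOM_SET_BY_CORE:
		default:
			break;
		}
		return 0;
	}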
1145static void handle_channel_custom(struct wiphy *wiphy,
1146 enum ieee80211_band band,
1147 unsigned int chan_idx,
1148 const struct ieee80211_regdomain *regd)
1149{
1150 int r;
1151 u32 max_bandwidth = 0;
1152 const struct ieee80211_reg_rule *reg_rule = NULL;
1153 const struct ieee80211_power_rule *power_rule = NULL;
1154 struct ieee80211_supported_band *sband;
1155 struct ieee80211_channel *chan;
1156
1157 sband = wiphy->bands[band];
1158 BUG_ON(chan_idx >= sband->n_channels);
1159 chan = &sband->channels[chan_idx];
1160
1161 r = freq_reg_info_regd(wiphy, MHZ_TO_KHZ(chan->center_freq),
1162 &max_bandwidth, &reg_rule, regd);
1163
1164 if (r) {
1165 chan->flags = IEEE80211_CHAN_DISABLED;
1166 return;
1167 }
1168
1169 power_rule = &reg_rule->power_rule;
1170
1171 chan->flags |= map_regdom_flags(reg_rule->flags);
1172 chan->max_antenna_gain = (int) MBI_TO_DBI(power_rule->max_antenna_gain);
1173 chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth);
1174 chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp);
961} 1175}
962 1176
963/* Return value which can be used by ignore_request() to indicate 1177static void handle_band_custom(struct wiphy *wiphy, enum ieee80211_band band,
964 * it has been determined we should intersect two regulatory domains */ 1178 const struct ieee80211_regdomain *regd)
1179{
1180 unsigned int i;
1181 struct ieee80211_supported_band *sband;
1182
1183 BUG_ON(!wiphy->bands[band]);
1184 sband = wiphy->bands[band];
1185
1186 for (i = 0; i < sband->n_channels; i++)
1187 handle_channel_custom(wiphy, band, i, regd);
1188}
1189
1190/* Used by drivers prior to wiphy registration */
1191void wiphy_apply_custom_regulatory(struct wiphy *wiphy,
1192 const struct ieee80211_regdomain *regd)
1193{
1194 enum ieee80211_band band;
1195 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
1196 if (wiphy->bands[band])
1197 handle_band_custom(wiphy, band, regd);
1198 }
1199}
1200EXPORT_SYMBOL(wiphy_apply_custom_regulatory);
1201
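A sketch of the intended driver-side use of the new hook, not taken from this patch: a device whose firmware dictates its own regulatory data builds a regdomain, marks the wiphy as custom_regulatory so core hints are ignored (see ignore_reg_update() above), and applies it before wiphy_register(). The regdomain contents and example_* names are invented, and REG_RULE() is assumed to be the existing helper macro:

	static const struct ieee80211_regdomain example_custom_regd = {
		.n_reg_rules = 2,
		.alpha2 = "99",			/* "built by driver" marker */
		.reg_rules = {
			/* 2.4 GHz: ch 1-11, 40 MHz, 6 dBi, 20 dBm */
			REG_RULE(2412 - 10, 2462 + 10, 40, 6, 20, 0),
			/* 5 GHz: 5180-5240 MHz, no IBSS */
			REG_RULE(5180 - 10, 5240 + 10, 40, 6, 20,
				 NL80211_RRF_NO_IBSS),
		},
	};

	static void example_setup_regulatory(struct wiphy *wiphy)
	{
		wiphy->custom_regulatory = true;
		wiphy_apply_custom_regulatory(wiphy, &example_custom_regd);
		/* ... followed by wiphy_register(wiphy) ... */
	}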
1202static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd,
1203 const struct ieee80211_regdomain *src_regd)
1204{
1205 struct ieee80211_regdomain *regd;
1206 int size_of_regd = 0;
1207 unsigned int i;
1208
1209 size_of_regd = sizeof(struct ieee80211_regdomain) +
1210 ((src_regd->n_reg_rules + 1) * sizeof(struct ieee80211_reg_rule));
1211
1212 regd = kzalloc(size_of_regd, GFP_KERNEL);
1213 if (!regd)
1214 return -ENOMEM;
1215
1216 memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain));
1217
1218 for (i = 0; i < src_regd->n_reg_rules; i++)
1219 memcpy(&regd->reg_rules[i], &src_regd->reg_rules[i],
1220 sizeof(struct ieee80211_reg_rule));
1221
1222 *dst_regd = regd;
1223 return 0;
1224}
1225
1226/*
1227 * Return value which can be used by ignore_request() to indicate
1228 * it has been determined we should intersect two regulatory domains
1229 */
965#define REG_INTERSECT 1 1230#define REG_INTERSECT 1
966 1231
967/* This has the logic which determines when a new request 1232/* This has the logic which determines when a new request
968 * should be ignored. */ 1233 * should be ignored. */
969static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, 1234static int ignore_request(struct wiphy *wiphy,
970 const char *alpha2) 1235 struct regulatory_request *pending_request)
971{ 1236{
1237 struct wiphy *last_wiphy = NULL;
1238
1239 assert_cfg80211_lock();
1240
972 /* All initial requests are respected */ 1241 /* All initial requests are respected */
973 if (!last_request) 1242 if (!last_request)
974 return 0; 1243 return 0;
975 1244
976 switch (set_by) { 1245 switch (pending_request->initiator) {
977 case REGDOM_SET_BY_INIT: 1246 case REGDOM_SET_BY_INIT:
978 return -EINVAL; 1247 return -EINVAL;
979 case REGDOM_SET_BY_CORE: 1248 case REGDOM_SET_BY_CORE:
980 /* 1249 return -EINVAL;
981 * Always respect new wireless core hints, should only happen
982 * when updating the world regulatory domain at init.
983 */
984 return 0;
985 case REGDOM_SET_BY_COUNTRY_IE: 1250 case REGDOM_SET_BY_COUNTRY_IE:
986 if (unlikely(!is_an_alpha2(alpha2))) 1251
1252 last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
1253
1254 if (unlikely(!is_an_alpha2(pending_request->alpha2)))
987 return -EINVAL; 1255 return -EINVAL;
988 if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { 1256 if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) {
989 if (last_request->wiphy != wiphy) { 1257 if (last_wiphy != wiphy) {
990 /* 1258 /*
991 * Two cards with two APs claiming different 1259 * Two cards with two APs claiming different
992 * Country IE alpha2s. We could 1260 * Country IE alpha2s. We could
993 * intersect them, but that seems unlikely 1261 * intersect them, but that seems unlikely
994 * to be correct. Reject second one for now. 1262 * to be correct. Reject second one for now.
995 */ 1263 */
996 if (!alpha2_equal(alpha2, 1264 if (regdom_changes(pending_request->alpha2))
997 cfg80211_regdomain->alpha2))
998 return -EOPNOTSUPP; 1265 return -EOPNOTSUPP;
999 return -EALREADY; 1266 return -EALREADY;
1000 } 1267 }
1001 /* Two consecutive Country IE hints on the same wiphy. 1268 /*
1002 * This should be picked up early by the driver/stack */ 1269 * Two consecutive Country IE hints on the same wiphy.
1003 if (WARN_ON(!alpha2_equal(cfg80211_regdomain->alpha2, 1270 * This should be picked up early by the driver/stack
1004 alpha2))) 1271 */
1272 if (WARN_ON(regdom_changes(pending_request->alpha2)))
1005 return 0; 1273 return 0;
1006 return -EALREADY; 1274 return -EALREADY;
1007 } 1275 }
1008 return REG_INTERSECT; 1276 return REG_INTERSECT;
1009 case REGDOM_SET_BY_DRIVER: 1277 case REGDOM_SET_BY_DRIVER:
1010 if (last_request->initiator == REGDOM_SET_BY_DRIVER) 1278 if (last_request->initiator == REGDOM_SET_BY_CORE) {
1279 if (is_old_static_regdom(cfg80211_regdomain))
1280 return 0;
1281 if (regdom_changes(pending_request->alpha2))
1282 return 0;
1011 return -EALREADY; 1283 return -EALREADY;
1012 return 0; 1284 }
1285
1286 /*
1287 * This would happen if you unplug and plug your card
1288 * back in or if you add a new device for which the previously
1289 * loaded card also agrees on the regulatory domain.
1290 */
1291 if (last_request->initiator == REGDOM_SET_BY_DRIVER &&
1292 !regdom_changes(pending_request->alpha2))
1293 return -EALREADY;
1294
1295 return REG_INTERSECT;
1013 case REGDOM_SET_BY_USER: 1296 case REGDOM_SET_BY_USER:
1014 if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) 1297 if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE)
1015 return REG_INTERSECT; 1298 return REG_INTERSECT;
1016 /* If the user knows better the user should set the regdom 1299 /*
1017 * to their country before the IE is picked up */ 1300 * If the user knows better the user should set the regdom
1301 * to their country before the IE is picked up
1302 */
1018 if (last_request->initiator == REGDOM_SET_BY_USER && 1303 if (last_request->initiator == REGDOM_SET_BY_USER &&
1019 last_request->intersect) 1304 last_request->intersect)
1020 return -EOPNOTSUPP; 1305 return -EOPNOTSUPP;
1306 /*
1307 * Process user requests only after previous user/driver/core
1308 * requests have been processed
1309 */
1310 if (last_request->initiator == REGDOM_SET_BY_CORE ||
1311 last_request->initiator == REGDOM_SET_BY_DRIVER ||
1312 last_request->initiator == REGDOM_SET_BY_USER) {
1313 if (regdom_changes(last_request->alpha2))
1314 return -EAGAIN;
1315 }
1316
1317 if (!is_old_static_regdom(cfg80211_regdomain) &&
1318 !regdom_changes(pending_request->alpha2))
1319 return -EALREADY;
1320
1021 return 0; 1321 return 0;
1022 } 1322 }
1023 1323
1024 return -EINVAL; 1324 return -EINVAL;
1025} 1325}
1026 1326
1027/* Caller must hold &cfg80211_drv_mutex */ 1327/**
1028int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, 1328 * __regulatory_hint - hint to the wireless core a regulatory domain
1029 const char *alpha2, 1329 * @wiphy: if the hint comes from country information from an AP, this
1030 u32 country_ie_checksum, 1330 * is required to be set to the wiphy that received the information
1031 enum environment_cap env) 1331 * @pending_request: the regulatory request currently being processed
1332 *
1333 * The Wireless subsystem can use this function to hint to the wireless core
1334 * what it believes should be the current regulatory domain.
1335 *
1336 * Returns zero if all went fine, %-EALREADY if a regulatory domain had
1337 * already been set or other standard error codes.
1338 *
1339 * Caller must hold &cfg80211_mutex
1340 */
1341static int __regulatory_hint(struct wiphy *wiphy,
1342 struct regulatory_request *pending_request)
1032{ 1343{
1033 struct regulatory_request *request;
1034 bool intersect = false; 1344 bool intersect = false;
1035 int r = 0; 1345 int r = 0;
1036 1346
1037 r = ignore_request(wiphy, set_by, alpha2); 1347 assert_cfg80211_lock();
1348
1349 r = ignore_request(wiphy, pending_request);
1038 1350
1039 if (r == REG_INTERSECT) 1351 if (r == REG_INTERSECT) {
1352 if (pending_request->initiator == REGDOM_SET_BY_DRIVER) {
1353 r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain);
1354 if (r) {
1355 kfree(pending_request);
1356 return r;
1357 }
1358 }
1040 intersect = true; 1359 intersect = true;
1041 else if (r) 1360 } else if (r) {
1361 /*
1362 * If the regulatory domain being requested by the
1363 * driver has already been set just copy it to the
1364 * wiphy
1365 */
1366 if (r == -EALREADY &&
1367 pending_request->initiator == REGDOM_SET_BY_DRIVER) {
1368 r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain);
1369 if (r) {
1370 kfree(pending_request);
1371 return r;
1372 }
1373 r = -EALREADY;
1374 goto new_request;
1375 }
1376 kfree(pending_request);
1042 return r; 1377 return r;
1378 }
1043 1379
1044 request = kzalloc(sizeof(struct regulatory_request), 1380new_request:
1045 GFP_KERNEL); 1381 kfree(last_request);
1046 if (!request)
1047 return -ENOMEM;
1048 1382
1049 request->alpha2[0] = alpha2[0]; 1383 last_request = pending_request;
1050 request->alpha2[1] = alpha2[1]; 1384 last_request->intersect = intersect;
1051 request->initiator = set_by; 1385
1052 request->wiphy = wiphy; 1386 pending_request = NULL;
1053 request->intersect = intersect; 1387
1054 request->country_ie_checksum = country_ie_checksum; 1388 /* When r == REG_INTERSECT we do need to call CRDA */
1055 request->country_ie_env = env; 1389 if (r < 0)
1390 return r;
1056 1391
1057 kfree(last_request);
1058 last_request = request;
1059 /* 1392 /*
1060 * Note: When CONFIG_WIRELESS_OLD_REGULATORY is enabled 1393 * Note: When CONFIG_WIRELESS_OLD_REGULATORY is enabled
1061 * AND if CRDA is NOT present nothing will happen, if someone 1394 * AND if CRDA is NOT present nothing will happen, if someone
@@ -1066,29 +1399,194 @@ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
1066 * 1399 *
1067 * to intersect with the static rd 1400 * to intersect with the static rd
1068 */ 1401 */
1069 return call_crda(alpha2); 1402 return call_crda(last_request->alpha2);
1070} 1403}
1071 1404
1072void regulatory_hint(struct wiphy *wiphy, const char *alpha2) 1405/* This currently only processes user and driver regulatory hints */
1406static void reg_process_hint(struct regulatory_request *reg_request)
1073{ 1407{
1408 int r = 0;
1409 struct wiphy *wiphy = NULL;
1410
1411 BUG_ON(!reg_request->alpha2);
1412
1413 mutex_lock(&cfg80211_mutex);
1414
1415 if (wiphy_idx_valid(reg_request->wiphy_idx))
1416 wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx);
1417
1418 if (reg_request->initiator == REGDOM_SET_BY_DRIVER &&
1419 !wiphy) {
1420 kfree(reg_request);
1421 goto out;
1422 }
1423
1424 r = __regulatory_hint(wiphy, reg_request);
1425 /* This is required so that the orig_* parameters are saved */
1426 if (r == -EALREADY && wiphy && wiphy->strict_regulatory)
1427 wiphy_update_regulatory(wiphy, reg_request->initiator);
1428out:
1429 mutex_unlock(&cfg80211_mutex);
1430}
1431
1432/* Processes regulatory hints, this is all the REGDOM_SET_BY_* */
1433static void reg_process_pending_hints(void)
1434 {
1435 struct regulatory_request *reg_request;
1436
1437 spin_lock(&reg_requests_lock);
1438 while (!list_empty(&reg_requests_list)) {
1439 reg_request = list_first_entry(&reg_requests_list,
1440 struct regulatory_request,
1441 list);
1442 list_del_init(&reg_request->list);
1443
1444 spin_unlock(&reg_requests_lock);
1445 reg_process_hint(reg_request);
1446 spin_lock(&reg_requests_lock);
1447 }
1448 spin_unlock(&reg_requests_lock);
1449}
1450
1451/* Processes beacon hints -- this has nothing to do with country IEs */
1452static void reg_process_pending_beacon_hints(void)
1453{
1454 struct cfg80211_registered_device *drv;
1455 struct reg_beacon *pending_beacon, *tmp;
1456
1457 mutex_lock(&cfg80211_mutex);
1458
1459 /* This goes through the _pending_ beacon list */
1460 spin_lock_bh(&reg_pending_beacons_lock);
1461
1462 if (list_empty(&reg_pending_beacons)) {
1463 spin_unlock_bh(&reg_pending_beacons_lock);
1464 goto out;
1465 }
1466
1467 list_for_each_entry_safe(pending_beacon, tmp,
1468 &reg_pending_beacons, list) {
1469
1470 list_del_init(&pending_beacon->list);
1471
1472 /* Applies the beacon hint to current wiphys */
1473 list_for_each_entry(drv, &cfg80211_drv_list, list)
1474 wiphy_update_new_beacon(&drv->wiphy, pending_beacon);
1475
1476 /* Remembers the beacon hint for new wiphys or reg changes */
1477 list_add_tail(&pending_beacon->list, &reg_beacon_list);
1478 }
1479
1480 spin_unlock_bh(&reg_pending_beacons_lock);
1481out:
1482 mutex_unlock(&cfg80211_mutex);
1483}
1484
1485static void reg_todo(struct work_struct *work)
1486{
1487 reg_process_pending_hints();
1488 reg_process_pending_beacon_hints();
1489}
1490
1491static DECLARE_WORK(reg_work, reg_todo);
1492
1493static void queue_regulatory_request(struct regulatory_request *request)
1494{
1495 spin_lock(&reg_requests_lock);
1496 list_add_tail(&request->list, &reg_requests_list);
1497 spin_unlock(&reg_requests_lock);
1498
1499 schedule_work(&reg_work);
1500}
1501
1502/* Core regulatory hint -- happens once during cfg80211_init() */
1503static int regulatory_hint_core(const char *alpha2)
1504{
1505 struct regulatory_request *request;
1506
1507 BUG_ON(last_request);
1508
1509 request = kzalloc(sizeof(struct regulatory_request),
1510 GFP_KERNEL);
1511 if (!request)
1512 return -ENOMEM;
1513
1514 request->alpha2[0] = alpha2[0];
1515 request->alpha2[1] = alpha2[1];
1516 request->initiator = REGDOM_SET_BY_CORE;
1517
1518 queue_regulatory_request(request);
1519
1520 return 0;
1521}
1522
1523/* User hints */
1524int regulatory_hint_user(const char *alpha2)
1525{
1526 struct regulatory_request *request;
1527
1528 BUG_ON(!alpha2);
1529
1530 request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL);
1531 if (!request)
1532 return -ENOMEM;
1533
1534 request->wiphy_idx = WIPHY_IDX_STALE;
1535 request->alpha2[0] = alpha2[0];
1536 request->alpha2[1] = alpha2[1];
1537 request->initiator = REGDOM_SET_BY_USER;
1538
1539 queue_regulatory_request(request);
1540
1541 return 0;
1542}
1543
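regulatory_hint_user() is what the nl80211 NL80211_CMD_REQ_SET_REG handler ends up calling. A simplified sketch of that path, not the actual handler; everything here apart from the nl80211 attribute name is an assumption:

	static int example_handle_req_set_reg(struct genl_info *info)
	{
		char alpha2[2];

		if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
			return -EINVAL;

		nla_memcpy(alpha2, info->attrs[NL80211_ATTR_REG_ALPHA2],
			   sizeof(alpha2));

		/* Queued on reg_requests_list and handled later by reg_work */
		return regulatory_hint_user(alpha2);
	}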
1544/* Driver hints */
1545int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
1546{
1547 struct regulatory_request *request;
1548
1074 BUG_ON(!alpha2); 1549 BUG_ON(!alpha2);
1550 BUG_ON(!wiphy);
1551
1552 request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL);
1553 if (!request)
1554 return -ENOMEM;
1555
1556 request->wiphy_idx = get_wiphy_idx(wiphy);
1075 1557
1076 mutex_lock(&cfg80211_drv_mutex); 1558 /* Must have registered wiphy first */
1077 __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, 0, ENVIRON_ANY); 1559 BUG_ON(!wiphy_idx_valid(request->wiphy_idx));
1078 mutex_unlock(&cfg80211_drv_mutex); 1560
1561 request->alpha2[0] = alpha2[0];
1562 request->alpha2[1] = alpha2[1];
1563 request->initiator = REGDOM_SET_BY_DRIVER;
1564
1565 queue_regulatory_request(request);
1566
1567 return 0;
1079} 1568}
1080EXPORT_SYMBOL(regulatory_hint); 1569EXPORT_SYMBOL(regulatory_hint);
1081 1570
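For the driver path, a minimal sketch (not from this patch) of a probe routine feeding an EEPROM country code to the core; strict_regulatory is set so the driver's domain remains the base for later hints, which is the case handle_channel() above special-cases:

	static int example_apply_eeprom_country(struct wiphy *wiphy,
						const char *eeprom_alpha2)
	{
		wiphy->strict_regulatory = true;

		/*
		 * Must run after wiphy_register(); the request is queued and
		 * processed asynchronously by reg_work, possibly calling CRDA.
		 */
		return regulatory_hint(wiphy, eeprom_alpha2);
	}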
1082static bool reg_same_country_ie_hint(struct wiphy *wiphy, 1571static bool reg_same_country_ie_hint(struct wiphy *wiphy,
1083 u32 country_ie_checksum) 1572 u32 country_ie_checksum)
1084{ 1573{
1085 if (!last_request->wiphy) 1574 struct wiphy *request_wiphy;
1575
1576 assert_cfg80211_lock();
1577
1578 request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
1579
1580 if (!request_wiphy)
1086 return false; 1581 return false;
1087 if (likely(last_request->wiphy != wiphy)) 1582
1583 if (likely(request_wiphy != wiphy))
1088 return !country_ie_integrity_changes(country_ie_checksum); 1584 return !country_ie_integrity_changes(country_ie_checksum);
1089 /* We should not have let these through at this point, they 1585 /*
1586 * We should not have let these through at this point, they
1090 * should have been picked up earlier by the first alpha2 check 1587 * should have been picked up earlier by the first alpha2 check
1091 * on the device */ 1588 * on the device
1589 */
1092 if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum))) 1590 if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum)))
1093 return true; 1591 return true;
1094 return false; 1592 return false;
@@ -1102,11 +1600,14 @@ void regulatory_hint_11d(struct wiphy *wiphy,
1102 char alpha2[2]; 1600 char alpha2[2];
1103 u32 checksum = 0; 1601 u32 checksum = 0;
1104 enum environment_cap env = ENVIRON_ANY; 1602 enum environment_cap env = ENVIRON_ANY;
1603 struct regulatory_request *request;
1105 1604
1106 if (!last_request) 1605 mutex_lock(&cfg80211_mutex);
1107 return;
1108 1606
1109 mutex_lock(&cfg80211_drv_mutex); 1607 if (unlikely(!last_request)) {
1608 mutex_unlock(&cfg80211_mutex);
1609 return;
1610 }
1110 1611
1111 /* IE len must be evenly divisible by 2 */ 1612 /* IE len must be evenly divisible by 2 */
1112 if (country_ie_len & 0x01) 1613 if (country_ie_len & 0x01)
@@ -1115,9 +1616,11 @@ void regulatory_hint_11d(struct wiphy *wiphy,
1115 if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) 1616 if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN)
1116 goto out; 1617 goto out;
1117 1618
1118 /* Pending country IE processing, this can happen after we 1619 /*
1620 * Pending country IE processing, this can happen after we
1119 * call CRDA and wait for a response if a beacon was received before 1621 * call CRDA and wait for a response if a beacon was received before
1120 * we were able to process the last regulatory_hint_11d() call */ 1622 * we were able to process the last regulatory_hint_11d() call
1623 */
1121 if (country_ie_regdomain) 1624 if (country_ie_regdomain)
1122 goto out; 1625 goto out;
1123 1626
@@ -1129,33 +1632,44 @@ void regulatory_hint_11d(struct wiphy *wiphy,
1129 else if (country_ie[2] == 'O') 1632 else if (country_ie[2] == 'O')
1130 env = ENVIRON_OUTDOOR; 1633 env = ENVIRON_OUTDOOR;
1131 1634
1132 /* We will run this for *every* beacon processed for the BSSID, so 1635 /*
1636 * We will run this for *every* beacon processed for the BSSID, so
1133 * we optimize an early check to exit out early if we don't have to 1637 * we optimize an early check to exit out early if we don't have to
1134 * do anything */ 1638 * do anything
1135 if (likely(last_request->wiphy)) { 1639 */
1640 if (likely(wiphy_idx_valid(last_request->wiphy_idx))) {
1136 struct cfg80211_registered_device *drv_last_ie; 1641 struct cfg80211_registered_device *drv_last_ie;
1137 1642
1138 drv_last_ie = wiphy_to_dev(last_request->wiphy); 1643 drv_last_ie =
1644 cfg80211_drv_by_wiphy_idx(last_request->wiphy_idx);
1139 1645
1140 /* Lets keep this simple -- we trust the first AP 1646 /*
1141 * after we intersect with CRDA */ 1647 * Lets keep this simple -- we trust the first AP
1142 if (likely(last_request->wiphy == wiphy)) { 1648 * after we intersect with CRDA
1143 /* Ignore IEs coming in on this wiphy with 1649 */
1144 * the same alpha2 and environment cap */ 1650 if (likely(&drv_last_ie->wiphy == wiphy)) {
1651 /*
1652 * Ignore IEs coming in on this wiphy with
1653 * the same alpha2 and environment cap
1654 */
1145 if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, 1655 if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2,
1146 alpha2) && 1656 alpha2) &&
1147 env == drv_last_ie->env)) { 1657 env == drv_last_ie->env)) {
1148 goto out; 1658 goto out;
1149 } 1659 }
1150 /* the wiphy moved on to another BSSID or the AP 1660 /*
1661 * the wiphy moved on to another BSSID or the AP
1151 * was reconfigured. XXX: We need to deal with the 1662 * was reconfigured. XXX: We need to deal with the
1152 * case where the user suspends and goes 1663 * case where the user suspends and goes
1153 * to another country, and then gets IEs from an 1664 * to another country, and then gets IEs from an
1154 * AP with different settings */ 1665 * AP with different settings
1666 */
1155 goto out; 1667 goto out;
1156 } else { 1668 } else {
1157 /* Ignore IEs coming in on two separate wiphys with 1669 /*
1158 * the same alpha2 and environment cap */ 1670 * Ignore IEs coming in on two separate wiphys with
1671 * the same alpha2 and environment cap
1672 */
1159 if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, 1673 if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2,
1160 alpha2) && 1674 alpha2) &&
1161 env == drv_last_ie->env)) { 1675 env == drv_last_ie->env)) {
@@ -1170,28 +1684,97 @@ void regulatory_hint_11d(struct wiphy *wiphy,
1170 if (!rd) 1684 if (!rd)
1171 goto out; 1685 goto out;
1172 1686
1173 /* This will not happen right now but we leave it here for the 1687 /*
1688 * This will not happen right now but we leave it here for
1174 * the future when we want to add suspend/resume support and having 1689 * the future when we want to add suspend/resume support and having
1175 * the user move to another country after doing so, or having the user 1690 * the user move to another country after doing so, or having the user
1176 * move to another AP. Right now we just trust the first AP. This is why 1691 * move to another AP. Right now we just trust the first AP.
1177 * this is marked as likley(). If we hit this before we add this support 1692 *
1178 * we want to be informed of it as it would indicate a mistake in the 1693 * If we hit this before we add this support we want to be informed of
1179 * current design */ 1694 * it as it would indicate a mistake in the current design
1180 if (likely(WARN_ON(reg_same_country_ie_hint(wiphy, checksum)))) 1695 */
1181 goto out; 1696 if (WARN_ON(reg_same_country_ie_hint(wiphy, checksum)))
1697 goto free_rd_out;
1698
1699 request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL);
1700 if (!request)
1701 goto free_rd_out;
1182 1702
1183 /* We keep this around for when CRDA comes back with a response so 1703 /*
1184 * we can intersect with that */ 1704 * We keep this around for when CRDA comes back with a response so
1705 * we can intersect with that
1706 */
1185 country_ie_regdomain = rd; 1707 country_ie_regdomain = rd;
1186 1708
1187 __regulatory_hint(wiphy, REGDOM_SET_BY_COUNTRY_IE, 1709 request->wiphy_idx = get_wiphy_idx(wiphy);
1188 country_ie_regdomain->alpha2, checksum, env); 1710 request->alpha2[0] = rd->alpha2[0];
1711 request->alpha2[1] = rd->alpha2[1];
1712 request->initiator = REGDOM_SET_BY_COUNTRY_IE;
1713 request->country_ie_checksum = checksum;
1714 request->country_ie_env = env;
1715
1716 mutex_unlock(&cfg80211_mutex);
1189 1717
1718 queue_regulatory_request(request);
1719
1720 return;
1721
1722free_rd_out:
1723 kfree(rd);
1190out: 1724out:
1191 mutex_unlock(&cfg80211_drv_mutex); 1725 mutex_unlock(&cfg80211_mutex);
1192} 1726}
1193EXPORT_SYMBOL(regulatory_hint_11d); 1727EXPORT_SYMBOL(regulatory_hint_11d);
1194 1728
1729static bool freq_is_chan_12_13_14(u16 freq)
1730{
1731 if (freq == ieee80211_channel_to_frequency(12) ||
1732 freq == ieee80211_channel_to_frequency(13) ||
1733 freq == ieee80211_channel_to_frequency(14))
1734 return true;
1735 return false;
1736}
1737
1738int regulatory_hint_found_beacon(struct wiphy *wiphy,
1739 struct ieee80211_channel *beacon_chan,
1740 gfp_t gfp)
1741{
1742 struct reg_beacon *reg_beacon;
1743
1744 if (likely((beacon_chan->beacon_found ||
1745 (beacon_chan->flags & IEEE80211_CHAN_RADAR) ||
1746 (beacon_chan->band == IEEE80211_BAND_2GHZ &&
1747 !freq_is_chan_12_13_14(beacon_chan->center_freq)))))
1748 return 0;
1749
1750 reg_beacon = kzalloc(sizeof(struct reg_beacon), gfp);
1751 if (!reg_beacon)
1752 return -ENOMEM;
1753
1754#ifdef CONFIG_CFG80211_REG_DEBUG
1755 printk(KERN_DEBUG "cfg80211: Found new beacon on "
1756 "frequency: %d MHz (Ch %d) on %s\n",
1757 beacon_chan->center_freq,
1758 ieee80211_frequency_to_channel(beacon_chan->center_freq),
1759 wiphy_name(wiphy));
1760#endif
1761 memcpy(&reg_beacon->chan, beacon_chan,
1762 sizeof(struct ieee80211_channel));
1763
1764
1765 /*
1766 * Since we can be called from BH or non-BH context
1767 * we must use spin_lock_bh()
1768 */
1769 spin_lock_bh(&reg_pending_beacons_lock);
1770 list_add_tail(&reg_beacon->list, &reg_pending_beacons);
1771 spin_unlock_bh(&reg_pending_beacons_lock);
1772
1773 schedule_work(&reg_work);
1774
1775 return 0;
1776}
1777
1195static void print_rd_rules(const struct ieee80211_regdomain *rd) 1778static void print_rd_rules(const struct ieee80211_regdomain *rd)
1196{ 1779{
1197 unsigned int i; 1780 unsigned int i;
@@ -1207,8 +1790,10 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
1207 freq_range = &reg_rule->freq_range; 1790 freq_range = &reg_rule->freq_range;
1208 power_rule = &reg_rule->power_rule; 1791 power_rule = &reg_rule->power_rule;
1209 1792
1210 /* There may not be documentation for max antenna gain 1793 /*
1211 * in certain regions */ 1794 * There may not be documentation for max antenna gain
1795 * in certain regions
1796 */
1212 if (power_rule->max_antenna_gain) 1797 if (power_rule->max_antenna_gain)
1213 printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), " 1798 printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), "
1214 "(%d mBi, %d mBm)\n", 1799 "(%d mBi, %d mBm)\n",
@@ -1231,13 +1816,12 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
1231{ 1816{
1232 1817
1233 if (is_intersected_alpha2(rd->alpha2)) { 1818 if (is_intersected_alpha2(rd->alpha2)) {
1234 struct wiphy *wiphy = NULL;
1235 struct cfg80211_registered_device *drv;
1236 1819
1237 if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { 1820 if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) {
1238 if (last_request->wiphy) { 1821 struct cfg80211_registered_device *drv;
1239 wiphy = last_request->wiphy; 1822 drv = cfg80211_drv_by_wiphy_idx(
1240 drv = wiphy_to_dev(wiphy); 1823 last_request->wiphy_idx);
1824 if (drv) {
1241 printk(KERN_INFO "cfg80211: Current regulatory " 1825 printk(KERN_INFO "cfg80211: Current regulatory "
1242 "domain updated by AP to: %c%c\n", 1826 "domain updated by AP to: %c%c\n",
1243 drv->country_ie_alpha2[0], 1827 drv->country_ie_alpha2[0],
@@ -1247,7 +1831,7 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
1247 "domain intersected: \n"); 1831 "domain intersected: \n");
1248 } else 1832 } else
1249 printk(KERN_INFO "cfg80211: Current regulatory " 1833 printk(KERN_INFO "cfg80211: Current regulatory "
1250 "intersected: \n"); 1834 "domain intersected: \n");
1251 } else if (is_world_regdom(rd->alpha2)) 1835 } else if (is_world_regdom(rd->alpha2))
1252 printk(KERN_INFO "cfg80211: World regulatory " 1836 printk(KERN_INFO "cfg80211: World regulatory "
1253 "domain updated:\n"); 1837 "domain updated:\n");
@@ -1303,7 +1887,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1303{ 1887{
1304 const struct ieee80211_regdomain *intersected_rd = NULL; 1888 const struct ieee80211_regdomain *intersected_rd = NULL;
1305 struct cfg80211_registered_device *drv = NULL; 1889 struct cfg80211_registered_device *drv = NULL;
1306 struct wiphy *wiphy = NULL; 1890 struct wiphy *request_wiphy;
1307 /* Some basic sanity checks first */ 1891 /* Some basic sanity checks first */
1308 1892
1309 if (is_world_regdom(rd->alpha2)) { 1893 if (is_world_regdom(rd->alpha2)) {
@@ -1320,23 +1904,27 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1320 if (!last_request) 1904 if (!last_request)
1321 return -EINVAL; 1905 return -EINVAL;
1322 1906
1323 /* Lets only bother proceeding on the same alpha2 if the current 1907 /*
1908 * Lets only bother proceeding on the same alpha2 if the current
1324 * rd is non static (it means CRDA was present and was used last) 1909 * rd is non static (it means CRDA was present and was used last)
1325 * and the pending request came in from a country IE */ 1910 * and the pending request came in from a country IE
1911 */
1326 if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) { 1912 if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) {
1327 /* If someone else asked us to change the rd lets only bother 1913 /*
1328 * checking if the alpha2 changes if CRDA was already called */ 1914 * If someone else asked us to change the rd lets only bother
1915 * checking if the alpha2 changes if CRDA was already called
1916 */
1329 if (!is_old_static_regdom(cfg80211_regdomain) && 1917 if (!is_old_static_regdom(cfg80211_regdomain) &&
1330 !regdom_changed(rd->alpha2)) 1918 !regdom_changes(rd->alpha2))
1331 return -EINVAL; 1919 return -EINVAL;
1332 } 1920 }
1333 1921
1334 wiphy = last_request->wiphy; 1922 /*
1335 1923 * Now lets set the regulatory domain, update all driver channels
1336 /* Now lets set the regulatory domain, update all driver channels
1337 * and finally inform them of what we have done, in case they want 1924 * and finally inform them of what we have done, in case they want
1338 * to review or adjust their own settings based on their own 1925 * to review or adjust their own settings based on their own
1339 * internal EEPROM data */ 1926 * internal EEPROM data
1927 */
1340 1928
1341 if (WARN_ON(!reg_is_valid_request(rd->alpha2))) 1929 if (WARN_ON(!reg_is_valid_request(rd->alpha2)))
1342 return -EINVAL; 1930 return -EINVAL;
@@ -1348,7 +1936,28 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1348 return -EINVAL; 1936 return -EINVAL;
1349 } 1937 }
1350 1938
1939 request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
1940
1351 if (!last_request->intersect) { 1941 if (!last_request->intersect) {
1942 int r;
1943
1944 if (last_request->initiator != REGDOM_SET_BY_DRIVER) {
1945 reset_regdomains();
1946 cfg80211_regdomain = rd;
1947 return 0;
1948 }
1949
1950 /*
1951 * For a driver hint, lets copy the regulatory domain the
1952 * driver wanted to the wiphy to deal with conflicts
1953 */
1954
1955 BUG_ON(request_wiphy->regd);
1956
1957 r = reg_copy_regd(&request_wiphy->regd, rd);
1958 if (r)
1959 return r;
1960
1352 reset_regdomains(); 1961 reset_regdomains();
1353 cfg80211_regdomain = rd; 1962 cfg80211_regdomain = rd;
1354 return 0; 1963 return 0;
@@ -1362,8 +1971,16 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1362 if (!intersected_rd) 1971 if (!intersected_rd)
1363 return -EINVAL; 1972 return -EINVAL;
1364 1973
1365 /* We can trash what CRDA provided now */ 1974 /*
1366 kfree(rd); 1975 * We can trash what CRDA provided now.
1976 * However if a driver requested this specific regulatory
1977 * domain we keep it for its private use
1978 */
1979 if (last_request->initiator == REGDOM_SET_BY_DRIVER)
1980 request_wiphy->regd = rd;
1981 else
1982 kfree(rd);
1983
1367 rd = NULL; 1984 rd = NULL;
1368 1985
1369 reset_regdomains(); 1986 reset_regdomains();
@@ -1380,8 +1997,10 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1380 BUG_ON(!country_ie_regdomain); 1997 BUG_ON(!country_ie_regdomain);
1381 1998
1382 if (rd != country_ie_regdomain) { 1999 if (rd != country_ie_regdomain) {
1383 /* Intersect what CRDA returned and our what we 2000 /*
1384 * had built from the Country IE received */ 2001 * Intersect what CRDA returned and what we
2002 * had built from the Country IE received
2003 */
1385 2004
1386 intersected_rd = regdom_intersect(rd, country_ie_regdomain); 2005 intersected_rd = regdom_intersect(rd, country_ie_regdomain);
1387 2006
@@ -1391,16 +2010,18 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1391 kfree(country_ie_regdomain); 2010 kfree(country_ie_regdomain);
1392 country_ie_regdomain = NULL; 2011 country_ie_regdomain = NULL;
1393 } else { 2012 } else {
1394 /* This would happen when CRDA was not present and 2013 /*
2014 * This would happen when CRDA was not present and
1395 * OLD_REGULATORY was enabled. We intersect our Country 2015 * OLD_REGULATORY was enabled. We intersect our Country
1396 * IE rd and what was set on cfg80211 originally */ 2016 * IE rd and what was set on cfg80211 originally
2017 */
1397 intersected_rd = regdom_intersect(rd, cfg80211_regdomain); 2018 intersected_rd = regdom_intersect(rd, cfg80211_regdomain);
1398 } 2019 }
1399 2020
1400 if (!intersected_rd) 2021 if (!intersected_rd)
1401 return -EINVAL; 2022 return -EINVAL;
1402 2023
1403 drv = wiphy_to_dev(wiphy); 2024 drv = wiphy_to_dev(request_wiphy);
1404 2025
1405 drv->country_ie_alpha2[0] = rd->alpha2[0]; 2026 drv->country_ie_alpha2[0] = rd->alpha2[0];
1406 drv->country_ie_alpha2[1] = rd->alpha2[1]; 2027 drv->country_ie_alpha2[1] = rd->alpha2[1];
@@ -1418,13 +2039,17 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1418} 2039}
1419 2040
1420 2041
1421/* Use this call to set the current regulatory domain. Conflicts with 2042/*
2043 * Use this call to set the current regulatory domain. Conflicts with
1422 * multiple drivers can be ironed out later. Caller must've already 2044 * multiple drivers can be ironed out later. Caller must've already
1423 * kmalloc'd the rd structure. Caller must hold cfg80211_drv_mutex */ 2045 * kmalloc'd the rd structure. Caller must hold cfg80211_mutex
2046 */
1424int set_regdom(const struct ieee80211_regdomain *rd) 2047int set_regdom(const struct ieee80211_regdomain *rd)
1425{ 2048{
1426 int r; 2049 int r;
1427 2050
2051 assert_cfg80211_lock();
2052
1428 /* Note that this doesn't update the wiphys, this is done below */ 2053 /* Note that this doesn't update the wiphys, this is done below */
1429 r = __set_regdom(rd); 2054 r = __set_regdom(rd);
1430 if (r) { 2055 if (r) {
@@ -1444,53 +2069,82 @@ int set_regdom(const struct ieee80211_regdomain *rd)
1444 return r; 2069 return r;
1445} 2070}
1446 2071
1447/* Caller must hold cfg80211_drv_mutex */ 2072/* Caller must hold cfg80211_mutex */
1448void reg_device_remove(struct wiphy *wiphy) 2073void reg_device_remove(struct wiphy *wiphy)
1449{ 2074{
1450 if (!last_request || !last_request->wiphy) 2075 struct wiphy *request_wiphy;
2076
2077 assert_cfg80211_lock();
2078
2079 request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
2080
2081 kfree(wiphy->regd);
2082 if (!last_request || !request_wiphy)
1451 return; 2083 return;
1452 if (last_request->wiphy != wiphy) 2084 if (request_wiphy != wiphy)
1453 return; 2085 return;
1454 last_request->wiphy = NULL; 2086 last_request->wiphy_idx = WIPHY_IDX_STALE;
1455 last_request->country_ie_env = ENVIRON_ANY; 2087 last_request->country_ie_env = ENVIRON_ANY;
1456} 2088}
1457 2089
1458int regulatory_init(void) 2090int regulatory_init(void)
1459{ 2091{
1460 int err; 2092 int err = 0;
1461 2093
1462 reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); 2094 reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
1463 if (IS_ERR(reg_pdev)) 2095 if (IS_ERR(reg_pdev))
1464 return PTR_ERR(reg_pdev); 2096 return PTR_ERR(reg_pdev);
1465 2097
2098 spin_lock_init(&reg_requests_lock);
2099 spin_lock_init(&reg_pending_beacons_lock);
2100
1466#ifdef CONFIG_WIRELESS_OLD_REGULATORY 2101#ifdef CONFIG_WIRELESS_OLD_REGULATORY
1467 cfg80211_regdomain = static_regdom(ieee80211_regdom); 2102 cfg80211_regdomain = static_regdom(ieee80211_regdom);
1468 2103
1469 printk(KERN_INFO "cfg80211: Using static regulatory domain info\n"); 2104 printk(KERN_INFO "cfg80211: Using static regulatory domain info\n");
1470 print_regdomain_info(cfg80211_regdomain); 2105 print_regdomain_info(cfg80211_regdomain);
1471 /* The old code still requests for a new regdomain and if 2106 /*
2107 * The old code still requests for a new regdomain and if
1472 * you have CRDA you get it updated, otherwise you get 2108 * you have CRDA you get it updated, otherwise you get
1473 * stuck with the static values. We ignore "EU" code as 2109 * stuck with the static values. We ignore "EU" code as
1474 * that is not a valid ISO / IEC 3166 alpha2 */ 2110 * that is not a valid ISO / IEC 3166 alpha2
2111 */
1475 if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U') 2112 if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U')
1476 err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, 2113 err = regulatory_hint_core(ieee80211_regdom);
1477 ieee80211_regdom, 0, ENVIRON_ANY);
1478#else 2114#else
1479 cfg80211_regdomain = cfg80211_world_regdom; 2115 cfg80211_regdomain = cfg80211_world_regdom;
1480 2116
1481 err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", 0, ENVIRON_ANY); 2117 err = regulatory_hint_core("00");
1482 if (err) 2118#endif
1483 printk(KERN_ERR "cfg80211: calling CRDA failed - " 2119 if (err) {
1484 "unable to update world regulatory domain, " 2120 if (err == -ENOMEM)
1485 "using static definition\n"); 2121 return err;
2122 /*
2123 * N.B. kobject_uevent_env() can fail mainly for when we're out
2124 * memory which is handled and propagated appropriately above
2125 * but it can also fail during a netlink_broadcast() or during
2126 * early boot for call_usermodehelper(). For now treat these
2127 * errors as non-fatal.
2128 */
2129 printk(KERN_ERR "cfg80211: kobject_uevent_env() was unable "
2130 "to call CRDA during init");
2131#ifdef CONFIG_CFG80211_REG_DEBUG
2132 /* We want to find out exactly why when debugging */
2133 WARN_ON(err);
1486#endif 2134#endif
2135 }
1487 2136
1488 return 0; 2137 return 0;
1489} 2138}
1490 2139
1491void regulatory_exit(void) 2140void regulatory_exit(void)
1492{ 2141{
1493 mutex_lock(&cfg80211_drv_mutex); 2142 struct regulatory_request *reg_request, *tmp;
2143 struct reg_beacon *reg_beacon, *btmp;
2144
2145 cancel_work_sync(&reg_work);
2146
2147 mutex_lock(&cfg80211_mutex);
1494 2148
1495 reset_regdomains(); 2149 reset_regdomains();
1496 2150
@@ -1501,5 +2155,33 @@ void regulatory_exit(void)
1501 2155
1502 platform_device_unregister(reg_pdev); 2156 platform_device_unregister(reg_pdev);
1503 2157
1504 mutex_unlock(&cfg80211_drv_mutex); 2158 spin_lock_bh(&reg_pending_beacons_lock);
2159 if (!list_empty(&reg_pending_beacons)) {
2160 list_for_each_entry_safe(reg_beacon, btmp,
2161 &reg_pending_beacons, list) {
2162 list_del(&reg_beacon->list);
2163 kfree(reg_beacon);
2164 }
2165 }
2166 spin_unlock_bh(&reg_pending_beacons_lock);
2167
2168 if (!list_empty(&reg_beacon_list)) {
2169 list_for_each_entry_safe(reg_beacon, btmp,
2170 &reg_beacon_list, list) {
2171 list_del(&reg_beacon->list);
2172 kfree(reg_beacon);
2173 }
2174 }
2175
2176 spin_lock(&reg_requests_lock);
2177 if (!list_empty(&reg_requests_list)) {
2178 list_for_each_entry_safe(reg_request, tmp,
2179 &reg_requests_list, list) {
2180 list_del(&reg_request->list);
2181 kfree(reg_request);
2182 }
2183 }
2184 spin_unlock(&reg_requests_lock);
2185
2186 mutex_unlock(&cfg80211_mutex);
1505} 2187}
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index a76ea3ff7cd6..e37829a49dc4 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -1,9 +1,13 @@
1#ifndef __NET_WIRELESS_REG_H 1#ifndef __NET_WIRELESS_REG_H
2#define __NET_WIRELESS_REG_H 2#define __NET_WIRELESS_REG_H
3 3
4extern const struct ieee80211_regdomain *cfg80211_regdomain;
5
4bool is_world_regdom(const char *alpha2); 6bool is_world_regdom(const char *alpha2);
5bool reg_is_valid_request(const char *alpha2); 7bool reg_is_valid_request(const char *alpha2);
6 8
9int regulatory_hint_user(const char *alpha2);
10
7void reg_device_remove(struct wiphy *wiphy); 11void reg_device_remove(struct wiphy *wiphy);
8 12
9int regulatory_init(void); 13int regulatory_init(void);
@@ -11,34 +15,25 @@ void regulatory_exit(void);
11 15
12int set_regdom(const struct ieee80211_regdomain *rd); 16int set_regdom(const struct ieee80211_regdomain *rd);
13 17
14enum environment_cap {
15 ENVIRON_ANY,
16 ENVIRON_INDOOR,
17 ENVIRON_OUTDOOR,
18};
19
20
21/** 18/**
22 * __regulatory_hint - hint to the wireless core a regulatory domain 19 * regulatory_hint_found_beacon - hints a beacon was found on a channel
23 * @wiphy: if the hint comes from country information from an AP, this 20 * @wiphy: the wireless device where the beacon was found on
24 * is required to be set to the wiphy that received the information 21 * @beacon_chan: the channel on which the beacon was found on
25 * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain 22 * @gfp: context flags
26 * should be in.
27 * @country_ie_checksum: checksum of processed country IE, set this to 0
28 * if the hint did not come from a country IE
29 * @country_ie_env: the environment the IE told us we are in, %ENVIRON_*
30 *
31 * The Wireless subsystem can use this function to hint to the wireless core
32 * what it believes should be the current regulatory domain by giving it an
33 * ISO/IEC 3166 alpha2 country code it knows its regulatory domain should be
34 * in.
35 * 23 *
36 * Returns zero if all went fine, %-EALREADY if a regulatory domain had 24 * This informs the wireless core that a beacon from an AP was found on
37 * already been set or other standard error codes. 25 * the channel provided. This allows the wireless core to make educated
26 * guesses on regulatory to help with world roaming. This is only used for
27 * world roaming -- when we do not know our current location. This is
28 * only useful on channels 12, 13 and 14 on the 2 GHz band as channels
29 * 1-11 are already enabled by the world regulatory domain; and on
30 * non-radar 5 GHz channels.
38 * 31 *
32 * Drivers do not need to call this, cfg80211 will do it for you after a scan
33 * on a newly found BSS.
39 */ 34 */
40extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, 35int regulatory_hint_found_beacon(struct wiphy *wiphy,
41 const char *alpha2, u32 country_ie_checksum, 36 struct ieee80211_channel *beacon_chan,
42 enum environment_cap country_ie_env); 37 gfp_t gfp);
43 38
44#endif /* __NET_WIRELESS_REG_H */ 39#endif /* __NET_WIRELESS_REG_H */
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
new file mode 100644
index 000000000000..280dbcd02c15
--- /dev/null
+++ b/net/wireless/scan.c
@@ -0,0 +1,866 @@
1/*
2 * cfg80211 scan result handling
3 *
4 * Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
5 */
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/netdevice.h>
9#include <linux/wireless.h>
10#include <linux/nl80211.h>
11#include <linux/etherdevice.h>
12#include <net/arp.h>
13#include <net/cfg80211.h>
14#include <net/iw_handler.h>
15#include "core.h"
16#include "nl80211.h"
17
18#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ)
19
20void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted)
21{
22 struct net_device *dev;
23#ifdef CONFIG_WIRELESS_EXT
24 union iwreq_data wrqu;
25#endif
26
27 dev = dev_get_by_index(&init_net, request->ifidx);
28 if (!dev)
29 goto out;
30
31 WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req);
32 wiphy_to_dev(request->wiphy)->scan_req = NULL;
33
34 if (aborted)
35 nl80211_send_scan_aborted(wiphy_to_dev(request->wiphy), dev);
36 else
37 nl80211_send_scan_done(wiphy_to_dev(request->wiphy), dev);
38
39#ifdef CONFIG_WIRELESS_EXT
40 if (!aborted) {
41 memset(&wrqu, 0, sizeof(wrqu));
42
43 wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL);
44 }
45#endif
46
47 dev_put(dev);
48
49 out:
50 kfree(request);
51}
52EXPORT_SYMBOL(cfg80211_scan_done);
53
54static void bss_release(struct kref *ref)
55{
56 struct cfg80211_internal_bss *bss;
57
58 bss = container_of(ref, struct cfg80211_internal_bss, ref);
59 if (bss->pub.free_priv)
60 bss->pub.free_priv(&bss->pub);
61 kfree(bss);
62}
63
64/* must hold dev->bss_lock! */
65void cfg80211_bss_age(struct cfg80211_registered_device *dev,
66 unsigned long age_secs)
67{
68 struct cfg80211_internal_bss *bss;
69 unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC);
70
71 list_for_each_entry(bss, &dev->bss_list, list) {
72 bss->ts -= age_jiffies;
73 }
74}
75
76/* must hold dev->bss_lock! */
77void cfg80211_bss_expire(struct cfg80211_registered_device *dev)
78{
79 struct cfg80211_internal_bss *bss, *tmp;
80 bool expired = false;
81
82 list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) {
83 if (!time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE))
84 continue;
85 list_del(&bss->list);
86 rb_erase(&bss->rbn, &dev->bss_tree);
87 kref_put(&bss->ref, bss_release);
88 expired = true;
89 }
90
91 if (expired)
92 dev->bss_generation++;
93}
94
95static u8 *find_ie(u8 num, u8 *ies, size_t len)
96{
97 while (len > 2 && ies[0] != num) {
98 len -= ies[1] + 2;
99 ies += ies[1] + 2;
100 }
101 if (len < 2)
102 return NULL;
103 if (len < 2 + ies[1])
104 return NULL;
105 return ies;
106}
107
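find_ie() walks the information elements as 802.11 TLVs (one byte element ID, one byte length, payload). An illustrative sketch, not part of this file, of pulling an SSID out of a stored BSS with it; the example_* name is made up:

	static size_t example_copy_ssid(struct cfg80211_bss *bss,
					u8 *buf, size_t buflen)
	{
		u8 *ie = find_ie(WLAN_EID_SSID,
				 bss->information_elements,
				 bss->len_information_elements);

		if (!ie || ie[1] > buflen)
			return 0;

		memcpy(buf, ie + 2, ie[1]);	/* ie[0] = ID, ie[1] = length */
		return ie[1];
	}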
108static int cmp_ies(u8 num, u8 *ies1, size_t len1, u8 *ies2, size_t len2)
109{
110 const u8 *ie1 = find_ie(num, ies1, len1);
111 const u8 *ie2 = find_ie(num, ies2, len2);
112 int r;
113
114 if (!ie1 && !ie2)
115 return 0;
116 if (!ie1)
117 return -1;
118
119 r = memcmp(ie1 + 2, ie2 + 2, min(ie1[1], ie2[1]));
120 if (r == 0 && ie1[1] != ie2[1])
121 return ie2[1] - ie1[1];
122 return r;
123}
124
125static bool is_bss(struct cfg80211_bss *a,
126 const u8 *bssid,
127 const u8 *ssid, size_t ssid_len)
128{
129 const u8 *ssidie;
130
131 if (bssid && compare_ether_addr(a->bssid, bssid))
132 return false;
133
134 if (!ssid)
135 return true;
136
137 ssidie = find_ie(WLAN_EID_SSID,
138 a->information_elements,
139 a->len_information_elements);
140 if (!ssidie)
141 return false;
142 if (ssidie[1] != ssid_len)
143 return false;
144 return memcmp(ssidie + 2, ssid, ssid_len) == 0;
145}
146
147static bool is_mesh(struct cfg80211_bss *a,
148 const u8 *meshid, size_t meshidlen,
149 const u8 *meshcfg)
150{
151 const u8 *ie;
152
153 if (!is_zero_ether_addr(a->bssid))
154 return false;
155
156 ie = find_ie(WLAN_EID_MESH_ID,
157 a->information_elements,
158 a->len_information_elements);
159 if (!ie)
160 return false;
161 if (ie[1] != meshidlen)
162 return false;
163 if (memcmp(ie + 2, meshid, meshidlen))
164 return false;
165
166 ie = find_ie(WLAN_EID_MESH_CONFIG,
167 a->information_elements,
168 a->len_information_elements);
169 if (ie[1] != IEEE80211_MESH_CONFIG_LEN)
170 return false;
171
172 /*
173 * Ignore mesh capability (last two bytes of the IE) when
174 * comparing since that may differ between stations taking
175 * part in the same mesh.
176 */
177 return memcmp(ie + 2, meshcfg, IEEE80211_MESH_CONFIG_LEN - 2) == 0;
178}
179
180static int cmp_bss(struct cfg80211_bss *a,
181 struct cfg80211_bss *b)
182{
183 int r;
184
185 if (a->channel != b->channel)
186 return b->channel->center_freq - a->channel->center_freq;
187
188 r = memcmp(a->bssid, b->bssid, ETH_ALEN);
189 if (r)
190 return r;
191
192 if (is_zero_ether_addr(a->bssid)) {
193 r = cmp_ies(WLAN_EID_MESH_ID,
194 a->information_elements,
195 a->len_information_elements,
196 b->information_elements,
197 b->len_information_elements);
198 if (r)
199 return r;
200 return cmp_ies(WLAN_EID_MESH_CONFIG,
201 a->information_elements,
202 a->len_information_elements,
203 b->information_elements,
204 b->len_information_elements);
205 }
206
207 return cmp_ies(WLAN_EID_SSID,
208 a->information_elements,
209 a->len_information_elements,
210 b->information_elements,
211 b->len_information_elements);
212}
213
214struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
215 struct ieee80211_channel *channel,
216 const u8 *bssid,
217 const u8 *ssid, size_t ssid_len,
218 u16 capa_mask, u16 capa_val)
219{
220 struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
221 struct cfg80211_internal_bss *bss, *res = NULL;
222
223 spin_lock_bh(&dev->bss_lock);
224
225 list_for_each_entry(bss, &dev->bss_list, list) {
226 if ((bss->pub.capability & capa_mask) != capa_val)
227 continue;
228 if (channel && bss->pub.channel != channel)
229 continue;
230 if (is_bss(&bss->pub, bssid, ssid, ssid_len)) {
231 res = bss;
232 kref_get(&res->ref);
233 break;
234 }
235 }
236
237 spin_unlock_bh(&dev->bss_lock);
238 if (!res)
239 return NULL;
240 return &res->pub;
241}
242EXPORT_SYMBOL(cfg80211_get_bss);
243
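cfg80211_get_bss() hands back a referenced entry from the BSS table, so every successful lookup must be balanced with cfg80211_put_bss(). A short usage sketch with invented example_* naming:

	static bool example_bss_is_cached(struct wiphy *wiphy,
					  struct ieee80211_channel *chan,
					  const u8 *bssid)
	{
		struct cfg80211_bss *bss;

		bss = cfg80211_get_bss(wiphy, chan, bssid, NULL, 0,
				       WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
		if (!bss)
			return false;

		cfg80211_put_bss(bss);	/* drop the reference from the lookup */
		return true;
	}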
244struct cfg80211_bss *cfg80211_get_mesh(struct wiphy *wiphy,
245 struct ieee80211_channel *channel,
246 const u8 *meshid, size_t meshidlen,
247 const u8 *meshcfg)
248{
249 struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
250 struct cfg80211_internal_bss *bss, *res = NULL;
251
252 spin_lock_bh(&dev->bss_lock);
253
254 list_for_each_entry(bss, &dev->bss_list, list) {
255 if (channel && bss->pub.channel != channel)
256 continue;
257 if (is_mesh(&bss->pub, meshid, meshidlen, meshcfg)) {
258 res = bss;
259 kref_get(&res->ref);
260 break;
261 }
262 }
263
264 spin_unlock_bh(&dev->bss_lock);
265 if (!res)
266 return NULL;
267 return &res->pub;
268}
269EXPORT_SYMBOL(cfg80211_get_mesh);
270
271
272static void rb_insert_bss(struct cfg80211_registered_device *dev,
273 struct cfg80211_internal_bss *bss)
274{
275 struct rb_node **p = &dev->bss_tree.rb_node;
276 struct rb_node *parent = NULL;
277 struct cfg80211_internal_bss *tbss;
278 int cmp;
279
280 while (*p) {
281 parent = *p;
282 tbss = rb_entry(parent, struct cfg80211_internal_bss, rbn);
283
284 cmp = cmp_bss(&bss->pub, &tbss->pub);
285
286 if (WARN_ON(!cmp)) {
287 /* will sort of leak this BSS */
288 return;
289 }
290
291 if (cmp < 0)
292 p = &(*p)->rb_left;
293 else
294 p = &(*p)->rb_right;
295 }
296
297 rb_link_node(&bss->rbn, parent, p);
298 rb_insert_color(&bss->rbn, &dev->bss_tree);
299}
300
301static struct cfg80211_internal_bss *
302rb_find_bss(struct cfg80211_registered_device *dev,
303 struct cfg80211_internal_bss *res)
304{
305 struct rb_node *n = dev->bss_tree.rb_node;
306 struct cfg80211_internal_bss *bss;
307 int r;
308
309 while (n) {
310 bss = rb_entry(n, struct cfg80211_internal_bss, rbn);
311 r = cmp_bss(&res->pub, &bss->pub);
312
313 if (r == 0)
314 return bss;
315 else if (r < 0)
316 n = n->rb_left;
317 else
318 n = n->rb_right;
319 }
320
321 return NULL;
322}
323
324static struct cfg80211_internal_bss *
325cfg80211_bss_update(struct cfg80211_registered_device *dev,
326 struct cfg80211_internal_bss *res,
327 bool overwrite)
328{
329 struct cfg80211_internal_bss *found = NULL;
330 const u8 *meshid, *meshcfg;
331
332 /*
333 * The reference to "res" is donated to this function.
334 */
335
336 if (WARN_ON(!res->pub.channel)) {
337 kref_put(&res->ref, bss_release);
338 return NULL;
339 }
340
341 res->ts = jiffies;
342
343 if (is_zero_ether_addr(res->pub.bssid)) {
344 /* must be mesh, verify */
345 meshid = find_ie(WLAN_EID_MESH_ID, res->pub.information_elements,
346 res->pub.len_information_elements);
347 meshcfg = find_ie(WLAN_EID_MESH_CONFIG,
348 res->pub.information_elements,
349 res->pub.len_information_elements);
350 if (!meshid || !meshcfg ||
351 meshcfg[1] != IEEE80211_MESH_CONFIG_LEN) {
352 /* bogus mesh */
353 kref_put(&res->ref, bss_release);
354 return NULL;
355 }
356 }
357
358 spin_lock_bh(&dev->bss_lock);
359
360 found = rb_find_bss(dev, res);
361
362 if (found && overwrite) {
363 list_replace(&found->list, &res->list);
364 rb_replace_node(&found->rbn, &res->rbn,
365 &dev->bss_tree);
366 kref_put(&found->ref, bss_release);
367 found = res;
368 } else if (found) {
369 kref_get(&found->ref);
370 found->pub.beacon_interval = res->pub.beacon_interval;
371 found->pub.tsf = res->pub.tsf;
372 found->pub.signal = res->pub.signal;
373 found->pub.capability = res->pub.capability;
374 found->ts = res->ts;
375 kref_put(&res->ref, bss_release);
376 } else {
377 /* this "consumes" the reference */
378 list_add_tail(&res->list, &dev->bss_list);
379 rb_insert_bss(dev, res);
380 found = res;
381 }
382
383 dev->bss_generation++;
384 spin_unlock_bh(&dev->bss_lock);
385
386 kref_get(&found->ref);
387 return found;
388}
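
The ownership rules here are easy to get wrong, so spelled out: the caller donates its reference to "res"; the list and rbtree keep exactly one reference to whichever entry survives; and the function returns "found" with one extra reference for the caller. A stripped-down sketch of the same pattern with a hypothetical type (illustration only, not cfg80211 code; usual kref/list headers assumed):

/* "f" arrives with one reference (kref_init() done by its creator). */
struct foo {
	struct kref ref;
	struct list_head list;
};

static void foo_release(struct kref *ref)
{
	kfree(container_of(ref, struct foo, ref));
}

static struct foo *foo_publish(struct list_head *head, struct foo *f)
{
	list_add_tail(&f->list, head);	/* the list now owns the donated ref */
	kref_get(&f->ref);		/* extra reference handed back */
	return f;	/* caller does kref_put(&f->ref, foo_release) when done */
}
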
389
390struct cfg80211_bss *
391cfg80211_inform_bss_frame(struct wiphy *wiphy,
392 struct ieee80211_channel *channel,
393 struct ieee80211_mgmt *mgmt, size_t len,
394 s32 signal, gfp_t gfp)
395{
396 struct cfg80211_internal_bss *res;
397 size_t ielen = len - offsetof(struct ieee80211_mgmt,
398 u.probe_resp.variable);
399 bool overwrite;
400 size_t privsz = wiphy->bss_priv_size;
401
402 if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC &&
403 (signal < 0 || signal > 100)))
404 return NULL;
405
406 if (WARN_ON(!mgmt || !wiphy ||
407 len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable)))
408 return NULL;
409
410 res = kzalloc(sizeof(*res) + privsz + ielen, gfp);
411 if (!res)
412 return NULL;
413
414 memcpy(res->pub.bssid, mgmt->bssid, ETH_ALEN);
415 res->pub.channel = channel;
416 res->pub.signal = signal;
417 res->pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp);
418 res->pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
419 res->pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
420 /* point to after the private area */
421 res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz;
422 memcpy(res->pub.information_elements, mgmt->u.probe_resp.variable, ielen);
423 res->pub.len_information_elements = ielen;
424
425 kref_init(&res->ref);
426
427 overwrite = ieee80211_is_probe_resp(mgmt->frame_control);
428
429 res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, overwrite);
430 if (!res)
431 return NULL;
432
433 if (res->pub.capability & WLAN_CAPABILITY_ESS)
434 regulatory_hint_found_beacon(wiphy, channel, gfp);
435
436 /* cfg80211_bss_update gives us a referenced result */
437 return &res->pub;
438}
439EXPORT_SYMBOL(cfg80211_inform_bss_frame);
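
A typical caller is a driver's receive path handing every beacon or probe response to cfg80211. A hedged sketch (hypothetical driver function; assumes the hardware reports signal strength in the unit the wiphy declared, e.g. mBm, and that the usual cfg80211 headers are included):

/* Hypothetical driver RX path: report one scan result. */
static void drv_rx_scan_frame(struct wiphy *wiphy,
			      struct ieee80211_channel *chan,
			      struct ieee80211_mgmt *mgmt, size_t len,
			      s32 signal)
{
	struct cfg80211_bss *bss;

	bss = cfg80211_inform_bss_frame(wiphy, chan, mgmt, len,
					signal, GFP_ATOMIC);
	if (bss)
		cfg80211_put_bss(bss);	/* we keep no pointer, so drop the ref */
}
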
440
441void cfg80211_put_bss(struct cfg80211_bss *pub)
442{
443 struct cfg80211_internal_bss *bss;
444
445 if (!pub)
446 return;
447
448 bss = container_of(pub, struct cfg80211_internal_bss, pub);
449 kref_put(&bss->ref, bss_release);
450}
451EXPORT_SYMBOL(cfg80211_put_bss);
452
453void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
454{
455 struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
456 struct cfg80211_internal_bss *bss;
457
458 if (WARN_ON(!pub))
459 return;
460
461 bss = container_of(pub, struct cfg80211_internal_bss, pub);
462
463 spin_lock_bh(&dev->bss_lock);
464
465 list_del(&bss->list);
466 rb_erase(&bss->rbn, &dev->bss_tree);
467
468 spin_unlock_bh(&dev->bss_lock);
469
470 kref_put(&bss->ref, bss_release);
471}
472EXPORT_SYMBOL(cfg80211_unlink_bss);
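
cfg80211_unlink_bss() only drops the list's reference; a caller that obtained the BSS via cfg80211_get_bss() still owns its own reference and must put it separately. Sketch (hypothetical; assumes the driver has decided the BSS is gone and holds a lookup reference):

/* Hypothetical: remove a stale entry the driver knows has disappeared. */
static void drv_forget_bss(struct wiphy *wiphy, struct cfg80211_bss *bss)
{
	cfg80211_unlink_bss(wiphy, bss);	/* list and rbtree drop their ref */
	cfg80211_put_bss(bss);			/* drop the lookup reference */
}
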
473
474#ifdef CONFIG_WIRELESS_EXT
475int cfg80211_wext_siwscan(struct net_device *dev,
476 struct iw_request_info *info,
477 union iwreq_data *wrqu, char *extra)
478{
479 struct cfg80211_registered_device *rdev;
480 struct wiphy *wiphy;
481 struct iw_scan_req *wreq = NULL;
482 struct cfg80211_scan_request *creq;
483 int i, err, n_channels = 0;
484 enum ieee80211_band band;
485
486 if (!netif_running(dev))
487 return -ENETDOWN;
488
489 rdev = cfg80211_get_dev_from_ifindex(dev->ifindex);
490
491 if (IS_ERR(rdev))
492 return PTR_ERR(rdev);
493
494 if (rdev->scan_req) {
495 err = -EBUSY;
496 goto out;
497 }
498
499 wiphy = &rdev->wiphy;
500
501 for (band = 0; band < IEEE80211_NUM_BANDS; band++)
502 if (wiphy->bands[band])
503 n_channels += wiphy->bands[band]->n_channels;
504
505 creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) +
506 n_channels * sizeof(void *),
507 GFP_ATOMIC);
508 if (!creq) {
509 err = -ENOMEM;
510 goto out;
511 }
512
513 creq->wiphy = wiphy;
514 creq->ifidx = dev->ifindex;
515 creq->ssids = (void *)(creq + 1);
516 creq->channels = (void *)(creq->ssids + 1);
517 creq->n_channels = n_channels;
518 creq->n_ssids = 1;
519
520 /* all channels */
521 i = 0;
522 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
523 int j;
524 if (!wiphy->bands[band])
525 continue;
526 for (j = 0; j < wiphy->bands[band]->n_channels; j++) {
527 creq->channels[i] = &wiphy->bands[band]->channels[j];
528 i++;
529 }
530 }
531
532 /* translate scan request */
533 if (wrqu->data.length == sizeof(struct iw_scan_req)) {
534 wreq = (struct iw_scan_req *)extra;
535
536 if (wrqu->data.flags & IW_SCAN_THIS_ESSID) {
			if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) {
				/* don't leak the request or the device ref */
				kfree(creq);
				err = -EINVAL;
				goto out;
			}
539 memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len);
540 creq->ssids[0].ssid_len = wreq->essid_len;
541 }
542 if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE)
543 creq->n_ssids = 0;
544 }
545
546 rdev->scan_req = creq;
547 err = rdev->ops->scan(wiphy, dev, creq);
548 if (err) {
549 rdev->scan_req = NULL;
550 kfree(creq);
551 }
552 out:
553 cfg80211_put_dev(rdev);
554 return err;
555}
556EXPORT_SYMBOL(cfg80211_wext_siwscan);
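
Drivers that still expose Wireless Extensions can simply point their handler table at these compat helpers (declared alongside iw_handler/iw_handler_def in net/iw_handler.h); the table below is illustrative, with names made up and the usual cast for handlers that take struct iw_point * rather than union iwreq_data *:

/* Hypothetical wext handler table for a cfg80211-based driver. */
static const iw_handler drv_wext_handlers[] = {
	[SIOCSIWSCAN  - SIOCIWFIRST] = (iw_handler) cfg80211_wext_siwscan,
	[SIOCGIWSCAN  - SIOCIWFIRST] = (iw_handler) cfg80211_wext_giwscan,
	[SIOCGIWRANGE - SIOCIWFIRST] = (iw_handler) cfg80211_wext_giwrange,
};

static const struct iw_handler_def drv_wext_def = {
	.num_standard	= ARRAY_SIZE(drv_wext_handlers),
	.standard	= drv_wext_handlers,
};
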
557
558static void ieee80211_scan_add_ies(struct iw_request_info *info,
559 struct cfg80211_bss *bss,
560 char **current_ev, char *end_buf)
561{
562 u8 *pos, *end, *next;
563 struct iw_event iwe;
564
565 if (!bss->information_elements ||
566 !bss->len_information_elements)
567 return;
568
569 /*
570 * If needed, fragment the IEs buffer (at IE boundaries) into short
571 * enough fragments to fit into IW_GENERIC_IE_MAX octet messages.
572 */
573 pos = bss->information_elements;
574 end = pos + bss->len_information_elements;
575
576 while (end - pos > IW_GENERIC_IE_MAX) {
577 next = pos + 2 + pos[1];
578 while (next + 2 + next[1] - pos < IW_GENERIC_IE_MAX)
579 next = next + 2 + next[1];
580
581 memset(&iwe, 0, sizeof(iwe));
582 iwe.cmd = IWEVGENIE;
583 iwe.u.data.length = next - pos;
584 *current_ev = iwe_stream_add_point(info, *current_ev,
585 end_buf, &iwe, pos);
586
587 pos = next;
588 }
589
590 if (end > pos) {
591 memset(&iwe, 0, sizeof(iwe));
592 iwe.cmd = IWEVGENIE;
593 iwe.u.data.length = end - pos;
594 *current_ev = iwe_stream_add_point(info, *current_ev,
595 end_buf, &iwe, pos);
596 }
597}
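
The IE buffer being fragmented here is a flat sequence of TLVs: one octet element ID, one octet length, then that many octets of payload. For reference, a stand-alone walker over such a buffer (illustrative helper in the spirit of the find_ie() used by cfg80211_bss_update() above, not part of this file):

/* Return a pointer to the first element with the given ID, or NULL. */
static const u8 *find_element(u8 eid, const u8 *ies, size_t len)
{
	while (len >= 2 && len >= 2 + (size_t)ies[1]) {
		if (ies[0] == eid)
			return ies;		/* points at the ID octet */
		len -= 2 + ies[1];
		ies += 2 + ies[1];
	}
	return NULL;
}
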
598
599static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
600{
601 unsigned long end = jiffies;
602
603 if (end >= start)
604 return jiffies_to_msecs(end - start);
605
606 return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1);
607}
608
609static char *
610ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info,
611 struct cfg80211_internal_bss *bss, char *current_ev,
612 char *end_buf)
613{
614 struct iw_event iwe;
615 u8 *buf, *cfg, *p;
616 u8 *ie = bss->pub.information_elements;
617 int rem = bss->pub.len_information_elements, i, sig;
618 bool ismesh = false;
619
620 memset(&iwe, 0, sizeof(iwe));
621 iwe.cmd = SIOCGIWAP;
622 iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
623 memcpy(iwe.u.ap_addr.sa_data, bss->pub.bssid, ETH_ALEN);
624 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
625 IW_EV_ADDR_LEN);
626
627 memset(&iwe, 0, sizeof(iwe));
628 iwe.cmd = SIOCGIWFREQ;
629 iwe.u.freq.m = ieee80211_frequency_to_channel(bss->pub.channel->center_freq);
630 iwe.u.freq.e = 0;
631 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
632 IW_EV_FREQ_LEN);
633
634 memset(&iwe, 0, sizeof(iwe));
635 iwe.cmd = SIOCGIWFREQ;
636 iwe.u.freq.m = bss->pub.channel->center_freq;
637 iwe.u.freq.e = 6;
638 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
639 IW_EV_FREQ_LEN);
640
641 if (wiphy->signal_type != CFG80211_SIGNAL_TYPE_NONE) {
642 memset(&iwe, 0, sizeof(iwe));
643 iwe.cmd = IWEVQUAL;
644 iwe.u.qual.updated = IW_QUAL_LEVEL_UPDATED |
645 IW_QUAL_NOISE_INVALID |
646 IW_QUAL_QUAL_UPDATED;
647 switch (wiphy->signal_type) {
648 case CFG80211_SIGNAL_TYPE_MBM:
649 sig = bss->pub.signal / 100;
650 iwe.u.qual.level = sig;
651 iwe.u.qual.updated |= IW_QUAL_DBM;
652 if (sig < -110) /* rather bad */
653 sig = -110;
654 else if (sig > -40) /* perfect */
655 sig = -40;
656 /* will give a range of 0 .. 70 */
657 iwe.u.qual.qual = sig + 110;
658 break;
659 case CFG80211_SIGNAL_TYPE_UNSPEC:
660 iwe.u.qual.level = bss->pub.signal;
661 /* will give range 0 .. 100 */
662 iwe.u.qual.qual = bss->pub.signal;
663 break;
664 default:
665 /* not reached */
666 break;
667 }
668 current_ev = iwe_stream_add_event(info, current_ev, end_buf,
669 &iwe, IW_EV_QUAL_LEN);
670 }
671
672 memset(&iwe, 0, sizeof(iwe));
673 iwe.cmd = SIOCGIWENCODE;
674 if (bss->pub.capability & WLAN_CAPABILITY_PRIVACY)
675 iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
676 else
677 iwe.u.data.flags = IW_ENCODE_DISABLED;
678 iwe.u.data.length = 0;
679 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
680 &iwe, "");
681
682 while (rem >= 2) {
683 /* invalid data */
684 if (ie[1] > rem - 2)
685 break;
686
687 switch (ie[0]) {
688 case WLAN_EID_SSID:
689 memset(&iwe, 0, sizeof(iwe));
690 iwe.cmd = SIOCGIWESSID;
691 iwe.u.data.length = ie[1];
692 iwe.u.data.flags = 1;
693 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
694 &iwe, ie + 2);
695 break;
696 case WLAN_EID_MESH_ID:
697 memset(&iwe, 0, sizeof(iwe));
698 iwe.cmd = SIOCGIWESSID;
699 iwe.u.data.length = ie[1];
700 iwe.u.data.flags = 1;
701 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
702 &iwe, ie + 2);
703 break;
704 case WLAN_EID_MESH_CONFIG:
705 ismesh = true;
706 if (ie[1] != IEEE80211_MESH_CONFIG_LEN)
707 break;
708 buf = kmalloc(50, GFP_ATOMIC);
709 if (!buf)
710 break;
711 cfg = ie + 2;
712 memset(&iwe, 0, sizeof(iwe));
713 iwe.cmd = IWEVCUSTOM;
714 sprintf(buf, "Mesh network (version %d)", cfg[0]);
715 iwe.u.data.length = strlen(buf);
716 current_ev = iwe_stream_add_point(info, current_ev,
717 end_buf,
718 &iwe, buf);
719 sprintf(buf, "Path Selection Protocol ID: "
720 "0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3],
721 cfg[4]);
722 iwe.u.data.length = strlen(buf);
723 current_ev = iwe_stream_add_point(info, current_ev,
724 end_buf,
725 &iwe, buf);
726 sprintf(buf, "Path Selection Metric ID: "
727 "0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7],
728 cfg[8]);
729 iwe.u.data.length = strlen(buf);
730 current_ev = iwe_stream_add_point(info, current_ev,
731 end_buf,
732 &iwe, buf);
733 sprintf(buf, "Congestion Control Mode ID: "
734 "0x%02X%02X%02X%02X", cfg[9], cfg[10],
735 cfg[11], cfg[12]);
736 iwe.u.data.length = strlen(buf);
737 current_ev = iwe_stream_add_point(info, current_ev,
738 end_buf,
739 &iwe, buf);
740 sprintf(buf, "Channel Precedence: "
741 "0x%02X%02X%02X%02X", cfg[13], cfg[14],
742 cfg[15], cfg[16]);
743 iwe.u.data.length = strlen(buf);
744 current_ev = iwe_stream_add_point(info, current_ev,
745 end_buf,
746 &iwe, buf);
747 kfree(buf);
748 break;
749 case WLAN_EID_SUPP_RATES:
750 case WLAN_EID_EXT_SUPP_RATES:
751 /* display all supported rates in readable format */
752 p = current_ev + iwe_stream_lcp_len(info);
753
754 memset(&iwe, 0, sizeof(iwe));
755 iwe.cmd = SIOCGIWRATE;
756 /* Those two flags are ignored... */
757 iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
758
759 for (i = 0; i < ie[1]; i++) {
760 iwe.u.bitrate.value =
761 ((ie[i + 2] & 0x7f) * 500000);
762 p = iwe_stream_add_value(info, current_ev, p,
763 end_buf, &iwe, IW_EV_PARAM_LEN);
764 }
765 current_ev = p;
766 break;
767 }
768 rem -= ie[1] + 2;
769 ie += ie[1] + 2;
770 }
771
772 if (bss->pub.capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)
773 || ismesh) {
774 memset(&iwe, 0, sizeof(iwe));
775 iwe.cmd = SIOCGIWMODE;
776 if (ismesh)
777 iwe.u.mode = IW_MODE_MESH;
778 else if (bss->pub.capability & WLAN_CAPABILITY_ESS)
779 iwe.u.mode = IW_MODE_MASTER;
780 else
781 iwe.u.mode = IW_MODE_ADHOC;
782 current_ev = iwe_stream_add_event(info, current_ev, end_buf,
783 &iwe, IW_EV_UINT_LEN);
784 }
785
786 buf = kmalloc(30, GFP_ATOMIC);
787 if (buf) {
788 memset(&iwe, 0, sizeof(iwe));
789 iwe.cmd = IWEVCUSTOM;
790 sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->pub.tsf));
791 iwe.u.data.length = strlen(buf);
792 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
793 &iwe, buf);
794 memset(&iwe, 0, sizeof(iwe));
795 iwe.cmd = IWEVCUSTOM;
796 sprintf(buf, " Last beacon: %ums ago",
797 elapsed_jiffies_msecs(bss->ts));
798 iwe.u.data.length = strlen(buf);
799 current_ev = iwe_stream_add_point(info, current_ev,
800 end_buf, &iwe, buf);
801 kfree(buf);
802 }
803
804 ieee80211_scan_add_ies(info, &bss->pub, &current_ev, end_buf);
805
806 return current_ev;
807}
808
809
810static int ieee80211_scan_results(struct cfg80211_registered_device *dev,
811 struct iw_request_info *info,
812 char *buf, size_t len)
813{
814 char *current_ev = buf;
815 char *end_buf = buf + len;
816 struct cfg80211_internal_bss *bss;
817
818 spin_lock_bh(&dev->bss_lock);
819 cfg80211_bss_expire(dev);
820
821 list_for_each_entry(bss, &dev->bss_list, list) {
822 if (buf + len - current_ev <= IW_EV_ADDR_LEN) {
823 spin_unlock_bh(&dev->bss_lock);
824 return -E2BIG;
825 }
826 current_ev = ieee80211_bss(&dev->wiphy, info, bss,
827 current_ev, end_buf);
828 }
829 spin_unlock_bh(&dev->bss_lock);
830 return current_ev - buf;
831}
832
833
834int cfg80211_wext_giwscan(struct net_device *dev,
835 struct iw_request_info *info,
836 struct iw_point *data, char *extra)
837{
838 struct cfg80211_registered_device *rdev;
839 int res;
840
841 if (!netif_running(dev))
842 return -ENETDOWN;
843
844 rdev = cfg80211_get_dev_from_ifindex(dev->ifindex);
845
846 if (IS_ERR(rdev))
847 return PTR_ERR(rdev);
848
849 if (rdev->scan_req) {
850 res = -EAGAIN;
851 goto out;
852 }
853
854 res = ieee80211_scan_results(rdev, info, extra, data->length);
855 data->length = 0;
856 if (res >= 0) {
857 data->length = res;
858 res = 0;
859 }
860
861 out:
862 cfg80211_put_dev(rdev);
863 return res;
864}
865EXPORT_SYMBOL(cfg80211_wext_giwscan);
866#endif
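
From userspace, these two handlers sit behind the classic Wireless Extensions ioctls. A rough sketch of driving them (error handling trimmed; a real caller should also grow the buffer and retry when SIOCGIWSCAN fails with E2BIG, which is what the -E2BIG return above becomes):

/* Userspace sketch: trigger a scan, then poll for the results. */
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/wireless.h>

static int wext_scan(const char *ifname, char *buf, unsigned short buflen)
{
	struct iwreq req;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int ret;

	memset(&req, 0, sizeof(req));
	strncpy(req.ifr_name, ifname, IFNAMSIZ);

	ret = ioctl(fd, SIOCSIWSCAN, &req);	/* kicks rdev->ops->scan() */
	if (ret < 0)
		goto out;

	do {					/* -EAGAIN while the scan runs */
		sleep(1);
		req.u.data.pointer = buf;
		req.u.data.length = buflen;
		req.u.data.flags = 0;
		ret = ioctl(fd, SIOCGIWSCAN, &req);
	} while (ret < 0 && errno == EAGAIN);

out:
	close(fd);
	return ret < 0 ? -errno : req.u.data.length;
}
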
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 79a382877641..efe3c5c92b2d 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -31,7 +31,7 @@ static ssize_t name ## _show(struct device *dev, \
31	return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member); \
32}
33
-SHOW_FMT(index, "%d", idx);
+SHOW_FMT(index, "%d", wiphy_idx);
35SHOW_FMT(macaddress, "%pM", wiphy.perm_addr);
36
37static struct device_attribute ieee80211_dev_attrs[] = {
@@ -55,6 +55,41 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env)
55}
56#endif
57
58static int wiphy_suspend(struct device *dev, pm_message_t state)
59{
60 struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
61 int ret = 0;
62
63 rdev->suspend_at = get_seconds();
64
65 if (rdev->ops->suspend) {
66 rtnl_lock();
67 ret = rdev->ops->suspend(&rdev->wiphy);
68 rtnl_unlock();
69 }
70
71 return ret;
72}
73
74static int wiphy_resume(struct device *dev)
75{
76 struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
77 int ret = 0;
78
79 /* Age scan results with time spent in suspend */
80 spin_lock_bh(&rdev->bss_lock);
81 cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at);
82 spin_unlock_bh(&rdev->bss_lock);
83
84 if (rdev->ops->resume) {
85 rtnl_lock();
86 ret = rdev->ops->resume(&rdev->wiphy);
87 rtnl_unlock();
88 }
89
90 return ret;
91}
92
93struct class ieee80211_class = {
94	.name = "ieee80211",
95	.owner = THIS_MODULE,
@@ -63,6 +98,8 @@ struct class ieee80211_class = {
98#ifdef CONFIG_HOTPLUG
99	.dev_uevent = wiphy_uevent,
100#endif
101 .suspend = wiphy_suspend,
102 .resume = wiphy_resume,
103};
104
105int wiphy_sysfs_init(void)
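
The new class hooks delegate to two optional cfg80211_ops callbacks, each taking only the wiphy. A hypothetical driver implementation (helper and private-struct names are made up; usual cfg80211 headers assumed) might be:

/* Hypothetical driver-side suspend/resume, matching the calls above. */
static int drv_suspend(struct wiphy *wiphy)
{
	struct drv_priv *priv = wiphy_priv(wiphy);	/* assumed private struct */

	return drv_hw_sleep(priv);			/* hypothetical helper */
}

static int drv_resume(struct wiphy *wiphy)
{
	struct drv_priv *priv = wiphy_priv(wiphy);

	return drv_hw_wake(priv);			/* hypothetical helper */
}

static struct cfg80211_ops drv_cfg_ops = {
	/* ... .scan and friends ... */
	.suspend	= drv_suspend,
	.resume		= drv_resume,
};
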
diff --git a/net/wireless/util.c b/net/wireless/util.c
index e76cc28b0345..487cdd9bcffc 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -9,7 +9,7 @@
9
10struct ieee80211_rate *
11ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
-			    u64 basic_rates, int bitrate)
+			    u32 basic_rates, int bitrate)
13{
14	struct ieee80211_rate *result = &sband->bitrates[0];
15	int i;
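
The basic_rates argument is a bitmap with one bit per entry of sband->bitrates, and the helper returns the highest basic rate that is not faster than the given bitrate, which is useful when picking control-response rates. A tiny wrapper sketch (hypothetical):

/* Hypothetical: response bitrate (units of 100 kbit/s) for a TX rate. */
static u16 pick_response_bitrate(struct ieee80211_supported_band *sband,
				 u32 basic_rates, int tx_bitrate)
{
	struct ieee80211_rate *resp;

	resp = ieee80211_get_response_rate(sband, basic_rates, tx_bitrate);
	return resp->bitrate;
}
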
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 58e489fd4aed..b84a9b4fe96a 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -137,3 +137,100 @@ int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info,
137	return 0;
138}
139EXPORT_SYMBOL(cfg80211_wext_giwmode);
140
141
142int cfg80211_wext_giwrange(struct net_device *dev,
143 struct iw_request_info *info,
144 struct iw_point *data, char *extra)
145{
146 struct wireless_dev *wdev = dev->ieee80211_ptr;
147 struct iw_range *range = (struct iw_range *) extra;
148 enum ieee80211_band band;
149 int c = 0;
150
151 if (!wdev)
152 return -EOPNOTSUPP;
153
154 data->length = sizeof(struct iw_range);
155 memset(range, 0, sizeof(struct iw_range));
156
157 range->we_version_compiled = WIRELESS_EXT;
158 range->we_version_source = 21;
159 range->retry_capa = IW_RETRY_LIMIT;
160 range->retry_flags = IW_RETRY_LIMIT;
161 range->min_retry = 0;
162 range->max_retry = 255;
163 range->min_rts = 0;
164 range->max_rts = 2347;
165 range->min_frag = 256;
166 range->max_frag = 2346;
167
168 range->encoding_size[0] = 5;
169 range->encoding_size[1] = 13;
170 range->num_encoding_sizes = 2;
171 range->max_encoding_tokens = 4;
172
173 range->max_qual.updated = IW_QUAL_NOISE_INVALID;
174
175 switch (wdev->wiphy->signal_type) {
176 case CFG80211_SIGNAL_TYPE_NONE:
177 break;
178 case CFG80211_SIGNAL_TYPE_MBM:
179 range->max_qual.level = -110;
180 range->max_qual.qual = 70;
181 range->avg_qual.qual = 35;
182 range->max_qual.updated |= IW_QUAL_DBM;
183 range->max_qual.updated |= IW_QUAL_QUAL_UPDATED;
184 range->max_qual.updated |= IW_QUAL_LEVEL_UPDATED;
185 break;
186 case CFG80211_SIGNAL_TYPE_UNSPEC:
187 range->max_qual.level = 100;
188 range->max_qual.qual = 100;
189 range->avg_qual.qual = 50;
190 range->max_qual.updated |= IW_QUAL_QUAL_UPDATED;
191 range->max_qual.updated |= IW_QUAL_LEVEL_UPDATED;
192 break;
193 }
194
195 range->avg_qual.level = range->max_qual.level / 2;
196 range->avg_qual.noise = range->max_qual.noise / 2;
197 range->avg_qual.updated = range->max_qual.updated;
198
199 range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 |
200 IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP;
201
202
203 for (band = 0; band < IEEE80211_NUM_BANDS; band ++) {
204 int i;
205 struct ieee80211_supported_band *sband;
206
207 sband = wdev->wiphy->bands[band];
208
209 if (!sband)
210 continue;
211
212 for (i = 0; i < sband->n_channels && c < IW_MAX_FREQUENCIES; i++) {
213 struct ieee80211_channel *chan = &sband->channels[i];
214
215 if (!(chan->flags & IEEE80211_CHAN_DISABLED)) {
216 range->freq[c].i =
217 ieee80211_frequency_to_channel(
218 chan->center_freq);
219 range->freq[c].m = chan->center_freq;
220 range->freq[c].e = 6;
221 c++;
222 }
223 }
224 }
225 range->num_channels = c;
226 range->num_frequency = c;
227
228 IW_EVENT_CAPA_SET_KERNEL(range->event_capa);
229 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP);
230 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN);
231
232 range->scan_capa |= IW_SCAN_CAPA_ESSID;
233
234 return 0;
235}
236EXPORT_SYMBOL(cfg80211_wext_giwrange);
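
Userspace reads this structure through SIOCGIWRANGE, passing a buffer of at least sizeof(struct iw_range). Minimal sketch (error handling omitted):

/* Userspace sketch: query the range information filled in above. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/wireless.h>

static void print_range(const char *ifname)
{
	struct iw_range range;
	struct iwreq req;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&req, 0, sizeof(req));
	strncpy(req.ifr_name, ifname, IFNAMSIZ);
	req.u.data.pointer = &range;
	req.u.data.length = sizeof(range);

	if (ioctl(fd, SIOCGIWRANGE, &req) == 0)
		printf("%s: WE version %d, %d channels\n", ifname,
		       range.we_version_compiled, range.num_channels);
	close(fd);
}
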
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 9fc5b023d111..8f76f4009c24 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1609,7 +1609,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = {
1609SOCKOPS_WRAP(x25_proto, AF_X25);
1610
1611static struct packet_type x25_packet_type = {
-	.type = __constant_htons(ETH_P_X25),
+	.type = cpu_to_be16(ETH_P_X25),
1613	.func = x25_lapb_receive_frame,
1614};
1615
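
The x25 change is part of a tree-wide conversion: packet_type.type holds a big-endian (__be16) protocol number, and cpu_to_be16() folds to a constant at compile time, so the __constant_htons() helper is not needed in static initializers. The same style applies to any protocol tap, for example (hypothetical handler that consumes everything it sees):

/* Hypothetical packet tap, registered with dev_add_pack(&my_packet_type). */
static int my_rcv(struct sk_buff *skb, struct net_device *dev,
		  struct packet_type *pt, struct net_device *orig_dev)
{
	kfree_skb(skb);
	return NET_RX_SUCCESS;
}

static struct packet_type my_packet_type = {
	.type = cpu_to_be16(ETH_P_802_2),
	.func = my_rcv,
};
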