Diffstat (limited to 'net')
219 files changed, 21713 insertions, 3691 deletions
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 6ed711748f26..6fea0750662b 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -29,7 +29,7 @@ static struct llc_sap *snap_sap;
 /*
  *	Find a snap client by matching the 5 bytes.
  */
-static struct datalink_proto *find_snap_client(unsigned char *desc)
+static struct datalink_proto *find_snap_client(const unsigned char *desc)
 {
 	struct datalink_proto *proto = NULL, *p;
 
@@ -95,15 +95,16 @@ static int snap_request(struct datalink_proto *dl,
 EXPORT_SYMBOL(register_snap_client);
 EXPORT_SYMBOL(unregister_snap_client);
 
-static char snap_err_msg[] __initdata =
+static const char snap_err_msg[] __initconst =
 	KERN_CRIT "SNAP - unable to register with 802.2\n";
 
 static int __init snap_init(void)
 {
 	snap_sap = llc_sap_open(0xAA, snap_rcv);
-
-	if (!snap_sap)
+	if (!snap_sap) {
 		printk(snap_err_msg);
+		return -EBUSY;
+	}
 
 	return 0;
 }
@@ -121,7 +122,7 @@ module_exit(snap_exit);
 /*
  *	Register SNAP clients. We don't yet use this for IP.
  */
-struct datalink_proto *register_snap_client(unsigned char *desc,
+struct datalink_proto *register_snap_client(const unsigned char *desc,
 					    int (*rcvfunc)(struct sk_buff *,
 							   struct net_device *,
 							   struct packet_type *,
@@ -136,7 +137,7 @@ struct datalink_proto *register_snap_client(unsigned char *desc,
 
 	proto = kmalloc(sizeof(*proto), GFP_ATOMIC);
 	if (proto) {
-		memcpy(proto->type, desc,5);
+		memcpy(proto->type, desc, 5);
 		proto->rcvfunc = rcvfunc;
 		proto->header_length = 5 + 3; /* snap + 802.2 */
 		proto->request = snap_request;
diff --git a/net/802/tr.c b/net/802/tr.c
index 158150fee462..e7eb13084d71 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -486,6 +486,7 @@ static struct rif_cache *rif_get_idx(loff_t pos)
 }
 
 static void *rif_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(&rif_lock)
 {
 	spin_lock_irq(&rif_lock);
 
@@ -517,6 +518,7 @@ static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void rif_seq_stop(struct seq_file *seq, void *v)
+	__releases(&rif_lock)
 {
 	spin_unlock_irq(&rif_lock);
 }
@@ -668,3 +670,5 @@ module_init(rif_init);
 
 EXPORT_SYMBOL(tr_type_trans);
 EXPORT_SYMBOL(alloc_trdev);
+
+MODULE_LICENSE("GPL");
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 4163ea65bf41..2b7390e377b3 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -51,7 +51,7 @@ const char vlan_version[] = DRV_VERSION;
 static const char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
 static const char vlan_buggyright[] = "David S. Miller <davem@redhat.com>";
 
-static struct packet_type vlan_packet_type = {
+static struct packet_type vlan_packet_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_8021Q),
 	.func = vlan_skb_recv, /* VLAN receive method */
 };
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 70435af153f2..654e45f5719d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -1,12 +1,16 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
+#include <linux/netpoll.h>
 #include "vlan.h"
 
 /* VLAN rx hw acceleration helper.  This acts like netif_{rx,receive_skb}(). */
 int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
 		      u16 vlan_tci, int polling)
 {
+	if (netpoll_rx(skb))
+		return NET_RX_DROP;
+
 	if (skb_bond_should_drop(skb))
 		goto drop;
 
@@ -94,12 +98,15 @@ static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
 	return dev_gro_receive(napi, skb);
 
 drop:
-	return 2;
+	return GRO_DROP;
 }
 
 int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 		     unsigned int vlan_tci, struct sk_buff *skb)
 {
+	if (netpoll_rx_on(skb))
+		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci);
+
 	skb_gro_reset_offset(skb);
 
 	return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb);
@@ -114,6 +121,9 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 	if (!skb)
 		return NET_RX_DROP;
 
+	if (netpoll_rx_on(skb))
+		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci);
+
 	return napi_frags_finish(napi, skb,
 				 vlan_gro_common(napi, grp, vlan_tci, skb));
 }
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 4a19acd3a32b..1b34135cf990 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -553,7 +553,7 @@ static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa)
 	int err = 0;
 
 	if (netif_device_present(real_dev) && ops->ndo_neigh_setup)
-		err = ops->ndo_neigh_setup(dev, pa);
+		err = ops->ndo_neigh_setup(real_dev, pa);
 
 	return err;
 }
@@ -639,6 +639,7 @@ static int vlan_dev_init(struct net_device *dev)
 		dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN;
 		dev->netdev_ops = &vlan_netdev_ops;
 	}
+	netdev_resync_ops(dev);
 
 	if (is_vlan_dev(real_dev))
 		subclass = 1;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 1df0356f242b..c613ed08a5ee 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -417,7 +417,7 @@ static int p9_fd_write(struct p9_client *client, void *v, int len)
 	oldfs = get_fs();
 	set_fs(get_ds());
 	/* The cast to a user pointer is valid due to the set_fs() */
-	ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
+	ret = vfs_write(ts->wr, (__force void __user *)v, len, &ts->wr->f_pos);
 	set_fs(oldfs);
 
 	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
diff --git a/net/Kconfig b/net/Kconfig
index a12bae0e3fe9..93998a9c39c2 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -24,9 +24,6 @@ if NET
 
 menu "Networking options"
 
-config COMPAT_NET_DEV_OPS
-	def_bool y
-
 source "net/packet/Kconfig"
 source "net/unix/Kconfig"
 source "net/xfrm/Kconfig"
@@ -171,6 +168,7 @@ endif
 
 source "net/dccp/Kconfig"
 source "net/sctp/Kconfig"
+source "net/rds/Kconfig"
 source "net/tipc/Kconfig"
 source "net/atm/Kconfig"
 source "net/802/Kconfig"
@@ -221,6 +219,17 @@ config NET_TCPPROBE
 	  To compile this code as a module, choose M here: the
 	  module will be called tcp_probe.
 
+config NET_DROP_MONITOR
+	boolean "Network packet drop alerting service"
+	depends on INET && EXPERIMENTAL && TRACEPOINTS
+	---help---
+	  This feature provides an alerting service to userspace in the
+	  event that packets are discarded in the network stack.  Alerts
+	  are broadcast via netlink socket to any listening user space
+	  process.  If you don't need network drop alerts, or if you are ok
+	  just checking the various proc files and other utilities for
+	  drop statistics, say N here.
+
 endmenu
 
 endmenu
diff --git a/net/Makefile b/net/Makefile
index 0fcce89d7169..9e00a55a901b 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -49,6 +49,7 @@ obj-y += 8021q/
 endif
 obj-$(CONFIG_IP_DCCP)		+= dccp/
 obj-$(CONFIG_IP_SCTP)		+= sctp/
+obj-$(CONFIG_RDS)		+= rds/
 obj-y				+= wireless/
 obj-$(CONFIG_MAC80211)		+= mac80211/
 obj-$(CONFIG_TIPC)		+= tipc/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 510a6782da8f..3e0671df3a3f 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1860,12 +1860,12 @@ static struct notifier_block ddp_notifier = {
 	.notifier_call = ddp_device_event,
 };
 
-static struct packet_type ltalk_packet_type = {
+static struct packet_type ltalk_packet_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_LOCALTALK),
 	.func = ltalk_rcv,
 };
 
-static struct packet_type ppptalk_packet_type = {
+static struct packet_type ppptalk_packet_type __read_mostly = {
 	.type = cpu_to_be16(ETH_P_PPPTALK),
 	.func = atalk_rcv,
 };
@@ -1877,7 +1877,7 @@ EXPORT_SYMBOL(aarp_send_ddp);
 EXPORT_SYMBOL(atrtr_get_dev);
 EXPORT_SYMBOL(atalk_find_dev_addr);
 
-static char atalk_err_snap[] __initdata =
+static const char atalk_err_snap[] __initconst =
 	KERN_CRIT "Unable to register DDP with SNAP.\n";
 
 /* Called by proto.c on kernel start up */
diff --git a/net/atm/clip.c b/net/atm/clip.c
index da42fd06b61f..3dc0a3a42a57 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -552,10 +552,13 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
 	return error;
 }
 
+static const struct net_device_ops clip_netdev_ops = {
+	.ndo_start_xmit = clip_start_xmit,
+};
+
 static void clip_setup(struct net_device *dev)
 {
-	dev->hard_start_xmit = clip_start_xmit;
-	/* sg_xmit ... */
+	dev->netdev_ops = &clip_netdev_ops;
 	dev->type = ARPHRD_ATM;
 	dev->hard_header_len = RFC1483LLC_LEN;
 	dev->mtu = RFC1626_MTU;
@@ -615,7 +618,7 @@ static int clip_device_event(struct notifier_block *this, unsigned long event,
 	}
 
 	/* ignore non-CLIP devices */
-	if (dev->type != ARPHRD_ATM || dev->hard_start_xmit != clip_start_xmit)
+	if (dev->type != ARPHRD_ATM || dev->netdev_ops != &clip_netdev_ops)
 		return NOTIFY_DONE;
 
 	switch (event) {
diff --git a/net/atm/lec.c b/net/atm/lec.c
index c0cba9a037e8..199b6bb79f42 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -502,7 +502,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
 		priv->lane2_ops = NULL;
 		if (priv->lane_version > 1)
 			priv->lane2_ops = &lane2_ops;
-		if (dev->change_mtu(dev, mesg->content.config.mtu))
+		if (dev_set_mtu(dev, mesg->content.config.mtu))
 			printk("%s: change_mtu to %d failed\n", dev->name,
 			       mesg->content.config.mtu);
 		priv->is_proxy = mesg->content.config.is_proxy;
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 039d5cc72c3d..e5bf11453a18 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -286,33 +286,32 @@ static void start_mpc(struct mpoa_client *mpc, struct net_device *dev)
 {
 
 	dprintk("mpoa: (%s) start_mpc:\n", mpc->dev->name);
-	if (dev->hard_start_xmit == NULL) {
-		printk("mpoa: (%s) start_mpc: dev->hard_start_xmit == NULL, not starting\n",
-			dev->name);
-		return;
+	if (!dev->netdev_ops)
+		printk("mpoa: (%s) start_mpc not starting\n", dev->name);
+	else {
+		mpc->old_ops = dev->netdev_ops;
+		mpc->new_ops = *mpc->old_ops;
+		mpc->new_ops.ndo_start_xmit = mpc_send_packet;
+		dev->netdev_ops = &mpc->new_ops;
 	}
-	mpc->old_hard_start_xmit = dev->hard_start_xmit;
-	dev->hard_start_xmit = mpc_send_packet;
-
-	return;
 }
 
 static void stop_mpc(struct mpoa_client *mpc)
 {
-
+	struct net_device *dev = mpc->dev;
 	dprintk("mpoa: (%s) stop_mpc:", mpc->dev->name);
 
 	/* Lets not nullify lec device's dev->hard_start_xmit */
-	if (mpc->dev->hard_start_xmit != mpc_send_packet) {
+	if (dev->netdev_ops != &mpc->new_ops) {
 		dprintk(" mpc already stopped, not fatal\n");
 		return;
 	}
 	dprintk("\n");
-	mpc->dev->hard_start_xmit = mpc->old_hard_start_xmit;
-	mpc->old_hard_start_xmit = NULL;
-	/* close_shortcuts(mpc); ??? FIXME */
 
-	return;
+	dev->netdev_ops = mpc->old_ops;
+	mpc->old_ops = NULL;
+
+	/* close_shortcuts(mpc); ??? FIXME */
 }
 
 static const char *mpoa_device_type_string(char type) __attribute__ ((unused));
@@ -531,7 +530,6 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
  */
 static int mpc_send_packet(struct sk_buff *skb, struct net_device *dev)
 {
-	int retval;
 	struct mpoa_client *mpc;
 	struct ethhdr *eth;
 	int i = 0;
@@ -561,9 +559,7 @@ static int mpc_send_packet(struct sk_buff *skb, struct net_device *dev)
 	}
 
 non_ip:
-	retval = mpc->old_hard_start_xmit(skb,dev);
-
-	return retval;
+	return mpc->old_ops->ndo_start_xmit(skb,dev);
 }
 
 static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
diff --git a/net/atm/mpc.h b/net/atm/mpc.h
index 24c386c35f57..0919a88bbc70 100644
--- a/net/atm/mpc.h
+++ b/net/atm/mpc.h
@@ -15,7 +15,7 @@ struct mpoa_client {
 	struct mpoa_client *next;
 	struct net_device *dev;      /* lec in question */
 	int dev_num;                 /* e.g. 2 for lec2 */
-	int (*old_hard_start_xmit)(struct sk_buff *skb, struct net_device *dev);
+
 	struct atm_vcc *mpoad_vcc;   /* control channel to mpoad */
 	uint8_t mps_ctrl_addr[ATM_ESA_LEN];  /* MPS control ATM address */
 	uint8_t our_ctrl_addr[ATM_ESA_LEN];  /* MPC's control ATM address */
@@ -31,6 +31,9 @@ struct mpoa_client {
 	uint8_t *mps_macs;           /* array of MPS MAC addresses, >=1 */
 	int number_of_mps_macs;      /* number of the above MAC addresses */
 	struct mpc_parameters parameters;  /* parameters for this client */
+
+	const struct net_device_ops *old_ops;
+	struct net_device_ops new_ops;
 };
 
 
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index d127fd3ba5c6..7da5ebb84e97 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1435,6 +1435,11 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 	size_t size;
 	int lv, err, addr_len = msg->msg_namelen;
 
+	/* AX.25 empty data frame has no meaning : don't send */
+	if (len == 0) {
+		return (0);
+	}
+
 	if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT))
 		return -EINVAL;
 
@@ -1529,10 +1534,8 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 		dp = ax25->digipeat;
 	}
 
-	SOCK_DEBUG(sk, "AX.25: sendto: Addresses built.\n");
-
 	/* Build a packet */
-	SOCK_DEBUG(sk, "AX.25: sendto: building packet.\n");
+	SOCK_DEBUG(sk, "AX.25: sendto: Addresses built. Building packet.\n");
 
 	/* Assume the worst case */
 	size = len + ax25->ax25_dev->dev->hard_header_len;
@@ -1636,6 +1639,13 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
 	skb_reset_transport_header(skb);
 	copied = skb->len;
 
+	/* AX.25 empty data frame has no meaning : ignore it */
+	if (copied == 0) {
+		err = copied;
+		skb_free_datagram(sk, skb);
+		goto out;
+	}
+
 	if (copied > size) {
 		copied = size;
 		msg->msg_flags |= MSG_TRUNC;
@@ -1985,9 +1995,8 @@ static const struct proto_ops ax25_proto_ops = {
 /*
  *	Called by socket.c on kernel start up
  */
-static struct packet_type ax25_packet_type = {
+static struct packet_type ax25_packet_type __read_mostly = {
 	.type	=	cpu_to_be16(ETH_P_AX25),
-	.dev	=	NULL,	/* All devices */
 	.func	=	ax25_kiss_rcv,
 };
 
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 744ed3f07ef3..02b9baa1930b 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -41,14 +41,13 @@
 
 #include <net/bluetooth/bluetooth.h>
 
-#define VERSION "2.14"
+#define VERSION "2.15"
 
 /* Bluetooth sockets */
 #define BT_MAX_PROTO	8
 static struct net_proto_family *bt_proto[BT_MAX_PROTO];
 static DEFINE_RWLOCK(bt_proto_lock);
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key bt_lock_key[BT_MAX_PROTO];
 static const char *bt_key_strings[BT_MAX_PROTO] = {
 	"sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP",
@@ -86,11 +85,6 @@ static inline void bt_sock_reclassify_lock(struct socket *sock, int proto)
 			bt_slock_key_strings[proto], &bt_slock_key[proto],
 				bt_key_strings[proto], &bt_lock_key[proto]);
 }
-#else
-static inline void bt_sock_reclassify_lock(struct socket *sock, int proto)
-{
-}
-#endif
 
 int bt_sock_register(int proto, struct net_proto_family *ops)
 {
@@ -217,7 +211,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
 			continue;
 		}
 
-		if (sk->sk_state == BT_CONNECTED || !newsock) {
+		if (sk->sk_state == BT_CONNECTED || !newsock ||
+						bt_sk(parent)->defer_setup) {
 			bt_accept_unlink(sk);
 			if (newsock)
 				sock_graft(sk, newsock);
@@ -232,7 +227,7 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
 EXPORT_SYMBOL(bt_accept_dequeue);
 
 int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
-				struct msghdr *msg, size_t len, int flags)
+					struct msghdr *msg, size_t len, int flags)
 {
 	int noblock = flags & MSG_DONTWAIT;
 	struct sock *sk = sock->sk;
@@ -277,7 +272,9 @@ static inline unsigned int bt_accept_poll(struct sock *parent)
 
 	list_for_each_safe(p, n, &bt_sk(parent)->accept_q) {
 		sk = (struct sock *) list_entry(p, struct bt_sock, accept_q);
-		if (sk->sk_state == BT_CONNECTED)
+		if (sk->sk_state == BT_CONNECTED ||
+				(bt_sk(parent)->defer_setup &&
+					sk->sk_state == BT_CONNECT2))
 			return POLLIN | POLLRDNORM;
 	}
 
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index c9cac7719efe..0073ec8495da 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -126,8 +126,7 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const
 
 	session->reassembly[id] = nskb;
 
-	if (skb)
-		kfree_skb(skb);
+	kfree_skb(skb);
 }
 
 static inline int cmtp_recv_frame(struct cmtp_session *session, struct sk_buff *skb)
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index a4a789f24c8d..1181db08d9de 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -123,6 +123,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle)
 	conn->state = BT_CONNECT;
 	conn->out = 1;
 
+	conn->attempt++;
+
 	cp.handle   = cpu_to_le16(handle);
 	cp.pkt_type = cpu_to_le16(conn->pkt_type);
 
@@ -139,6 +141,8 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle)
 	conn->state = BT_CONNECT;
 	conn->out = 1;
 
+	conn->attempt++;
+
 	cp.handle   = cpu_to_le16(handle);
 	cp.pkt_type = cpu_to_le16(conn->pkt_type);
 
@@ -155,6 +159,7 @@ static void hci_conn_timeout(unsigned long arg)
 {
 	struct hci_conn *conn = (void *) arg;
 	struct hci_dev *hdev = conn->hdev;
+	__u8 reason;
 
 	BT_DBG("conn %p state %d", conn, conn->state);
 
@@ -173,7 +178,8 @@ static void hci_conn_timeout(unsigned long arg)
 		break;
 	case BT_CONFIG:
 	case BT_CONNECTED:
-		hci_acl_disconn(conn, 0x13);
+		reason = hci_proto_disconn_ind(conn);
+		hci_acl_disconn(conn, reason);
 		break;
 	default:
 		conn->state = BT_CLOSED;
@@ -216,12 +222,13 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
 		break;
 	case SCO_LINK:
 		if (lmp_esco_capable(hdev))
-			conn->pkt_type = hdev->esco_type & SCO_ESCO_MASK;
+			conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
+					(hdev->esco_type & EDR_ESCO_MASK);
 		else
 			conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK;
 		break;
 	case ESCO_LINK:
-		conn->pkt_type = hdev->esco_type;
+		conn->pkt_type = hdev->esco_type & ~EDR_ESCO_MASK;
 		break;
 	}
 
@@ -280,6 +287,8 @@ int hci_conn_del(struct hci_conn *conn)
 
 	skb_queue_purge(&conn->data_q);
 
+	hci_conn_del_sysfs(conn);
+
 	return 0;
 }
 
@@ -325,7 +334,7 @@ EXPORT_SYMBOL(hci_get_route);
 
 /* Create SCO or ACL connection.
  * Device _must_ be locked */
-struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 auth_type)
+struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sec_level, __u8 auth_type)
 {
 	struct hci_conn *acl;
 	struct hci_conn *sco;
@@ -340,6 +349,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
 	hci_conn_hold(acl);
 
 	if (acl->state == BT_OPEN || acl->state == BT_CLOSED) {
+		acl->sec_level = sec_level;
 		acl->auth_type = auth_type;
 		hci_acl_connect(acl);
 	}
@@ -385,51 +395,59 @@ int hci_conn_check_link_mode(struct hci_conn *conn)
 EXPORT_SYMBOL(hci_conn_check_link_mode);
 
 /* Authenticate remote device */
-int hci_conn_auth(struct hci_conn *conn)
+static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
 {
 	BT_DBG("conn %p", conn);
 
-	if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) {
-		if (!(conn->auth_type & 0x01)) {
-			conn->auth_type |= 0x01;
-			conn->link_mode &= ~HCI_LM_AUTH;
-		}
-	}
-
-	if (conn->link_mode & HCI_LM_AUTH)
+	if (sec_level > conn->sec_level)
+		conn->sec_level = sec_level;
+	else if (conn->link_mode & HCI_LM_AUTH)
 		return 1;
 
+	conn->auth_type = auth_type;
+
 	if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
 		struct hci_cp_auth_requested cp;
 		cp.handle = cpu_to_le16(conn->handle);
 		hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED,
 							sizeof(cp), &cp);
 	}
+
 	return 0;
 }
-EXPORT_SYMBOL(hci_conn_auth);
 
-/* Enable encryption */
-int hci_conn_encrypt(struct hci_conn *conn)
+/* Enable security */
+int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
 {
 	BT_DBG("conn %p", conn);
 
+	if (sec_level == BT_SECURITY_SDP)
+		return 1;
+
+	if (sec_level == BT_SECURITY_LOW) {
+		if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0)
+			return hci_conn_auth(conn, sec_level, auth_type);
+		else
+			return 1;
+	}
+
 	if (conn->link_mode & HCI_LM_ENCRYPT)
-		return hci_conn_auth(conn);
+		return hci_conn_auth(conn, sec_level, auth_type);
 
 	if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend))
 		return 0;
 
-	if (hci_conn_auth(conn)) {
+	if (hci_conn_auth(conn, sec_level, auth_type)) {
 		struct hci_cp_set_conn_encrypt cp;
 		cp.handle  = cpu_to_le16(conn->handle);
 		cp.encrypt = 1;
 		hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT,
 							sizeof(cp), &cp);
 	}
+
 	return 0;
 }
-EXPORT_SYMBOL(hci_conn_encrypt);
+EXPORT_SYMBOL(hci_conn_security);
 
 /* Change link key */
 int hci_conn_change_link_key(struct hci_conn *conn)
@@ -442,12 +460,13 @@ int hci_conn_change_link_key(struct hci_conn *conn)
 		hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY,
 							sizeof(cp), &cp);
 	}
+
 	return 0;
 }
 EXPORT_SYMBOL(hci_conn_change_link_key);
 
 /* Switch role */
-int hci_conn_switch_role(struct hci_conn *conn, uint8_t role)
+int hci_conn_switch_role(struct hci_conn *conn, __u8 role)
 {
 	BT_DBG("conn %p", conn);
 
@@ -460,6 +479,7 @@ int hci_conn_switch_role(struct hci_conn *conn, uint8_t role)
 		cp.role = role;
 		hci_send_cmd(conn->hdev, HCI_OP_SWITCH_ROLE, sizeof(cp), &cp);
 	}
+
 	return 0;
 }
 EXPORT_SYMBOL(hci_conn_switch_role);
@@ -542,9 +562,7 @@ void hci_conn_hash_flush(struct hci_dev *hdev)
 
 		c->state = BT_CLOSED;
 
-		hci_conn_del_sysfs(c);
-
-		hci_proto_disconn_ind(c, 0x16);
+		hci_proto_disconn_cfm(c, 0x16);
 		hci_conn_del(c);
 	}
 }
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index ba78cc1eb8d9..cd061510b6bd 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1565,8 +1565,7 @@ static void hci_cmd_task(unsigned long arg)
 
 	/* Send queued commands */
 	if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) {
-		if (hdev->sent_cmd)
-			kfree_skb(hdev->sent_cmd);
+		kfree_skb(hdev->sent_cmd);
 
 		if ((hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC))) {
 			atomic_dec(&hdev->cmd_cnt);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index f91ba690f5d2..55534244c3a0 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -484,6 +484,15 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, struct sk_buff *skb
 	if (hdev->features[4] & LMP_EV5)
 		hdev->esco_type |= (ESCO_EV5);
 
+	if (hdev->features[5] & LMP_EDR_ESCO_2M)
+		hdev->esco_type |= (ESCO_2EV3);
+
+	if (hdev->features[5] & LMP_EDR_ESCO_3M)
+		hdev->esco_type |= (ESCO_3EV3);
+
+	if (hdev->features[5] & LMP_EDR_3S_ESCO)
+		hdev->esco_type |= (ESCO_2EV5 | ESCO_3EV5);
+
 	BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name,
 					hdev->features[0], hdev->features[1],
 					hdev->features[2], hdev->features[3],
@@ -914,7 +923,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
 	if (ev->status) {
 		hci_proto_connect_cfm(conn, ev->status);
 		hci_conn_del(conn);
-	}
+	} else if (ev->link_type != ACL_LINK)
+		hci_proto_connect_cfm(conn, ev->status);
 
 unlock:
 	hci_dev_unlock(hdev);
@@ -1009,9 +1019,7 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff
 	if (conn) {
 		conn->state = BT_CLOSED;
 
-		hci_conn_del_sysfs(conn);
-
-		hci_proto_disconn_ind(conn, ev->reason);
+		hci_proto_disconn_cfm(conn, ev->reason);
 		hci_conn_del(conn);
 	}
 
@@ -1600,7 +1608,8 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b
 
 	if (conn->state == BT_CONFIG) {
 		if (!ev->status && hdev->ssp_mode > 0 &&
-				conn->ssp_mode > 0 && conn->out) {
+				conn->ssp_mode > 0 && conn->out &&
+				conn->sec_level != BT_SECURITY_SDP) {
 			struct hci_cp_auth_requested cp;
 			cp.handle = ev->handle;
 			hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED,
@@ -1637,6 +1646,13 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu
 		conn->type = SCO_LINK;
 	}
 
+	if (conn->out && ev->status == 0x1c && conn->attempt < 2) {
+		conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
+					(hdev->esco_type & EDR_ESCO_MASK);
+		hci_setup_sync(conn, conn->link->handle);
+		goto unlock;
+	}
+
 	if (!ev->status) {
 		conn->handle = __le16_to_cpu(ev->handle);
 		conn->state = BT_CONNECTED;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index b93748e224ff..ca4d3b40d5ce 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -50,9 +50,10 @@
 #include <net/bluetooth/hci_core.h>
 #include <net/bluetooth/l2cap.h>
 
-#define VERSION "2.11"
+#define VERSION "2.13"
 
-static u32 l2cap_feat_mask = 0x0000;
+static u32 l2cap_feat_mask = 0x0080;
+static u8 l2cap_fixed_chan[8] = { 0x02, };
 
 static const struct proto_ops l2cap_sock_ops;
 
@@ -77,9 +78,10 @@ static void l2cap_sock_timeout(unsigned long arg)
 
 	bh_lock_sock(sk);
 
-	if (sk->sk_state == BT_CONNECT &&
-			(l2cap_pi(sk)->link_mode & (L2CAP_LM_AUTH |
-					L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)))
+	if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)
+		reason = ECONNREFUSED;
+	else if (sk->sk_state == BT_CONNECT &&
+				l2cap_pi(sk)->sec_level != BT_SECURITY_SDP)
 		reason = ECONNREFUSED;
 	else
 		reason = ETIMEDOUT;
@@ -204,6 +206,8 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so
 
 	BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid);
 
+	conn->disc_reason = 0x13;
+
 	l2cap_pi(sk)->conn = conn;
 
 	if (sk->sk_type == SOCK_SEQPACKET) {
@@ -259,18 +263,35 @@ static void l2cap_chan_del(struct sock *sk, int err)
 }
 
 /* Service level security */
-static inline int l2cap_check_link_mode(struct sock *sk)
+static inline int l2cap_check_security(struct sock *sk)
 {
 	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+	__u8 auth_type;
 
-	if ((l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) ||
-			(l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE))
-		return hci_conn_encrypt(conn->hcon);
+	if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) {
+		if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
+			auth_type = HCI_AT_NO_BONDING_MITM;
+		else
+			auth_type = HCI_AT_NO_BONDING;
 
-	if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH)
-		return hci_conn_auth(conn->hcon);
+		if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW)
+			l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
+	} else {
+		switch (l2cap_pi(sk)->sec_level) {
+		case BT_SECURITY_HIGH:
+			auth_type = HCI_AT_GENERAL_BONDING_MITM;
+			break;
+		case BT_SECURITY_MEDIUM:
+			auth_type = HCI_AT_GENERAL_BONDING;
+			break;
+		default:
+			auth_type = HCI_AT_NO_BONDING;
+			break;
+		}
+	}
 
-	return 1;
+	return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level,
+								auth_type);
 }
 
 static inline u8 l2cap_get_ident(struct l2cap_conn *conn)
@@ -312,7 +333,10 @@ static void l2cap_do_start(struct sock *sk)
 	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
 
 	if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) {
-		if (l2cap_check_link_mode(sk)) {
+		if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
+			return;
+
+		if (l2cap_check_security(sk)) {
 			struct l2cap_conn_req req;
 			req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 			req.psm = l2cap_pi(sk)->psm;
@@ -356,7 +380,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 		}
 
 		if (sk->sk_state == BT_CONNECT) {
-			if (l2cap_check_link_mode(sk)) {
+			if (l2cap_check_security(sk)) {
 				struct l2cap_conn_req req;
 				req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
 				req.psm = l2cap_pi(sk)->psm;
@@ -371,10 +395,18 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 			rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
 			rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
 
-			if (l2cap_check_link_mode(sk)) {
-				sk->sk_state = BT_CONFIG;
-				rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
-				rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
+			if (l2cap_check_security(sk)) {
+				if (bt_sk(sk)->defer_setup) {
+					struct sock *parent = bt_sk(sk)->parent;
+					rsp.result = cpu_to_le16(L2CAP_CR_PEND);
+					rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND);
+					parent->sk_data_ready(parent, 0);
+
+				} else {
+					sk->sk_state = BT_CONFIG;
+					rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
+					rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
+				}
 			} else {
 				rsp.result = cpu_to_le16(L2CAP_CR_PEND);
 				rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND);
@@ -426,7 +458,7 @@ static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err)
 	read_lock(&l->lock);
 
 	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
-		if (l2cap_pi(sk)->link_mode & L2CAP_LM_RELIABLE)
+		if (l2cap_pi(sk)->force_reliable)
 			sk->sk_err = err;
 	}
 
@@ -437,6 +469,7 @@ static void l2cap_info_timeout(unsigned long arg)
 {
 	struct l2cap_conn *conn = (void *) arg;
 
+	conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
 	conn->info_ident = 0;
 
 	l2cap_conn_start(conn);
@@ -470,6 +503,8 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
 	spin_lock_init(&conn->lock);
 	rwlock_init(&conn->chan_list.lock);
 
+	conn->disc_reason = 0x13;
+
 	return conn;
 }
 
@@ -483,8 +518,7 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
 
 	BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
 
-	if (conn->rx_skb)
-		kfree_skb(conn->rx_skb);
+	kfree_skb(conn->rx_skb);
 
 	/* Kill channels */
 	while ((sk = conn->chan_list.head)) {
@@ -608,7 +642,6 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
 
 	case BT_CONNECTED:
 	case BT_CONFIG:
-	case BT_CONNECT2:
 		if (sk->sk_type == SOCK_SEQPACKET) {
 			struct l2cap_conn *conn = l2cap_pi(sk)->conn;
 			struct l2cap_disconn_req req;
@@ -624,6 +657,27 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
 		l2cap_chan_del(sk, reason);
 		break;
 
+	case BT_CONNECT2:
+		if (sk->sk_type == SOCK_SEQPACKET) {
+			struct l2cap_conn *conn = l2cap_pi(sk)->conn;
+			struct l2cap_conn_rsp rsp;
+			__u16 result;
+
+			if (bt_sk(sk)->defer_setup)
+				result = L2CAP_CR_SEC_BLOCK;
+			else
+				result = L2CAP_CR_BAD_PSM;
+
+			rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
+			rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
+			rsp.result = cpu_to_le16(result);
+			rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
+			l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
+					L2CAP_CONN_RSP, sizeof(rsp), &rsp);
+		} else
+			l2cap_chan_del(sk, reason);
+		break;
+
 	case BT_CONNECT:
 	case BT_DISCONN:
 		l2cap_chan_del(sk, reason);
@@ -653,13 +707,19 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
 
 	if (parent) {
 		sk->sk_type = parent->sk_type;
+		bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup;
+
 		pi->imtu = l2cap_pi(parent)->imtu;
 		pi->omtu = l2cap_pi(parent)->omtu;
-		pi->link_mode = l2cap_pi(parent)->link_mode;
+		pi->sec_level = l2cap_pi(parent)->sec_level;
+		pi->role_switch = l2cap_pi(parent)->role_switch;
+		pi->force_reliable = l2cap_pi(parent)->force_reliable;
 	} else {
 		pi->imtu = L2CAP_DEFAULT_MTU;
 		pi->omtu = 0;
-		pi->link_mode = 0;
+		pi->sec_level = BT_SECURITY_LOW;
+		pi->role_switch = 0;
+		pi->force_reliable = 0;
 	}
 
 	/* Default config options */
@@ -723,17 +783,24 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol)
 	return 0;
 }
 
-static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
+static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
 {
-	struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
 	struct sock *sk = sock->sk;
-	int err = 0;
+	struct sockaddr_l2 la;
+	int len, err = 0;
 
-	BT_DBG("sk %p, %s %d", sk, batostr(&la->l2_bdaddr), la->l2_psm);
+	BT_DBG("sk %p", sk);
 
 	if (!addr || addr->sa_family != AF_BLUETOOTH)
 		return -EINVAL;
 
+	memset(&la, 0, sizeof(la));
+	len = min_t(unsigned int, sizeof(la), alen);
+	memcpy(&la, addr, len);
+
+	if (la.l2_cid)
+		return -EINVAL;
+
 	lock_sock(sk);
 
 	if (sk->sk_state != BT_OPEN) {
@@ -741,7 +808,7 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_
 		goto done;
 	}
 
-	if (la->l2_psm && btohs(la->l2_psm) < 0x1001 &&
+	if (la.l2_psm && btohs(la.l2_psm) < 0x1001 &&
 				!capable(CAP_NET_BIND_SERVICE)) {
 		err = -EACCES;
 		goto done;
@@ -749,14 +816,17 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_
 
 	write_lock_bh(&l2cap_sk_list.lock);
 
-	if (la->l2_psm && __l2cap_get_sock_by_addr(la->l2_psm, &la->l2_bdaddr)) {
+	if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) {
 		err = -EADDRINUSE;
 	} else {
 		/* Save source address */
-		bacpy(&bt_sk(sk)->src, &la->l2_bdaddr);
-		l2cap_pi(sk)->psm = la->l2_psm;
-		l2cap_pi(sk)->sport = la->l2_psm;
+		bacpy(&bt_sk(sk)->src, &la.l2_bdaddr);
+		l2cap_pi(sk)->psm = la.l2_psm;
+		l2cap_pi(sk)->sport = la.l2_psm;
 		sk->sk_state = BT_BOUND;
+
+		if (btohs(la.l2_psm) == 0x0001 || btohs(la.l2_psm) == 0x0003)
+			l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
 	}
 
 	write_unlock_bh(&l2cap_sk_list.lock);
@@ -776,7 +846,8 @@ static int l2cap_do_connect(struct sock *sk)
 	__u8 auth_type;
 	int err = 0;
 
-	BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst), l2cap_pi(sk)->psm);
+	BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst),
+							l2cap_pi(sk)->psm);
 
 	if (!(hdev = hci_get_route(dst, src)))
 		return -EHOSTUNREACH;
@@ -785,21 +856,42 @@ static int l2cap_do_connect(struct sock *sk)
 
 	err = -ENOMEM;
 
-	if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH ||
-			l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT ||
-			l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) {
-		if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001))
+	if (sk->sk_type == SOCK_RAW) {
+		switch (l2cap_pi(sk)->sec_level) {
+		case BT_SECURITY_HIGH:
+			auth_type = HCI_AT_DEDICATED_BONDING_MITM;
+			break;
+		case BT_SECURITY_MEDIUM:
+			auth_type = HCI_AT_DEDICATED_BONDING;
+			break;
+		default:
+			auth_type = HCI_AT_NO_BONDING;
+			break;
+		}
+	} else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) {
+		if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
 			auth_type = HCI_AT_NO_BONDING_MITM;
 		else
-			auth_type = HCI_AT_GENERAL_BONDING_MITM;
-	} else {
-		if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001))
 			auth_type = HCI_AT_NO_BONDING;
-		else
+
+		if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW)
+			l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
+	} else {
+		switch (l2cap_pi(sk)->sec_level) {
+		case BT_SECURITY_HIGH:
+			auth_type = HCI_AT_GENERAL_BONDING_MITM;
+			break;
+		case BT_SECURITY_MEDIUM:
 			auth_type = HCI_AT_GENERAL_BONDING;
+			break;
+		default:
+			auth_type = HCI_AT_NO_BONDING;
+			break;
+		}
 	}
 
-	hcon = hci_connect(hdev, ACL_LINK, dst, auth_type);
+	hcon = hci_connect(hdev, ACL_LINK, dst,
+				l2cap_pi(sk)->sec_level, auth_type);
 	if (!hcon)
 		goto done;
 
@@ -835,20 +927,25 @@ done:
 
 static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
 {
-	struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
 	struct sock *sk = sock->sk;
-	int err = 0;
-
-	lock_sock(sk);
+	struct sockaddr_l2 la;
+	int len, err = 0;
 
 	BT_DBG("sk %p", sk);
 
-	if (addr->sa_family != AF_BLUETOOTH || alen < sizeof(struct sockaddr_l2)) {
-		err = -EINVAL;
-		goto done;
-	}
+	if (!addr || addr->sa_family != AF_BLUETOOTH)
+		return -EINVAL;
+
+	memset(&la, 0, sizeof(la));
+	len = min_t(unsigned int, sizeof(la), alen);
+	memcpy(&la, addr, len);
+
+	if (la.l2_cid)
+		return -EINVAL;
+
+	lock_sock(sk);
 
-	if (sk->sk_type == SOCK_SEQPACKET && !la->l2_psm) {
+	if (sk->sk_type == SOCK_SEQPACKET && !la.l2_psm) {
 		err = -EINVAL;
 		goto done;
 	}
@@ -875,8 +972,8 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
 	}
 
 	/* Set destination address and psm */
-	bacpy(&bt_sk(sk)->dst, &la->l2_bdaddr);
-	l2cap_pi(sk)->psm = la->l2_psm;
+	bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr);
+	l2cap_pi(sk)->psm = la.l2_psm;
 
 	if ((err = l2cap_do_connect(sk)))
 		goto done;
@@ -1000,12 +1097,16 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l
 	addr->sa_family = AF_BLUETOOTH;
 	*len = sizeof(struct sockaddr_l2);
 
-	if (peer)
+	if (peer) {
+		la->l2_psm = l2cap_pi(sk)->psm;
 		bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst);
-	else
+		la->l2_cid = htobs(l2cap_pi(sk)->dcid);
+	} else {
+		la->l2_psm = l2cap_pi(sk)->sport;
 		bacpy(&la->l2_bdaddr, &bt_sk(sk)->src);
+		la->l2_cid = htobs(l2cap_pi(sk)->scid);
+	}
 
-	la->l2_psm = l2cap_pi(sk)->psm;
 	return 0;
 }
 
@@ -1106,11 +1207,38 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 	return err;
 }
 
-static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
+static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags)
+{
+	struct sock *sk = sock->sk;
+
+	lock_sock(sk);
+
+	if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
+		struct l2cap_conn_rsp rsp;
+
+		sk->sk_state = BT_CONFIG;
+
+		rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
+		rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
1223 | rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); | ||
1224 | rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); | ||
1225 | l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident, | ||
1226 | L2CAP_CONN_RSP, sizeof(rsp), &rsp); | ||
1227 | |||
1228 | release_sock(sk); | ||
1229 | return 0; | ||
1230 | } | ||
1231 | |||
1232 | release_sock(sk); | ||
1233 | |||
1234 | return bt_sock_recvmsg(iocb, sock, msg, len, flags); | ||
1235 | } | ||
1236 | |||
1237 | static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, int optlen) | ||
1110 | { | 1238 | { |
1111 | struct sock *sk = sock->sk; | 1239 | struct sock *sk = sock->sk; |
1112 | struct l2cap_options opts; | 1240 | struct l2cap_options opts; |
1113 | int err = 0, len; | 1241 | int len, err = 0; |
1114 | u32 opt; | 1242 | u32 opt; |
1115 | 1243 | ||
1116 | BT_DBG("sk %p", sk); | 1244 | BT_DBG("sk %p", sk); |
@@ -1140,7 +1268,15 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch | |||
1140 | break; | 1268 | break; |
1141 | } | 1269 | } |
1142 | 1270 | ||
1143 | l2cap_pi(sk)->link_mode = opt; | 1271 | if (opt & L2CAP_LM_AUTH) |
1272 | l2cap_pi(sk)->sec_level = BT_SECURITY_LOW; | ||
1273 | if (opt & L2CAP_LM_ENCRYPT) | ||
1274 | l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM; | ||
1275 | if (opt & L2CAP_LM_SECURE) | ||
1276 | l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH; | ||
1277 | |||
1278 | l2cap_pi(sk)->role_switch = (opt & L2CAP_LM_MASTER); | ||
1279 | l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE); | ||
1144 | break; | 1280 | break; |
1145 | 1281 | ||
1146 | default: | 1282 | default: |
@@ -1152,12 +1288,77 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch | |||
1152 | return err; | 1288 | return err; |
1153 | } | 1289 | } |
1154 | 1290 | ||
1155 | static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) | 1291 | static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) |
1292 | { | ||
1293 | struct sock *sk = sock->sk; | ||
1294 | struct bt_security sec; | ||
1295 | int len, err = 0; | ||
1296 | u32 opt; | ||
1297 | |||
1298 | BT_DBG("sk %p", sk); | ||
1299 | |||
1300 | if (level == SOL_L2CAP) | ||
1301 | return l2cap_sock_setsockopt_old(sock, optname, optval, optlen); | ||
1302 | |||
1303 | if (level != SOL_BLUETOOTH) | ||
1304 | return -ENOPROTOOPT; | ||
1305 | |||
1306 | lock_sock(sk); | ||
1307 | |||
1308 | switch (optname) { | ||
1309 | case BT_SECURITY: | ||
1310 | if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) { | ||
1311 | err = -EINVAL; | ||
1312 | break; | ||
1313 | } | ||
1314 | |||
1315 | sec.level = BT_SECURITY_LOW; | ||
1316 | |||
1317 | len = min_t(unsigned int, sizeof(sec), optlen); | ||
1318 | if (copy_from_user((char *) &sec, optval, len)) { | ||
1319 | err = -EFAULT; | ||
1320 | break; | ||
1321 | } | ||
1322 | |||
1323 | if (sec.level < BT_SECURITY_LOW || | ||
1324 | sec.level > BT_SECURITY_HIGH) { | ||
1325 | err = -EINVAL; | ||
1326 | break; | ||
1327 | } | ||
1328 | |||
1329 | l2cap_pi(sk)->sec_level = sec.level; | ||
1330 | break; | ||
1331 | |||
1332 | case BT_DEFER_SETUP: | ||
1333 | if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { | ||
1334 | err = -EINVAL; | ||
1335 | break; | ||
1336 | } | ||
1337 | |||
1338 | if (get_user(opt, (u32 __user *) optval)) { | ||
1339 | err = -EFAULT; | ||
1340 | break; | ||
1341 | } | ||
1342 | |||
1343 | bt_sk(sk)->defer_setup = opt; | ||
1344 | break; | ||
1345 | |||
1346 | default: | ||
1347 | err = -ENOPROTOOPT; | ||
1348 | break; | ||
1349 | } | ||
1350 | |||
1351 | release_sock(sk); | ||
1352 | return err; | ||
1353 | } | ||
1354 | |||
1355 | static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) | ||
1156 | { | 1356 | { |
1157 | struct sock *sk = sock->sk; | 1357 | struct sock *sk = sock->sk; |
1158 | struct l2cap_options opts; | 1358 | struct l2cap_options opts; |
1159 | struct l2cap_conninfo cinfo; | 1359 | struct l2cap_conninfo cinfo; |
1160 | int len, err = 0; | 1360 | int len, err = 0; |
1361 | u32 opt; | ||
1161 | 1362 | ||
1162 | BT_DBG("sk %p", sk); | 1363 | BT_DBG("sk %p", sk); |
1163 | 1364 | ||
@@ -1180,12 +1381,36 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch | |||
1180 | break; | 1381 | break; |
1181 | 1382 | ||
1182 | case L2CAP_LM: | 1383 | case L2CAP_LM: |
1183 | if (put_user(l2cap_pi(sk)->link_mode, (u32 __user *) optval)) | 1384 | switch (l2cap_pi(sk)->sec_level) { |
1385 | case BT_SECURITY_LOW: | ||
1386 | opt = L2CAP_LM_AUTH; | ||
1387 | break; | ||
1388 | case BT_SECURITY_MEDIUM: | ||
1389 | opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT; | ||
1390 | break; | ||
1391 | case BT_SECURITY_HIGH: | ||
1392 | opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT | | ||
1393 | L2CAP_LM_SECURE; | ||
1394 | break; | ||
1395 | default: | ||
1396 | opt = 0; | ||
1397 | break; | ||
1398 | } | ||
1399 | |||
1400 | if (l2cap_pi(sk)->role_switch) | ||
1401 | opt |= L2CAP_LM_MASTER; | ||
1402 | |||
1403 | if (l2cap_pi(sk)->force_reliable) | ||
1404 | opt |= L2CAP_LM_RELIABLE; | ||
1405 | |||
1406 | if (put_user(opt, (u32 __user *) optval)) | ||
1184 | err = -EFAULT; | 1407 | err = -EFAULT; |
1185 | break; | 1408 | break; |
1186 | 1409 | ||
1187 | case L2CAP_CONNINFO: | 1410 | case L2CAP_CONNINFO: |
1188 | if (sk->sk_state != BT_CONNECTED) { | 1411 | if (sk->sk_state != BT_CONNECTED && |
1412 | !(sk->sk_state == BT_CONNECT2 && | ||
1413 | bt_sk(sk)->defer_setup)) { | ||
1189 | err = -ENOTCONN; | 1414 | err = -ENOTCONN; |
1190 | break; | 1415 | break; |
1191 | } | 1416 | } |
@@ -1208,6 +1433,60 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch | |||
1208 | return err; | 1433 | return err; |
1209 | } | 1434 | } |
1210 | 1435 | ||
1436 | static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) | ||
1437 | { | ||
1438 | struct sock *sk = sock->sk; | ||
1439 | struct bt_security sec; | ||
1440 | int len, err = 0; | ||
1441 | |||
1442 | BT_DBG("sk %p", sk); | ||
1443 | |||
1444 | if (level == SOL_L2CAP) | ||
1445 | return l2cap_sock_getsockopt_old(sock, optname, optval, optlen); | ||
1446 | |||
1447 | if (level != SOL_BLUETOOTH) | ||
1448 | return -ENOPROTOOPT; | ||
1449 | |||
1450 | if (get_user(len, optlen)) | ||
1451 | return -EFAULT; | ||
1452 | |||
1453 | lock_sock(sk); | ||
1454 | |||
1455 | switch (optname) { | ||
1456 | case BT_SECURITY: | ||
1457 | if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) { | ||
1458 | err = -EINVAL; | ||
1459 | break; | ||
1460 | } | ||
1461 | |||
1462 | sec.level = l2cap_pi(sk)->sec_level; | ||
1463 | |||
1464 | len = min_t(unsigned int, len, sizeof(sec)); | ||
1465 | if (copy_to_user(optval, (char *) &sec, len)) | ||
1466 | err = -EFAULT; | ||
1467 | |||
1468 | break; | ||
1469 | |||
1470 | case BT_DEFER_SETUP: | ||
1471 | if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { | ||
1472 | err = -EINVAL; | ||
1473 | break; | ||
1474 | } | ||
1475 | |||
1476 | if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval)) | ||
1477 | err = -EFAULT; | ||
1478 | |||
1479 | break; | ||
1480 | |||
1481 | default: | ||
1482 | err = -ENOPROTOOPT; | ||
1483 | break; | ||
1484 | } | ||
1485 | |||
1486 | release_sock(sk); | ||
1487 | return err; | ||
1488 | } | ||
1489 | |||
1211 | static int l2cap_sock_shutdown(struct socket *sock, int how) | 1490 | static int l2cap_sock_shutdown(struct socket *sock, int how) |
1212 | { | 1491 | { |
1213 | struct sock *sk = sock->sk; | 1492 | struct sock *sk = sock->sk; |
@@ -1270,11 +1549,6 @@ static void l2cap_chan_ready(struct sock *sk) | |||
1270 | */ | 1549 | */ |
1271 | parent->sk_data_ready(parent, 0); | 1550 | parent->sk_data_ready(parent, 0); |
1272 | } | 1551 | } |
1273 | |||
1274 | if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) { | ||
1275 | struct l2cap_conn *conn = l2cap_pi(sk)->conn; | ||
1276 | hci_conn_change_link_key(conn->hcon); | ||
1277 | } | ||
1278 | } | 1552 | } |
1279 | 1553 | ||
1280 | /* Copy frame to all raw sockets on that connection */ | 1554 | /* Copy frame to all raw sockets on that connection */ |
@@ -1549,8 +1823,11 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hd | |||
1549 | 1823 | ||
1550 | if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && | 1824 | if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && |
1551 | cmd->ident == conn->info_ident) { | 1825 | cmd->ident == conn->info_ident) { |
1552 | conn->info_ident = 0; | ||
1553 | del_timer(&conn->info_timer); | 1826 | del_timer(&conn->info_timer); |
1827 | |||
1828 | conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; | ||
1829 | conn->info_ident = 0; | ||
1830 | |||
1554 | l2cap_conn_start(conn); | 1831 | l2cap_conn_start(conn); |
1555 | } | 1832 | } |
1556 | 1833 | ||
@@ -1580,6 +1857,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd | |||
1580 | /* Check if the ACL is secure enough (if not SDP) */ | 1857 | /* Check if the ACL is secure enough (if not SDP) */ |
1581 | if (psm != cpu_to_le16(0x0001) && | 1858 | if (psm != cpu_to_le16(0x0001) && |
1582 | !hci_conn_check_link_mode(conn->hcon)) { | 1859 | !hci_conn_check_link_mode(conn->hcon)) { |
1860 | conn->disc_reason = 0x05; | ||
1583 | result = L2CAP_CR_SEC_BLOCK; | 1861 | result = L2CAP_CR_SEC_BLOCK; |
1584 | goto response; | 1862 | goto response; |
1585 | } | 1863 | } |
@@ -1621,11 +1899,18 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd | |||
1621 | 1899 | ||
1622 | l2cap_pi(sk)->ident = cmd->ident; | 1900 | l2cap_pi(sk)->ident = cmd->ident; |
1623 | 1901 | ||
1624 | if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { | 1902 | if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) { |
1625 | if (l2cap_check_link_mode(sk)) { | 1903 | if (l2cap_check_security(sk)) { |
1626 | sk->sk_state = BT_CONFIG; | 1904 | if (bt_sk(sk)->defer_setup) { |
1627 | result = L2CAP_CR_SUCCESS; | 1905 | sk->sk_state = BT_CONNECT2; |
1628 | status = L2CAP_CS_NO_INFO; | 1906 | result = L2CAP_CR_PEND; |
1907 | status = L2CAP_CS_AUTHOR_PEND; | ||
1908 | parent->sk_data_ready(parent, 0); | ||
1909 | } else { | ||
1910 | sk->sk_state = BT_CONFIG; | ||
1911 | result = L2CAP_CR_SUCCESS; | ||
1912 | status = L2CAP_CS_NO_INFO; | ||
1913 | } | ||
1629 | } else { | 1914 | } else { |
1630 | sk->sk_state = BT_CONNECT2; | 1915 | sk->sk_state = BT_CONNECT2; |
1631 | result = L2CAP_CR_PEND; | 1916 | result = L2CAP_CR_PEND; |
@@ -1695,11 +1980,14 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd | |||
1695 | l2cap_pi(sk)->dcid = dcid; | 1980 | l2cap_pi(sk)->dcid = dcid; |
1696 | l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; | 1981 | l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; |
1697 | 1982 | ||
1983 | l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND; | ||
1984 | |||
1698 | l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, | 1985 | l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, |
1699 | l2cap_build_conf_req(sk, req), req); | 1986 | l2cap_build_conf_req(sk, req), req); |
1700 | break; | 1987 | break; |
1701 | 1988 | ||
1702 | case L2CAP_CR_PEND: | 1989 | case L2CAP_CR_PEND: |
1990 | l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; | ||
1703 | break; | 1991 | break; |
1704 | 1992 | ||
1705 | default: | 1993 | default: |
@@ -1908,6 +2196,14 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm | |||
1908 | put_unaligned(cpu_to_le32(l2cap_feat_mask), (__le32 *) rsp->data); | 2196 | put_unaligned(cpu_to_le32(l2cap_feat_mask), (__le32 *) rsp->data); |
1909 | l2cap_send_cmd(conn, cmd->ident, | 2197 | l2cap_send_cmd(conn, cmd->ident, |
1910 | L2CAP_INFO_RSP, sizeof(buf), buf); | 2198 | L2CAP_INFO_RSP, sizeof(buf), buf); |
2199 | } else if (type == L2CAP_IT_FIXED_CHAN) { | ||
2200 | u8 buf[12]; | ||
2201 | struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; | ||
2202 | rsp->type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); | ||
2203 | rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); | ||
2204 | memcpy(buf + 4, l2cap_fixed_chan, 8); | ||
2205 | l2cap_send_cmd(conn, cmd->ident, | ||
2206 | L2CAP_INFO_RSP, sizeof(buf), buf); | ||
1911 | } else { | 2207 | } else { |
1912 | struct l2cap_info_rsp rsp; | 2208 | struct l2cap_info_rsp rsp; |
1913 | rsp.type = cpu_to_le16(type); | 2209 | rsp.type = cpu_to_le16(type); |
@@ -1929,14 +2225,31 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm | |||
1929 | 2225 | ||
1930 | BT_DBG("type 0x%4.4x result 0x%2.2x", type, result); | 2226 | BT_DBG("type 0x%4.4x result 0x%2.2x", type, result); |
1931 | 2227 | ||
1932 | conn->info_ident = 0; | ||
1933 | |||
1934 | del_timer(&conn->info_timer); | 2228 | del_timer(&conn->info_timer); |
1935 | 2229 | ||
1936 | if (type == L2CAP_IT_FEAT_MASK) | 2230 | if (type == L2CAP_IT_FEAT_MASK) { |
1937 | conn->feat_mask = get_unaligned_le32(rsp->data); | 2231 | conn->feat_mask = get_unaligned_le32(rsp->data); |
1938 | 2232 | ||
1939 | l2cap_conn_start(conn); | 2233 | if (conn->feat_mask & 0x0080) { |
2234 | struct l2cap_info_req req; | ||
2235 | req.type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); | ||
2236 | |||
2237 | conn->info_ident = l2cap_get_ident(conn); | ||
2238 | |||
2239 | l2cap_send_cmd(conn, conn->info_ident, | ||
2240 | L2CAP_INFO_REQ, sizeof(req), &req); | ||
2241 | } else { | ||
2242 | conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; | ||
2243 | conn->info_ident = 0; | ||
2244 | |||
2245 | l2cap_conn_start(conn); | ||
2246 | } | ||
2247 | } else if (type == L2CAP_IT_FIXED_CHAN) { | ||
2248 | conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; | ||
2249 | conn->info_ident = 0; | ||
2250 | |||
2251 | l2cap_conn_start(conn); | ||
2252 | } | ||
1940 | 2253 | ||
1941 | return 0; | 2254 | return 0; |
1942 | } | 2255 | } |
@@ -2143,10 +2456,15 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) | |||
2143 | continue; | 2456 | continue; |
2144 | 2457 | ||
2145 | if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr)) { | 2458 | if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr)) { |
2146 | lm1 |= (HCI_LM_ACCEPT | l2cap_pi(sk)->link_mode); | 2459 | lm1 |= HCI_LM_ACCEPT; |
2460 | if (l2cap_pi(sk)->role_switch) | ||
2461 | lm1 |= HCI_LM_MASTER; | ||
2147 | exact++; | 2462 | exact++; |
2148 | } else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) | 2463 | } else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) { |
2149 | lm2 |= (HCI_LM_ACCEPT | l2cap_pi(sk)->link_mode); | 2464 | lm2 |= HCI_LM_ACCEPT; |
2465 | if (l2cap_pi(sk)->role_switch) | ||
2466 | lm2 |= HCI_LM_MASTER; | ||
2467 | } | ||
2150 | } | 2468 | } |
2151 | read_unlock(&l2cap_sk_list.lock); | 2469 | read_unlock(&l2cap_sk_list.lock); |
2152 | 2470 | ||
@@ -2172,89 +2490,48 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) | |||
2172 | return 0; | 2490 | return 0; |
2173 | } | 2491 | } |
2174 | 2492 | ||
2175 | static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason) | 2493 | static int l2cap_disconn_ind(struct hci_conn *hcon) |
2176 | { | 2494 | { |
2177 | BT_DBG("hcon %p reason %d", hcon, reason); | 2495 | struct l2cap_conn *conn = hcon->l2cap_data; |
2178 | 2496 | ||
2179 | if (hcon->type != ACL_LINK) | 2497 | BT_DBG("hcon %p", hcon); |
2180 | return 0; | ||
2181 | 2498 | ||
2182 | l2cap_conn_del(hcon, bt_err(reason)); | 2499 | if (hcon->type != ACL_LINK || !conn) |
2500 | return 0x13; | ||
2183 | 2501 | ||
2184 | return 0; | 2502 | return conn->disc_reason; |
2185 | } | 2503 | } |
2186 | 2504 | ||
2187 | static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) | 2505 | static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) |
2188 | { | 2506 | { |
2189 | struct l2cap_chan_list *l; | 2507 | BT_DBG("hcon %p reason %d", hcon, reason); |
2190 | struct l2cap_conn *conn = hcon->l2cap_data; | ||
2191 | struct sock *sk; | ||
2192 | 2508 | ||
2193 | if (!conn) | 2509 | if (hcon->type != ACL_LINK) |
2194 | return 0; | 2510 | return 0; |
2195 | 2511 | ||
2196 | l = &conn->chan_list; | 2512 | l2cap_conn_del(hcon, bt_err(reason)); |
2197 | |||
2198 | BT_DBG("conn %p", conn); | ||
2199 | |||
2200 | read_lock(&l->lock); | ||
2201 | |||
2202 | for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { | ||
2203 | struct l2cap_pinfo *pi = l2cap_pi(sk); | ||
2204 | |||
2205 | bh_lock_sock(sk); | ||
2206 | |||
2207 | if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) && | ||
2208 | !(hcon->link_mode & HCI_LM_ENCRYPT) && | ||
2209 | !status) { | ||
2210 | bh_unlock_sock(sk); | ||
2211 | continue; | ||
2212 | } | ||
2213 | |||
2214 | if (sk->sk_state == BT_CONNECT) { | ||
2215 | if (!status) { | ||
2216 | struct l2cap_conn_req req; | ||
2217 | req.scid = cpu_to_le16(l2cap_pi(sk)->scid); | ||
2218 | req.psm = l2cap_pi(sk)->psm; | ||
2219 | |||
2220 | l2cap_pi(sk)->ident = l2cap_get_ident(conn); | ||
2221 | |||
2222 | l2cap_send_cmd(conn, l2cap_pi(sk)->ident, | ||
2223 | L2CAP_CONN_REQ, sizeof(req), &req); | ||
2224 | } else { | ||
2225 | l2cap_sock_clear_timer(sk); | ||
2226 | l2cap_sock_set_timer(sk, HZ / 10); | ||
2227 | } | ||
2228 | } else if (sk->sk_state == BT_CONNECT2) { | ||
2229 | struct l2cap_conn_rsp rsp; | ||
2230 | __u16 result; | ||
2231 | 2513 | ||
2232 | if (!status) { | 2514 | return 0; |
2233 | sk->sk_state = BT_CONFIG; | 2515 | } |
2234 | result = L2CAP_CR_SUCCESS; | ||
2235 | } else { | ||
2236 | sk->sk_state = BT_DISCONN; | ||
2237 | l2cap_sock_set_timer(sk, HZ / 10); | ||
2238 | result = L2CAP_CR_SEC_BLOCK; | ||
2239 | } | ||
2240 | 2516 | ||
2241 | rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); | 2517 | static inline void l2cap_check_encryption(struct sock *sk, u8 encrypt) |
2242 | rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); | 2518 | { |
2243 | rsp.result = cpu_to_le16(result); | 2519 | if (sk->sk_type != SOCK_SEQPACKET) |
2244 | rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); | 2520 | return; |
2245 | l2cap_send_cmd(conn, l2cap_pi(sk)->ident, | ||
2246 | L2CAP_CONN_RSP, sizeof(rsp), &rsp); | ||
2247 | } | ||
2248 | 2521 | ||
2249 | bh_unlock_sock(sk); | 2522 | if (encrypt == 0x00) { |
2523 | if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM) { | ||
2524 | l2cap_sock_clear_timer(sk); | ||
2525 | l2cap_sock_set_timer(sk, HZ * 5); | ||
2526 | } else if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) | ||
2527 | __l2cap_sock_close(sk, ECONNREFUSED); | ||
2528 | } else { | ||
2529 | if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM) | ||
2530 | l2cap_sock_clear_timer(sk); | ||
2250 | } | 2531 | } |
2251 | |||
2252 | read_unlock(&l->lock); | ||
2253 | |||
2254 | return 0; | ||
2255 | } | 2532 | } |
2256 | 2533 | ||
2257 | static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) | 2534 | static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) |
2258 | { | 2535 | { |
2259 | struct l2cap_chan_list *l; | 2536 | struct l2cap_chan_list *l; |
2260 | struct l2cap_conn *conn = hcon->l2cap_data; | 2537 | struct l2cap_conn *conn = hcon->l2cap_data; |
@@ -2270,15 +2547,16 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) | |||
2270 | read_lock(&l->lock); | 2547 | read_lock(&l->lock); |
2271 | 2548 | ||
2272 | for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { | 2549 | for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { |
2273 | struct l2cap_pinfo *pi = l2cap_pi(sk); | ||
2274 | |||
2275 | bh_lock_sock(sk); | 2550 | bh_lock_sock(sk); |
2276 | 2551 | ||
2277 | if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) && | 2552 | if (l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND) { |
2278 | (sk->sk_state == BT_CONNECTED || | 2553 | bh_unlock_sock(sk); |
2279 | sk->sk_state == BT_CONFIG) && | 2554 | continue; |
2280 | !status && encrypt == 0x00) { | 2555 | } |
2281 | __l2cap_sock_close(sk, ECONNREFUSED); | 2556 | |
2557 | if (!status && (sk->sk_state == BT_CONNECTED || | ||
2558 | sk->sk_state == BT_CONFIG)) { | ||
2559 | l2cap_check_encryption(sk, encrypt); | ||
2282 | bh_unlock_sock(sk); | 2560 | bh_unlock_sock(sk); |
2283 | continue; | 2561 | continue; |
2284 | } | 2562 | } |
@@ -2376,7 +2654,7 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl | |||
2376 | goto drop; | 2654 | goto drop; |
2377 | 2655 | ||
2378 | skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), | 2656 | skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), |
2379 | skb->len); | 2657 | skb->len); |
2380 | conn->rx_len = len - skb->len; | 2658 | conn->rx_len = len - skb->len; |
2381 | } else { | 2659 | } else { |
2382 | BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len); | 2660 | BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len); |
@@ -2398,7 +2676,7 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl | |||
2398 | } | 2676 | } |
2399 | 2677 | ||
2400 | skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), | 2678 | skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), |
2401 | skb->len); | 2679 | skb->len); |
2402 | conn->rx_len -= skb->len; | 2680 | conn->rx_len -= skb->len; |
2403 | 2681 | ||
2404 | if (!conn->rx_len) { | 2682 | if (!conn->rx_len) { |
@@ -2424,10 +2702,10 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf) | |||
2424 | sk_for_each(sk, node, &l2cap_sk_list.head) { | 2702 | sk_for_each(sk, node, &l2cap_sk_list.head) { |
2425 | struct l2cap_pinfo *pi = l2cap_pi(sk); | 2703 | struct l2cap_pinfo *pi = l2cap_pi(sk); |
2426 | 2704 | ||
2427 | str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d 0x%x\n", | 2705 | str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", |
2428 | batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), | 2706 | batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), |
2429 | sk->sk_state, btohs(pi->psm), pi->scid, pi->dcid, | 2707 | sk->sk_state, btohs(pi->psm), pi->scid, pi->dcid, |
2430 | pi->imtu, pi->omtu, pi->link_mode); | 2708 | pi->imtu, pi->omtu, pi->sec_level); |
2431 | } | 2709 | } |
2432 | 2710 | ||
2433 | read_unlock_bh(&l2cap_sk_list.lock); | 2711 | read_unlock_bh(&l2cap_sk_list.lock); |
@@ -2447,7 +2725,7 @@ static const struct proto_ops l2cap_sock_ops = { | |||
2447 | .accept = l2cap_sock_accept, | 2725 | .accept = l2cap_sock_accept, |
2448 | .getname = l2cap_sock_getname, | 2726 | .getname = l2cap_sock_getname, |
2449 | .sendmsg = l2cap_sock_sendmsg, | 2727 | .sendmsg = l2cap_sock_sendmsg, |
2450 | .recvmsg = bt_sock_recvmsg, | 2728 | .recvmsg = l2cap_sock_recvmsg, |
2451 | .poll = bt_sock_poll, | 2729 | .poll = bt_sock_poll, |
2452 | .ioctl = bt_sock_ioctl, | 2730 | .ioctl = bt_sock_ioctl, |
2453 | .mmap = sock_no_mmap, | 2731 | .mmap = sock_no_mmap, |
@@ -2469,8 +2747,8 @@ static struct hci_proto l2cap_hci_proto = { | |||
2469 | .connect_ind = l2cap_connect_ind, | 2747 | .connect_ind = l2cap_connect_ind, |
2470 | .connect_cfm = l2cap_connect_cfm, | 2748 | .connect_cfm = l2cap_connect_cfm, |
2471 | .disconn_ind = l2cap_disconn_ind, | 2749 | .disconn_ind = l2cap_disconn_ind, |
2472 | .auth_cfm = l2cap_auth_cfm, | 2750 | .disconn_cfm = l2cap_disconn_cfm, |
2473 | .encrypt_cfm = l2cap_encrypt_cfm, | 2751 | .security_cfm = l2cap_security_cfm, |
2474 | .recv_acldata = l2cap_recv_acldata | 2752 | .recv_acldata = l2cap_recv_acldata |
2475 | }; | 2753 | }; |
2476 | 2754 | ||
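The l2cap.c changes above retire the old L2CAP_LM link-mode bits in favour of a per-socket security level: setsockopt(SOL_BLUETOOTH, BT_SECURITY) stores a level that l2cap_do_connect() then translates into an HCI authentication requirement. Below is a minimal userspace sketch of a client requesting BT_SECURITY_MEDIUM before connecting; the BlueZ headers and the exact BT_SECURITY / struct bt_security definitions are assumed from the companion include/net/bluetooth changes that are not part of this hunk, so treat it as illustration rather than a tested program.

/* Hedged sketch: assumes userspace headers exporting BT_SECURITY,
 * BT_SECURITY_MEDIUM and struct bt_security matching this kernel change. */
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <bluetooth/bluetooth.h>
#include <bluetooth/l2cap.h>

int l2cap_connect_secure(const bdaddr_t *dst, unsigned short psm)
{
	struct sockaddr_l2 addr;
	struct bt_security sec;
	int sk;

	sk = socket(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP);
	if (sk < 0)
		return -1;

	/* Replaces the old setsockopt(sk, SOL_L2CAP, L2CAP_LM, ...) call */
	memset(&sec, 0, sizeof(sec));
	sec.level = BT_SECURITY_MEDIUM;
	if (setsockopt(sk, SOL_BLUETOOTH, BT_SECURITY, &sec, sizeof(sec)) < 0)
		goto fail;

	memset(&addr, 0, sizeof(addr));	/* l2_cid must stay 0 for connect() */
	addr.l2_family = AF_BLUETOOTH;
	addr.l2_psm = htobs(psm);
	bacpy(&addr.l2_bdaddr, dst);

	/* l2cap_do_connect() now derives the HCI auth_type from sec_level */
	if (connect(sk, (struct sockaddr *) &addr, sizeof(addr)) < 0)
		goto fail;

	return sk;

fail:
	close(sk);
	return -1;
}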
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index acd84fd524b8..1d0fb0f23c63 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c | |||
@@ -46,7 +46,7 @@ | |||
46 | #include <net/bluetooth/l2cap.h> | 46 | #include <net/bluetooth/l2cap.h> |
47 | #include <net/bluetooth/rfcomm.h> | 47 | #include <net/bluetooth/rfcomm.h> |
48 | 48 | ||
49 | #define VERSION "1.10" | 49 | #define VERSION "1.11" |
50 | 50 | ||
51 | static int disable_cfc = 0; | 51 | static int disable_cfc = 0; |
52 | static int channel_mtu = -1; | 52 | static int channel_mtu = -1; |
@@ -223,19 +223,25 @@ static int rfcomm_l2sock_create(struct socket **sock) | |||
223 | return err; | 223 | return err; |
224 | } | 224 | } |
225 | 225 | ||
226 | static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d) | 226 | static inline int rfcomm_check_security(struct rfcomm_dlc *d) |
227 | { | 227 | { |
228 | struct sock *sk = d->session->sock->sk; | 228 | struct sock *sk = d->session->sock->sk; |
229 | __u8 auth_type; | ||
229 | 230 | ||
230 | if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) { | 231 | switch (d->sec_level) { |
231 | if (!hci_conn_encrypt(l2cap_pi(sk)->conn->hcon)) | 232 | case BT_SECURITY_HIGH: |
232 | return 1; | 233 | auth_type = HCI_AT_GENERAL_BONDING_MITM; |
233 | } else if (d->link_mode & RFCOMM_LM_AUTH) { | 234 | break; |
234 | if (!hci_conn_auth(l2cap_pi(sk)->conn->hcon)) | 235 | case BT_SECURITY_MEDIUM: |
235 | return 1; | 236 | auth_type = HCI_AT_GENERAL_BONDING; |
237 | break; | ||
238 | default: | ||
239 | auth_type = HCI_AT_NO_BONDING; | ||
240 | break; | ||
236 | } | 241 | } |
237 | 242 | ||
238 | return 0; | 243 | return hci_conn_security(l2cap_pi(sk)->conn->hcon, d->sec_level, |
244 | auth_type); | ||
239 | } | 245 | } |
240 | 246 | ||
241 | /* ---- RFCOMM DLCs ---- */ | 247 | /* ---- RFCOMM DLCs ---- */ |
@@ -388,10 +394,10 @@ static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, | |||
388 | d->cfc = (s->cfc == RFCOMM_CFC_UNKNOWN) ? 0 : s->cfc; | 394 | d->cfc = (s->cfc == RFCOMM_CFC_UNKNOWN) ? 0 : s->cfc; |
389 | 395 | ||
390 | if (s->state == BT_CONNECTED) { | 396 | if (s->state == BT_CONNECTED) { |
391 | if (rfcomm_check_link_mode(d)) | 397 | if (rfcomm_check_security(d)) |
392 | set_bit(RFCOMM_AUTH_PENDING, &d->flags); | ||
393 | else | ||
394 | rfcomm_send_pn(s, 1, d); | 398 | rfcomm_send_pn(s, 1, d); |
399 | else | ||
400 | set_bit(RFCOMM_AUTH_PENDING, &d->flags); | ||
395 | } | 401 | } |
396 | 402 | ||
397 | rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); | 403 | rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); |
@@ -421,9 +427,16 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) | |||
421 | d, d->state, d->dlci, err, s); | 427 | d, d->state, d->dlci, err, s); |
422 | 428 | ||
423 | switch (d->state) { | 429 | switch (d->state) { |
424 | case BT_CONNECTED: | ||
425 | case BT_CONFIG: | ||
426 | case BT_CONNECT: | 430 | case BT_CONNECT: |
431 | case BT_CONFIG: | ||
432 | if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { | ||
433 | set_bit(RFCOMM_AUTH_REJECT, &d->flags); | ||
434 | rfcomm_schedule(RFCOMM_SCHED_AUTH); | ||
435 | break; | ||
436 | } | ||
437 | /* Fall through */ | ||
438 | |||
439 | case BT_CONNECTED: | ||
427 | d->state = BT_DISCONN; | 440 | d->state = BT_DISCONN; |
428 | if (skb_queue_empty(&d->tx_queue)) { | 441 | if (skb_queue_empty(&d->tx_queue)) { |
429 | rfcomm_send_disc(s, d->dlci); | 442 | rfcomm_send_disc(s, d->dlci); |
@@ -434,6 +447,15 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) | |||
434 | } | 447 | } |
435 | break; | 448 | break; |
436 | 449 | ||
450 | case BT_OPEN: | ||
451 | case BT_CONNECT2: | ||
452 | if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { | ||
453 | set_bit(RFCOMM_AUTH_REJECT, &d->flags); | ||
454 | rfcomm_schedule(RFCOMM_SCHED_AUTH); | ||
455 | break; | ||
456 | } | ||
457 | /* Fall through */ | ||
458 | |||
437 | default: | 459 | default: |
438 | rfcomm_dlc_clear_timer(d); | 460 | rfcomm_dlc_clear_timer(d); |
439 | 461 | ||
@@ -636,6 +658,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst | |||
636 | bacpy(&addr.l2_bdaddr, src); | 658 | bacpy(&addr.l2_bdaddr, src); |
637 | addr.l2_family = AF_BLUETOOTH; | 659 | addr.l2_family = AF_BLUETOOTH; |
638 | addr.l2_psm = 0; | 660 | addr.l2_psm = 0; |
661 | addr.l2_cid = 0; | ||
639 | *err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); | 662 | *err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); |
640 | if (*err < 0) | 663 | if (*err < 0) |
641 | goto failed; | 664 | goto failed; |
@@ -657,6 +680,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst | |||
657 | bacpy(&addr.l2_bdaddr, dst); | 680 | bacpy(&addr.l2_bdaddr, dst); |
658 | addr.l2_family = AF_BLUETOOTH; | 681 | addr.l2_family = AF_BLUETOOTH; |
659 | addr.l2_psm = htobs(RFCOMM_PSM); | 682 | addr.l2_psm = htobs(RFCOMM_PSM); |
683 | addr.l2_cid = 0; | ||
660 | *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); | 684 | *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); |
661 | if (*err == 0 || *err == -EINPROGRESS) | 685 | if (*err == 0 || *err == -EINPROGRESS) |
662 | return s; | 686 | return s; |
@@ -1162,7 +1186,7 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) | |||
1162 | return 0; | 1186 | return 0; |
1163 | } | 1187 | } |
1164 | 1188 | ||
1165 | static void rfcomm_dlc_accept(struct rfcomm_dlc *d) | 1189 | void rfcomm_dlc_accept(struct rfcomm_dlc *d) |
1166 | { | 1190 | { |
1167 | struct sock *sk = d->session->sock->sk; | 1191 | struct sock *sk = d->session->sock->sk; |
1168 | 1192 | ||
@@ -1175,12 +1199,31 @@ static void rfcomm_dlc_accept(struct rfcomm_dlc *d) | |||
1175 | d->state_change(d, 0); | 1199 | d->state_change(d, 0); |
1176 | rfcomm_dlc_unlock(d); | 1200 | rfcomm_dlc_unlock(d); |
1177 | 1201 | ||
1178 | if (d->link_mode & RFCOMM_LM_MASTER) | 1202 | if (d->role_switch) |
1179 | hci_conn_switch_role(l2cap_pi(sk)->conn->hcon, 0x00); | 1203 | hci_conn_switch_role(l2cap_pi(sk)->conn->hcon, 0x00); |
1180 | 1204 | ||
1181 | rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig); | 1205 | rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig); |
1182 | } | 1206 | } |
1183 | 1207 | ||
1208 | static void rfcomm_check_accept(struct rfcomm_dlc *d) | ||
1209 | { | ||
1210 | if (rfcomm_check_security(d)) { | ||
1211 | if (d->defer_setup) { | ||
1212 | set_bit(RFCOMM_DEFER_SETUP, &d->flags); | ||
1213 | rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); | ||
1214 | |||
1215 | rfcomm_dlc_lock(d); | ||
1216 | d->state = BT_CONNECT2; | ||
1217 | d->state_change(d, 0); | ||
1218 | rfcomm_dlc_unlock(d); | ||
1219 | } else | ||
1220 | rfcomm_dlc_accept(d); | ||
1221 | } else { | ||
1222 | set_bit(RFCOMM_AUTH_PENDING, &d->flags); | ||
1223 | rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); | ||
1224 | } | ||
1225 | } | ||
1226 | |||
1184 | static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci) | 1227 | static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci) |
1185 | { | 1228 | { |
1186 | struct rfcomm_dlc *d; | 1229 | struct rfcomm_dlc *d; |
@@ -1203,11 +1246,7 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci) | |||
1203 | if (d) { | 1246 | if (d) { |
1204 | if (d->state == BT_OPEN) { | 1247 | if (d->state == BT_OPEN) { |
1205 | /* DLC was previously opened by PN request */ | 1248 | /* DLC was previously opened by PN request */ |
1206 | if (rfcomm_check_link_mode(d)) { | 1249 | rfcomm_check_accept(d); |
1207 | set_bit(RFCOMM_AUTH_PENDING, &d->flags); | ||
1208 | rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); | ||
1209 | } else | ||
1210 | rfcomm_dlc_accept(d); | ||
1211 | } | 1250 | } |
1212 | return 0; | 1251 | return 0; |
1213 | } | 1252 | } |
@@ -1219,11 +1258,7 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci) | |||
1219 | d->addr = __addr(s->initiator, dlci); | 1258 | d->addr = __addr(s->initiator, dlci); |
1220 | rfcomm_dlc_link(s, d); | 1259 | rfcomm_dlc_link(s, d); |
1221 | 1260 | ||
1222 | if (rfcomm_check_link_mode(d)) { | 1261 | rfcomm_check_accept(d); |
1223 | set_bit(RFCOMM_AUTH_PENDING, &d->flags); | ||
1224 | rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); | ||
1225 | } else | ||
1226 | rfcomm_dlc_accept(d); | ||
1227 | } else { | 1262 | } else { |
1228 | rfcomm_send_dm(s, dlci); | 1263 | rfcomm_send_dm(s, dlci); |
1229 | } | 1264 | } |
@@ -1637,11 +1672,12 @@ static void rfcomm_process_connect(struct rfcomm_session *s) | |||
1637 | d = list_entry(p, struct rfcomm_dlc, list); | 1672 | d = list_entry(p, struct rfcomm_dlc, list); |
1638 | if (d->state == BT_CONFIG) { | 1673 | if (d->state == BT_CONFIG) { |
1639 | d->mtu = s->mtu; | 1674 | d->mtu = s->mtu; |
1640 | if (rfcomm_check_link_mode(d)) { | 1675 | if (rfcomm_check_security(d)) { |
1676 | rfcomm_send_pn(s, 1, d); | ||
1677 | } else { | ||
1641 | set_bit(RFCOMM_AUTH_PENDING, &d->flags); | 1678 | set_bit(RFCOMM_AUTH_PENDING, &d->flags); |
1642 | rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); | 1679 | rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); |
1643 | } else | 1680 | } |
1644 | rfcomm_send_pn(s, 1, d); | ||
1645 | } | 1681 | } |
1646 | } | 1682 | } |
1647 | } | 1683 | } |
@@ -1717,11 +1753,17 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s) | |||
1717 | if (d->out) { | 1753 | if (d->out) { |
1718 | rfcomm_send_pn(s, 1, d); | 1754 | rfcomm_send_pn(s, 1, d); |
1719 | rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); | 1755 | rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); |
1720 | } else | 1756 | } else { |
1721 | rfcomm_dlc_accept(d); | 1757 | if (d->defer_setup) { |
1722 | if (d->link_mode & RFCOMM_LM_SECURE) { | 1758 | set_bit(RFCOMM_DEFER_SETUP, &d->flags); |
1723 | struct sock *sk = s->sock->sk; | 1759 | rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); |
1724 | hci_conn_change_link_key(l2cap_pi(sk)->conn->hcon); | 1760 | |
1761 | rfcomm_dlc_lock(d); | ||
1762 | d->state = BT_CONNECT2; | ||
1763 | d->state_change(d, 0); | ||
1764 | rfcomm_dlc_unlock(d); | ||
1765 | } else | ||
1766 | rfcomm_dlc_accept(d); | ||
1725 | } | 1767 | } |
1726 | continue; | 1768 | continue; |
1727 | } else if (test_and_clear_bit(RFCOMM_AUTH_REJECT, &d->flags)) { | 1769 | } else if (test_and_clear_bit(RFCOMM_AUTH_REJECT, &d->flags)) { |
@@ -1734,6 +1776,9 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s) | |||
1734 | continue; | 1776 | continue; |
1735 | } | 1777 | } |
1736 | 1778 | ||
1779 | if (test_bit(RFCOMM_SEC_PENDING, &d->flags)) | ||
1780 | continue; | ||
1781 | |||
1737 | if (test_bit(RFCOMM_TX_THROTTLED, &s->flags)) | 1782 | if (test_bit(RFCOMM_TX_THROTTLED, &s->flags)) |
1738 | continue; | 1783 | continue; |
1739 | 1784 | ||
@@ -1876,6 +1921,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) | |||
1876 | bacpy(&addr.l2_bdaddr, ba); | 1921 | bacpy(&addr.l2_bdaddr, ba); |
1877 | addr.l2_family = AF_BLUETOOTH; | 1922 | addr.l2_family = AF_BLUETOOTH; |
1878 | addr.l2_psm = htobs(RFCOMM_PSM); | 1923 | addr.l2_psm = htobs(RFCOMM_PSM); |
1924 | addr.l2_cid = 0; | ||
1879 | err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); | 1925 | err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); |
1880 | if (err < 0) { | 1926 | if (err < 0) { |
1881 | BT_ERR("Bind failed %d", err); | 1927 | BT_ERR("Bind failed %d", err); |
@@ -1947,42 +1993,7 @@ static int rfcomm_run(void *unused) | |||
1947 | return 0; | 1993 | return 0; |
1948 | } | 1994 | } |
1949 | 1995 | ||
1950 | static void rfcomm_auth_cfm(struct hci_conn *conn, u8 status) | 1996 | static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) |
1951 | { | ||
1952 | struct rfcomm_session *s; | ||
1953 | struct rfcomm_dlc *d; | ||
1954 | struct list_head *p, *n; | ||
1955 | |||
1956 | BT_DBG("conn %p status 0x%02x", conn, status); | ||
1957 | |||
1958 | s = rfcomm_session_get(&conn->hdev->bdaddr, &conn->dst); | ||
1959 | if (!s) | ||
1960 | return; | ||
1961 | |||
1962 | rfcomm_session_hold(s); | ||
1963 | |||
1964 | list_for_each_safe(p, n, &s->dlcs) { | ||
1965 | d = list_entry(p, struct rfcomm_dlc, list); | ||
1966 | |||
1967 | if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) && | ||
1968 | !(conn->link_mode & HCI_LM_ENCRYPT) && !status) | ||
1969 | continue; | ||
1970 | |||
1971 | if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags)) | ||
1972 | continue; | ||
1973 | |||
1974 | if (!status) | ||
1975 | set_bit(RFCOMM_AUTH_ACCEPT, &d->flags); | ||
1976 | else | ||
1977 | set_bit(RFCOMM_AUTH_REJECT, &d->flags); | ||
1978 | } | ||
1979 | |||
1980 | rfcomm_session_put(s); | ||
1981 | |||
1982 | rfcomm_schedule(RFCOMM_SCHED_AUTH); | ||
1983 | } | ||
1984 | |||
1985 | static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt) | ||
1986 | { | 1997 | { |
1987 | struct rfcomm_session *s; | 1998 | struct rfcomm_session *s; |
1988 | struct rfcomm_dlc *d; | 1999 | struct rfcomm_dlc *d; |
@@ -1999,18 +2010,29 @@ static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt) | |||
1999 | list_for_each_safe(p, n, &s->dlcs) { | 2010 | list_for_each_safe(p, n, &s->dlcs) { |
2000 | d = list_entry(p, struct rfcomm_dlc, list); | 2011 | d = list_entry(p, struct rfcomm_dlc, list); |
2001 | 2012 | ||
2002 | if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) && | 2013 | if (test_and_clear_bit(RFCOMM_SEC_PENDING, &d->flags)) { |
2003 | (d->state == BT_CONNECTED || | 2014 | rfcomm_dlc_clear_timer(d); |
2004 | d->state == BT_CONFIG) && | 2015 | if (status || encrypt == 0x00) { |
2005 | !status && encrypt == 0x00) { | 2016 | __rfcomm_dlc_close(d, ECONNREFUSED); |
2006 | __rfcomm_dlc_close(d, ECONNREFUSED); | 2017 | continue; |
2007 | continue; | 2018 | } |
2019 | } | ||
2020 | |||
2021 | if (d->state == BT_CONNECTED && !status && encrypt == 0x00) { | ||
2022 | if (d->sec_level == BT_SECURITY_MEDIUM) { | ||
2023 | set_bit(RFCOMM_SEC_PENDING, &d->flags); | ||
2024 | rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); | ||
2025 | continue; | ||
2026 | } else if (d->sec_level == BT_SECURITY_HIGH) { | ||
2027 | __rfcomm_dlc_close(d, ECONNREFUSED); | ||
2028 | continue; | ||
2029 | } | ||
2008 | } | 2030 | } |
2009 | 2031 | ||
2010 | if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags)) | 2032 | if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags)) |
2011 | continue; | 2033 | continue; |
2012 | 2034 | ||
2013 | if (!status && encrypt) | 2035 | if (!status) |
2014 | set_bit(RFCOMM_AUTH_ACCEPT, &d->flags); | 2036 | set_bit(RFCOMM_AUTH_ACCEPT, &d->flags); |
2015 | else | 2037 | else |
2016 | set_bit(RFCOMM_AUTH_REJECT, &d->flags); | 2038 | set_bit(RFCOMM_AUTH_REJECT, &d->flags); |
@@ -2023,8 +2045,7 @@ static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt) | |||
2023 | 2045 | ||
2024 | static struct hci_cb rfcomm_cb = { | 2046 | static struct hci_cb rfcomm_cb = { |
2025 | .name = "RFCOMM", | 2047 | .name = "RFCOMM", |
2026 | .auth_cfm = rfcomm_auth_cfm, | 2048 | .security_cfm = rfcomm_security_cfm |
2027 | .encrypt_cfm = rfcomm_encrypt_cfm | ||
2028 | }; | 2049 | }; |
2029 | 2050 | ||
2030 | static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf) | 2051 | static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf) |
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index d3fc6fca38d0..7f482784e9f7 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c | |||
@@ -261,12 +261,19 @@ static void rfcomm_sock_init(struct sock *sk, struct sock *parent) | |||
261 | 261 | ||
262 | if (parent) { | 262 | if (parent) { |
263 | sk->sk_type = parent->sk_type; | 263 | sk->sk_type = parent->sk_type; |
264 | pi->link_mode = rfcomm_pi(parent)->link_mode; | 264 | pi->dlc->defer_setup = bt_sk(parent)->defer_setup; |
265 | |||
266 | pi->sec_level = rfcomm_pi(parent)->sec_level; | ||
267 | pi->role_switch = rfcomm_pi(parent)->role_switch; | ||
265 | } else { | 268 | } else { |
266 | pi->link_mode = 0; | 269 | pi->dlc->defer_setup = 0; |
270 | |||
271 | pi->sec_level = BT_SECURITY_LOW; | ||
272 | pi->role_switch = 0; | ||
267 | } | 273 | } |
268 | 274 | ||
269 | pi->dlc->link_mode = pi->link_mode; | 275 | pi->dlc->sec_level = pi->sec_level; |
276 | pi->dlc->role_switch = pi->role_switch; | ||
270 | } | 277 | } |
271 | 278 | ||
272 | static struct proto rfcomm_proto = { | 279 | static struct proto rfcomm_proto = { |
@@ -406,7 +413,8 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a | |||
406 | bacpy(&bt_sk(sk)->dst, &sa->rc_bdaddr); | 413 | bacpy(&bt_sk(sk)->dst, &sa->rc_bdaddr); |
407 | rfcomm_pi(sk)->channel = sa->rc_channel; | 414 | rfcomm_pi(sk)->channel = sa->rc_channel; |
408 | 415 | ||
409 | d->link_mode = rfcomm_pi(sk)->link_mode; | 416 | d->sec_level = rfcomm_pi(sk)->sec_level; |
417 | d->role_switch = rfcomm_pi(sk)->role_switch; | ||
410 | 418 | ||
411 | err = rfcomm_dlc_open(d, &bt_sk(sk)->src, &sa->rc_bdaddr, sa->rc_channel); | 419 | err = rfcomm_dlc_open(d, &bt_sk(sk)->src, &sa->rc_bdaddr, sa->rc_channel); |
412 | if (!err) | 420 | if (!err) |
@@ -554,6 +562,9 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, | |||
554 | struct sk_buff *skb; | 562 | struct sk_buff *skb; |
555 | int sent = 0; | 563 | int sent = 0; |
556 | 564 | ||
565 | if (test_bit(RFCOMM_DEFER_SETUP, &d->flags)) | ||
566 | return -ENOTCONN; | ||
567 | |||
557 | if (msg->msg_flags & MSG_OOB) | 568 | if (msg->msg_flags & MSG_OOB) |
558 | return -EOPNOTSUPP; | 569 | return -EOPNOTSUPP; |
559 | 570 | ||
@@ -570,8 +581,11 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, | |||
570 | 581 | ||
571 | skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE, | 582 | skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE, |
572 | msg->msg_flags & MSG_DONTWAIT, &err); | 583 | msg->msg_flags & MSG_DONTWAIT, &err); |
573 | if (!skb) | 584 | if (!skb) { |
585 | if (sent == 0) | ||
586 | sent = err; | ||
574 | break; | 587 | break; |
588 | } | ||
575 | skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE); | 589 | skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE); |
576 | 590 | ||
577 | err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); | 591 | err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); |
@@ -630,10 +644,16 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
630 | struct msghdr *msg, size_t size, int flags) | 644 | struct msghdr *msg, size_t size, int flags) |
631 | { | 645 | { |
632 | struct sock *sk = sock->sk; | 646 | struct sock *sk = sock->sk; |
647 | struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; | ||
633 | int err = 0; | 648 | int err = 0; |
634 | size_t target, copied = 0; | 649 | size_t target, copied = 0; |
635 | long timeo; | 650 | long timeo; |
636 | 651 | ||
652 | if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { | ||
653 | rfcomm_dlc_accept(d); | ||
654 | return 0; | ||
655 | } | ||
656 | |||
637 | if (flags & MSG_OOB) | 657 | if (flags & MSG_OOB) |
638 | return -EOPNOTSUPP; | 658 | return -EOPNOTSUPP; |
639 | 659 | ||
@@ -710,7 +730,7 @@ out: | |||
710 | return copied ? : err; | 730 | return copied ? : err; |
711 | } | 731 | } |
712 | 732 | ||
713 | static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) | 733 | static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, int optlen) |
714 | { | 734 | { |
715 | struct sock *sk = sock->sk; | 735 | struct sock *sk = sock->sk; |
716 | int err = 0; | 736 | int err = 0; |
@@ -727,7 +747,14 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c | |||
727 | break; | 747 | break; |
728 | } | 748 | } |
729 | 749 | ||
730 | rfcomm_pi(sk)->link_mode = opt; | 750 | if (opt & RFCOMM_LM_AUTH) |
751 | rfcomm_pi(sk)->sec_level = BT_SECURITY_LOW; | ||
752 | if (opt & RFCOMM_LM_ENCRYPT) | ||
753 | rfcomm_pi(sk)->sec_level = BT_SECURITY_MEDIUM; | ||
754 | if (opt & RFCOMM_LM_SECURE) | ||
755 | rfcomm_pi(sk)->sec_level = BT_SECURITY_HIGH; | ||
756 | |||
757 | rfcomm_pi(sk)->role_switch = (opt & RFCOMM_LM_MASTER); | ||
731 | break; | 758 | break; |
732 | 759 | ||
733 | default: | 760 | default: |
@@ -739,12 +766,76 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c | |||
739 | return err; | 766 | return err; |
740 | } | 767 | } |
741 | 768 | ||
742 | static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) | 769 | static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) |
770 | { | ||
771 | struct sock *sk = sock->sk; | ||
772 | struct bt_security sec; | ||
773 | int len, err = 0; | ||
774 | u32 opt; | ||
775 | |||
776 | BT_DBG("sk %p", sk); | ||
777 | |||
778 | if (level == SOL_RFCOMM) | ||
779 | return rfcomm_sock_setsockopt_old(sock, optname, optval, optlen); | ||
780 | |||
781 | if (level != SOL_BLUETOOTH) | ||
782 | return -ENOPROTOOPT; | ||
783 | |||
784 | lock_sock(sk); | ||
785 | |||
786 | switch (optname) { | ||
787 | case BT_SECURITY: | ||
788 | if (sk->sk_type != SOCK_STREAM) { | ||
789 | err = -EINVAL; | ||
790 | break; | ||
791 | } | ||
792 | |||
793 | sec.level = BT_SECURITY_LOW; | ||
794 | |||
795 | len = min_t(unsigned int, sizeof(sec), optlen); | ||
796 | if (copy_from_user((char *) &sec, optval, len)) { | ||
797 | err = -EFAULT; | ||
798 | break; | ||
799 | } | ||
800 | |||
801 | if (sec.level > BT_SECURITY_HIGH) { | ||
802 | err = -EINVAL; | ||
803 | break; | ||
804 | } | ||
805 | |||
806 | rfcomm_pi(sk)->sec_level = sec.level; | ||
807 | break; | ||
808 | |||
809 | case BT_DEFER_SETUP: | ||
810 | if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { | ||
811 | err = -EINVAL; | ||
812 | break; | ||
813 | } | ||
814 | |||
815 | if (get_user(opt, (u32 __user *) optval)) { | ||
816 | err = -EFAULT; | ||
817 | break; | ||
818 | } | ||
819 | |||
820 | bt_sk(sk)->defer_setup = opt; | ||
821 | break; | ||
822 | |||
823 | default: | ||
824 | err = -ENOPROTOOPT; | ||
825 | break; | ||
826 | } | ||
827 | |||
828 | release_sock(sk); | ||
829 | return err; | ||
830 | } | ||
831 | |||
832 | static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) | ||
743 | { | 833 | { |
744 | struct sock *sk = sock->sk; | 834 | struct sock *sk = sock->sk; |
745 | struct sock *l2cap_sk; | 835 | struct sock *l2cap_sk; |
746 | struct rfcomm_conninfo cinfo; | 836 | struct rfcomm_conninfo cinfo; |
747 | int len, err = 0; | 837 | int len, err = 0; |
838 | u32 opt; | ||
748 | 839 | ||
749 | BT_DBG("sk %p", sk); | 840 | BT_DBG("sk %p", sk); |
750 | 841 | ||
@@ -755,12 +846,32 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c | |||
755 | 846 | ||
756 | switch (optname) { | 847 | switch (optname) { |
757 | case RFCOMM_LM: | 848 | case RFCOMM_LM: |
758 | if (put_user(rfcomm_pi(sk)->link_mode, (u32 __user *) optval)) | 849 | switch (rfcomm_pi(sk)->sec_level) { |
850 | case BT_SECURITY_LOW: | ||
851 | opt = RFCOMM_LM_AUTH; | ||
852 | break; | ||
853 | case BT_SECURITY_MEDIUM: | ||
854 | opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT; | ||
855 | break; | ||
856 | case BT_SECURITY_HIGH: | ||
857 | opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT | | ||
858 | RFCOMM_LM_SECURE; | ||
859 | break; | ||
860 | default: | ||
861 | opt = 0; | ||
862 | break; | ||
863 | } | ||
864 | |||
865 | if (rfcomm_pi(sk)->role_switch) | ||
866 | opt |= RFCOMM_LM_MASTER; | ||
867 | |||
868 | if (put_user(opt, (u32 __user *) optval)) | ||
759 | err = -EFAULT; | 869 | err = -EFAULT; |
760 | break; | 870 | break; |
761 | 871 | ||
762 | case RFCOMM_CONNINFO: | 872 | case RFCOMM_CONNINFO: |
763 | if (sk->sk_state != BT_CONNECTED) { | 873 | if (sk->sk_state != BT_CONNECTED && |
874 | !rfcomm_pi(sk)->dlc->defer_setup) { | ||
764 | err = -ENOTCONN; | 875 | err = -ENOTCONN; |
765 | break; | 876 | break; |
766 | } | 877 | } |
@@ -785,6 +896,60 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c | |||
785 | return err; | 896 | return err; |
786 | } | 897 | } |
787 | 898 | ||
899 | static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) | ||
900 | { | ||
901 | struct sock *sk = sock->sk; | ||
902 | struct bt_security sec; | ||
903 | int len, err = 0; | ||
904 | |||
905 | BT_DBG("sk %p", sk); | ||
906 | |||
907 | if (level == SOL_RFCOMM) | ||
908 | return rfcomm_sock_getsockopt_old(sock, optname, optval, optlen); | ||
909 | |||
910 | if (level != SOL_BLUETOOTH) | ||
911 | return -ENOPROTOOPT; | ||
912 | |||
913 | if (get_user(len, optlen)) | ||
914 | return -EFAULT; | ||
915 | |||
916 | lock_sock(sk); | ||
917 | |||
918 | switch (optname) { | ||
919 | case BT_SECURITY: | ||
920 | if (sk->sk_type != SOCK_STREAM) { | ||
921 | err = -EINVAL; | ||
922 | break; | ||
923 | } | ||
924 | |||
925 | sec.level = rfcomm_pi(sk)->sec_level; | ||
926 | |||
927 | len = min_t(unsigned int, len, sizeof(sec)); | ||
928 | if (copy_to_user(optval, (char *) &sec, len)) | ||
929 | err = -EFAULT; | ||
930 | |||
931 | break; | ||
932 | |||
933 | case BT_DEFER_SETUP: | ||
934 | if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { | ||
935 | err = -EINVAL; | ||
936 | break; | ||
937 | } | ||
938 | |||
939 | if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval)) | ||
940 | err = -EFAULT; | ||
941 | |||
942 | break; | ||
943 | |||
944 | default: | ||
945 | err = -ENOPROTOOPT; | ||
946 | break; | ||
947 | } | ||
948 | |||
949 | release_sock(sk); | ||
950 | return err; | ||
951 | } | ||
952 | |||
788 | static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | 953 | static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) |
789 | { | 954 | { |
790 | struct sock *sk __maybe_unused = sock->sk; | 955 | struct sock *sk __maybe_unused = sock->sk; |
@@ -888,6 +1053,10 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * | |||
888 | 1053 | ||
889 | done: | 1054 | done: |
890 | bh_unlock_sock(parent); | 1055 | bh_unlock_sock(parent); |
1056 | |||
1057 | if (bt_sk(parent)->defer_setup) | ||
1058 | parent->sk_state_change(parent); | ||
1059 | |||
891 | return result; | 1060 | return result; |
892 | } | 1061 | } |
893 | 1062 | ||
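Both the l2cap.c and rfcomm/sock.c hunks hook the new BT_DEFER_SETUP option into recvmsg(): with deferred setup enabled on the listening socket, an accepted channel is parked in BT_CONNECT2 and the first read is what finally sends the pending connection response. The sketch below shows that server-side flow for RFCOMM; it assumes the companion af_bluetooth.c and header changes (not in this excerpt) that export BT_DEFER_SETUP and let accept() hand back a socket still awaiting authorization, and it drops error handling for brevity.

/* Hedged sketch of a deferred-setup RFCOMM server. */
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <bluetooth/bluetooth.h>
#include <bluetooth/rfcomm.h>

int rfcomm_listen_deferred(uint8_t channel)
{
	struct sockaddr_rc addr;
	uint32_t defer = 1;
	int sk, nsk;
	char c;

	sk = socket(PF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM);

	memset(&addr, 0, sizeof(addr));
	addr.rc_family = AF_BLUETOOTH;
	addr.rc_channel = channel;
	bind(sk, (struct sockaddr *) &addr, sizeof(addr));

	/* BT_DEFER_SETUP is only accepted in BT_BOUND or BT_LISTEN state */
	setsockopt(sk, SOL_BLUETOOTH, BT_DEFER_SETUP, &defer, sizeof(defer));
	listen(sk, 1);

	nsk = accept(sk, NULL, NULL);

	/* ...authorize the remote device here before letting data flow... */

	/* The first read clears RFCOMM_DEFER_SETUP, calls rfcomm_dlc_accept()
	 * and returns 0 without consuming any data; closing nsk instead
	 * would reject the pending DLC. */
	read(nsk, &c, 1);

	return nsk;
}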
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 46fd8bf9a690..51ae0c3e470a 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c | |||
@@ -195,7 +195,7 @@ static int sco_connect(struct sock *sk) | |||
195 | else | 195 | else |
196 | type = SCO_LINK; | 196 | type = SCO_LINK; |
197 | 197 | ||
198 | hcon = hci_connect(hdev, type, dst, HCI_AT_NO_BONDING); | 198 | hcon = hci_connect(hdev, type, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING); |
199 | if (!hcon) | 199 | if (!hcon) |
200 | goto done; | 200 | goto done; |
201 | 201 | ||
@@ -668,7 +668,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char | |||
668 | return err; | 668 | return err; |
669 | } | 669 | } |
670 | 670 | ||
671 | static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) | 671 | static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) |
672 | { | 672 | { |
673 | struct sock *sk = sock->sk; | 673 | struct sock *sk = sock->sk; |
674 | struct sco_options opts; | 674 | struct sco_options opts; |
@@ -723,6 +723,31 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char | |||
723 | return err; | 723 | return err; |
724 | } | 724 | } |
725 | 725 | ||
726 | static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) | ||
727 | { | ||
728 | struct sock *sk = sock->sk; | ||
729 | int len, err = 0; | ||
730 | |||
731 | BT_DBG("sk %p", sk); | ||
732 | |||
733 | if (level == SOL_SCO) | ||
734 | return sco_sock_getsockopt_old(sock, optname, optval, optlen); | ||
735 | |||
736 | if (get_user(len, optlen)) | ||
737 | return -EFAULT; | ||
738 | |||
739 | lock_sock(sk); | ||
740 | |||
741 | switch (optname) { | ||
742 | default: | ||
743 | err = -ENOPROTOOPT; | ||
744 | break; | ||
745 | } | ||
746 | |||
747 | release_sock(sk); | ||
748 | return err; | ||
749 | } | ||
750 | |||
726 | static int sco_sock_release(struct socket *sock) | 751 | static int sco_sock_release(struct socket *sock) |
727 | { | 752 | { |
728 | struct sock *sk = sock->sk; | 753 | struct sock *sk = sock->sk; |
@@ -832,10 +857,30 @@ done: | |||
832 | /* ----- SCO interface with lower layer (HCI) ----- */ | 857 | /* ----- SCO interface with lower layer (HCI) ----- */ |
833 | static int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type) | 858 | static int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type) |
834 | { | 859 | { |
860 | register struct sock *sk; | ||
861 | struct hlist_node *node; | ||
862 | int lm = 0; | ||
863 | |||
864 | if (type != SCO_LINK && type != ESCO_LINK) | ||
865 | return 0; | ||
866 | |||
835 | BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); | 867 | BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); |
836 | 868 | ||
837 | /* Always accept connection */ | 869 | /* Find listening sockets */ |
838 | return HCI_LM_ACCEPT; | 870 | read_lock(&sco_sk_list.lock); |
871 | sk_for_each(sk, node, &sco_sk_list.head) { | ||
872 | if (sk->sk_state != BT_LISTEN) | ||
873 | continue; | ||
874 | |||
875 | if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr) || | ||
876 | !bacmp(&bt_sk(sk)->src, BDADDR_ANY)) { | ||
877 | lm |= HCI_LM_ACCEPT; | ||
878 | break; | ||
879 | } | ||
880 | } | ||
881 | read_unlock(&sco_sk_list.lock); | ||
882 | |||
883 | return lm; | ||
839 | } | 884 | } |
840 | 885 | ||
841 | static int sco_connect_cfm(struct hci_conn *hcon, __u8 status) | 886 | static int sco_connect_cfm(struct hci_conn *hcon, __u8 status) |
@@ -857,7 +902,7 @@ static int sco_connect_cfm(struct hci_conn *hcon, __u8 status) | |||
857 | return 0; | 902 | return 0; |
858 | } | 903 | } |
859 | 904 | ||
860 | static int sco_disconn_ind(struct hci_conn *hcon, __u8 reason) | 905 | static int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) |
861 | { | 906 | { |
862 | BT_DBG("hcon %p reason %d", hcon, reason); | 907 | BT_DBG("hcon %p reason %d", hcon, reason); |
863 | 908 | ||
@@ -940,7 +985,7 @@ static struct hci_proto sco_hci_proto = { | |||
940 | .id = HCI_PROTO_SCO, | 985 | .id = HCI_PROTO_SCO, |
941 | .connect_ind = sco_connect_ind, | 986 | .connect_ind = sco_connect_ind, |
942 | .connect_cfm = sco_connect_cfm, | 987 | .connect_cfm = sco_connect_cfm, |
943 | .disconn_ind = sco_disconn_ind, | 988 | .disconn_cfm = sco_disconn_cfm, |
944 | .recv_scodata = sco_recv_scodata | 989 | .recv_scodata = sco_recv_scodata |
945 | }; | 990 | }; |
946 | 991 | ||
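The SCO getsockopt path above is split so that the legacy SOL_SCO options keep working through sco_sock_getsockopt_old() while new option levels can be dispatched separately. A minimal userspace sketch of the old-style query, assuming the BlueZ <bluetooth/bluetooth.h> and <bluetooth/sco.h> headers; the socket setup is elided and the helper name is illustrative:

	#include <stdint.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <bluetooth/bluetooth.h>
	#include <bluetooth/sco.h>

	/* Query the negotiated SCO MTU on an already connected socket 'fd'. */
	static int sco_get_mtu(int fd, uint16_t *mtu)
	{
		struct sco_options opts;
		socklen_t len = sizeof(opts);

		memset(&opts, 0, sizeof(opts));
		/* Level SOL_SCO is routed to sco_sock_getsockopt_old() in the kernel. */
		if (getsockopt(fd, SOL_SCO, SCO_OPTIONS, &opts, &len) < 0)
			return -1;

		*mtu = opts.mtu;
		return 0;
	}
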
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index ba7be195803c..fcffb3fb1177 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c | |||
@@ -98,7 +98,8 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) | |||
98 | kfree_skb(skb); | 98 | kfree_skb(skb); |
99 | goto errout; | 99 | goto errout; |
100 | } | 100 | } |
101 | err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); | 101 | rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); |
102 | return; | ||
102 | errout: | 103 | errout: |
103 | if (err < 0) | 104 | if (err < 0) |
104 | rtnl_set_sk_err(net, RTNLGRP_LINK, err); | 105 | rtnl_set_sk_err(net, RTNLGRP_LINK, err); |
diff --git a/net/can/af_can.c b/net/can/af_can.c index d90e8dd975fc..547bafc79e28 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c | |||
@@ -273,8 +273,7 @@ int can_send(struct sk_buff *skb, int loop) | |||
273 | err = net_xmit_errno(err); | 273 | err = net_xmit_errno(err); |
274 | 274 | ||
275 | if (err) { | 275 | if (err) { |
276 | if (newskb) | 276 | kfree_skb(newskb); |
277 | kfree_skb(newskb); | ||
278 | return err; | 277 | return err; |
279 | } | 278 | } |
280 | 279 | ||
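This hunk, like the later ones in neighbour.c, pktgen.c, skbuff.c and af_decnet.c, drops the explicit NULL test because kfree_skb() already returns early for a NULL pointer (the same guard is visible in the consume_skb() addition further down). The cleanup pattern, as a small before/after sketch:

	/* Before: redundant guard around the free */
	if (newskb)
		kfree_skb(newskb);

	/* After: kfree_skb(NULL) is a no-op, so the guard can go */
	kfree_skb(newskb);
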
diff --git a/net/core/Makefile b/net/core/Makefile index 26a37cb31923..796f46eece5f 100644 --- a/net/core/Makefile +++ b/net/core/Makefile | |||
@@ -17,3 +17,6 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o | |||
17 | obj-$(CONFIG_NETPOLL) += netpoll.o | 17 | obj-$(CONFIG_NETPOLL) += netpoll.o |
18 | obj-$(CONFIG_NET_DMA) += user_dma.o | 18 | obj-$(CONFIG_NET_DMA) += user_dma.o |
19 | obj-$(CONFIG_FIB_RULES) += fib_rules.o | 19 | obj-$(CONFIG_FIB_RULES) += fib_rules.o |
20 | obj-$(CONFIG_TRACEPOINTS) += net-traces.o | ||
21 | obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o | ||
22 | |||
diff --git a/net/core/datagram.c b/net/core/datagram.c index 5e2ac0c4b07c..d0de644b378d 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c | |||
@@ -208,7 +208,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, | |||
208 | 208 | ||
209 | void skb_free_datagram(struct sock *sk, struct sk_buff *skb) | 209 | void skb_free_datagram(struct sock *sk, struct sk_buff *skb) |
210 | { | 210 | { |
211 | kfree_skb(skb); | 211 | consume_skb(skb); |
212 | sk_mem_reclaim_partial(sk); | 212 | sk_mem_reclaim_partial(sk); |
213 | } | 213 | } |
214 | 214 | ||
diff --git a/net/core/dev.c b/net/core/dev.c index d393fc997cd9..052dd478d3e1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c | |||
@@ -135,14 +135,6 @@ | |||
135 | /* This should be increased if a protocol with a bigger head is added. */ | 135 | /* This should be increased if a protocol with a bigger head is added. */ |
136 | #define GRO_MAX_HEAD (MAX_HEADER + 128) | 136 | #define GRO_MAX_HEAD (MAX_HEADER + 128) |
137 | 137 | ||
138 | enum { | ||
139 | GRO_MERGED, | ||
140 | GRO_MERGED_FREE, | ||
141 | GRO_HELD, | ||
142 | GRO_NORMAL, | ||
143 | GRO_DROP, | ||
144 | }; | ||
145 | |||
146 | /* | 138 | /* |
147 | * The list of packet types we will receive (as opposed to discard) | 139 | * The list of packet types we will receive (as opposed to discard) |
148 | * and the routines to invoke. | 140 | * and the routines to invoke. |
@@ -1672,23 +1664,12 @@ static int dev_gso_segment(struct sk_buff *skb) | |||
1672 | return 0; | 1664 | return 0; |
1673 | } | 1665 | } |
1674 | 1666 | ||
1675 | static void tstamp_tx(struct sk_buff *skb) | ||
1676 | { | ||
1677 | union skb_shared_tx *shtx = | ||
1678 | skb_tx(skb); | ||
1679 | if (unlikely(shtx->software && | ||
1680 | !shtx->in_progress)) { | ||
1681 | skb_tstamp_tx(skb, NULL); | ||
1682 | } | ||
1683 | } | ||
1684 | |||
1685 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | 1667 | int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, |
1686 | struct netdev_queue *txq) | 1668 | struct netdev_queue *txq) |
1687 | { | 1669 | { |
1688 | const struct net_device_ops *ops = dev->netdev_ops; | 1670 | const struct net_device_ops *ops = dev->netdev_ops; |
1689 | int rc; | 1671 | int rc; |
1690 | 1672 | ||
1691 | prefetch(&dev->netdev_ops->ndo_start_xmit); | ||
1692 | if (likely(!skb->next)) { | 1673 | if (likely(!skb->next)) { |
1693 | if (!list_empty(&ptype_all)) | 1674 | if (!list_empty(&ptype_all)) |
1694 | dev_queue_xmit_nit(skb, dev); | 1675 | dev_queue_xmit_nit(skb, dev); |
@@ -1715,8 +1696,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, | |||
1715 | * the skb destructor before the call and restoring it | 1696 | * the skb destructor before the call and restoring it |
1716 | * afterwards, then doing the skb_orphan() ourselves? | 1697 | * afterwards, then doing the skb_orphan() ourselves? |
1717 | */ | 1698 | */ |
1718 | if (likely(!rc)) | ||
1719 | tstamp_tx(skb); | ||
1720 | return rc; | 1699 | return rc; |
1721 | } | 1700 | } |
1722 | 1701 | ||
@@ -1732,7 +1711,6 @@ gso: | |||
1732 | skb->next = nskb; | 1711 | skb->next = nskb; |
1733 | return rc; | 1712 | return rc; |
1734 | } | 1713 | } |
1735 | tstamp_tx(skb); | ||
1736 | if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) | 1714 | if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) |
1737 | return NETDEV_TX_BUSY; | 1715 | return NETDEV_TX_BUSY; |
1738 | } while (skb->next); | 1716 | } while (skb->next); |
@@ -1745,17 +1723,11 @@ out_kfree_skb: | |||
1745 | } | 1723 | } |
1746 | 1724 | ||
1747 | static u32 skb_tx_hashrnd; | 1725 | static u32 skb_tx_hashrnd; |
1748 | static int skb_tx_hashrnd_initialized = 0; | ||
1749 | 1726 | ||
1750 | static u16 skb_tx_hash(struct net_device *dev, struct sk_buff *skb) | 1727 | u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) |
1751 | { | 1728 | { |
1752 | u32 hash; | 1729 | u32 hash; |
1753 | 1730 | ||
1754 | if (unlikely(!skb_tx_hashrnd_initialized)) { | ||
1755 | get_random_bytes(&skb_tx_hashrnd, 4); | ||
1756 | skb_tx_hashrnd_initialized = 1; | ||
1757 | } | ||
1758 | |||
1759 | if (skb_rx_queue_recorded(skb)) { | 1731 | if (skb_rx_queue_recorded(skb)) { |
1760 | hash = skb_get_rx_queue(skb); | 1732 | hash = skb_get_rx_queue(skb); |
1761 | } else if (skb->sk && skb->sk->sk_hash) { | 1733 | } else if (skb->sk && skb->sk->sk_hash) { |
@@ -1767,6 +1739,7 @@ static u16 skb_tx_hash(struct net_device *dev, struct sk_buff *skb) | |||
1767 | 1739 | ||
1768 | return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); | 1740 | return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); |
1769 | } | 1741 | } |
1742 | EXPORT_SYMBOL(skb_tx_hash); | ||
1770 | 1743 | ||
1771 | static struct netdev_queue *dev_pick_tx(struct net_device *dev, | 1744 | static struct netdev_queue *dev_pick_tx(struct net_device *dev, |
1772 | struct sk_buff *skb) | 1745 | struct sk_buff *skb) |
@@ -2273,12 +2246,6 @@ int netif_receive_skb(struct sk_buff *skb) | |||
2273 | 2246 | ||
2274 | rcu_read_lock(); | 2247 | rcu_read_lock(); |
2275 | 2248 | ||
2276 | /* Don't receive packets in an exiting network namespace */ | ||
2277 | if (!net_alive(dev_net(skb->dev))) { | ||
2278 | kfree_skb(skb); | ||
2279 | goto out; | ||
2280 | } | ||
2281 | |||
2282 | #ifdef CONFIG_NET_CLS_ACT | 2249 | #ifdef CONFIG_NET_CLS_ACT |
2283 | if (skb->tc_verd & TC_NCLS) { | 2250 | if (skb->tc_verd & TC_NCLS) { |
2284 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); | 2251 | skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); |
@@ -2499,6 +2466,9 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) | |||
2499 | { | 2466 | { |
2500 | struct sk_buff *p; | 2467 | struct sk_buff *p; |
2501 | 2468 | ||
2469 | if (netpoll_rx_on(skb)) | ||
2470 | return GRO_NORMAL; | ||
2471 | |||
2502 | for (p = napi->gro_list; p; p = p->next) { | 2472 | for (p = napi->gro_list; p; p = p->next) { |
2503 | NAPI_GRO_CB(p)->same_flow = !compare_ether_header( | 2473 | NAPI_GRO_CB(p)->same_flow = !compare_ether_header( |
2504 | skb_mac_header(p), skb_gro_mac_header(skb)); | 2474 | skb_mac_header(p), skb_gro_mac_header(skb)); |
@@ -2657,9 +2627,9 @@ static int process_backlog(struct napi_struct *napi, int quota) | |||
2657 | local_irq_disable(); | 2627 | local_irq_disable(); |
2658 | skb = __skb_dequeue(&queue->input_pkt_queue); | 2628 | skb = __skb_dequeue(&queue->input_pkt_queue); |
2659 | if (!skb) { | 2629 | if (!skb) { |
2660 | __napi_complete(napi); | ||
2661 | local_irq_enable(); | 2630 | local_irq_enable(); |
2662 | break; | 2631 | napi_complete(napi); |
2632 | goto out; | ||
2663 | } | 2633 | } |
2664 | local_irq_enable(); | 2634 | local_irq_enable(); |
2665 | 2635 | ||
@@ -2668,6 +2638,7 @@ static int process_backlog(struct napi_struct *napi, int quota) | |||
2668 | 2638 | ||
2669 | napi_gro_flush(napi); | 2639 | napi_gro_flush(napi); |
2670 | 2640 | ||
2641 | out: | ||
2671 | return work; | 2642 | return work; |
2672 | } | 2643 | } |
2673 | 2644 | ||
@@ -2741,7 +2712,7 @@ void netif_napi_del(struct napi_struct *napi) | |||
2741 | struct sk_buff *skb, *next; | 2712 | struct sk_buff *skb, *next; |
2742 | 2713 | ||
2743 | list_del_init(&napi->dev_list); | 2714 | list_del_init(&napi->dev_list); |
2744 | kfree(napi->skb); | 2715 | kfree_skb(napi->skb); |
2745 | 2716 | ||
2746 | for (skb = napi->gro_list; skb; skb = next) { | 2717 | for (skb = napi->gro_list; skb; skb = next) { |
2747 | next = skb->next; | 2718 | next = skb->next; |
@@ -4355,6 +4326,39 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) | |||
4355 | } | 4326 | } |
4356 | EXPORT_SYMBOL(netdev_fix_features); | 4327 | EXPORT_SYMBOL(netdev_fix_features); |
4357 | 4328 | ||
4329 | /* Some devices need to (re-)set their netdev_ops inside | ||
4330 | * ->init() or similar. If that happens, we have to setup | ||
4331 | * the compat pointers again. | ||
4332 | */ | ||
4333 | void netdev_resync_ops(struct net_device *dev) | ||
4334 | { | ||
4335 | #ifdef CONFIG_COMPAT_NET_DEV_OPS | ||
4336 | const struct net_device_ops *ops = dev->netdev_ops; | ||
4337 | |||
4338 | dev->init = ops->ndo_init; | ||
4339 | dev->uninit = ops->ndo_uninit; | ||
4340 | dev->open = ops->ndo_open; | ||
4341 | dev->change_rx_flags = ops->ndo_change_rx_flags; | ||
4342 | dev->set_rx_mode = ops->ndo_set_rx_mode; | ||
4343 | dev->set_multicast_list = ops->ndo_set_multicast_list; | ||
4344 | dev->set_mac_address = ops->ndo_set_mac_address; | ||
4345 | dev->validate_addr = ops->ndo_validate_addr; | ||
4346 | dev->do_ioctl = ops->ndo_do_ioctl; | ||
4347 | dev->set_config = ops->ndo_set_config; | ||
4348 | dev->change_mtu = ops->ndo_change_mtu; | ||
4349 | dev->neigh_setup = ops->ndo_neigh_setup; | ||
4350 | dev->tx_timeout = ops->ndo_tx_timeout; | ||
4351 | dev->get_stats = ops->ndo_get_stats; | ||
4352 | dev->vlan_rx_register = ops->ndo_vlan_rx_register; | ||
4353 | dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; | ||
4354 | dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; | ||
4355 | #ifdef CONFIG_NET_POLL_CONTROLLER | ||
4356 | dev->poll_controller = ops->ndo_poll_controller; | ||
4357 | #endif | ||
4358 | #endif | ||
4359 | } | ||
4360 | EXPORT_SYMBOL(netdev_resync_ops); | ||
4361 | |||
4358 | /** | 4362 | /** |
4359 | * register_netdevice - register a network device | 4363 | * register_netdevice - register a network device |
4360 | * @dev: device to register | 4364 | * @dev: device to register |
@@ -4399,27 +4403,7 @@ int register_netdevice(struct net_device *dev) | |||
4399 | * This is temporary until all network devices are converted. | 4403 | * This is temporary until all network devices are converted. |
4400 | */ | 4404 | */ |
4401 | if (dev->netdev_ops) { | 4405 | if (dev->netdev_ops) { |
4402 | const struct net_device_ops *ops = dev->netdev_ops; | 4406 | netdev_resync_ops(dev); |
4403 | |||
4404 | dev->init = ops->ndo_init; | ||
4405 | dev->uninit = ops->ndo_uninit; | ||
4406 | dev->open = ops->ndo_open; | ||
4407 | dev->change_rx_flags = ops->ndo_change_rx_flags; | ||
4408 | dev->set_rx_mode = ops->ndo_set_rx_mode; | ||
4409 | dev->set_multicast_list = ops->ndo_set_multicast_list; | ||
4410 | dev->set_mac_address = ops->ndo_set_mac_address; | ||
4411 | dev->validate_addr = ops->ndo_validate_addr; | ||
4412 | dev->do_ioctl = ops->ndo_do_ioctl; | ||
4413 | dev->set_config = ops->ndo_set_config; | ||
4414 | dev->change_mtu = ops->ndo_change_mtu; | ||
4415 | dev->tx_timeout = ops->ndo_tx_timeout; | ||
4416 | dev->get_stats = ops->ndo_get_stats; | ||
4417 | dev->vlan_rx_register = ops->ndo_vlan_rx_register; | ||
4418 | dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; | ||
4419 | dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; | ||
4420 | #ifdef CONFIG_NET_POLL_CONTROLLER | ||
4421 | dev->poll_controller = ops->ndo_poll_controller; | ||
4422 | #endif | ||
4423 | } else { | 4407 | } else { |
4424 | char drivername[64]; | 4408 | char drivername[64]; |
4425 | pr_info("%s (%s): not using net_device_ops yet\n", | 4409 | pr_info("%s (%s): not using net_device_ops yet\n", |
@@ -5291,6 +5275,14 @@ out: | |||
5291 | 5275 | ||
5292 | subsys_initcall(net_dev_init); | 5276 | subsys_initcall(net_dev_init); |
5293 | 5277 | ||
5278 | static int __init initialize_hashrnd(void) | ||
5279 | { | ||
5280 | get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); | ||
5281 | return 0; | ||
5282 | } | ||
5283 | |||
5284 | late_initcall_sync(initialize_hashrnd); | ||
5285 | |||
5294 | EXPORT_SYMBOL(__dev_get_by_index); | 5286 | EXPORT_SYMBOL(__dev_get_by_index); |
5295 | EXPORT_SYMBOL(__dev_get_by_name); | 5287 | EXPORT_SYMBOL(__dev_get_by_name); |
5296 | EXPORT_SYMBOL(__dev_remove_pack); | 5288 | EXPORT_SYMBOL(__dev_remove_pack); |
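With skb_tx_hash() no longer static and its seed initialised once from a late initcall, multiqueue drivers can reuse the core flow hash instead of rolling their own. A minimal sketch of how a hypothetical driver might hook it into queue selection; the mydrv_* name is illustrative and the two-argument ndo_select_queue signature is the one used by this kernel series:

	#include <linux/netdevice.h>
	#include <linux/skbuff.h>

	/* Hook this up via .ndo_select_queue in the driver's net_device_ops. */
	static u16 mydrv_select_queue(struct net_device *dev, struct sk_buff *skb)
	{
		/* Spread flows across dev->real_num_tx_queues using the core hash. */
		return skb_tx_hash(dev, skb);
	}
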
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c new file mode 100644 index 000000000000..9fd0dc3cca99 --- /dev/null +++ b/net/core/drop_monitor.c | |||
@@ -0,0 +1,263 @@ | |||
1 | /* | ||
2 | * Monitoring code for network dropped packet alerts | ||
3 | * | ||
4 | * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> | ||
5 | */ | ||
6 | |||
7 | #include <linux/netdevice.h> | ||
8 | #include <linux/etherdevice.h> | ||
9 | #include <linux/string.h> | ||
10 | #include <linux/if_arp.h> | ||
11 | #include <linux/inetdevice.h> | ||
12 | #include <linux/inet.h> | ||
13 | #include <linux/interrupt.h> | ||
14 | #include <linux/netpoll.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/delay.h> | ||
17 | #include <linux/types.h> | ||
18 | #include <linux/workqueue.h> | ||
19 | #include <linux/netlink.h> | ||
20 | #include <linux/net_dropmon.h> | ||
21 | #include <linux/percpu.h> | ||
22 | #include <linux/timer.h> | ||
23 | #include <linux/bitops.h> | ||
24 | #include <net/genetlink.h> | ||
25 | |||
26 | #include <trace/skb.h> | ||
27 | |||
28 | #include <asm/unaligned.h> | ||
29 | |||
30 | #define TRACE_ON 1 | ||
31 | #define TRACE_OFF 0 | ||
32 | |||
33 | static void send_dm_alert(struct work_struct *unused); | ||
34 | |||
35 | |||
36 | /* | ||
37 | * Globals, our netlink socket pointer | ||
38 | * and the work handle that will send up | ||
39 | * netlink alerts | ||
40 | */ | ||
41 | struct sock *dm_sock; | ||
42 | |||
43 | struct per_cpu_dm_data { | ||
44 | struct work_struct dm_alert_work; | ||
45 | struct sk_buff *skb; | ||
46 | atomic_t dm_hit_count; | ||
47 | struct timer_list send_timer; | ||
48 | }; | ||
49 | |||
50 | static struct genl_family net_drop_monitor_family = { | ||
51 | .id = GENL_ID_GENERATE, | ||
52 | .hdrsize = 0, | ||
53 | .name = "NET_DM", | ||
54 | .version = 1, | ||
55 | .maxattr = NET_DM_CMD_MAX, | ||
56 | }; | ||
57 | |||
58 | static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); | ||
59 | |||
60 | static int dm_hit_limit = 64; | ||
61 | static int dm_delay = 1; | ||
62 | |||
63 | |||
64 | static void reset_per_cpu_data(struct per_cpu_dm_data *data) | ||
65 | { | ||
66 | size_t al; | ||
67 | struct net_dm_alert_msg *msg; | ||
68 | |||
69 | al = sizeof(struct net_dm_alert_msg); | ||
70 | al += dm_hit_limit * sizeof(struct net_dm_drop_point); | ||
71 | data->skb = genlmsg_new(al, GFP_KERNEL); | ||
72 | genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family, | ||
73 | 0, NET_DM_CMD_ALERT); | ||
74 | msg = __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_alert_msg)); | ||
75 | memset(msg, 0, al); | ||
76 | atomic_set(&data->dm_hit_count, dm_hit_limit); | ||
77 | } | ||
78 | |||
79 | static void send_dm_alert(struct work_struct *unused) | ||
80 | { | ||
81 | struct sk_buff *skb; | ||
82 | struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); | ||
83 | |||
84 | /* | ||
85 | * Grab the skb we're about to send | ||
86 | */ | ||
87 | skb = data->skb; | ||
88 | |||
89 | /* | ||
90 | * Replace it with a new one | ||
91 | */ | ||
92 | reset_per_cpu_data(data); | ||
93 | |||
94 | /* | ||
95 | * Ship it! | ||
96 | */ | ||
97 | genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); | ||
98 | |||
99 | } | ||
100 | |||
101 | /* | ||
102 | * This is the timer function to delay the sending of an alert | ||
103 | * in the event that more drops will arrive during the | ||
104 | * hysteresis period. Note that it operates under the timer interrupt | ||
105 | * so we don't need to disable preemption here | ||
106 | */ | ||
107 | static void sched_send_work(unsigned long unused) | ||
108 | { | ||
109 | struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); | ||
110 | |||
111 | schedule_work(&data->dm_alert_work); | ||
112 | } | ||
113 | |||
114 | static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) | ||
115 | { | ||
116 | struct net_dm_alert_msg *msg; | ||
117 | struct nlmsghdr *nlh; | ||
118 | int i; | ||
119 | struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); | ||
120 | |||
121 | |||
122 | if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { | ||
123 | /* | ||
124 | * we're already at zero, discard this hit | ||
125 | */ | ||
126 | goto out; | ||
127 | } | ||
128 | |||
129 | nlh = (struct nlmsghdr *)data->skb->data; | ||
130 | msg = genlmsg_data(nlmsg_data(nlh)); | ||
131 | for (i = 0; i < msg->entries; i++) { | ||
132 | if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { | ||
133 | msg->points[i].count++; | ||
134 | goto out; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * We need to create a new entry | ||
140 | */ | ||
141 | __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point)); | ||
142 | memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); | ||
143 | msg->points[msg->entries].count = 1; | ||
144 | msg->entries++; | ||
145 | |||
146 | if (!timer_pending(&data->send_timer)) { | ||
147 | data->send_timer.expires = jiffies + dm_delay * HZ; | ||
148 | add_timer_on(&data->send_timer, smp_processor_id()); | ||
149 | } | ||
150 | |||
151 | out: | ||
152 | return; | ||
153 | } | ||
154 | |||
155 | static int set_all_monitor_traces(int state) | ||
156 | { | ||
157 | int rc = 0; | ||
158 | |||
159 | switch (state) { | ||
160 | case TRACE_ON: | ||
161 | rc |= register_trace_kfree_skb(trace_kfree_skb_hit); | ||
162 | break; | ||
163 | case TRACE_OFF: | ||
164 | rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); | ||
165 | |||
166 | tracepoint_synchronize_unregister(); | ||
167 | break; | ||
168 | default: | ||
169 | rc = 1; | ||
170 | break; | ||
171 | } | ||
172 | |||
173 | if (rc) | ||
174 | return -EINPROGRESS; | ||
175 | return rc; | ||
176 | } | ||
177 | |||
178 | |||
179 | static int net_dm_cmd_config(struct sk_buff *skb, | ||
180 | struct genl_info *info) | ||
181 | { | ||
182 | return -ENOTSUPP; | ||
183 | } | ||
184 | |||
185 | static int net_dm_cmd_trace(struct sk_buff *skb, | ||
186 | struct genl_info *info) | ||
187 | { | ||
188 | switch (info->genlhdr->cmd) { | ||
189 | case NET_DM_CMD_START: | ||
190 | return set_all_monitor_traces(TRACE_ON); | ||
191 | break; | ||
192 | case NET_DM_CMD_STOP: | ||
193 | return set_all_monitor_traces(TRACE_OFF); | ||
194 | break; | ||
195 | } | ||
196 | |||
197 | return -ENOTSUPP; | ||
198 | } | ||
199 | |||
200 | |||
201 | static struct genl_ops dropmon_ops[] = { | ||
202 | { | ||
203 | .cmd = NET_DM_CMD_CONFIG, | ||
204 | .doit = net_dm_cmd_config, | ||
205 | }, | ||
206 | { | ||
207 | .cmd = NET_DM_CMD_START, | ||
208 | .doit = net_dm_cmd_trace, | ||
209 | }, | ||
210 | { | ||
211 | .cmd = NET_DM_CMD_STOP, | ||
212 | .doit = net_dm_cmd_trace, | ||
213 | }, | ||
214 | }; | ||
215 | |||
216 | static int __init init_net_drop_monitor(void) | ||
217 | { | ||
218 | int cpu; | ||
219 | int rc, i, ret; | ||
220 | struct per_cpu_dm_data *data; | ||
221 | printk(KERN_INFO "Initializing network drop monitor service\n"); | ||
222 | |||
223 | if (sizeof(void *) > 8) { | ||
224 | printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); | ||
225 | return -ENOSPC; | ||
226 | } | ||
227 | |||
228 | if (genl_register_family(&net_drop_monitor_family) < 0) { | ||
229 | printk(KERN_ERR "Could not create drop monitor netlink family\n"); | ||
230 | return -EFAULT; | ||
231 | } | ||
232 | |||
233 | rc = -EFAULT; | ||
234 | |||
235 | for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) { | ||
236 | ret = genl_register_ops(&net_drop_monitor_family, | ||
237 | &dropmon_ops[i]); | ||
238 | if (ret) { | ||
239 | printk(KERN_CRIT "failed to register operation %d\n", | ||
240 | dropmon_ops[i].cmd); | ||
241 | goto out_unreg; | ||
242 | } | ||
243 | } | ||
244 | |||
245 | rc = 0; | ||
246 | |||
247 | for_each_present_cpu(cpu) { | ||
248 | data = &per_cpu(dm_cpu_data, cpu); | ||
249 | reset_per_cpu_data(data); | ||
250 | INIT_WORK(&data->dm_alert_work, send_dm_alert); | ||
251 | init_timer(&data->send_timer); | ||
252 | data->send_timer.data = cpu; | ||
253 | data->send_timer.function = sched_send_work; | ||
254 | } | ||
255 | goto out; | ||
256 | |||
257 | out_unreg: | ||
258 | genl_unregister_family(&net_drop_monitor_family); | ||
259 | out: | ||
260 | return rc; | ||
261 | } | ||
262 | |||
263 | late_initcall(init_net_drop_monitor); | ||
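The monitor hooks the kfree_skb tracepoint, batches up to dm_hit_limit (64) distinct drop locations per CPU, and holds alerts back for dm_delay (one second) of hysteresis before multicasting them on the NET_DM generic netlink family. The same tracepoint can be consumed elsewhere; a minimal sketch of a stand-alone probe module using only the register/unregister calls introduced by this series (my_* names are hypothetical):

	#include <linux/module.h>
	#include <linux/skbuff.h>
	#include <trace/skb.h>

	static void my_kfree_skb_probe(struct sk_buff *skb, void *location)
	{
		/* Called for every dropped skb; 'location' is the caller of kfree_skb(). */
		pr_debug("dropped skb %p at %pS\n", skb, location);
	}

	static int __init my_probe_init(void)
	{
		return register_trace_kfree_skb(my_kfree_skb_probe);
	}

	static void __exit my_probe_exit(void)
	{
		unregister_trace_kfree_skb(my_kfree_skb_probe);
		/* Make sure no probe is still running before the module goes away. */
		tracepoint_synchronize_unregister();
	}

	module_init(my_probe_init);
	module_exit(my_probe_exit);
	MODULE_LICENSE("GPL");
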
diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 947710a36ced..244ca56dffac 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c | |||
@@ -209,34 +209,62 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) | |||
209 | return 0; | 209 | return 0; |
210 | } | 210 | } |
211 | 211 | ||
212 | static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr) | 212 | static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) |
213 | { | 213 | { |
214 | struct ethtool_rxnfc cmd; | 214 | struct ethtool_rxnfc cmd; |
215 | 215 | ||
216 | if (!dev->ethtool_ops->set_rxhash) | 216 | if (!dev->ethtool_ops->set_rxnfc) |
217 | return -EOPNOTSUPP; | 217 | return -EOPNOTSUPP; |
218 | 218 | ||
219 | if (copy_from_user(&cmd, useraddr, sizeof(cmd))) | 219 | if (copy_from_user(&cmd, useraddr, sizeof(cmd))) |
220 | return -EFAULT; | 220 | return -EFAULT; |
221 | 221 | ||
222 | return dev->ethtool_ops->set_rxhash(dev, &cmd); | 222 | return dev->ethtool_ops->set_rxnfc(dev, &cmd); |
223 | } | 223 | } |
224 | 224 | ||
225 | static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr) | 225 | static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) |
226 | { | 226 | { |
227 | struct ethtool_rxnfc info; | 227 | struct ethtool_rxnfc info; |
228 | const struct ethtool_ops *ops = dev->ethtool_ops; | ||
229 | int ret; | ||
230 | void *rule_buf = NULL; | ||
228 | 231 | ||
229 | if (!dev->ethtool_ops->get_rxhash) | 232 | if (!ops->get_rxnfc) |
230 | return -EOPNOTSUPP; | 233 | return -EOPNOTSUPP; |
231 | 234 | ||
232 | if (copy_from_user(&info, useraddr, sizeof(info))) | 235 | if (copy_from_user(&info, useraddr, sizeof(info))) |
233 | return -EFAULT; | 236 | return -EFAULT; |
234 | 237 | ||
235 | dev->ethtool_ops->get_rxhash(dev, &info); | 238 | if (info.cmd == ETHTOOL_GRXCLSRLALL) { |
239 | if (info.rule_cnt > 0) { | ||
240 | rule_buf = kmalloc(info.rule_cnt * sizeof(u32), | ||
241 | GFP_USER); | ||
242 | if (!rule_buf) | ||
243 | return -ENOMEM; | ||
244 | } | ||
245 | } | ||
236 | 246 | ||
247 | ret = ops->get_rxnfc(dev, &info, rule_buf); | ||
248 | if (ret < 0) | ||
249 | goto err_out; | ||
250 | |||
251 | ret = -EFAULT; | ||
237 | if (copy_to_user(useraddr, &info, sizeof(info))) | 252 | if (copy_to_user(useraddr, &info, sizeof(info))) |
238 | return -EFAULT; | 253 | goto err_out; |
239 | return 0; | 254 | |
255 | if (rule_buf) { | ||
256 | useraddr += offsetof(struct ethtool_rxnfc, rule_locs); | ||
257 | if (copy_to_user(useraddr, rule_buf, | ||
258 | info.rule_cnt * sizeof(u32))) | ||
259 | goto err_out; | ||
260 | } | ||
261 | ret = 0; | ||
262 | |||
263 | err_out: | ||
264 | if (rule_buf) | ||
265 | kfree(rule_buf); | ||
266 | |||
267 | return ret; | ||
240 | } | 268 | } |
241 | 269 | ||
242 | static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) | 270 | static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) |
@@ -901,6 +929,10 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
901 | case ETHTOOL_GFLAGS: | 929 | case ETHTOOL_GFLAGS: |
902 | case ETHTOOL_GPFLAGS: | 930 | case ETHTOOL_GPFLAGS: |
903 | case ETHTOOL_GRXFH: | 931 | case ETHTOOL_GRXFH: |
932 | case ETHTOOL_GRXRINGS: | ||
933 | case ETHTOOL_GRXCLSRLCNT: | ||
934 | case ETHTOOL_GRXCLSRULE: | ||
935 | case ETHTOOL_GRXCLSRLALL: | ||
904 | break; | 936 | break; |
905 | default: | 937 | default: |
906 | if (!capable(CAP_NET_ADMIN)) | 938 | if (!capable(CAP_NET_ADMIN)) |
@@ -1052,10 +1084,16 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) | |||
1052 | dev->ethtool_ops->set_priv_flags); | 1084 | dev->ethtool_ops->set_priv_flags); |
1053 | break; | 1085 | break; |
1054 | case ETHTOOL_GRXFH: | 1086 | case ETHTOOL_GRXFH: |
1055 | rc = ethtool_get_rxhash(dev, useraddr); | 1087 | case ETHTOOL_GRXRINGS: |
1088 | case ETHTOOL_GRXCLSRLCNT: | ||
1089 | case ETHTOOL_GRXCLSRULE: | ||
1090 | case ETHTOOL_GRXCLSRLALL: | ||
1091 | rc = ethtool_get_rxnfc(dev, useraddr); | ||
1056 | break; | 1092 | break; |
1057 | case ETHTOOL_SRXFH: | 1093 | case ETHTOOL_SRXFH: |
1058 | rc = ethtool_set_rxhash(dev, useraddr); | 1094 | case ETHTOOL_SRXCLSRLDEL: |
1095 | case ETHTOOL_SRXCLSRLINS: | ||
1096 | rc = ethtool_set_rxnfc(dev, useraddr); | ||
1059 | break; | 1097 | break; |
1060 | case ETHTOOL_GGRO: | 1098 | case ETHTOOL_GGRO: |
1061 | rc = ethtool_get_gro(dev, useraddr); | 1099 | rc = ethtool_get_gro(dev, useraddr); |
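ETHTOOL_GRXFH is now just one sub-command of the generalised get_rxnfc/set_rxnfc hooks, which also cover RX ring counts and classification rules, with the variable-length rule table passed back through the extra rule_buf argument. A hedged sketch of the driver side, handling only the ring-count query; mydrv_* is hypothetical and everything else is refused:

	#include <linux/ethtool.h>
	#include <linux/netdevice.h>

	static int mydrv_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
				   void *rule_locs)
	{
		switch (info->cmd) {
		case ETHTOOL_GRXRINGS:
			info->data = 4;		/* hypothetical number of RX rings */
			return 0;
		default:
			return -EOPNOTSUPP;	/* no classification rules supported */
		}
	}

	static const struct ethtool_ops mydrv_ethtool_ops = {
		.get_rxnfc = mydrv_get_rxnfc,
	};
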
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 32b3a0152d7a..98691e1466b8 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c | |||
@@ -588,7 +588,8 @@ static void notify_rule_change(int event, struct fib_rule *rule, | |||
588 | goto errout; | 588 | goto errout; |
589 | } | 589 | } |
590 | 590 | ||
591 | err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); | 591 | rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); |
592 | return; | ||
592 | errout: | 593 | errout: |
593 | if (err < 0) | 594 | if (err < 0) |
594 | rtnl_set_sk_err(net, ops->nlgroup, err); | 595 | rtnl_set_sk_err(net, ops->nlgroup, err); |
diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 278a142d1047..a1cbce7fdae5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c | |||
@@ -871,8 +871,7 @@ static void neigh_timer_handler(unsigned long arg) | |||
871 | write_unlock(&neigh->lock); | 871 | write_unlock(&neigh->lock); |
872 | neigh->ops->solicit(neigh, skb); | 872 | neigh->ops->solicit(neigh, skb); |
873 | atomic_inc(&neigh->probes); | 873 | atomic_inc(&neigh->probes); |
874 | if (skb) | 874 | kfree_skb(skb); |
875 | kfree_skb(skb); | ||
876 | } else { | 875 | } else { |
877 | out: | 876 | out: |
878 | write_unlock(&neigh->lock); | 877 | write_unlock(&neigh->lock); |
@@ -908,8 +907,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) | |||
908 | neigh->updated = jiffies; | 907 | neigh->updated = jiffies; |
909 | write_unlock_bh(&neigh->lock); | 908 | write_unlock_bh(&neigh->lock); |
910 | 909 | ||
911 | if (skb) | 910 | kfree_skb(skb); |
912 | kfree_skb(skb); | ||
913 | return 1; | 911 | return 1; |
914 | } | 912 | } |
915 | } else if (neigh->nud_state & NUD_STALE) { | 913 | } else if (neigh->nud_state & NUD_STALE) { |
@@ -1656,7 +1654,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
1656 | flags &= ~NEIGH_UPDATE_F_OVERRIDE; | 1654 | flags &= ~NEIGH_UPDATE_F_OVERRIDE; |
1657 | } | 1655 | } |
1658 | 1656 | ||
1659 | err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); | 1657 | if (ndm->ndm_flags & NTF_USE) { |
1658 | neigh_event_send(neigh, NULL); | ||
1659 | err = 0; | ||
1660 | } else | ||
1661 | err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); | ||
1660 | neigh_release(neigh); | 1662 | neigh_release(neigh); |
1661 | goto out_dev_put; | 1663 | goto out_dev_put; |
1662 | } | 1664 | } |
@@ -2534,7 +2536,8 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) | |||
2534 | kfree_skb(skb); | 2536 | kfree_skb(skb); |
2535 | goto errout; | 2537 | goto errout; |
2536 | } | 2538 | } |
2537 | err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); | 2539 | rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); |
2540 | return; | ||
2538 | errout: | 2541 | errout: |
2539 | if (err < 0) | 2542 | if (err < 0) |
2540 | rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); | 2543 | rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); |
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 6ac29a46e23e..2da59a0ac4ac 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c | |||
@@ -77,7 +77,9 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, | |||
77 | if (endp == buf) | 77 | if (endp == buf) |
78 | goto err; | 78 | goto err; |
79 | 79 | ||
80 | rtnl_lock(); | 80 | if (!rtnl_trylock()) |
81 | return -ERESTARTSYS; | ||
82 | |||
81 | if (dev_isalive(net)) { | 83 | if (dev_isalive(net)) { |
82 | if ((ret = (*set)(net, new)) == 0) | 84 | if ((ret = (*set)(net, new)) == 0) |
83 | ret = len; | 85 | ret = len; |
@@ -496,7 +498,7 @@ int netdev_register_kobject(struct net_device *net) | |||
496 | dev->groups = groups; | 498 | dev->groups = groups; |
497 | 499 | ||
498 | BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); | 500 | BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); |
499 | dev_set_name(dev, net->name); | 501 | dev_set_name(dev, "%s", net->name); |
500 | 502 | ||
501 | #ifdef CONFIG_SYSFS | 503 | #ifdef CONFIG_SYSFS |
502 | *groups++ = &netstat_group; | 504 | *groups++ = &netstat_group; |
diff --git a/net/core/net-traces.c b/net/core/net-traces.c new file mode 100644 index 000000000000..c8fb45665e4f --- /dev/null +++ b/net/core/net-traces.c | |||
@@ -0,0 +1,29 @@ | |||
1 | /* | ||
2 | * consolidates trace point definitions | ||
3 | * | ||
4 | * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com> | ||
5 | */ | ||
6 | |||
7 | #include <linux/netdevice.h> | ||
8 | #include <linux/etherdevice.h> | ||
9 | #include <linux/string.h> | ||
10 | #include <linux/if_arp.h> | ||
11 | #include <linux/inetdevice.h> | ||
12 | #include <linux/inet.h> | ||
13 | #include <linux/interrupt.h> | ||
14 | #include <linux/netpoll.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/delay.h> | ||
17 | #include <linux/rcupdate.h> | ||
18 | #include <linux/types.h> | ||
19 | #include <linux/workqueue.h> | ||
20 | #include <linux/netlink.h> | ||
21 | #include <linux/net_dropmon.h> | ||
22 | #include <trace/skb.h> | ||
23 | |||
24 | #include <asm/unaligned.h> | ||
25 | #include <asm/bitops.h> | ||
26 | |||
27 | |||
28 | DEFINE_TRACE(kfree_skb); | ||
29 | EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); | ||
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 55151faaf90c..e3bebd36f053 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c | |||
@@ -32,24 +32,14 @@ static __net_init int setup_net(struct net *net) | |||
32 | { | 32 | { |
33 | /* Must be called with net_mutex held */ | 33 | /* Must be called with net_mutex held */ |
34 | struct pernet_operations *ops; | 34 | struct pernet_operations *ops; |
35 | int error; | 35 | int error = 0; |
36 | struct net_generic *ng; | ||
37 | 36 | ||
38 | atomic_set(&net->count, 1); | 37 | atomic_set(&net->count, 1); |
38 | |||
39 | #ifdef NETNS_REFCNT_DEBUG | 39 | #ifdef NETNS_REFCNT_DEBUG |
40 | atomic_set(&net->use_count, 0); | 40 | atomic_set(&net->use_count, 0); |
41 | #endif | 41 | #endif |
42 | 42 | ||
43 | error = -ENOMEM; | ||
44 | ng = kzalloc(sizeof(struct net_generic) + | ||
45 | INITIAL_NET_GEN_PTRS * sizeof(void *), GFP_KERNEL); | ||
46 | if (ng == NULL) | ||
47 | goto out; | ||
48 | |||
49 | ng->len = INITIAL_NET_GEN_PTRS; | ||
50 | rcu_assign_pointer(net->gen, ng); | ||
51 | |||
52 | error = 0; | ||
53 | list_for_each_entry(ops, &pernet_list, list) { | 43 | list_for_each_entry(ops, &pernet_list, list) { |
54 | if (ops->init) { | 44 | if (ops->init) { |
55 | error = ops->init(net); | 45 | error = ops->init(net); |
@@ -70,24 +60,50 @@ out_undo: | |||
70 | } | 60 | } |
71 | 61 | ||
72 | rcu_barrier(); | 62 | rcu_barrier(); |
73 | kfree(ng); | ||
74 | goto out; | 63 | goto out; |
75 | } | 64 | } |
76 | 65 | ||
66 | static struct net_generic *net_alloc_generic(void) | ||
67 | { | ||
68 | struct net_generic *ng; | ||
69 | size_t generic_size = sizeof(struct net_generic) + | ||
70 | INITIAL_NET_GEN_PTRS * sizeof(void *); | ||
71 | |||
72 | ng = kzalloc(generic_size, GFP_KERNEL); | ||
73 | if (ng) | ||
74 | ng->len = INITIAL_NET_GEN_PTRS; | ||
75 | |||
76 | return ng; | ||
77 | } | ||
78 | |||
77 | #ifdef CONFIG_NET_NS | 79 | #ifdef CONFIG_NET_NS |
78 | static struct kmem_cache *net_cachep; | 80 | static struct kmem_cache *net_cachep; |
79 | static struct workqueue_struct *netns_wq; | 81 | static struct workqueue_struct *netns_wq; |
80 | 82 | ||
81 | static struct net *net_alloc(void) | 83 | static struct net *net_alloc(void) |
82 | { | 84 | { |
83 | return kmem_cache_zalloc(net_cachep, GFP_KERNEL); | 85 | struct net *net = NULL; |
86 | struct net_generic *ng; | ||
87 | |||
88 | ng = net_alloc_generic(); | ||
89 | if (!ng) | ||
90 | goto out; | ||
91 | |||
92 | net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); | ||
93 | if (!net) | ||
94 | goto out_free; | ||
95 | |||
96 | rcu_assign_pointer(net->gen, ng); | ||
97 | out: | ||
98 | return net; | ||
99 | |||
100 | out_free: | ||
101 | kfree(ng); | ||
102 | goto out; | ||
84 | } | 103 | } |
85 | 104 | ||
86 | static void net_free(struct net *net) | 105 | static void net_free(struct net *net) |
87 | { | 106 | { |
88 | if (!net) | ||
89 | return; | ||
90 | |||
91 | #ifdef NETNS_REFCNT_DEBUG | 107 | #ifdef NETNS_REFCNT_DEBUG |
92 | if (unlikely(atomic_read(&net->use_count) != 0)) { | 108 | if (unlikely(atomic_read(&net->use_count) != 0)) { |
93 | printk(KERN_EMERG "network namespace not free! Usage: %d\n", | 109 | printk(KERN_EMERG "network namespace not free! Usage: %d\n", |
@@ -112,27 +128,28 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) | |||
112 | err = -ENOMEM; | 128 | err = -ENOMEM; |
113 | new_net = net_alloc(); | 129 | new_net = net_alloc(); |
114 | if (!new_net) | 130 | if (!new_net) |
115 | goto out; | 131 | goto out_err; |
116 | 132 | ||
117 | mutex_lock(&net_mutex); | 133 | mutex_lock(&net_mutex); |
118 | err = setup_net(new_net); | 134 | err = setup_net(new_net); |
119 | if (err) | 135 | if (!err) { |
120 | goto out_unlock; | 136 | rtnl_lock(); |
121 | 137 | list_add_tail(&new_net->list, &net_namespace_list); | |
122 | rtnl_lock(); | 138 | rtnl_unlock(); |
123 | list_add_tail(&new_net->list, &net_namespace_list); | 139 | } |
124 | rtnl_unlock(); | ||
125 | |||
126 | |||
127 | out_unlock: | ||
128 | mutex_unlock(&net_mutex); | 140 | mutex_unlock(&net_mutex); |
141 | |||
142 | if (err) | ||
143 | goto out_free; | ||
129 | out: | 144 | out: |
130 | put_net(old_net); | 145 | put_net(old_net); |
131 | if (err) { | ||
132 | net_free(new_net); | ||
133 | new_net = ERR_PTR(err); | ||
134 | } | ||
135 | return new_net; | 146 | return new_net; |
147 | |||
148 | out_free: | ||
149 | net_free(new_net); | ||
150 | out_err: | ||
151 | new_net = ERR_PTR(err); | ||
152 | goto out; | ||
136 | } | 153 | } |
137 | 154 | ||
138 | static void cleanup_net(struct work_struct *work) | 155 | static void cleanup_net(struct work_struct *work) |
@@ -140,9 +157,6 @@ static void cleanup_net(struct work_struct *work) | |||
140 | struct pernet_operations *ops; | 157 | struct pernet_operations *ops; |
141 | struct net *net; | 158 | struct net *net; |
142 | 159 | ||
143 | /* Be very certain incoming network packets will not find us */ | ||
144 | rcu_barrier(); | ||
145 | |||
146 | net = container_of(work, struct net, work); | 160 | net = container_of(work, struct net, work); |
147 | 161 | ||
148 | mutex_lock(&net_mutex); | 162 | mutex_lock(&net_mutex); |
@@ -188,6 +202,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) | |||
188 | 202 | ||
189 | static int __init net_ns_init(void) | 203 | static int __init net_ns_init(void) |
190 | { | 204 | { |
205 | struct net_generic *ng; | ||
191 | int err; | 206 | int err; |
192 | 207 | ||
193 | printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); | 208 | printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); |
@@ -202,6 +217,12 @@ static int __init net_ns_init(void) | |||
202 | panic("Could not create netns workq"); | 217 | panic("Could not create netns workq"); |
203 | #endif | 218 | #endif |
204 | 219 | ||
220 | ng = net_alloc_generic(); | ||
221 | if (!ng) | ||
222 | panic("Could not allocate generic netns"); | ||
223 | |||
224 | rcu_assign_pointer(init_net.gen, ng); | ||
225 | |||
205 | mutex_lock(&net_mutex); | 226 | mutex_lock(&net_mutex); |
206 | err = setup_net(&init_net); | 227 | err = setup_net(&init_net); |
207 | 228 | ||
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 65498483325a..32d419f5ac98 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c | |||
@@ -3275,8 +3275,7 @@ static void pktgen_stop(struct pktgen_thread *t) | |||
3275 | 3275 | ||
3276 | list_for_each_entry(pkt_dev, &t->if_list, list) { | 3276 | list_for_each_entry(pkt_dev, &t->if_list, list) { |
3277 | pktgen_stop_device(pkt_dev); | 3277 | pktgen_stop_device(pkt_dev); |
3278 | if (pkt_dev->skb) | 3278 | kfree_skb(pkt_dev->skb); |
3279 | kfree_skb(pkt_dev->skb); | ||
3280 | 3279 | ||
3281 | pkt_dev->skb = NULL; | 3280 | pkt_dev->skb = NULL; |
3282 | } | 3281 | } |
@@ -3303,8 +3302,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) | |||
3303 | if (!cur->removal_mark) | 3302 | if (!cur->removal_mark) |
3304 | continue; | 3303 | continue; |
3305 | 3304 | ||
3306 | if (cur->skb) | 3305 | kfree_skb(cur->skb); |
3307 | kfree_skb(cur->skb); | ||
3308 | cur->skb = NULL; | 3306 | cur->skb = NULL; |
3309 | 3307 | ||
3310 | pktgen_remove_device(t, cur); | 3308 | pktgen_remove_device(t, cur); |
@@ -3328,8 +3326,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) | |||
3328 | list_for_each_safe(q, n, &t->if_list) { | 3326 | list_for_each_safe(q, n, &t->if_list) { |
3329 | cur = list_entry(q, struct pktgen_dev, list); | 3327 | cur = list_entry(q, struct pktgen_dev, list); |
3330 | 3328 | ||
3331 | if (cur->skb) | 3329 | kfree_skb(cur->skb); |
3332 | kfree_skb(cur->skb); | ||
3333 | cur->skb = NULL; | 3330 | cur->skb = NULL; |
3334 | 3331 | ||
3335 | pktgen_remove_device(t, cur); | 3332 | pktgen_remove_device(t, cur); |
@@ -3393,8 +3390,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) | |||
3393 | 3390 | ||
3394 | if (!netif_running(odev)) { | 3391 | if (!netif_running(odev)) { |
3395 | pktgen_stop_device(pkt_dev); | 3392 | pktgen_stop_device(pkt_dev); |
3396 | if (pkt_dev->skb) | 3393 | kfree_skb(pkt_dev->skb); |
3397 | kfree_skb(pkt_dev->skb); | ||
3398 | pkt_dev->skb = NULL; | 3394 | pkt_dev->skb = NULL; |
3399 | goto out; | 3395 | goto out; |
3400 | } | 3396 | } |
@@ -3415,8 +3411,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) | |||
3415 | if ((++pkt_dev->clone_count >= pkt_dev->clone_skb) | 3411 | if ((++pkt_dev->clone_count >= pkt_dev->clone_skb) |
3416 | || (!pkt_dev->skb)) { | 3412 | || (!pkt_dev->skb)) { |
3417 | /* build a new pkt */ | 3413 | /* build a new pkt */ |
3418 | if (pkt_dev->skb) | 3414 | kfree_skb(pkt_dev->skb); |
3419 | kfree_skb(pkt_dev->skb); | ||
3420 | 3415 | ||
3421 | pkt_dev->skb = fill_packet(odev, pkt_dev); | 3416 | pkt_dev->skb = fill_packet(odev, pkt_dev); |
3422 | if (pkt_dev->skb == NULL) { | 3417 | if (pkt_dev->skb == NULL) { |
@@ -3498,8 +3493,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) | |||
3498 | 3493 | ||
3499 | /* Done with this */ | 3494 | /* Done with this */ |
3500 | pktgen_stop_device(pkt_dev); | 3495 | pktgen_stop_device(pkt_dev); |
3501 | if (pkt_dev->skb) | 3496 | kfree_skb(pkt_dev->skb); |
3502 | kfree_skb(pkt_dev->skb); | ||
3503 | pkt_dev->skb = NULL; | 3497 | pkt_dev->skb = NULL; |
3504 | } | 3498 | } |
3505 | out:; | 3499 | out:; |
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 790dd205bb5d..d78030f88bd0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c | |||
@@ -455,8 +455,8 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) | |||
455 | return nlmsg_unicast(rtnl, skb, pid); | 455 | return nlmsg_unicast(rtnl, skb, pid); |
456 | } | 456 | } |
457 | 457 | ||
458 | int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, | 458 | void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, |
459 | struct nlmsghdr *nlh, gfp_t flags) | 459 | struct nlmsghdr *nlh, gfp_t flags) |
460 | { | 460 | { |
461 | struct sock *rtnl = net->rtnl; | 461 | struct sock *rtnl = net->rtnl; |
462 | int report = 0; | 462 | int report = 0; |
@@ -464,7 +464,7 @@ int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, | |||
464 | if (nlh) | 464 | if (nlh) |
465 | report = nlmsg_report(nlh); | 465 | report = nlmsg_report(nlh); |
466 | 466 | ||
467 | return nlmsg_notify(rtnl, skb, pid, group, report, flags); | 467 | nlmsg_notify(rtnl, skb, pid, group, report, flags); |
468 | } | 468 | } |
469 | 469 | ||
470 | void rtnl_set_sk_err(struct net *net, u32 group, int error) | 470 | void rtnl_set_sk_err(struct net *net, u32 group, int error) |
@@ -1246,7 +1246,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) | |||
1246 | kfree_skb(skb); | 1246 | kfree_skb(skb); |
1247 | goto errout; | 1247 | goto errout; |
1248 | } | 1248 | } |
1249 | err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); | 1249 | rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); |
1250 | return; | ||
1250 | errout: | 1251 | errout: |
1251 | if (err < 0) | 1252 | if (err < 0) |
1252 | rtnl_set_sk_err(net, RTNLGRP_LINK, err); | 1253 | rtnl_set_sk_err(net, RTNLGRP_LINK, err); |
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e5a8351ff12d..6acbf9e79eb1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c | |||
@@ -65,6 +65,7 @@ | |||
65 | 65 | ||
66 | #include <asm/uaccess.h> | 66 | #include <asm/uaccess.h> |
67 | #include <asm/system.h> | 67 | #include <asm/system.h> |
68 | #include <trace/skb.h> | ||
68 | 69 | ||
69 | #include "kmap_skb.h" | 70 | #include "kmap_skb.h" |
70 | 71 | ||
@@ -146,14 +147,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) | |||
146 | } | 147 | } |
147 | EXPORT_SYMBOL(skb_under_panic); | 148 | EXPORT_SYMBOL(skb_under_panic); |
148 | 149 | ||
149 | void skb_truesize_bug(struct sk_buff *skb) | ||
150 | { | ||
151 | WARN(net_ratelimit(), KERN_ERR "SKB BUG: Invalid truesize (%u) " | ||
152 | "len=%u, sizeof(sk_buff)=%Zd\n", | ||
153 | skb->truesize, skb->len, sizeof(struct sk_buff)); | ||
154 | } | ||
155 | EXPORT_SYMBOL(skb_truesize_bug); | ||
156 | |||
157 | /* Allocate a new skbuff. We do this ourselves so we can fill in a few | 150 | /* Allocate a new skbuff. We do this ourselves so we can fill in a few |
158 | * 'private' fields and also do memory statistics to find all the | 151 | * 'private' fields and also do memory statistics to find all the |
159 | * [BEEP] leaks. | 152 | * [BEEP] leaks. |
@@ -450,11 +443,32 @@ void kfree_skb(struct sk_buff *skb) | |||
450 | smp_rmb(); | 443 | smp_rmb(); |
451 | else if (likely(!atomic_dec_and_test(&skb->users))) | 444 | else if (likely(!atomic_dec_and_test(&skb->users))) |
452 | return; | 445 | return; |
446 | trace_kfree_skb(skb, __builtin_return_address(0)); | ||
453 | __kfree_skb(skb); | 447 | __kfree_skb(skb); |
454 | } | 448 | } |
455 | EXPORT_SYMBOL(kfree_skb); | 449 | EXPORT_SYMBOL(kfree_skb); |
456 | 450 | ||
457 | /** | 451 | /** |
452 | * consume_skb - free an skbuff | ||
453 | * @skb: buffer to free | ||
454 | * | ||
455 | * Drop a ref to the buffer and free it if the usage count has hit zero | ||
456 | * Functions identically to kfree_skb, but kfree_skb assumes that the frame | ||
457 | * is being dropped after a failure and notes that | ||
458 | */ | ||
459 | void consume_skb(struct sk_buff *skb) | ||
460 | { | ||
461 | if (unlikely(!skb)) | ||
462 | return; | ||
463 | if (likely(atomic_read(&skb->users) == 1)) | ||
464 | smp_rmb(); | ||
465 | else if (likely(!atomic_dec_and_test(&skb->users))) | ||
466 | return; | ||
467 | __kfree_skb(skb); | ||
468 | } | ||
469 | EXPORT_SYMBOL(consume_skb); | ||
470 | |||
471 | /** | ||
458 | * skb_recycle_check - check if skb can be reused for receive | 472 | * skb_recycle_check - check if skb can be reused for receive |
459 | * @skb: buffer | 473 | * @skb: buffer |
460 | * @skb_size: minimum receive buffer size | 474 | * @skb_size: minimum receive buffer size |
@@ -1216,8 +1230,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) | |||
1216 | insp = list; | 1230 | insp = list; |
1217 | } | 1231 | } |
1218 | if (!pskb_pull(list, eat)) { | 1232 | if (!pskb_pull(list, eat)) { |
1219 | if (clone) | 1233 | kfree_skb(clone); |
1220 | kfree_skb(clone); | ||
1221 | return NULL; | 1234 | return NULL; |
1222 | } | 1235 | } |
1223 | break; | 1236 | break; |
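kfree_skb() now fires the kfree_skb tracepoint, so every call is counted as a drop by the monitor above; consume_skb() frees the buffer the same way but stays silent, which is why skb_free_datagram() was switched over earlier in this series. A short hedged sketch of how a completion path might pick between the two (mydrv_* and the 'ok' flag are illustrative):

	#include <linux/skbuff.h>

	static void mydrv_tx_complete(struct sk_buff *skb, bool ok)
	{
		if (ok)
			consume_skb(skb);	/* transmitted fine: not a drop, no tracepoint */
		else
			kfree_skb(skb);		/* error path: shows up in the drop monitor */
	}
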
diff --git a/net/core/sock.c b/net/core/sock.c index 40887e76652c..0620046e4eba 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -150,7 +150,7 @@ static const char *af_family_key_strings[AF_MAX+1] = { | |||
150 | "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , | 150 | "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , |
151 | "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , | 151 | "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , |
152 | "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , | 152 | "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , |
153 | "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , | 153 | "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , |
154 | "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , | 154 | "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , |
155 | "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , | 155 | "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , |
156 | "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , | 156 | "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , |
@@ -165,7 +165,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { | |||
165 | "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , | 165 | "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , |
166 | "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , | 166 | "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , |
167 | "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , | 167 | "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , |
168 | "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" , | 168 | "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" , |
169 | "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , | 169 | "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , |
170 | "slock-27" , "slock-28" , "slock-AF_CAN" , | 170 | "slock-27" , "slock-28" , "slock-AF_CAN" , |
171 | "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , | 171 | "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , |
@@ -180,7 +180,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { | |||
180 | "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , | 180 | "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , |
181 | "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , | 181 | "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , |
182 | "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , | 182 | "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , |
183 | "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" , | 183 | "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" , |
184 | "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , | 184 | "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , |
185 | "clock-27" , "clock-28" , "clock-AF_CAN" , | 185 | "clock-27" , "clock-28" , "clock-AF_CAN" , |
186 | "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , | 186 | "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , |
@@ -725,7 +725,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname, | |||
725 | if (len < 0) | 725 | if (len < 0) |
726 | return -EINVAL; | 726 | return -EINVAL; |
727 | 727 | ||
728 | v.val = 0; | 728 | memset(&v, 0, sizeof(v)); |
729 | 729 | ||
730 | switch(optname) { | 730 | switch(optname) { |
731 | case SO_DEBUG: | 731 | case SO_DEBUG: |
@@ -1185,7 +1185,6 @@ void sock_rfree(struct sk_buff *skb) | |||
1185 | { | 1185 | { |
1186 | struct sock *sk = skb->sk; | 1186 | struct sock *sk = skb->sk; |
1187 | 1187 | ||
1188 | skb_truesize_check(skb); | ||
1189 | atomic_sub(skb->truesize, &sk->sk_rmem_alloc); | 1188 | atomic_sub(skb->truesize, &sk->sk_rmem_alloc); |
1190 | sk_mem_uncharge(skb->sk, skb->truesize); | 1189 | sk_mem_uncharge(skb->sk, skb->truesize); |
1191 | } | 1190 | } |
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 83d3398559ea..7db1de0497c6 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/socket.h> | 11 | #include <linux/socket.h> |
12 | #include <linux/netdevice.h> | 12 | #include <linux/netdevice.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <net/ip.h> | ||
14 | #include <net/sock.h> | 15 | #include <net/sock.h> |
15 | 16 | ||
16 | static struct ctl_table net_core_table[] = { | 17 | static struct ctl_table net_core_table[] = { |
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 45f95e55f873..7ea557b7c6b1 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h | |||
@@ -20,6 +20,9 @@ | |||
20 | /* We can spread an ack vector across multiple options */ | 20 | /* We can spread an ack vector across multiple options */ |
21 | #define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2) | 21 | #define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2) |
22 | 22 | ||
23 | /* Estimated minimum average Ack Vector length - used for updating MPS */ | ||
24 | #define DCCPAV_MIN_OPTLEN 16 | ||
25 | |||
23 | #define DCCP_ACKVEC_STATE_RECEIVED 0 | 26 | #define DCCP_ACKVEC_STATE_RECEIVED 0 |
24 | #define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) | 27 | #define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) |
25 | #define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) | 28 | #define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) |
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 08a569ff02d1..d6bc47363b1c 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h | |||
@@ -63,11 +63,14 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); | |||
63 | * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields | 63 | * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields |
64 | * Hence a safe upper bound for the maximum option length is 1020-28 = 992 | 64 | * Hence a safe upper bound for the maximum option length is 1020-28 = 992 |
65 | */ | 65 | */ |
66 | #define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int)) | 66 | #define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(uint32_t)) |
67 | #define DCCP_MAX_PACKET_HDR 28 | 67 | #define DCCP_MAX_PACKET_HDR 28 |
68 | #define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR) | 68 | #define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR) |
69 | #define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER) | 69 | #define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER) |
70 | 70 | ||
71 | /* Upper bound for initial feature-negotiation overhead (padded to 32 bits) */ | ||
72 | #define DCCP_FEATNEG_OVERHEAD (32 * sizeof(uint32_t)) | ||
73 | |||
71 | #define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT | 74 | #define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT |
72 | * state, about 60 seconds */ | 75 | * state, about 60 seconds */ |
73 | 76 | ||
diff --git a/net/dccp/output.c b/net/dccp/output.c index 22a618af4893..36bcc00654d3 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c | |||
@@ -161,21 +161,27 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) | |||
161 | struct inet_connection_sock *icsk = inet_csk(sk); | 161 | struct inet_connection_sock *icsk = inet_csk(sk); |
162 | struct dccp_sock *dp = dccp_sk(sk); | 162 | struct dccp_sock *dp = dccp_sk(sk); |
163 | u32 ccmps = dccp_determine_ccmps(dp); | 163 | u32 ccmps = dccp_determine_ccmps(dp); |
164 | int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; | 164 | u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; |
165 | 165 | ||
166 | /* Account for header lengths and IPv4/v6 option overhead */ | 166 | /* Account for header lengths and IPv4/v6 option overhead */ |
167 | cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len + | 167 | cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len + |
168 | sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext)); | 168 | sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext)); |
169 | 169 | ||
170 | /* | 170 | /* |
171 | * FIXME: this should come from the CCID infrastructure, where, say, | 171 | * Leave enough headroom for common DCCP header options. |
172 | * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets | 172 | * This only considers options which may appear on DCCP-Data packets, as |
173 | * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED | 173 | * per table 3 in RFC 4340, 5.8. When running out of space for other |
174 | * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to | 174 | * options (eg. Ack Vector which can take up to 255 bytes), it is better |
175 | * make it a multiple of 4 | 175 | * to schedule a separate Ack. Thus we leave headroom for the following: |
176 | * - 1 byte for Slow Receiver (11.6) | ||
177 | * - 6 bytes for Timestamp (13.1) | ||
178 | * - 10 bytes for Timestamp Echo (13.3) | ||
179 | * - 8 bytes for NDP count (7.7, when activated) | ||
180 | * - 6 bytes for Data Checksum (9.3) | ||
181 | * - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled) | ||
176 | */ | 182 | */ |
177 | 183 | cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 + | |
178 | cur_mps -= roundup(5 + 6 + 10 + 6 + 6 + 6, 4); | 184 | (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4); |
179 | 185 | ||
180 | /* And store cached results */ | 186 | /* And store cached results */ |
181 | icsk->icsk_pmtu_cookie = pmtu; | 187 | icsk->icsk_pmtu_cookie = pmtu; |
@@ -270,7 +276,20 @@ void dccp_write_xmit(struct sock *sk, int block) | |||
270 | const int len = skb->len; | 276 | const int len = skb->len; |
271 | 277 | ||
272 | if (sk->sk_state == DCCP_PARTOPEN) { | 278 | if (sk->sk_state == DCCP_PARTOPEN) { |
273 | /* See 8.1.5. Handshake Completion */ | 279 | const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; |
280 | /* | ||
281 | * See 8.1.5 - Handshake Completion. | ||
282 | * | ||
283 | * For robustness we resend Confirm options until the client has | ||
284 | * entered OPEN. During the initial feature negotiation, the MPS | ||
285 | * is smaller than usual, reduced by the Change/Confirm options. | ||
286 | */ | ||
287 | if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { | ||
288 | DCCP_WARN("Payload too large (%d) for featneg.\n", len); | ||
289 | dccp_send_ack(sk); | ||
290 | dccp_feat_list_purge(&dp->dccps_featneg); | ||
291 | } | ||
292 | |||
274 | inet_csk_schedule_ack(sk); | 293 | inet_csk_schedule_ack(sk); |
275 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, | 294 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, |
276 | inet_csk(sk)->icsk_rto, | 295 | inet_csk(sk)->icsk_rto, |
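The second hunk caps DCCP-Data payloads while feature negotiation is still in flight: as long as Change/Confirm options must be retransmitted, the usable MPS shrinks by the worst-case DCCP_FEATNEG_OVERHEAD. A small sketch of that check, with a made-up cached MPS value:

/*
 * Sketch of the PARTOPEN payload check added above.  Values are
 * illustrative; only the overhead constant comes from the patch.
 */
#include <stdio.h>
#include <stdint.h>

#define DCCP_FEATNEG_OVERHEAD  (32 * sizeof(uint32_t))

int main(void)
{
        uint32_t mss_cache = 1424;                 /* assumed cached MPS */
        uint32_t cur_mps = mss_cache - DCCP_FEATNEG_OVERHEAD;
        uint32_t payload_len = 1400;
        int featneg_pending = 1;                   /* featneg list not empty */

        if (featneg_pending && payload_len > cur_mps)
                printf("payload too large (%u > %u): send a plain Ack and "
                       "purge the pending feature list\n",
                       payload_len, cur_mps);
        return 0;
}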
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 12bf7d4c16c6..9647d911f916 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c | |||
@@ -1246,11 +1246,12 @@ static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | |||
1246 | 1246 | ||
1247 | case TIOCINQ: | 1247 | case TIOCINQ: |
1248 | lock_sock(sk); | 1248 | lock_sock(sk); |
1249 | if ((skb = skb_peek(&scp->other_receive_queue)) != NULL) { | 1249 | skb = skb_peek(&scp->other_receive_queue); |
1250 | if (skb) { | ||
1250 | amount = skb->len; | 1251 | amount = skb->len; |
1251 | } else { | 1252 | } else { |
1252 | struct sk_buff *skb = sk->sk_receive_queue.next; | 1253 | skb = sk->sk_receive_queue.next; |
1253 | for(;;) { | 1254 | for (;;) { |
1254 | if (skb == | 1255 | if (skb == |
1255 | (struct sk_buff *)&sk->sk_receive_queue) | 1256 | (struct sk_buff *)&sk->sk_receive_queue) |
1256 | break; | 1257 | break; |
@@ -1579,16 +1580,16 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us | |||
1579 | default: | 1580 | default: |
1580 | #ifdef CONFIG_NETFILTER | 1581 | #ifdef CONFIG_NETFILTER |
1581 | { | 1582 | { |
1582 | int val, len; | 1583 | int ret, len; |
1583 | 1584 | ||
1584 | if(get_user(len, optlen)) | 1585 | if(get_user(len, optlen)) |
1585 | return -EFAULT; | 1586 | return -EFAULT; |
1586 | 1587 | ||
1587 | val = nf_getsockopt(sk, PF_DECnet, optname, | 1588 | ret = nf_getsockopt(sk, PF_DECnet, optname, |
1588 | optval, &len); | 1589 | optval, &len); |
1589 | if (val >= 0) | 1590 | if (ret >= 0) |
1590 | val = put_user(len, optlen); | 1591 | ret = put_user(len, optlen); |
1591 | return val; | 1592 | return ret; |
1592 | } | 1593 | } |
1593 | #endif | 1594 | #endif |
1594 | case DSO_STREAM: | 1595 | case DSO_STREAM: |
@@ -2071,8 +2072,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, | |||
2071 | } | 2072 | } |
2072 | out: | 2073 | out: |
2073 | 2074 | ||
2074 | if (skb) | 2075 | kfree_skb(skb); |
2075 | kfree_skb(skb); | ||
2076 | 2076 | ||
2077 | release_sock(sk); | 2077 | release_sock(sk); |
2078 | 2078 | ||
@@ -2112,9 +2112,8 @@ static struct notifier_block dn_dev_notifier = { | |||
2112 | 2112 | ||
2113 | extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); | 2113 | extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); |
2114 | 2114 | ||
2115 | static struct packet_type dn_dix_packet_type = { | 2115 | static struct packet_type dn_dix_packet_type __read_mostly = { |
2116 | .type = cpu_to_be16(ETH_P_DNA_RT), | 2116 | .type = cpu_to_be16(ETH_P_DNA_RT), |
2117 | .dev = NULL, /* All devices */ | ||
2118 | .func = dn_route_rcv, | 2117 | .func = dn_route_rcv, |
2119 | }; | 2118 | }; |
2120 | 2119 | ||
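The TIOCINQ hunk keeps the same accounting as before, just without shadowing the outer skb pointer: report the length of a pending out-of-band packet if one exists, otherwise sum everything in the normal receive queue. A userspace sketch of that logic, using a plain singly-linked list in place of sk_buff queues:

/*
 * Illustration of the TIOCINQ byte count above; the list type is a
 * stand-in for the kernel's socket queues.
 */
#include <stdio.h>
#include <stddef.h>

struct buf {
        struct buf *next;
        size_t len;
};

static size_t tiocinq_amount(const struct buf *oob, const struct buf *rxq)
{
        size_t amount = 0;

        if (oob)                                /* other_receive_queue non-empty */
                return oob->len;

        for (; rxq != NULL; rxq = rxq->next)    /* walk sk_receive_queue */
                amount += rxq->len;
        return amount;
}

int main(void)
{
        struct buf b2 = { NULL, 100 }, b1 = { &b2, 60 };

        printf("bytes readable: %zu\n", tiocinq_amount(NULL, &b1));
        return 0;
}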
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index daf2b98b15fe..1c6a5bb6f0c8 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c | |||
@@ -684,7 +684,6 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) | |||
684 | return -ENODEV; | 684 | return -ENODEV; |
685 | 685 | ||
686 | if ((dn_db = dev->dn_ptr) == NULL) { | 686 | if ((dn_db = dev->dn_ptr) == NULL) { |
687 | int err; | ||
688 | dn_db = dn_dev_create(dev, &err); | 687 | dn_db = dn_dev_create(dev, &err); |
689 | if (!dn_db) | 688 | if (!dn_db) |
690 | return err; | 689 | return err; |
@@ -769,7 +768,8 @@ static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa) | |||
769 | kfree_skb(skb); | 768 | kfree_skb(skb); |
770 | goto errout; | 769 | goto errout; |
771 | } | 770 | } |
772 | err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); | 771 | rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); |
772 | return; | ||
773 | errout: | 773 | errout: |
774 | if (err < 0) | 774 | if (err < 0) |
775 | rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err); | 775 | rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err); |
@@ -1322,6 +1322,7 @@ static inline int is_dn_dev(struct net_device *dev) | |||
1322 | } | 1322 | } |
1323 | 1323 | ||
1324 | static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) | 1324 | static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) |
1325 | __acquires(&dev_base_lock) | ||
1325 | { | 1326 | { |
1326 | int i; | 1327 | int i; |
1327 | struct net_device *dev; | 1328 | struct net_device *dev; |
@@ -1364,6 +1365,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
1364 | } | 1365 | } |
1365 | 1366 | ||
1366 | static void dn_dev_seq_stop(struct seq_file *seq, void *v) | 1367 | static void dn_dev_seq_stop(struct seq_file *seq, void *v) |
1368 | __releases(&dev_base_lock) | ||
1367 | { | 1369 | { |
1368 | read_unlock(&dev_base_lock); | 1370 | read_unlock(&dev_base_lock); |
1369 | } | 1371 | } |
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5130dee0b384..0cc4394117df 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c | |||
@@ -380,7 +380,6 @@ static int dn_return_short(struct sk_buff *skb) | |||
380 | unsigned char *ptr; | 380 | unsigned char *ptr; |
381 | __le16 *src; | 381 | __le16 *src; |
382 | __le16 *dst; | 382 | __le16 *dst; |
383 | __le16 tmp; | ||
384 | 383 | ||
385 | /* Add back headers */ | 384 | /* Add back headers */ |
386 | skb_push(skb, skb->data - skb_network_header(skb)); | 385 | skb_push(skb, skb->data - skb_network_header(skb)); |
@@ -399,10 +398,7 @@ static int dn_return_short(struct sk_buff *skb) | |||
399 | ptr += 2; | 398 | ptr += 2; |
400 | *ptr = 0; /* Zero hop count */ | 399 | *ptr = 0; /* Zero hop count */ |
401 | 400 | ||
402 | /* Swap source and destination */ | 401 | swap(*src, *dst); |
403 | tmp = *src; | ||
404 | *src = *dst; | ||
405 | *dst = tmp; | ||
406 | 402 | ||
407 | skb->pkt_type = PACKET_OUTGOING; | 403 | skb->pkt_type = PACKET_OUTGOING; |
408 | dn_rt_finish_output(skb, NULL, NULL); | 404 | dn_rt_finish_output(skb, NULL, NULL); |
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 69ad9280c693..67054b0d550f 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c | |||
@@ -375,7 +375,8 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, | |||
375 | kfree_skb(skb); | 375 | kfree_skb(skb); |
376 | goto errout; | 376 | goto errout; |
377 | } | 377 | } |
378 | err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); | 378 | rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); |
379 | return; | ||
379 | errout: | 380 | errout: |
380 | if (err < 0) | 381 | if (err < 0) |
381 | rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err); | 382 | rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err); |
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 965397af9a80..5bcd592ae6dd 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c | |||
@@ -179,7 +179,7 @@ static int dn_node_address_handler(ctl_table *table, int write, | |||
179 | } | 179 | } |
180 | 180 | ||
181 | if (write) { | 181 | if (write) { |
182 | int len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1); | 182 | len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1); |
183 | 183 | ||
184 | if (copy_from_user(addr, buffer, len)) | 184 | if (copy_from_user(addr, buffer, len)) |
185 | return -EFAULT; | 185 | return -EFAULT; |
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 49211b35725b..c51b55400dc5 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig | |||
@@ -41,13 +41,13 @@ config NET_DSA_MV88E6XXX_NEED_PPU | |||
41 | default n | 41 | default n |
42 | 42 | ||
43 | config NET_DSA_MV88E6131 | 43 | config NET_DSA_MV88E6131 |
44 | bool "Marvell 88E6131 ethernet switch chip support" | 44 | bool "Marvell 88E6095/6095F/6131 ethernet switch chip support" |
45 | select NET_DSA_MV88E6XXX | 45 | select NET_DSA_MV88E6XXX |
46 | select NET_DSA_MV88E6XXX_NEED_PPU | 46 | select NET_DSA_MV88E6XXX_NEED_PPU |
47 | select NET_DSA_TAG_DSA | 47 | select NET_DSA_TAG_DSA |
48 | ---help--- | 48 | ---help--- |
49 | This enables support for the Marvell 88E6131 ethernet switch | 49 | This enables support for the Marvell 88E6095/6095F/6131 |
50 | chip. | 50 | ethernet switch chips. |
51 | 51 | ||
52 | config NET_DSA_MV88E6123_61_65 | 52 | config NET_DSA_MV88E6123_61_65 |
53 | bool "Marvell 88E6123/6161/6165 ethernet switch chip support" | 53 | bool "Marvell 88E6123/6161/6165 ethernet switch chip support" |
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 33e99462023a..71489f69a42c 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * net/dsa/dsa.c - Hardware switch handling | 2 | * net/dsa/dsa.c - Hardware switch handling |
3 | * Copyright (c) 2008 Marvell Semiconductor | 3 | * Copyright (c) 2008-2009 Marvell Semiconductor |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
@@ -67,12 +67,13 @@ dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name) | |||
67 | 67 | ||
68 | /* basic switch operations **************************************************/ | 68 | /* basic switch operations **************************************************/ |
69 | static struct dsa_switch * | 69 | static struct dsa_switch * |
70 | dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, | 70 | dsa_switch_setup(struct dsa_switch_tree *dst, int index, |
71 | struct mii_bus *bus, struct net_device *dev) | 71 | struct device *parent, struct mii_bus *bus) |
72 | { | 72 | { |
73 | struct dsa_chip_data *pd = dst->pd->chip + index; | ||
74 | struct dsa_switch_driver *drv; | ||
73 | struct dsa_switch *ds; | 75 | struct dsa_switch *ds; |
74 | int ret; | 76 | int ret; |
75 | struct dsa_switch_driver *drv; | ||
76 | char *name; | 77 | char *name; |
77 | int i; | 78 | int i; |
78 | 79 | ||
@@ -81,11 +82,12 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, | |||
81 | */ | 82 | */ |
82 | drv = dsa_switch_probe(bus, pd->sw_addr, &name); | 83 | drv = dsa_switch_probe(bus, pd->sw_addr, &name); |
83 | if (drv == NULL) { | 84 | if (drv == NULL) { |
84 | printk(KERN_ERR "%s: could not detect attached switch\n", | 85 | printk(KERN_ERR "%s[%d]: could not detect attached switch\n", |
85 | dev->name); | 86 | dst->master_netdev->name, index); |
86 | return ERR_PTR(-EINVAL); | 87 | return ERR_PTR(-EINVAL); |
87 | } | 88 | } |
88 | printk(KERN_INFO "%s: detected a %s switch\n", dev->name, name); | 89 | printk(KERN_INFO "%s[%d]: detected a %s switch\n", |
90 | dst->master_netdev->name, index, name); | ||
89 | 91 | ||
90 | 92 | ||
91 | /* | 93 | /* |
@@ -95,18 +97,16 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, | |||
95 | if (ds == NULL) | 97 | if (ds == NULL) |
96 | return ERR_PTR(-ENOMEM); | 98 | return ERR_PTR(-ENOMEM); |
97 | 99 | ||
98 | ds->pd = pd; | 100 | ds->dst = dst; |
99 | ds->master_netdev = dev; | 101 | ds->index = index; |
100 | ds->master_mii_bus = bus; | 102 | ds->pd = dst->pd->chip + index; |
101 | |||
102 | ds->drv = drv; | 103 | ds->drv = drv; |
103 | ds->tag_protocol = drv->tag_protocol; | 104 | ds->master_mii_bus = bus; |
104 | 105 | ||
105 | 106 | ||
106 | /* | 107 | /* |
107 | * Validate supplied switch configuration. | 108 | * Validate supplied switch configuration. |
108 | */ | 109 | */ |
109 | ds->cpu_port = -1; | ||
110 | for (i = 0; i < DSA_MAX_PORTS; i++) { | 110 | for (i = 0; i < DSA_MAX_PORTS; i++) { |
111 | char *name; | 111 | char *name; |
112 | 112 | ||
@@ -115,32 +115,28 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, | |||
115 | continue; | 115 | continue; |
116 | 116 | ||
117 | if (!strcmp(name, "cpu")) { | 117 | if (!strcmp(name, "cpu")) { |
118 | if (ds->cpu_port != -1) { | 118 | if (dst->cpu_switch != -1) { |
119 | printk(KERN_ERR "multiple cpu ports?!\n"); | 119 | printk(KERN_ERR "multiple cpu ports?!\n"); |
120 | ret = -EINVAL; | 120 | ret = -EINVAL; |
121 | goto out; | 121 | goto out; |
122 | } | 122 | } |
123 | ds->cpu_port = i; | 123 | dst->cpu_switch = index; |
124 | dst->cpu_port = i; | ||
125 | } else if (!strcmp(name, "dsa")) { | ||
126 | ds->dsa_port_mask |= 1 << i; | ||
124 | } else { | 127 | } else { |
125 | ds->valid_port_mask |= 1 << i; | 128 | ds->phys_port_mask |= 1 << i; |
126 | } | 129 | } |
127 | } | 130 | } |
128 | 131 | ||
129 | if (ds->cpu_port == -1) { | ||
130 | printk(KERN_ERR "no cpu port?!\n"); | ||
131 | ret = -EINVAL; | ||
132 | goto out; | ||
133 | } | ||
134 | |||
135 | 132 | ||
136 | /* | 133 | /* |
137 | * If we use a tagging format that doesn't have an ethertype | 134 | * If the CPU connects to this switch, set the switch tree |
138 | * field, make sure that all packets from this point on get | 135 | * tagging protocol to the preferred tagging format of this |
139 | * sent to the tag format's receive function. (Which will | 136 | * switch. |
140 | * discard received packets until we set ds->ports[] below.) | ||
141 | */ | 137 | */ |
142 | wmb(); | 138 | if (ds->dst->cpu_switch == index) |
143 | dev->dsa_ptr = (void *)ds; | 139 | ds->dst->tag_protocol = drv->tag_protocol; |
144 | 140 | ||
145 | 141 | ||
146 | /* | 142 | /* |
@@ -150,7 +146,7 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, | |||
150 | if (ret < 0) | 146 | if (ret < 0) |
151 | goto out; | 147 | goto out; |
152 | 148 | ||
153 | ret = drv->set_addr(ds, dev->dev_addr); | 149 | ret = drv->set_addr(ds, dst->master_netdev->dev_addr); |
154 | if (ret < 0) | 150 | if (ret < 0) |
155 | goto out; | 151 | goto out; |
156 | 152 | ||
@@ -169,18 +165,18 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, | |||
169 | /* | 165 | /* |
170 | * Create network devices for physical switch ports. | 166 | * Create network devices for physical switch ports. |
171 | */ | 167 | */ |
172 | wmb(); | ||
173 | for (i = 0; i < DSA_MAX_PORTS; i++) { | 168 | for (i = 0; i < DSA_MAX_PORTS; i++) { |
174 | struct net_device *slave_dev; | 169 | struct net_device *slave_dev; |
175 | 170 | ||
176 | if (!(ds->valid_port_mask & (1 << i))) | 171 | if (!(ds->phys_port_mask & (1 << i))) |
177 | continue; | 172 | continue; |
178 | 173 | ||
179 | slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]); | 174 | slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]); |
180 | if (slave_dev == NULL) { | 175 | if (slave_dev == NULL) { |
181 | printk(KERN_ERR "%s: can't create dsa slave " | 176 | printk(KERN_ERR "%s[%d]: can't create dsa " |
182 | "device for port %d(%s)\n", | 177 | "slave device for port %d(%s)\n", |
183 | dev->name, i, pd->port_names[i]); | 178 | dst->master_netdev->name, |
179 | index, i, pd->port_names[i]); | ||
184 | continue; | 180 | continue; |
185 | } | 181 | } |
186 | 182 | ||
@@ -192,7 +188,6 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, | |||
192 | out_free: | 188 | out_free: |
193 | mdiobus_free(ds->slave_mii_bus); | 189 | mdiobus_free(ds->slave_mii_bus); |
194 | out: | 190 | out: |
195 | dev->dsa_ptr = NULL; | ||
196 | kfree(ds); | 191 | kfree(ds); |
197 | return ERR_PTR(ret); | 192 | return ERR_PTR(ret); |
198 | } | 193 | } |
@@ -212,35 +207,42 @@ static void dsa_switch_destroy(struct dsa_switch *ds) | |||
212 | */ | 207 | */ |
213 | bool dsa_uses_dsa_tags(void *dsa_ptr) | 208 | bool dsa_uses_dsa_tags(void *dsa_ptr) |
214 | { | 209 | { |
215 | struct dsa_switch *ds = dsa_ptr; | 210 | struct dsa_switch_tree *dst = dsa_ptr; |
216 | 211 | ||
217 | return !!(ds->tag_protocol == htons(ETH_P_DSA)); | 212 | return !!(dst->tag_protocol == htons(ETH_P_DSA)); |
218 | } | 213 | } |
219 | 214 | ||
220 | bool dsa_uses_trailer_tags(void *dsa_ptr) | 215 | bool dsa_uses_trailer_tags(void *dsa_ptr) |
221 | { | 216 | { |
222 | struct dsa_switch *ds = dsa_ptr; | 217 | struct dsa_switch_tree *dst = dsa_ptr; |
223 | 218 | ||
224 | return !!(ds->tag_protocol == htons(ETH_P_TRAILER)); | 219 | return !!(dst->tag_protocol == htons(ETH_P_TRAILER)); |
225 | } | 220 | } |
226 | 221 | ||
227 | 222 | ||
228 | /* link polling *************************************************************/ | 223 | /* link polling *************************************************************/ |
229 | static void dsa_link_poll_work(struct work_struct *ugly) | 224 | static void dsa_link_poll_work(struct work_struct *ugly) |
230 | { | 225 | { |
231 | struct dsa_switch *ds; | 226 | struct dsa_switch_tree *dst; |
227 | int i; | ||
228 | |||
229 | dst = container_of(ugly, struct dsa_switch_tree, link_poll_work); | ||
232 | 230 | ||
233 | ds = container_of(ugly, struct dsa_switch, link_poll_work); | 231 | for (i = 0; i < dst->pd->nr_chips; i++) { |
232 | struct dsa_switch *ds = dst->ds[i]; | ||
234 | 233 | ||
235 | ds->drv->poll_link(ds); | 234 | if (ds != NULL && ds->drv->poll_link != NULL) |
236 | mod_timer(&ds->link_poll_timer, round_jiffies(jiffies + HZ)); | 235 | ds->drv->poll_link(ds); |
236 | } | ||
237 | |||
238 | mod_timer(&dst->link_poll_timer, round_jiffies(jiffies + HZ)); | ||
237 | } | 239 | } |
238 | 240 | ||
239 | static void dsa_link_poll_timer(unsigned long _ds) | 241 | static void dsa_link_poll_timer(unsigned long _dst) |
240 | { | 242 | { |
241 | struct dsa_switch *ds = (void *)_ds; | 243 | struct dsa_switch_tree *dst = (void *)_dst; |
242 | 244 | ||
243 | schedule_work(&ds->link_poll_work); | 245 | schedule_work(&dst->link_poll_work); |
244 | } | 246 | } |
245 | 247 | ||
246 | 248 | ||
@@ -303,18 +305,14 @@ static int dsa_probe(struct platform_device *pdev) | |||
303 | static int dsa_version_printed; | 305 | static int dsa_version_printed; |
304 | struct dsa_platform_data *pd = pdev->dev.platform_data; | 306 | struct dsa_platform_data *pd = pdev->dev.platform_data; |
305 | struct net_device *dev; | 307 | struct net_device *dev; |
306 | struct mii_bus *bus; | 308 | struct dsa_switch_tree *dst; |
307 | struct dsa_switch *ds; | 309 | int i; |
308 | 310 | ||
309 | if (!dsa_version_printed++) | 311 | if (!dsa_version_printed++) |
310 | printk(KERN_NOTICE "Distributed Switch Architecture " | 312 | printk(KERN_NOTICE "Distributed Switch Architecture " |
311 | "driver version %s\n", dsa_driver_version); | 313 | "driver version %s\n", dsa_driver_version); |
312 | 314 | ||
313 | if (pd == NULL || pd->mii_bus == NULL || pd->netdev == NULL) | 315 | if (pd == NULL || pd->netdev == NULL) |
314 | return -EINVAL; | ||
315 | |||
316 | bus = dev_to_mii_bus(pd->mii_bus); | ||
317 | if (bus == NULL) | ||
318 | return -EINVAL; | 316 | return -EINVAL; |
319 | 317 | ||
320 | dev = dev_to_net_device(pd->netdev); | 318 | dev = dev_to_net_device(pd->netdev); |
@@ -326,36 +324,79 @@ static int dsa_probe(struct platform_device *pdev) | |||
326 | return -EEXIST; | 324 | return -EEXIST; |
327 | } | 325 | } |
328 | 326 | ||
329 | ds = dsa_switch_setup(&pdev->dev, pd, bus, dev); | 327 | dst = kzalloc(sizeof(*dst), GFP_KERNEL); |
330 | if (IS_ERR(ds)) { | 328 | if (dst == NULL) { |
331 | dev_put(dev); | 329 | dev_put(dev); |
332 | return PTR_ERR(ds); | 330 | return -ENOMEM; |
333 | } | 331 | } |
334 | 332 | ||
335 | if (ds->drv->poll_link != NULL) { | 333 | platform_set_drvdata(pdev, dst); |
336 | INIT_WORK(&ds->link_poll_work, dsa_link_poll_work); | 334 | |
337 | init_timer(&ds->link_poll_timer); | 335 | dst->pd = pd; |
338 | ds->link_poll_timer.data = (unsigned long)ds; | 336 | dst->master_netdev = dev; |
339 | ds->link_poll_timer.function = dsa_link_poll_timer; | 337 | dst->cpu_switch = -1; |
340 | ds->link_poll_timer.expires = round_jiffies(jiffies + HZ); | 338 | dst->cpu_port = -1; |
341 | add_timer(&ds->link_poll_timer); | 339 | |
340 | for (i = 0; i < pd->nr_chips; i++) { | ||
341 | struct mii_bus *bus; | ||
342 | struct dsa_switch *ds; | ||
343 | |||
344 | bus = dev_to_mii_bus(pd->chip[i].mii_bus); | ||
345 | if (bus == NULL) { | ||
346 | printk(KERN_ERR "%s[%d]: no mii bus found for " | ||
347 | "dsa switch\n", dev->name, i); | ||
348 | continue; | ||
349 | } | ||
350 | |||
351 | ds = dsa_switch_setup(dst, i, &pdev->dev, bus); | ||
352 | if (IS_ERR(ds)) { | ||
353 | printk(KERN_ERR "%s[%d]: couldn't create dsa switch " | ||
354 | "instance (error %ld)\n", dev->name, i, | ||
355 | PTR_ERR(ds)); | ||
356 | continue; | ||
357 | } | ||
358 | |||
359 | dst->ds[i] = ds; | ||
360 | if (ds->drv->poll_link != NULL) | ||
361 | dst->link_poll_needed = 1; | ||
342 | } | 362 | } |
343 | 363 | ||
344 | platform_set_drvdata(pdev, ds); | 364 | /* |
365 | * If we use a tagging format that doesn't have an ethertype | ||
366 | * field, make sure that all packets from this point on get | ||
367 | * sent to the tag format's receive function. | ||
368 | */ | ||
369 | wmb(); | ||
370 | dev->dsa_ptr = (void *)dst; | ||
371 | |||
372 | if (dst->link_poll_needed) { | ||
373 | INIT_WORK(&dst->link_poll_work, dsa_link_poll_work); | ||
374 | init_timer(&dst->link_poll_timer); | ||
375 | dst->link_poll_timer.data = (unsigned long)dst; | ||
376 | dst->link_poll_timer.function = dsa_link_poll_timer; | ||
377 | dst->link_poll_timer.expires = round_jiffies(jiffies + HZ); | ||
378 | add_timer(&dst->link_poll_timer); | ||
379 | } | ||
345 | 380 | ||
346 | return 0; | 381 | return 0; |
347 | } | 382 | } |
348 | 383 | ||
349 | static int dsa_remove(struct platform_device *pdev) | 384 | static int dsa_remove(struct platform_device *pdev) |
350 | { | 385 | { |
351 | struct dsa_switch *ds = platform_get_drvdata(pdev); | 386 | struct dsa_switch_tree *dst = platform_get_drvdata(pdev); |
387 | int i; | ||
352 | 388 | ||
353 | if (ds->drv->poll_link != NULL) | 389 | if (dst->link_poll_needed) |
354 | del_timer_sync(&ds->link_poll_timer); | 390 | del_timer_sync(&dst->link_poll_timer); |
355 | 391 | ||
356 | flush_scheduled_work(); | 392 | flush_scheduled_work(); |
357 | 393 | ||
358 | dsa_switch_destroy(ds); | 394 | for (i = 0; i < dst->pd->nr_chips; i++) { |
395 | struct dsa_switch *ds = dst->ds[i]; | ||
396 | |||
397 | if (ds != NULL) | ||
398 | dsa_switch_destroy(ds); | ||
399 | } | ||
359 | 400 | ||
360 | return 0; | 401 | return 0; |
361 | } | 402 | } |
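The dsa.c changes above move from one switch per master device to a dsa_switch_tree holding several chips, with link polling walking every chip that implements poll_link(). The sketch below shows that shape with deliberately simplified structures; none of the types or values are the kernel's real definitions.

/*
 * Simplified model of the dsa_switch_tree / dsa_switch relationship and of
 * dsa_link_poll_work() iterating all chips in the tree.
 */
#include <stdio.h>
#include <stddef.h>

#define DSA_MAX_SWITCHES 4

struct dsa_switch_sketch {
        int index;
        void (*poll_link)(struct dsa_switch_sketch *ds);
};

struct dsa_tree_sketch {
        int nr_chips;
        int link_poll_needed;
        struct dsa_switch_sketch *ds[DSA_MAX_SWITCHES];
};

static void poll_one(struct dsa_switch_sketch *ds)
{
        printf("polling links on switch %d\n", ds->index);
}

/* Mirrors dsa_link_poll_work(): visit every chip that can poll links. */
static void tree_poll(struct dsa_tree_sketch *dst)
{
        int i;

        for (i = 0; i < dst->nr_chips; i++)
                if (dst->ds[i] && dst->ds[i]->poll_link)
                        dst->ds[i]->poll_link(dst->ds[i]);
}

int main(void)
{
        struct dsa_switch_sketch s0 = { 0, poll_one }, s1 = { 1, poll_one };
        struct dsa_tree_sketch dst = { 2, 1, { &s0, &s1 } };

        tree_poll(&dst);
        return 0;
}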
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 7063378a1ebf..41055f33d28a 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * net/dsa/dsa_priv.h - Hardware switch handling | 2 | * net/dsa/dsa_priv.h - Hardware switch handling |
3 | * Copyright (c) 2008 Marvell Semiconductor | 3 | * Copyright (c) 2008-2009 Marvell Semiconductor |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
@@ -19,42 +19,107 @@ | |||
19 | 19 | ||
20 | struct dsa_switch { | 20 | struct dsa_switch { |
21 | /* | 21 | /* |
22 | * Configuration data for the platform device that owns | 22 | * Parent switch tree, and switch index. |
23 | * this dsa switch instance. | ||
24 | */ | 23 | */ |
25 | struct dsa_platform_data *pd; | 24 | struct dsa_switch_tree *dst; |
25 | int index; | ||
26 | 26 | ||
27 | /* | 27 | /* |
28 | * References to network device and mii bus to use. | 28 | * Configuration data for this switch. |
29 | */ | 29 | */ |
30 | struct net_device *master_netdev; | 30 | struct dsa_chip_data *pd; |
31 | struct mii_bus *master_mii_bus; | ||
32 | 31 | ||
33 | /* | 32 | /* |
34 | * The used switch driver and frame tagging type. | 33 | * The used switch driver. |
35 | */ | 34 | */ |
36 | struct dsa_switch_driver *drv; | 35 | struct dsa_switch_driver *drv; |
37 | __be16 tag_protocol; | 36 | |
37 | /* | ||
38 | * Reference to mii bus to use. | ||
39 | */ | ||
40 | struct mii_bus *master_mii_bus; | ||
38 | 41 | ||
39 | /* | 42 | /* |
40 | * Slave mii_bus and devices for the individual ports. | 43 | * Slave mii_bus and devices for the individual ports. |
41 | */ | 44 | */ |
42 | int cpu_port; | 45 | u32 dsa_port_mask; |
43 | u32 valid_port_mask; | 46 | u32 phys_port_mask; |
44 | struct mii_bus *slave_mii_bus; | 47 | struct mii_bus *slave_mii_bus; |
45 | struct net_device *ports[DSA_MAX_PORTS]; | 48 | struct net_device *ports[DSA_MAX_PORTS]; |
49 | }; | ||
50 | |||
51 | struct dsa_switch_tree { | ||
52 | /* | ||
53 | * Configuration data for the platform device that owns | ||
54 | * this dsa switch tree instance. | ||
55 | */ | ||
56 | struct dsa_platform_data *pd; | ||
57 | |||
58 | /* | ||
59 | * Reference to network device to use, and which tagging | ||
60 | * protocol to use. | ||
61 | */ | ||
62 | struct net_device *master_netdev; | ||
63 | __be16 tag_protocol; | ||
64 | |||
65 | /* | ||
66 | * The switch and port to which the CPU is attached. | ||
67 | */ | ||
68 | s8 cpu_switch; | ||
69 | s8 cpu_port; | ||
46 | 70 | ||
47 | /* | 71 | /* |
48 | * Link state polling. | 72 | * Link state polling. |
49 | */ | 73 | */ |
50 | struct work_struct link_poll_work; | 74 | int link_poll_needed; |
51 | struct timer_list link_poll_timer; | 75 | struct work_struct link_poll_work; |
76 | struct timer_list link_poll_timer; | ||
77 | |||
78 | /* | ||
79 | * Data for the individual switch chips. | ||
80 | */ | ||
81 | struct dsa_switch *ds[DSA_MAX_SWITCHES]; | ||
52 | }; | 82 | }; |
53 | 83 | ||
84 | static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) | ||
85 | { | ||
86 | return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port); | ||
87 | } | ||
88 | |||
89 | static inline u8 dsa_upstream_port(struct dsa_switch *ds) | ||
90 | { | ||
91 | struct dsa_switch_tree *dst = ds->dst; | ||
92 | |||
93 | /* | ||
94 | * If this is the root switch (i.e. the switch that connects | ||
95 | * to the CPU), return the cpu port number on this switch. | ||
96 | * Else return the (DSA) port number that connects to the | ||
97 | * switch that is one hop closer to the cpu. | ||
98 | */ | ||
99 | if (dst->cpu_switch == ds->index) | ||
100 | return dst->cpu_port; | ||
101 | else | ||
102 | return ds->pd->rtable[dst->cpu_switch]; | ||
103 | } | ||
104 | |||
54 | struct dsa_slave_priv { | 105 | struct dsa_slave_priv { |
106 | /* | ||
107 | * The linux network interface corresponding to this | ||
108 | * switch port. | ||
109 | */ | ||
55 | struct net_device *dev; | 110 | struct net_device *dev; |
111 | |||
112 | /* | ||
113 | * Which switch this port is a part of, and the port index | ||
114 | * for this port. | ||
115 | */ | ||
56 | struct dsa_switch *parent; | 116 | struct dsa_switch *parent; |
57 | int port; | 117 | u8 port; |
118 | |||
119 | /* | ||
120 | * The phylib phy_device pointer for the PHY connected | ||
121 | * to this port. | ||
122 | */ | ||
58 | struct phy_device *phy; | 123 | struct phy_device *phy; |
59 | }; | 124 | }; |
60 | 125 | ||
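The new dsa_upstream_port() helper is the key to multi-chip cascading: the root switch answers with the CPU port itself, while every other switch answers with the DSA port recorded in its routing table for the CPU's switch. Here is a small sketch for a hypothetical two-chip tree; the rtable contents are invented for the example.

/*
 * dsa_upstream_port() in miniature: chip 0 carries the CPU on port 5,
 * chip 1 reaches chip 0 through its DSA port 4.
 */
#include <stdio.h>

#define MAX_CHIPS 2

struct chip {
        int index;
        int rtable[MAX_CHIPS];  /* per-destination-chip next-hop port */
};

static int cpu_switch = 0, cpu_port = 5;

static int upstream_port(const struct chip *c)
{
        /*
         * Root switch: return the CPU port itself; otherwise the DSA port
         * that leads one hop closer to the CPU.
         */
        if (c->index == cpu_switch)
                return cpu_port;
        return c->rtable[cpu_switch];
}

int main(void)
{
        struct chip c0 = { 0, { -1, 3 } };  /* reaches chip 1 via port 3 */
        struct chip c1 = { 1, { 4, -1 } };  /* reaches chip 0 via port 4 */

        printf("chip 0 upstream port: %d\n", upstream_port(&c0));  /* 5 */
        printf("chip 1 upstream port: %d\n", upstream_port(&c1));  /* 4 */
        return 0;
}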
diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c index 85081ae9fe89..83277f463af7 100644 --- a/net/dsa/mv88e6060.c +++ b/net/dsa/mv88e6060.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips | 2 | * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips |
3 | * Copyright (c) 2008 Marvell Semiconductor | 3 | * Copyright (c) 2008-2009 Marvell Semiconductor |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
@@ -81,7 +81,7 @@ static int mv88e6060_switch_reset(struct dsa_switch *ds) | |||
81 | /* | 81 | /* |
82 | * Reset the switch. | 82 | * Reset the switch. |
83 | */ | 83 | */ |
84 | REG_WRITE(REG_GLOBAL, 0x0A, 0xa130); | 84 | REG_WRITE(REG_GLOBAL, 0x0a, 0xa130); |
85 | 85 | ||
86 | /* | 86 | /* |
87 | * Wait up to one second for reset to complete. | 87 | * Wait up to one second for reset to complete. |
@@ -128,7 +128,7 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p) | |||
128 | * state to Forwarding. Additionally, if this is the CPU | 128 | * state to Forwarding. Additionally, if this is the CPU |
129 | * port, enable Ingress and Egress Trailer tagging mode. | 129 | * port, enable Ingress and Egress Trailer tagging mode. |
130 | */ | 130 | */ |
131 | REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x4103 : 0x0003); | 131 | REG_WRITE(addr, 0x04, dsa_is_cpu_port(ds, p) ? 0x4103 : 0x0003); |
132 | 132 | ||
133 | /* | 133 | /* |
134 | * Port based VLAN map: give each port its own address | 134 | * Port based VLAN map: give each port its own address |
@@ -138,9 +138,9 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p) | |||
138 | */ | 138 | */ |
139 | REG_WRITE(addr, 0x06, | 139 | REG_WRITE(addr, 0x06, |
140 | ((p & 0xf) << 12) | | 140 | ((p & 0xf) << 12) | |
141 | ((p == ds->cpu_port) ? | 141 | (dsa_is_cpu_port(ds, p) ? |
142 | ds->valid_port_mask : | 142 | ds->phys_port_mask : |
143 | (1 << ds->cpu_port))); | 143 | (1 << ds->dst->cpu_port))); |
144 | 144 | ||
145 | /* | 145 | /* |
146 | * Port Association Vector: when learning source addresses | 146 | * Port Association Vector: when learning source addresses |
diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c index 100318722214..52faaa21a4d9 100644 --- a/net/dsa/mv88e6123_61_65.c +++ b/net/dsa/mv88e6123_61_65.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support | 2 | * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support |
3 | * Copyright (c) 2008 Marvell Semiconductor | 3 | * Copyright (c) 2008-2009 Marvell Semiconductor |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
@@ -98,17 +98,17 @@ static int mv88e6123_61_65_setup_global(struct dsa_switch *ds) | |||
98 | return ret; | 98 | return ret; |
99 | 99 | ||
100 | /* | 100 | /* |
101 | * Configure the cpu port, and configure the cpu port as the | 101 | * Configure the upstream port, and configure the upstream |
102 | * port to which ingress and egress monitor frames are to be | 102 | * port as the port to which ingress and egress monitor frames |
103 | * sent. | 103 | * are to be sent. |
104 | */ | 104 | */ |
105 | REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1110)); | 105 | REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110)); |
106 | 106 | ||
107 | /* | 107 | /* |
108 | * Disable remote management for now, and set the switch's | 108 | * Disable remote management for now, and set the switch's |
109 | * DSA device number to zero. | 109 | * DSA device number. |
110 | */ | 110 | */ |
111 | REG_WRITE(REG_GLOBAL, 0x1c, 0x0000); | 111 | REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f); |
112 | 112 | ||
113 | /* | 113 | /* |
114 | * Send all frames with destination addresses matching | 114 | * Send all frames with destination addresses matching |
@@ -133,10 +133,17 @@ static int mv88e6123_61_65_setup_global(struct dsa_switch *ds) | |||
133 | REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); | 133 | REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); |
134 | 134 | ||
135 | /* | 135 | /* |
136 | * Map all DSA device IDs to the CPU port. | 136 | * Program the DSA routing table. |
137 | */ | 137 | */ |
138 | for (i = 0; i < 32; i++) | 138 | for (i = 0; i < 32; i++) { |
139 | REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port); | 139 | int nexthop; |
140 | |||
141 | nexthop = 0x1f; | ||
142 | if (i != ds->index && i < ds->dst->pd->nr_chips) | ||
143 | nexthop = ds->pd->rtable[i] & 0x1f; | ||
144 | |||
145 | REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); | ||
146 | } | ||
140 | 147 | ||
141 | /* | 148 | /* |
142 | * Clear all trunk masks. | 149 | * Clear all trunk masks. |
@@ -176,12 +183,18 @@ static int mv88e6123_61_65_setup_global(struct dsa_switch *ds) | |||
176 | static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) | 183 | static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) |
177 | { | 184 | { |
178 | int addr = REG_PORT(p); | 185 | int addr = REG_PORT(p); |
186 | u16 val; | ||
179 | 187 | ||
180 | /* | 188 | /* |
181 | * MAC Forcing register: don't force link, speed, duplex | 189 | * MAC Forcing register: don't force link, speed, duplex |
182 | * or flow control state to any particular values. | 190 | * or flow control state to any particular values on physical |
191 | * ports, but force the CPU port and all DSA ports to 1000 Mb/s | ||
192 | * full duplex. | ||
183 | */ | 193 | */ |
184 | REG_WRITE(addr, 0x01, 0x0003); | 194 | if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) |
195 | REG_WRITE(addr, 0x01, 0x003e); | ||
196 | else | ||
197 | REG_WRITE(addr, 0x01, 0x0003); | ||
185 | 198 | ||
186 | /* | 199 | /* |
187 | * Do not limit the period of time that this port can be | 200 | * Do not limit the period of time that this port can be |
@@ -192,37 +205,50 @@ static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) | |||
192 | 205 | ||
193 | /* | 206 | /* |
194 | * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, | 207 | * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, |
195 | * configure the requested (DSA/EDSA) tagging mode if this is | 208 | * disable Header mode, enable IGMP/MLD snooping, disable VLAN |
196 | * the CPU port, disable Header mode, enable IGMP/MLD snooping, | 209 | * tunneling, determine priority by looking at 802.1p and IP |
197 | * disable VLAN tunneling, determine priority by looking at | 210 | * priority fields (IP prio has precedence), and set STP state |
198 | * 802.1p and IP priority fields (IP prio has precedence), and | 211 | * to Forwarding. |
199 | * set STP state to Forwarding. Finally, if this is the CPU | 212 | * |
200 | * port, additionally enable forwarding of unknown unicast and | 213 | * If this is the CPU link, use DSA or EDSA tagging depending |
201 | * multicast addresses. | 214 | * on which tagging mode was configured. |
202 | */ | 215 | * |
203 | REG_WRITE(addr, 0x04, | 216 | * If this is a link to another switch, use DSA tagging mode. |
204 | (p == ds->cpu_port) ? | 217 | * |
205 | (ds->tag_protocol == htons(ETH_P_DSA)) ? | 218 | * If this is the upstream port for this switch, enable |
206 | 0x053f : 0x373f : | 219 | * forwarding of unknown unicasts and multicasts. |
207 | 0x0433); | 220 | */ |
221 | val = 0x0433; | ||
222 | if (dsa_is_cpu_port(ds, p)) { | ||
223 | if (ds->dst->tag_protocol == htons(ETH_P_EDSA)) | ||
224 | val |= 0x3300; | ||
225 | else | ||
226 | val |= 0x0100; | ||
227 | } | ||
228 | if (ds->dsa_port_mask & (1 << p)) | ||
229 | val |= 0x0100; | ||
230 | if (p == dsa_upstream_port(ds)) | ||
231 | val |= 0x000c; | ||
232 | REG_WRITE(addr, 0x04, val); | ||
208 | 233 | ||
209 | /* | 234 | /* |
210 | * Port Control 1: disable trunking. Also, if this is the | 235 | * Port Control 1: disable trunking. Also, if this is the |
211 | * CPU port, enable learn messages to be sent to this port. | 236 | * CPU port, enable learn messages to be sent to this port. |
212 | */ | 237 | */ |
213 | REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000); | 238 | REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000); |
214 | 239 | ||
215 | /* | 240 | /* |
216 | * Port based VLAN map: give each port its own address | 241 | * Port based VLAN map: give each port its own address |
217 | * database, allow the CPU port to talk to each of the 'real' | 242 | * database, allow the CPU port to talk to each of the 'real' |
218 | * ports, and allow each of the 'real' ports to only talk to | 243 | * ports, and allow each of the 'real' ports to only talk to |
219 | * the CPU port. | 244 | * the upstream port. |
220 | */ | 245 | */ |
221 | REG_WRITE(addr, 0x06, | 246 | val = (p & 0xf) << 12; |
222 | ((p & 0xf) << 12) | | 247 | if (dsa_is_cpu_port(ds, p)) |
223 | ((p == ds->cpu_port) ? | 248 | val |= ds->phys_port_mask; |
224 | ds->valid_port_mask : | 249 | else |
225 | (1 << ds->cpu_port))); | 250 | val |= 1 << dsa_upstream_port(ds); |
251 | REG_WRITE(addr, 0x06, val); | ||
226 | 252 | ||
227 | /* | 253 | /* |
228 | * Default VLAN ID and priority: don't set a default VLAN | 254 | * Default VLAN ID and priority: don't set a default VLAN |
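The routing-table loop added above replaces the old "map every device ID to the CPU port" behaviour: known peer chips get the next-hop port from this chip's rtable, while the chip's own ID and any unknown device land on 0x1f. A sketch of the register values that loop produces, with an invented rtable:

/*
 * Computes the global2 register 0x06 values written by the new routing
 * table loop, for a chip with index 1 in a two-chip tree.
 */
#include <stdio.h>

int main(void)
{
        int own_index = 1, nr_chips = 2;
        int rtable[2] = { 4, -1 };      /* device 0 reached via port 4 */
        int i;

        for (i = 0; i < 32; i++) {
                int nexthop = 0x1f;
                unsigned int regval;

                if (i != own_index && i < nr_chips)
                        nexthop = rtable[i] & 0x1f;

                regval = 0x8000u | (i << 8) | nexthop;
                if (i < 4)              /* show only the first few entries */
                        printf("entry %2d -> 0x%04x\n", i, regval);
        }
        return 0;
}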
diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c index 70fae2444cb6..bb2b41bc854e 100644 --- a/net/dsa/mv88e6131.c +++ b/net/dsa/mv88e6131.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * net/dsa/mv88e6131.c - Marvell 88e6131 switch chip support | 2 | * net/dsa/mv88e6131.c - Marvell 88e6095/6095f/6131 switch chip support |
3 | * Copyright (c) 2008 Marvell Semiconductor | 3 | * Copyright (c) 2008-2009 Marvell Semiconductor |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
@@ -21,6 +21,8 @@ static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) | |||
21 | ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); | 21 | ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); |
22 | if (ret >= 0) { | 22 | if (ret >= 0) { |
23 | ret &= 0xfff0; | 23 | ret &= 0xfff0; |
24 | if (ret == 0x0950) | ||
25 | return "Marvell 88E6095/88E6095F"; | ||
24 | if (ret == 0x1060) | 26 | if (ret == 0x1060) |
25 | return "Marvell 88E6131"; | 27 | return "Marvell 88E6131"; |
26 | } | 28 | } |
@@ -36,7 +38,7 @@ static int mv88e6131_switch_reset(struct dsa_switch *ds) | |||
36 | /* | 38 | /* |
37 | * Set all ports to the disabled state. | 39 | * Set all ports to the disabled state. |
38 | */ | 40 | */ |
39 | for (i = 0; i < 8; i++) { | 41 | for (i = 0; i < 11; i++) { |
40 | ret = REG_READ(REG_PORT(i), 0x04); | 42 | ret = REG_READ(REG_PORT(i), 0x04); |
41 | REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); | 43 | REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); |
42 | } | 44 | } |
@@ -100,17 +102,17 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) | |||
100 | REG_WRITE(REG_GLOBAL, 0x19, 0x8100); | 102 | REG_WRITE(REG_GLOBAL, 0x19, 0x8100); |
101 | 103 | ||
102 | /* | 104 | /* |
103 | * Disable ARP mirroring, and configure the cpu port as the | 105 | * Disable ARP mirroring, and configure the upstream port as |
104 | * port to which ingress and egress monitor frames are to be | 106 | * the port to which ingress and egress monitor frames are to |
105 | * sent. | 107 | * be sent. |
106 | */ | 108 | */ |
107 | REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1100) | 0x00f0); | 109 | REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1100) | 0x00f0); |
108 | 110 | ||
109 | /* | 111 | /* |
110 | * Disable cascade port functionality, and set the switch's | 112 | * Disable cascade port functionality, and set the switch's |
111 | * DSA device number to zero. | 113 | * DSA device number. |
112 | */ | 114 | */ |
113 | REG_WRITE(REG_GLOBAL, 0x1c, 0xe000); | 115 | REG_WRITE(REG_GLOBAL, 0x1c, 0xe000 | (ds->index & 0x1f)); |
114 | 116 | ||
115 | /* | 117 | /* |
116 | * Send all frames with destination addresses matching | 118 | * Send all frames with destination addresses matching |
@@ -127,16 +129,23 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) | |||
127 | REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); | 129 | REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); |
128 | 130 | ||
129 | /* | 131 | /* |
130 | * Map all DSA device IDs to the CPU port. | 132 | * Program the DSA routing table. |
131 | */ | 133 | */ |
132 | for (i = 0; i < 32; i++) | 134 | for (i = 0; i < 32; i++) { |
133 | REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port); | 135 | int nexthop; |
136 | |||
137 | nexthop = 0x1f; | ||
138 | if (i != ds->index && i < ds->dst->pd->nr_chips) | ||
139 | nexthop = ds->pd->rtable[i] & 0x1f; | ||
140 | |||
141 | REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); | ||
142 | } | ||
134 | 143 | ||
135 | /* | 144 | /* |
136 | * Clear all trunk masks. | 145 | * Clear all trunk masks. |
137 | */ | 146 | */ |
138 | for (i = 0; i < 8; i++) | 147 | for (i = 0; i < 8; i++) |
139 | REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); | 148 | REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0x7ff); |
140 | 149 | ||
141 | /* | 150 | /* |
142 | * Clear all trunk mappings. | 151 | * Clear all trunk mappings. |
@@ -156,12 +165,18 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) | |||
156 | static int mv88e6131_setup_port(struct dsa_switch *ds, int p) | 165 | static int mv88e6131_setup_port(struct dsa_switch *ds, int p) |
157 | { | 166 | { |
158 | int addr = REG_PORT(p); | 167 | int addr = REG_PORT(p); |
168 | u16 val; | ||
159 | 169 | ||
160 | /* | 170 | /* |
161 | * MAC Forcing register: don't force link, speed, duplex | 171 | * MAC Forcing register: don't force link, speed, duplex |
162 | * or flow control state to any particular values. | 172 | * or flow control state to any particular values on physical |
173 | * ports, but force the CPU port and all DSA ports to 1000 Mb/s | ||
174 | * full duplex. | ||
163 | */ | 175 | */ |
164 | REG_WRITE(addr, 0x01, 0x0003); | 176 | if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) |
177 | REG_WRITE(addr, 0x01, 0x003e); | ||
178 | else | ||
179 | REG_WRITE(addr, 0x01, 0x0003); | ||
165 | 180 | ||
166 | /* | 181 | /* |
167 | * Port Control: disable Core Tag, disable Drop-on-Lock, | 182 | * Port Control: disable Core Tag, disable Drop-on-Lock, |
@@ -169,29 +184,40 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p) | |||
169 | * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN | 184 | * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN |
170 | * tunneling, determine priority by looking at 802.1p and | 185 | * tunneling, determine priority by looking at 802.1p and |
171 | * IP priority fields (IP prio has precedence), and set STP | 186 | * IP priority fields (IP prio has precedence), and set STP |
172 | * state to Forwarding. Finally, if this is the CPU port, | 187 | * state to Forwarding. |
173 | * additionally enable DSA tagging and forwarding of unknown | 188 | * |
174 | * unicast addresses. | 189 | * If this is the upstream port for this switch, enable |
190 | * forwarding of unknown unicasts, and enable DSA tagging | ||
191 | * mode. | ||
192 | * | ||
193 | * If this is the link to another switch, use DSA tagging | ||
194 | * mode, but do not enable forwarding of unknown unicasts. | ||
175 | */ | 195 | */ |
176 | REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x0537 : 0x0433); | 196 | val = 0x0433; |
197 | if (p == dsa_upstream_port(ds)) | ||
198 | val |= 0x0104; | ||
199 | if (ds->dsa_port_mask & (1 << p)) | ||
200 | val |= 0x0100; | ||
201 | REG_WRITE(addr, 0x04, val); | ||
177 | 202 | ||
178 | /* | 203 | /* |
179 | * Port Control 1: disable trunking. Also, if this is the | 204 | * Port Control 1: disable trunking. Also, if this is the |
180 | * CPU port, enable learn messages to be sent to this port. | 205 | * CPU port, enable learn messages to be sent to this port. |
181 | */ | 206 | */ |
182 | REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000); | 207 | REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000); |
183 | 208 | ||
184 | /* | 209 | /* |
185 | * Port based VLAN map: give each port its own address | 210 | * Port based VLAN map: give each port its own address |
186 | * database, allow the CPU port to talk to each of the 'real' | 211 | * database, allow the CPU port to talk to each of the 'real' |
187 | * ports, and allow each of the 'real' ports to only talk to | 212 | * ports, and allow each of the 'real' ports to only talk to |
188 | * the CPU port. | 213 | * the upstream port. |
189 | */ | 214 | */ |
190 | REG_WRITE(addr, 0x06, | 215 | val = (p & 0xf) << 12; |
191 | ((p & 0xf) << 12) | | 216 | if (dsa_is_cpu_port(ds, p)) |
192 | ((p == ds->cpu_port) ? | 217 | val |= ds->phys_port_mask; |
193 | ds->valid_port_mask : | 218 | else |
194 | (1 << ds->cpu_port))); | 219 | val |= 1 << dsa_upstream_port(ds); |
220 | REG_WRITE(addr, 0x06, val); | ||
195 | 221 | ||
196 | /* | 222 | /* |
197 | * Default VLAN ID and priority: don't set a default VLAN | 223 | * Default VLAN ID and priority: don't set a default VLAN |
@@ -207,13 +233,15 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p) | |||
207 | * untagged frames on this port, do a destination address | 233 | * untagged frames on this port, do a destination address |
208 | * lookup on received packets as usual, don't send a copy | 234 | * lookup on received packets as usual, don't send a copy |
209 | * of all transmitted/received frames on this port to the | 235 | * of all transmitted/received frames on this port to the |
210 | * CPU, and configure the CPU port number. Also, if this | 236 | * CPU, and configure the upstream port number. |
211 | * is the CPU port, enable forwarding of unknown multicast | 237 | * |
212 | * addresses. | 238 | * If this is the upstream port for this switch, enable |
239 | * forwarding of unknown multicast addresses. | ||
213 | */ | 240 | */ |
214 | REG_WRITE(addr, 0x08, | 241 | val = 0x0080 | dsa_upstream_port(ds); |
215 | ((p == ds->cpu_port) ? 0x00c0 : 0x0080) | | 242 | if (p == dsa_upstream_port(ds)) |
216 | ds->cpu_port); | 243 | val |= 0x0040; |
244 | REG_WRITE(addr, 0x08, val); | ||
217 | 245 | ||
218 | /* | 246 | /* |
219 | * Rate Control: disable ingress rate limiting. | 247 | * Rate Control: disable ingress rate limiting. |
@@ -268,7 +296,7 @@ static int mv88e6131_setup(struct dsa_switch *ds) | |||
268 | if (ret < 0) | 296 | if (ret < 0) |
269 | return ret; | 297 | return ret; |
270 | 298 | ||
271 | for (i = 0; i < 6; i++) { | 299 | for (i = 0; i < 11; i++) { |
272 | ret = mv88e6131_setup_port(ds, i); | 300 | ret = mv88e6131_setup_port(ds, i); |
273 | if (ret < 0) | 301 | if (ret < 0) |
274 | return ret; | 302 | return ret; |
@@ -279,7 +307,7 @@ static int mv88e6131_setup(struct dsa_switch *ds) | |||
279 | 307 | ||
280 | static int mv88e6131_port_to_phy_addr(int port) | 308 | static int mv88e6131_port_to_phy_addr(int port) |
281 | { | 309 | { |
282 | if (port >= 0 && port != 3 && port <= 7) | 310 | if (port >= 0 && port <= 11) |
283 | return port; | 311 | return port; |
284 | return -1; | 312 | return -1; |
285 | } | 313 | } |
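The mv88e6131 port setup now composes the Port Control value from a base plus role-dependent bits instead of hard-coding two constants. The sketch below reproduces that composition; the bit meanings in the comments are taken from the patch's own comments, and the port roles are made up.

/*
 * Composing register 0x04 per port role, as in mv88e6131_setup_port().
 */
#include <stdio.h>

static unsigned int port_control(int is_upstream, int is_dsa_link)
{
        unsigned int val = 0x0433;      /* base: IGMP snoop, STP Forwarding, ... */

        if (is_upstream)
                val |= 0x0104;          /* DSA tag + forward unknown unicasts */
        if (is_dsa_link)
                val |= 0x0100;          /* DSA tag only */
        return val;
}

int main(void)
{
        printf("upstream port: 0x%04x\n", port_control(1, 0));  /* 0x0537 */
        printf("dsa link port: 0x%04x\n", port_control(0, 1));  /* 0x0533 */
        printf("user port:     0x%04x\n", port_control(0, 0));  /* 0x0433 */
        return 0;
}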
diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a68fd79e9eca..ed131181215d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * net/dsa/slave.c - Slave device handling | 2 | * net/dsa/slave.c - Slave device handling |
3 | * Copyright (c) 2008 Marvell Semiconductor | 3 | * Copyright (c) 2008-2009 Marvell Semiconductor |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
@@ -19,7 +19,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) | |||
19 | { | 19 | { |
20 | struct dsa_switch *ds = bus->priv; | 20 | struct dsa_switch *ds = bus->priv; |
21 | 21 | ||
22 | if (ds->valid_port_mask & (1 << addr)) | 22 | if (ds->phys_port_mask & (1 << addr)) |
23 | return ds->drv->phy_read(ds, addr, reg); | 23 | return ds->drv->phy_read(ds, addr, reg); |
24 | 24 | ||
25 | return 0xffff; | 25 | return 0xffff; |
@@ -29,7 +29,7 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) | |||
29 | { | 29 | { |
30 | struct dsa_switch *ds = bus->priv; | 30 | struct dsa_switch *ds = bus->priv; |
31 | 31 | ||
32 | if (ds->valid_port_mask & (1 << addr)) | 32 | if (ds->phys_port_mask & (1 << addr)) |
33 | return ds->drv->phy_write(ds, addr, reg, val); | 33 | return ds->drv->phy_write(ds, addr, reg, val); |
34 | 34 | ||
35 | return 0; | 35 | return 0; |
@@ -43,15 +43,24 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) | |||
43 | ds->slave_mii_bus->write = dsa_slave_phy_write; | 43 | ds->slave_mii_bus->write = dsa_slave_phy_write; |
44 | snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x", | 44 | snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x", |
45 | ds->master_mii_bus->id, ds->pd->sw_addr); | 45 | ds->master_mii_bus->id, ds->pd->sw_addr); |
46 | ds->slave_mii_bus->parent = &(ds->master_mii_bus->dev); | 46 | ds->slave_mii_bus->parent = &ds->master_mii_bus->dev; |
47 | } | 47 | } |
48 | 48 | ||
49 | 49 | ||
50 | /* slave device handling ****************************************************/ | 50 | /* slave device handling ****************************************************/ |
51 | static int dsa_slave_init(struct net_device *dev) | ||
52 | { | ||
53 | struct dsa_slave_priv *p = netdev_priv(dev); | ||
54 | |||
55 | dev->iflink = p->parent->dst->master_netdev->ifindex; | ||
56 | |||
57 | return 0; | ||
58 | } | ||
59 | |||
51 | static int dsa_slave_open(struct net_device *dev) | 60 | static int dsa_slave_open(struct net_device *dev) |
52 | { | 61 | { |
53 | struct dsa_slave_priv *p = netdev_priv(dev); | 62 | struct dsa_slave_priv *p = netdev_priv(dev); |
54 | struct net_device *master = p->parent->master_netdev; | 63 | struct net_device *master = p->parent->dst->master_netdev; |
55 | int err; | 64 | int err; |
56 | 65 | ||
57 | if (!(master->flags & IFF_UP)) | 66 | if (!(master->flags & IFF_UP)) |
@@ -89,7 +98,7 @@ out: | |||
89 | static int dsa_slave_close(struct net_device *dev) | 98 | static int dsa_slave_close(struct net_device *dev) |
90 | { | 99 | { |
91 | struct dsa_slave_priv *p = netdev_priv(dev); | 100 | struct dsa_slave_priv *p = netdev_priv(dev); |
92 | struct net_device *master = p->parent->master_netdev; | 101 | struct net_device *master = p->parent->dst->master_netdev; |
93 | 102 | ||
94 | dev_mc_unsync(master, dev); | 103 | dev_mc_unsync(master, dev); |
95 | dev_unicast_unsync(master, dev); | 104 | dev_unicast_unsync(master, dev); |
@@ -107,7 +116,7 @@ static int dsa_slave_close(struct net_device *dev) | |||
107 | static void dsa_slave_change_rx_flags(struct net_device *dev, int change) | 116 | static void dsa_slave_change_rx_flags(struct net_device *dev, int change) |
108 | { | 117 | { |
109 | struct dsa_slave_priv *p = netdev_priv(dev); | 118 | struct dsa_slave_priv *p = netdev_priv(dev); |
110 | struct net_device *master = p->parent->master_netdev; | 119 | struct net_device *master = p->parent->dst->master_netdev; |
111 | 120 | ||
112 | if (change & IFF_ALLMULTI) | 121 | if (change & IFF_ALLMULTI) |
113 | dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); | 122 | dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); |
@@ -118,7 +127,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change) | |||
118 | static void dsa_slave_set_rx_mode(struct net_device *dev) | 127 | static void dsa_slave_set_rx_mode(struct net_device *dev) |
119 | { | 128 | { |
120 | struct dsa_slave_priv *p = netdev_priv(dev); | 129 | struct dsa_slave_priv *p = netdev_priv(dev); |
121 | struct net_device *master = p->parent->master_netdev; | 130 | struct net_device *master = p->parent->dst->master_netdev; |
122 | 131 | ||
123 | dev_mc_sync(master, dev); | 132 | dev_mc_sync(master, dev); |
124 | dev_unicast_sync(master, dev); | 133 | dev_unicast_sync(master, dev); |
@@ -127,7 +136,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev) | |||
127 | static int dsa_slave_set_mac_address(struct net_device *dev, void *a) | 136 | static int dsa_slave_set_mac_address(struct net_device *dev, void *a) |
128 | { | 137 | { |
129 | struct dsa_slave_priv *p = netdev_priv(dev); | 138 | struct dsa_slave_priv *p = netdev_priv(dev); |
130 | struct net_device *master = p->parent->master_netdev; | 139 | struct net_device *master = p->parent->dst->master_netdev; |
131 | struct sockaddr *addr = a; | 140 | struct sockaddr *addr = a; |
132 | int err; | 141 | int err; |
133 | 142 | ||
@@ -288,6 +297,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { | |||
288 | 297 | ||
289 | #ifdef CONFIG_NET_DSA_TAG_DSA | 298 | #ifdef CONFIG_NET_DSA_TAG_DSA |
290 | static const struct net_device_ops dsa_netdev_ops = { | 299 | static const struct net_device_ops dsa_netdev_ops = { |
300 | .ndo_init = dsa_slave_init, | ||
291 | .ndo_open = dsa_slave_open, | 301 | .ndo_open = dsa_slave_open, |
292 | .ndo_stop = dsa_slave_close, | 302 | .ndo_stop = dsa_slave_close, |
293 | .ndo_start_xmit = dsa_xmit, | 303 | .ndo_start_xmit = dsa_xmit, |
@@ -300,6 +310,7 @@ static const struct net_device_ops dsa_netdev_ops = { | |||
300 | #endif | 310 | #endif |
301 | #ifdef CONFIG_NET_DSA_TAG_EDSA | 311 | #ifdef CONFIG_NET_DSA_TAG_EDSA |
302 | static const struct net_device_ops edsa_netdev_ops = { | 312 | static const struct net_device_ops edsa_netdev_ops = { |
313 | .ndo_init = dsa_slave_init, | ||
303 | .ndo_open = dsa_slave_open, | 314 | .ndo_open = dsa_slave_open, |
304 | .ndo_stop = dsa_slave_close, | 315 | .ndo_stop = dsa_slave_close, |
305 | .ndo_start_xmit = edsa_xmit, | 316 | .ndo_start_xmit = edsa_xmit, |
@@ -312,6 +323,7 @@ static const struct net_device_ops edsa_netdev_ops = { | |||
312 | #endif | 323 | #endif |
313 | #ifdef CONFIG_NET_DSA_TAG_TRAILER | 324 | #ifdef CONFIG_NET_DSA_TAG_TRAILER |
314 | static const struct net_device_ops trailer_netdev_ops = { | 325 | static const struct net_device_ops trailer_netdev_ops = { |
326 | .ndo_init = dsa_slave_init, | ||
315 | .ndo_open = dsa_slave_open, | 327 | .ndo_open = dsa_slave_open, |
316 | .ndo_stop = dsa_slave_close, | 328 | .ndo_stop = dsa_slave_close, |
317 | .ndo_start_xmit = trailer_xmit, | 329 | .ndo_start_xmit = trailer_xmit, |
@@ -328,7 +340,7 @@ struct net_device * | |||
328 | dsa_slave_create(struct dsa_switch *ds, struct device *parent, | 340 | dsa_slave_create(struct dsa_switch *ds, struct device *parent, |
329 | int port, char *name) | 341 | int port, char *name) |
330 | { | 342 | { |
331 | struct net_device *master = ds->master_netdev; | 343 | struct net_device *master = ds->dst->master_netdev; |
332 | struct net_device *slave_dev; | 344 | struct net_device *slave_dev; |
333 | struct dsa_slave_priv *p; | 345 | struct dsa_slave_priv *p; |
334 | int ret; | 346 | int ret; |
@@ -343,7 +355,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, | |||
343 | memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN); | 355 | memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN); |
344 | slave_dev->tx_queue_len = 0; | 356 | slave_dev->tx_queue_len = 0; |
345 | 357 | ||
346 | switch (ds->tag_protocol) { | 358 | switch (ds->dst->tag_protocol) { |
347 | #ifdef CONFIG_NET_DSA_TAG_DSA | 359 | #ifdef CONFIG_NET_DSA_TAG_DSA |
348 | case htons(ETH_P_DSA): | 360 | case htons(ETH_P_DSA): |
349 | slave_dev->netdev_ops = &dsa_netdev_ops; | 361 | slave_dev->netdev_ops = &dsa_netdev_ops; |
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 63e532a69fdb..8fa25bafe6ca 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging | 2 | * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging |
3 | * Copyright (c) 2008 Marvell Semiconductor | 3 | * Copyright (c) 2008-2009 Marvell Semiconductor |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
@@ -36,7 +36,7 @@ int dsa_xmit(struct sk_buff *skb, struct net_device *dev) | |||
36 | * Construct tagged FROM_CPU DSA tag from 802.1q tag. | 36 | * Construct tagged FROM_CPU DSA tag from 802.1q tag. |
37 | */ | 37 | */ |
38 | dsa_header = skb->data + 2 * ETH_ALEN; | 38 | dsa_header = skb->data + 2 * ETH_ALEN; |
39 | dsa_header[0] = 0x60; | 39 | dsa_header[0] = 0x60 | p->parent->index; |
40 | dsa_header[1] = p->port << 3; | 40 | dsa_header[1] = p->port << 3; |
41 | 41 | ||
42 | /* | 42 | /* |
@@ -57,7 +57,7 @@ int dsa_xmit(struct sk_buff *skb, struct net_device *dev) | |||
57 | * Construct untagged FROM_CPU DSA tag. | 57 | * Construct untagged FROM_CPU DSA tag. |
58 | */ | 58 | */ |
59 | dsa_header = skb->data + 2 * ETH_ALEN; | 59 | dsa_header = skb->data + 2 * ETH_ALEN; |
60 | dsa_header[0] = 0x40; | 60 | dsa_header[0] = 0x40 | p->parent->index; |
61 | dsa_header[1] = p->port << 3; | 61 | dsa_header[1] = p->port << 3; |
62 | dsa_header[2] = 0x00; | 62 | dsa_header[2] = 0x00; |
63 | dsa_header[3] = 0x00; | 63 | dsa_header[3] = 0x00; |
@@ -65,7 +65,7 @@ int dsa_xmit(struct sk_buff *skb, struct net_device *dev) | |||
65 | 65 | ||
66 | skb->protocol = htons(ETH_P_DSA); | 66 | skb->protocol = htons(ETH_P_DSA); |
67 | 67 | ||
68 | skb->dev = p->parent->master_netdev; | 68 | skb->dev = p->parent->dst->master_netdev; |
69 | dev_queue_xmit(skb); | 69 | dev_queue_xmit(skb); |
70 | 70 | ||
71 | return NETDEV_TX_OK; | 71 | return NETDEV_TX_OK; |
@@ -78,11 +78,13 @@ out_free: | |||
78 | static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, | 78 | static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, |
79 | struct packet_type *pt, struct net_device *orig_dev) | 79 | struct packet_type *pt, struct net_device *orig_dev) |
80 | { | 80 | { |
81 | struct dsa_switch *ds = dev->dsa_ptr; | 81 | struct dsa_switch_tree *dst = dev->dsa_ptr; |
82 | struct dsa_switch *ds; | ||
82 | u8 *dsa_header; | 83 | u8 *dsa_header; |
84 | int source_device; | ||
83 | int source_port; | 85 | int source_port; |
84 | 86 | ||
85 | if (unlikely(ds == NULL)) | 87 | if (unlikely(dst == NULL)) |
86 | goto out_drop; | 88 | goto out_drop; |
87 | 89 | ||
88 | skb = skb_unshare(skb, GFP_ATOMIC); | 90 | skb = skb_unshare(skb, GFP_ATOMIC); |
@@ -98,16 +100,24 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, | |||
98 | dsa_header = skb->data - 2; | 100 | dsa_header = skb->data - 2; |
99 | 101 | ||
100 | /* | 102 | /* |
101 | * Check that frame type is either TO_CPU or FORWARD, and | 103 | * Check that frame type is either TO_CPU or FORWARD. |
102 | * that the source device is zero. | ||
103 | */ | 104 | */ |
104 | if ((dsa_header[0] & 0xdf) != 0x00 && (dsa_header[0] & 0xdf) != 0xc0) | 105 | if ((dsa_header[0] & 0xc0) != 0x00 && (dsa_header[0] & 0xc0) != 0xc0) |
105 | goto out_drop; | 106 | goto out_drop; |
106 | 107 | ||
107 | /* | 108 | /* |
108 | * Check that the source port is a registered DSA port. | 109 | * Determine source device and port. |
109 | */ | 110 | */ |
111 | source_device = dsa_header[0] & 0x1f; | ||
110 | source_port = (dsa_header[1] >> 3) & 0x1f; | 112 | source_port = (dsa_header[1] >> 3) & 0x1f; |
113 | |||
114 | /* | ||
115 | * Check that the source device exists and that the source | ||
116 | * port is a registered DSA port. | ||
117 | */ | ||
118 | if (source_device >= dst->pd->nr_chips) | ||
119 | goto out_drop; | ||
120 | ds = dst->ds[source_device]; | ||
111 | if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) | 121 | if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) |
112 | goto out_drop; | 122 | goto out_drop; |
113 | 123 | ||
@@ -175,7 +185,7 @@ out: | |||
175 | return 0; | 185 | return 0; |
176 | } | 186 | } |
177 | 187 | ||
178 | static struct packet_type dsa_packet_type = { | 188 | static struct packet_type dsa_packet_type __read_mostly = { |
179 | .type = cpu_to_be16(ETH_P_DSA), | 189 | .type = cpu_to_be16(ETH_P_DSA), |
180 | .func = dsa_rcv, | 190 | .func = dsa_rcv, |
181 | }; | 191 | }; |
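The dsa_rcv() change above drops the old requirement that the source-device field be zero and instead looks the originating chip up in the switch tree, so tags from any chip index are accepted. A minimal user-space sketch of the field extraction, using the same masks as the patch (the function and variable names here are illustrative, not kernel symbols):

#include <stdio.h>

/* Sketch of the (E)DSA tag field extraction used in dsa_rcv().
 * dsa_header points at the 4-byte DSA tag; byte 0 carries the frame
 * mode in bits 7:6 and the source switch index in bits 4:0, byte 1
 * carries the source port in bits 7:3.
 */
static int parse_dsa_tag(const unsigned char *dsa_header,
			 int *source_device, int *source_port)
{
	unsigned char mode = dsa_header[0] & 0xc0;

	/* Only TO_CPU (0x00) and FORWARD (0xc0) frames are accepted. */
	if (mode != 0x00 && mode != 0xc0)
		return -1;

	*source_device = dsa_header[0] & 0x1f;
	*source_port = (dsa_header[1] >> 3) & 0x1f;
	return 0;
}

int main(void)
{
	/* FORWARD frame from switch index 2, port 5. */
	unsigned char tag[4] = { 0xc2, 5 << 3, 0x00, 0x00 };
	int dev, port;

	if (parse_dsa_tag(tag, &dev, &port) == 0)
		printf("source device %d, port %d\n", dev, port);
	return 0;
}
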
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 6197f9a7ef42..815607bd286f 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * net/dsa/tag_edsa.c - Ethertype DSA tagging | 2 | * net/dsa/tag_edsa.c - Ethertype DSA tagging |
3 | * Copyright (c) 2008 Marvell Semiconductor | 3 | * Copyright (c) 2008-2009 Marvell Semiconductor |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
@@ -45,7 +45,7 @@ int edsa_xmit(struct sk_buff *skb, struct net_device *dev) | |||
45 | edsa_header[1] = ETH_P_EDSA & 0xff; | 45 | edsa_header[1] = ETH_P_EDSA & 0xff; |
46 | edsa_header[2] = 0x00; | 46 | edsa_header[2] = 0x00; |
47 | edsa_header[3] = 0x00; | 47 | edsa_header[3] = 0x00; |
48 | edsa_header[4] = 0x60; | 48 | edsa_header[4] = 0x60 | p->parent->index; |
49 | edsa_header[5] = p->port << 3; | 49 | edsa_header[5] = p->port << 3; |
50 | 50 | ||
51 | /* | 51 | /* |
@@ -70,7 +70,7 @@ int edsa_xmit(struct sk_buff *skb, struct net_device *dev) | |||
70 | edsa_header[1] = ETH_P_EDSA & 0xff; | 70 | edsa_header[1] = ETH_P_EDSA & 0xff; |
71 | edsa_header[2] = 0x00; | 71 | edsa_header[2] = 0x00; |
72 | edsa_header[3] = 0x00; | 72 | edsa_header[3] = 0x00; |
73 | edsa_header[4] = 0x40; | 73 | edsa_header[4] = 0x40 | p->parent->index; |
74 | edsa_header[5] = p->port << 3; | 74 | edsa_header[5] = p->port << 3; |
75 | edsa_header[6] = 0x00; | 75 | edsa_header[6] = 0x00; |
76 | edsa_header[7] = 0x00; | 76 | edsa_header[7] = 0x00; |
@@ -78,7 +78,7 @@ int edsa_xmit(struct sk_buff *skb, struct net_device *dev) | |||
78 | 78 | ||
79 | skb->protocol = htons(ETH_P_EDSA); | 79 | skb->protocol = htons(ETH_P_EDSA); |
80 | 80 | ||
81 | skb->dev = p->parent->master_netdev; | 81 | skb->dev = p->parent->dst->master_netdev; |
82 | dev_queue_xmit(skb); | 82 | dev_queue_xmit(skb); |
83 | 83 | ||
84 | return NETDEV_TX_OK; | 84 | return NETDEV_TX_OK; |
@@ -91,11 +91,13 @@ out_free: | |||
91 | static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, | 91 | static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, |
92 | struct packet_type *pt, struct net_device *orig_dev) | 92 | struct packet_type *pt, struct net_device *orig_dev) |
93 | { | 93 | { |
94 | struct dsa_switch *ds = dev->dsa_ptr; | 94 | struct dsa_switch_tree *dst = dev->dsa_ptr; |
95 | struct dsa_switch *ds; | ||
95 | u8 *edsa_header; | 96 | u8 *edsa_header; |
97 | int source_device; | ||
96 | int source_port; | 98 | int source_port; |
97 | 99 | ||
98 | if (unlikely(ds == NULL)) | 100 | if (unlikely(dst == NULL)) |
99 | goto out_drop; | 101 | goto out_drop; |
100 | 102 | ||
101 | skb = skb_unshare(skb, GFP_ATOMIC); | 103 | skb = skb_unshare(skb, GFP_ATOMIC); |
@@ -111,16 +113,24 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, | |||
111 | edsa_header = skb->data + 2; | 113 | edsa_header = skb->data + 2; |
112 | 114 | ||
113 | /* | 115 | /* |
114 | * Check that frame type is either TO_CPU or FORWARD, and | 116 | * Check that frame type is either TO_CPU or FORWARD. |
115 | * that the source device is zero. | ||
116 | */ | 117 | */ |
117 | if ((edsa_header[0] & 0xdf) != 0x00 && (edsa_header[0] & 0xdf) != 0xc0) | 118 | if ((edsa_header[0] & 0xc0) != 0x00 && (edsa_header[0] & 0xc0) != 0xc0) |
118 | goto out_drop; | 119 | goto out_drop; |
119 | 120 | ||
120 | /* | 121 | /* |
121 | * Check that the source port is a registered DSA port. | 122 | * Determine source device and port. |
122 | */ | 123 | */ |
124 | source_device = edsa_header[0] & 0x1f; | ||
123 | source_port = (edsa_header[1] >> 3) & 0x1f; | 125 | source_port = (edsa_header[1] >> 3) & 0x1f; |
126 | |||
127 | /* | ||
128 | * Check that the source device exists and that the source | ||
129 | * port is a registered DSA port. | ||
130 | */ | ||
131 | if (source_device >= dst->pd->nr_chips) | ||
132 | goto out_drop; | ||
133 | ds = dst->ds[source_device]; | ||
124 | if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) | 134 | if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) |
125 | goto out_drop; | 135 | goto out_drop; |
126 | 136 | ||
@@ -194,7 +204,7 @@ out: | |||
194 | return 0; | 204 | return 0; |
195 | } | 205 | } |
196 | 206 | ||
197 | static struct packet_type edsa_packet_type = { | 207 | static struct packet_type edsa_packet_type __read_mostly = { |
198 | .type = cpu_to_be16(ETH_P_EDSA), | 208 | .type = cpu_to_be16(ETH_P_EDSA), |
199 | .func = edsa_rcv, | 209 | .func = edsa_rcv, |
200 | }; | 210 | }; |
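On the transmit side, both edsa_xmit() hunks now fold the originating switch index (p->parent->index) into the low five bits of the FROM_CPU tag byte. A sketch of the untagged FROM_CPU EDSA header layout under that scheme; the Ethertype constant and helper below are assumptions for illustration, not taken from the patch:

#define EXAMPLE_ETH_P_EDSA	0xDADA	/* assumed EDSA Ethertype value */

/* Build the 8-byte untagged FROM_CPU EDSA header for a given switch
 * index and port, mirroring the byte layout used in edsa_xmit().
 */
static void build_edsa_from_cpu(unsigned char *edsa_header,
				int sw_index, int port)
{
	edsa_header[0] = (EXAMPLE_ETH_P_EDSA >> 8) & 0xff;
	edsa_header[1] = EXAMPLE_ETH_P_EDSA & 0xff;
	edsa_header[2] = 0x00;
	edsa_header[3] = 0x00;
	edsa_header[4] = 0x40 | (sw_index & 0x1f);	/* untagged FROM_CPU + chip index */
	edsa_header[5] = (port & 0x1f) << 3;
	edsa_header[6] = 0x00;
	edsa_header[7] = 0x00;
}
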
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index d7e7f424ff0c..1c3e30c38b86 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * net/dsa/tag_trailer.c - Trailer tag format handling | 2 | * net/dsa/tag_trailer.c - Trailer tag format handling |
3 | * Copyright (c) 2008 Marvell Semiconductor | 3 | * Copyright (c) 2008-2009 Marvell Semiconductor |
4 | * | 4 | * |
5 | * This program is free software; you can redistribute it and/or modify | 5 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 6 | * it under the terms of the GNU General Public License as published by |
@@ -59,7 +59,7 @@ int trailer_xmit(struct sk_buff *skb, struct net_device *dev) | |||
59 | 59 | ||
60 | nskb->protocol = htons(ETH_P_TRAILER); | 60 | nskb->protocol = htons(ETH_P_TRAILER); |
61 | 61 | ||
62 | nskb->dev = p->parent->master_netdev; | 62 | nskb->dev = p->parent->dst->master_netdev; |
63 | dev_queue_xmit(nskb); | 63 | dev_queue_xmit(nskb); |
64 | 64 | ||
65 | return NETDEV_TX_OK; | 65 | return NETDEV_TX_OK; |
@@ -68,12 +68,14 @@ int trailer_xmit(struct sk_buff *skb, struct net_device *dev) | |||
68 | static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, | 68 | static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, |
69 | struct packet_type *pt, struct net_device *orig_dev) | 69 | struct packet_type *pt, struct net_device *orig_dev) |
70 | { | 70 | { |
71 | struct dsa_switch *ds = dev->dsa_ptr; | 71 | struct dsa_switch_tree *dst = dev->dsa_ptr; |
72 | struct dsa_switch *ds; | ||
72 | u8 *trailer; | 73 | u8 *trailer; |
73 | int source_port; | 74 | int source_port; |
74 | 75 | ||
75 | if (unlikely(ds == NULL)) | 76 | if (unlikely(dst == NULL)) |
76 | goto out_drop; | 77 | goto out_drop; |
78 | ds = dst->ds[0]; | ||
77 | 79 | ||
78 | skb = skb_unshare(skb, GFP_ATOMIC); | 80 | skb = skb_unshare(skb, GFP_ATOMIC); |
79 | if (skb == NULL) | 81 | if (skb == NULL) |
@@ -111,7 +113,7 @@ out: | |||
111 | return 0; | 113 | return 0; |
112 | } | 114 | } |
113 | 115 | ||
114 | static struct packet_type trailer_packet_type = { | 116 | static struct packet_type trailer_packet_type __read_mostly = { |
115 | .type = cpu_to_be16(ETH_P_TRAILER), | 117 | .type = cpu_to_be16(ETH_P_TRAILER), |
116 | .func = trailer_rcv, | 118 | .func = trailer_rcv, |
117 | }; | 119 | }; |
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 7bf35582f656..6f479fa522c3 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c | |||
@@ -1102,7 +1102,7 @@ drop: | |||
1102 | return NET_RX_DROP; | 1102 | return NET_RX_DROP; |
1103 | } | 1103 | } |
1104 | 1104 | ||
1105 | static struct packet_type econet_packet_type = { | 1105 | static struct packet_type econet_packet_type __read_mostly = { |
1106 | .type = cpu_to_be16(ETH_P_ECONET), | 1106 | .type = cpu_to_be16(ETH_P_ECONET), |
1107 | .func = econet_rcv, | 1107 | .func = econet_rcv, |
1108 | }; | 1108 | }; |
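This is the same __read_mostly annotation applied to every static struct packet_type in the series: the structure is written once at registration and only read on the receive fast path, so it can be grouped away from frequently modified data. A hedged sketch of how such a handler is typically declared and registered; the protocol value and names below are placeholders, not part of this patch:

#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static int example_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	/* Placeholder handler: drop everything we are handed. */
	kfree_skb(skb);
	return 0;
}

/* __read_mostly groups the structure with other rarely written data,
 * keeping the fields read on every frame out of cache lines that see
 * frequent stores.
 */
static struct packet_type example_packet_type __read_mostly = {
	.type = cpu_to_be16(0x88B5),	/* local experimental Ethertype, placeholder */
	.func = example_rcv,
};

static int __init example_init(void)
{
	dev_add_pack(&example_packet_type);
	return 0;
}

static void __exit example_exit(void)
{
	dev_remove_pack(&example_packet_type);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
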
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 691268f3a359..b2cf91e4ccaa 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig | |||
@@ -35,7 +35,7 @@ config IP_ADVANCED_ROUTER | |||
35 | 35 | ||
36 | at boot time after the /proc file system has been mounted. | 36 | at boot time after the /proc file system has been mounted. |
37 | 37 | ||
38 | If you turn on IP forwarding, you will also get the rp_filter, which | 38 | If you turn on IP forwarding, you should consider the rp_filter, which |
39 | automatically rejects incoming packets if the routing table entry | 39 | automatically rejects incoming packets if the routing table entry |
40 | for their source address doesn't match the network interface they're | 40 | for their source address doesn't match the network interface they're |
41 | arriving on. This has security advantages because it prevents the | 41 | arriving on. This has security advantages because it prevents the |
@@ -46,12 +46,16 @@ config IP_ADVANCED_ROUTER | |||
46 | rp_filter on use: | 46 | rp_filter on use: |
47 | 47 | ||
48 | echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter | 48 | echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter |
49 | or | 49 | and |
50 | echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter | 50 | echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter |
51 | 51 | ||
52 | Note that some distributions enable it in startup scripts. | ||
53 | For details about rp_filter strict and loose mode read | ||
54 | <file:Documentation/networking/ip-sysctl.txt>. | ||
55 | |||
52 | If unsure, say N here. | 56 | If unsure, say N here. |
53 | 57 | ||
54 | choice | 58 | choice |
55 | prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)" | 59 | prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)" |
56 | depends on IP_ADVANCED_ROUTER | 60 | depends on IP_ADVANCED_ROUTER |
57 | default ASK_IP_FIB_HASH | 61 | default ASK_IP_FIB_HASH |
@@ -59,27 +63,29 @@ choice | |||
59 | config ASK_IP_FIB_HASH | 63 | config ASK_IP_FIB_HASH |
60 | bool "FIB_HASH" | 64 | bool "FIB_HASH" |
61 | ---help--- | 65 | ---help--- |
62 | Current FIB is very proven and good enough for most users. | 66 | Current FIB is very proven and good enough for most users. |
63 | 67 | ||
64 | config IP_FIB_TRIE | 68 | config IP_FIB_TRIE |
65 | bool "FIB_TRIE" | 69 | bool "FIB_TRIE" |
66 | ---help--- | 70 | ---help--- |
67 | Use new experimental LC-trie as FIB lookup algorithm. | 71 | Use new experimental LC-trie as FIB lookup algorithm. |
68 | This improves lookup performance if you have a large | 72 | This improves lookup performance if you have a large |
69 | number of routes. | 73 | number of routes. |
70 | 74 | ||
71 | LC-trie is a longest matching prefix lookup algorithm which | 75 | LC-trie is a longest matching prefix lookup algorithm which |
72 | performs better than FIB_HASH for large routing tables. | 76 | performs better than FIB_HASH for large routing tables. |
73 | But, it consumes more memory and is more complex. | 77 | But, it consumes more memory and is more complex. |
74 | 78 | ||
75 | LC-trie is described in: | 79 | LC-trie is described in: |
76 | 80 | ||
77 | IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson | 81 | IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson |
78 | IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 | 82 | IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, |
79 | An experimental study of compression methods for dynamic tries | 83 | June 1999 |
80 | Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. | 84 | |
81 | http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ | 85 | An experimental study of compression methods for dynamic tries |
82 | 86 | Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. | |
87 | http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ | ||
88 | |||
83 | endchoice | 89 | endchoice |
84 | 90 | ||
85 | config IP_FIB_HASH | 91 | config IP_FIB_HASH |
@@ -191,7 +197,7 @@ config IP_PNP_RARP | |||
191 | <file:Documentation/filesystems/nfsroot.txt> for details. | 197 | <file:Documentation/filesystems/nfsroot.txt> for details. |
192 | 198 | ||
193 | # not yet ready.. | 199 | # not yet ready.. |
194 | # bool ' IP: ARP support' CONFIG_IP_PNP_ARP | 200 | # bool ' IP: ARP support' CONFIG_IP_PNP_ARP |
195 | config NET_IPIP | 201 | config NET_IPIP |
196 | tristate "IP: tunneling" | 202 | tristate "IP: tunneling" |
197 | select INET_TUNNEL | 203 | select INET_TUNNEL |
@@ -361,7 +367,7 @@ config INET_IPCOMP | |||
361 | ---help--- | 367 | ---help--- |
362 | Support for IP Payload Compression Protocol (IPComp) (RFC3173), | 368 | Support for IP Payload Compression Protocol (IPComp) (RFC3173), |
363 | typically needed for IPsec. | 369 | typically needed for IPsec. |
364 | 370 | ||
365 | If unsure, say Y. | 371 | If unsure, say Y. |
366 | 372 | ||
367 | config INET_XFRM_TUNNEL | 373 | config INET_XFRM_TUNNEL |
@@ -415,7 +421,7 @@ config INET_DIAG | |||
415 | Support for INET (TCP, DCCP, etc) socket monitoring interface used by | 421 | Support for INET (TCP, DCCP, etc) socket monitoring interface used by |
416 | native Linux tools such as ss. ss is included in iproute2, currently | 422 | native Linux tools such as ss. ss is included in iproute2, currently |
417 | downloadable at <http://linux-net.osdl.org/index.php/Iproute2>. | 423 | downloadable at <http://linux-net.osdl.org/index.php/Iproute2>. |
418 | 424 | ||
419 | If unsure, say Y. | 425 | If unsure, say Y. |
420 | 426 | ||
421 | config INET_TCP_DIAG | 427 | config INET_TCP_DIAG |
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 627be4dc7fb0..d5aaabbb7cb3 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c | |||
@@ -1500,7 +1500,7 @@ static int ipv4_proc_init(void); | |||
1500 | * IP protocol layer initialiser | 1500 | * IP protocol layer initialiser |
1501 | */ | 1501 | */ |
1502 | 1502 | ||
1503 | static struct packet_type ip_packet_type = { | 1503 | static struct packet_type ip_packet_type __read_mostly = { |
1504 | .type = cpu_to_be16(ETH_P_IP), | 1504 | .type = cpu_to_be16(ETH_P_IP), |
1505 | .func = ip_rcv, | 1505 | .func = ip_rcv, |
1506 | .gso_send_check = inet_gso_send_check, | 1506 | .gso_send_check = inet_gso_send_check, |
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 3f6b7354699b..f11931c18381 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c | |||
@@ -801,8 +801,11 @@ static int arp_process(struct sk_buff *skb) | |||
801 | * cache. | 801 | * cache. |
802 | */ | 802 | */ |
803 | 803 | ||
804 | /* Special case: IPv4 duplicate address detection packet (RFC2131) */ | 804 | /* |
805 | if (sip == 0) { | 805 | * Special case: IPv4 duplicate address detection packet (RFC2131) |
806 | * and Gratuitous ARP/ARP Announce. (RFC3927, Section 2.4) | ||
807 | */ | ||
808 | if (sip == 0 || tip == sip) { | ||
806 | if (arp->ar_op == htons(ARPOP_REQUEST) && | 809 | if (arp->ar_op == htons(ARPOP_REQUEST) && |
807 | inet_addr_type(net, tip) == RTN_LOCAL && | 810 | inet_addr_type(net, tip) == RTN_LOCAL && |
808 | !arp_ignore(in_dev, sip, tip)) | 811 | !arp_ignore(in_dev, sip, tip)) |
@@ -892,7 +895,7 @@ static int arp_process(struct sk_buff *skb) | |||
892 | out: | 895 | out: |
893 | if (in_dev) | 896 | if (in_dev) |
894 | in_dev_put(in_dev); | 897 | in_dev_put(in_dev); |
895 | kfree_skb(skb); | 898 | consume_skb(skb); |
896 | return 0; | 899 | return 0; |
897 | } | 900 | } |
898 | 901 | ||
@@ -1225,7 +1228,7 @@ void arp_ifdown(struct net_device *dev) | |||
1225 | * Called once on startup. | 1228 | * Called once on startup. |
1226 | */ | 1229 | */ |
1227 | 1230 | ||
1228 | static struct packet_type arp_packet_type = { | 1231 | static struct packet_type arp_packet_type __read_mostly = { |
1229 | .type = cpu_to_be16(ETH_P_ARP), | 1232 | .type = cpu_to_be16(ETH_P_ARP), |
1230 | .func = arp_rcv, | 1233 | .func = arp_rcv, |
1231 | }; | 1234 | }; |
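The exit path of arp_process() now uses consume_skb() rather than kfree_skb(). Both release the buffer; the difference is accounting: consume_skb() marks the packet as successfully handled so it does not show up as a drop in tracing and drop monitoring. A small sketch of that convention, with an invented handler purely for illustration:

#include <linux/skbuff.h>
#include <linux/errno.h>

/* Sketch of the kfree_skb()/consume_skb() convention: error paths use
 * kfree_skb() so the packet is accounted as dropped, while a normally
 * processed packet is released with consume_skb().
 */
static int example_process(struct sk_buff *skb)
{
	if (skb->len < 8)
		goto drop;

	/* ... normal processing ... */

	consume_skb(skb);	/* handled, not a drop */
	return 0;

drop:
	kfree_skb(skb);		/* counted as a dropped packet */
	return -EINVAL;
}
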
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 6bb2635b5ded..7bc992976d29 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c | |||
@@ -3,11 +3,16 @@ | |||
3 | * | 3 | * |
4 | * This is an implementation of the CIPSO 2.2 protocol as specified in | 4 | * This is an implementation of the CIPSO 2.2 protocol as specified in |
5 | * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in | 5 | * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in |
6 | * FIPS-188, copies of both documents can be found in the Documentation | 6 | * FIPS-188. While CIPSO never became a full IETF RFC standard many vendors |
7 | * directory. While CIPSO never became a full IETF RFC standard many vendors | ||
8 | * have chosen to adopt the protocol and over the years it has become a | 7 | * have chosen to adopt the protocol and over the years it has become a |
9 | * de-facto standard for labeled networking. | 8 | * de-facto standard for labeled networking. |
10 | * | 9 | * |
10 | * The CIPSO draft specification can be found in the kernel's Documentation | ||
11 | * directory as well as the following URL: | ||
12 | * http://netlabel.sourceforge.net/files/draft-ietf-cipso-ipsecurity-01.txt | ||
13 | * The FIPS-188 specification can be found at the following URL: | ||
14 | * http://www.itl.nist.gov/fipspubs/fip188.htm | ||
15 | * | ||
11 | * Author: Paul Moore <paul.moore@hp.com> | 16 | * Author: Paul Moore <paul.moore@hp.com> |
12 | * | 17 | * |
13 | */ | 18 | */ |
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index d519a6a66726..126bb911880f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c | |||
@@ -1216,7 +1216,8 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, | |||
1216 | kfree_skb(skb); | 1216 | kfree_skb(skb); |
1217 | goto errout; | 1217 | goto errout; |
1218 | } | 1218 | } |
1219 | err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); | 1219 | rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); |
1220 | return; | ||
1220 | errout: | 1221 | errout: |
1221 | if (err < 0) | 1222 | if (err < 0) |
1222 | rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); | 1223 | rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); |
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 741e4fa3e474..cafcc49d0993 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c | |||
@@ -275,7 +275,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, | |||
275 | fib_res_put(&res); | 275 | fib_res_put(&res); |
276 | if (no_addr) | 276 | if (no_addr) |
277 | goto last_resort; | 277 | goto last_resort; |
278 | if (rpf) | 278 | if (rpf == 1) |
279 | goto e_inval; | 279 | goto e_inval; |
280 | fl.oif = dev->ifindex; | 280 | fl.oif = dev->ifindex; |
281 | 281 | ||
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4817dea3bc73..f831df500907 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c | |||
@@ -322,8 +322,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, | |||
322 | kfree_skb(skb); | 322 | kfree_skb(skb); |
323 | goto errout; | 323 | goto errout; |
324 | } | 324 | } |
325 | err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, | 325 | rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, |
326 | info->nlh, GFP_KERNEL); | 326 | info->nlh, GFP_KERNEL); |
327 | return; | ||
327 | errout: | 328 | errout: |
328 | if (err < 0) | 329 | if (err < 0) |
329 | rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); | 330 | rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); |
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 382800a62b31..3f50807237e0 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c | |||
@@ -1207,7 +1207,7 @@ static struct pernet_operations __net_initdata icmp_sk_ops = { | |||
1207 | 1207 | ||
1208 | int __init icmp_init(void) | 1208 | int __init icmp_init(void) |
1209 | { | 1209 | { |
1210 | return register_pernet_device(&icmp_sk_ops); | 1210 | return register_pernet_subsys(&icmp_sk_ops); |
1211 | } | 1211 | } |
1212 | 1212 | ||
1213 | EXPORT_SYMBOL(icmp_err_convert); | 1213 | EXPORT_SYMBOL(icmp_err_convert); |
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 6c52e08f786e..eaf3e2c8646a 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c | |||
@@ -267,6 +267,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, | |||
267 | 267 | ||
268 | struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, | 268 | struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, |
269 | struct inet_frags *f, void *key, unsigned int hash) | 269 | struct inet_frags *f, void *key, unsigned int hash) |
270 | __releases(&f->lock) | ||
270 | { | 271 | { |
271 | struct inet_frag_queue *q; | 272 | struct inet_frag_queue *q; |
272 | struct hlist_node *n; | 273 | struct hlist_node *n; |
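The __releases(&f->lock) annotation added to inet_frag_find() tells sparse that the function intentionally returns with the lock dropped, silencing the context-imbalance warning. A minimal sketch of how __acquires()/__releases() document helpers with asymmetric locking; the lock and functions below are made up for the example:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

/* Takes the lock and returns with it held; __acquires() records the
 * imbalance for sparse so it does not warn here.
 */
static void example_lock_acquire(void)
	__acquires(&example_lock)
{
	spin_lock(&example_lock);
}

/* Expects to be called with the lock held and drops it before
 * returning, which __releases() documents for sparse.
 */
static void example_lock_release(void)
	__releases(&example_lock)
{
	spin_unlock(&example_lock);
}
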
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 6659ac000eeb..7985346653bd 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c | |||
@@ -463,6 +463,7 @@ err: | |||
463 | static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | 463 | static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, |
464 | struct net_device *dev) | 464 | struct net_device *dev) |
465 | { | 465 | { |
466 | struct net *net = container_of(qp->q.net, struct net, ipv4.frags); | ||
466 | struct iphdr *iph; | 467 | struct iphdr *iph; |
467 | struct sk_buff *fp, *head = qp->q.fragments; | 468 | struct sk_buff *fp, *head = qp->q.fragments; |
468 | int len; | 469 | int len; |
@@ -548,7 +549,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, | |||
548 | iph = ip_hdr(head); | 549 | iph = ip_hdr(head); |
549 | iph->frag_off = 0; | 550 | iph->frag_off = 0; |
550 | iph->tot_len = htons(len); | 551 | iph->tot_len = htons(len); |
551 | IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMOKS); | 552 | IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); |
552 | qp->q.fragments = NULL; | 553 | qp->q.fragments = NULL; |
553 | return 0; | 554 | return 0; |
554 | 555 | ||
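ip_frag_reasm() now recovers the owning struct net from the embedded netns_frags via container_of() instead of going through the device, so the REASMOKS counter is bumped against the correct namespace regardless of dev. A user-space rendition of the container_of() pattern with stand-in structures (not the real ipv4 ones):

#include <stdio.h>
#include <stddef.h>

/* Userspace copy of the kernel's container_of(): recover a pointer to
 * the enclosing structure from a pointer to one of its members.
 */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct frags {
	int count;
};

struct netns {			/* stand-in for struct net */
	int id;
	struct frags frags;
};

int main(void)
{
	struct netns ns = { .id = 7 };
	struct frags *f = &ns.frags;

	/* Walk back from the member to the containing structure. */
	struct netns *owner = container_of(f, struct netns, frags);

	printf("net id %d\n", owner->id);
	return 0;
}
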
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 07a188afb3ac..e62510d5ea5a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c | |||
@@ -491,7 +491,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) | |||
491 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) | 491 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) |
492 | goto out; | 492 | goto out; |
493 | 493 | ||
494 | if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) | 494 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) |
495 | t->err_count++; | 495 | t->err_count++; |
496 | else | 496 | else |
497 | t->err_count = 1; | 497 | t->err_count = 1; |
@@ -803,7 +803,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
803 | #endif | 803 | #endif |
804 | 804 | ||
805 | if (tunnel->err_count > 0) { | 805 | if (tunnel->err_count > 0) { |
806 | if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { | 806 | if (time_before(jiffies, |
807 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { | ||
807 | tunnel->err_count--; | 808 | tunnel->err_count--; |
808 | 809 | ||
809 | dst_link_failure(skb); | 810 | dst_link_failure(skb); |
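The ipgre_err()/ipgre_tunnel_xmit() hunks replace open-coded jiffies subtraction with time_before(), which compares timestamps correctly even when the jiffies counter wraps. A short sketch of the idiom; the timeout constant and function are illustrative only:

#include <linux/jiffies.h>

#define EXAMPLE_ERR_TIMEO	(30 * HZ)	/* illustrative timeout */

/* Rate-limit sketch: bump the error count while errors arrive inside
 * the timeout window, reset it otherwise.  time_before() handles
 * jiffies wrap-around, unlike a raw "jiffies - t < TIMEO" test.
 */
static void example_note_error(unsigned long *err_time, int *err_count)
{
	if (time_before(jiffies, *err_time + EXAMPLE_ERR_TIMEO))
		(*err_count)++;
	else
		*err_count = 1;

	*err_time = jiffies;
}
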
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 5079dfbc6f38..9054139795af 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c | |||
@@ -327,7 +327,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) | |||
327 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) | 327 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) |
328 | goto out; | 328 | goto out; |
329 | 329 | ||
330 | if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) | 330 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) |
331 | t->err_count++; | 331 | t->err_count++; |
332 | else | 332 | else |
333 | t->err_count = 1; | 333 | t->err_count = 1; |
@@ -466,7 +466,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
466 | } | 466 | } |
467 | 467 | ||
468 | if (tunnel->err_count > 0) { | 468 | if (tunnel->err_count > 0) { |
469 | if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { | 469 | if (time_before(jiffies, |
470 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { | ||
470 | tunnel->err_count--; | 471 | tunnel->err_count--; |
471 | dst_link_failure(skb); | 472 | dst_link_failure(skb); |
472 | } else | 473 | } else |
@@ -750,7 +751,7 @@ static struct xfrm_tunnel ipip_handler = { | |||
750 | .priority = 1, | 751 | .priority = 1, |
751 | }; | 752 | }; |
752 | 753 | ||
753 | static char banner[] __initdata = | 754 | static const char banner[] __initconst = |
754 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; | 755 | KERN_INFO "IPv4 over IPv4 tunneling driver\n"; |
755 | 756 | ||
756 | static void ipip_destroy_tunnels(struct ipip_net *ipn) | 757 | static void ipip_destroy_tunnels(struct ipip_net *ipn) |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 90b2f3c192ff..2451aeb5ac23 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -661,6 +661,47 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) | |||
661 | return NULL; | 661 | return NULL; |
662 | } | 662 | } |
663 | 663 | ||
664 | static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, | ||
665 | int large_allowed) | ||
666 | { | ||
667 | struct tcp_sock *tp = tcp_sk(sk); | ||
668 | u32 xmit_size_goal, old_size_goal; | ||
669 | |||
670 | xmit_size_goal = mss_now; | ||
671 | |||
672 | if (large_allowed && sk_can_gso(sk)) { | ||
673 | xmit_size_goal = ((sk->sk_gso_max_size - 1) - | ||
674 | inet_csk(sk)->icsk_af_ops->net_header_len - | ||
675 | inet_csk(sk)->icsk_ext_hdr_len - | ||
676 | tp->tcp_header_len); | ||
677 | |||
678 | xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); | ||
679 | |||
680 | /* We try hard to avoid divides here */ | ||
681 | old_size_goal = tp->xmit_size_goal_segs * mss_now; | ||
682 | |||
683 | if (likely(old_size_goal <= xmit_size_goal && | ||
684 | old_size_goal + mss_now > xmit_size_goal)) { | ||
685 | xmit_size_goal = old_size_goal; | ||
686 | } else { | ||
687 | tp->xmit_size_goal_segs = xmit_size_goal / mss_now; | ||
688 | xmit_size_goal = tp->xmit_size_goal_segs * mss_now; | ||
689 | } | ||
690 | } | ||
691 | |||
692 | return max(xmit_size_goal, mss_now); | ||
693 | } | ||
694 | |||
695 | static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) | ||
696 | { | ||
697 | int mss_now; | ||
698 | |||
699 | mss_now = tcp_current_mss(sk); | ||
700 | *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); | ||
701 | |||
702 | return mss_now; | ||
703 | } | ||
704 | |||
664 | static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, | 705 | static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, |
665 | size_t psize, int flags) | 706 | size_t psize, int flags) |
666 | { | 707 | { |
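tcp_xmit_size_goal() above caches the previous goal as a segment count and only redoes the division when the cached value no longer rounds to the same number of MSS-sized segments as the freshly computed bound. A stand-alone sketch of that rounding rule, stripped of the kernel types and purely illustrative:

#include <stdio.h>

/* Reuse the cached goal (stored as a segment count) while it still
 * lies within one MSS of the new bound; otherwise redo the divide and
 * refresh the cache.
 */
static unsigned int size_goal(unsigned int mss_now, unsigned int bound,
			      unsigned int *cached_segs)
{
	unsigned int old_goal = *cached_segs * mss_now;

	if (old_goal <= bound && old_goal + mss_now > bound)
		return old_goal;

	*cached_segs = bound / mss_now;
	return *cached_segs * mss_now;
}

int main(void)
{
	unsigned int segs = 0;

	printf("%u\n", size_goal(1448, 64000, &segs));	/* recomputed: 44 * 1448 */
	printf("%u\n", size_goal(1448, 64500, &segs));	/* cached value reused */
	return 0;
}
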
@@ -677,13 +718,12 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse | |||
677 | 718 | ||
678 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 719 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
679 | 720 | ||
680 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); | 721 | mss_now = tcp_send_mss(sk, &size_goal, flags); |
681 | size_goal = tp->xmit_size_goal; | ||
682 | copied = 0; | 722 | copied = 0; |
683 | 723 | ||
684 | err = -EPIPE; | 724 | err = -EPIPE; |
685 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 725 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
686 | goto do_error; | 726 | goto out_err; |
687 | 727 | ||
688 | while (psize > 0) { | 728 | while (psize > 0) { |
689 | struct sk_buff *skb = tcp_write_queue_tail(sk); | 729 | struct sk_buff *skb = tcp_write_queue_tail(sk); |
@@ -761,8 +801,7 @@ wait_for_memory: | |||
761 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 801 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
762 | goto do_error; | 802 | goto do_error; |
763 | 803 | ||
764 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); | 804 | mss_now = tcp_send_mss(sk, &size_goal, flags); |
765 | size_goal = tp->xmit_size_goal; | ||
766 | } | 805 | } |
767 | 806 | ||
768 | out: | 807 | out: |
@@ -844,8 +883,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | |||
844 | /* This should be in poll */ | 883 | /* This should be in poll */ |
845 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); | 884 | clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); |
846 | 885 | ||
847 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); | 886 | mss_now = tcp_send_mss(sk, &size_goal, flags); |
848 | size_goal = tp->xmit_size_goal; | ||
849 | 887 | ||
850 | /* Ok commence sending. */ | 888 | /* Ok commence sending. */ |
851 | iovlen = msg->msg_iovlen; | 889 | iovlen = msg->msg_iovlen; |
@@ -854,7 +892,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | |||
854 | 892 | ||
855 | err = -EPIPE; | 893 | err = -EPIPE; |
856 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) | 894 | if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) |
857 | goto do_error; | 895 | goto out_err; |
858 | 896 | ||
859 | while (--iovlen >= 0) { | 897 | while (--iovlen >= 0) { |
860 | int seglen = iov->iov_len; | 898 | int seglen = iov->iov_len; |
@@ -1007,8 +1045,7 @@ wait_for_memory: | |||
1007 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) | 1045 | if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) |
1008 | goto do_error; | 1046 | goto do_error; |
1009 | 1047 | ||
1010 | mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); | 1048 | mss_now = tcp_send_mss(sk, &size_goal, flags); |
1011 | size_goal = tp->xmit_size_goal; | ||
1012 | } | 1049 | } |
1013 | } | 1050 | } |
1014 | 1051 | ||
@@ -1045,8 +1082,7 @@ out_err: | |||
1045 | */ | 1082 | */ |
1046 | 1083 | ||
1047 | static int tcp_recv_urg(struct sock *sk, long timeo, | 1084 | static int tcp_recv_urg(struct sock *sk, long timeo, |
1048 | struct msghdr *msg, int len, int flags, | 1085 | struct msghdr *msg, int len, int flags) |
1049 | int *addr_len) | ||
1050 | { | 1086 | { |
1051 | struct tcp_sock *tp = tcp_sk(sk); | 1087 | struct tcp_sock *tp = tcp_sk(sk); |
1052 | 1088 | ||
@@ -1661,7 +1697,7 @@ out: | |||
1661 | return err; | 1697 | return err; |
1662 | 1698 | ||
1663 | recv_urg: | 1699 | recv_urg: |
1664 | err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); | 1700 | err = tcp_recv_urg(sk, timeo, msg, len, flags); |
1665 | goto out; | 1701 | goto out; |
1666 | } | 1702 | } |
1667 | 1703 | ||
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 7eb7636db0d0..3b53fd1af23f 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c | |||
@@ -149,16 +149,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
149 | tcp_slow_start(tp); | 149 | tcp_slow_start(tp); |
150 | else { | 150 | else { |
151 | bictcp_update(ca, tp->snd_cwnd); | 151 | bictcp_update(ca, tp->snd_cwnd); |
152 | 152 | tcp_cong_avoid_ai(tp, ca->cnt); | |
153 | /* In dangerous area, increase slowly. | ||
154 | * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd | ||
155 | */ | ||
156 | if (tp->snd_cwnd_cnt >= ca->cnt) { | ||
157 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
158 | tp->snd_cwnd++; | ||
159 | tp->snd_cwnd_cnt = 0; | ||
160 | } else | ||
161 | tp->snd_cwnd_cnt++; | ||
162 | } | 153 | } |
163 | 154 | ||
164 | } | 155 | } |
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4ec5b4e97c4e..e92beb9e55e0 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c | |||
@@ -336,6 +336,19 @@ void tcp_slow_start(struct tcp_sock *tp) | |||
336 | } | 336 | } |
337 | EXPORT_SYMBOL_GPL(tcp_slow_start); | 337 | EXPORT_SYMBOL_GPL(tcp_slow_start); |
338 | 338 | ||
339 | /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */ | ||
340 | void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w) | ||
341 | { | ||
342 | if (tp->snd_cwnd_cnt >= w) { | ||
343 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
344 | tp->snd_cwnd++; | ||
345 | tp->snd_cwnd_cnt = 0; | ||
346 | } else { | ||
347 | tp->snd_cwnd_cnt++; | ||
348 | } | ||
349 | } | ||
350 | EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); | ||
351 | |||
339 | /* | 352 | /* |
340 | * TCP Reno congestion control | 353 | * TCP Reno congestion control |
341 | * This is special case used for fallback as well. | 354 | * This is special case used for fallback as well. |
@@ -365,13 +378,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
365 | tp->snd_cwnd++; | 378 | tp->snd_cwnd++; |
366 | } | 379 | } |
367 | } else { | 380 | } else { |
368 | /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */ | 381 | tcp_cong_avoid_ai(tp, tp->snd_cwnd); |
369 | if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { | ||
370 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
371 | tp->snd_cwnd++; | ||
372 | tp->snd_cwnd_cnt = 0; | ||
373 | } else | ||
374 | tp->snd_cwnd_cnt++; | ||
375 | } | 382 | } |
376 | } | 383 | } |
377 | EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); | 384 | EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); |
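tcp_cong_avoid_ai() factors out the additive-increase loop that Reno, BIC and CUBIC previously open-coded, as the surrounding hunks show. A hedged sketch of a congestion-avoidance callback built on the two exported helpers; the module and callback name are hypothetical:

#include <net/tcp.h>

/* Example cong_avoid() for a hypothetical module: slow start below
 * ssthresh, otherwise additive increase of one segment per window via
 * the new tcp_cong_avoid_ai() helper.
 */
static void example_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
{
	struct tcp_sock *tp = tcp_sk(sk);

	if (!tcp_is_cwnd_limited(sk, in_flight))
		return;

	if (tp->snd_cwnd <= tp->snd_ssthresh)
		tcp_slow_start(tp);
	else
		tcp_cong_avoid_ai(tp, tp->snd_cwnd);
}
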
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index ee467ec40c4f..71d5f2f29fa6 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -294,16 +294,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
294 | tcp_slow_start(tp); | 294 | tcp_slow_start(tp); |
295 | } else { | 295 | } else { |
296 | bictcp_update(ca, tp->snd_cwnd); | 296 | bictcp_update(ca, tp->snd_cwnd); |
297 | 297 | tcp_cong_avoid_ai(tp, ca->cnt); | |
298 | /* In dangerous area, increase slowly. | ||
299 | * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd | ||
300 | */ | ||
301 | if (tp->snd_cwnd_cnt >= ca->cnt) { | ||
302 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
303 | tp->snd_cwnd++; | ||
304 | tp->snd_cwnd_cnt = 0; | ||
305 | } else | ||
306 | tp->snd_cwnd_cnt++; | ||
307 | } | 298 | } |
308 | 299 | ||
309 | } | 300 | } |
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 937549b8a921..26d5c7fc7de5 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c | |||
@@ -115,8 +115,7 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt | |||
115 | return; | 115 | return; |
116 | 116 | ||
117 | /* achieved throughput calculations */ | 117 | /* achieved throughput calculations */ |
118 | if (icsk->icsk_ca_state != TCP_CA_Open && | 118 | if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_Disorder))) { |
119 | icsk->icsk_ca_state != TCP_CA_Disorder) { | ||
120 | ca->packetcount = 0; | 119 | ca->packetcount = 0; |
121 | ca->lasttime = now; | 120 | ca->lasttime = now; |
122 | return; | 121 | return; |
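The HTCP change collapses two equality tests on icsk_ca_state into one bitmask test, relying on each TCPF_CA_* flag being defined as 1 << TCP_CA_*. A small user-space check that the two forms agree, using stand-in names for the enum and flags:

#include <stdio.h>

/* Mirror of the kernel's enum/flag pairing: the flag for each state is
 * 1 << state, so "state is Open or Disorder" becomes one mask test.
 */
enum { CA_Open = 0, CA_Disorder, CA_CWR, CA_Recovery, CA_Loss };
#define CAF_Open	(1 << CA_Open)
#define CAF_Disorder	(1 << CA_Disorder)

int main(void)
{
	int state;

	for (state = CA_Open; state <= CA_Loss; state++) {
		int eq   = (state != CA_Open && state != CA_Disorder);
		int mask = !((1 << state) & (CAF_Open | CAF_Disorder));

		printf("state %d: equality form %d, mask form %d\n",
		       state, eq, mask);
	}
	return 0;
}
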
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a6961d75c7ea..2bc8e27a163d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -64,6 +64,7 @@ | |||
64 | #include <linux/mm.h> | 64 | #include <linux/mm.h> |
65 | #include <linux/module.h> | 65 | #include <linux/module.h> |
66 | #include <linux/sysctl.h> | 66 | #include <linux/sysctl.h> |
67 | #include <linux/kernel.h> | ||
67 | #include <net/dst.h> | 68 | #include <net/dst.h> |
68 | #include <net/tcp.h> | 69 | #include <net/tcp.h> |
69 | #include <net/inet_common.h> | 70 | #include <net/inet_common.h> |
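The new #include <linux/kernel.h> pulls in the generic swap() macro that later hunks use in place of the open-coded SACK block exchanges and the removed tcp_sack_swap() helper. A user-space rendition of that macro and a whole-struct swap, assuming GCC's typeof extension:

#include <stdio.h>

/* Userspace copy of the linux/kernel.h swap() macro: exchange two
 * values of the same type through a typeof-declared temporary.
 */
#define swap(a, b) \
	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

struct sack_block {
	unsigned int start_seq;
	unsigned int end_seq;
};

int main(void)
{
	struct sack_block x = { 10, 20 }, y = { 30, 40 };

	swap(x, y);	/* whole-struct swap, no per-field copies */
	printf("%u-%u %u-%u\n", x.start_seq, x.end_seq, y.start_seq, y.end_seq);
	return 0;
}
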
@@ -1178,10 +1179,18 @@ static void tcp_mark_lost_retrans(struct sock *sk) | |||
1178 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) | 1179 | if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) |
1179 | continue; | 1180 | continue; |
1180 | 1181 | ||
1181 | if (after(received_upto, ack_seq) && | 1182 | /* TODO: We would like to get rid of tcp_is_fack(tp) only |
1182 | (tcp_is_fack(tp) || | 1183 | * constraint here (see above) but figuring out that at |
1183 | !before(received_upto, | 1184 | * least tp->reordering SACK blocks reside between ack_seq |
1184 | ack_seq + tp->reordering * tp->mss_cache))) { | 1185 | * and received_upto is not easy task to do cheaply with |
1186 | * the available datastructures. | ||
1187 | * | ||
1188 | * Whether FACK should check here for tp->reordering segs | ||
1189 | * in-between one could argue for either way (it would be | ||
1190 | * rather simple to implement as we could count fack_count | ||
1191 | * during the walk and do tp->fackets_out - fack_count). | ||
1192 | */ | ||
1193 | if (after(received_upto, ack_seq)) { | ||
1185 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; | 1194 | TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; |
1186 | tp->retrans_out -= tcp_skb_pcount(skb); | 1195 | tp->retrans_out -= tcp_skb_pcount(skb); |
1187 | 1196 | ||
@@ -1374,7 +1383,8 @@ static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, | |||
1374 | 1383 | ||
1375 | static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | 1384 | static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, |
1376 | struct tcp_sacktag_state *state, | 1385 | struct tcp_sacktag_state *state, |
1377 | unsigned int pcount, int shifted, int mss) | 1386 | unsigned int pcount, int shifted, int mss, |
1387 | int dup_sack) | ||
1378 | { | 1388 | { |
1379 | struct tcp_sock *tp = tcp_sk(sk); | 1389 | struct tcp_sock *tp = tcp_sk(sk); |
1380 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); | 1390 | struct sk_buff *prev = tcp_write_queue_prev(sk, skb); |
@@ -1410,7 +1420,7 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, | |||
1410 | } | 1420 | } |
1411 | 1421 | ||
1412 | /* We discard results */ | 1422 | /* We discard results */ |
1413 | tcp_sacktag_one(skb, sk, state, 0, pcount); | 1423 | tcp_sacktag_one(skb, sk, state, dup_sack, pcount); |
1414 | 1424 | ||
1415 | /* Difference in this won't matter, both ACKed by the same cumul. ACK */ | 1425 | /* Difference in this won't matter, both ACKed by the same cumul. ACK */ |
1416 | TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); | 1426 | TCP_SKB_CB(prev)->sacked |= (TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS); |
@@ -1561,7 +1571,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, | |||
1561 | 1571 | ||
1562 | if (!skb_shift(prev, skb, len)) | 1572 | if (!skb_shift(prev, skb, len)) |
1563 | goto fallback; | 1573 | goto fallback; |
1564 | if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss)) | 1574 | if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) |
1565 | goto out; | 1575 | goto out; |
1566 | 1576 | ||
1567 | /* Hole filled allows collapsing with the next as well, this is very | 1577 | /* Hole filled allows collapsing with the next as well, this is very |
@@ -1580,7 +1590,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, | |||
1580 | len = skb->len; | 1590 | len = skb->len; |
1581 | if (skb_shift(prev, skb, len)) { | 1591 | if (skb_shift(prev, skb, len)) { |
1582 | pcount += tcp_skb_pcount(skb); | 1592 | pcount += tcp_skb_pcount(skb); |
1583 | tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss); | 1593 | tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0); |
1584 | } | 1594 | } |
1585 | 1595 | ||
1586 | out: | 1596 | out: |
@@ -1793,11 +1803,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, | |||
1793 | for (i = used_sacks - 1; i > 0; i--) { | 1803 | for (i = used_sacks - 1; i > 0; i--) { |
1794 | for (j = 0; j < i; j++) { | 1804 | for (j = 0; j < i; j++) { |
1795 | if (after(sp[j].start_seq, sp[j + 1].start_seq)) { | 1805 | if (after(sp[j].start_seq, sp[j + 1].start_seq)) { |
1796 | struct tcp_sack_block tmp; | 1806 | swap(sp[j], sp[j + 1]); |
1797 | |||
1798 | tmp = sp[j]; | ||
1799 | sp[j] = sp[j + 1]; | ||
1800 | sp[j + 1] = tmp; | ||
1801 | 1807 | ||
1802 | /* Track where the first SACK block goes to */ | 1808 | /* Track where the first SACK block goes to */ |
1803 | if (j == first_sack_index) | 1809 | if (j == first_sack_index) |
@@ -2452,6 +2458,44 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2452 | return 0; | 2458 | return 0; |
2453 | } | 2459 | } |
2454 | 2460 | ||
2461 | /* New heuristics: it is possible only after we switched to restart timer | ||
2462 | * each time when something is ACKed. Hence, we can detect timed out packets | ||
2463 | * during fast retransmit without falling to slow start. | ||
2464 | * | ||
2465 | * Usefulness of this as is very questionable, since we should know which of | ||
2466 | * the segments is the next to timeout which is relatively expensive to find | ||
2467 | * in general case unless we add some data structure just for that. The | ||
2468 | * current approach certainly won't find the right one too often and when it | ||
2469 | * finally does find _something_ it usually marks large part of the window | ||
2470 | * right away (because a retransmission with a larger timestamp blocks the | ||
2471 | * loop from advancing). -ij | ||
2472 | */ | ||
2473 | static void tcp_timeout_skbs(struct sock *sk) | ||
2474 | { | ||
2475 | struct tcp_sock *tp = tcp_sk(sk); | ||
2476 | struct sk_buff *skb; | ||
2477 | |||
2478 | if (!tcp_is_fack(tp) || !tcp_head_timedout(sk)) | ||
2479 | return; | ||
2480 | |||
2481 | skb = tp->scoreboard_skb_hint; | ||
2482 | if (tp->scoreboard_skb_hint == NULL) | ||
2483 | skb = tcp_write_queue_head(sk); | ||
2484 | |||
2485 | tcp_for_write_queue_from(skb, sk) { | ||
2486 | if (skb == tcp_send_head(sk)) | ||
2487 | break; | ||
2488 | if (!tcp_skb_timedout(sk, skb)) | ||
2489 | break; | ||
2490 | |||
2491 | tcp_skb_mark_lost(tp, skb); | ||
2492 | } | ||
2493 | |||
2494 | tp->scoreboard_skb_hint = skb; | ||
2495 | |||
2496 | tcp_verify_left_out(tp); | ||
2497 | } | ||
2498 | |||
2455 | /* Mark head of queue up as lost. With RFC3517 SACK, the packets is | 2499 | /* Mark head of queue up as lost. With RFC3517 SACK, the packets is |
2456 | * is against sacked "cnt", otherwise it's against facked "cnt" | 2500 | * is against sacked "cnt", otherwise it's against facked "cnt" |
2457 | */ | 2501 | */ |
@@ -2524,30 +2568,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) | |||
2524 | tcp_mark_head_lost(sk, sacked_upto); | 2568 | tcp_mark_head_lost(sk, sacked_upto); |
2525 | } | 2569 | } |
2526 | 2570 | ||
2527 | /* New heuristics: it is possible only after we switched | 2571 | tcp_timeout_skbs(sk); |
2528 | * to restart timer each time when something is ACKed. | ||
2529 | * Hence, we can detect timed out packets during fast | ||
2530 | * retransmit without falling to slow start. | ||
2531 | */ | ||
2532 | if (tcp_is_fack(tp) && tcp_head_timedout(sk)) { | ||
2533 | struct sk_buff *skb; | ||
2534 | |||
2535 | skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint | ||
2536 | : tcp_write_queue_head(sk); | ||
2537 | |||
2538 | tcp_for_write_queue_from(skb, sk) { | ||
2539 | if (skb == tcp_send_head(sk)) | ||
2540 | break; | ||
2541 | if (!tcp_skb_timedout(sk, skb)) | ||
2542 | break; | ||
2543 | |||
2544 | tcp_skb_mark_lost(tp, skb); | ||
2545 | } | ||
2546 | |||
2547 | tp->scoreboard_skb_hint = skb; | ||
2548 | |||
2549 | tcp_verify_left_out(tp); | ||
2550 | } | ||
2551 | } | 2572 | } |
2552 | 2573 | ||
2553 | /* CWND moderation, preventing bursts due to too big ACKs | 2574 | /* CWND moderation, preventing bursts due to too big ACKs |
@@ -2812,7 +2833,7 @@ static void tcp_mtup_probe_failed(struct sock *sk) | |||
2812 | icsk->icsk_mtup.probe_size = 0; | 2833 | icsk->icsk_mtup.probe_size = 0; |
2813 | } | 2834 | } |
2814 | 2835 | ||
2815 | static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) | 2836 | static void tcp_mtup_probe_success(struct sock *sk) |
2816 | { | 2837 | { |
2817 | struct tcp_sock *tp = tcp_sk(sk); | 2838 | struct tcp_sock *tp = tcp_sk(sk); |
2818 | struct inet_connection_sock *icsk = inet_csk(sk); | 2839 | struct inet_connection_sock *icsk = inet_csk(sk); |
@@ -2840,7 +2861,7 @@ void tcp_simple_retransmit(struct sock *sk) | |||
2840 | const struct inet_connection_sock *icsk = inet_csk(sk); | 2861 | const struct inet_connection_sock *icsk = inet_csk(sk); |
2841 | struct tcp_sock *tp = tcp_sk(sk); | 2862 | struct tcp_sock *tp = tcp_sk(sk); |
2842 | struct sk_buff *skb; | 2863 | struct sk_buff *skb; |
2843 | unsigned int mss = tcp_current_mss(sk, 0); | 2864 | unsigned int mss = tcp_current_mss(sk); |
2844 | u32 prior_lost = tp->lost_out; | 2865 | u32 prior_lost = tp->lost_out; |
2845 | 2866 | ||
2846 | tcp_for_write_queue(skb, sk) { | 2867 | tcp_for_write_queue(skb, sk) { |
@@ -3177,7 +3198,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3177 | 3198 | ||
3178 | while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { | 3199 | while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { |
3179 | struct tcp_skb_cb *scb = TCP_SKB_CB(skb); | 3200 | struct tcp_skb_cb *scb = TCP_SKB_CB(skb); |
3180 | u32 end_seq; | ||
3181 | u32 acked_pcount; | 3201 | u32 acked_pcount; |
3182 | u8 sacked = scb->sacked; | 3202 | u8 sacked = scb->sacked; |
3183 | 3203 | ||
@@ -3192,16 +3212,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3192 | break; | 3212 | break; |
3193 | 3213 | ||
3194 | fully_acked = 0; | 3214 | fully_acked = 0; |
3195 | end_seq = tp->snd_una; | ||
3196 | } else { | 3215 | } else { |
3197 | acked_pcount = tcp_skb_pcount(skb); | 3216 | acked_pcount = tcp_skb_pcount(skb); |
3198 | end_seq = scb->end_seq; | ||
3199 | } | ||
3200 | |||
3201 | /* MTU probing checks */ | ||
3202 | if (fully_acked && icsk->icsk_mtup.probe_size && | ||
3203 | !after(tp->mtu_probe.probe_seq_end, scb->end_seq)) { | ||
3204 | tcp_mtup_probe_success(sk, skb); | ||
3205 | } | 3217 | } |
3206 | 3218 | ||
3207 | if (sacked & TCPCB_RETRANS) { | 3219 | if (sacked & TCPCB_RETRANS) { |
@@ -3266,24 +3278,26 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, | |||
3266 | const struct tcp_congestion_ops *ca_ops | 3278 | const struct tcp_congestion_ops *ca_ops |
3267 | = inet_csk(sk)->icsk_ca_ops; | 3279 | = inet_csk(sk)->icsk_ca_ops; |
3268 | 3280 | ||
3281 | if (unlikely(icsk->icsk_mtup.probe_size && | ||
3282 | !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { | ||
3283 | tcp_mtup_probe_success(sk); | ||
3284 | } | ||
3285 | |||
3269 | tcp_ack_update_rtt(sk, flag, seq_rtt); | 3286 | tcp_ack_update_rtt(sk, flag, seq_rtt); |
3270 | tcp_rearm_rto(sk); | 3287 | tcp_rearm_rto(sk); |
3271 | 3288 | ||
3272 | if (tcp_is_reno(tp)) { | 3289 | if (tcp_is_reno(tp)) { |
3273 | tcp_remove_reno_sacks(sk, pkts_acked); | 3290 | tcp_remove_reno_sacks(sk, pkts_acked); |
3274 | } else { | 3291 | } else { |
3292 | int delta; | ||
3293 | |||
3275 | /* Non-retransmitted hole got filled? That's reordering */ | 3294 | /* Non-retransmitted hole got filled? That's reordering */ |
3276 | if (reord < prior_fackets) | 3295 | if (reord < prior_fackets) |
3277 | tcp_update_reordering(sk, tp->fackets_out - reord, 0); | 3296 | tcp_update_reordering(sk, tp->fackets_out - reord, 0); |
3278 | 3297 | ||
3279 | /* No need to care for underflows here because | 3298 | delta = tcp_is_fack(tp) ? pkts_acked : |
3280 | * the lost_skb_hint gets NULLed if we're past it | 3299 | prior_sacked - tp->sacked_out; |
3281 | * (or something non-trivial happened) | 3300 | tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta); |
3282 | */ | ||
3283 | if (tcp_is_fack(tp)) | ||
3284 | tp->lost_cnt_hint -= pkts_acked; | ||
3285 | else | ||
3286 | tp->lost_cnt_hint -= prior_sacked - tp->sacked_out; | ||
3287 | } | 3301 | } |
3288 | 3302 | ||
3289 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); | 3303 | tp->fackets_out -= min(pkts_acked, tp->fackets_out); |
@@ -3395,7 +3409,7 @@ static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack, | |||
3395 | 3409 | ||
3396 | if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { | 3410 | if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { |
3397 | flag |= FLAG_WIN_UPDATE; | 3411 | flag |= FLAG_WIN_UPDATE; |
3398 | tcp_update_wl(tp, ack, ack_seq); | 3412 | tcp_update_wl(tp, ack_seq); |
3399 | 3413 | ||
3400 | if (tp->snd_wnd != nwin) { | 3414 | if (tp->snd_wnd != nwin) { |
3401 | tp->snd_wnd = nwin; | 3415 | tp->snd_wnd = nwin; |
@@ -3571,15 +3585,18 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
3571 | int prior_packets; | 3585 | int prior_packets; |
3572 | int frto_cwnd = 0; | 3586 | int frto_cwnd = 0; |
3573 | 3587 | ||
3574 | /* If the ack is newer than sent or older than previous acks | 3588 | /* If the ack is older than previous acks |
3575 | * then we can probably ignore it. | 3589 | * then we can probably ignore it. |
3576 | */ | 3590 | */ |
3577 | if (after(ack, tp->snd_nxt)) | ||
3578 | goto uninteresting_ack; | ||
3579 | |||
3580 | if (before(ack, prior_snd_una)) | 3591 | if (before(ack, prior_snd_una)) |
3581 | goto old_ack; | 3592 | goto old_ack; |
3582 | 3593 | ||
3594 | /* If the ack includes data we haven't sent yet, discard | ||
3595 | * this segment (RFC793 Section 3.9). | ||
3596 | */ | ||
3597 | if (after(ack, tp->snd_nxt)) | ||
3598 | goto invalid_ack; | ||
3599 | |||
3583 | if (after(ack, prior_snd_una)) | 3600 | if (after(ack, prior_snd_una)) |
3584 | flag |= FLAG_SND_UNA_ADVANCED; | 3601 | flag |= FLAG_SND_UNA_ADVANCED; |
3585 | 3602 | ||
@@ -3600,7 +3617,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) | |||
3600 | * No more checks are required. | 3617 | * No more checks are required. |
3601 | * Note, we use the fact that SND.UNA>=SND.WL2. | 3618 | * Note, we use the fact that SND.UNA>=SND.WL2. |
3602 | */ | 3619 | */ |
3603 | tcp_update_wl(tp, ack, ack_seq); | 3620 | tcp_update_wl(tp, ack_seq); |
3604 | tp->snd_una = ack; | 3621 | tp->snd_una = ack; |
3605 | flag |= FLAG_WIN_UPDATE; | 3622 | flag |= FLAG_WIN_UPDATE; |
3606 | 3623 | ||
@@ -3669,6 +3686,10 @@ no_queue: | |||
3669 | tcp_ack_probe(sk); | 3686 | tcp_ack_probe(sk); |
3670 | return 1; | 3687 | return 1; |
3671 | 3688 | ||
3689 | invalid_ack: | ||
3690 | SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt); | ||
3691 | return -1; | ||
3692 | |||
3672 | old_ack: | 3693 | old_ack: |
3673 | if (TCP_SKB_CB(skb)->sacked) { | 3694 | if (TCP_SKB_CB(skb)->sacked) { |
3674 | tcp_sacktag_write_queue(sk, skb, prior_snd_una); | 3695 | tcp_sacktag_write_queue(sk, skb, prior_snd_una); |
@@ -3676,8 +3697,7 @@ old_ack: | |||
3676 | tcp_try_keep_open(sk); | 3697 | tcp_try_keep_open(sk); |
3677 | } | 3698 | } |
3678 | 3699 | ||
3679 | uninteresting_ack: | 3700 | SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); |
3680 | SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); | ||
3681 | return 0; | 3701 | return 0; |
3682 | } | 3702 | } |
3683 | 3703 | ||
@@ -3865,8 +3885,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) | |||
3865 | * Not only, also it occurs for expired timestamps. | 3885 | * Not only, also it occurs for expired timestamps. |
3866 | */ | 3886 | */ |
3867 | 3887 | ||
3868 | if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 || | 3888 | if (tcp_paws_check(&tp->rx_opt, 0)) |
3869 | get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS) | ||
3870 | tcp_store_ts_recent(tp); | 3889 | tcp_store_ts_recent(tp); |
3871 | } | 3890 | } |
3872 | } | 3891 | } |
@@ -3918,9 +3937,9 @@ static inline int tcp_paws_discard(const struct sock *sk, | |||
3918 | const struct sk_buff *skb) | 3937 | const struct sk_buff *skb) |
3919 | { | 3938 | { |
3920 | const struct tcp_sock *tp = tcp_sk(sk); | 3939 | const struct tcp_sock *tp = tcp_sk(sk); |
3921 | return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && | 3940 | |
3922 | get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && | 3941 | return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) && |
3923 | !tcp_disordered_ack(sk, skb)); | 3942 | !tcp_disordered_ack(sk, skb); |
3924 | } | 3943 | } |
3925 | 3944 | ||
3926 | /* Check segment sequence number for validity. | 3945 | /* Check segment sequence number for validity. |
@@ -4078,7 +4097,6 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) | |||
4078 | tp->rx_opt.dsack = 1; | 4097 | tp->rx_opt.dsack = 1; |
4079 | tp->duplicate_sack[0].start_seq = seq; | 4098 | tp->duplicate_sack[0].start_seq = seq; |
4080 | tp->duplicate_sack[0].end_seq = end_seq; | 4099 | tp->duplicate_sack[0].end_seq = end_seq; |
4081 | tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1; | ||
4082 | } | 4100 | } |
4083 | } | 4101 | } |
4084 | 4102 | ||
@@ -4133,8 +4151,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) | |||
4133 | * Decrease num_sacks. | 4151 | * Decrease num_sacks. |
4134 | */ | 4152 | */ |
4135 | tp->rx_opt.num_sacks--; | 4153 | tp->rx_opt.num_sacks--; |
4136 | tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + | ||
4137 | tp->rx_opt.dsack; | ||
4138 | for (i = this_sack; i < tp->rx_opt.num_sacks; i++) | 4154 | for (i = this_sack; i < tp->rx_opt.num_sacks; i++) |
4139 | sp[i] = sp[i + 1]; | 4155 | sp[i] = sp[i + 1]; |
4140 | continue; | 4156 | continue; |
@@ -4143,20 +4159,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) | |||
4143 | } | 4159 | } |
4144 | } | 4160 | } |
4145 | 4161 | ||
4146 | static inline void tcp_sack_swap(struct tcp_sack_block *sack1, | ||
4147 | struct tcp_sack_block *sack2) | ||
4148 | { | ||
4149 | __u32 tmp; | ||
4150 | |||
4151 | tmp = sack1->start_seq; | ||
4152 | sack1->start_seq = sack2->start_seq; | ||
4153 | sack2->start_seq = tmp; | ||
4154 | |||
4155 | tmp = sack1->end_seq; | ||
4156 | sack1->end_seq = sack2->end_seq; | ||
4157 | sack2->end_seq = tmp; | ||
4158 | } | ||
4159 | |||
4160 | static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) | 4162 | static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) |
4161 | { | 4163 | { |
4162 | struct tcp_sock *tp = tcp_sk(sk); | 4164 | struct tcp_sock *tp = tcp_sk(sk); |
@@ -4171,7 +4173,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) | |||
4171 | if (tcp_sack_extend(sp, seq, end_seq)) { | 4173 | if (tcp_sack_extend(sp, seq, end_seq)) { |
4172 | /* Rotate this_sack to the first one. */ | 4174 | /* Rotate this_sack to the first one. */ |
4173 | for (; this_sack > 0; this_sack--, sp--) | 4175 | for (; this_sack > 0; this_sack--, sp--) |
4174 | tcp_sack_swap(sp, sp - 1); | 4176 | swap(*sp, *(sp - 1)); |
4175 | if (cur_sacks > 1) | 4177 | if (cur_sacks > 1) |
4176 | tcp_sack_maybe_coalesce(tp); | 4178 | tcp_sack_maybe_coalesce(tp); |
4177 | return; | 4179 | return; |
@@ -4197,7 +4199,6 @@ new_sack: | |||
4197 | sp->start_seq = seq; | 4199 | sp->start_seq = seq; |
4198 | sp->end_seq = end_seq; | 4200 | sp->end_seq = end_seq; |
4199 | tp->rx_opt.num_sacks++; | 4201 | tp->rx_opt.num_sacks++; |
4200 | tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; | ||
4201 | } | 4202 | } |
4202 | 4203 | ||
4203 | /* RCV.NXT advances, some SACKs should be eaten. */ | 4204 | /* RCV.NXT advances, some SACKs should be eaten. */ |
@@ -4211,7 +4212,6 @@ static void tcp_sack_remove(struct tcp_sock *tp) | |||
4211 | /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ | 4212 | /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ |
4212 | if (skb_queue_empty(&tp->out_of_order_queue)) { | 4213 | if (skb_queue_empty(&tp->out_of_order_queue)) { |
4213 | tp->rx_opt.num_sacks = 0; | 4214 | tp->rx_opt.num_sacks = 0; |
4214 | tp->rx_opt.eff_sacks = tp->rx_opt.dsack; | ||
4215 | return; | 4215 | return; |
4216 | } | 4216 | } |
4217 | 4217 | ||
@@ -4232,11 +4232,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) | |||
4232 | this_sack++; | 4232 | this_sack++; |
4233 | sp++; | 4233 | sp++; |
4234 | } | 4234 | } |
4235 | if (num_sacks != tp->rx_opt.num_sacks) { | 4235 | tp->rx_opt.num_sacks = num_sacks; |
4236 | tp->rx_opt.num_sacks = num_sacks; | ||
4237 | tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + | ||
4238 | tp->rx_opt.dsack; | ||
4239 | } | ||
4240 | } | 4236 | } |
4241 | 4237 | ||
4242 | /* This one checks to see if we can put data from the | 4238 | /* This one checks to see if we can put data from the |
@@ -4312,10 +4308,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) | |||
4312 | 4308 | ||
4313 | TCP_ECN_accept_cwr(tp, skb); | 4309 | TCP_ECN_accept_cwr(tp, skb); |
4314 | 4310 | ||
4315 | if (tp->rx_opt.dsack) { | 4311 | tp->rx_opt.dsack = 0; |
4316 | tp->rx_opt.dsack = 0; | ||
4317 | tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks; | ||
4318 | } | ||
4319 | 4312 | ||
4320 | /* Queue data for delivery to the user. | 4313 | /* Queue data for delivery to the user. |
4321 | * Packets in sequence go to the receive queue. | 4314 | * Packets in sequence go to the receive queue. |
@@ -4434,8 +4427,6 @@ drop: | |||
4434 | /* Initial out of order segment, build 1 SACK. */ | 4427 | /* Initial out of order segment, build 1 SACK. */ |
4435 | if (tcp_is_sack(tp)) { | 4428 | if (tcp_is_sack(tp)) { |
4436 | tp->rx_opt.num_sacks = 1; | 4429 | tp->rx_opt.num_sacks = 1; |
4437 | tp->rx_opt.dsack = 0; | ||
4438 | tp->rx_opt.eff_sacks = 1; | ||
4439 | tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; | 4430 | tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; |
4440 | tp->selective_acks[0].end_seq = | 4431 | tp->selective_acks[0].end_seq = |
4441 | TCP_SKB_CB(skb)->end_seq; | 4432 | TCP_SKB_CB(skb)->end_seq; |
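The hunks above drop the cached rx_opt.eff_sacks bookkeeping: every place that kept
"num_sacks + dsack" in sync now only touches num_sacks and dsack themselves. The consumer side
derives the value where the SACK option is sized (the corresponding tcp_output.c hunk appears
further down); in isolation that computation reads roughly:

	/* Sketch: effective SACK block count computed on demand instead of cached. */
	unsigned int eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;

	if (eff_sacks) {
		const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
		opts->num_sack_blocks = min_t(unsigned int, eff_sacks,
					      (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
					      TCPOLEN_SACK_PERBLOCK);
	}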
@@ -5156,7 +5147,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, | |||
5156 | */ | 5147 | */ |
5157 | 5148 | ||
5158 | if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && | 5149 | if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && |
5159 | TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { | 5150 | TCP_SKB_CB(skb)->seq == tp->rcv_nxt && |
5151 | !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { | ||
5160 | int tcp_header_len = tp->tcp_header_len; | 5152 | int tcp_header_len = tp->tcp_header_len; |
5161 | 5153 | ||
5162 | /* Timestamp header prediction: tcp_header_len | 5154 | /* Timestamp header prediction: tcp_header_len |
@@ -5309,8 +5301,8 @@ slow_path: | |||
5309 | return -res; | 5301 | return -res; |
5310 | 5302 | ||
5311 | step5: | 5303 | step5: |
5312 | if (th->ack) | 5304 | if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) |
5313 | tcp_ack(sk, skb, FLAG_SLOWPATH); | 5305 | goto discard; |
5314 | 5306 | ||
5315 | tcp_rcv_rtt_measure_ts(sk, skb); | 5307 | tcp_rcv_rtt_measure_ts(sk, skb); |
5316 | 5308 | ||
@@ -5408,7 +5400,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, | |||
5408 | * never scaled. | 5400 | * never scaled. |
5409 | */ | 5401 | */ |
5410 | tp->snd_wnd = ntohs(th->window); | 5402 | tp->snd_wnd = ntohs(th->window); |
5411 | tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); | 5403 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); |
5412 | 5404 | ||
5413 | if (!tp->rx_opt.wscale_ok) { | 5405 | if (!tp->rx_opt.wscale_ok) { |
5414 | tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; | 5406 | tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; |
@@ -5509,7 +5501,7 @@ discard: | |||
5509 | 5501 | ||
5510 | /* PAWS check. */ | 5502 | /* PAWS check. */ |
5511 | if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && | 5503 | if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && |
5512 | tcp_paws_check(&tp->rx_opt, 0)) | 5504 | tcp_paws_reject(&tp->rx_opt, 0)) |
5513 | goto discard_and_undo; | 5505 | goto discard_and_undo; |
5514 | 5506 | ||
5515 | if (th->syn) { | 5507 | if (th->syn) { |
@@ -5647,7 +5639,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5647 | 5639 | ||
5648 | /* step 5: check the ACK field */ | 5640 | /* step 5: check the ACK field */ |
5649 | if (th->ack) { | 5641 | if (th->ack) { |
5650 | int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH); | 5642 | int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; |
5651 | 5643 | ||
5652 | switch (sk->sk_state) { | 5644 | switch (sk->sk_state) { |
5653 | case TCP_SYN_RECV: | 5645 | case TCP_SYN_RECV: |
@@ -5669,8 +5661,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, | |||
5669 | tp->snd_una = TCP_SKB_CB(skb)->ack_seq; | 5661 | tp->snd_una = TCP_SKB_CB(skb)->ack_seq; |
5670 | tp->snd_wnd = ntohs(th->window) << | 5662 | tp->snd_wnd = ntohs(th->window) << |
5671 | tp->rx_opt.snd_wscale; | 5663 | tp->rx_opt.snd_wscale; |
5672 | tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, | 5664 | tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); |
5673 | TCP_SKB_CB(skb)->seq); | ||
5674 | 5665 | ||
5675 | /* tcp_ack considers this ACK as duplicate | 5666 | /* tcp_ack considers this ACK as duplicate |
5676 | * and does not calculate rtt. | 5667 | * and does not calculate rtt. |
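Two caller-visible conventions change at the end of the tcp_input.c hunks: tcp_ack() may now
return a negative value for an ACK that acknowledges data not yet sent (above snd_nxt), which
both the slow path ("step5") and tcp_rcv_state_process() translate into a discard, and the
fast-path header prediction adds the matching !after(ack_seq, snd_nxt) guard. In addition,
tcp_init_wl() loses its unused first argument. A sketch of the reduced helper, assuming it only
seeds the window-update bookkeeping as before:

	/* Sketch: snd_wl1 records the seq of the last segment used for a window update. */
	static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
	{
		tp->snd_wl1 = seq;
	}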
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f6b962f56ab4..d0a314879d81 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c | |||
@@ -1226,15 +1226,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) | |||
1226 | if (want_cookie && !tmp_opt.saw_tstamp) | 1226 | if (want_cookie && !tmp_opt.saw_tstamp) |
1227 | tcp_clear_options(&tmp_opt); | 1227 | tcp_clear_options(&tmp_opt); |
1228 | 1228 | ||
1229 | if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) { | ||
1230 | /* Some OSes (unknown ones, but I see them on web server, which | ||
1231 | * contains information interesting only for windows' | ||
1232 | * users) do not send their stamp in SYN. It is easy case. | ||
1233 | * We simply do not advertise TS support. | ||
1234 | */ | ||
1235 | tmp_opt.saw_tstamp = 0; | ||
1236 | tmp_opt.tstamp_ok = 0; | ||
1237 | } | ||
1238 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; | 1229 | tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; |
1239 | 1230 | ||
1240 | tcp_openreq_init(req, &tmp_opt, skb); | 1231 | tcp_openreq_init(req, &tmp_opt, skb); |
@@ -2443,7 +2434,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { | |||
2443 | void __init tcp_v4_init(void) | 2434 | void __init tcp_v4_init(void) |
2444 | { | 2435 | { |
2445 | inet_hashinfo_init(&tcp_hashinfo); | 2436 | inet_hashinfo_init(&tcp_hashinfo); |
2446 | if (register_pernet_device(&tcp_sk_ops)) | 2437 | if (register_pernet_subsys(&tcp_sk_ops)) |
2447 | panic("Failed to create the TCP control socket.\n"); | 2438 | panic("Failed to create the TCP control socket.\n"); |
2448 | } | 2439 | } |
2449 | 2440 | ||
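tcp_v4_init() now registers its per-namespace state as a pernet subsystem rather than a pernet
device. The point of the switch is ordering: subsystem init hooks run before device-level pernet
hooks when a namespace is set up, and their exit hooks run after them on teardown, so the TCP
control socket is available for the whole lifetime of the namespace's devices. A sketch of the
registration pattern, with the init/exit bodies left as placeholders:

	static int __net_init tcp_sk_init(struct net *net)
	{
		/* create the per-namespace TCP control socket(s) here */
		return 0;
	}

	static void __net_exit tcp_sk_exit(struct net *net)
	{
		/* release them here */
	}

	static struct pernet_operations tcp_sk_ops = {
		.init = tcp_sk_init,
		.exit = tcp_sk_exit,
	};

	if (register_pernet_subsys(&tcp_sk_ops))
		panic("Failed to create the TCP control socket.\n");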
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f67effbb102b..43bbba7926ee 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c | |||
@@ -107,7 +107,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, | |||
107 | if (tmp_opt.saw_tstamp) { | 107 | if (tmp_opt.saw_tstamp) { |
108 | tmp_opt.ts_recent = tcptw->tw_ts_recent; | 108 | tmp_opt.ts_recent = tcptw->tw_ts_recent; |
109 | tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; | 109 | tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; |
110 | paws_reject = tcp_paws_check(&tmp_opt, th->rst); | 110 | paws_reject = tcp_paws_reject(&tmp_opt, th->rst); |
111 | } | 111 | } |
112 | } | 112 | } |
113 | 113 | ||
@@ -399,7 +399,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
399 | 399 | ||
400 | tcp_prequeue_init(newtp); | 400 | tcp_prequeue_init(newtp); |
401 | 401 | ||
402 | tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn); | 402 | tcp_init_wl(newtp, treq->rcv_isn); |
403 | 403 | ||
404 | newtp->srtt = 0; | 404 | newtp->srtt = 0; |
405 | newtp->mdev = TCP_TIMEOUT_INIT; | 405 | newtp->mdev = TCP_TIMEOUT_INIT; |
@@ -434,9 +434,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, | |||
434 | newtp->rx_opt.saw_tstamp = 0; | 434 | newtp->rx_opt.saw_tstamp = 0; |
435 | 435 | ||
436 | newtp->rx_opt.dsack = 0; | 436 | newtp->rx_opt.dsack = 0; |
437 | newtp->rx_opt.eff_sacks = 0; | ||
438 | |||
439 | newtp->rx_opt.num_sacks = 0; | 437 | newtp->rx_opt.num_sacks = 0; |
438 | |||
440 | newtp->urg_data = 0; | 439 | newtp->urg_data = 0; |
441 | 440 | ||
442 | if (sock_flag(newsk, SOCK_KEEPOPEN)) | 441 | if (sock_flag(newsk, SOCK_KEEPOPEN)) |
@@ -512,7 +511,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, | |||
512 | * from another data. | 511 | * from another data. |
513 | */ | 512 | */ |
514 | tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); | 513 | tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); |
515 | paws_reject = tcp_paws_check(&tmp_opt, th->rst); | 514 | paws_reject = tcp_paws_reject(&tmp_opt, th->rst); |
516 | } | 515 | } |
517 | } | 516 | } |
518 | 517 | ||
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dda42f0bd7a3..c1f259d2d33b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -441,10 +441,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, | |||
441 | *ptr++ = htonl(sp[this_sack].end_seq); | 441 | *ptr++ = htonl(sp[this_sack].end_seq); |
442 | } | 442 | } |
443 | 443 | ||
444 | if (tp->rx_opt.dsack) { | 444 | tp->rx_opt.dsack = 0; |
445 | tp->rx_opt.dsack = 0; | ||
446 | tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks; | ||
447 | } | ||
448 | } | 445 | } |
449 | } | 446 | } |
450 | 447 | ||
@@ -550,6 +547,7 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | |||
550 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; | 547 | struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; |
551 | struct tcp_sock *tp = tcp_sk(sk); | 548 | struct tcp_sock *tp = tcp_sk(sk); |
552 | unsigned size = 0; | 549 | unsigned size = 0; |
550 | unsigned int eff_sacks; | ||
553 | 551 | ||
554 | #ifdef CONFIG_TCP_MD5SIG | 552 | #ifdef CONFIG_TCP_MD5SIG |
555 | *md5 = tp->af_specific->md5_lookup(sk, sk); | 553 | *md5 = tp->af_specific->md5_lookup(sk, sk); |
@@ -568,10 +566,11 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, | |||
568 | size += TCPOLEN_TSTAMP_ALIGNED; | 566 | size += TCPOLEN_TSTAMP_ALIGNED; |
569 | } | 567 | } |
570 | 568 | ||
571 | if (unlikely(tp->rx_opt.eff_sacks)) { | 569 | eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; |
570 | if (unlikely(eff_sacks)) { | ||
572 | const unsigned remaining = MAX_TCP_OPTION_SPACE - size; | 571 | const unsigned remaining = MAX_TCP_OPTION_SPACE - size; |
573 | opts->num_sack_blocks = | 572 | opts->num_sack_blocks = |
574 | min_t(unsigned, tp->rx_opt.eff_sacks, | 573 | min_t(unsigned, eff_sacks, |
575 | (remaining - TCPOLEN_SACK_BASE_ALIGNED) / | 574 | (remaining - TCPOLEN_SACK_BASE_ALIGNED) / |
576 | TCPOLEN_SACK_PERBLOCK); | 575 | TCPOLEN_SACK_PERBLOCK); |
577 | size += TCPOLEN_SACK_BASE_ALIGNED + | 576 | size += TCPOLEN_SACK_BASE_ALIGNED + |
@@ -663,10 +662,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, | |||
663 | th->urg_ptr = 0; | 662 | th->urg_ptr = 0; |
664 | 663 | ||
665 | /* The urg_mode check is necessary during a below snd_una win probe */ | 664 | /* The urg_mode check is necessary during a below snd_una win probe */ |
666 | if (unlikely(tcp_urg_mode(tp) && | 665 | if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) { |
667 | between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { | 666 | if (before(tp->snd_up, tcb->seq + 0x10000)) { |
668 | th->urg_ptr = htons(tp->snd_up - tcb->seq); | 667 | th->urg_ptr = htons(tp->snd_up - tcb->seq); |
669 | th->urg = 1; | 668 | th->urg = 1; |
669 | } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { | ||
670 | th->urg_ptr = 0xFFFF; | ||
671 | th->urg = 1; | ||
672 | } | ||
670 | } | 673 | } |
671 | 674 | ||
672 | tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); | 675 | tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); |
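The urgent-pointer hunk above widens the old "within 64K of this segment" test: when the urgent
point lies beyond what the 16-bit field can express but everything currently in flight is closer
than 64K, the pointer is pinned at 0xFFFF so the receiver still sees URG set. The same decision
in isolation, assuming tcb->seq, tp->snd_up and tp->snd_nxt carry their usual meanings:

	if (tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up)) {
		if (before(tp->snd_up, tcb->seq + 0x10000)) {
			/* urgent point is representable relative to this segment */
			th->urg_ptr = htons(tp->snd_up - tcb->seq);
			th->urg = 1;
		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
			/* urgent point out of 16-bit range: saturate the pointer */
			th->urg_ptr = 0xFFFF;
			th->urg = 1;
		}
	}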
@@ -763,11 +766,10 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, | |||
763 | struct sk_buff *buff; | 766 | struct sk_buff *buff; |
764 | int nsize, old_factor; | 767 | int nsize, old_factor; |
765 | int nlen; | 768 | int nlen; |
766 | u16 flags; | 769 | u8 flags; |
767 | 770 | ||
768 | BUG_ON(len > skb->len); | 771 | BUG_ON(len > skb->len); |
769 | 772 | ||
770 | tcp_clear_retrans_hints_partial(tp); | ||
771 | nsize = skb_headlen(skb) - len; | 773 | nsize = skb_headlen(skb) - len; |
772 | if (nsize < 0) | 774 | if (nsize < 0) |
773 | nsize = 0; | 775 | nsize = 0; |
@@ -850,6 +852,12 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, | |||
850 | tcp_verify_left_out(tp); | 852 | tcp_verify_left_out(tp); |
851 | } | 853 | } |
852 | tcp_adjust_fackets_out(sk, skb, diff); | 854 | tcp_adjust_fackets_out(sk, skb, diff); |
855 | |||
856 | if (tp->lost_skb_hint && | ||
857 | before(TCP_SKB_CB(skb)->seq, | ||
858 | TCP_SKB_CB(tp->lost_skb_hint)->seq) && | ||
859 | (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked)) | ||
860 | tp->lost_cnt_hint -= diff; | ||
853 | } | 861 | } |
854 | 862 | ||
855 | /* Link BUFF into the send queue. */ | 863 | /* Link BUFF into the send queue. */ |
@@ -913,7 +921,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) | |||
913 | * factor and mss. | 921 | * factor and mss. |
914 | */ | 922 | */ |
915 | if (tcp_skb_pcount(skb) > 1) | 923 | if (tcp_skb_pcount(skb) > 1) |
916 | tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1)); | 924 | tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk)); |
917 | 925 | ||
918 | return 0; | 926 | return 0; |
919 | } | 927 | } |
@@ -974,15 +982,6 @@ void tcp_mtup_init(struct sock *sk) | |||
974 | icsk->icsk_mtup.probe_size = 0; | 982 | icsk->icsk_mtup.probe_size = 0; |
975 | } | 983 | } |
976 | 984 | ||
977 | /* Bound MSS / TSO packet size with the half of the window */ | ||
978 | static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) | ||
979 | { | ||
980 | if (tp->max_window && pktsize > (tp->max_window >> 1)) | ||
981 | return max(tp->max_window >> 1, 68U - tp->tcp_header_len); | ||
982 | else | ||
983 | return pktsize; | ||
984 | } | ||
985 | |||
986 | /* This function synchronize snd mss to current pmtu/exthdr set. | 985 | /* This function synchronize snd mss to current pmtu/exthdr set. |
987 | 986 | ||
988 | tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts | 987 | tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts |
@@ -1029,22 +1028,17 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) | |||
1029 | /* Compute the current effective MSS, taking SACKs and IP options, | 1028 | /* Compute the current effective MSS, taking SACKs and IP options, |
1030 | * and even PMTU discovery events into account. | 1029 | * and even PMTU discovery events into account. |
1031 | */ | 1030 | */ |
1032 | unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | 1031 | unsigned int tcp_current_mss(struct sock *sk) |
1033 | { | 1032 | { |
1034 | struct tcp_sock *tp = tcp_sk(sk); | 1033 | struct tcp_sock *tp = tcp_sk(sk); |
1035 | struct dst_entry *dst = __sk_dst_get(sk); | 1034 | struct dst_entry *dst = __sk_dst_get(sk); |
1036 | u32 mss_now; | 1035 | u32 mss_now; |
1037 | u16 xmit_size_goal; | ||
1038 | int doing_tso = 0; | ||
1039 | unsigned header_len; | 1036 | unsigned header_len; |
1040 | struct tcp_out_options opts; | 1037 | struct tcp_out_options opts; |
1041 | struct tcp_md5sig_key *md5; | 1038 | struct tcp_md5sig_key *md5; |
1042 | 1039 | ||
1043 | mss_now = tp->mss_cache; | 1040 | mss_now = tp->mss_cache; |
1044 | 1041 | ||
1045 | if (large_allowed && sk_can_gso(sk)) | ||
1046 | doing_tso = 1; | ||
1047 | |||
1048 | if (dst) { | 1042 | if (dst) { |
1049 | u32 mtu = dst_mtu(dst); | 1043 | u32 mtu = dst_mtu(dst); |
1050 | if (mtu != inet_csk(sk)->icsk_pmtu_cookie) | 1044 | if (mtu != inet_csk(sk)->icsk_pmtu_cookie) |
@@ -1062,19 +1056,6 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) | |||
1062 | mss_now -= delta; | 1056 | mss_now -= delta; |
1063 | } | 1057 | } |
1064 | 1058 | ||
1065 | xmit_size_goal = mss_now; | ||
1066 | |||
1067 | if (doing_tso) { | ||
1068 | xmit_size_goal = ((sk->sk_gso_max_size - 1) - | ||
1069 | inet_csk(sk)->icsk_af_ops->net_header_len - | ||
1070 | inet_csk(sk)->icsk_ext_hdr_len - | ||
1071 | tp->tcp_header_len); | ||
1072 | |||
1073 | xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); | ||
1074 | xmit_size_goal -= (xmit_size_goal % mss_now); | ||
1075 | } | ||
1076 | tp->xmit_size_goal = xmit_size_goal; | ||
1077 | |||
1078 | return mss_now; | 1059 | return mss_now; |
1079 | } | 1060 | } |
1080 | 1061 | ||
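tcp_current_mss() no longer takes a "large_allowed" flag and no longer maintains
tp->xmit_size_goal as a side effect; it simply returns the current MSS, and callers throughout
the rest of this patch drop the second argument accordingly. The TSO size-goal calculation
presumably lives on as a separate helper outside these hunks (in net/ipv4/tcp.c); a sketch of the
resulting call-site split, with tcp_xmit_size_goal() named here only as an assumption:

	mss_now   = tcp_current_mss(sk);                  /* pure MSS, no side effects */
	size_goal = tcp_xmit_size_goal(sk, mss_now, 1);   /* assumed helper for the TSO goal */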
@@ -1256,7 +1237,7 @@ int tcp_may_send_now(struct sock *sk) | |||
1256 | struct sk_buff *skb = tcp_send_head(sk); | 1237 | struct sk_buff *skb = tcp_send_head(sk); |
1257 | 1238 | ||
1258 | return (skb && | 1239 | return (skb && |
1259 | tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), | 1240 | tcp_snd_test(sk, skb, tcp_current_mss(sk), |
1260 | (tcp_skb_is_last(sk, skb) ? | 1241 | (tcp_skb_is_last(sk, skb) ? |
1261 | tp->nonagle : TCP_NAGLE_PUSH))); | 1242 | tp->nonagle : TCP_NAGLE_PUSH))); |
1262 | } | 1243 | } |
@@ -1273,7 +1254,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, | |||
1273 | { | 1254 | { |
1274 | struct sk_buff *buff; | 1255 | struct sk_buff *buff; |
1275 | int nlen = skb->len - len; | 1256 | int nlen = skb->len - len; |
1276 | u16 flags; | 1257 | u8 flags; |
1277 | 1258 | ||
1278 | /* All of a TSO frame must be composed of paged data. */ | 1259 | /* All of a TSO frame must be composed of paged data. */ |
1279 | if (skb->len != skb->data_len) | 1260 | if (skb->len != skb->data_len) |
@@ -1352,6 +1333,10 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) | |||
1352 | if (limit >= sk->sk_gso_max_size) | 1333 | if (limit >= sk->sk_gso_max_size) |
1353 | goto send_now; | 1334 | goto send_now; |
1354 | 1335 | ||
1336 | /* Middle in queue won't get any more data, full sendable already? */ | ||
1337 | if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) | ||
1338 | goto send_now; | ||
1339 | |||
1355 | if (sysctl_tcp_tso_win_divisor) { | 1340 | if (sysctl_tcp_tso_win_divisor) { |
1356 | u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); | 1341 | u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); |
1357 | 1342 | ||
@@ -1405,11 +1390,11 @@ static int tcp_mtu_probe(struct sock *sk) | |||
1405 | icsk->icsk_mtup.probe_size || | 1390 | icsk->icsk_mtup.probe_size || |
1406 | inet_csk(sk)->icsk_ca_state != TCP_CA_Open || | 1391 | inet_csk(sk)->icsk_ca_state != TCP_CA_Open || |
1407 | tp->snd_cwnd < 11 || | 1392 | tp->snd_cwnd < 11 || |
1408 | tp->rx_opt.eff_sacks) | 1393 | tp->rx_opt.num_sacks || tp->rx_opt.dsack) |
1409 | return -1; | 1394 | return -1; |
1410 | 1395 | ||
1411 | /* Very simple search strategy: just double the MSS. */ | 1396 | /* Very simple search strategy: just double the MSS. */ |
1412 | mss_now = tcp_current_mss(sk, 0); | 1397 | mss_now = tcp_current_mss(sk); |
1413 | probe_size = 2 * tp->mss_cache; | 1398 | probe_size = 2 * tp->mss_cache; |
1414 | size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; | 1399 | size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; |
1415 | if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) { | 1400 | if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) { |
@@ -1754,11 +1739,9 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) | |||
1754 | struct tcp_sock *tp = tcp_sk(sk); | 1739 | struct tcp_sock *tp = tcp_sk(sk); |
1755 | struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); | 1740 | struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); |
1756 | int skb_size, next_skb_size; | 1741 | int skb_size, next_skb_size; |
1757 | u16 flags; | ||
1758 | 1742 | ||
1759 | skb_size = skb->len; | 1743 | skb_size = skb->len; |
1760 | next_skb_size = next_skb->len; | 1744 | next_skb_size = next_skb->len; |
1761 | flags = TCP_SKB_CB(skb)->flags; | ||
1762 | 1745 | ||
1763 | BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); | 1746 | BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); |
1764 | 1747 | ||
@@ -1778,9 +1761,8 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) | |||
1778 | /* Update sequence range on original skb. */ | 1761 | /* Update sequence range on original skb. */ |
1779 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; | 1762 | TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; |
1780 | 1763 | ||
1781 | /* Merge over control information. */ | 1764 | /* Merge over control information. This moves PSH/FIN etc. over */ |
1782 | flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ | 1765 | TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags; |
1783 | TCP_SKB_CB(skb)->flags = flags; | ||
1784 | 1766 | ||
1785 | /* All done, get rid of second SKB and account for it so | 1767 | /* All done, get rid of second SKB and account for it so |
1786 | * packet counting does not break. | 1768 | * packet counting does not break. |
@@ -1894,7 +1876,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1894 | if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) | 1876 | if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) |
1895 | return -EHOSTUNREACH; /* Routing failure or similar. */ | 1877 | return -EHOSTUNREACH; /* Routing failure or similar. */ |
1896 | 1878 | ||
1897 | cur_mss = tcp_current_mss(sk, 0); | 1879 | cur_mss = tcp_current_mss(sk); |
1898 | 1880 | ||
1899 | /* If receiver has shrunk his window, and skb is out of | 1881 | /* If receiver has shrunk his window, and skb is out of |
1900 | * new window, do not retransmit it. The exception is the | 1882 | * new window, do not retransmit it. The exception is the |
@@ -1908,6 +1890,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) | |||
1908 | if (skb->len > cur_mss) { | 1890 | if (skb->len > cur_mss) { |
1909 | if (tcp_fragment(sk, skb, cur_mss, cur_mss)) | 1891 | if (tcp_fragment(sk, skb, cur_mss, cur_mss)) |
1910 | return -ENOMEM; /* We'll try again later. */ | 1892 | return -ENOMEM; /* We'll try again later. */ |
1893 | } else { | ||
1894 | tcp_init_tso_segs(sk, skb, cur_mss); | ||
1911 | } | 1895 | } |
1912 | 1896 | ||
1913 | tcp_retrans_try_collapse(sk, skb, cur_mss); | 1897 | tcp_retrans_try_collapse(sk, skb, cur_mss); |
@@ -2023,7 +2007,6 @@ void tcp_xmit_retransmit_queue(struct sock *sk) | |||
2023 | last_lost = tp->snd_una; | 2007 | last_lost = tp->snd_una; |
2024 | } | 2008 | } |
2025 | 2009 | ||
2026 | /* First pass: retransmit lost packets. */ | ||
2027 | tcp_for_write_queue_from(skb, sk) { | 2010 | tcp_for_write_queue_from(skb, sk) { |
2028 | __u8 sacked = TCP_SKB_CB(skb)->sacked; | 2011 | __u8 sacked = TCP_SKB_CB(skb)->sacked; |
2029 | 2012 | ||
@@ -2062,7 +2045,7 @@ begin_fwd: | |||
2062 | goto begin_fwd; | 2045 | goto begin_fwd; |
2063 | 2046 | ||
2064 | } else if (!(sacked & TCPCB_LOST)) { | 2047 | } else if (!(sacked & TCPCB_LOST)) { |
2065 | if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS)) | 2048 | if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED))) |
2066 | hole = skb; | 2049 | hole = skb; |
2067 | continue; | 2050 | continue; |
2068 | 2051 | ||
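The one-line change above tightens which skb may be remembered as the rewind point ("hole") for
forward retransmissions: an already-SACKed segment needs no retransmission, so only a segment
that is neither retransmitted nor SACKed qualifies. In isolation:

	} else if (!(sacked & TCPCB_LOST)) {
		/* remember the first un-retransmitted, un-SACKed segment as the hole */
		if (hole == NULL &&
		    !(sacked & (TCPCB_SACKED_RETRANS | TCPCB_SACKED_ACKED)))
			hole = skb;
		continue;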
@@ -2101,7 +2084,7 @@ void tcp_send_fin(struct sock *sk) | |||
2101 | * unsent frames. But be careful about outgoing SACKS | 2084 | * unsent frames. But be careful about outgoing SACKS |
2102 | * and IP options. | 2085 | * and IP options. |
2103 | */ | 2086 | */ |
2104 | mss_now = tcp_current_mss(sk, 1); | 2087 | mss_now = tcp_current_mss(sk); |
2105 | 2088 | ||
2106 | if (tcp_send_head(sk) != NULL) { | 2089 | if (tcp_send_head(sk) != NULL) { |
2107 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; | 2090 | TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; |
@@ -2326,7 +2309,7 @@ static void tcp_connect_init(struct sock *sk) | |||
2326 | sk->sk_err = 0; | 2309 | sk->sk_err = 0; |
2327 | sock_reset_flag(sk, SOCK_DONE); | 2310 | sock_reset_flag(sk, SOCK_DONE); |
2328 | tp->snd_wnd = 0; | 2311 | tp->snd_wnd = 0; |
2329 | tcp_init_wl(tp, tp->write_seq, 0); | 2312 | tcp_init_wl(tp, 0); |
2330 | tp->snd_una = tp->write_seq; | 2313 | tp->snd_una = tp->write_seq; |
2331 | tp->snd_sml = tp->write_seq; | 2314 | tp->snd_sml = tp->write_seq; |
2332 | tp->snd_up = tp->write_seq; | 2315 | tp->snd_up = tp->write_seq; |
@@ -2513,7 +2496,7 @@ int tcp_write_wakeup(struct sock *sk) | |||
2513 | if ((skb = tcp_send_head(sk)) != NULL && | 2496 | if ((skb = tcp_send_head(sk)) != NULL && |
2514 | before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { | 2497 | before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { |
2515 | int err; | 2498 | int err; |
2516 | unsigned int mss = tcp_current_mss(sk, 0); | 2499 | unsigned int mss = tcp_current_mss(sk); |
2517 | unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; | 2500 | unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; |
2518 | 2501 | ||
2519 | if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) | 2502 | if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) |
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 25524d4e372a..59f5b5e7c566 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c | |||
@@ -165,9 +165,10 @@ static int tcpprobe_sprint(char *tbuf, int n) | |||
165 | static ssize_t tcpprobe_read(struct file *file, char __user *buf, | 165 | static ssize_t tcpprobe_read(struct file *file, char __user *buf, |
166 | size_t len, loff_t *ppos) | 166 | size_t len, loff_t *ppos) |
167 | { | 167 | { |
168 | int error = 0, cnt = 0; | 168 | int error = 0; |
169 | size_t cnt = 0; | ||
169 | 170 | ||
170 | if (!buf || len < 0) | 171 | if (!buf) |
171 | return -EINVAL; | 172 | return -EINVAL; |
172 | 173 | ||
173 | while (cnt < len) { | 174 | while (cnt < len) { |
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 2747ec7bfb63..a76513779e2b 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c | |||
@@ -1,6 +1,6 @@ | |||
1 | /* Tom Kelly's Scalable TCP | 1 | /* Tom Kelly's Scalable TCP |
2 | * | 2 | * |
3 | * See htt://www-lce.eng.cam.ac.uk/~ctk21/scalable/ | 3 | * See http://www.deneholme.net/tom/scalable/ |
4 | * | 4 | * |
5 | * John Heffner <jheffner@sc.edu> | 5 | * John Heffner <jheffner@sc.edu> |
6 | */ | 6 | */ |
@@ -24,14 +24,8 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
24 | 24 | ||
25 | if (tp->snd_cwnd <= tp->snd_ssthresh) | 25 | if (tp->snd_cwnd <= tp->snd_ssthresh) |
26 | tcp_slow_start(tp); | 26 | tcp_slow_start(tp); |
27 | else { | 27 | else |
28 | tp->snd_cwnd_cnt++; | 28 | tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)); |
29 | if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ | ||
30 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
31 | tp->snd_cwnd++; | ||
32 | tp->snd_cwnd_cnt = 0; | ||
33 | } | ||
34 | } | ||
35 | } | 29 | } |
36 | 30 | ||
37 | static u32 tcp_scalable_ssthresh(struct sock *sk) | 31 | static u32 tcp_scalable_ssthresh(struct sock *sk) |
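Scalable here, and Veno and YeAH further down, all replace the same open-coded additive-increase
loop with a shared helper. A sketch of what tcp_cong_avoid_ai() amounts to, assuming it matches
the loops it replaces (grow cwnd by one segment for every w counted ACKs, honouring
snd_cwnd_clamp):

	/* Sketch: Reno-style additive increase with a per-window counter. */
	void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w)
	{
		if (tp->snd_cwnd_cnt >= w) {
			if (tp->snd_cwnd < tp->snd_cwnd_clamp)
				tp->snd_cwnd++;
			tp->snd_cwnd_cnt = 0;
		} else {
			tp->snd_cwnd_cnt++;
		}
	}

Scalable passes min(snd_cwnd, TCP_SCALABLE_AI_CNT) as w, which caps its growth rate; Veno and
YeAH pass snd_cwnd itself, i.e. plain Reno increase in their non-congestive branch.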
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0170e914f1b0..b144a26359bc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -328,19 +328,16 @@ static void tcp_retransmit_timer(struct sock *sk) | |||
328 | if (icsk->icsk_retransmits == 0) { | 328 | if (icsk->icsk_retransmits == 0) { |
329 | int mib_idx; | 329 | int mib_idx; |
330 | 330 | ||
331 | if (icsk->icsk_ca_state == TCP_CA_Disorder || | 331 | if (icsk->icsk_ca_state == TCP_CA_Disorder) { |
332 | icsk->icsk_ca_state == TCP_CA_Recovery) { | 332 | if (tcp_is_sack(tp)) |
333 | if (tcp_is_sack(tp)) { | 333 | mib_idx = LINUX_MIB_TCPSACKFAILURES; |
334 | if (icsk->icsk_ca_state == TCP_CA_Recovery) | 334 | else |
335 | mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; | 335 | mib_idx = LINUX_MIB_TCPRENOFAILURES; |
336 | else | 336 | } else if (icsk->icsk_ca_state == TCP_CA_Recovery) { |
337 | mib_idx = LINUX_MIB_TCPSACKFAILURES; | 337 | if (tcp_is_sack(tp)) |
338 | } else { | 338 | mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; |
339 | if (icsk->icsk_ca_state == TCP_CA_Recovery) | 339 | else |
340 | mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; | 340 | mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; |
341 | else | ||
342 | mib_idx = LINUX_MIB_TCPRENOFAILURES; | ||
343 | } | ||
344 | } else if (icsk->icsk_ca_state == TCP_CA_Loss) { | 341 | } else if (icsk->icsk_ca_state == TCP_CA_Loss) { |
345 | mib_idx = LINUX_MIB_TCPLOSSFAILURES; | 342 | mib_idx = LINUX_MIB_TCPLOSSFAILURES; |
346 | } else { | 343 | } else { |
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index d08b2e855c22..e9bbff746488 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c | |||
@@ -159,12 +159,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
159 | /* In the "non-congestive state", increase cwnd | 159 | /* In the "non-congestive state", increase cwnd |
160 | * every rtt. | 160 | * every rtt. |
161 | */ | 161 | */ |
162 | if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { | 162 | tcp_cong_avoid_ai(tp, tp->snd_cwnd); |
163 | if (tp->snd_cwnd < tp->snd_cwnd_clamp) | ||
164 | tp->snd_cwnd++; | ||
165 | tp->snd_cwnd_cnt = 0; | ||
166 | } else | ||
167 | tp->snd_cwnd_cnt++; | ||
168 | } else { | 163 | } else { |
169 | /* In the "congestive state", increase cwnd | 164 | /* In the "congestive state", increase cwnd |
170 | * every other rtt. | 165 | * every other rtt. |
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 9ec843a9bbb2..66b6821b984e 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c | |||
@@ -94,14 +94,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
94 | 94 | ||
95 | } else { | 95 | } else { |
96 | /* Reno */ | 96 | /* Reno */ |
97 | 97 | tcp_cong_avoid_ai(tp, tp->snd_cwnd); | |
98 | if (tp->snd_cwnd_cnt < tp->snd_cwnd) | ||
99 | tp->snd_cwnd_cnt++; | ||
100 | |||
101 | if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { | ||
102 | tp->snd_cwnd++; | ||
103 | tp->snd_cwnd_cnt = 0; | ||
104 | } | ||
105 | } | 98 | } |
106 | 99 | ||
107 | /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. | 100 | /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. |
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4bd178a111d5..05b7abb99f69 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c | |||
@@ -1184,7 +1184,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, | |||
1184 | sk = sknext; | 1184 | sk = sknext; |
1185 | } while (sknext); | 1185 | } while (sknext); |
1186 | } else | 1186 | } else |
1187 | kfree_skb(skb); | 1187 | consume_skb(skb); |
1188 | spin_unlock(&hslot->lock); | 1188 | spin_unlock(&hslot->lock); |
1189 | return 0; | 1189 | return 0; |
1190 | } | 1190 | } |
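The udp.c hunk swaps kfree_skb() for consume_skb() where freeing the skb is not an error drop.
Both release the buffer; the difference is instrumentation: kfree_skb() is meant for drops (and
shows up as such to drop monitoring), while consume_skb() marks a normal end of life. Roughly,
with "delivered" standing in as an illustrative flag:

	if (delivered)
		consume_skb(skb);   /* normal consumption, not a drop */
	else
		kfree_skb(skb);     /* an actual drop, keep it visible to drop monitoring */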
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 03e2a1ad71e9..8499da9e76a2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c | |||
@@ -40,6 +40,7 @@ | |||
40 | 40 | ||
41 | #include <linux/errno.h> | 41 | #include <linux/errno.h> |
42 | #include <linux/types.h> | 42 | #include <linux/types.h> |
43 | #include <linux/kernel.h> | ||
43 | #include <linux/socket.h> | 44 | #include <linux/socket.h> |
44 | #include <linux/sockios.h> | 45 | #include <linux/sockios.h> |
45 | #include <linux/net.h> | 46 | #include <linux/net.h> |
@@ -493,15 +494,17 @@ static void addrconf_forward_change(struct net *net, __s32 newf) | |||
493 | read_unlock(&dev_base_lock); | 494 | read_unlock(&dev_base_lock); |
494 | } | 495 | } |
495 | 496 | ||
496 | static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) | 497 | static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) |
497 | { | 498 | { |
498 | struct net *net; | 499 | struct net *net; |
499 | 500 | ||
500 | net = (struct net *)table->extra2; | 501 | net = (struct net *)table->extra2; |
501 | if (p == &net->ipv6.devconf_dflt->forwarding) | 502 | if (p == &net->ipv6.devconf_dflt->forwarding) |
502 | return; | 503 | return 0; |
504 | |||
505 | if (!rtnl_trylock()) | ||
506 | return -ERESTARTSYS; | ||
503 | 507 | ||
504 | rtnl_lock(); | ||
505 | if (p == &net->ipv6.devconf_all->forwarding) { | 508 | if (p == &net->ipv6.devconf_all->forwarding) { |
506 | __s32 newf = net->ipv6.devconf_all->forwarding; | 509 | __s32 newf = net->ipv6.devconf_all->forwarding; |
507 | net->ipv6.devconf_dflt->forwarding = newf; | 510 | net->ipv6.devconf_dflt->forwarding = newf; |
@@ -512,6 +515,7 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) | |||
512 | 515 | ||
513 | if (*p) | 516 | if (*p) |
514 | rt6_purge_dflt_routers(net); | 517 | rt6_purge_dflt_routers(net); |
518 | return 1; | ||
515 | } | 519 | } |
516 | #endif | 520 | #endif |
517 | 521 | ||
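addrconf_fixup_forwarding() now reports a result to its sysctl callers instead of returning void,
and it takes the RTNL with rtnl_trylock() rather than rtnl_lock(), presumably to avoid the
classic sysctl-vs-RTNL deadlock: blocking on rtnl_lock() inside a sysctl handler can wedge
against a task that already holds the RTNL. Returning -ERESTARTSYS when the trylock fails lets
the write be retried instead. The pattern, reduced to its core:

	if (!rtnl_trylock())
		return -ERESTARTSYS;   /* let the write be restarted rather than deadlock */
	/* ... propagate the forwarding change under the RTNL ... */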
@@ -587,6 +591,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, | |||
587 | { | 591 | { |
588 | struct inet6_ifaddr *ifa = NULL; | 592 | struct inet6_ifaddr *ifa = NULL; |
589 | struct rt6_info *rt; | 593 | struct rt6_info *rt; |
594 | struct net *net = dev_net(idev->dev); | ||
590 | int hash; | 595 | int hash; |
591 | int err = 0; | 596 | int err = 0; |
592 | int addr_type = ipv6_addr_type(addr); | 597 | int addr_type = ipv6_addr_type(addr); |
@@ -603,6 +608,11 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, | |||
603 | goto out2; | 608 | goto out2; |
604 | } | 609 | } |
605 | 610 | ||
611 | if (idev->cnf.disable_ipv6 || net->ipv6.devconf_all->disable_ipv6) { | ||
612 | err = -EACCES; | ||
613 | goto out2; | ||
614 | } | ||
615 | |||
606 | write_lock(&addrconf_hash_lock); | 616 | write_lock(&addrconf_hash_lock); |
607 | 617 | ||
608 | /* Ignore adding duplicate addresses on an interface */ | 618 | /* Ignore adding duplicate addresses on an interface */ |
@@ -1206,16 +1216,12 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, | |||
1206 | } | 1216 | } |
1207 | break; | 1217 | break; |
1208 | } else if (minihiscore < miniscore) { | 1218 | } else if (minihiscore < miniscore) { |
1209 | struct ipv6_saddr_score *tmp; | ||
1210 | |||
1211 | if (hiscore->ifa) | 1219 | if (hiscore->ifa) |
1212 | in6_ifa_put(hiscore->ifa); | 1220 | in6_ifa_put(hiscore->ifa); |
1213 | 1221 | ||
1214 | in6_ifa_hold(score->ifa); | 1222 | in6_ifa_hold(score->ifa); |
1215 | 1223 | ||
1216 | tmp = hiscore; | 1224 | swap(hiscore, score); |
1217 | hiscore = score; | ||
1218 | score = tmp; | ||
1219 | 1225 | ||
1220 | /* restore our iterator */ | 1226 | /* restore our iterator */ |
1221 | score->ifa = hiscore->ifa; | 1227 | score->ifa = hiscore->ifa; |
@@ -1430,6 +1436,11 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp) | |||
1430 | void addrconf_dad_failure(struct inet6_ifaddr *ifp) | 1436 | void addrconf_dad_failure(struct inet6_ifaddr *ifp) |
1431 | { | 1437 | { |
1432 | struct inet6_dev *idev = ifp->idev; | 1438 | struct inet6_dev *idev = ifp->idev; |
1439 | |||
1440 | if (net_ratelimit()) | ||
1441 | printk(KERN_INFO "%s: IPv6 duplicate address detected!\n", | ||
1442 | ifp->idev->dev->name); | ||
1443 | |||
1433 | if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { | 1444 | if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { |
1434 | struct in6_addr addr; | 1445 | struct in6_addr addr; |
1435 | 1446 | ||
@@ -1440,11 +1451,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) | |||
1440 | ipv6_addr_equal(&ifp->addr, &addr)) { | 1451 | ipv6_addr_equal(&ifp->addr, &addr)) { |
1441 | /* DAD failed for link-local based on MAC address */ | 1452 | /* DAD failed for link-local based on MAC address */ |
1442 | idev->cnf.disable_ipv6 = 1; | 1453 | idev->cnf.disable_ipv6 = 1; |
1454 | |||
1455 | printk(KERN_INFO "%s: IPv6 being disabled!\n", | ||
1456 | ifp->idev->dev->name); | ||
1443 | } | 1457 | } |
1444 | } | 1458 | } |
1445 | 1459 | ||
1446 | if (net_ratelimit()) | ||
1447 | printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); | ||
1448 | addrconf_dad_stop(ifp); | 1460 | addrconf_dad_stop(ifp); |
1449 | } | 1461 | } |
1450 | 1462 | ||
@@ -2599,9 +2611,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) | |||
2599 | 2611 | ||
2600 | ASSERT_RTNL(); | 2612 | ASSERT_RTNL(); |
2601 | 2613 | ||
2602 | if ((dev->flags & IFF_LOOPBACK) && how == 1) | ||
2603 | how = 0; | ||
2604 | |||
2605 | rt6_ifdown(net, dev); | 2614 | rt6_ifdown(net, dev); |
2606 | neigh_ifdown(&nd_tbl, dev); | 2615 | neigh_ifdown(&nd_tbl, dev); |
2607 | 2616 | ||
@@ -2823,11 +2832,6 @@ static void addrconf_dad_timer(unsigned long data) | |||
2823 | read_unlock_bh(&idev->lock); | 2832 | read_unlock_bh(&idev->lock); |
2824 | goto out; | 2833 | goto out; |
2825 | } | 2834 | } |
2826 | if (idev->cnf.accept_dad > 1 && idev->cnf.disable_ipv6) { | ||
2827 | read_unlock_bh(&idev->lock); | ||
2828 | addrconf_dad_failure(ifp); | ||
2829 | return; | ||
2830 | } | ||
2831 | spin_lock_bh(&ifp->lock); | 2835 | spin_lock_bh(&ifp->lock); |
2832 | if (ifp->probes == 0) { | 2836 | if (ifp->probes == 0) { |
2833 | /* | 2837 | /* |
@@ -3638,7 +3642,8 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) | |||
3638 | kfree_skb(skb); | 3642 | kfree_skb(skb); |
3639 | goto errout; | 3643 | goto errout; |
3640 | } | 3644 | } |
3641 | err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); | 3645 | rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); |
3646 | return; | ||
3642 | errout: | 3647 | errout: |
3643 | if (err < 0) | 3648 | if (err < 0) |
3644 | rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); | 3649 | rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); |
@@ -3849,7 +3854,8 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) | |||
3849 | kfree_skb(skb); | 3854 | kfree_skb(skb); |
3850 | goto errout; | 3855 | goto errout; |
3851 | } | 3856 | } |
3852 | err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); | 3857 | rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); |
3858 | return; | ||
3853 | errout: | 3859 | errout: |
3854 | if (err < 0) | 3860 | if (err < 0) |
3855 | rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); | 3861 | rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); |
@@ -3919,7 +3925,8 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, | |||
3919 | kfree_skb(skb); | 3925 | kfree_skb(skb); |
3920 | goto errout; | 3926 | goto errout; |
3921 | } | 3927 | } |
3922 | err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); | 3928 | rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); |
3929 | return; | ||
3923 | errout: | 3930 | errout: |
3924 | if (err < 0) | 3931 | if (err < 0) |
3925 | rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); | 3932 | rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); |
@@ -3974,7 +3981,7 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, | |||
3974 | ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); | 3981 | ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); |
3975 | 3982 | ||
3976 | if (write) | 3983 | if (write) |
3977 | addrconf_fixup_forwarding(ctl, valp, val); | 3984 | ret = addrconf_fixup_forwarding(ctl, valp, val); |
3978 | return ret; | 3985 | return ret; |
3979 | } | 3986 | } |
3980 | 3987 | ||
@@ -4010,8 +4017,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, | |||
4010 | } | 4017 | } |
4011 | 4018 | ||
4012 | *valp = new; | 4019 | *valp = new; |
4013 | addrconf_fixup_forwarding(table, valp, val); | 4020 | return addrconf_fixup_forwarding(table, valp, val); |
4014 | return 1; | ||
4015 | } | 4021 | } |
4016 | 4022 | ||
4017 | static struct addrconf_sysctl_table | 4023 | static struct addrconf_sysctl_table |
@@ -4437,25 +4443,6 @@ int unregister_inet6addr_notifier(struct notifier_block *nb) | |||
4437 | 4443 | ||
4438 | EXPORT_SYMBOL(unregister_inet6addr_notifier); | 4444 | EXPORT_SYMBOL(unregister_inet6addr_notifier); |
4439 | 4445 | ||
4440 | static void addrconf_net_exit(struct net *net) | ||
4441 | { | ||
4442 | struct net_device *dev; | ||
4443 | |||
4444 | rtnl_lock(); | ||
4445 | /* clean dev list */ | ||
4446 | for_each_netdev(net, dev) { | ||
4447 | if (__in6_dev_get(dev) == NULL) | ||
4448 | continue; | ||
4449 | addrconf_ifdown(dev, 1); | ||
4450 | } | ||
4451 | addrconf_ifdown(net->loopback_dev, 2); | ||
4452 | rtnl_unlock(); | ||
4453 | } | ||
4454 | |||
4455 | static struct pernet_operations addrconf_net_ops = { | ||
4456 | .exit = addrconf_net_exit, | ||
4457 | }; | ||
4458 | |||
4459 | /* | 4446 | /* |
4460 | * Init / cleanup code | 4447 | * Init / cleanup code |
4461 | */ | 4448 | */ |
@@ -4497,10 +4484,6 @@ int __init addrconf_init(void) | |||
4497 | if (err) | 4484 | if (err) |
4498 | goto errlo; | 4485 | goto errlo; |
4499 | 4486 | ||
4500 | err = register_pernet_device(&addrconf_net_ops); | ||
4501 | if (err) | ||
4502 | return err; | ||
4503 | |||
4504 | register_netdevice_notifier(&ipv6_dev_notf); | 4487 | register_netdevice_notifier(&ipv6_dev_notf); |
4505 | 4488 | ||
4506 | addrconf_verify(0); | 4489 | addrconf_verify(0); |
@@ -4530,15 +4513,22 @@ errlo: | |||
4530 | void addrconf_cleanup(void) | 4513 | void addrconf_cleanup(void) |
4531 | { | 4514 | { |
4532 | struct inet6_ifaddr *ifa; | 4515 | struct inet6_ifaddr *ifa; |
4516 | struct net_device *dev; | ||
4533 | int i; | 4517 | int i; |
4534 | 4518 | ||
4535 | unregister_netdevice_notifier(&ipv6_dev_notf); | 4519 | unregister_netdevice_notifier(&ipv6_dev_notf); |
4536 | unregister_pernet_device(&addrconf_net_ops); | ||
4537 | |||
4538 | unregister_pernet_subsys(&addrconf_ops); | 4520 | unregister_pernet_subsys(&addrconf_ops); |
4539 | 4521 | ||
4540 | rtnl_lock(); | 4522 | rtnl_lock(); |
4541 | 4523 | ||
4524 | /* clean dev list */ | ||
4525 | for_each_netdev(&init_net, dev) { | ||
4526 | if (__in6_dev_get(dev) == NULL) | ||
4527 | continue; | ||
4528 | addrconf_ifdown(dev, 1); | ||
4529 | } | ||
4530 | addrconf_ifdown(init_net.loopback_dev, 2); | ||
4531 | |||
4542 | /* | 4532 | /* |
4543 | * Check hash table. | 4533 | * Check hash table. |
4544 | */ | 4534 | */ |
@@ -4559,6 +4549,4 @@ void addrconf_cleanup(void) | |||
4559 | 4549 | ||
4560 | del_timer(&addr_chk_timer); | 4550 | del_timer(&addr_chk_timer); |
4561 | rtnl_unlock(); | 4551 | rtnl_unlock(); |
4562 | |||
4563 | unregister_pernet_subsys(&addrconf_net_ops); | ||
4564 | } | 4552 | } |
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index fa2ac7ee662f..fbf533cc9dce 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c | |||
@@ -72,6 +72,10 @@ MODULE_LICENSE("GPL"); | |||
72 | static struct list_head inetsw6[SOCK_MAX]; | 72 | static struct list_head inetsw6[SOCK_MAX]; |
73 | static DEFINE_SPINLOCK(inetsw6_lock); | 73 | static DEFINE_SPINLOCK(inetsw6_lock); |
74 | 74 | ||
75 | static int disable_ipv6 = 0; | ||
76 | module_param_named(disable, disable_ipv6, int, 0); | ||
77 | MODULE_PARM_DESC(disable, "Disable IPv6 such that it is non-functional"); | ||
78 | |||
75 | static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) | 79 | static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) |
76 | { | 80 | { |
77 | const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo); | 81 | const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo); |
@@ -889,7 +893,7 @@ out_unlock: | |||
889 | return err; | 893 | return err; |
890 | } | 894 | } |
891 | 895 | ||
892 | static struct packet_type ipv6_packet_type = { | 896 | static struct packet_type ipv6_packet_type __read_mostly = { |
893 | .type = cpu_to_be16(ETH_P_IPV6), | 897 | .type = cpu_to_be16(ETH_P_IPV6), |
894 | .func = ipv6_rcv, | 898 | .func = ipv6_rcv, |
895 | .gso_send_check = ipv6_gso_send_check, | 899 | .gso_send_check = ipv6_gso_send_check, |
@@ -1001,10 +1005,21 @@ static int __init inet6_init(void) | |||
1001 | { | 1005 | { |
1002 | struct sk_buff *dummy_skb; | 1006 | struct sk_buff *dummy_skb; |
1003 | struct list_head *r; | 1007 | struct list_head *r; |
1004 | int err; | 1008 | int err = 0; |
1005 | 1009 | ||
1006 | BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); | 1010 | BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); |
1007 | 1011 | ||
1012 | /* Register the socket-side information for inet6_create. */ | ||
1013 | for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) | ||
1014 | INIT_LIST_HEAD(r); | ||
1015 | |||
1016 | if (disable_ipv6) { | ||
1017 | printk(KERN_INFO | ||
1018 | "IPv6: Loaded, but administratively disabled, " | ||
1019 | "reboot required to enable\n"); | ||
1020 | goto out; | ||
1021 | } | ||
1022 | |||
1008 | err = proto_register(&tcpv6_prot, 1); | 1023 | err = proto_register(&tcpv6_prot, 1); |
1009 | if (err) | 1024 | if (err) |
1010 | goto out; | 1025 | goto out; |
@@ -1022,10 +1037,6 @@ static int __init inet6_init(void) | |||
1022 | goto out_unregister_udplite_proto; | 1037 | goto out_unregister_udplite_proto; |
1023 | 1038 | ||
1024 | 1039 | ||
1025 | /* Register the socket-side information for inet6_create. */ | ||
1026 | for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) | ||
1027 | INIT_LIST_HEAD(r); | ||
1028 | |||
1029 | /* We MUST register RAW sockets before we create the ICMP6, | 1040 | /* We MUST register RAW sockets before we create the ICMP6, |
1030 | * IGMP6, or NDISC control sockets. | 1041 | * IGMP6, or NDISC control sockets. |
1031 | */ | 1042 | */ |
@@ -1191,6 +1202,9 @@ module_init(inet6_init); | |||
1191 | 1202 | ||
1192 | static void __exit inet6_exit(void) | 1203 | static void __exit inet6_exit(void) |
1193 | { | 1204 | { |
1205 | if (disable_ipv6) | ||
1206 | return; | ||
1207 | |||
1194 | /* First of all disallow new sockets creation. */ | 1208 | /* First of all disallow new sockets creation. */ |
1195 | sock_unregister(PF_INET6); | 1209 | sock_unregister(PF_INET6); |
1196 | /* Disallow any further netlink messages */ | 1210 | /* Disallow any further netlink messages */ |
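af_inet6.c gains a module parameter that loads the module but leaves it non-functional, which
lets module dependencies be satisfied without actually enabling IPv6. inet6_init() still
initialises the inetsw6 lists (so inet6_create() cannot trip over them) and then bails out early;
inet6_exit() mirrors the early return. Usage, assuming IPv6 is built as the ipv6 module:

	# at module load time
	modprobe ipv6 disable=1

	# or on the kernel command line, applied when the module is loaded
	ipv6.disable=1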
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 8fe267feb81e..1bcc3431859e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c | |||
@@ -258,11 +258,11 @@ unique: | |||
258 | 258 | ||
259 | if (twp != NULL) { | 259 | if (twp != NULL) { |
260 | *twp = tw; | 260 | *twp = tw; |
261 | NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED); | 261 | NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); |
262 | } else if (tw != NULL) { | 262 | } else if (tw != NULL) { |
263 | /* Silly. Should hash-dance instead... */ | 263 | /* Silly. Should hash-dance instead... */ |
264 | inet_twsk_deschedule(tw, death_row); | 264 | inet_twsk_deschedule(tw, death_row); |
265 | NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITRECYCLED); | 265 | NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED); |
266 | 266 | ||
267 | inet_twsk_put(tw); | 267 | inet_twsk_put(tw); |
268 | } | 268 | } |
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 40f324655e24..d31df0f4bc9a 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c | |||
@@ -218,8 +218,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, | |||
218 | if (opt) | 218 | if (opt) |
219 | sock_kfree_s(sk, opt, opt->tot_len); | 219 | sock_kfree_s(sk, opt, opt->tot_len); |
220 | pktopt = xchg(&np->pktoptions, NULL); | 220 | pktopt = xchg(&np->pktoptions, NULL); |
221 | if (pktopt) | 221 | kfree_skb(pktopt); |
222 | kfree_skb(pktopt); | ||
223 | 222 | ||
224 | sk->sk_destruct = inet_sock_destruct; | 223 | sk->sk_destruct = inet_sock_destruct; |
225 | /* | 224 | /* |
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3cd83b85e9ef..9f061d1adbc2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c | |||
@@ -1095,11 +1095,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) | |||
1095 | &ipv6_hdr(ra)->saddr); | 1095 | &ipv6_hdr(ra)->saddr); |
1096 | nlmsg_end(skb, nlh); | 1096 | nlmsg_end(skb, nlh); |
1097 | 1097 | ||
1098 | err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, | 1098 | rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); |
1099 | GFP_ATOMIC); | ||
1100 | if (err < 0) | ||
1101 | goto errout; | ||
1102 | |||
1103 | return; | 1099 | return; |
1104 | 1100 | ||
1105 | nla_put_failure: | 1101 | nla_put_failure: |
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 165b256a6fa0..41b8a956e1be 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | |||
@@ -205,8 +205,9 @@ icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, | |||
205 | 205 | ||
206 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && | 206 | if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && |
207 | nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { | 207 | nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { |
208 | nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, | 208 | if (LOG_INVALID(net, IPPROTO_ICMPV6)) |
209 | "nf_ct_icmpv6: ICMPv6 checksum failed\n"); | 209 | nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, |
210 | "nf_ct_icmpv6: ICMPv6 checksum failed "); | ||
210 | return -NF_ACCEPT; | 211 | return -NF_ACCEPT; |
211 | } | 212 | } |
212 | 213 | ||
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index ed4d79a9e4a6..058a5e4a60c3 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c | |||
@@ -528,14 +528,14 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) | |||
528 | if (!ipv6_ext_hdr(nexthdr)) { | 528 | if (!ipv6_ext_hdr(nexthdr)) { |
529 | return -1; | 529 | return -1; |
530 | } | 530 | } |
531 | if (len < (int)sizeof(struct ipv6_opt_hdr)) { | ||
532 | pr_debug("too short\n"); | ||
533 | return -1; | ||
534 | } | ||
535 | if (nexthdr == NEXTHDR_NONE) { | 531 | if (nexthdr == NEXTHDR_NONE) { |
536 | pr_debug("next header is none\n"); | 532 | pr_debug("next header is none\n"); |
537 | return -1; | 533 | return -1; |
538 | } | 534 | } |
535 | if (len < (int)sizeof(struct ipv6_opt_hdr)) { | ||
536 | pr_debug("too short\n"); | ||
537 | return -1; | ||
538 | } | ||
539 | if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) | 539 | if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) |
540 | BUG(); | 540 | BUG(); |
541 | if (nexthdr == NEXTHDR_AUTH) | 541 | if (nexthdr == NEXTHDR_AUTH) |
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 3c575118fca5..e9ac7a12f595 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c | |||
@@ -452,6 +452,7 @@ err: | |||
452 | static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, | 452 | static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, |
453 | struct net_device *dev) | 453 | struct net_device *dev) |
454 | { | 454 | { |
455 | struct net *net = container_of(fq->q.net, struct net, ipv6.frags); | ||
455 | struct sk_buff *fp, *head = fq->q.fragments; | 456 | struct sk_buff *fp, *head = fq->q.fragments; |
456 | int payload_len; | 457 | int payload_len; |
457 | unsigned int nhoff; | 458 | unsigned int nhoff; |
@@ -551,8 +552,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, | |||
551 | head->csum); | 552 | head->csum); |
552 | 553 | ||
553 | rcu_read_lock(); | 554 | rcu_read_lock(); |
554 | IP6_INC_STATS_BH(dev_net(dev), | 555 | IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); |
555 | __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); | ||
556 | rcu_read_unlock(); | 556 | rcu_read_unlock(); |
557 | fq->q.fragments = NULL; | 557 | fq->q.fragments = NULL; |
558 | return 1; | 558 | return 1; |
@@ -566,8 +566,7 @@ out_oom: | |||
566 | printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); | 566 | printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); |
567 | out_fail: | 567 | out_fail: |
568 | rcu_read_lock(); | 568 | rcu_read_lock(); |
569 | IP6_INC_STATS_BH(dev_net(dev), | 569 | IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); |
570 | __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); | ||
571 | rcu_read_unlock(); | 570 | rcu_read_unlock(); |
572 | return -1; | 571 | return -1; |
573 | } | 572 | } |
diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c3d486a3edad..1394ddb6e35c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c | |||
@@ -2400,8 +2400,9 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) | |||
2400 | kfree_skb(skb); | 2400 | kfree_skb(skb); |
2401 | goto errout; | 2401 | goto errout; |
2402 | } | 2402 | } |
2403 | err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, | 2403 | rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, |
2404 | info->nlh, gfp_any()); | 2404 | info->nlh, gfp_any()); |
2405 | return; | ||
2405 | errout: | 2406 | errout: |
2406 | if (err < 0) | 2407 | if (err < 0) |
2407 | rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); | 2408 | rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); |
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d3467e563f02..664ab82e03b2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c | |||
@@ -188,9 +188,9 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, | |||
188 | } | 188 | } |
189 | 189 | ||
190 | nt = netdev_priv(dev); | 190 | nt = netdev_priv(dev); |
191 | ipip6_tunnel_init(dev); | ||
192 | 191 | ||
193 | nt->parms = *parms; | 192 | nt->parms = *parms; |
193 | ipip6_tunnel_init(dev); | ||
194 | 194 | ||
195 | if (parms->i_flags & SIT_ISATAP) | 195 | if (parms->i_flags & SIT_ISATAP) |
196 | dev->priv_flags |= IFF_ISATAP; | 196 | dev->priv_flags |= IFF_ISATAP; |
@@ -454,7 +454,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) | |||
454 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) | 454 | if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) |
455 | goto out; | 455 | goto out; |
456 | 456 | ||
457 | if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) | 457 | if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) |
458 | t->err_count++; | 458 | t->err_count++; |
459 | else | 459 | else |
460 | t->err_count = 1; | 460 | t->err_count = 1; |
@@ -658,7 +658,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) | |||
658 | } | 658 | } |
659 | 659 | ||
660 | if (tunnel->err_count > 0) { | 660 | if (tunnel->err_count > 0) { |
661 | if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { | 661 | if (time_before(jiffies, |
662 | tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { | ||
662 | tunnel->err_count--; | 663 | tunnel->err_count--; |
663 | dst_link_failure(skb); | 664 | dst_link_failure(skb); |
664 | } else | 665 | } else |
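Both sit.c hunks convert raw jiffies arithmetic ("jiffies - t->err_time < IPTUNNEL_ERR_TIMEO")
to the time_before() macro, the canonical wrap-safe idiom for jiffies comparisons. Ignoring the
typecheck() the real linux/jiffies.h macros add, it boils down to a signed difference:

	#define time_after(a, b)   ((long)((b) - (a)) < 0)
	#define time_before(a, b)  time_after(b, a)

	/* so the converted check asks: has less than IPTUNNEL_ERR_TIMEO elapsed? */
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;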
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 00f1269e11e9..4b5aa1854260 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c | |||
@@ -533,8 +533,7 @@ static inline void syn_flood_warning(struct sk_buff *skb) | |||
533 | 533 | ||
534 | static void tcp_v6_reqsk_destructor(struct request_sock *req) | 534 | static void tcp_v6_reqsk_destructor(struct request_sock *req) |
535 | { | 535 | { |
536 | if (inet6_rsk(req)->pktopts) | 536 | kfree_skb(inet6_rsk(req)->pktopts); |
537 | kfree_skb(inet6_rsk(req)->pktopts); | ||
538 | } | 537 | } |
539 | 538 | ||
540 | #ifdef CONFIG_TCP_MD5SIG | 539 | #ifdef CONFIG_TCP_MD5SIG |
@@ -1611,8 +1610,7 @@ ipv6_pktoptions: | |||
1611 | } | 1610 | } |
1612 | } | 1611 | } |
1613 | 1612 | ||
1614 | if (opt_skb) | 1613 | kfree_skb(opt_skb); |
1615 | kfree_skb(opt_skb); | ||
1616 | return 0; | 1614 | return 0; |
1617 | } | 1615 | } |
1618 | 1616 | ||
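Several hunks in this series (tcp_ipv6.c above, and the af_iucv, af_key and llc_conn changes further down) delete the "if (ptr)" guard in front of kfree_skb(). kfree_skb() already returns immediately when passed a NULL skb, just like kfree(), so the guard is redundant. A toy illustration of the pattern (free_obj is a stand-in, not a kernel function):

#include <stdio.h>
#include <stdlib.h>

struct obj { int refcnt; };

/* Like kfree_skb()/kfree(): tolerate NULL so callers need no guard. */
static void free_obj(struct obj *o)
{
        if (!o)
                return;
        free(o);
}

int main(void)
{
        struct obj *a = calloc(1, sizeof(*a));
        struct obj *b = NULL;

        /* Before: if (a) free_obj(a);  After: just call it. */
        free_obj(a);
        free_obj(b);    /* harmless no-op */
        printf("done\n");
        return 0;
}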
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 0e685b05496e..f417b77fa0e1 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c | |||
@@ -69,7 +69,7 @@ __xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass) | |||
69 | 69 | ||
70 | for (i = 0; i < n; i++) { | 70 | for (i = 0; i < n; i++) { |
71 | dst[count[class[i] - 1]++] = src[i]; | 71 | dst[count[class[i] - 1]++] = src[i]; |
72 | src[i] = 0; | 72 | src[i] = NULL; |
73 | } | 73 | } |
74 | 74 | ||
75 | return 0; | 75 | return 0; |
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 43d0ffc6d565..1627050e29fd 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c | |||
@@ -1958,12 +1958,12 @@ static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { | |||
1958 | 1958 | ||
1959 | SOCKOPS_WRAP(ipx_dgram, PF_IPX); | 1959 | SOCKOPS_WRAP(ipx_dgram, PF_IPX); |
1960 | 1960 | ||
1961 | static struct packet_type ipx_8023_packet_type = { | 1961 | static struct packet_type ipx_8023_packet_type __read_mostly = { |
1962 | .type = cpu_to_be16(ETH_P_802_3), | 1962 | .type = cpu_to_be16(ETH_P_802_3), |
1963 | .func = ipx_rcv, | 1963 | .func = ipx_rcv, |
1964 | }; | 1964 | }; |
1965 | 1965 | ||
1966 | static struct packet_type ipx_dix_packet_type = { | 1966 | static struct packet_type ipx_dix_packet_type __read_mostly = { |
1967 | .type = cpu_to_be16(ETH_P_IPX), | 1967 | .type = cpu_to_be16(ETH_P_IPX), |
1968 | .func = ipx_rcv, | 1968 | .func = ipx_rcv, |
1969 | }; | 1969 | }; |
@@ -1975,15 +1975,15 @@ static struct notifier_block ipx_dev_notifier = { | |||
1975 | extern struct datalink_proto *make_EII_client(void); | 1975 | extern struct datalink_proto *make_EII_client(void); |
1976 | extern void destroy_EII_client(struct datalink_proto *); | 1976 | extern void destroy_EII_client(struct datalink_proto *); |
1977 | 1977 | ||
1978 | static unsigned char ipx_8022_type = 0xE0; | 1978 | static const unsigned char ipx_8022_type = 0xE0; |
1979 | static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; | 1979 | static const unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; |
1980 | static char ipx_EII_err_msg[] __initdata = | 1980 | static const char ipx_EII_err_msg[] __initconst = |
1981 | KERN_CRIT "IPX: Unable to register with Ethernet II\n"; | 1981 | KERN_CRIT "IPX: Unable to register with Ethernet II\n"; |
1982 | static char ipx_8023_err_msg[] __initdata = | 1982 | static const char ipx_8023_err_msg[] __initconst = |
1983 | KERN_CRIT "IPX: Unable to register with 802.3\n"; | 1983 | KERN_CRIT "IPX: Unable to register with 802.3\n"; |
1984 | static char ipx_llc_err_msg[] __initdata = | 1984 | static const char ipx_llc_err_msg[] __initconst = |
1985 | KERN_CRIT "IPX: Unable to register with 802.2\n"; | 1985 | KERN_CRIT "IPX: Unable to register with 802.2\n"; |
1986 | static char ipx_snap_err_msg[] __initdata = | 1986 | static const char ipx_snap_err_msg[] __initconst = |
1987 | KERN_CRIT "IPX: Unable to register with SNAP\n"; | 1987 | KERN_CRIT "IPX: Unable to register with SNAP\n"; |
1988 | 1988 | ||
1989 | static int __init ipx_init(void) | 1989 | static int __init ipx_init(void) |
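The af_ipx.c hunk above, like the psnap.c and af_llc.c ones, moves init-time error strings from __initdata to const plus __initconst, and tags the packet_type structs __read_mostly. Both annotations are about section placement: __initconst puts read-only data into a section the kernel discards after boot, and __read_mostly groups rarely-written data away from frequently-written cache lines. A rough user-space approximation using GCC section attributes (the macro and section names here only loosely mirror the kernel's and are not its actual definitions):

#include <stdio.h>

/* Loose stand-ins for the kernel annotations (illustrative only). */
#define demo_initconst   __attribute__((section(".init.rodata")))
#define demo_read_mostly __attribute__((section(".data.read_mostly")))

/* Read-only message needed only during startup. */
static const char boot_err_msg[] demo_initconst =
        "demo: unable to register handler\n";

/* Consulted on every packet, almost never modified after init. */
static struct { int proto; int hits; } pkt_handler demo_read_mostly = {
        .proto = 0x8137,        /* e.g. the IPX SNAP id ending 0x81 0x37 */
};

int main(void)
{
        printf("%s", boot_err_msg);
        pkt_handler.hits++;
        printf("proto 0x%x hits %d\n", pkt_handler.proto, pkt_handler.hits);
        return 0;
}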
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index ea319e3ddc18..bf92e1473447 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c | |||
@@ -149,13 +149,14 @@ int irda_device_is_receiving(struct net_device *dev) | |||
149 | 149 | ||
150 | IRDA_DEBUG(2, "%s()\n", __func__); | 150 | IRDA_DEBUG(2, "%s()\n", __func__); |
151 | 151 | ||
152 | if (!dev->do_ioctl) { | 152 | if (!dev->netdev_ops->ndo_do_ioctl) { |
153 | IRDA_ERROR("%s: do_ioctl not impl. by device driver\n", | 153 | IRDA_ERROR("%s: do_ioctl not impl. by device driver\n", |
154 | __func__); | 154 | __func__); |
155 | return -1; | 155 | return -1; |
156 | } | 156 | } |
157 | 157 | ||
158 | ret = dev->do_ioctl(dev, (struct ifreq *) &req, SIOCGRECEIVING); | 158 | ret = (dev->netdev_ops->ndo_do_ioctl)(dev, (struct ifreq *) &req, |
159 | SIOCGRECEIVING); | ||
159 | if (ret < 0) | 160 | if (ret < 0) |
160 | return ret; | 161 | return ret; |
161 | 162 | ||
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 05112be99569..724bcf951b80 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c | |||
@@ -45,6 +45,16 @@ static int irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev); | |||
45 | static void irlan_eth_set_multicast_list( struct net_device *dev); | 45 | static void irlan_eth_set_multicast_list( struct net_device *dev); |
46 | static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev); | 46 | static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev); |
47 | 47 | ||
48 | static const struct net_device_ops irlan_eth_netdev_ops = { | ||
49 | .ndo_open = irlan_eth_open, | ||
50 | .ndo_stop = irlan_eth_close, | ||
51 | .ndo_start_xmit = irlan_eth_xmit, | ||
52 | .ndo_get_stats = irlan_eth_get_stats, | ||
53 | .ndo_set_multicast_list = irlan_eth_set_multicast_list, | ||
54 | .ndo_change_mtu = eth_change_mtu, | ||
55 | .ndo_validate_addr = eth_validate_addr, | ||
56 | }; | ||
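The block just added is the core of the net_device_ops conversion for irlan: instead of irlan_eth_setup() poking individual function pointers (dev->open, dev->stop, dev->hard_start_xmit, ...) into each net_device, every irlan device now points at one shared, const ops table, and callers such as the irda_device.c hunk above dispatch optional hooks like ndo_do_ioctl through that table after checking they exist. A reduced before/after sketch with toy types (eth_dev and eth_dev_ops are placeholders, not kernel structures):

#include <stdio.h>

struct eth_dev;

struct eth_dev_ops {
        int (*open)(struct eth_dev *dev);
        int (*stop)(struct eth_dev *dev);
};

struct eth_dev {
        const char *name;
        const struct eth_dev_ops *ops;  /* shared, read-only table */
};

static int demo_open(struct eth_dev *dev) { printf("%s up\n", dev->name); return 0; }
static int demo_stop(struct eth_dev *dev) { printf("%s down\n", dev->name); return 0; }

/* One const table shared by every instance, like irlan_eth_netdev_ops. */
static const struct eth_dev_ops demo_ops = {
        .open = demo_open,
        .stop = demo_stop,
};

static void demo_setup(struct eth_dev *dev, const char *name)
{
        dev->name = name;
        dev->ops  = &demo_ops;  /* was: dev->open = ...; dev->stop = ...; */
}

int main(void)
{
        struct eth_dev a, b;

        demo_setup(&a, "irlan0");
        demo_setup(&b, "irlan1");
        a.ops->open(&a);
        b.ops->open(&b);
        a.ops->stop(&a);
        return 0;
}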
57 | |||
48 | /* | 58 | /* |
49 | * Function irlan_eth_setup (dev) | 59 | * Function irlan_eth_setup (dev) |
50 | * | 60 | * |
@@ -53,14 +63,11 @@ static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev); | |||
53 | */ | 63 | */ |
54 | static void irlan_eth_setup(struct net_device *dev) | 64 | static void irlan_eth_setup(struct net_device *dev) |
55 | { | 65 | { |
56 | dev->open = irlan_eth_open; | 66 | ether_setup(dev); |
57 | dev->stop = irlan_eth_close; | 67 | |
58 | dev->hard_start_xmit = irlan_eth_xmit; | 68 | dev->netdev_ops = &irlan_eth_netdev_ops; |
59 | dev->get_stats = irlan_eth_get_stats; | ||
60 | dev->set_multicast_list = irlan_eth_set_multicast_list; | ||
61 | dev->destructor = free_netdev; | 69 | dev->destructor = free_netdev; |
62 | 70 | ||
63 | ether_setup(dev); | ||
64 | 71 | ||
65 | /* | 72 | /* |
66 | * Lets do all queueing in IrTTP instead of this device driver. | 73 | * Lets do all queueing in IrTTP instead of this device driver. |
diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 1bb607f2f5c7..303a68d92731 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c | |||
@@ -55,7 +55,7 @@ EXPORT_SYMBOL(irda_debug); | |||
55 | /* Packet type handler. | 55 | /* Packet type handler. |
56 | * Tell the kernel how IrDA packets should be handled. | 56 | * Tell the kernel how IrDA packets should be handled. |
57 | */ | 57 | */ |
58 | static struct packet_type irda_packet_type = { | 58 | static struct packet_type irda_packet_type __read_mostly = { |
59 | .type = cpu_to_be16(ETH_P_IRDA), | 59 | .type = cpu_to_be16(ETH_P_IRDA), |
60 | .func = irlap_driver_rcv, /* Packet type handler irlap_frame.c */ | 60 | .func = irlap_driver_rcv, /* Packet type handler irlap_frame.c */ |
61 | }; | 61 | }; |
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index eb8a2a0b6eb7..49e786535dc8 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c | |||
@@ -1171,8 +1171,7 @@ static void iucv_callback_txdone(struct iucv_path *path, | |||
1171 | 1171 | ||
1172 | spin_unlock_irqrestore(&list->lock, flags); | 1172 | spin_unlock_irqrestore(&list->lock, flags); |
1173 | 1173 | ||
1174 | if (this) | 1174 | kfree_skb(this); |
1175 | kfree_skb(this); | ||
1176 | } | 1175 | } |
1177 | BUG_ON(!this); | 1176 | BUG_ON(!this); |
1178 | 1177 | ||
diff --git a/net/key/af_key.c b/net/key/af_key.c index 7dcbde3ea7d9..643c1be2d02e 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c | |||
@@ -313,8 +313,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, | |||
313 | if (one_sk != NULL) | 313 | if (one_sk != NULL) |
314 | err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); | 314 | err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); |
315 | 315 | ||
316 | if (skb2) | 316 | kfree_skb(skb2); |
317 | kfree_skb(skb2); | ||
318 | kfree_skb(skb); | 317 | kfree_skb(skb); |
319 | return err; | 318 | return err; |
320 | } | 319 | } |
@@ -3573,8 +3572,7 @@ static int pfkey_sendmsg(struct kiocb *kiocb, | |||
3573 | out: | 3572 | out: |
3574 | if (err && hdr && pfkey_error(hdr, err, sk) == 0) | 3573 | if (err && hdr && pfkey_error(hdr, err, sk) == 0) |
3575 | err = 0; | 3574 | err = 0; |
3576 | if (skb) | 3575 | kfree_skb(skb); |
3577 | kfree_skb(skb); | ||
3578 | 3576 | ||
3579 | return err ? : len; | 3577 | return err ? : len; |
3580 | } | 3578 | } |
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 56fd85ab358e..febae702685c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c | |||
@@ -1118,11 +1118,11 @@ static const struct proto_ops llc_ui_ops = { | |||
1118 | .sendpage = sock_no_sendpage, | 1118 | .sendpage = sock_no_sendpage, |
1119 | }; | 1119 | }; |
1120 | 1120 | ||
1121 | static char llc_proc_err_msg[] __initdata = | 1121 | static const char llc_proc_err_msg[] __initconst = |
1122 | KERN_CRIT "LLC: Unable to register the proc_fs entries\n"; | 1122 | KERN_CRIT "LLC: Unable to register the proc_fs entries\n"; |
1123 | static char llc_sysctl_err_msg[] __initdata = | 1123 | static const char llc_sysctl_err_msg[] __initconst = |
1124 | KERN_CRIT "LLC: Unable to register the sysctl entries\n"; | 1124 | KERN_CRIT "LLC: Unable to register the sysctl entries\n"; |
1125 | static char llc_sock_err_msg[] __initdata = | 1125 | static const char llc_sock_err_msg[] __initconst = |
1126 | KERN_CRIT "LLC: Unable to register the network family\n"; | 1126 | KERN_CRIT "LLC: Unable to register the network family\n"; |
1127 | 1127 | ||
1128 | static int __init llc2_init(void) | 1128 | static int __init llc2_init(void) |
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 5c6d89c6d51d..3477624a4906 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c | |||
@@ -332,8 +332,7 @@ int llc_conn_remove_acked_pdus(struct sock *sk, u8 nr, u16 *how_many_unacked) | |||
332 | 332 | ||
333 | for (i = 0; i < pdu_pos && i < q_len; i++) { | 333 | for (i = 0; i < pdu_pos && i < q_len; i++) { |
334 | skb = skb_dequeue(&llc->pdu_unack_q); | 334 | skb = skb_dequeue(&llc->pdu_unack_q); |
335 | if (skb) | 335 | kfree_skb(skb); |
336 | kfree_skb(skb); | ||
337 | nbr_acked++; | 336 | nbr_acked++; |
338 | } | 337 | } |
339 | out: | 338 | out: |
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index a7fe1adc378d..ff4c0ab96a69 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c | |||
@@ -147,12 +147,12 @@ void llc_sap_close(struct llc_sap *sap) | |||
147 | kfree(sap); | 147 | kfree(sap); |
148 | } | 148 | } |
149 | 149 | ||
150 | static struct packet_type llc_packet_type = { | 150 | static struct packet_type llc_packet_type __read_mostly = { |
151 | .type = cpu_to_be16(ETH_P_802_2), | 151 | .type = cpu_to_be16(ETH_P_802_2), |
152 | .func = llc_rcv, | 152 | .func = llc_rcv, |
153 | }; | 153 | }; |
154 | 154 | ||
155 | static struct packet_type llc_tr_packet_type = { | 155 | static struct packet_type llc_tr_packet_type __read_mostly = { |
156 | .type = cpu_to_be16(ETH_P_TR_802_2), | 156 | .type = cpu_to_be16(ETH_P_TR_802_2), |
157 | .func = llc_rcv, | 157 | .func = llc_rcv, |
158 | }; | 158 | }; |
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 3503a3d21318..0e3ab88bb706 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile | |||
@@ -9,6 +9,7 @@ mac80211-y := \ | |||
9 | wpa.o \ | 9 | wpa.o \ |
10 | scan.o \ | 10 | scan.o \ |
11 | ht.o agg-tx.o agg-rx.o \ | 11 | ht.o agg-tx.o agg-rx.o \ |
12 | ibss.o \ | ||
12 | mlme.o \ | 13 | mlme.o \ |
13 | iface.o \ | 14 | iface.o \ |
14 | rate.o \ | 15 | rate.o \ |
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 3112bfd441b6..a95affc94629 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c | |||
@@ -129,7 +129,6 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d | |||
129 | u8 dialog_token, u16 status, u16 policy, | 129 | u8 dialog_token, u16 status, u16 policy, |
130 | u16 buf_size, u16 timeout) | 130 | u16 buf_size, u16 timeout) |
131 | { | 131 | { |
132 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | ||
133 | struct ieee80211_local *local = sdata->local; | 132 | struct ieee80211_local *local = sdata->local; |
134 | struct sk_buff *skb; | 133 | struct sk_buff *skb; |
135 | struct ieee80211_mgmt *mgmt; | 134 | struct ieee80211_mgmt *mgmt; |
@@ -151,8 +150,9 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d | |||
151 | if (sdata->vif.type == NL80211_IFTYPE_AP || | 150 | if (sdata->vif.type == NL80211_IFTYPE_AP || |
152 | sdata->vif.type == NL80211_IFTYPE_AP_VLAN) | 151 | sdata->vif.type == NL80211_IFTYPE_AP_VLAN) |
153 | memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); | 152 | memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); |
154 | else | 153 | else if (sdata->vif.type == NL80211_IFTYPE_STATION) |
155 | memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); | 154 | memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); |
155 | |||
156 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | | 156 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | |
157 | IEEE80211_STYPE_ACTION); | 157 | IEEE80211_STYPE_ACTION); |
158 | 158 | ||
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 1232d9f01ca9..1df116d4d6e7 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c | |||
@@ -49,7 +49,6 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, | |||
49 | u16 agg_size, u16 timeout) | 49 | u16 agg_size, u16 timeout) |
50 | { | 50 | { |
51 | struct ieee80211_local *local = sdata->local; | 51 | struct ieee80211_local *local = sdata->local; |
52 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | ||
53 | struct sk_buff *skb; | 52 | struct sk_buff *skb; |
54 | struct ieee80211_mgmt *mgmt; | 53 | struct ieee80211_mgmt *mgmt; |
55 | u16 capab; | 54 | u16 capab; |
@@ -69,8 +68,8 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, | |||
69 | if (sdata->vif.type == NL80211_IFTYPE_AP || | 68 | if (sdata->vif.type == NL80211_IFTYPE_AP || |
70 | sdata->vif.type == NL80211_IFTYPE_AP_VLAN) | 69 | sdata->vif.type == NL80211_IFTYPE_AP_VLAN) |
71 | memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); | 70 | memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); |
72 | else | 71 | else if (sdata->vif.type == NL80211_IFTYPE_STATION) |
73 | memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); | 72 | memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); |
74 | 73 | ||
75 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | | 74 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | |
76 | IEEE80211_STYPE_ACTION); | 75 | IEEE80211_STYPE_ACTION); |
@@ -132,9 +131,24 @@ static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, | |||
132 | 131 | ||
133 | state = &sta->ampdu_mlme.tid_state_tx[tid]; | 132 | state = &sta->ampdu_mlme.tid_state_tx[tid]; |
134 | 133 | ||
135 | if (local->hw.ampdu_queues) | 134 | if (local->hw.ampdu_queues) { |
136 | ieee80211_stop_queue(&local->hw, sta->tid_to_tx_q[tid]); | 135 | if (initiator) { |
136 | /* | ||
137 | * Stop the AC queue to avoid issues where we send | ||
138 | * unaggregated frames already before the delba. | ||
139 | */ | ||
140 | ieee80211_stop_queue_by_reason(&local->hw, | ||
141 | local->hw.queues + sta->tid_to_tx_q[tid], | ||
142 | IEEE80211_QUEUE_STOP_REASON_AGGREGATION); | ||
143 | } | ||
137 | 144 | ||
145 | /* | ||
146 | * Pretend the driver woke the queue, just in case | ||
147 | * it disabled it before the session was stopped. | ||
148 | */ | ||
149 | ieee80211_wake_queue( | ||
150 | &local->hw, local->hw.queues + sta->tid_to_tx_q[tid]); | ||
151 | } | ||
138 | *state = HT_AGG_STATE_REQ_STOP_BA_MSK | | 152 | *state = HT_AGG_STATE_REQ_STOP_BA_MSK | |
139 | (initiator << HT_AGG_STATE_INITIATOR_SHIFT); | 153 | (initiator << HT_AGG_STATE_INITIATOR_SHIFT); |
140 | 154 | ||
@@ -144,8 +158,6 @@ static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, | |||
144 | /* HW shall not deny going back to legacy */ | 158 | /* HW shall not deny going back to legacy */ |
145 | if (WARN_ON(ret)) { | 159 | if (WARN_ON(ret)) { |
146 | *state = HT_AGG_STATE_OPERATIONAL; | 160 | *state = HT_AGG_STATE_OPERATIONAL; |
147 | if (local->hw.ampdu_queues) | ||
148 | ieee80211_wake_queue(&local->hw, sta->tid_to_tx_q[tid]); | ||
149 | } | 161 | } |
150 | 162 | ||
151 | return ret; | 163 | return ret; |
@@ -189,14 +201,19 @@ static void sta_addba_resp_timer_expired(unsigned long data) | |||
189 | spin_unlock_bh(&sta->lock); | 201 | spin_unlock_bh(&sta->lock); |
190 | } | 202 | } |
191 | 203 | ||
204 | static inline int ieee80211_ac_from_tid(int tid) | ||
205 | { | ||
206 | return ieee802_1d_to_ac[tid & 7]; | ||
207 | } | ||
208 | |||
192 | int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) | 209 | int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) |
193 | { | 210 | { |
194 | struct ieee80211_local *local = hw_to_local(hw); | 211 | struct ieee80211_local *local = hw_to_local(hw); |
195 | struct sta_info *sta; | 212 | struct sta_info *sta; |
196 | struct ieee80211_sub_if_data *sdata; | 213 | struct ieee80211_sub_if_data *sdata; |
197 | u16 start_seq_num; | ||
198 | u8 *state; | 214 | u8 *state; |
199 | int ret = 0; | 215 | int i, qn = -1, ret = 0; |
216 | u16 start_seq_num; | ||
200 | 217 | ||
201 | if (WARN_ON(!local->ops->ampdu_action)) | 218 | if (WARN_ON(!local->ops->ampdu_action)) |
202 | return -EINVAL; | 219 | return -EINVAL; |
@@ -209,6 +226,13 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) | |||
209 | ra, tid); | 226 | ra, tid); |
210 | #endif /* CONFIG_MAC80211_HT_DEBUG */ | 227 | #endif /* CONFIG_MAC80211_HT_DEBUG */ |
211 | 228 | ||
229 | if (hw->ampdu_queues && ieee80211_ac_from_tid(tid) == 0) { | ||
230 | #ifdef CONFIG_MAC80211_HT_DEBUG | ||
231 | printk(KERN_DEBUG "rejecting on voice AC\n"); | ||
232 | #endif | ||
233 | return -EINVAL; | ||
234 | } | ||
235 | |||
212 | rcu_read_lock(); | 236 | rcu_read_lock(); |
213 | 237 | ||
214 | sta = sta_info_get(local, ra); | 238 | sta = sta_info_get(local, ra); |
@@ -217,7 +241,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) | |||
217 | printk(KERN_DEBUG "Could not find the station\n"); | 241 | printk(KERN_DEBUG "Could not find the station\n"); |
218 | #endif | 242 | #endif |
219 | ret = -ENOENT; | 243 | ret = -ENOENT; |
220 | goto exit; | 244 | goto unlock; |
221 | } | 245 | } |
222 | 246 | ||
223 | /* | 247 | /* |
@@ -230,11 +254,13 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) | |||
230 | sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN && | 254 | sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN && |
231 | sta->sdata->vif.type != NL80211_IFTYPE_AP) { | 255 | sta->sdata->vif.type != NL80211_IFTYPE_AP) { |
232 | ret = -EINVAL; | 256 | ret = -EINVAL; |
233 | goto exit; | 257 | goto unlock; |
234 | } | 258 | } |
235 | 259 | ||
236 | spin_lock_bh(&sta->lock); | 260 | spin_lock_bh(&sta->lock); |
237 | 261 | ||
262 | sdata = sta->sdata; | ||
263 | |||
238 | /* we have tried too many times, receiver does not want A-MPDU */ | 264 | /* we have tried too many times, receiver does not want A-MPDU */ |
239 | if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { | 265 | if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { |
240 | ret = -EBUSY; | 266 | ret = -EBUSY; |
@@ -252,6 +278,42 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) | |||
252 | goto err_unlock_sta; | 278 | goto err_unlock_sta; |
253 | } | 279 | } |
254 | 280 | ||
281 | if (hw->ampdu_queues) { | ||
282 | spin_lock(&local->queue_stop_reason_lock); | ||
283 | /* reserve a new queue for this session */ | ||
284 | for (i = 0; i < local->hw.ampdu_queues; i++) { | ||
285 | if (local->ampdu_ac_queue[i] < 0) { | ||
286 | qn = i; | ||
287 | local->ampdu_ac_queue[qn] = | ||
288 | ieee80211_ac_from_tid(tid); | ||
289 | break; | ||
290 | } | ||
291 | } | ||
292 | spin_unlock(&local->queue_stop_reason_lock); | ||
293 | |||
294 | if (qn < 0) { | ||
295 | #ifdef CONFIG_MAC80211_HT_DEBUG | ||
296 | printk(KERN_DEBUG "BA request denied - " | ||
297 | "queue unavailable for tid %d\n", tid); | ||
298 | #endif /* CONFIG_MAC80211_HT_DEBUG */ | ||
299 | ret = -ENOSPC; | ||
300 | goto err_unlock_sta; | ||
301 | } | ||
302 | |||
303 | /* | ||
304 | * If we successfully allocate the session, we can't have | ||
305 | * anything going on on the queue this TID maps into, so | ||
306 | * stop it for now. This is a "virtual" stop using the same | ||
307 | * mechanism that drivers will use. | ||
308 | * | ||
309 | * XXX: queue up frames for this session in the sta_info | ||
310 | * struct instead to avoid hitting all other STAs. | ||
311 | */ | ||
312 | ieee80211_stop_queue_by_reason( | ||
313 | &local->hw, hw->queues + qn, | ||
314 | IEEE80211_QUEUE_STOP_REASON_AGGREGATION); | ||
315 | } | ||
316 | |||
255 | /* prepare A-MPDU MLME for Tx aggregation */ | 317 | /* prepare A-MPDU MLME for Tx aggregation */ |
256 | sta->ampdu_mlme.tid_tx[tid] = | 318 | sta->ampdu_mlme.tid_tx[tid] = |
257 | kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); | 319 | kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); |
@@ -262,8 +324,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) | |||
262 | tid); | 324 | tid); |
263 | #endif | 325 | #endif |
264 | ret = -ENOMEM; | 326 | ret = -ENOMEM; |
265 | goto err_unlock_sta; | 327 | goto err_return_queue; |
266 | } | 328 | } |
329 | |||
267 | /* Tx timer */ | 330 | /* Tx timer */ |
268 | sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = | 331 | sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = |
269 | sta_addba_resp_timer_expired; | 332 | sta_addba_resp_timer_expired; |
@@ -271,49 +334,25 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) | |||
271 | (unsigned long)&sta->timer_to_tid[tid]; | 334 | (unsigned long)&sta->timer_to_tid[tid]; |
272 | init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); | 335 | init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); |
273 | 336 | ||
274 | if (hw->ampdu_queues) { | ||
275 | /* create a new queue for this aggregation */ | ||
276 | ret = ieee80211_ht_agg_queue_add(local, sta, tid); | ||
277 | |||
278 | /* case no queue is available to aggregation | ||
279 | * don't switch to aggregation */ | ||
280 | if (ret) { | ||
281 | #ifdef CONFIG_MAC80211_HT_DEBUG | ||
282 | printk(KERN_DEBUG "BA request denied - " | ||
283 | "queue unavailable for tid %d\n", tid); | ||
284 | #endif /* CONFIG_MAC80211_HT_DEBUG */ | ||
285 | goto err_unlock_queue; | ||
286 | } | ||
287 | } | ||
288 | sdata = sta->sdata; | ||
289 | |||
290 | /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the | 337 | /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the |
291 | * call back right away, it must see that the flow has begun */ | 338 | * call back right away, it must see that the flow has begun */ |
292 | *state |= HT_ADDBA_REQUESTED_MSK; | 339 | *state |= HT_ADDBA_REQUESTED_MSK; |
293 | 340 | ||
294 | /* This is slightly racy because the queue isn't stopped */ | ||
295 | start_seq_num = sta->tid_seq[tid]; | 341 | start_seq_num = sta->tid_seq[tid]; |
296 | 342 | ||
297 | ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START, | 343 | ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START, |
298 | &sta->sta, tid, &start_seq_num); | 344 | &sta->sta, tid, &start_seq_num); |
299 | 345 | ||
300 | if (ret) { | 346 | if (ret) { |
301 | /* No need to requeue the packets in the agg queue, since we | ||
302 | * held the tx lock: no packet could be enqueued to the newly | ||
303 | * allocated queue */ | ||
304 | if (hw->ampdu_queues) | ||
305 | ieee80211_ht_agg_queue_remove(local, sta, tid, 0); | ||
306 | #ifdef CONFIG_MAC80211_HT_DEBUG | 347 | #ifdef CONFIG_MAC80211_HT_DEBUG |
307 | printk(KERN_DEBUG "BA request denied - HW unavailable for" | 348 | printk(KERN_DEBUG "BA request denied - HW unavailable for" |
308 | " tid %d\n", tid); | 349 | " tid %d\n", tid); |
309 | #endif /* CONFIG_MAC80211_HT_DEBUG */ | 350 | #endif /* CONFIG_MAC80211_HT_DEBUG */ |
310 | *state = HT_AGG_STATE_IDLE; | 351 | *state = HT_AGG_STATE_IDLE; |
311 | goto err_unlock_queue; | 352 | goto err_free; |
312 | } | 353 | } |
354 | sta->tid_to_tx_q[tid] = qn; | ||
313 | 355 | ||
314 | /* Will put all the packets in the new SW queue */ | ||
315 | if (hw->ampdu_queues) | ||
316 | ieee80211_requeue(local, ieee802_1d_to_ac[tid]); | ||
317 | spin_unlock_bh(&sta->lock); | 356 | spin_unlock_bh(&sta->lock); |
318 | 357 | ||
319 | /* send an addBA request */ | 358 | /* send an addBA request */ |
@@ -322,7 +361,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) | |||
322 | sta->ampdu_mlme.dialog_token_allocator; | 361 | sta->ampdu_mlme.dialog_token_allocator; |
323 | sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; | 362 | sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; |
324 | 363 | ||
325 | |||
326 | ieee80211_send_addba_request(sta->sdata, ra, tid, | 364 | ieee80211_send_addba_request(sta->sdata, ra, tid, |
327 | sta->ampdu_mlme.tid_tx[tid]->dialog_token, | 365 | sta->ampdu_mlme.tid_tx[tid]->dialog_token, |
328 | sta->ampdu_mlme.tid_tx[tid]->ssn, | 366 | sta->ampdu_mlme.tid_tx[tid]->ssn, |
@@ -334,15 +372,24 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) | |||
334 | #ifdef CONFIG_MAC80211_HT_DEBUG | 372 | #ifdef CONFIG_MAC80211_HT_DEBUG |
335 | printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); | 373 | printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); |
336 | #endif | 374 | #endif |
337 | goto exit; | 375 | goto unlock; |
338 | 376 | ||
339 | err_unlock_queue: | 377 | err_free: |
340 | kfree(sta->ampdu_mlme.tid_tx[tid]); | 378 | kfree(sta->ampdu_mlme.tid_tx[tid]); |
341 | sta->ampdu_mlme.tid_tx[tid] = NULL; | 379 | sta->ampdu_mlme.tid_tx[tid] = NULL; |
342 | ret = -EBUSY; | 380 | err_return_queue: |
343 | err_unlock_sta: | 381 | if (qn >= 0) { |
382 | /* We failed, so start queue again right away. */ | ||
383 | ieee80211_wake_queue_by_reason(hw, hw->queues + qn, | ||
384 | IEEE80211_QUEUE_STOP_REASON_AGGREGATION); | ||
385 | /* give queue back to pool */ | ||
386 | spin_lock(&local->queue_stop_reason_lock); | ||
387 | local->ampdu_ac_queue[qn] = -1; | ||
388 | spin_unlock(&local->queue_stop_reason_lock); | ||
389 | } | ||
390 | err_unlock_sta: | ||
344 | spin_unlock_bh(&sta->lock); | 391 | spin_unlock_bh(&sta->lock); |
345 | exit: | 392 | unlock: |
346 | rcu_read_unlock(); | 393 | rcu_read_unlock(); |
347 | return ret; | 394 | return ret; |
348 | } | 395 | } |
@@ -375,7 +422,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) | |||
375 | state = &sta->ampdu_mlme.tid_state_tx[tid]; | 422 | state = &sta->ampdu_mlme.tid_state_tx[tid]; |
376 | spin_lock_bh(&sta->lock); | 423 | spin_lock_bh(&sta->lock); |
377 | 424 | ||
378 | if (!(*state & HT_ADDBA_REQUESTED_MSK)) { | 425 | if (WARN_ON(!(*state & HT_ADDBA_REQUESTED_MSK))) { |
379 | #ifdef CONFIG_MAC80211_HT_DEBUG | 426 | #ifdef CONFIG_MAC80211_HT_DEBUG |
380 | printk(KERN_DEBUG "addBA was not requested yet, state is %d\n", | 427 | printk(KERN_DEBUG "addBA was not requested yet, state is %d\n", |
381 | *state); | 428 | *state); |
@@ -385,7 +432,8 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) | |||
385 | return; | 432 | return; |
386 | } | 433 | } |
387 | 434 | ||
388 | WARN_ON_ONCE(*state & HT_ADDBA_DRV_READY_MSK); | 435 | if (WARN_ON(*state & HT_ADDBA_DRV_READY_MSK)) |
436 | goto out; | ||
389 | 437 | ||
390 | *state |= HT_ADDBA_DRV_READY_MSK; | 438 | *state |= HT_ADDBA_DRV_READY_MSK; |
391 | 439 | ||
@@ -393,9 +441,18 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) | |||
393 | #ifdef CONFIG_MAC80211_HT_DEBUG | 441 | #ifdef CONFIG_MAC80211_HT_DEBUG |
394 | printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); | 442 | printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); |
395 | #endif | 443 | #endif |
396 | if (hw->ampdu_queues) | 444 | if (hw->ampdu_queues) { |
397 | ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); | 445 | /* |
446 | * Wake up this queue, we stopped it earlier, | ||
447 | * this will in turn wake the entire AC. | ||
448 | */ | ||
449 | ieee80211_wake_queue_by_reason(hw, | ||
450 | hw->queues + sta->tid_to_tx_q[tid], | ||
451 | IEEE80211_QUEUE_STOP_REASON_AGGREGATION); | ||
452 | } | ||
398 | } | 453 | } |
454 | |||
455 | out: | ||
399 | spin_unlock_bh(&sta->lock); | 456 | spin_unlock_bh(&sta->lock); |
400 | rcu_read_unlock(); | 457 | rcu_read_unlock(); |
401 | } | 458 | } |
@@ -485,7 +542,6 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) | |||
485 | struct ieee80211_local *local = hw_to_local(hw); | 542 | struct ieee80211_local *local = hw_to_local(hw); |
486 | struct sta_info *sta; | 543 | struct sta_info *sta; |
487 | u8 *state; | 544 | u8 *state; |
488 | int agg_queue; | ||
489 | 545 | ||
490 | if (tid >= STA_TID_NUM) { | 546 | if (tid >= STA_TID_NUM) { |
491 | #ifdef CONFIG_MAC80211_HT_DEBUG | 547 | #ifdef CONFIG_MAC80211_HT_DEBUG |
@@ -527,19 +583,19 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) | |||
527 | ieee80211_send_delba(sta->sdata, ra, tid, | 583 | ieee80211_send_delba(sta->sdata, ra, tid, |
528 | WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); | 584 | WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); |
529 | 585 | ||
530 | if (hw->ampdu_queues) { | 586 | spin_lock_bh(&sta->lock); |
531 | agg_queue = sta->tid_to_tx_q[tid]; | ||
532 | ieee80211_ht_agg_queue_remove(local, sta, tid, 1); | ||
533 | 587 | ||
534 | /* We just requeued the all the frames that were in the | 588 | if (*state & HT_AGG_STATE_INITIATOR_MSK && |
535 | * removed queue, and since we might miss a softirq we do | 589 | hw->ampdu_queues) { |
536 | * netif_schedule_queue. ieee80211_wake_queue is not used | 590 | /* |
537 | * here as this queue is not necessarily stopped | 591 | * Wake up this queue, we stopped it earlier, |
592 | * this will in turn wake the entire AC. | ||
538 | */ | 593 | */ |
539 | netif_schedule_queue(netdev_get_tx_queue(local->mdev, | 594 | ieee80211_wake_queue_by_reason(hw, |
540 | agg_queue)); | 595 | hw->queues + sta->tid_to_tx_q[tid], |
596 | IEEE80211_QUEUE_STOP_REASON_AGGREGATION); | ||
541 | } | 597 | } |
542 | spin_lock_bh(&sta->lock); | 598 | |
543 | *state = HT_AGG_STATE_IDLE; | 599 | *state = HT_AGG_STATE_IDLE; |
544 | sta->ampdu_mlme.addba_req_num[tid] = 0; | 600 | sta->ampdu_mlme.addba_req_num[tid] = 0; |
545 | kfree(sta->ampdu_mlme.tid_tx[tid]); | 601 | kfree(sta->ampdu_mlme.tid_tx[tid]); |
@@ -613,12 +669,21 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, | |||
613 | #endif /* CONFIG_MAC80211_HT_DEBUG */ | 669 | #endif /* CONFIG_MAC80211_HT_DEBUG */ |
614 | if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) | 670 | if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) |
615 | == WLAN_STATUS_SUCCESS) { | 671 | == WLAN_STATUS_SUCCESS) { |
672 | u8 curstate = *state; | ||
673 | |||
616 | *state |= HT_ADDBA_RECEIVED_MSK; | 674 | *state |= HT_ADDBA_RECEIVED_MSK; |
617 | sta->ampdu_mlme.addba_req_num[tid] = 0; | ||
618 | 675 | ||
619 | if (*state == HT_AGG_STATE_OPERATIONAL && | 676 | if (hw->ampdu_queues && *state != curstate && |
620 | local->hw.ampdu_queues) | 677 | *state == HT_AGG_STATE_OPERATIONAL) { |
621 | ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); | 678 | /* |
679 | * Wake up this queue, we stopped it earlier, | ||
680 | * this will in turn wake the entire AC. | ||
681 | */ | ||
682 | ieee80211_wake_queue_by_reason(hw, | ||
683 | hw->queues + sta->tid_to_tx_q[tid], | ||
684 | IEEE80211_QUEUE_STOP_REASON_AGGREGATION); | ||
685 | } | ||
686 | sta->ampdu_mlme.addba_req_num[tid] = 0; | ||
622 | 687 | ||
623 | if (local->ops->ampdu_action) { | 688 | if (local->ops->ampdu_action) { |
624 | (void)local->ops->ampdu_action(hw, | 689 | (void)local->ops->ampdu_action(hw, |
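The agg-tx.c changes above replace the old per-TID software aggregation queues (ieee80211_ht_agg_queue_add/remove plus requeue) with a small pool of hardware AMPDU queues: when a session starts, a free slot in ampdu_ac_queue[] is claimed under the queue_stop_reason lock and recorded with the TID's access category, the corresponding queue is stopped with the AGGREGATION stop reason until the peer's addBA response arrives, and the slot is handed back (and the queue woken) if setup fails. A stripped-down, single-threaded model of that reserve/release bookkeeping, with invented names (agg_pool, NUM_AMPDU_QUEUES) and no locking:

#include <stdio.h>

#define NUM_AMPDU_QUEUES 4

/* -1 means "free"; otherwise the slot records the AC it was reserved for. */
static int agg_pool[NUM_AMPDU_QUEUES] = { -1, -1, -1, -1 };

/* Claim a free aggregation queue for the given access category. */
static int reserve_agg_queue(int ac)
{
        int i;

        for (i = 0; i < NUM_AMPDU_QUEUES; i++) {
                if (agg_pool[i] < 0) {
                        agg_pool[i] = ac;
                        printf("reserved queue %d for AC %d (stopped)\n", i, ac);
                        return i;
                }
        }
        return -1;      /* like the -ENOSPC path in the hunk */
}

/* Give the queue back, e.g. when addBA setup fails or the session ends. */
static void release_agg_queue(int qn)
{
        if (qn < 0)
                return;
        agg_pool[qn] = -1;
        printf("released queue %d (woken)\n", qn);
}

int main(void)
{
        int q1 = reserve_agg_queue(2);  /* best-effort AC */
        int q2 = reserve_agg_queue(3);  /* background AC  */

        release_agg_queue(q1);          /* session torn down */
        release_agg_queue(q2);
        return 0;
}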
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c8d969be440b..58693e52d458 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c | |||
@@ -341,11 +341,15 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) | |||
341 | sinfo->filled = STATION_INFO_INACTIVE_TIME | | 341 | sinfo->filled = STATION_INFO_INACTIVE_TIME | |
342 | STATION_INFO_RX_BYTES | | 342 | STATION_INFO_RX_BYTES | |
343 | STATION_INFO_TX_BYTES | | 343 | STATION_INFO_TX_BYTES | |
344 | STATION_INFO_RX_PACKETS | | ||
345 | STATION_INFO_TX_PACKETS | | ||
344 | STATION_INFO_TX_BITRATE; | 346 | STATION_INFO_TX_BITRATE; |
345 | 347 | ||
346 | sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); | 348 | sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); |
347 | sinfo->rx_bytes = sta->rx_bytes; | 349 | sinfo->rx_bytes = sta->rx_bytes; |
348 | sinfo->tx_bytes = sta->tx_bytes; | 350 | sinfo->tx_bytes = sta->tx_bytes; |
351 | sinfo->rx_packets = sta->rx_packets; | ||
352 | sinfo->tx_packets = sta->tx_packets; | ||
349 | 353 | ||
350 | if (sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { | 354 | if (sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { |
351 | sinfo->filled |= STATION_INFO_SIGNAL; | 355 | sinfo->filled |= STATION_INFO_SIGNAL; |
@@ -447,7 +451,8 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, | |||
447 | * This is a kludge. beacon interval should really be part | 451 | * This is a kludge. beacon interval should really be part |
448 | * of the beacon information. | 452 | * of the beacon information. |
449 | */ | 453 | */ |
450 | if (params->interval) { | 454 | if (params->interval && (sdata->local->hw.conf.beacon_int != |
455 | params->interval)) { | ||
451 | sdata->local->hw.conf.beacon_int = params->interval; | 456 | sdata->local->hw.conf.beacon_int = params->interval; |
452 | err = ieee80211_hw_config(sdata->local, | 457 | err = ieee80211_hw_config(sdata->local, |
453 | IEEE80211_CONF_CHANGE_BEACON_INTERVAL); | 458 | IEEE80211_CONF_CHANGE_BEACON_INTERVAL); |
@@ -1180,45 +1185,45 @@ static int set_mgmt_extra_ie_sta(struct ieee80211_sub_if_data *sdata, | |||
1180 | u8 subtype, u8 *ies, size_t ies_len) | 1185 | u8 subtype, u8 *ies, size_t ies_len) |
1181 | { | 1186 | { |
1182 | struct ieee80211_local *local = sdata->local; | 1187 | struct ieee80211_local *local = sdata->local; |
1183 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | 1188 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
1184 | 1189 | ||
1185 | switch (subtype) { | 1190 | switch (subtype) { |
1186 | case IEEE80211_STYPE_PROBE_REQ >> 4: | 1191 | case IEEE80211_STYPE_PROBE_REQ >> 4: |
1187 | if (local->ops->hw_scan) | 1192 | if (local->ops->hw_scan) |
1188 | break; | 1193 | break; |
1189 | kfree(ifsta->ie_probereq); | 1194 | kfree(ifmgd->ie_probereq); |
1190 | ifsta->ie_probereq = ies; | 1195 | ifmgd->ie_probereq = ies; |
1191 | ifsta->ie_probereq_len = ies_len; | 1196 | ifmgd->ie_probereq_len = ies_len; |
1192 | return 0; | 1197 | return 0; |
1193 | case IEEE80211_STYPE_PROBE_RESP >> 4: | 1198 | case IEEE80211_STYPE_PROBE_RESP >> 4: |
1194 | kfree(ifsta->ie_proberesp); | 1199 | kfree(ifmgd->ie_proberesp); |
1195 | ifsta->ie_proberesp = ies; | 1200 | ifmgd->ie_proberesp = ies; |
1196 | ifsta->ie_proberesp_len = ies_len; | 1201 | ifmgd->ie_proberesp_len = ies_len; |
1197 | return 0; | 1202 | return 0; |
1198 | case IEEE80211_STYPE_AUTH >> 4: | 1203 | case IEEE80211_STYPE_AUTH >> 4: |
1199 | kfree(ifsta->ie_auth); | 1204 | kfree(ifmgd->ie_auth); |
1200 | ifsta->ie_auth = ies; | 1205 | ifmgd->ie_auth = ies; |
1201 | ifsta->ie_auth_len = ies_len; | 1206 | ifmgd->ie_auth_len = ies_len; |
1202 | return 0; | 1207 | return 0; |
1203 | case IEEE80211_STYPE_ASSOC_REQ >> 4: | 1208 | case IEEE80211_STYPE_ASSOC_REQ >> 4: |
1204 | kfree(ifsta->ie_assocreq); | 1209 | kfree(ifmgd->ie_assocreq); |
1205 | ifsta->ie_assocreq = ies; | 1210 | ifmgd->ie_assocreq = ies; |
1206 | ifsta->ie_assocreq_len = ies_len; | 1211 | ifmgd->ie_assocreq_len = ies_len; |
1207 | return 0; | 1212 | return 0; |
1208 | case IEEE80211_STYPE_REASSOC_REQ >> 4: | 1213 | case IEEE80211_STYPE_REASSOC_REQ >> 4: |
1209 | kfree(ifsta->ie_reassocreq); | 1214 | kfree(ifmgd->ie_reassocreq); |
1210 | ifsta->ie_reassocreq = ies; | 1215 | ifmgd->ie_reassocreq = ies; |
1211 | ifsta->ie_reassocreq_len = ies_len; | 1216 | ifmgd->ie_reassocreq_len = ies_len; |
1212 | return 0; | 1217 | return 0; |
1213 | case IEEE80211_STYPE_DEAUTH >> 4: | 1218 | case IEEE80211_STYPE_DEAUTH >> 4: |
1214 | kfree(ifsta->ie_deauth); | 1219 | kfree(ifmgd->ie_deauth); |
1215 | ifsta->ie_deauth = ies; | 1220 | ifmgd->ie_deauth = ies; |
1216 | ifsta->ie_deauth_len = ies_len; | 1221 | ifmgd->ie_deauth_len = ies_len; |
1217 | return 0; | 1222 | return 0; |
1218 | case IEEE80211_STYPE_DISASSOC >> 4: | 1223 | case IEEE80211_STYPE_DISASSOC >> 4: |
1219 | kfree(ifsta->ie_disassoc); | 1224 | kfree(ifmgd->ie_disassoc); |
1220 | ifsta->ie_disassoc = ies; | 1225 | ifmgd->ie_disassoc = ies; |
1221 | ifsta->ie_disassoc_len = ies_len; | 1226 | ifmgd->ie_disassoc_len = ies_len; |
1222 | return 0; | 1227 | return 0; |
1223 | } | 1228 | } |
1224 | 1229 | ||
@@ -1248,7 +1253,6 @@ static int ieee80211_set_mgmt_extra_ie(struct wiphy *wiphy, | |||
1248 | 1253 | ||
1249 | switch (sdata->vif.type) { | 1254 | switch (sdata->vif.type) { |
1250 | case NL80211_IFTYPE_STATION: | 1255 | case NL80211_IFTYPE_STATION: |
1251 | case NL80211_IFTYPE_ADHOC: | ||
1252 | ret = set_mgmt_extra_ie_sta(sdata, params->subtype, | 1256 | ret = set_mgmt_extra_ie_sta(sdata, params->subtype, |
1253 | ies, ies_len); | 1257 | ies, ies_len); |
1254 | break; | 1258 | break; |
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index c54219301724..e3420329f4e6 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c | |||
@@ -94,31 +94,31 @@ IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); | |||
94 | IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC); | 94 | IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC); |
95 | IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC); | 95 | IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC); |
96 | 96 | ||
97 | /* STA/IBSS attributes */ | 97 | /* STA attributes */ |
98 | IEEE80211_IF_FILE(state, u.sta.state, DEC); | 98 | IEEE80211_IF_FILE(state, u.mgd.state, DEC); |
99 | IEEE80211_IF_FILE(bssid, u.sta.bssid, MAC); | 99 | IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); |
100 | IEEE80211_IF_FILE(prev_bssid, u.sta.prev_bssid, MAC); | 100 | IEEE80211_IF_FILE(prev_bssid, u.mgd.prev_bssid, MAC); |
101 | IEEE80211_IF_FILE(ssid_len, u.sta.ssid_len, SIZE); | 101 | IEEE80211_IF_FILE(ssid_len, u.mgd.ssid_len, SIZE); |
102 | IEEE80211_IF_FILE(aid, u.sta.aid, DEC); | 102 | IEEE80211_IF_FILE(aid, u.mgd.aid, DEC); |
103 | IEEE80211_IF_FILE(ap_capab, u.sta.ap_capab, HEX); | 103 | IEEE80211_IF_FILE(ap_capab, u.mgd.ap_capab, HEX); |
104 | IEEE80211_IF_FILE(capab, u.sta.capab, HEX); | 104 | IEEE80211_IF_FILE(capab, u.mgd.capab, HEX); |
105 | IEEE80211_IF_FILE(extra_ie_len, u.sta.extra_ie_len, SIZE); | 105 | IEEE80211_IF_FILE(extra_ie_len, u.mgd.extra_ie_len, SIZE); |
106 | IEEE80211_IF_FILE(auth_tries, u.sta.auth_tries, DEC); | 106 | IEEE80211_IF_FILE(auth_tries, u.mgd.auth_tries, DEC); |
107 | IEEE80211_IF_FILE(assoc_tries, u.sta.assoc_tries, DEC); | 107 | IEEE80211_IF_FILE(assoc_tries, u.mgd.assoc_tries, DEC); |
108 | IEEE80211_IF_FILE(auth_algs, u.sta.auth_algs, HEX); | 108 | IEEE80211_IF_FILE(auth_algs, u.mgd.auth_algs, HEX); |
109 | IEEE80211_IF_FILE(auth_alg, u.sta.auth_alg, DEC); | 109 | IEEE80211_IF_FILE(auth_alg, u.mgd.auth_alg, DEC); |
110 | IEEE80211_IF_FILE(auth_transaction, u.sta.auth_transaction, DEC); | 110 | IEEE80211_IF_FILE(auth_transaction, u.mgd.auth_transaction, DEC); |
111 | 111 | ||
112 | static ssize_t ieee80211_if_fmt_flags( | 112 | static ssize_t ieee80211_if_fmt_flags( |
113 | const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) | 113 | const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) |
114 | { | 114 | { |
115 | return scnprintf(buf, buflen, "%s%s%s%s%s%s%s\n", | 115 | return scnprintf(buf, buflen, "%s%s%s%s%s%s%s\n", |
116 | sdata->u.sta.flags & IEEE80211_STA_SSID_SET ? "SSID\n" : "", | 116 | sdata->u.mgd.flags & IEEE80211_STA_SSID_SET ? "SSID\n" : "", |
117 | sdata->u.sta.flags & IEEE80211_STA_BSSID_SET ? "BSSID\n" : "", | 117 | sdata->u.mgd.flags & IEEE80211_STA_BSSID_SET ? "BSSID\n" : "", |
118 | sdata->u.sta.flags & IEEE80211_STA_PREV_BSSID_SET ? "prev BSSID\n" : "", | 118 | sdata->u.mgd.flags & IEEE80211_STA_PREV_BSSID_SET ? "prev BSSID\n" : "", |
119 | sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "", | 119 | sdata->u.mgd.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "", |
120 | sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "", | 120 | sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "", |
121 | sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "", | 121 | sdata->u.mgd.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "", |
122 | sdata->vif.bss_conf.use_cts_prot ? "CTS prot\n" : ""); | 122 | sdata->vif.bss_conf.use_cts_prot ? "CTS prot\n" : ""); |
123 | } | 123 | } |
124 | __IEEE80211_IF_FILE(flags); | 124 | __IEEE80211_IF_FILE(flags); |
@@ -283,9 +283,11 @@ static void add_files(struct ieee80211_sub_if_data *sdata) | |||
283 | #endif | 283 | #endif |
284 | break; | 284 | break; |
285 | case NL80211_IFTYPE_STATION: | 285 | case NL80211_IFTYPE_STATION: |
286 | case NL80211_IFTYPE_ADHOC: | ||
287 | add_sta_files(sdata); | 286 | add_sta_files(sdata); |
288 | break; | 287 | break; |
288 | case NL80211_IFTYPE_ADHOC: | ||
289 | /* XXX */ | ||
290 | break; | ||
289 | case NL80211_IFTYPE_AP: | 291 | case NL80211_IFTYPE_AP: |
290 | add_ap_files(sdata); | 292 | add_ap_files(sdata); |
291 | break; | 293 | break; |
@@ -418,9 +420,11 @@ static void del_files(struct ieee80211_sub_if_data *sdata) | |||
418 | #endif | 420 | #endif |
419 | break; | 421 | break; |
420 | case NL80211_IFTYPE_STATION: | 422 | case NL80211_IFTYPE_STATION: |
421 | case NL80211_IFTYPE_ADHOC: | ||
422 | del_sta_files(sdata); | 423 | del_sta_files(sdata); |
423 | break; | 424 | break; |
425 | case NL80211_IFTYPE_ADHOC: | ||
426 | /* XXX */ | ||
427 | break; | ||
424 | case NL80211_IFTYPE_AP: | 428 | case NL80211_IFTYPE_AP: |
425 | del_ap_files(sdata); | 429 | del_ap_files(sdata); |
426 | break; | 430 | break; |
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 82ea0b63a386..4e3c72f20de7 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <net/wireless.h> | 17 | #include <net/wireless.h> |
18 | #include <net/mac80211.h> | 18 | #include <net/mac80211.h> |
19 | #include "ieee80211_i.h" | 19 | #include "ieee80211_i.h" |
20 | #include "rate.h" | ||
20 | 21 | ||
21 | void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, | 22 | void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, |
22 | struct ieee80211_ht_cap *ht_cap_ie, | 23 | struct ieee80211_ht_cap *ht_cap_ie, |
@@ -93,7 +94,9 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, | |||
93 | { | 94 | { |
94 | struct ieee80211_local *local = sdata->local; | 95 | struct ieee80211_local *local = sdata->local; |
95 | struct ieee80211_supported_band *sband; | 96 | struct ieee80211_supported_band *sband; |
97 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
96 | struct ieee80211_bss_ht_conf ht; | 98 | struct ieee80211_bss_ht_conf ht; |
99 | struct sta_info *sta; | ||
97 | u32 changed = 0; | 100 | u32 changed = 0; |
98 | bool enable_ht = true, ht_changed; | 101 | bool enable_ht = true, ht_changed; |
99 | enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; | 102 | enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; |
@@ -136,6 +139,16 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, | |||
136 | if (ht_changed) { | 139 | if (ht_changed) { |
137 | /* channel_type change automatically detected */ | 140 | /* channel_type change automatically detected */ |
138 | ieee80211_hw_config(local, 0); | 141 | ieee80211_hw_config(local, 0); |
142 | |||
143 | rcu_read_lock(); | ||
144 | |||
145 | sta = sta_info_get(local, ifmgd->bssid); | ||
146 | if (sta) | ||
147 | rate_control_rate_update(local, sband, sta, | ||
148 | IEEE80211_RC_HT_CHANGED); | ||
149 | |||
150 | rcu_read_unlock(); | ||
151 | |||
139 | } | 152 | } |
140 | 153 | ||
141 | /* disable HT */ | 154 | /* disable HT */ |
@@ -169,7 +182,6 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, | |||
169 | u16 initiator, u16 reason_code) | 182 | u16 initiator, u16 reason_code) |
170 | { | 183 | { |
171 | struct ieee80211_local *local = sdata->local; | 184 | struct ieee80211_local *local = sdata->local; |
172 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | ||
173 | struct sk_buff *skb; | 185 | struct sk_buff *skb; |
174 | struct ieee80211_mgmt *mgmt; | 186 | struct ieee80211_mgmt *mgmt; |
175 | u16 params; | 187 | u16 params; |
@@ -190,8 +202,9 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, | |||
190 | if (sdata->vif.type == NL80211_IFTYPE_AP || | 202 | if (sdata->vif.type == NL80211_IFTYPE_AP || |
191 | sdata->vif.type == NL80211_IFTYPE_AP_VLAN) | 203 | sdata->vif.type == NL80211_IFTYPE_AP_VLAN) |
192 | memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); | 204 | memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); |
193 | else | 205 | else if (sdata->vif.type == NL80211_IFTYPE_STATION) |
194 | memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); | 206 | memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); |
207 | |||
195 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | | 208 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | |
196 | IEEE80211_STYPE_ACTION); | 209 | IEEE80211_STYPE_ACTION); |
197 | 210 | ||
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c new file mode 100644 index 000000000000..f4becc12904e --- /dev/null +++ b/net/mac80211/ibss.c | |||
@@ -0,0 +1,907 @@ | |||
1 | /* | ||
2 | * IBSS mode implementation | ||
3 | * Copyright 2003-2008, Jouni Malinen <j@w1.fi> | ||
4 | * Copyright 2004, Instant802 Networks, Inc. | ||
5 | * Copyright 2005, Devicescape Software, Inc. | ||
6 | * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> | ||
7 | * Copyright 2007, Michael Wu <flamingice@sourmilk.net> | ||
8 | * Copyright 2009, Johannes Berg <johannes@sipsolutions.net> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License version 2 as | ||
12 | * published by the Free Software Foundation. | ||
13 | */ | ||
14 | |||
15 | #include <linux/delay.h> | ||
16 | #include <linux/if_ether.h> | ||
17 | #include <linux/skbuff.h> | ||
18 | #include <linux/if_arp.h> | ||
19 | #include <linux/etherdevice.h> | ||
20 | #include <linux/rtnetlink.h> | ||
21 | #include <net/mac80211.h> | ||
22 | #include <asm/unaligned.h> | ||
23 | |||
24 | #include "ieee80211_i.h" | ||
25 | #include "rate.h" | ||
26 | |||
27 | #define IEEE80211_SCAN_INTERVAL (2 * HZ) | ||
28 | #define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ) | ||
29 | #define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) | ||
30 | |||
31 | #define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ) | ||
32 | #define IEEE80211_IBSS_MERGE_DELAY 0x400000 | ||
33 | #define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ) | ||
34 | |||
35 | #define IEEE80211_IBSS_MAX_STA_ENTRIES 128 | ||
36 | |||
37 | |||
38 | static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, | ||
39 | struct ieee80211_mgmt *mgmt, | ||
40 | size_t len) | ||
41 | { | ||
42 | u16 auth_alg, auth_transaction, status_code; | ||
43 | |||
44 | if (len < 24 + 6) | ||
45 | return; | ||
46 | |||
47 | auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); | ||
48 | auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); | ||
49 | status_code = le16_to_cpu(mgmt->u.auth.status_code); | ||
50 | |||
51 | /* | ||
52 | * IEEE 802.11 standard does not require authentication in IBSS | ||
53 | * networks and most implementations do not seem to use it. | ||
54 | * However, try to reply to authentication attempts if someone | ||
55 | * has actually implemented this. | ||
56 | */ | ||
57 | if (auth_alg == WLAN_AUTH_OPEN && auth_transaction == 1) | ||
58 | ieee80211_send_auth(sdata, 2, WLAN_AUTH_OPEN, NULL, 0, | ||
59 | sdata->u.ibss.bssid, 0); | ||
60 | } | ||
61 | |||
62 | static int __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, | ||
63 | const u8 *bssid, const int beacon_int, | ||
64 | const int freq, | ||
65 | const size_t supp_rates_len, | ||
66 | const u8 *supp_rates, | ||
67 | const u16 capability, u64 tsf) | ||
68 | { | ||
69 | struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; | ||
70 | struct ieee80211_local *local = sdata->local; | ||
71 | int res = 0, rates, i, j; | ||
72 | struct sk_buff *skb; | ||
73 | struct ieee80211_mgmt *mgmt; | ||
74 | u8 *pos; | ||
75 | struct ieee80211_supported_band *sband; | ||
76 | union iwreq_data wrqu; | ||
77 | |||
78 | if (local->ops->reset_tsf) { | ||
79 | /* Reset own TSF to allow time synchronization work. */ | ||
80 | local->ops->reset_tsf(local_to_hw(local)); | ||
81 | } | ||
82 | |||
83 | if ((ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET) && | ||
84 | memcmp(ifibss->bssid, bssid, ETH_ALEN) == 0) | ||
85 | return res; | ||
86 | |||
87 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); | ||
88 | if (!skb) { | ||
89 | printk(KERN_DEBUG "%s: failed to allocate buffer for probe " | ||
90 | "response\n", sdata->dev->name); | ||
91 | return -ENOMEM; | ||
92 | } | ||
93 | |||
94 | if (!(ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET)) { | ||
95 | /* Remove possible STA entries from other IBSS networks. */ | ||
96 | sta_info_flush_delayed(sdata); | ||
97 | } | ||
98 | |||
99 | memcpy(ifibss->bssid, bssid, ETH_ALEN); | ||
100 | res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); | ||
101 | if (res) | ||
102 | return res; | ||
103 | |||
104 | local->hw.conf.beacon_int = beacon_int >= 10 ? beacon_int : 10; | ||
105 | |||
106 | sdata->drop_unencrypted = capability & | ||
107 | WLAN_CAPABILITY_PRIVACY ? 1 : 0; | ||
108 | |||
109 | res = ieee80211_set_freq(sdata, freq); | ||
110 | |||
111 | if (res) | ||
112 | return res; | ||
113 | |||
114 | sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; | ||
115 | |||
116 | /* Build IBSS probe response */ | ||
117 | |||
118 | skb_reserve(skb, local->hw.extra_tx_headroom); | ||
119 | |||
120 | mgmt = (struct ieee80211_mgmt *) | ||
121 | skb_put(skb, 24 + sizeof(mgmt->u.beacon)); | ||
122 | memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); | ||
123 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | | ||
124 | IEEE80211_STYPE_PROBE_RESP); | ||
125 | memset(mgmt->da, 0xff, ETH_ALEN); | ||
126 | memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); | ||
127 | memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN); | ||
128 | mgmt->u.beacon.beacon_int = | ||
129 | cpu_to_le16(local->hw.conf.beacon_int); | ||
130 | mgmt->u.beacon.timestamp = cpu_to_le64(tsf); | ||
131 | mgmt->u.beacon.capab_info = cpu_to_le16(capability); | ||
132 | |||
133 | pos = skb_put(skb, 2 + ifibss->ssid_len); | ||
134 | *pos++ = WLAN_EID_SSID; | ||
135 | *pos++ = ifibss->ssid_len; | ||
136 | memcpy(pos, ifibss->ssid, ifibss->ssid_len); | ||
137 | |||
138 | rates = supp_rates_len; | ||
139 | if (rates > 8) | ||
140 | rates = 8; | ||
141 | pos = skb_put(skb, 2 + rates); | ||
142 | *pos++ = WLAN_EID_SUPP_RATES; | ||
143 | *pos++ = rates; | ||
144 | memcpy(pos, supp_rates, rates); | ||
145 | |||
146 | if (sband->band == IEEE80211_BAND_2GHZ) { | ||
147 | pos = skb_put(skb, 2 + 1); | ||
148 | *pos++ = WLAN_EID_DS_PARAMS; | ||
149 | *pos++ = 1; | ||
150 | *pos++ = ieee80211_frequency_to_channel(freq); | ||
151 | } | ||
152 | |||
153 | pos = skb_put(skb, 2 + 2); | ||
154 | *pos++ = WLAN_EID_IBSS_PARAMS; | ||
155 | *pos++ = 2; | ||
156 | /* FIX: set ATIM window based on scan results */ | ||
157 | *pos++ = 0; | ||
158 | *pos++ = 0; | ||
159 | |||
160 | if (supp_rates_len > 8) { | ||
161 | rates = supp_rates_len - 8; | ||
162 | pos = skb_put(skb, 2 + rates); | ||
163 | *pos++ = WLAN_EID_EXT_SUPP_RATES; | ||
164 | *pos++ = rates; | ||
165 | memcpy(pos, &supp_rates[8], rates); | ||
166 | } | ||
167 | |||
168 | ifibss->probe_resp = skb; | ||
169 | |||
170 | ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON | | ||
171 | IEEE80211_IFCC_BEACON_ENABLED); | ||
172 | |||
173 | |||
174 | rates = 0; | ||
175 | for (i = 0; i < supp_rates_len; i++) { | ||
176 | int bitrate = (supp_rates[i] & 0x7f) * 5; | ||
177 | for (j = 0; j < sband->n_bitrates; j++) | ||
178 | if (sband->bitrates[j].bitrate == bitrate) | ||
179 | rates |= BIT(j); | ||
180 | } | ||
181 | |||
182 | ieee80211_sta_def_wmm_params(sdata, supp_rates_len, supp_rates); | ||
183 | |||
184 | ifibss->flags |= IEEE80211_IBSS_PREV_BSSID_SET; | ||
185 | ifibss->state = IEEE80211_IBSS_MLME_JOINED; | ||
186 | mod_timer(&ifibss->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); | ||
187 | |||
188 | memset(&wrqu, 0, sizeof(wrqu)); | ||
189 | memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); | ||
190 | wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); | ||
191 | |||
192 | return res; | ||
193 | } | ||
194 | |||
195 | static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, | ||
196 | struct ieee80211_bss *bss) | ||
197 | { | ||
198 | return __ieee80211_sta_join_ibss(sdata, | ||
199 | bss->cbss.bssid, | ||
200 | bss->cbss.beacon_interval, | ||
201 | bss->cbss.channel->center_freq, | ||
202 | bss->supp_rates_len, bss->supp_rates, | ||
203 | bss->cbss.capability, | ||
204 | bss->cbss.tsf); | ||
205 | } | ||
206 | |||
207 | static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, | ||
208 | struct ieee80211_mgmt *mgmt, | ||
209 | size_t len, | ||
210 | struct ieee80211_rx_status *rx_status, | ||
211 | struct ieee802_11_elems *elems, | ||
212 | bool beacon) | ||
213 | { | ||
214 | struct ieee80211_local *local = sdata->local; | ||
215 | int freq; | ||
216 | struct ieee80211_bss *bss; | ||
217 | struct sta_info *sta; | ||
218 | struct ieee80211_channel *channel; | ||
219 | u64 beacon_timestamp, rx_timestamp; | ||
220 | u32 supp_rates = 0; | ||
221 | enum ieee80211_band band = rx_status->band; | ||
222 | |||
223 | if (elems->ds_params && elems->ds_params_len == 1) | ||
224 | freq = ieee80211_channel_to_frequency(elems->ds_params[0]); | ||
225 | else | ||
226 | freq = rx_status->freq; | ||
227 | |||
228 | channel = ieee80211_get_channel(local->hw.wiphy, freq); | ||
229 | |||
230 | if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) | ||
231 | return; | ||
232 | |||
233 | if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates && | ||
234 | memcmp(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) { | ||
235 | supp_rates = ieee80211_sta_get_rates(local, elems, band); | ||
236 | |||
237 | rcu_read_lock(); | ||
238 | |||
239 | sta = sta_info_get(local, mgmt->sa); | ||
240 | if (sta) { | ||
241 | u32 prev_rates; | ||
242 | |||
243 | prev_rates = sta->sta.supp_rates[band]; | ||
244 | /* make sure mandatory rates are always added */ | ||
245 | sta->sta.supp_rates[band] = supp_rates | | ||
246 | ieee80211_mandatory_rates(local, band); | ||
247 | |||
248 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
249 | if (sta->sta.supp_rates[band] != prev_rates) | ||
250 | printk(KERN_DEBUG "%s: updated supp_rates set " | ||
251 | "for %pM based on beacon info (0x%llx | " | ||
252 | "0x%llx -> 0x%llx)\n", | ||
253 | sdata->dev->name, | ||
254 | sta->sta.addr, | ||
255 | (unsigned long long) prev_rates, | ||
256 | (unsigned long long) supp_rates, | ||
257 | (unsigned long long) sta->sta.supp_rates[band]); | ||
258 | #endif | ||
259 | } else | ||
260 | ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); | ||
261 | |||
262 | rcu_read_unlock(); | ||
263 | } | ||
264 | |||
265 | bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, | ||
266 | channel, beacon); | ||
267 | if (!bss) | ||
268 | return; | ||
269 | |||
270 | /* was just updated in ieee80211_bss_info_update */ | ||
271 | beacon_timestamp = bss->cbss.tsf; | ||
272 | |||
273 | /* check if we need to merge IBSS */ | ||
274 | |||
275 | /* merge only on beacons (???) */ | ||
276 | if (!beacon) | ||
277 | goto put_bss; | ||
278 | |||
279 | /* we use a fixed BSSID */ | ||
280 | if (sdata->u.ibss.flags & IEEE80211_IBSS_BSSID_SET) | ||
281 | goto put_bss; | ||
282 | |||
283 | /* not an IBSS */ | ||
284 | if (!(bss->cbss.capability & WLAN_CAPABILITY_IBSS)) | ||
285 | goto put_bss; | ||
286 | |||
287 | /* different channel */ | ||
288 | if (bss->cbss.channel != local->oper_channel) | ||
289 | goto put_bss; | ||
290 | |||
291 | /* different SSID */ | ||
292 | if (elems->ssid_len != sdata->u.ibss.ssid_len || | ||
293 | memcmp(elems->ssid, sdata->u.ibss.ssid, | ||
294 | sdata->u.ibss.ssid_len)) | ||
295 | goto put_bss; | ||
296 | |||
297 | /* same BSSID */ | ||
298 | if (memcmp(bss->cbss.bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) | ||
299 | goto put_bss; | ||
300 | |||
301 | if (rx_status->flag & RX_FLAG_TSFT) { | ||
302 | /* | ||
303 | * For correct IBSS merging we need mactime; since mactime is | ||
304 | * defined as the time the first data symbol of the frame hits | ||
305 | * the PHY, and the timestamp of the beacon is defined as "the | ||
306 | * time that the data symbol containing the first bit of the | ||
307 | * timestamp is transmitted to the PHY plus the transmitting | ||
308 | * STA's delays through its local PHY from the MAC-PHY | ||
309 | * interface to its interface with the WM" (802.11 11.1.2) | ||
310 | * - equals the time this bit arrives at the receiver - we have | ||
311 | * to take into account the offset between the two. | ||
312 | * | ||
313 | * E.g. at 1 MBit that means mactime is 192 usec earlier | ||
314 | * (=24 bytes * 8 usecs/byte) than the beacon timestamp. | ||
315 | */ | ||
316 | int rate; | ||
317 | |||
318 | if (rx_status->flag & RX_FLAG_HT) | ||
319 | rate = 65; /* TODO: HT rates */ | ||
320 | else | ||
321 | rate = local->hw.wiphy->bands[band]-> | ||
322 | bitrates[rx_status->rate_idx].bitrate; | ||
323 | |||
324 | rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate); | ||
325 | } else if (local && local->ops && local->ops->get_tsf) | ||
326 | /* second best option: get current TSF */ | ||
327 | rx_timestamp = local->ops->get_tsf(local_to_hw(local)); | ||
328 | else | ||
329 | /* can't merge without knowing the TSF */ | ||
330 | rx_timestamp = -1LLU; | ||
331 | |||
332 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
333 | printk(KERN_DEBUG "RX beacon SA=%pM BSSID=" | ||
334 | "%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", | ||
335 | mgmt->sa, mgmt->bssid, | ||
336 | (unsigned long long)rx_timestamp, | ||
337 | (unsigned long long)beacon_timestamp, | ||
338 | (unsigned long long)(rx_timestamp - beacon_timestamp), | ||
339 | jiffies); | ||
340 | #endif | ||
341 | |||
342 | /* give slow hardware some time to do the TSF sync */ | ||
343 | if (rx_timestamp < IEEE80211_IBSS_MERGE_DELAY) | ||
344 | goto put_bss; | ||
345 | |||
346 | if (beacon_timestamp > rx_timestamp) { | ||
347 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
348 | printk(KERN_DEBUG "%s: beacon TSF higher than " | ||
349 | "local TSF - IBSS merge with BSSID %pM\n", | ||
350 | sdata->dev->name, mgmt->bssid); | ||
351 | #endif | ||
352 | ieee80211_sta_join_ibss(sdata, bss); | ||
353 | ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); | ||
354 | } | ||
355 | |||
356 | put_bss: | ||
357 | ieee80211_rx_bss_put(local, bss); | ||
358 | } | ||
359 | |||
360 | /* | ||
361 | * Add a new IBSS station; this will also be called by the RX code when, | ||
362 | * in IBSS mode, a frame is received from a yet-unknown station, so it | ||
363 | * must be callable in atomic context. | ||
364 | */ | ||
365 | struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, | ||
366 | u8 *bssid, u8 *addr, u32 supp_rates) | ||
367 | { | ||
368 | struct ieee80211_local *local = sdata->local; | ||
369 | struct sta_info *sta; | ||
370 | int band = local->hw.conf.channel->band; | ||
371 | |||
372 | /* TODO: Could consider removing the least recently used entry and | ||
373 | * allowing a new one to be added. */ | ||
374 | if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { | ||
375 | if (net_ratelimit()) { | ||
376 | printk(KERN_DEBUG "%s: No room for a new IBSS STA " | ||
377 | "entry %pM\n", sdata->dev->name, addr); | ||
378 | } | ||
379 | return NULL; | ||
380 | } | ||
381 | |||
382 | if (compare_ether_addr(bssid, sdata->u.ibss.bssid)) | ||
383 | return NULL; | ||
384 | |||
385 | #ifdef CONFIG_MAC80211_VERBOSE_DEBUG | ||
386 | printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n", | ||
387 | wiphy_name(local->hw.wiphy), addr, sdata->dev->name); | ||
388 | #endif | ||
389 | |||
390 | sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); | ||
391 | if (!sta) | ||
392 | return NULL; | ||
393 | |||
394 | set_sta_flags(sta, WLAN_STA_AUTHORIZED); | ||
395 | |||
396 | /* make sure mandatory rates are always added */ | ||
397 | sta->sta.supp_rates[band] = supp_rates | | ||
398 | ieee80211_mandatory_rates(local, band); | ||
399 | |||
400 | rate_control_rate_init(sta); | ||
401 | |||
402 | if (sta_info_insert(sta)) | ||
403 | return NULL; | ||
404 | |||
405 | return sta; | ||
406 | } | ||
407 | |||
408 | static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) | ||
409 | { | ||
410 | struct ieee80211_local *local = sdata->local; | ||
411 | int active = 0; | ||
412 | struct sta_info *sta; | ||
413 | |||
414 | rcu_read_lock(); | ||
415 | |||
416 | list_for_each_entry_rcu(sta, &local->sta_list, list) { | ||
417 | if (sta->sdata == sdata && | ||
418 | time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL, | ||
419 | jiffies)) { | ||
420 | active++; | ||
421 | break; | ||
422 | } | ||
423 | } | ||
424 | |||
425 | rcu_read_unlock(); | ||
426 | |||
427 | return active; | ||
428 | } | ||
429 | |||
430 | |||
431 | static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) | ||
432 | { | ||
433 | struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; | ||
434 | |||
435 | mod_timer(&ifibss->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); | ||
436 | |||
437 | ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT); | ||
438 | if (ieee80211_sta_active_ibss(sdata)) | ||
439 | return; | ||
440 | |||
441 | if ((ifibss->flags & IEEE80211_IBSS_BSSID_SET) && | ||
442 | (!(ifibss->flags & IEEE80211_IBSS_AUTO_CHANNEL_SEL))) | ||
443 | return; | ||
444 | |||
445 | printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other " | ||
446 | "IBSS networks with same SSID (merge)\n", sdata->dev->name); | ||
447 | |||
448 | /* XXX maybe racy? */ | ||
449 | if (sdata->local->scan_req) | ||
450 | return; | ||
451 | |||
452 | memcpy(sdata->local->int_scan_req.ssids[0].ssid, | ||
453 | ifibss->ssid, IEEE80211_MAX_SSID_LEN); | ||
454 | sdata->local->int_scan_req.ssids[0].ssid_len = ifibss->ssid_len; | ||
455 | ieee80211_request_scan(sdata, &sdata->local->int_scan_req); | ||
456 | } | ||
457 | |||
458 | static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) | ||
459 | { | ||
460 | struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; | ||
461 | struct ieee80211_local *local = sdata->local; | ||
462 | struct ieee80211_supported_band *sband; | ||
463 | u8 *pos; | ||
464 | u8 bssid[ETH_ALEN]; | ||
465 | u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; | ||
466 | u16 capability; | ||
467 | int i; | ||
468 | |||
469 | if (ifibss->flags & IEEE80211_IBSS_BSSID_SET) { | ||
470 | memcpy(bssid, ifibss->bssid, ETH_ALEN); | ||
471 | } else { | ||
472 | /* Generate a random, non-broadcast, locally administered BSSID. Mix in | ||
473 | * our own MAC address to make sure that devices without a proper | ||
474 | * random number generator still get different BSSIDs. */ | ||
475 | get_random_bytes(bssid, ETH_ALEN); | ||
476 | for (i = 0; i < ETH_ALEN; i++) | ||
477 | bssid[i] ^= sdata->dev->dev_addr[i]; | ||
478 | bssid[0] &= ~0x01; | ||
479 | bssid[0] |= 0x02; | ||
480 | } | ||
481 | |||
482 | printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n", | ||
483 | sdata->dev->name, bssid); | ||
484 | |||
485 | sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; | ||
486 | |||
487 | if (local->hw.conf.beacon_int == 0) | ||
488 | local->hw.conf.beacon_int = 100; | ||
489 | |||
490 | capability = WLAN_CAPABILITY_IBSS; | ||
491 | |||
492 | if (sdata->default_key) | ||
493 | capability |= WLAN_CAPABILITY_PRIVACY; | ||
494 | else | ||
495 | sdata->drop_unencrypted = 0; | ||
496 | |||
497 | pos = supp_rates; | ||
498 | for (i = 0; i < sband->n_bitrates; i++) { | ||
499 | int rate = sband->bitrates[i].bitrate; | ||
500 | *pos++ = (u8) (rate / 5); | ||
501 | } | ||
502 | |||
503 | return __ieee80211_sta_join_ibss(sdata, | ||
504 | bssid, local->hw.conf.beacon_int, | ||
505 | local->hw.conf.channel->center_freq, | ||
506 | sband->n_bitrates, supp_rates, | ||
507 | capability, 0); | ||
508 | } | ||
509 | |||
510 | static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) | ||
511 | { | ||
512 | struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; | ||
513 | struct ieee80211_local *local = sdata->local; | ||
514 | struct ieee80211_bss *bss; | ||
515 | const u8 *bssid = NULL; | ||
516 | int active_ibss; | ||
517 | |||
518 | if (ifibss->ssid_len == 0) | ||
519 | return -EINVAL; | ||
520 | |||
521 | active_ibss = ieee80211_sta_active_ibss(sdata); | ||
522 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
523 | printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n", | ||
524 | sdata->dev->name, active_ibss); | ||
525 | #endif /* CONFIG_MAC80211_IBSS_DEBUG */ | ||
526 | |||
527 | if (active_ibss) | ||
528 | return 0; | ||
529 | |||
530 | if (ifibss->flags & IEEE80211_IBSS_BSSID_SET) | ||
531 | bssid = ifibss->bssid; | ||
532 | bss = (void *)cfg80211_get_bss(local->hw.wiphy, NULL, bssid, | ||
533 | ifibss->ssid, ifibss->ssid_len, | ||
534 | WLAN_CAPABILITY_IBSS, | ||
535 | WLAN_CAPABILITY_IBSS); | ||
536 | |||
537 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
538 | if (bss) | ||
539 | printk(KERN_DEBUG " sta_find_ibss: selected %pM current " | ||
540 | "%pM\n", bss->cbss.bssid, ifibss->bssid); | ||
541 | #endif /* CONFIG_MAC80211_IBSS_DEBUG */ | ||
542 | |||
543 | if (bss && | ||
544 | (!(ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET) || | ||
545 | memcmp(ifibss->bssid, bss->cbss.bssid, ETH_ALEN))) { | ||
546 | int ret; | ||
547 | |||
548 | printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM" | ||
549 | " based on configured SSID\n", | ||
550 | sdata->dev->name, bss->cbss.bssid); | ||
551 | |||
552 | ret = ieee80211_sta_join_ibss(sdata, bss); | ||
553 | ieee80211_rx_bss_put(local, bss); | ||
554 | return ret; | ||
555 | } else if (bss) | ||
556 | ieee80211_rx_bss_put(local, bss); | ||
557 | |||
558 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
559 | printk(KERN_DEBUG " did not try to join ibss\n"); | ||
560 | #endif /* CONFIG_MAC80211_IBSS_DEBUG */ | ||
561 | |||
562 | /* Selected IBSS not found in current scan results - try to scan */ | ||
563 | if (ifibss->state == IEEE80211_IBSS_MLME_JOINED && | ||
564 | !ieee80211_sta_active_ibss(sdata)) { | ||
565 | mod_timer(&ifibss->timer, jiffies + | ||
566 | IEEE80211_IBSS_MERGE_INTERVAL); | ||
567 | } else if (time_after(jiffies, local->last_scan_completed + | ||
568 | IEEE80211_SCAN_INTERVAL)) { | ||
569 | printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to " | ||
570 | "join\n", sdata->dev->name); | ||
571 | |||
572 | /* XXX maybe racy? */ | ||
573 | if (local->scan_req) | ||
574 | return -EBUSY; | ||
575 | |||
576 | memcpy(local->int_scan_req.ssids[0].ssid, | ||
577 | ifibss->ssid, IEEE80211_MAX_SSID_LEN); | ||
578 | local->int_scan_req.ssids[0].ssid_len = ifibss->ssid_len; | ||
579 | return ieee80211_request_scan(sdata, &local->int_scan_req); | ||
580 | } else if (ifibss->state != IEEE80211_IBSS_MLME_JOINED) { | ||
581 | int interval = IEEE80211_SCAN_INTERVAL; | ||
582 | |||
583 | if (time_after(jiffies, ifibss->ibss_join_req + | ||
584 | IEEE80211_IBSS_JOIN_TIMEOUT)) { | ||
585 | if (!(local->oper_channel->flags & | ||
586 | IEEE80211_CHAN_NO_IBSS)) | ||
587 | return ieee80211_sta_create_ibss(sdata); | ||
588 | printk(KERN_DEBUG "%s: IBSS not allowed on" | ||
589 | " %d MHz\n", sdata->dev->name, | ||
590 | local->hw.conf.channel->center_freq); | ||
591 | |||
592 | /* No IBSS found - decrease scan interval and continue | ||
593 | * scanning. */ | ||
594 | interval = IEEE80211_SCAN_INTERVAL_SLOW; | ||
595 | } | ||
596 | |||
597 | ifibss->state = IEEE80211_IBSS_MLME_SEARCH; | ||
598 | mod_timer(&ifibss->timer, jiffies + interval); | ||
599 | return 0; | ||
600 | } | ||
601 | |||
602 | return 0; | ||
603 | } | ||
604 | |||
605 | static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, | ||
606 | struct ieee80211_mgmt *mgmt, | ||
607 | size_t len) | ||
608 | { | ||
609 | struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; | ||
610 | struct ieee80211_local *local = sdata->local; | ||
611 | int tx_last_beacon; | ||
612 | struct sk_buff *skb; | ||
613 | struct ieee80211_mgmt *resp; | ||
614 | u8 *pos, *end; | ||
615 | |||
616 | if (ifibss->state != IEEE80211_IBSS_MLME_JOINED || | ||
617 | len < 24 + 2 || !ifibss->probe_resp) | ||
618 | return; | ||
619 | |||
620 | if (local->ops->tx_last_beacon) | ||
621 | tx_last_beacon = local->ops->tx_last_beacon(local_to_hw(local)); | ||
622 | else | ||
623 | tx_last_beacon = 1; | ||
624 | |||
625 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
626 | printk(KERN_DEBUG "%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM" | ||
627 | " (tx_last_beacon=%d)\n", | ||
628 | sdata->dev->name, mgmt->sa, mgmt->da, | ||
629 | mgmt->bssid, tx_last_beacon); | ||
630 | #endif /* CONFIG_MAC80211_IBSS_DEBUG */ | ||
631 | |||
632 | if (!tx_last_beacon) | ||
633 | return; | ||
634 | |||
635 | if (memcmp(mgmt->bssid, ifibss->bssid, ETH_ALEN) != 0 && | ||
636 | memcmp(mgmt->bssid, "\xff\xff\xff\xff\xff\xff", ETH_ALEN) != 0) | ||
637 | return; | ||
638 | |||
639 | end = ((u8 *) mgmt) + len; | ||
640 | pos = mgmt->u.probe_req.variable; | ||
641 | if (pos[0] != WLAN_EID_SSID || | ||
642 | pos + 2 + pos[1] > end) { | ||
643 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
644 | printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " | ||
645 | "from %pM\n", | ||
646 | sdata->dev->name, mgmt->sa); | ||
647 | #endif | ||
648 | return; | ||
649 | } | ||
650 | if (pos[1] != 0 && | ||
651 | (pos[1] != ifibss->ssid_len || | ||
652 | memcmp(pos + 2, ifibss->ssid, ifibss->ssid_len) != 0)) { | ||
653 | /* Ignore ProbeReq for foreign SSID */ | ||
654 | return; | ||
655 | } | ||
656 | |||
657 | /* Reply with ProbeResp */ | ||
658 | skb = skb_copy(ifibss->probe_resp, GFP_KERNEL); | ||
659 | if (!skb) | ||
660 | return; | ||
661 | |||
662 | resp = (struct ieee80211_mgmt *) skb->data; | ||
663 | memcpy(resp->da, mgmt->sa, ETH_ALEN); | ||
664 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
665 | printk(KERN_DEBUG "%s: Sending ProbeResp to %pM\n", | ||
666 | sdata->dev->name, resp->da); | ||
667 | #endif /* CONFIG_MAC80211_IBSS_DEBUG */ | ||
668 | ieee80211_tx_skb(sdata, skb, 0); | ||
669 | } | ||
670 | |||
671 | static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, | ||
672 | struct ieee80211_mgmt *mgmt, | ||
673 | size_t len, | ||
674 | struct ieee80211_rx_status *rx_status) | ||
675 | { | ||
676 | size_t baselen; | ||
677 | struct ieee802_11_elems elems; | ||
678 | |||
679 | if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) | ||
680 | return; /* ignore ProbeResp to foreign address */ | ||
681 | |||
682 | baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; | ||
683 | if (baselen > len) | ||
684 | return; | ||
685 | |||
686 | ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, | ||
687 | &elems); | ||
688 | |||
689 | ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); | ||
690 | } | ||
691 | |||
692 | static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, | ||
693 | struct ieee80211_mgmt *mgmt, | ||
694 | size_t len, | ||
695 | struct ieee80211_rx_status *rx_status) | ||
696 | { | ||
697 | size_t baselen; | ||
698 | struct ieee802_11_elems elems; | ||
699 | |||
700 | /* Process beacon from the current BSS */ | ||
701 | baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; | ||
702 | if (baselen > len) | ||
703 | return; | ||
704 | |||
705 | ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); | ||
706 | |||
707 | ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); | ||
708 | } | ||
709 | |||
710 | static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, | ||
711 | struct sk_buff *skb) | ||
712 | { | ||
713 | struct ieee80211_rx_status *rx_status; | ||
714 | struct ieee80211_mgmt *mgmt; | ||
715 | u16 fc; | ||
716 | |||
717 | rx_status = (struct ieee80211_rx_status *) skb->cb; | ||
718 | mgmt = (struct ieee80211_mgmt *) skb->data; | ||
719 | fc = le16_to_cpu(mgmt->frame_control); | ||
720 | |||
721 | switch (fc & IEEE80211_FCTL_STYPE) { | ||
722 | case IEEE80211_STYPE_PROBE_REQ: | ||
723 | ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len); | ||
724 | break; | ||
725 | case IEEE80211_STYPE_PROBE_RESP: | ||
726 | ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, | ||
727 | rx_status); | ||
728 | break; | ||
729 | case IEEE80211_STYPE_BEACON: | ||
730 | ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, | ||
731 | rx_status); | ||
732 | break; | ||
733 | case IEEE80211_STYPE_AUTH: | ||
734 | ieee80211_rx_mgmt_auth_ibss(sdata, mgmt, skb->len); | ||
735 | break; | ||
736 | } | ||
737 | |||
738 | kfree_skb(skb); | ||
739 | } | ||
740 | |||
741 | static void ieee80211_ibss_work(struct work_struct *work) | ||
742 | { | ||
743 | struct ieee80211_sub_if_data *sdata = | ||
744 | container_of(work, struct ieee80211_sub_if_data, u.ibss.work); | ||
745 | struct ieee80211_local *local = sdata->local; | ||
746 | struct ieee80211_if_ibss *ifibss; | ||
747 | struct sk_buff *skb; | ||
748 | |||
749 | if (!netif_running(sdata->dev)) | ||
750 | return; | ||
751 | |||
752 | if (local->sw_scanning || local->hw_scanning) | ||
753 | return; | ||
754 | |||
755 | if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_ADHOC)) | ||
756 | return; | ||
757 | ifibss = &sdata->u.ibss; | ||
758 | |||
759 | while ((skb = skb_dequeue(&ifibss->skb_queue))) | ||
760 | ieee80211_ibss_rx_queued_mgmt(sdata, skb); | ||
761 | |||
762 | if (!test_and_clear_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request)) | ||
763 | return; | ||
764 | |||
765 | switch (ifibss->state) { | ||
766 | case IEEE80211_IBSS_MLME_SEARCH: | ||
767 | ieee80211_sta_find_ibss(sdata); | ||
768 | break; | ||
769 | case IEEE80211_IBSS_MLME_JOINED: | ||
770 | ieee80211_sta_merge_ibss(sdata); | ||
771 | break; | ||
772 | default: | ||
773 | WARN_ON(1); | ||
774 | break; | ||
775 | } | ||
776 | } | ||
777 | |||
778 | static void ieee80211_ibss_timer(unsigned long data) | ||
779 | { | ||
780 | struct ieee80211_sub_if_data *sdata = | ||
781 | (struct ieee80211_sub_if_data *) data; | ||
782 | struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; | ||
783 | struct ieee80211_local *local = sdata->local; | ||
784 | |||
785 | set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request); | ||
786 | queue_work(local->hw.workqueue, &ifibss->work); | ||
787 | } | ||
788 | |||
789 | void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) | ||
790 | { | ||
791 | struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; | ||
792 | |||
793 | INIT_WORK(&ifibss->work, ieee80211_ibss_work); | ||
794 | setup_timer(&ifibss->timer, ieee80211_ibss_timer, | ||
795 | (unsigned long) sdata); | ||
796 | skb_queue_head_init(&ifibss->skb_queue); | ||
797 | |||
798 | ifibss->flags |= IEEE80211_IBSS_AUTO_BSSID_SEL | | ||
799 | IEEE80211_IBSS_AUTO_CHANNEL_SEL; | ||
800 | } | ||
801 | |||
802 | int ieee80211_ibss_commit(struct ieee80211_sub_if_data *sdata) | ||
803 | { | ||
804 | struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; | ||
805 | |||
806 | ifibss->flags &= ~IEEE80211_IBSS_PREV_BSSID_SET; | ||
807 | |||
808 | if (ifibss->ssid_len) | ||
809 | ifibss->flags |= IEEE80211_IBSS_SSID_SET; | ||
810 | else | ||
811 | ifibss->flags &= ~IEEE80211_IBSS_SSID_SET; | ||
812 | |||
813 | ifibss->ibss_join_req = jiffies; | ||
814 | ifibss->state = IEEE80211_IBSS_MLME_SEARCH; | ||
815 | |||
816 | return ieee80211_sta_find_ibss(sdata); | ||
817 | } | ||
818 | |||
819 | int ieee80211_ibss_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) | ||
820 | { | ||
821 | struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; | ||
822 | |||
823 | if (len > IEEE80211_MAX_SSID_LEN) | ||
824 | return -EINVAL; | ||
825 | |||
826 | if (ifibss->ssid_len != len || memcmp(ifibss->ssid, ssid, len) != 0) { | ||
827 | memset(ifibss->ssid, 0, sizeof(ifibss->ssid)); | ||
828 | memcpy(ifibss->ssid, ssid, len); | ||
829 | ifibss->ssid_len = len; | ||
830 | } | ||
831 | |||
832 | return ieee80211_ibss_commit(sdata); | ||
833 | } | ||
834 | |||
835 | int ieee80211_ibss_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len) | ||
836 | { | ||
837 | struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; | ||
838 | |||
839 | memcpy(ssid, ifibss->ssid, ifibss->ssid_len); | ||
840 | *len = ifibss->ssid_len; | ||
841 | |||
842 | return 0; | ||
843 | } | ||
844 | |||
845 | int ieee80211_ibss_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) | ||
846 | { | ||
847 | struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; | ||
848 | |||
849 | if (is_valid_ether_addr(bssid)) { | ||
850 | memcpy(ifibss->bssid, bssid, ETH_ALEN); | ||
851 | ifibss->flags |= IEEE80211_IBSS_BSSID_SET; | ||
852 | } else { | ||
853 | memset(ifibss->bssid, 0, ETH_ALEN); | ||
854 | ifibss->flags &= ~IEEE80211_IBSS_BSSID_SET; | ||
855 | } | ||
856 | |||
857 | if (netif_running(sdata->dev)) { | ||
858 | if (ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID)) { | ||
859 | printk(KERN_DEBUG "%s: Failed to config new BSSID to " | ||
860 | "the low-level driver\n", sdata->dev->name); | ||
861 | } | ||
862 | } | ||
863 | |||
864 | return ieee80211_ibss_commit(sdata); | ||
865 | } | ||
866 | |||
867 | /* scan finished notification */ | ||
868 | void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) | ||
869 | { | ||
870 | struct ieee80211_sub_if_data *sdata = local->scan_sdata; | ||
871 | struct ieee80211_if_ibss *ifibss; | ||
872 | |||
873 | if (sdata && sdata->vif.type == NL80211_IFTYPE_ADHOC) { | ||
874 | ifibss = &sdata->u.ibss; | ||
875 | if ((!(ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET)) || | ||
876 | !ieee80211_sta_active_ibss(sdata)) | ||
877 | ieee80211_sta_find_ibss(sdata); | ||
878 | } | ||
879 | } | ||
880 | |||
881 | ieee80211_rx_result | ||
882 | ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, | ||
883 | struct ieee80211_rx_status *rx_status) | ||
884 | { | ||
885 | struct ieee80211_local *local = sdata->local; | ||
886 | struct ieee80211_mgmt *mgmt; | ||
887 | u16 fc; | ||
888 | |||
889 | if (skb->len < 24) | ||
890 | return RX_DROP_MONITOR; | ||
891 | |||
892 | mgmt = (struct ieee80211_mgmt *) skb->data; | ||
893 | fc = le16_to_cpu(mgmt->frame_control); | ||
894 | |||
895 | switch (fc & IEEE80211_FCTL_STYPE) { | ||
896 | case IEEE80211_STYPE_PROBE_RESP: | ||
897 | case IEEE80211_STYPE_BEACON: | ||
898 | memcpy(skb->cb, rx_status, sizeof(*rx_status)); | ||
899 | case IEEE80211_STYPE_PROBE_REQ: | ||
900 | case IEEE80211_STYPE_AUTH: | ||
901 | skb_queue_tail(&sdata->u.ibss.skb_queue, skb); | ||
902 | queue_work(local->hw.workqueue, &sdata->u.ibss.work); | ||
903 | return RX_QUEUED; | ||
904 | } | ||
905 | |||
906 | return RX_DROP_MONITOR; | ||
907 | } | ||
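
The supported-rates handling above (in __ieee80211_sta_join_ibss and again in ieee80211_sta_create_ibss) converts between the 802.11 Supported Rates encoding — one octet per rate in units of 500 kbit/s, with bit 7 marking a basic rate — and mac80211's per-band bitmap over sband->bitrates[]. A minimal user-space sketch of that conversion, not part of the patch and using a made-up rate table in 100 kbit/s units:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical per-band rate table in 100 kbit/s units (10 == 1 Mbit/s),
 * standing in for sband->bitrates[].bitrate in the patch above. */
static const int bitrates[] = { 10, 20, 55, 110, 60, 90, 120, 180, 240, 360, 480, 540 };
#define N_BITRATES (sizeof(bitrates) / sizeof(bitrates[0]))

/* Turn a Supported Rates IE body into a bitmap over bitrates[]: mask off
 * the basic-rate bit, scale 500 kbit/s units to 100 kbit/s units, and set
 * the bit of every matching entry - the same loop the patch uses. */
static uint32_t supp_rates_to_bitmap(const uint8_t *supp_rates, size_t len)
{
	uint32_t rates = 0;
	size_t i, j;

	for (i = 0; i < len; i++) {
		int bitrate = (supp_rates[i] & 0x7f) * 5;

		for (j = 0; j < N_BITRATES; j++)
			if (bitrates[j] == bitrate)
				rates |= 1u << j;
	}
	return rates;
}

int main(void)
{
	/* 1, 2, 5.5 and 11 Mbit/s; 1 and 2 flagged as basic rates */
	const uint8_t ie[] = { 0x82, 0x84, 0x0b, 0x16 };

	printf("rate bitmap: 0x%08x\n", (unsigned)supp_rates_to_bitmap(ie, sizeof(ie)));
	return 0;
}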
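
The long comment in ieee80211_rx_bss_info explains why rx_status->mactime must be advanced by the air time of the 24-byte MAC header before it can be compared against the beacon's TSF. A worked illustration of that arithmetic (not part of the patch; rate is in 100 kbit/s units, as in the kernel's rate tables):

#include <stdint.h>
#include <stdio.h>

/* Microseconds between mactime (first data symbol at the PHY) and the
 * beacon timestamp field: the 24-byte MAC header sent at `rate` (in
 * 100 kbit/s units). At 1 Mbit/s (rate == 10) this yields
 * 24 * 8 = 192 usec, matching the "(=24 bytes * 8 usecs/byte)" comment. */
static uint64_t header_offset_usec(int rate)
{
	return 24 * 8 * 10 / rate;
}

int main(void)
{
	printf("1 Mbit/s : %llu usec\n", (unsigned long long)header_offset_usec(10));
	printf("11 Mbit/s: %llu usec\n", (unsigned long long)header_offset_usec(110));
	printf("54 Mbit/s: %llu usec\n", (unsigned long long)header_offset_usec(540));
	return 0;
}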
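
ieee80211_sta_create_ibss builds its IBSS BSSID by XOR-ing random bytes with the interface's MAC address, then clearing the multicast bit and setting the locally-administered bit. The same bit manipulation as a standalone sketch (not part of the patch; the MAC address and the use of rand() are placeholders):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define ETH_ALEN 6

/* Derive a random, locally administered, unicast BSSID from the device
 * MAC address, mirroring the steps in ieee80211_sta_create_ibss(). */
static void make_ibss_bssid(uint8_t bssid[ETH_ALEN], const uint8_t mac[ETH_ALEN])
{
	int i;

	for (i = 0; i < ETH_ALEN; i++)
		bssid[i] = (uint8_t)(rand() & 0xff) ^ mac[i];

	bssid[0] &= ~0x01;	/* not multicast/broadcast */
	bssid[0] |= 0x02;	/* locally administered */
}

int main(void)
{
	const uint8_t mac[ETH_ALEN] = { 0x00, 0x1b, 0x2c, 0x3d, 0x4e, 0x5f };
	uint8_t bssid[ETH_ALEN];
	int i;

	make_ibss_bssid(bssid, mac);
	for (i = 0; i < ETH_ALEN; i++)
		printf("%02x%c", bssid[i], i == ETH_ALEN - 1 ? '\n' : ':');
	return 0;
}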
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2cb743ed9f9c..fbb91f1aebb2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h | |||
@@ -239,7 +239,7 @@ struct mesh_preq_queue { | |||
239 | u8 flags; | 239 | u8 flags; |
240 | }; | 240 | }; |
241 | 241 | ||
242 | /* flags used in struct ieee80211_if_sta.flags */ | 242 | /* flags used in struct ieee80211_if_managed.flags */ |
243 | #define IEEE80211_STA_SSID_SET BIT(0) | 243 | #define IEEE80211_STA_SSID_SET BIT(0) |
244 | #define IEEE80211_STA_BSSID_SET BIT(1) | 244 | #define IEEE80211_STA_BSSID_SET BIT(1) |
245 | #define IEEE80211_STA_PREV_BSSID_SET BIT(2) | 245 | #define IEEE80211_STA_PREV_BSSID_SET BIT(2) |
@@ -262,31 +262,30 @@ struct mesh_preq_queue { | |||
262 | #define IEEE80211_STA_REQ_AUTH 2 | 262 | #define IEEE80211_STA_REQ_AUTH 2 |
263 | #define IEEE80211_STA_REQ_RUN 3 | 263 | #define IEEE80211_STA_REQ_RUN 3 |
264 | 264 | ||
265 | /* STA/IBSS MLME states */ | ||
266 | enum ieee80211_sta_mlme_state { | ||
267 | IEEE80211_STA_MLME_DISABLED, | ||
268 | IEEE80211_STA_MLME_DIRECT_PROBE, | ||
269 | IEEE80211_STA_MLME_AUTHENTICATE, | ||
270 | IEEE80211_STA_MLME_ASSOCIATE, | ||
271 | IEEE80211_STA_MLME_ASSOCIATED, | ||
272 | IEEE80211_STA_MLME_IBSS_SEARCH, | ||
273 | IEEE80211_STA_MLME_IBSS_JOINED, | ||
274 | }; | ||
275 | |||
276 | /* bitfield of allowed auth algs */ | 265 | /* bitfield of allowed auth algs */ |
277 | #define IEEE80211_AUTH_ALG_OPEN BIT(0) | 266 | #define IEEE80211_AUTH_ALG_OPEN BIT(0) |
278 | #define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1) | 267 | #define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1) |
279 | #define IEEE80211_AUTH_ALG_LEAP BIT(2) | 268 | #define IEEE80211_AUTH_ALG_LEAP BIT(2) |
280 | 269 | ||
281 | struct ieee80211_if_sta { | 270 | struct ieee80211_if_managed { |
282 | struct timer_list timer; | 271 | struct timer_list timer; |
283 | struct timer_list chswitch_timer; | 272 | struct timer_list chswitch_timer; |
284 | struct work_struct work; | 273 | struct work_struct work; |
285 | struct work_struct chswitch_work; | 274 | struct work_struct chswitch_work; |
275 | |||
286 | u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; | 276 | u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; |
277 | |||
287 | u8 ssid[IEEE80211_MAX_SSID_LEN]; | 278 | u8 ssid[IEEE80211_MAX_SSID_LEN]; |
288 | enum ieee80211_sta_mlme_state state; | ||
289 | size_t ssid_len; | 279 | size_t ssid_len; |
280 | |||
281 | enum { | ||
282 | IEEE80211_STA_MLME_DISABLED, | ||
283 | IEEE80211_STA_MLME_DIRECT_PROBE, | ||
284 | IEEE80211_STA_MLME_AUTHENTICATE, | ||
285 | IEEE80211_STA_MLME_ASSOCIATE, | ||
286 | IEEE80211_STA_MLME_ASSOCIATED, | ||
287 | } state; | ||
288 | |||
290 | u16 aid; | 289 | u16 aid; |
291 | u16 ap_capab, capab; | 290 | u16 ap_capab, capab; |
292 | u8 *extra_ie; /* to be added to the end of AssocReq */ | 291 | u8 *extra_ie; /* to be added to the end of AssocReq */ |
@@ -319,10 +318,6 @@ struct ieee80211_if_sta { | |||
319 | IEEE80211_MFP_REQUIRED | 318 | IEEE80211_MFP_REQUIRED |
320 | } mfp; /* management frame protection */ | 319 | } mfp; /* management frame protection */ |
321 | 320 | ||
322 | unsigned long ibss_join_req; | ||
323 | struct sk_buff *probe_resp; /* ProbeResp template for IBSS */ | ||
324 | u32 supp_rates_bits[IEEE80211_NUM_BANDS]; | ||
325 | |||
326 | int wmm_last_param_set; | 321 | int wmm_last_param_set; |
327 | 322 | ||
328 | /* Extra IE data for management frames */ | 323 | /* Extra IE data for management frames */ |
@@ -342,6 +337,42 @@ struct ieee80211_if_sta { | |||
342 | size_t ie_disassoc_len; | 337 | size_t ie_disassoc_len; |
343 | }; | 338 | }; |
344 | 339 | ||
340 | enum ieee80211_ibss_flags { | ||
341 | IEEE80211_IBSS_AUTO_CHANNEL_SEL = BIT(0), | ||
342 | IEEE80211_IBSS_AUTO_BSSID_SEL = BIT(1), | ||
343 | IEEE80211_IBSS_BSSID_SET = BIT(2), | ||
344 | IEEE80211_IBSS_PREV_BSSID_SET = BIT(3), | ||
345 | IEEE80211_IBSS_SSID_SET = BIT(4), | ||
346 | }; | ||
347 | |||
348 | enum ieee80211_ibss_request { | ||
349 | IEEE80211_IBSS_REQ_RUN = 0, | ||
350 | }; | ||
351 | |||
352 | struct ieee80211_if_ibss { | ||
353 | struct timer_list timer; | ||
354 | struct work_struct work; | ||
355 | |||
356 | struct sk_buff_head skb_queue; | ||
357 | |||
358 | u8 ssid[IEEE80211_MAX_SSID_LEN]; | ||
359 | u8 ssid_len; | ||
360 | |||
361 | u32 flags; | ||
362 | |||
363 | u8 bssid[ETH_ALEN]; | ||
364 | |||
365 | unsigned long request; | ||
366 | |||
367 | unsigned long ibss_join_req; | ||
368 | struct sk_buff *probe_resp; /* ProbeResp template for IBSS */ | ||
369 | |||
370 | enum { | ||
371 | IEEE80211_IBSS_MLME_SEARCH, | ||
372 | IEEE80211_IBSS_MLME_JOINED, | ||
373 | } state; | ||
374 | }; | ||
375 | |||
345 | struct ieee80211_if_mesh { | 376 | struct ieee80211_if_mesh { |
346 | struct work_struct work; | 377 | struct work_struct work; |
347 | struct timer_list housekeeping_timer; | 378 | struct timer_list housekeeping_timer; |
@@ -445,7 +476,8 @@ struct ieee80211_sub_if_data { | |||
445 | struct ieee80211_if_ap ap; | 476 | struct ieee80211_if_ap ap; |
446 | struct ieee80211_if_wds wds; | 477 | struct ieee80211_if_wds wds; |
447 | struct ieee80211_if_vlan vlan; | 478 | struct ieee80211_if_vlan vlan; |
448 | struct ieee80211_if_sta sta; | 479 | struct ieee80211_if_managed mgd; |
480 | struct ieee80211_if_ibss ibss; | ||
449 | #ifdef CONFIG_MAC80211_MESH | 481 | #ifdef CONFIG_MAC80211_MESH |
450 | struct ieee80211_if_mesh mesh; | 482 | struct ieee80211_if_mesh mesh; |
451 | #endif | 483 | #endif |
@@ -564,12 +596,10 @@ enum { | |||
564 | enum queue_stop_reason { | 596 | enum queue_stop_reason { |
565 | IEEE80211_QUEUE_STOP_REASON_DRIVER, | 597 | IEEE80211_QUEUE_STOP_REASON_DRIVER, |
566 | IEEE80211_QUEUE_STOP_REASON_PS, | 598 | IEEE80211_QUEUE_STOP_REASON_PS, |
567 | IEEE80211_QUEUE_STOP_REASON_CSA | 599 | IEEE80211_QUEUE_STOP_REASON_CSA, |
600 | IEEE80211_QUEUE_STOP_REASON_AGGREGATION, | ||
568 | }; | 601 | }; |
569 | 602 | ||
570 | /* maximum number of hardware queues we support. */ | ||
571 | #define QD_MAX_QUEUES (IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_QUEUES) | ||
572 | |||
573 | struct ieee80211_master_priv { | 603 | struct ieee80211_master_priv { |
574 | struct ieee80211_local *local; | 604 | struct ieee80211_local *local; |
575 | }; | 605 | }; |
@@ -582,9 +612,15 @@ struct ieee80211_local { | |||
582 | 612 | ||
583 | const struct ieee80211_ops *ops; | 613 | const struct ieee80211_ops *ops; |
584 | 614 | ||
585 | unsigned long queue_pool[BITS_TO_LONGS(QD_MAX_QUEUES)]; | 615 | /* AC queue corresponding to each AMPDU queue */ |
586 | unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES]; | 616 | s8 ampdu_ac_queue[IEEE80211_MAX_AMPDU_QUEUES]; |
617 | unsigned int amdpu_ac_stop_refcnt[IEEE80211_MAX_AMPDU_QUEUES]; | ||
618 | |||
619 | unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES + | ||
620 | IEEE80211_MAX_AMPDU_QUEUES]; | ||
621 | /* also used to protect ampdu_ac_queue and amdpu_ac_stop_refcnt */ | ||
587 | spinlock_t queue_stop_reason_lock; | 622 | spinlock_t queue_stop_reason_lock; |
623 | |||
588 | struct net_device *mdev; /* wmaster# - "master" 802.11 device */ | 624 | struct net_device *mdev; /* wmaster# - "master" 802.11 device */ |
589 | int open_count; | 625 | int open_count; |
590 | int monitors, cooked_mntrs; | 626 | int monitors, cooked_mntrs; |
@@ -888,34 +924,41 @@ void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx); | |||
888 | void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, | 924 | void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, |
889 | u32 changed); | 925 | u32 changed); |
890 | void ieee80211_configure_filter(struct ieee80211_local *local); | 926 | void ieee80211_configure_filter(struct ieee80211_local *local); |
927 | u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); | ||
891 | 928 | ||
892 | /* wireless extensions */ | 929 | /* wireless extensions */ |
893 | extern const struct iw_handler_def ieee80211_iw_handler_def; | 930 | extern const struct iw_handler_def ieee80211_iw_handler_def; |
894 | 931 | ||
895 | /* STA/IBSS code */ | 932 | /* STA code */ |
896 | void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); | 933 | void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); |
897 | void ieee80211_scan_work(struct work_struct *work); | 934 | ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, |
898 | void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, | 935 | struct sk_buff *skb, |
899 | struct ieee80211_rx_status *rx_status); | 936 | struct ieee80211_rx_status *rx_status); |
937 | int ieee80211_sta_commit(struct ieee80211_sub_if_data *sdata); | ||
900 | int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len); | 938 | int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len); |
901 | int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len); | 939 | int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len); |
902 | int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid); | 940 | int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid); |
903 | void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, | 941 | void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata); |
904 | struct ieee80211_if_sta *ifsta); | ||
905 | struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, | ||
906 | u8 *bssid, u8 *addr, u32 supp_rates); | ||
907 | int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason); | 942 | int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason); |
908 | int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason); | 943 | int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason); |
909 | u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); | ||
910 | u32 ieee80211_sta_get_rates(struct ieee80211_local *local, | ||
911 | struct ieee802_11_elems *elems, | ||
912 | enum ieee80211_band band); | ||
913 | void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, | ||
914 | u8 *ssid, size_t ssid_len); | ||
915 | void ieee80211_send_pspoll(struct ieee80211_local *local, | 944 | void ieee80211_send_pspoll(struct ieee80211_local *local, |
916 | struct ieee80211_sub_if_data *sdata); | 945 | struct ieee80211_sub_if_data *sdata); |
917 | 946 | ||
947 | /* IBSS code */ | ||
948 | int ieee80211_ibss_commit(struct ieee80211_sub_if_data *sdata); | ||
949 | int ieee80211_ibss_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len); | ||
950 | int ieee80211_ibss_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len); | ||
951 | int ieee80211_ibss_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid); | ||
952 | void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); | ||
953 | void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata); | ||
954 | ieee80211_rx_result | ||
955 | ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, | ||
956 | struct ieee80211_rx_status *rx_status); | ||
957 | struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, | ||
958 | u8 *bssid, u8 *addr, u32 supp_rates); | ||
959 | |||
918 | /* scan/BSS handling */ | 960 | /* scan/BSS handling */ |
961 | void ieee80211_scan_work(struct work_struct *work); | ||
919 | int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, | 962 | int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, |
920 | struct cfg80211_scan_request *req); | 963 | struct cfg80211_scan_request *req); |
921 | int ieee80211_scan_results(struct ieee80211_local *local, | 964 | int ieee80211_scan_results(struct ieee80211_local *local, |
@@ -929,6 +972,7 @@ int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, | |||
929 | char *ie, size_t len); | 972 | char *ie, size_t len); |
930 | 973 | ||
931 | void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local); | 974 | void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local); |
975 | void ieee80211_scan_failed(struct ieee80211_local *local); | ||
932 | int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, | 976 | int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, |
933 | struct cfg80211_scan_request *req); | 977 | struct cfg80211_scan_request *req); |
934 | struct ieee80211_bss * | 978 | struct ieee80211_bss * |
@@ -1042,6 +1086,25 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, | |||
1042 | enum queue_stop_reason reason); | 1086 | enum queue_stop_reason reason); |
1043 | void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, | 1087 | void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, |
1044 | enum queue_stop_reason reason); | 1088 | enum queue_stop_reason reason); |
1089 | void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, | ||
1090 | enum queue_stop_reason reason); | ||
1091 | void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, | ||
1092 | enum queue_stop_reason reason); | ||
1093 | |||
1094 | void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, | ||
1095 | u16 transaction, u16 auth_alg, | ||
1096 | u8 *extra, size_t extra_len, | ||
1097 | const u8 *bssid, int encrypt); | ||
1098 | void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, | ||
1099 | u8 *ssid, size_t ssid_len, | ||
1100 | u8 *ie, size_t ie_len); | ||
1101 | |||
1102 | void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, | ||
1103 | const size_t supp_rates_len, | ||
1104 | const u8 *supp_rates); | ||
1105 | u32 ieee80211_sta_get_rates(struct ieee80211_local *local, | ||
1106 | struct ieee802_11_elems *elems, | ||
1107 | enum ieee80211_band band); | ||
1045 | 1108 | ||
1046 | #ifdef CONFIG_MAC80211_NOINLINE | 1109 | #ifdef CONFIG_MAC80211_NOINLINE |
1047 | #define debug_noinline noinline | 1110 | #define debug_noinline noinline |
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index df94b9365264..f9f27b9cadbe 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c | |||
@@ -236,7 +236,10 @@ static int ieee80211_open(struct net_device *dev) | |||
236 | break; | 236 | break; |
237 | case NL80211_IFTYPE_STATION: | 237 | case NL80211_IFTYPE_STATION: |
238 | case NL80211_IFTYPE_ADHOC: | 238 | case NL80211_IFTYPE_ADHOC: |
239 | sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; | 239 | if (sdata->vif.type == NL80211_IFTYPE_STATION) |
240 | sdata->u.mgd.flags &= ~IEEE80211_STA_PREV_BSSID_SET; | ||
241 | else | ||
242 | sdata->u.ibss.flags &= ~IEEE80211_IBSS_PREV_BSSID_SET; | ||
240 | /* fall through */ | 243 | /* fall through */ |
241 | default: | 244 | default: |
242 | conf.vif = &sdata->vif; | 245 | conf.vif = &sdata->vif; |
@@ -321,11 +324,10 @@ static int ieee80211_open(struct net_device *dev) | |||
321 | * yet be effective. Trigger execution of ieee80211_sta_work | 324 | * yet be effective. Trigger execution of ieee80211_sta_work |
322 | * to fix this. | 325 | * to fix this. |
323 | */ | 326 | */ |
324 | if (sdata->vif.type == NL80211_IFTYPE_STATION || | 327 | if (sdata->vif.type == NL80211_IFTYPE_STATION) |
325 | sdata->vif.type == NL80211_IFTYPE_ADHOC) { | 328 | queue_work(local->hw.workqueue, &sdata->u.mgd.work); |
326 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | 329 | else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) |
327 | queue_work(local->hw.workqueue, &ifsta->work); | 330 | queue_work(local->hw.workqueue, &sdata->u.ibss.work); |
328 | } | ||
329 | 331 | ||
330 | netif_tx_start_all_queues(dev); | 332 | netif_tx_start_all_queues(dev); |
331 | 333 | ||
@@ -368,6 +370,18 @@ static int ieee80211_stop(struct net_device *dev) | |||
368 | rcu_read_unlock(); | 370 | rcu_read_unlock(); |
369 | 371 | ||
370 | /* | 372 | /* |
373 | * Announce that we are leaving the network, in case we are a | ||
374 | * station interface type. This must be done before removing | ||
375 | * all stations with sta_info_flush(), otherwise the STA | ||
376 | * information will already be gone and no announcement made. | ||
377 | */ | ||
378 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { | ||
379 | if (sdata->u.mgd.state != IEEE80211_STA_MLME_DISABLED) | ||
380 | ieee80211_sta_deauthenticate(sdata, | ||
381 | WLAN_REASON_DEAUTH_LEAVING); | ||
382 | } | ||
383 | |||
384 | /* | ||
371 | * Remove all stations associated with this interface. | 385 | * Remove all stations associated with this interface. |
372 | * | 386 | * |
373 | * This must be done before calling ops->remove_interface() | 387 | * This must be done before calling ops->remove_interface() |
@@ -452,15 +466,9 @@ static int ieee80211_stop(struct net_device *dev) | |||
452 | netif_addr_unlock_bh(local->mdev); | 466 | netif_addr_unlock_bh(local->mdev); |
453 | break; | 467 | break; |
454 | case NL80211_IFTYPE_STATION: | 468 | case NL80211_IFTYPE_STATION: |
455 | case NL80211_IFTYPE_ADHOC: | 469 | memset(sdata->u.mgd.bssid, 0, ETH_ALEN); |
456 | /* Announce that we are leaving the network. */ | 470 | del_timer_sync(&sdata->u.mgd.chswitch_timer); |
457 | if (sdata->u.sta.state != IEEE80211_STA_MLME_DISABLED) | 471 | del_timer_sync(&sdata->u.mgd.timer); |
458 | ieee80211_sta_deauthenticate(sdata, | ||
459 | WLAN_REASON_DEAUTH_LEAVING); | ||
460 | |||
461 | memset(sdata->u.sta.bssid, 0, ETH_ALEN); | ||
462 | del_timer_sync(&sdata->u.sta.chswitch_timer); | ||
463 | del_timer_sync(&sdata->u.sta.timer); | ||
464 | /* | 472 | /* |
465 | * If the timer fired while we waited for it, it will have | 473 | * If the timer fired while we waited for it, it will have |
466 | * requeued the work. Now the work will be running again | 474 | * requeued the work. Now the work will be running again |
@@ -468,8 +476,8 @@ static int ieee80211_stop(struct net_device *dev) | |||
468 | * whether the interface is running, which, at this point, | 476 | * whether the interface is running, which, at this point, |
469 | * it no longer is. | 477 | * it no longer is. |
470 | */ | 478 | */ |
471 | cancel_work_sync(&sdata->u.sta.work); | 479 | cancel_work_sync(&sdata->u.mgd.work); |
472 | cancel_work_sync(&sdata->u.sta.chswitch_work); | 480 | cancel_work_sync(&sdata->u.mgd.chswitch_work); |
473 | /* | 481 | /* |
474 | * When we get here, the interface is marked down. | 482 | * When we get here, the interface is marked down. |
475 | * Call synchronize_rcu() to wait for the RX path | 483 | * Call synchronize_rcu() to wait for the RX path |
@@ -477,13 +485,22 @@ static int ieee80211_stop(struct net_device *dev) | |||
477 | * frames at this very time on another CPU. | 485 | * frames at this very time on another CPU. |
478 | */ | 486 | */ |
479 | synchronize_rcu(); | 487 | synchronize_rcu(); |
480 | skb_queue_purge(&sdata->u.sta.skb_queue); | 488 | skb_queue_purge(&sdata->u.mgd.skb_queue); |
481 | 489 | ||
482 | sdata->u.sta.flags &= ~(IEEE80211_STA_PRIVACY_INVOKED | | 490 | sdata->u.mgd.flags &= ~(IEEE80211_STA_PRIVACY_INVOKED | |
483 | IEEE80211_STA_TKIP_WEP_USED); | 491 | IEEE80211_STA_TKIP_WEP_USED); |
484 | kfree(sdata->u.sta.extra_ie); | 492 | kfree(sdata->u.mgd.extra_ie); |
485 | sdata->u.sta.extra_ie = NULL; | 493 | sdata->u.mgd.extra_ie = NULL; |
486 | sdata->u.sta.extra_ie_len = 0; | 494 | sdata->u.mgd.extra_ie_len = 0; |
495 | /* fall through */ | ||
496 | case NL80211_IFTYPE_ADHOC: | ||
497 | if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { | ||
498 | memset(sdata->u.ibss.bssid, 0, ETH_ALEN); | ||
499 | del_timer_sync(&sdata->u.ibss.timer); | ||
500 | cancel_work_sync(&sdata->u.ibss.work); | ||
501 | synchronize_rcu(); | ||
502 | skb_queue_purge(&sdata->u.ibss.skb_queue); | ||
503 | } | ||
487 | /* fall through */ | 504 | /* fall through */ |
488 | case NL80211_IFTYPE_MESH_POINT: | 505 | case NL80211_IFTYPE_MESH_POINT: |
489 | if (ieee80211_vif_is_mesh(&sdata->vif)) { | 506 | if (ieee80211_vif_is_mesh(&sdata->vif)) { |
@@ -629,19 +646,20 @@ static void ieee80211_teardown_sdata(struct net_device *dev) | |||
629 | if (ieee80211_vif_is_mesh(&sdata->vif)) | 646 | if (ieee80211_vif_is_mesh(&sdata->vif)) |
630 | mesh_rmc_free(sdata); | 647 | mesh_rmc_free(sdata); |
631 | break; | 648 | break; |
632 | case NL80211_IFTYPE_STATION: | ||
633 | case NL80211_IFTYPE_ADHOC: | 649 | case NL80211_IFTYPE_ADHOC: |
634 | kfree(sdata->u.sta.extra_ie); | 650 | kfree_skb(sdata->u.ibss.probe_resp); |
635 | kfree(sdata->u.sta.assocreq_ies); | 651 | break; |
636 | kfree(sdata->u.sta.assocresp_ies); | 652 | case NL80211_IFTYPE_STATION: |
637 | kfree_skb(sdata->u.sta.probe_resp); | 653 | kfree(sdata->u.mgd.extra_ie); |
638 | kfree(sdata->u.sta.ie_probereq); | 654 | kfree(sdata->u.mgd.assocreq_ies); |
639 | kfree(sdata->u.sta.ie_proberesp); | 655 | kfree(sdata->u.mgd.assocresp_ies); |
640 | kfree(sdata->u.sta.ie_auth); | 656 | kfree(sdata->u.mgd.ie_probereq); |
641 | kfree(sdata->u.sta.ie_assocreq); | 657 | kfree(sdata->u.mgd.ie_proberesp); |
642 | kfree(sdata->u.sta.ie_reassocreq); | 658 | kfree(sdata->u.mgd.ie_auth); |
643 | kfree(sdata->u.sta.ie_deauth); | 659 | kfree(sdata->u.mgd.ie_assocreq); |
644 | kfree(sdata->u.sta.ie_disassoc); | 660 | kfree(sdata->u.mgd.ie_reassocreq); |
661 | kfree(sdata->u.mgd.ie_deauth); | ||
662 | kfree(sdata->u.mgd.ie_disassoc); | ||
645 | break; | 663 | break; |
646 | case NL80211_IFTYPE_WDS: | 664 | case NL80211_IFTYPE_WDS: |
647 | case NL80211_IFTYPE_AP_VLAN: | 665 | case NL80211_IFTYPE_AP_VLAN: |
@@ -708,9 +726,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, | |||
708 | INIT_LIST_HEAD(&sdata->u.ap.vlans); | 726 | INIT_LIST_HEAD(&sdata->u.ap.vlans); |
709 | break; | 727 | break; |
710 | case NL80211_IFTYPE_STATION: | 728 | case NL80211_IFTYPE_STATION: |
711 | case NL80211_IFTYPE_ADHOC: | ||
712 | ieee80211_sta_setup_sdata(sdata); | 729 | ieee80211_sta_setup_sdata(sdata); |
713 | break; | 730 | break; |
731 | case NL80211_IFTYPE_ADHOC: | ||
732 | ieee80211_ibss_setup_sdata(sdata); | ||
733 | break; | ||
714 | case NL80211_IFTYPE_MESH_POINT: | 734 | case NL80211_IFTYPE_MESH_POINT: |
715 | if (ieee80211_vif_is_mesh(&sdata->vif)) | 735 | if (ieee80211_vif_is_mesh(&sdata->vif)) |
716 | ieee80211_mesh_init_sdata(sdata); | 736 | ieee80211_mesh_init_sdata(sdata); |
@@ -798,6 +818,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, | |||
798 | 818 | ||
799 | memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); | 819 | memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); |
800 | SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); | 820 | SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); |
821 | ndev->features |= NETIF_F_NETNS_LOCAL; | ||
801 | 822 | ||
802 | /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ | 823 | /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ |
803 | sdata = netdev_priv(ndev); | 824 | sdata = netdev_priv(ndev); |
diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 19b480de4bbc..687acf23054d 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c | |||
@@ -400,7 +400,7 @@ void ieee80211_key_link(struct ieee80211_key *key, | |||
400 | */ | 400 | */ |
401 | 401 | ||
402 | /* same here, the AP could be using QoS */ | 402 | /* same here, the AP could be using QoS */ |
403 | ap = sta_info_get(key->local, key->sdata->u.sta.bssid); | 403 | ap = sta_info_get(key->local, key->sdata->u.mgd.bssid); |
404 | if (ap) { | 404 | if (ap) { |
405 | if (test_sta_flags(ap, WLAN_STA_WME)) | 405 | if (test_sta_flags(ap, WLAN_STA_WME)) |
406 | key->conf.flags |= | 406 | key->conf.flags |= |
diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5667f4e8067f..f38db4d37e5d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c | |||
@@ -169,9 +169,10 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) | |||
169 | 169 | ||
170 | memset(&conf, 0, sizeof(conf)); | 170 | memset(&conf, 0, sizeof(conf)); |
171 | 171 | ||
172 | if (sdata->vif.type == NL80211_IFTYPE_STATION || | 172 | if (sdata->vif.type == NL80211_IFTYPE_STATION) |
173 | sdata->vif.type == NL80211_IFTYPE_ADHOC) | 173 | conf.bssid = sdata->u.mgd.bssid; |
174 | conf.bssid = sdata->u.sta.bssid; | 174 | else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) |
175 | conf.bssid = sdata->u.ibss.bssid; | ||
175 | else if (sdata->vif.type == NL80211_IFTYPE_AP) | 176 | else if (sdata->vif.type == NL80211_IFTYPE_AP) |
176 | conf.bssid = sdata->dev->dev_addr; | 177 | conf.bssid = sdata->dev->dev_addr; |
177 | else if (ieee80211_vif_is_mesh(&sdata->vif)) { | 178 | else if (ieee80211_vif_is_mesh(&sdata->vif)) { |
@@ -210,7 +211,7 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) | |||
210 | !!rcu_dereference(sdata->u.ap.beacon); | 211 | !!rcu_dereference(sdata->u.ap.beacon); |
211 | break; | 212 | break; |
212 | case NL80211_IFTYPE_ADHOC: | 213 | case NL80211_IFTYPE_ADHOC: |
213 | conf.enable_beacon = !!sdata->u.sta.probe_resp; | 214 | conf.enable_beacon = !!sdata->u.ibss.probe_resp; |
214 | break; | 215 | break; |
215 | case NL80211_IFTYPE_MESH_POINT: | 216 | case NL80211_IFTYPE_MESH_POINT: |
216 | conf.enable_beacon = true; | 217 | conf.enable_beacon = true; |
@@ -705,7 +706,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, | |||
705 | const struct ieee80211_ops *ops) | 706 | const struct ieee80211_ops *ops) |
706 | { | 707 | { |
707 | struct ieee80211_local *local; | 708 | struct ieee80211_local *local; |
708 | int priv_size; | 709 | int priv_size, i; |
709 | struct wiphy *wiphy; | 710 | struct wiphy *wiphy; |
710 | 711 | ||
711 | /* Ensure 32-byte alignment of our private data and hw private data. | 712 | /* Ensure 32-byte alignment of our private data and hw private data. |
@@ -779,6 +780,11 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, | |||
779 | setup_timer(&local->dynamic_ps_timer, | 780 | setup_timer(&local->dynamic_ps_timer, |
780 | ieee80211_dynamic_ps_timer, (unsigned long) local); | 781 | ieee80211_dynamic_ps_timer, (unsigned long) local); |
781 | 782 | ||
783 | for (i = 0; i < IEEE80211_MAX_AMPDU_QUEUES; i++) | ||
784 | local->ampdu_ac_queue[i] = -1; | ||
785 | /* using an s8 won't work with more than that */ | ||
786 | BUILD_BUG_ON(IEEE80211_MAX_AMPDU_QUEUES > 127); | ||
787 | |||
782 | sta_info_init(local); | 788 | sta_info_init(local); |
783 | 789 | ||
784 | tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, | 790 | tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, |
@@ -855,6 +861,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) | |||
855 | /* mac80211 always supports monitor */ | 861 | /* mac80211 always supports monitor */ |
856 | local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); | 862 | local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); |
857 | 863 | ||
864 | if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) | ||
865 | local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; | ||
866 | else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) | ||
867 | local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC; | ||
868 | |||
858 | result = wiphy_register(local->hw.wiphy); | 869 | result = wiphy_register(local->hw.wiphy); |
859 | if (result < 0) | 870 | if (result < 0) |
860 | goto fail_wiphy_register; | 871 | goto fail_wiphy_register; |
@@ -872,7 +883,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) | |||
872 | 883 | ||
873 | mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv), | 884 | mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv), |
874 | "wmaster%d", ieee80211_master_setup, | 885 | "wmaster%d", ieee80211_master_setup, |
875 | ieee80211_num_queues(hw)); | 886 | hw->queues); |
876 | if (!mdev) | 887 | if (!mdev) |
877 | goto fail_mdev_alloc; | 888 | goto fail_mdev_alloc; |
878 | 889 | ||
@@ -916,6 +927,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) | |||
916 | 927 | ||
917 | memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); | 928 | memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); |
918 | SET_NETDEV_DEV(local->mdev, wiphy_dev(local->hw.wiphy)); | 929 | SET_NETDEV_DEV(local->mdev, wiphy_dev(local->hw.wiphy)); |
930 | local->mdev->features |= NETIF_F_NETNS_LOCAL; | ||
919 | 931 | ||
920 | result = register_netdevice(local->mdev); | 932 | result = register_netdevice(local->mdev); |
921 | if (result < 0) | 933 | if (result < 0) |
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index fbb766afe599..841b8450b3de 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c | |||
@@ -15,11 +15,8 @@ | |||
15 | #include <linux/if_ether.h> | 15 | #include <linux/if_ether.h> |
16 | #include <linux/skbuff.h> | 16 | #include <linux/skbuff.h> |
17 | #include <linux/if_arp.h> | 17 | #include <linux/if_arp.h> |
18 | #include <linux/wireless.h> | ||
19 | #include <linux/random.h> | ||
20 | #include <linux/etherdevice.h> | 18 | #include <linux/etherdevice.h> |
21 | #include <linux/rtnetlink.h> | 19 | #include <linux/rtnetlink.h> |
22 | #include <net/iw_handler.h> | ||
23 | #include <net/mac80211.h> | 20 | #include <net/mac80211.h> |
24 | #include <asm/unaligned.h> | 21 | #include <asm/unaligned.h> |
25 | 22 | ||
@@ -35,15 +32,6 @@ | |||
35 | #define IEEE80211_MONITORING_INTERVAL (2 * HZ) | 32 | #define IEEE80211_MONITORING_INTERVAL (2 * HZ) |
36 | #define IEEE80211_PROBE_INTERVAL (60 * HZ) | 33 | #define IEEE80211_PROBE_INTERVAL (60 * HZ) |
37 | #define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ) | 34 | #define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ) |
38 | #define IEEE80211_SCAN_INTERVAL (2 * HZ) | ||
39 | #define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ) | ||
40 | #define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) | ||
41 | |||
42 | #define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ) | ||
43 | #define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ) | ||
44 | |||
45 | #define IEEE80211_IBSS_MAX_STA_ENTRIES 128 | ||
46 | |||
47 | 35 | ||
48 | /* utils */ | 36 | /* utils */ |
49 | static int ecw2cw(int ecw) | 37 | static int ecw2cw(int ecw) |
@@ -92,43 +80,6 @@ static int ieee80211_compatible_rates(struct ieee80211_bss *bss, | |||
92 | return count; | 80 | return count; |
93 | } | 81 | } |
94 | 82 | ||
95 | /* also used by mesh code */ | ||
96 | u32 ieee80211_sta_get_rates(struct ieee80211_local *local, | ||
97 | struct ieee802_11_elems *elems, | ||
98 | enum ieee80211_band band) | ||
99 | { | ||
100 | struct ieee80211_supported_band *sband; | ||
101 | struct ieee80211_rate *bitrates; | ||
102 | size_t num_rates; | ||
103 | u32 supp_rates; | ||
104 | int i, j; | ||
105 | sband = local->hw.wiphy->bands[band]; | ||
106 | |||
107 | if (!sband) { | ||
108 | WARN_ON(1); | ||
109 | sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; | ||
110 | } | ||
111 | |||
112 | bitrates = sband->bitrates; | ||
113 | num_rates = sband->n_bitrates; | ||
114 | supp_rates = 0; | ||
115 | for (i = 0; i < elems->supp_rates_len + | ||
116 | elems->ext_supp_rates_len; i++) { | ||
117 | u8 rate = 0; | ||
118 | int own_rate; | ||
119 | if (i < elems->supp_rates_len) | ||
120 | rate = elems->supp_rates[i]; | ||
121 | else if (elems->ext_supp_rates) | ||
122 | rate = elems->ext_supp_rates | ||
123 | [i - elems->supp_rates_len]; | ||
124 | own_rate = 5 * (rate & 0x7f); | ||
125 | for (j = 0; j < num_rates; j++) | ||
126 | if (bitrates[j].bitrate == own_rate) | ||
127 | supp_rates |= BIT(j); | ||
128 | } | ||
129 | return supp_rates; | ||
130 | } | ||
131 | |||
132 | /* frame sending functions */ | 83 | /* frame sending functions */ |
133 | 84 | ||
134 | static void add_extra_ies(struct sk_buff *skb, u8 *ies, size_t ies_len) | 85 | static void add_extra_ies(struct sk_buff *skb, u8 *ies, size_t ies_len) |
@@ -137,113 +88,9 @@ static void add_extra_ies(struct sk_buff *skb, u8 *ies, size_t ies_len) | |||
137 | memcpy(skb_put(skb, ies_len), ies, ies_len); | 88 | memcpy(skb_put(skb, ies_len), ies, ies_len); |
138 | } | 89 | } |
139 | 90 | ||
140 | /* also used by scanning code */ | 91 | static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) |
141 | void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, | ||
142 | u8 *ssid, size_t ssid_len) | ||
143 | { | ||
144 | struct ieee80211_local *local = sdata->local; | ||
145 | struct ieee80211_supported_band *sband; | ||
146 | struct sk_buff *skb; | ||
147 | struct ieee80211_mgmt *mgmt; | ||
148 | u8 *pos, *supp_rates, *esupp_rates = NULL; | ||
149 | int i; | ||
150 | |||
151 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + | ||
152 | sdata->u.sta.ie_probereq_len); | ||
153 | if (!skb) { | ||
154 | printk(KERN_DEBUG "%s: failed to allocate buffer for probe " | ||
155 | "request\n", sdata->dev->name); | ||
156 | return; | ||
157 | } | ||
158 | skb_reserve(skb, local->hw.extra_tx_headroom); | ||
159 | |||
160 | mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); | ||
161 | memset(mgmt, 0, 24); | ||
162 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | | ||
163 | IEEE80211_STYPE_PROBE_REQ); | ||
164 | memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); | ||
165 | if (dst) { | ||
166 | memcpy(mgmt->da, dst, ETH_ALEN); | ||
167 | memcpy(mgmt->bssid, dst, ETH_ALEN); | ||
168 | } else { | ||
169 | memset(mgmt->da, 0xff, ETH_ALEN); | ||
170 | memset(mgmt->bssid, 0xff, ETH_ALEN); | ||
171 | } | ||
172 | pos = skb_put(skb, 2 + ssid_len); | ||
173 | *pos++ = WLAN_EID_SSID; | ||
174 | *pos++ = ssid_len; | ||
175 | memcpy(pos, ssid, ssid_len); | ||
176 | |||
177 | supp_rates = skb_put(skb, 2); | ||
178 | supp_rates[0] = WLAN_EID_SUPP_RATES; | ||
179 | supp_rates[1] = 0; | ||
180 | sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; | ||
181 | |||
182 | for (i = 0; i < sband->n_bitrates; i++) { | ||
183 | struct ieee80211_rate *rate = &sband->bitrates[i]; | ||
184 | if (esupp_rates) { | ||
185 | pos = skb_put(skb, 1); | ||
186 | esupp_rates[1]++; | ||
187 | } else if (supp_rates[1] == 8) { | ||
188 | esupp_rates = skb_put(skb, 3); | ||
189 | esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES; | ||
190 | esupp_rates[1] = 1; | ||
191 | pos = &esupp_rates[2]; | ||
192 | } else { | ||
193 | pos = skb_put(skb, 1); | ||
194 | supp_rates[1]++; | ||
195 | } | ||
196 | *pos = rate->bitrate / 5; | ||
197 | } | ||
198 | |||
199 | add_extra_ies(skb, sdata->u.sta.ie_probereq, | ||
200 | sdata->u.sta.ie_probereq_len); | ||
201 | |||
202 | ieee80211_tx_skb(sdata, skb, 0); | ||
203 | } | ||
204 | |||
205 | static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, | ||
206 | struct ieee80211_if_sta *ifsta, | ||
207 | int transaction, u8 *extra, size_t extra_len, | ||
208 | int encrypt) | ||
209 | { | ||
210 | struct ieee80211_local *local = sdata->local; | ||
211 | struct sk_buff *skb; | ||
212 | struct ieee80211_mgmt *mgmt; | ||
213 | |||
214 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + | ||
215 | sizeof(*mgmt) + 6 + extra_len + | ||
216 | sdata->u.sta.ie_auth_len); | ||
217 | if (!skb) { | ||
218 | printk(KERN_DEBUG "%s: failed to allocate buffer for auth " | ||
219 | "frame\n", sdata->dev->name); | ||
220 | return; | ||
221 | } | ||
222 | skb_reserve(skb, local->hw.extra_tx_headroom); | ||
223 | |||
224 | mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6); | ||
225 | memset(mgmt, 0, 24 + 6); | ||
226 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | | ||
227 | IEEE80211_STYPE_AUTH); | ||
228 | if (encrypt) | ||
229 | mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); | ||
230 | memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); | ||
231 | memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); | ||
232 | memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); | ||
233 | mgmt->u.auth.auth_alg = cpu_to_le16(ifsta->auth_alg); | ||
234 | mgmt->u.auth.auth_transaction = cpu_to_le16(transaction); | ||
235 | ifsta->auth_transaction = transaction + 1; | ||
236 | mgmt->u.auth.status_code = cpu_to_le16(0); | ||
237 | if (extra) | ||
238 | memcpy(skb_put(skb, extra_len), extra, extra_len); | ||
239 | add_extra_ies(skb, sdata->u.sta.ie_auth, sdata->u.sta.ie_auth_len); | ||
240 | |||
241 | ieee80211_tx_skb(sdata, skb, encrypt); | ||
242 | } | ||
243 | |||
244 | static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, | ||
245 | struct ieee80211_if_sta *ifsta) | ||
246 | { | 92 | { |
93 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
247 | struct ieee80211_local *local = sdata->local; | 94 | struct ieee80211_local *local = sdata->local; |
248 | struct sk_buff *skb; | 95 | struct sk_buff *skb; |
249 | struct ieee80211_mgmt *mgmt; | 96 | struct ieee80211_mgmt *mgmt; |
@@ -256,17 +103,17 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, | |||
256 | u32 rates = 0; | 103 | u32 rates = 0; |
257 | size_t e_ies_len; | 104 | size_t e_ies_len; |
258 | 105 | ||
259 | if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { | 106 | if (ifmgd->flags & IEEE80211_IBSS_PREV_BSSID_SET) { |
260 | e_ies = sdata->u.sta.ie_reassocreq; | 107 | e_ies = sdata->u.mgd.ie_reassocreq; |
261 | e_ies_len = sdata->u.sta.ie_reassocreq_len; | 108 | e_ies_len = sdata->u.mgd.ie_reassocreq_len; |
262 | } else { | 109 | } else { |
263 | e_ies = sdata->u.sta.ie_assocreq; | 110 | e_ies = sdata->u.mgd.ie_assocreq; |
264 | e_ies_len = sdata->u.sta.ie_assocreq_len; | 111 | e_ies_len = sdata->u.mgd.ie_assocreq_len; |
265 | } | 112 | } |
266 | 113 | ||
267 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + | 114 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + |
268 | sizeof(*mgmt) + 200 + ifsta->extra_ie_len + | 115 | sizeof(*mgmt) + 200 + ifmgd->extra_ie_len + |
269 | ifsta->ssid_len + e_ies_len); | 116 | ifmgd->ssid_len + e_ies_len); |
270 | if (!skb) { | 117 | if (!skb) { |
271 | printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " | 118 | printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " |
272 | "frame\n", sdata->dev->name); | 119 | "frame\n", sdata->dev->name); |
@@ -276,7 +123,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, | |||
276 | 123 | ||
277 | sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; | 124 | sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; |
278 | 125 | ||
279 | capab = ifsta->capab; | 126 | capab = ifmgd->capab; |
280 | 127 | ||
281 | if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) { | 128 | if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) { |
282 | if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) | 129 | if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) |
@@ -285,9 +132,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, | |||
285 | capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; | 132 | capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; |
286 | } | 133 | } |
287 | 134 | ||
288 | bss = ieee80211_rx_bss_get(local, ifsta->bssid, | 135 | bss = ieee80211_rx_bss_get(local, ifmgd->bssid, |
289 | local->hw.conf.channel->center_freq, | 136 | local->hw.conf.channel->center_freq, |
290 | ifsta->ssid, ifsta->ssid_len); | 137 | ifmgd->ssid, ifmgd->ssid_len); |
291 | if (bss) { | 138 | if (bss) { |
292 | if (bss->cbss.capability & WLAN_CAPABILITY_PRIVACY) | 139 | if (bss->cbss.capability & WLAN_CAPABILITY_PRIVACY) |
293 | capab |= WLAN_CAPABILITY_PRIVACY; | 140 | capab |= WLAN_CAPABILITY_PRIVACY; |
@@ -312,18 +159,18 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, | |||
312 | 159 | ||
313 | mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); | 160 | mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); |
314 | memset(mgmt, 0, 24); | 161 | memset(mgmt, 0, 24); |
315 | memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); | 162 | memcpy(mgmt->da, ifmgd->bssid, ETH_ALEN); |
316 | memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); | 163 | memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); |
317 | memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); | 164 | memcpy(mgmt->bssid, ifmgd->bssid, ETH_ALEN); |
318 | 165 | ||
319 | if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { | 166 | if (ifmgd->flags & IEEE80211_STA_PREV_BSSID_SET) { |
320 | skb_put(skb, 10); | 167 | skb_put(skb, 10); |
321 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | | 168 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | |
322 | IEEE80211_STYPE_REASSOC_REQ); | 169 | IEEE80211_STYPE_REASSOC_REQ); |
323 | mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab); | 170 | mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab); |
324 | mgmt->u.reassoc_req.listen_interval = | 171 | mgmt->u.reassoc_req.listen_interval = |
325 | cpu_to_le16(local->hw.conf.listen_interval); | 172 | cpu_to_le16(local->hw.conf.listen_interval); |
326 | memcpy(mgmt->u.reassoc_req.current_ap, ifsta->prev_bssid, | 173 | memcpy(mgmt->u.reassoc_req.current_ap, ifmgd->prev_bssid, |
327 | ETH_ALEN); | 174 | ETH_ALEN); |
328 | } else { | 175 | } else { |
329 | skb_put(skb, 4); | 176 | skb_put(skb, 4); |
@@ -335,10 +182,10 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, | |||
335 | } | 182 | } |
336 | 183 | ||
337 | /* SSID */ | 184 | /* SSID */ |
338 | ies = pos = skb_put(skb, 2 + ifsta->ssid_len); | 185 | ies = pos = skb_put(skb, 2 + ifmgd->ssid_len); |
339 | *pos++ = WLAN_EID_SSID; | 186 | *pos++ = WLAN_EID_SSID; |
340 | *pos++ = ifsta->ssid_len; | 187 | *pos++ = ifmgd->ssid_len; |
341 | memcpy(pos, ifsta->ssid, ifsta->ssid_len); | 188 | memcpy(pos, ifmgd->ssid, ifmgd->ssid_len); |
342 | 189 | ||
343 | /* add all rates which were marked to be used above */ | 190 | /* add all rates which were marked to be used above */ |
344 | supp_rates_len = rates_len; | 191 | supp_rates_len = rates_len; |
@@ -393,12 +240,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, | |||
393 | } | 240 | } |
394 | } | 241 | } |
395 | 242 | ||
396 | if (ifsta->extra_ie) { | 243 | if (ifmgd->extra_ie) { |
397 | pos = skb_put(skb, ifsta->extra_ie_len); | 244 | pos = skb_put(skb, ifmgd->extra_ie_len); |
398 | memcpy(pos, ifsta->extra_ie, ifsta->extra_ie_len); | 245 | memcpy(pos, ifmgd->extra_ie, ifmgd->extra_ie_len); |
399 | } | 246 | } |
400 | 247 | ||
401 | if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { | 248 | if (wmm && (ifmgd->flags & IEEE80211_STA_WMM_ENABLED)) { |
402 | pos = skb_put(skb, 9); | 249 | pos = skb_put(skb, 9); |
403 | *pos++ = WLAN_EID_VENDOR_SPECIFIC; | 250 | *pos++ = WLAN_EID_VENDOR_SPECIFIC; |
404 | *pos++ = 7; /* len */ | 251 | *pos++ = 7; /* len */ |
@@ -418,11 +265,11 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, | |||
418 | * mode (11a/b/g) if any one of these ciphers is | 265 | * mode (11a/b/g) if any one of these ciphers is |
419 | * configured as pairwise. | 266 | * configured as pairwise. |
420 | */ | 267 | */ |
421 | if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) && | 268 | if (wmm && (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) && |
422 | sband->ht_cap.ht_supported && | 269 | sband->ht_cap.ht_supported && |
423 | (ht_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_INFORMATION)) && | 270 | (ht_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_INFORMATION)) && |
424 | ht_ie[1] >= sizeof(struct ieee80211_ht_info) && | 271 | ht_ie[1] >= sizeof(struct ieee80211_ht_info) && |
425 | (!(ifsta->flags & IEEE80211_STA_TKIP_WEP_USED))) { | 272 | (!(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED))) { |
426 | struct ieee80211_ht_info *ht_info = | 273 | struct ieee80211_ht_info *ht_info = |
427 | (struct ieee80211_ht_info *)(ht_ie + 2); | 274 | (struct ieee80211_ht_info *)(ht_ie + 2); |
428 | u16 cap = sband->ht_cap.cap; | 275 | u16 cap = sband->ht_cap.cap; |
@@ -459,11 +306,11 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, | |||
459 | 306 | ||
460 | add_extra_ies(skb, e_ies, e_ies_len); | 307 | add_extra_ies(skb, e_ies, e_ies_len); |
461 | 308 | ||
462 | kfree(ifsta->assocreq_ies); | 309 | kfree(ifmgd->assocreq_ies); |
463 | ifsta->assocreq_ies_len = (skb->data + skb->len) - ies; | 310 | ifmgd->assocreq_ies_len = (skb->data + skb->len) - ies; |
464 | ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_KERNEL); | 311 | ifmgd->assocreq_ies = kmalloc(ifmgd->assocreq_ies_len, GFP_KERNEL); |
465 | if (ifsta->assocreq_ies) | 312 | if (ifmgd->assocreq_ies) |
466 | memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len); | 313 | memcpy(ifmgd->assocreq_ies, ies, ifmgd->assocreq_ies_len); |
467 | 314 | ||
468 | ieee80211_tx_skb(sdata, skb, 0); | 315 | ieee80211_tx_skb(sdata, skb, 0); |
469 | } | 316 | } |
@@ -473,18 +320,18 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, | |||
473 | u16 stype, u16 reason) | 320 | u16 stype, u16 reason) |
474 | { | 321 | { |
475 | struct ieee80211_local *local = sdata->local; | 322 | struct ieee80211_local *local = sdata->local; |
476 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | 323 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
477 | struct sk_buff *skb; | 324 | struct sk_buff *skb; |
478 | struct ieee80211_mgmt *mgmt; | 325 | struct ieee80211_mgmt *mgmt; |
479 | u8 *ies; | 326 | u8 *ies; |
480 | size_t ies_len; | 327 | size_t ies_len; |
481 | 328 | ||
482 | if (stype == IEEE80211_STYPE_DEAUTH) { | 329 | if (stype == IEEE80211_STYPE_DEAUTH) { |
483 | ies = sdata->u.sta.ie_deauth; | 330 | ies = sdata->u.mgd.ie_deauth; |
484 | ies_len = sdata->u.sta.ie_deauth_len; | 331 | ies_len = sdata->u.mgd.ie_deauth_len; |
485 | } else { | 332 | } else { |
486 | ies = sdata->u.sta.ie_disassoc; | 333 | ies = sdata->u.mgd.ie_disassoc; |
487 | ies_len = sdata->u.sta.ie_disassoc_len; | 334 | ies_len = sdata->u.mgd.ie_disassoc_len; |
488 | } | 335 | } |
489 | 336 | ||
490 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + | 337 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + |
@@ -498,9 +345,9 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, | |||
498 | 345 | ||
499 | mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); | 346 | mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); |
500 | memset(mgmt, 0, 24); | 347 | memset(mgmt, 0, 24); |
501 | memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); | 348 | memcpy(mgmt->da, ifmgd->bssid, ETH_ALEN); |
502 | memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); | 349 | memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); |
503 | memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); | 350 | memcpy(mgmt->bssid, ifmgd->bssid, ETH_ALEN); |
504 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); | 351 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); |
505 | skb_put(skb, 2); | 352 | skb_put(skb, 2); |
506 | /* u.deauth.reason_code == u.disassoc.reason_code */ | 353 | /* u.deauth.reason_code == u.disassoc.reason_code */ |
@@ -508,13 +355,13 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, | |||
508 | 355 | ||
509 | add_extra_ies(skb, ies, ies_len); | 356 | add_extra_ies(skb, ies, ies_len); |
510 | 357 | ||
511 | ieee80211_tx_skb(sdata, skb, ifsta->flags & IEEE80211_STA_MFP_ENABLED); | 358 | ieee80211_tx_skb(sdata, skb, ifmgd->flags & IEEE80211_STA_MFP_ENABLED); |
512 | } | 359 | } |
513 | 360 | ||
514 | void ieee80211_send_pspoll(struct ieee80211_local *local, | 361 | void ieee80211_send_pspoll(struct ieee80211_local *local, |
515 | struct ieee80211_sub_if_data *sdata) | 362 | struct ieee80211_sub_if_data *sdata) |
516 | { | 363 | { |
517 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | 364 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
518 | struct ieee80211_pspoll *pspoll; | 365 | struct ieee80211_pspoll *pspoll; |
519 | struct sk_buff *skb; | 366 | struct sk_buff *skb; |
520 | u16 fc; | 367 | u16 fc; |
@@ -531,43 +378,20 @@ void ieee80211_send_pspoll(struct ieee80211_local *local, | |||
531 | memset(pspoll, 0, sizeof(*pspoll)); | 378 | memset(pspoll, 0, sizeof(*pspoll)); |
532 | fc = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL | IEEE80211_FCTL_PM; | 379 | fc = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL | IEEE80211_FCTL_PM; |
533 | pspoll->frame_control = cpu_to_le16(fc); | 380 | pspoll->frame_control = cpu_to_le16(fc); |
534 | pspoll->aid = cpu_to_le16(ifsta->aid); | 381 | pspoll->aid = cpu_to_le16(ifmgd->aid); |
535 | 382 | ||
536 | /* aid in PS-Poll has its two MSBs each set to 1 */ | 383 | /* aid in PS-Poll has its two MSBs each set to 1 */ |
537 | pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14); | 384 | pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14); |
538 | 385 | ||
539 | memcpy(pspoll->bssid, ifsta->bssid, ETH_ALEN); | 386 | memcpy(pspoll->bssid, ifmgd->bssid, ETH_ALEN); |
540 | memcpy(pspoll->ta, sdata->dev->dev_addr, ETH_ALEN); | 387 | memcpy(pspoll->ta, sdata->dev->dev_addr, ETH_ALEN); |
541 | 388 | ||
542 | ieee80211_tx_skb(sdata, skb, 0); | 389 | ieee80211_tx_skb(sdata, skb, 0); |
543 | |||
544 | return; | ||
545 | } | 390 | } |
546 | 391 | ||
547 | /* MLME */ | 392 | /* MLME */ |
548 | static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, | ||
549 | const size_t supp_rates_len, | ||
550 | const u8 *supp_rates) | ||
551 | { | ||
552 | struct ieee80211_local *local = sdata->local; | ||
553 | int i, have_higher_than_11mbit = 0; | ||
554 | |||
555 | /* cf. IEEE 802.11 9.2.12 */ | ||
556 | for (i = 0; i < supp_rates_len; i++) | ||
557 | if ((supp_rates[i] & 0x7f) * 5 > 110) | ||
558 | have_higher_than_11mbit = 1; | ||
559 | |||
560 | if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && | ||
561 | have_higher_than_11mbit) | ||
562 | sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; | ||
563 | else | ||
564 | sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; | ||
565 | |||
566 | ieee80211_set_wmm_default(sdata); | ||
567 | } | ||
568 | |||
569 | static void ieee80211_sta_wmm_params(struct ieee80211_local *local, | 393 | static void ieee80211_sta_wmm_params(struct ieee80211_local *local, |
570 | struct ieee80211_if_sta *ifsta, | 394 | struct ieee80211_if_managed *ifmgd, |
571 | u8 *wmm_param, size_t wmm_param_len) | 395 | u8 *wmm_param, size_t wmm_param_len) |
572 | { | 396 | { |
573 | struct ieee80211_tx_queue_params params; | 397 | struct ieee80211_tx_queue_params params; |
@@ -575,7 +399,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, | |||
575 | int count; | 399 | int count; |
576 | u8 *pos; | 400 | u8 *pos; |
577 | 401 | ||
578 | if (!(ifsta->flags & IEEE80211_STA_WMM_ENABLED)) | 402 | if (!(ifmgd->flags & IEEE80211_STA_WMM_ENABLED)) |
579 | return; | 403 | return; |
580 | 404 | ||
581 | if (!wmm_param) | 405 | if (!wmm_param) |
@@ -584,18 +408,15 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, | |||
584 | if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1) | 408 | if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1) |
585 | return; | 409 | return; |
586 | count = wmm_param[6] & 0x0f; | 410 | count = wmm_param[6] & 0x0f; |
587 | if (count == ifsta->wmm_last_param_set) | 411 | if (count == ifmgd->wmm_last_param_set) |
588 | return; | 412 | return; |
589 | ifsta->wmm_last_param_set = count; | 413 | ifmgd->wmm_last_param_set = count; |
590 | 414 | ||
591 | pos = wmm_param + 8; | 415 | pos = wmm_param + 8; |
592 | left = wmm_param_len - 8; | 416 | left = wmm_param_len - 8; |
593 | 417 | ||
594 | 	memset(&params, 0, sizeof(params)); | 418 | 	memset(&params, 0, sizeof(params)); |
595 | 419 | ||
596 | if (!local->ops->conf_tx) | ||
597 | return; | ||
598 | |||
599 | local->wmm_acm = 0; | 420 | local->wmm_acm = 0; |
600 | for (; left >= 4; left -= 4, pos += 4) { | 421 | for (; left >= 4; left -= 4, pos += 4) { |
601 | int aci = (pos[0] >> 5) & 0x03; | 422 | int aci = (pos[0] >> 5) & 0x03; |
@@ -603,26 +424,26 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, | |||
603 | int queue; | 424 | int queue; |
604 | 425 | ||
605 | switch (aci) { | 426 | switch (aci) { |
606 | case 1: | 427 | case 1: /* AC_BK */ |
607 | queue = 3; | 428 | queue = 3; |
608 | if (acm) | 429 | if (acm) |
609 | local->wmm_acm |= BIT(0) | BIT(3); | 430 | local->wmm_acm |= BIT(1) | BIT(2); /* BK/- */ |
610 | break; | 431 | break; |
611 | case 2: | 432 | case 2: /* AC_VI */ |
612 | queue = 1; | 433 | queue = 1; |
613 | if (acm) | 434 | if (acm) |
614 | local->wmm_acm |= BIT(4) | BIT(5); | 435 | local->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */ |
615 | break; | 436 | break; |
616 | case 3: | 437 | case 3: /* AC_VO */ |
617 | queue = 0; | 438 | queue = 0; |
618 | if (acm) | 439 | if (acm) |
619 | local->wmm_acm |= BIT(6) | BIT(7); | 440 | local->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */ |
620 | break; | 441 | break; |
621 | case 0: | 442 | case 0: /* AC_BE */ |
622 | default: | 443 | default: |
623 | queue = 2; | 444 | queue = 2; |
624 | if (acm) | 445 | if (acm) |
625 | local->wmm_acm |= BIT(1) | BIT(2); | 446 | local->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */ |
626 | break; | 447 | break; |
627 | } | 448 | } |
628 | 449 | ||
@@ -636,9 +457,8 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, | |||
636 | local->mdev->name, queue, aci, acm, params.aifs, params.cw_min, | 457 | local->mdev->name, queue, aci, acm, params.aifs, params.cw_min, |
637 | params.cw_max, params.txop); | 458 | params.cw_max, params.txop); |
638 | #endif | 459 | #endif |
639 | /* TODO: handle ACM (block TX, fallback to next lowest allowed | 460 | if (local->ops->conf_tx && |
640 | 	 * AC for now) */ | 461 | 		    local->ops->conf_tx(local_to_hw(local), queue, &params)) { |
641 | 	if (local->ops->conf_tx(local_to_hw(local), queue, &params)) { | ||
642 | printk(KERN_DEBUG "%s: failed to set TX queue " | 462 | printk(KERN_DEBUG "%s: failed to set TX queue " |
643 | "parameters for queue %d\n", local->mdev->name, queue); | 463 | "parameters for queue %d\n", local->mdev->name, queue); |
644 | } | 464 | } |
@@ -671,7 +491,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, | |||
671 | { | 491 | { |
672 | struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; | 492 | struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; |
673 | #ifdef CONFIG_MAC80211_VERBOSE_DEBUG | 493 | #ifdef CONFIG_MAC80211_VERBOSE_DEBUG |
674 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | 494 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
675 | #endif | 495 | #endif |
676 | u32 changed = 0; | 496 | u32 changed = 0; |
677 | bool use_protection; | 497 | bool use_protection; |
@@ -694,7 +514,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, | |||
694 | printk(KERN_DEBUG "%s: CTS protection %s (BSSID=%pM)\n", | 514 | printk(KERN_DEBUG "%s: CTS protection %s (BSSID=%pM)\n", |
695 | sdata->dev->name, | 515 | sdata->dev->name, |
696 | use_protection ? "enabled" : "disabled", | 516 | use_protection ? "enabled" : "disabled", |
697 | ifsta->bssid); | 517 | ifmgd->bssid); |
698 | } | 518 | } |
699 | #endif | 519 | #endif |
700 | bss_conf->use_cts_prot = use_protection; | 520 | bss_conf->use_cts_prot = use_protection; |
@@ -708,7 +528,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, | |||
708 | " (BSSID=%pM)\n", | 528 | " (BSSID=%pM)\n", |
709 | sdata->dev->name, | 529 | sdata->dev->name, |
710 | use_short_preamble ? "short" : "long", | 530 | use_short_preamble ? "short" : "long", |
711 | ifsta->bssid); | 531 | ifmgd->bssid); |
712 | } | 532 | } |
713 | #endif | 533 | #endif |
714 | bss_conf->use_short_preamble = use_short_preamble; | 534 | bss_conf->use_short_preamble = use_short_preamble; |
@@ -722,7 +542,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, | |||
722 | " (BSSID=%pM)\n", | 542 | " (BSSID=%pM)\n", |
723 | sdata->dev->name, | 543 | sdata->dev->name, |
724 | use_short_slot ? "short" : "long", | 544 | use_short_slot ? "short" : "long", |
725 | ifsta->bssid); | 545 | ifmgd->bssid); |
726 | } | 546 | } |
727 | #endif | 547 | #endif |
728 | bss_conf->use_short_slot = use_short_slot; | 548 | bss_conf->use_short_slot = use_short_slot; |
@@ -732,57 +552,57 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, | |||
732 | return changed; | 552 | return changed; |
733 | } | 553 | } |
734 | 554 | ||
735 | static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata, | 555 | static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata) |
736 | struct ieee80211_if_sta *ifsta) | ||
737 | { | 556 | { |
738 | union iwreq_data wrqu; | 557 | union iwreq_data wrqu; |
558 | |||
739 | memset(&wrqu, 0, sizeof(wrqu)); | 559 | memset(&wrqu, 0, sizeof(wrqu)); |
740 | if (ifsta->flags & IEEE80211_STA_ASSOCIATED) | 560 | if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) |
741 | memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); | 561 | memcpy(wrqu.ap_addr.sa_data, sdata->u.mgd.bssid, ETH_ALEN); |
742 | wrqu.ap_addr.sa_family = ARPHRD_ETHER; | 562 | wrqu.ap_addr.sa_family = ARPHRD_ETHER; |
743 | wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); | 563 | wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); |
744 | } | 564 | } |
745 | 565 | ||
746 | static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata, | 566 | static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata) |
747 | struct ieee80211_if_sta *ifsta) | ||
748 | { | 567 | { |
568 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
749 | char *buf; | 569 | char *buf; |
750 | size_t len; | 570 | size_t len; |
751 | int i; | 571 | int i; |
752 | union iwreq_data wrqu; | 572 | union iwreq_data wrqu; |
753 | 573 | ||
754 | if (!ifsta->assocreq_ies && !ifsta->assocresp_ies) | 574 | if (!ifmgd->assocreq_ies && !ifmgd->assocresp_ies) |
755 | return; | 575 | return; |
756 | 576 | ||
757 | buf = kmalloc(50 + 2 * (ifsta->assocreq_ies_len + | 577 | buf = kmalloc(50 + 2 * (ifmgd->assocreq_ies_len + |
758 | ifsta->assocresp_ies_len), GFP_KERNEL); | 578 | ifmgd->assocresp_ies_len), GFP_KERNEL); |
759 | if (!buf) | 579 | if (!buf) |
760 | return; | 580 | return; |
761 | 581 | ||
762 | len = sprintf(buf, "ASSOCINFO("); | 582 | len = sprintf(buf, "ASSOCINFO("); |
763 | if (ifsta->assocreq_ies) { | 583 | if (ifmgd->assocreq_ies) { |
764 | len += sprintf(buf + len, "ReqIEs="); | 584 | len += sprintf(buf + len, "ReqIEs="); |
765 | for (i = 0; i < ifsta->assocreq_ies_len; i++) { | 585 | for (i = 0; i < ifmgd->assocreq_ies_len; i++) { |
766 | len += sprintf(buf + len, "%02x", | 586 | len += sprintf(buf + len, "%02x", |
767 | ifsta->assocreq_ies[i]); | 587 | ifmgd->assocreq_ies[i]); |
768 | } | 588 | } |
769 | } | 589 | } |
770 | if (ifsta->assocresp_ies) { | 590 | if (ifmgd->assocresp_ies) { |
771 | if (ifsta->assocreq_ies) | 591 | if (ifmgd->assocreq_ies) |
772 | len += sprintf(buf + len, " "); | 592 | len += sprintf(buf + len, " "); |
773 | len += sprintf(buf + len, "RespIEs="); | 593 | len += sprintf(buf + len, "RespIEs="); |
774 | for (i = 0; i < ifsta->assocresp_ies_len; i++) { | 594 | for (i = 0; i < ifmgd->assocresp_ies_len; i++) { |
775 | len += sprintf(buf + len, "%02x", | 595 | len += sprintf(buf + len, "%02x", |
776 | ifsta->assocresp_ies[i]); | 596 | ifmgd->assocresp_ies[i]); |
777 | } | 597 | } |
778 | } | 598 | } |
779 | len += sprintf(buf + len, ")"); | 599 | len += sprintf(buf + len, ")"); |
780 | 600 | ||
781 | if (len > IW_CUSTOM_MAX) { | 601 | if (len > IW_CUSTOM_MAX) { |
782 | len = sprintf(buf, "ASSOCRESPIE="); | 602 | len = sprintf(buf, "ASSOCRESPIE="); |
783 | for (i = 0; i < ifsta->assocresp_ies_len; i++) { | 603 | for (i = 0; i < ifmgd->assocresp_ies_len; i++) { |
784 | len += sprintf(buf + len, "%02x", | 604 | len += sprintf(buf + len, "%02x", |
785 | ifsta->assocresp_ies[i]); | 605 | ifmgd->assocresp_ies[i]); |
786 | } | 606 | } |
787 | } | 607 | } |
788 | 608 | ||
@@ -797,20 +617,20 @@ static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata, | |||
797 | 617 | ||
798 | 618 | ||
799 | static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, | 619 | static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, |
800 | struct ieee80211_if_sta *ifsta, | ||
801 | u32 bss_info_changed) | 620 | u32 bss_info_changed) |
802 | { | 621 | { |
622 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
803 | struct ieee80211_local *local = sdata->local; | 623 | struct ieee80211_local *local = sdata->local; |
804 | struct ieee80211_conf *conf = &local_to_hw(local)->conf; | 624 | struct ieee80211_conf *conf = &local_to_hw(local)->conf; |
805 | 625 | ||
806 | struct ieee80211_bss *bss; | 626 | struct ieee80211_bss *bss; |
807 | 627 | ||
808 | bss_info_changed |= BSS_CHANGED_ASSOC; | 628 | bss_info_changed |= BSS_CHANGED_ASSOC; |
809 | ifsta->flags |= IEEE80211_STA_ASSOCIATED; | 629 | ifmgd->flags |= IEEE80211_STA_ASSOCIATED; |
810 | 630 | ||
811 | bss = ieee80211_rx_bss_get(local, ifsta->bssid, | 631 | bss = ieee80211_rx_bss_get(local, ifmgd->bssid, |
812 | conf->channel->center_freq, | 632 | conf->channel->center_freq, |
813 | ifsta->ssid, ifsta->ssid_len); | 633 | ifmgd->ssid, ifmgd->ssid_len); |
814 | if (bss) { | 634 | if (bss) { |
815 | /* set timing information */ | 635 | /* set timing information */ |
816 | sdata->vif.bss_conf.beacon_int = bss->cbss.beacon_interval; | 636 | sdata->vif.bss_conf.beacon_int = bss->cbss.beacon_interval; |
@@ -823,11 +643,11 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, | |||
823 | ieee80211_rx_bss_put(local, bss); | 643 | ieee80211_rx_bss_put(local, bss); |
824 | } | 644 | } |
825 | 645 | ||
826 | ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; | 646 | ifmgd->flags |= IEEE80211_STA_PREV_BSSID_SET; |
827 | memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); | 647 | memcpy(ifmgd->prev_bssid, sdata->u.mgd.bssid, ETH_ALEN); |
828 | ieee80211_sta_send_associnfo(sdata, ifsta); | 648 | ieee80211_sta_send_associnfo(sdata); |
829 | 649 | ||
830 | ifsta->last_probe = jiffies; | 650 | ifmgd->last_probe = jiffies; |
831 | ieee80211_led_assoc(local, 1); | 651 | ieee80211_led_assoc(local, 1); |
832 | 652 | ||
833 | sdata->vif.bss_conf.assoc = 1; | 653 | sdata->vif.bss_conf.assoc = 1; |
@@ -856,70 +676,74 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, | |||
856 | netif_tx_start_all_queues(sdata->dev); | 676 | netif_tx_start_all_queues(sdata->dev); |
857 | netif_carrier_on(sdata->dev); | 677 | netif_carrier_on(sdata->dev); |
858 | 678 | ||
859 | ieee80211_sta_send_apinfo(sdata, ifsta); | 679 | ieee80211_sta_send_apinfo(sdata); |
860 | } | 680 | } |
861 | 681 | ||
862 | static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, | 682 | static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata) |
863 | struct ieee80211_if_sta *ifsta) | ||
864 | { | 683 | { |
865 | ifsta->direct_probe_tries++; | 684 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
866 | if (ifsta->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) { | 685 | |
686 | ifmgd->direct_probe_tries++; | ||
687 | if (ifmgd->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) { | ||
867 | printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n", | 688 | printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n", |
868 | sdata->dev->name, ifsta->bssid); | 689 | sdata->dev->name, ifmgd->bssid); |
869 | ifsta->state = IEEE80211_STA_MLME_DISABLED; | 690 | ifmgd->state = IEEE80211_STA_MLME_DISABLED; |
870 | ieee80211_sta_send_apinfo(sdata, ifsta); | 691 | ieee80211_sta_send_apinfo(sdata); |
871 | 692 | ||
872 | /* | 693 | /* |
873 | * Most likely AP is not in the range so remove the | 694 | * Most likely AP is not in the range so remove the |
874 | * bss information associated to the AP | 695 | * bss information associated to the AP |
875 | */ | 696 | */ |
876 | ieee80211_rx_bss_remove(sdata, ifsta->bssid, | 697 | ieee80211_rx_bss_remove(sdata, ifmgd->bssid, |
877 | sdata->local->hw.conf.channel->center_freq, | 698 | sdata->local->hw.conf.channel->center_freq, |
878 | ifsta->ssid, ifsta->ssid_len); | 699 | ifmgd->ssid, ifmgd->ssid_len); |
879 | return; | 700 | return; |
880 | } | 701 | } |
881 | 702 | ||
882 | printk(KERN_DEBUG "%s: direct probe to AP %pM try %d\n", | 703 | printk(KERN_DEBUG "%s: direct probe to AP %pM try %d\n", |
883 | sdata->dev->name, ifsta->bssid, | 704 | sdata->dev->name, ifmgd->bssid, |
884 | ifsta->direct_probe_tries); | 705 | ifmgd->direct_probe_tries); |
885 | 706 | ||
886 | ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; | 707 | ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE; |
887 | 708 | ||
888 | set_bit(IEEE80211_STA_REQ_DIRECT_PROBE, &ifsta->request); | 709 | set_bit(IEEE80211_STA_REQ_DIRECT_PROBE, &ifmgd->request); |
889 | 710 | ||
890 | /* Direct probe is sent to broadcast address as some APs | 711 | /* Direct probe is sent to broadcast address as some APs |
891 | * will not answer to direct packet in unassociated state. | 712 | * will not answer to direct packet in unassociated state. |
892 | */ | 713 | */ |
893 | ieee80211_send_probe_req(sdata, NULL, | 714 | ieee80211_send_probe_req(sdata, NULL, |
894 | ifsta->ssid, ifsta->ssid_len); | 715 | ifmgd->ssid, ifmgd->ssid_len, NULL, 0); |
895 | 716 | ||
896 | mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); | 717 | mod_timer(&ifmgd->timer, jiffies + IEEE80211_AUTH_TIMEOUT); |
897 | } | 718 | } |
898 | 719 | ||
899 | 720 | ||
900 | static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, | 721 | static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata) |
901 | struct ieee80211_if_sta *ifsta) | ||
902 | { | 722 | { |
903 | ifsta->auth_tries++; | 723 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
904 | if (ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) { | 724 | |
725 | ifmgd->auth_tries++; | ||
726 | if (ifmgd->auth_tries > IEEE80211_AUTH_MAX_TRIES) { | ||
905 | printk(KERN_DEBUG "%s: authentication with AP %pM" | 727 | printk(KERN_DEBUG "%s: authentication with AP %pM" |
906 | " timed out\n", | 728 | " timed out\n", |
907 | sdata->dev->name, ifsta->bssid); | 729 | sdata->dev->name, ifmgd->bssid); |
908 | ifsta->state = IEEE80211_STA_MLME_DISABLED; | 730 | ifmgd->state = IEEE80211_STA_MLME_DISABLED; |
909 | ieee80211_sta_send_apinfo(sdata, ifsta); | 731 | ieee80211_sta_send_apinfo(sdata); |
910 | ieee80211_rx_bss_remove(sdata, ifsta->bssid, | 732 | ieee80211_rx_bss_remove(sdata, ifmgd->bssid, |
911 | sdata->local->hw.conf.channel->center_freq, | 733 | sdata->local->hw.conf.channel->center_freq, |
912 | ifsta->ssid, ifsta->ssid_len); | 734 | ifmgd->ssid, ifmgd->ssid_len); |
913 | return; | 735 | return; |
914 | } | 736 | } |
915 | 737 | ||
916 | ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; | 738 | ifmgd->state = IEEE80211_STA_MLME_AUTHENTICATE; |
917 | printk(KERN_DEBUG "%s: authenticate with AP %pM\n", | 739 | printk(KERN_DEBUG "%s: authenticate with AP %pM\n", |
918 | sdata->dev->name, ifsta->bssid); | 740 | sdata->dev->name, ifmgd->bssid); |
919 | 741 | ||
920 | ieee80211_send_auth(sdata, ifsta, 1, NULL, 0, 0); | 742 | ieee80211_send_auth(sdata, 1, ifmgd->auth_alg, NULL, 0, |
743 | ifmgd->bssid, 0); | ||
744 | ifmgd->auth_transaction = 2; | ||
921 | 745 | ||
922 | mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); | 746 | mod_timer(&ifmgd->timer, jiffies + IEEE80211_AUTH_TIMEOUT); |
923 | } | 747 | } |
924 | 748 | ||
925 | /* | 749 | /* |
@@ -927,27 +751,28 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, | |||
927 | * if self disconnected or a reason code from the AP. | 751 | * if self disconnected or a reason code from the AP. |
928 | */ | 752 | */ |
929 | static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, | 753 | static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, |
930 | struct ieee80211_if_sta *ifsta, bool deauth, | 754 | bool deauth, bool self_disconnected, |
931 | bool self_disconnected, u16 reason) | 755 | u16 reason) |
932 | { | 756 | { |
757 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
933 | struct ieee80211_local *local = sdata->local; | 758 | struct ieee80211_local *local = sdata->local; |
934 | struct sta_info *sta; | 759 | struct sta_info *sta; |
935 | u32 changed = 0, config_changed = 0; | 760 | u32 changed = 0, config_changed = 0; |
936 | 761 | ||
937 | rcu_read_lock(); | 762 | rcu_read_lock(); |
938 | 763 | ||
939 | sta = sta_info_get(local, ifsta->bssid); | 764 | sta = sta_info_get(local, ifmgd->bssid); |
940 | if (!sta) { | 765 | if (!sta) { |
941 | rcu_read_unlock(); | 766 | rcu_read_unlock(); |
942 | return; | 767 | return; |
943 | } | 768 | } |
944 | 769 | ||
945 | if (deauth) { | 770 | if (deauth) { |
946 | ifsta->direct_probe_tries = 0; | 771 | ifmgd->direct_probe_tries = 0; |
947 | ifsta->auth_tries = 0; | 772 | ifmgd->auth_tries = 0; |
948 | } | 773 | } |
949 | ifsta->assoc_scan_tries = 0; | 774 | ifmgd->assoc_scan_tries = 0; |
950 | ifsta->assoc_tries = 0; | 775 | ifmgd->assoc_tries = 0; |
951 | 776 | ||
952 | netif_tx_stop_all_queues(sdata->dev); | 777 | netif_tx_stop_all_queues(sdata->dev); |
953 | netif_carrier_off(sdata->dev); | 778 | netif_carrier_off(sdata->dev); |
@@ -963,20 +788,20 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, | |||
963 | IEEE80211_STYPE_DISASSOC, reason); | 788 | IEEE80211_STYPE_DISASSOC, reason); |
964 | } | 789 | } |
965 | 790 | ||
966 | ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; | 791 | ifmgd->flags &= ~IEEE80211_STA_ASSOCIATED; |
967 | changed |= ieee80211_reset_erp_info(sdata); | 792 | changed |= ieee80211_reset_erp_info(sdata); |
968 | 793 | ||
969 | ieee80211_led_assoc(local, 0); | 794 | ieee80211_led_assoc(local, 0); |
970 | changed |= BSS_CHANGED_ASSOC; | 795 | changed |= BSS_CHANGED_ASSOC; |
971 | sdata->vif.bss_conf.assoc = false; | 796 | sdata->vif.bss_conf.assoc = false; |
972 | 797 | ||
973 | ieee80211_sta_send_apinfo(sdata, ifsta); | 798 | ieee80211_sta_send_apinfo(sdata); |
974 | 799 | ||
975 | if (self_disconnected || reason == WLAN_REASON_DISASSOC_STA_HAS_LEFT) { | 800 | if (self_disconnected || reason == WLAN_REASON_DISASSOC_STA_HAS_LEFT) { |
976 | ifsta->state = IEEE80211_STA_MLME_DISABLED; | 801 | ifmgd->state = IEEE80211_STA_MLME_DISABLED; |
977 | ieee80211_rx_bss_remove(sdata, ifsta->bssid, | 802 | ieee80211_rx_bss_remove(sdata, ifmgd->bssid, |
978 | sdata->local->hw.conf.channel->center_freq, | 803 | sdata->local->hw.conf.channel->center_freq, |
979 | ifsta->ssid, ifsta->ssid_len); | 804 | ifmgd->ssid, ifmgd->ssid_len); |
980 | } | 805 | } |
981 | 806 | ||
982 | rcu_read_unlock(); | 807 | rcu_read_unlock(); |
@@ -999,7 +824,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, | |||
999 | 824 | ||
1000 | rcu_read_lock(); | 825 | rcu_read_lock(); |
1001 | 826 | ||
1002 | sta = sta_info_get(local, ifsta->bssid); | 827 | sta = sta_info_get(local, ifmgd->bssid); |
1003 | if (!sta) { | 828 | if (!sta) { |
1004 | rcu_read_unlock(); | 829 | rcu_read_unlock(); |
1005 | return; | 830 | return; |
@@ -1020,27 +845,27 @@ static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata) | |||
1020 | return 1; | 845 | return 1; |
1021 | } | 846 | } |
1022 | 847 | ||
1023 | static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata, | 848 | static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata) |
1024 | struct ieee80211_if_sta *ifsta) | ||
1025 | { | 849 | { |
850 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
1026 | struct ieee80211_local *local = sdata->local; | 851 | struct ieee80211_local *local = sdata->local; |
1027 | struct ieee80211_bss *bss; | 852 | struct ieee80211_bss *bss; |
1028 | int bss_privacy; | 853 | int bss_privacy; |
1029 | int wep_privacy; | 854 | int wep_privacy; |
1030 | int privacy_invoked; | 855 | int privacy_invoked; |
1031 | 856 | ||
1032 | if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL)) | 857 | if (!ifmgd || (ifmgd->flags & IEEE80211_STA_MIXED_CELL)) |
1033 | return 0; | 858 | return 0; |
1034 | 859 | ||
1035 | bss = ieee80211_rx_bss_get(local, ifsta->bssid, | 860 | bss = ieee80211_rx_bss_get(local, ifmgd->bssid, |
1036 | local->hw.conf.channel->center_freq, | 861 | local->hw.conf.channel->center_freq, |
1037 | ifsta->ssid, ifsta->ssid_len); | 862 | ifmgd->ssid, ifmgd->ssid_len); |
1038 | if (!bss) | 863 | if (!bss) |
1039 | return 0; | 864 | return 0; |
1040 | 865 | ||
1041 | bss_privacy = !!(bss->cbss.capability & WLAN_CAPABILITY_PRIVACY); | 866 | bss_privacy = !!(bss->cbss.capability & WLAN_CAPABILITY_PRIVACY); |
1042 | wep_privacy = !!ieee80211_sta_wep_configured(sdata); | 867 | wep_privacy = !!ieee80211_sta_wep_configured(sdata); |
1043 | privacy_invoked = !!(ifsta->flags & IEEE80211_STA_PRIVACY_INVOKED); | 868 | privacy_invoked = !!(ifmgd->flags & IEEE80211_STA_PRIVACY_INVOKED); |
1044 | 869 | ||
1045 | ieee80211_rx_bss_put(local, bss); | 870 | ieee80211_rx_bss_put(local, bss); |
1046 | 871 | ||
@@ -1050,41 +875,42 @@ static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata, | |||
1050 | return 1; | 875 | return 1; |
1051 | } | 876 | } |
1052 | 877 | ||
1053 | static void ieee80211_associate(struct ieee80211_sub_if_data *sdata, | 878 | static void ieee80211_associate(struct ieee80211_sub_if_data *sdata) |
1054 | struct ieee80211_if_sta *ifsta) | ||
1055 | { | 879 | { |
1056 | ifsta->assoc_tries++; | 880 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
1057 | if (ifsta->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) { | 881 | |
882 | ifmgd->assoc_tries++; | ||
883 | if (ifmgd->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) { | ||
1058 | printk(KERN_DEBUG "%s: association with AP %pM" | 884 | printk(KERN_DEBUG "%s: association with AP %pM" |
1059 | " timed out\n", | 885 | " timed out\n", |
1060 | sdata->dev->name, ifsta->bssid); | 886 | sdata->dev->name, ifmgd->bssid); |
1061 | ifsta->state = IEEE80211_STA_MLME_DISABLED; | 887 | ifmgd->state = IEEE80211_STA_MLME_DISABLED; |
1062 | ieee80211_sta_send_apinfo(sdata, ifsta); | 888 | ieee80211_sta_send_apinfo(sdata); |
1063 | ieee80211_rx_bss_remove(sdata, ifsta->bssid, | 889 | ieee80211_rx_bss_remove(sdata, ifmgd->bssid, |
1064 | sdata->local->hw.conf.channel->center_freq, | 890 | sdata->local->hw.conf.channel->center_freq, |
1065 | ifsta->ssid, ifsta->ssid_len); | 891 | ifmgd->ssid, ifmgd->ssid_len); |
1066 | return; | 892 | return; |
1067 | } | 893 | } |
1068 | 894 | ||
1069 | ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; | 895 | ifmgd->state = IEEE80211_STA_MLME_ASSOCIATE; |
1070 | printk(KERN_DEBUG "%s: associate with AP %pM\n", | 896 | printk(KERN_DEBUG "%s: associate with AP %pM\n", |
1071 | sdata->dev->name, ifsta->bssid); | 897 | sdata->dev->name, ifmgd->bssid); |
1072 | if (ieee80211_privacy_mismatch(sdata, ifsta)) { | 898 | if (ieee80211_privacy_mismatch(sdata)) { |
1073 | printk(KERN_DEBUG "%s: mismatch in privacy configuration and " | 899 | printk(KERN_DEBUG "%s: mismatch in privacy configuration and " |
1074 | "mixed-cell disabled - abort association\n", sdata->dev->name); | 900 | "mixed-cell disabled - abort association\n", sdata->dev->name); |
1075 | ifsta->state = IEEE80211_STA_MLME_DISABLED; | 901 | ifmgd->state = IEEE80211_STA_MLME_DISABLED; |
1076 | return; | 902 | return; |
1077 | } | 903 | } |
1078 | 904 | ||
1079 | ieee80211_send_assoc(sdata, ifsta); | 905 | ieee80211_send_assoc(sdata); |
1080 | 906 | ||
1081 | mod_timer(&ifsta->timer, jiffies + IEEE80211_ASSOC_TIMEOUT); | 907 | mod_timer(&ifmgd->timer, jiffies + IEEE80211_ASSOC_TIMEOUT); |
1082 | } | 908 | } |
1083 | 909 | ||
1084 | 910 | ||
1085 | static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, | 911 | static void ieee80211_associated(struct ieee80211_sub_if_data *sdata) |
1086 | struct ieee80211_if_sta *ifsta) | ||
1087 | { | 912 | { |
913 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
1088 | struct ieee80211_local *local = sdata->local; | 914 | struct ieee80211_local *local = sdata->local; |
1089 | struct sta_info *sta; | 915 | struct sta_info *sta; |
1090 | int disassoc; | 916 | int disassoc; |
@@ -1094,38 +920,40 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, | |||
1094 | * for better APs. */ | 920 | * for better APs. */ |
1095 | /* TODO: remove expired BSSes */ | 921 | /* TODO: remove expired BSSes */ |
1096 | 922 | ||
1097 | ifsta->state = IEEE80211_STA_MLME_ASSOCIATED; | 923 | ifmgd->state = IEEE80211_STA_MLME_ASSOCIATED; |
1098 | 924 | ||
1099 | rcu_read_lock(); | 925 | rcu_read_lock(); |
1100 | 926 | ||
1101 | sta = sta_info_get(local, ifsta->bssid); | 927 | sta = sta_info_get(local, ifmgd->bssid); |
1102 | if (!sta) { | 928 | if (!sta) { |
1103 | printk(KERN_DEBUG "%s: No STA entry for own AP %pM\n", | 929 | printk(KERN_DEBUG "%s: No STA entry for own AP %pM\n", |
1104 | sdata->dev->name, ifsta->bssid); | 930 | sdata->dev->name, ifmgd->bssid); |
1105 | disassoc = 1; | 931 | disassoc = 1; |
1106 | } else { | 932 | } else { |
1107 | disassoc = 0; | 933 | disassoc = 0; |
1108 | if (time_after(jiffies, | 934 | if (time_after(jiffies, |
1109 | sta->last_rx + IEEE80211_MONITORING_INTERVAL)) { | 935 | sta->last_rx + IEEE80211_MONITORING_INTERVAL)) { |
1110 | if (ifsta->flags & IEEE80211_STA_PROBEREQ_POLL) { | 936 | if (ifmgd->flags & IEEE80211_STA_PROBEREQ_POLL) { |
1111 | printk(KERN_DEBUG "%s: No ProbeResp from " | 937 | printk(KERN_DEBUG "%s: No ProbeResp from " |
1112 | "current AP %pM - assume out of " | 938 | "current AP %pM - assume out of " |
1113 | "range\n", | 939 | "range\n", |
1114 | sdata->dev->name, ifsta->bssid); | 940 | sdata->dev->name, ifmgd->bssid); |
1115 | disassoc = 1; | 941 | disassoc = 1; |
1116 | } else | 942 | } else |
1117 | ieee80211_send_probe_req(sdata, ifsta->bssid, | 943 | ieee80211_send_probe_req(sdata, ifmgd->bssid, |
1118 | ifsta->ssid, | 944 | ifmgd->ssid, |
1119 | ifsta->ssid_len); | 945 | ifmgd->ssid_len, |
1120 | ifsta->flags ^= IEEE80211_STA_PROBEREQ_POLL; | 946 | NULL, 0); |
947 | ifmgd->flags ^= IEEE80211_STA_PROBEREQ_POLL; | ||
1121 | } else { | 948 | } else { |
1122 | ifsta->flags &= ~IEEE80211_STA_PROBEREQ_POLL; | 949 | ifmgd->flags &= ~IEEE80211_STA_PROBEREQ_POLL; |
1123 | if (time_after(jiffies, ifsta->last_probe + | 950 | if (time_after(jiffies, ifmgd->last_probe + |
1124 | IEEE80211_PROBE_INTERVAL)) { | 951 | IEEE80211_PROBE_INTERVAL)) { |
1125 | ifsta->last_probe = jiffies; | 952 | ifmgd->last_probe = jiffies; |
1126 | ieee80211_send_probe_req(sdata, ifsta->bssid, | 953 | ieee80211_send_probe_req(sdata, ifmgd->bssid, |
1127 | ifsta->ssid, | 954 | ifmgd->ssid, |
1128 | ifsta->ssid_len); | 955 | ifmgd->ssid_len, |
956 | NULL, 0); | ||
1129 | } | 957 | } |
1130 | } | 958 | } |
1131 | } | 959 | } |
@@ -1133,25 +961,25 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, | |||
1133 | rcu_read_unlock(); | 961 | rcu_read_unlock(); |
1134 | 962 | ||
1135 | if (disassoc) | 963 | if (disassoc) |
1136 | ieee80211_set_disassoc(sdata, ifsta, true, true, | 964 | ieee80211_set_disassoc(sdata, true, true, |
1137 | WLAN_REASON_PREV_AUTH_NOT_VALID); | 965 | WLAN_REASON_PREV_AUTH_NOT_VALID); |
1138 | else | 966 | else |
1139 | mod_timer(&ifsta->timer, jiffies + | 967 | mod_timer(&ifmgd->timer, jiffies + |
1140 | IEEE80211_MONITORING_INTERVAL); | 968 | IEEE80211_MONITORING_INTERVAL); |
1141 | } | 969 | } |
1142 | 970 | ||
1143 | 971 | ||
1144 | static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata, | 972 | static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata) |
1145 | struct ieee80211_if_sta *ifsta) | ||
1146 | { | 973 | { |
974 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
975 | |||
1147 | printk(KERN_DEBUG "%s: authenticated\n", sdata->dev->name); | 976 | printk(KERN_DEBUG "%s: authenticated\n", sdata->dev->name); |
1148 | ifsta->flags |= IEEE80211_STA_AUTHENTICATED; | 977 | ifmgd->flags |= IEEE80211_STA_AUTHENTICATED; |
1149 | ieee80211_associate(sdata, ifsta); | 978 | ieee80211_associate(sdata); |
1150 | } | 979 | } |
1151 | 980 | ||
1152 | 981 | ||
1153 | static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, | 982 | static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, |
1154 | struct ieee80211_if_sta *ifsta, | ||
1155 | struct ieee80211_mgmt *mgmt, | 983 | struct ieee80211_mgmt *mgmt, |
1156 | size_t len) | 984 | size_t len) |
1157 | { | 985 | { |
@@ -1162,59 +990,37 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, | |||
1162 | ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); | 990 | ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); |
1163 | if (!elems.challenge) | 991 | if (!elems.challenge) |
1164 | return; | 992 | return; |
1165 | ieee80211_send_auth(sdata, ifsta, 3, elems.challenge - 2, | 993 | ieee80211_send_auth(sdata, 3, sdata->u.mgd.auth_alg, |
1166 | elems.challenge_len + 2, 1); | 994 | elems.challenge - 2, elems.challenge_len + 2, |
1167 | } | 995 | sdata->u.mgd.bssid, 1); |
1168 | 996 | sdata->u.mgd.auth_transaction = 4; | |
1169 | static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, | ||
1170 | struct ieee80211_if_sta *ifsta, | ||
1171 | struct ieee80211_mgmt *mgmt, | ||
1172 | size_t len) | ||
1173 | { | ||
1174 | u16 auth_alg, auth_transaction, status_code; | ||
1175 | |||
1176 | if (len < 24 + 6) | ||
1177 | return; | ||
1178 | |||
1179 | auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); | ||
1180 | auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); | ||
1181 | status_code = le16_to_cpu(mgmt->u.auth.status_code); | ||
1182 | |||
1183 | /* | ||
1184 | * IEEE 802.11 standard does not require authentication in IBSS | ||
1185 | * networks and most implementations do not seem to use it. | ||
1186 | * However, try to reply to authentication attempts if someone | ||
1187 | * has actually implemented this. | ||
1188 | */ | ||
1189 | if (auth_alg == WLAN_AUTH_OPEN && auth_transaction == 1) | ||
1190 | ieee80211_send_auth(sdata, ifsta, 2, NULL, 0, 0); | ||
1191 | } | 997 | } |
1192 | 998 | ||
1193 | static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, | 999 | static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, |
1194 | struct ieee80211_if_sta *ifsta, | ||
1195 | struct ieee80211_mgmt *mgmt, | 1000 | struct ieee80211_mgmt *mgmt, |
1196 | size_t len) | 1001 | size_t len) |
1197 | { | 1002 | { |
1003 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
1198 | u16 auth_alg, auth_transaction, status_code; | 1004 | u16 auth_alg, auth_transaction, status_code; |
1199 | 1005 | ||
1200 | if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE) | 1006 | if (ifmgd->state != IEEE80211_STA_MLME_AUTHENTICATE) |
1201 | return; | 1007 | return; |
1202 | 1008 | ||
1203 | if (len < 24 + 6) | 1009 | if (len < 24 + 6) |
1204 | return; | 1010 | return; |
1205 | 1011 | ||
1206 | if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) | 1012 | if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN) != 0) |
1207 | return; | 1013 | return; |
1208 | 1014 | ||
1209 | if (memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) | 1015 | if (memcmp(ifmgd->bssid, mgmt->bssid, ETH_ALEN) != 0) |
1210 | return; | 1016 | return; |
1211 | 1017 | ||
1212 | auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); | 1018 | auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); |
1213 | auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); | 1019 | auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); |
1214 | status_code = le16_to_cpu(mgmt->u.auth.status_code); | 1020 | status_code = le16_to_cpu(mgmt->u.auth.status_code); |
1215 | 1021 | ||
1216 | if (auth_alg != ifsta->auth_alg || | 1022 | if (auth_alg != ifmgd->auth_alg || |
1217 | auth_transaction != ifsta->auth_transaction) | 1023 | auth_transaction != ifmgd->auth_transaction) |
1218 | return; | 1024 | return; |
1219 | 1025 | ||
1220 | if (status_code != WLAN_STATUS_SUCCESS) { | 1026 | if (status_code != WLAN_STATUS_SUCCESS) { |
@@ -1223,15 +1029,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, | |||
1223 | const int num_algs = ARRAY_SIZE(algs); | 1029 | const int num_algs = ARRAY_SIZE(algs); |
1224 | int i, pos; | 1030 | int i, pos; |
1225 | algs[0] = algs[1] = algs[2] = 0xff; | 1031 | algs[0] = algs[1] = algs[2] = 0xff; |
1226 | if (ifsta->auth_algs & IEEE80211_AUTH_ALG_OPEN) | 1032 | if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_OPEN) |
1227 | algs[0] = WLAN_AUTH_OPEN; | 1033 | algs[0] = WLAN_AUTH_OPEN; |
1228 | if (ifsta->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY) | 1034 | if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY) |
1229 | algs[1] = WLAN_AUTH_SHARED_KEY; | 1035 | algs[1] = WLAN_AUTH_SHARED_KEY; |
1230 | if (ifsta->auth_algs & IEEE80211_AUTH_ALG_LEAP) | 1036 | if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_LEAP) |
1231 | algs[2] = WLAN_AUTH_LEAP; | 1037 | algs[2] = WLAN_AUTH_LEAP; |
1232 | if (ifsta->auth_alg == WLAN_AUTH_OPEN) | 1038 | if (ifmgd->auth_alg == WLAN_AUTH_OPEN) |
1233 | pos = 0; | 1039 | pos = 0; |
1234 | else if (ifsta->auth_alg == WLAN_AUTH_SHARED_KEY) | 1040 | else if (ifmgd->auth_alg == WLAN_AUTH_SHARED_KEY) |
1235 | pos = 1; | 1041 | pos = 1; |
1236 | else | 1042 | else |
1237 | pos = 2; | 1043 | pos = 2; |
@@ -1239,101 +1045,101 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, | |||
1239 | pos++; | 1045 | pos++; |
1240 | if (pos >= num_algs) | 1046 | if (pos >= num_algs) |
1241 | pos = 0; | 1047 | pos = 0; |
1242 | if (algs[pos] == ifsta->auth_alg || | 1048 | if (algs[pos] == ifmgd->auth_alg || |
1243 | algs[pos] == 0xff) | 1049 | algs[pos] == 0xff) |
1244 | continue; | 1050 | continue; |
1245 | if (algs[pos] == WLAN_AUTH_SHARED_KEY && | 1051 | if (algs[pos] == WLAN_AUTH_SHARED_KEY && |
1246 | !ieee80211_sta_wep_configured(sdata)) | 1052 | !ieee80211_sta_wep_configured(sdata)) |
1247 | continue; | 1053 | continue; |
1248 | ifsta->auth_alg = algs[pos]; | 1054 | ifmgd->auth_alg = algs[pos]; |
1249 | break; | 1055 | break; |
1250 | } | 1056 | } |
1251 | } | 1057 | } |
1252 | return; | 1058 | return; |
1253 | } | 1059 | } |
1254 | 1060 | ||
1255 | switch (ifsta->auth_alg) { | 1061 | switch (ifmgd->auth_alg) { |
1256 | case WLAN_AUTH_OPEN: | 1062 | case WLAN_AUTH_OPEN: |
1257 | case WLAN_AUTH_LEAP: | 1063 | case WLAN_AUTH_LEAP: |
1258 | ieee80211_auth_completed(sdata, ifsta); | 1064 | ieee80211_auth_completed(sdata); |
1259 | break; | 1065 | break; |
1260 | case WLAN_AUTH_SHARED_KEY: | 1066 | case WLAN_AUTH_SHARED_KEY: |
1261 | if (ifsta->auth_transaction == 4) | 1067 | if (ifmgd->auth_transaction == 4) |
1262 | ieee80211_auth_completed(sdata, ifsta); | 1068 | ieee80211_auth_completed(sdata); |
1263 | else | 1069 | else |
1264 | ieee80211_auth_challenge(sdata, ifsta, mgmt, len); | 1070 | ieee80211_auth_challenge(sdata, mgmt, len); |
1265 | break; | 1071 | break; |
1266 | } | 1072 | } |
1267 | } | 1073 | } |
1268 | 1074 | ||
1269 | 1075 | ||
1270 | static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, | 1076 | static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, |
1271 | struct ieee80211_if_sta *ifsta, | ||
1272 | struct ieee80211_mgmt *mgmt, | 1077 | struct ieee80211_mgmt *mgmt, |
1273 | size_t len) | 1078 | size_t len) |
1274 | { | 1079 | { |
1080 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
1275 | u16 reason_code; | 1081 | u16 reason_code; |
1276 | 1082 | ||
1277 | if (len < 24 + 2) | 1083 | if (len < 24 + 2) |
1278 | return; | 1084 | return; |
1279 | 1085 | ||
1280 | if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) | 1086 | if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN)) |
1281 | return; | 1087 | return; |
1282 | 1088 | ||
1283 | reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); | 1089 | reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); |
1284 | 1090 | ||
1285 | if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) | 1091 | if (ifmgd->flags & IEEE80211_STA_AUTHENTICATED) |
1286 | printk(KERN_DEBUG "%s: deauthenticated (Reason: %u)\n", | 1092 | printk(KERN_DEBUG "%s: deauthenticated (Reason: %u)\n", |
1287 | sdata->dev->name, reason_code); | 1093 | sdata->dev->name, reason_code); |
1288 | 1094 | ||
1289 | if (ifsta->state == IEEE80211_STA_MLME_AUTHENTICATE || | 1095 | if (ifmgd->state == IEEE80211_STA_MLME_AUTHENTICATE || |
1290 | ifsta->state == IEEE80211_STA_MLME_ASSOCIATE || | 1096 | ifmgd->state == IEEE80211_STA_MLME_ASSOCIATE || |
1291 | ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { | 1097 | ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) { |
1292 | ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; | 1098 | ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE; |
1293 | mod_timer(&ifsta->timer, jiffies + | 1099 | mod_timer(&ifmgd->timer, jiffies + |
1294 | IEEE80211_RETRY_AUTH_INTERVAL); | 1100 | IEEE80211_RETRY_AUTH_INTERVAL); |
1295 | } | 1101 | } |
1296 | 1102 | ||
1297 | ieee80211_set_disassoc(sdata, ifsta, true, false, 0); | 1103 | ieee80211_set_disassoc(sdata, true, false, 0); |
1298 | ifsta->flags &= ~IEEE80211_STA_AUTHENTICATED; | 1104 | ifmgd->flags &= ~IEEE80211_STA_AUTHENTICATED; |
1299 | } | 1105 | } |
1300 | 1106 | ||
1301 | 1107 | ||
1302 | static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, | 1108 | static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, |
1303 | struct ieee80211_if_sta *ifsta, | ||
1304 | struct ieee80211_mgmt *mgmt, | 1109 | struct ieee80211_mgmt *mgmt, |
1305 | size_t len) | 1110 | size_t len) |
1306 | { | 1111 | { |
1112 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
1307 | u16 reason_code; | 1113 | u16 reason_code; |
1308 | 1114 | ||
1309 | if (len < 24 + 2) | 1115 | if (len < 24 + 2) |
1310 | return; | 1116 | return; |
1311 | 1117 | ||
1312 | if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) | 1118 | if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN)) |
1313 | return; | 1119 | return; |
1314 | 1120 | ||
1315 | reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); | 1121 | reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); |
1316 | 1122 | ||
1317 | if (ifsta->flags & IEEE80211_STA_ASSOCIATED) | 1123 | if (ifmgd->flags & IEEE80211_STA_ASSOCIATED) |
1318 | printk(KERN_DEBUG "%s: disassociated (Reason: %u)\n", | 1124 | printk(KERN_DEBUG "%s: disassociated (Reason: %u)\n", |
1319 | sdata->dev->name, reason_code); | 1125 | sdata->dev->name, reason_code); |
1320 | 1126 | ||
1321 | if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { | 1127 | if (ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) { |
1322 | ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; | 1128 | ifmgd->state = IEEE80211_STA_MLME_ASSOCIATE; |
1323 | mod_timer(&ifsta->timer, jiffies + | 1129 | mod_timer(&ifmgd->timer, jiffies + |
1324 | IEEE80211_RETRY_AUTH_INTERVAL); | 1130 | IEEE80211_RETRY_AUTH_INTERVAL); |
1325 | } | 1131 | } |
1326 | 1132 | ||
1327 | ieee80211_set_disassoc(sdata, ifsta, false, false, reason_code); | 1133 | ieee80211_set_disassoc(sdata, false, false, reason_code); |
1328 | } | 1134 | } |
1329 | 1135 | ||
1330 | 1136 | ||
1331 | static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, | 1137 | static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, |
1332 | struct ieee80211_if_sta *ifsta, | ||
1333 | struct ieee80211_mgmt *mgmt, | 1138 | struct ieee80211_mgmt *mgmt, |
1334 | size_t len, | 1139 | size_t len, |
1335 | int reassoc) | 1140 | int reassoc) |
1336 | { | 1141 | { |
1142 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
1337 | struct ieee80211_local *local = sdata->local; | 1143 | struct ieee80211_local *local = sdata->local; |
1338 | struct ieee80211_supported_band *sband; | 1144 | struct ieee80211_supported_band *sband; |
1339 | struct sta_info *sta; | 1145 | struct sta_info *sta; |
@@ -1350,13 +1156,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, | |||
1350 | /* AssocResp and ReassocResp have identical structure, so process both | 1156 | /* AssocResp and ReassocResp have identical structure, so process both |
1351 | * of them in this function. */ | 1157 | * of them in this function. */ |
1352 | 1158 | ||
1353 | if (ifsta->state != IEEE80211_STA_MLME_ASSOCIATE) | 1159 | if (ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE) |
1354 | return; | 1160 | return; |
1355 | 1161 | ||
1356 | if (len < 24 + 6) | 1162 | if (len < 24 + 6) |
1357 | return; | 1163 | return; |
1358 | 1164 | ||
1359 | if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) | 1165 | if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN) != 0) |
1360 | return; | 1166 | return; |
1361 | 1167 | ||
1362 | capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); | 1168 | capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); |
@@ -1381,7 +1187,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, | |||
1381 | "comeback duration %u TU (%u ms)\n", | 1187 | "comeback duration %u TU (%u ms)\n", |
1382 | sdata->dev->name, tu, ms); | 1188 | sdata->dev->name, tu, ms); |
1383 | if (ms > IEEE80211_ASSOC_TIMEOUT) | 1189 | if (ms > IEEE80211_ASSOC_TIMEOUT) |
1384 | mod_timer(&ifsta->timer, | 1190 | mod_timer(&ifmgd->timer, |
1385 | jiffies + msecs_to_jiffies(ms)); | 1191 | jiffies + msecs_to_jiffies(ms)); |
1386 | return; | 1192 | return; |
1387 | } | 1193 | } |
@@ -1392,7 +1198,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, | |||
1392 | /* if this was a reassociation, ensure we try a "full" | 1198 | /* if this was a reassociation, ensure we try a "full" |
1393 | * association next time. This works around some broken APs | 1199 | * association next time. This works around some broken APs |
1394 | * which do not correctly reject reassociation requests. */ | 1200 | * which do not correctly reject reassociation requests. */ |
1395 | ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; | 1201 | ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET; |
1396 | return; | 1202 | return; |
1397 | } | 1203 | } |
1398 | 1204 | ||
@@ -1408,23 +1214,23 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, | |||
1408 | } | 1214 | } |
1409 | 1215 | ||
1410 | printk(KERN_DEBUG "%s: associated\n", sdata->dev->name); | 1216 | printk(KERN_DEBUG "%s: associated\n", sdata->dev->name); |
1411 | ifsta->aid = aid; | 1217 | ifmgd->aid = aid; |
1412 | ifsta->ap_capab = capab_info; | 1218 | ifmgd->ap_capab = capab_info; |
1413 | 1219 | ||
1414 | kfree(ifsta->assocresp_ies); | 1220 | kfree(ifmgd->assocresp_ies); |
1415 | ifsta->assocresp_ies_len = len - (pos - (u8 *) mgmt); | 1221 | ifmgd->assocresp_ies_len = len - (pos - (u8 *) mgmt); |
1416 | ifsta->assocresp_ies = kmalloc(ifsta->assocresp_ies_len, GFP_KERNEL); | 1222 | ifmgd->assocresp_ies = kmalloc(ifmgd->assocresp_ies_len, GFP_KERNEL); |
1417 | if (ifsta->assocresp_ies) | 1223 | if (ifmgd->assocresp_ies) |
1418 | memcpy(ifsta->assocresp_ies, pos, ifsta->assocresp_ies_len); | 1224 | memcpy(ifmgd->assocresp_ies, pos, ifmgd->assocresp_ies_len); |
1419 | 1225 | ||
1420 | rcu_read_lock(); | 1226 | rcu_read_lock(); |
1421 | 1227 | ||
1422 | /* Add STA entry for the AP */ | 1228 | /* Add STA entry for the AP */ |
1423 | sta = sta_info_get(local, ifsta->bssid); | 1229 | sta = sta_info_get(local, ifmgd->bssid); |
1424 | if (!sta) { | 1230 | if (!sta) { |
1425 | newsta = true; | 1231 | newsta = true; |
1426 | 1232 | ||
1427 | sta = sta_info_alloc(sdata, ifsta->bssid, GFP_ATOMIC); | 1233 | sta = sta_info_alloc(sdata, ifmgd->bssid, GFP_ATOMIC); |
1428 | if (!sta) { | 1234 | if (!sta) { |
1429 | printk(KERN_DEBUG "%s: failed to alloc STA entry for" | 1235 | printk(KERN_DEBUG "%s: failed to alloc STA entry for" |
1430 | " the AP\n", sdata->dev->name); | 1236 | " the AP\n", sdata->dev->name); |
@@ -1497,7 +1303,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, | |||
1497 | else | 1303 | else |
1498 | sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; | 1304 | sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; |
1499 | 1305 | ||
1500 | if (elems.ht_cap_elem) | 1306 | /* If TKIP/WEP is used, no need to parse AP's HT capabilities */ |
1307 | if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED)) | ||
1501 | ieee80211_ht_cap_ie_to_sta_ht_cap(sband, | 1308 | ieee80211_ht_cap_ie_to_sta_ht_cap(sband, |
1502 | elems.ht_cap_elem, &sta->sta.ht_cap); | 1309 | elems.ht_cap_elem, &sta->sta.ht_cap); |
1503 | 1310 | ||
@@ -1505,7 +1312,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, | |||
1505 | 1312 | ||
1506 | rate_control_rate_init(sta); | 1313 | rate_control_rate_init(sta); |
1507 | 1314 | ||
1508 | if (ifsta->flags & IEEE80211_STA_MFP_ENABLED) | 1315 | if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) |
1509 | set_sta_flags(sta, WLAN_STA_MFP); | 1316 | set_sta_flags(sta, WLAN_STA_MFP); |
1510 | 1317 | ||
1511 | if (elems.wmm_param) | 1318 | if (elems.wmm_param) |
@@ -1524,11 +1331,12 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, | |||
1524 | rcu_read_unlock(); | 1331 | rcu_read_unlock(); |
1525 | 1332 | ||
1526 | if (elems.wmm_param) | 1333 | if (elems.wmm_param) |
1527 | ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, | 1334 | ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param, |
1528 | elems.wmm_param_len); | 1335 | elems.wmm_param_len); |
1529 | 1336 | ||
1530 | if (elems.ht_info_elem && elems.wmm_param && | 1337 | if (elems.ht_info_elem && elems.wmm_param && |
1531 | (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) | 1338 | (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) && |
1339 | !(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED)) | ||
1532 | changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, | 1340 | changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, |
1533 | ap_ht_cap_flags); | 1341 | ap_ht_cap_flags); |
1534 | 1342 | ||
@@ -1536,163 +1344,12 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, | |||
1536 | * ieee80211_set_associated() will tell the driver */ | 1344 | * ieee80211_set_associated() will tell the driver */ |
1537 | bss_conf->aid = aid; | 1345 | bss_conf->aid = aid; |
1538 | bss_conf->assoc_capability = capab_info; | 1346 | bss_conf->assoc_capability = capab_info; |
1539 | ieee80211_set_associated(sdata, ifsta, changed); | 1347 | ieee80211_set_associated(sdata, changed); |
1540 | 1348 | ||
1541 | ieee80211_associated(sdata, ifsta); | 1349 | ieee80211_associated(sdata); |
1542 | } | 1350 | } |
1543 | 1351 | ||
1544 | 1352 | ||
1545 | static int __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, | ||
1546 | struct ieee80211_if_sta *ifsta, | ||
1547 | const u8 *bssid, const int beacon_int, | ||
1548 | const int freq, | ||
1549 | const size_t supp_rates_len, | ||
1550 | const u8 *supp_rates, | ||
1551 | const u16 capability) | ||
1552 | { | ||
1553 | struct ieee80211_local *local = sdata->local; | ||
1554 | int res = 0, rates, i, j; | ||
1555 | struct sk_buff *skb; | ||
1556 | struct ieee80211_mgmt *mgmt; | ||
1557 | u8 *pos; | ||
1558 | struct ieee80211_supported_band *sband; | ||
1559 | union iwreq_data wrqu; | ||
1560 | |||
1561 | if (local->ops->reset_tsf) { | ||
1562 | /* Reset own TSF to allow time synchronization work. */ | ||
1563 | local->ops->reset_tsf(local_to_hw(local)); | ||
1564 | } | ||
1565 | |||
1566 | if ((ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) && | ||
1567 | memcmp(ifsta->bssid, bssid, ETH_ALEN) == 0) | ||
1568 | return res; | ||
1569 | |||
1570 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 + | ||
1571 | sdata->u.sta.ie_proberesp_len); | ||
1572 | if (!skb) { | ||
1573 | printk(KERN_DEBUG "%s: failed to allocate buffer for probe " | ||
1574 | "response\n", sdata->dev->name); | ||
1575 | return -ENOMEM; | ||
1576 | } | ||
1577 | |||
1578 | if (!(ifsta->flags & IEEE80211_STA_PREV_BSSID_SET)) { | ||
1579 | /* Remove possible STA entries from other IBSS networks. */ | ||
1580 | sta_info_flush_delayed(sdata); | ||
1581 | } | ||
1582 | |||
1583 | memcpy(ifsta->bssid, bssid, ETH_ALEN); | ||
1584 | res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); | ||
1585 | if (res) | ||
1586 | return res; | ||
1587 | |||
1588 | local->hw.conf.beacon_int = beacon_int >= 10 ? beacon_int : 10; | ||
1589 | |||
1590 | sdata->drop_unencrypted = capability & | ||
1591 | WLAN_CAPABILITY_PRIVACY ? 1 : 0; | ||
1592 | |||
1593 | res = ieee80211_set_freq(sdata, freq); | ||
1594 | |||
1595 | if (res) | ||
1596 | return res; | ||
1597 | |||
1598 | sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; | ||
1599 | |||
1600 | /* Build IBSS probe response */ | ||
1601 | |||
1602 | skb_reserve(skb, local->hw.extra_tx_headroom); | ||
1603 | |||
1604 | mgmt = (struct ieee80211_mgmt *) | ||
1605 | skb_put(skb, 24 + sizeof(mgmt->u.beacon)); | ||
1606 | memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); | ||
1607 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | | ||
1608 | IEEE80211_STYPE_PROBE_RESP); | ||
1609 | memset(mgmt->da, 0xff, ETH_ALEN); | ||
1610 | memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); | ||
1611 | memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); | ||
1612 | mgmt->u.beacon.beacon_int = | ||
1613 | cpu_to_le16(local->hw.conf.beacon_int); | ||
1614 | mgmt->u.beacon.capab_info = cpu_to_le16(capability); | ||
1615 | |||
1616 | pos = skb_put(skb, 2 + ifsta->ssid_len); | ||
1617 | *pos++ = WLAN_EID_SSID; | ||
1618 | *pos++ = ifsta->ssid_len; | ||
1619 | memcpy(pos, ifsta->ssid, ifsta->ssid_len); | ||
1620 | |||
1621 | rates = supp_rates_len; | ||
1622 | if (rates > 8) | ||
1623 | rates = 8; | ||
1624 | pos = skb_put(skb, 2 + rates); | ||
1625 | *pos++ = WLAN_EID_SUPP_RATES; | ||
1626 | *pos++ = rates; | ||
1627 | memcpy(pos, supp_rates, rates); | ||
1628 | |||
1629 | if (sband->band == IEEE80211_BAND_2GHZ) { | ||
1630 | pos = skb_put(skb, 2 + 1); | ||
1631 | *pos++ = WLAN_EID_DS_PARAMS; | ||
1632 | *pos++ = 1; | ||
1633 | *pos++ = ieee80211_frequency_to_channel(freq); | ||
1634 | } | ||
1635 | |||
1636 | pos = skb_put(skb, 2 + 2); | ||
1637 | *pos++ = WLAN_EID_IBSS_PARAMS; | ||
1638 | *pos++ = 2; | ||
1639 | /* FIX: set ATIM window based on scan results */ | ||
1640 | *pos++ = 0; | ||
1641 | *pos++ = 0; | ||
1642 | |||
1643 | if (supp_rates_len > 8) { | ||
1644 | rates = supp_rates_len - 8; | ||
1645 | pos = skb_put(skb, 2 + rates); | ||
1646 | *pos++ = WLAN_EID_EXT_SUPP_RATES; | ||
1647 | *pos++ = rates; | ||
1648 | memcpy(pos, &supp_rates[8], rates); | ||
1649 | } | ||
1650 | |||
1651 | add_extra_ies(skb, sdata->u.sta.ie_proberesp, | ||
1652 | sdata->u.sta.ie_proberesp_len); | ||
1653 | |||
1654 | ifsta->probe_resp = skb; | ||
1655 | |||
1656 | ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON | | ||
1657 | IEEE80211_IFCC_BEACON_ENABLED); | ||
1658 | |||
1659 | |||
1660 | rates = 0; | ||
1661 | for (i = 0; i < supp_rates_len; i++) { | ||
1662 | int bitrate = (supp_rates[i] & 0x7f) * 5; | ||
1663 | for (j = 0; j < sband->n_bitrates; j++) | ||
1664 | if (sband->bitrates[j].bitrate == bitrate) | ||
1665 | rates |= BIT(j); | ||
1666 | } | ||
1667 | ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates; | ||
1668 | |||
1669 | ieee80211_sta_def_wmm_params(sdata, supp_rates_len, supp_rates); | ||
1670 | |||
1671 | ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; | ||
1672 | ifsta->state = IEEE80211_STA_MLME_IBSS_JOINED; | ||
1673 | mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); | ||
1674 | |||
1675 | ieee80211_led_assoc(local, true); | ||
1676 | |||
1677 | memset(&wrqu, 0, sizeof(wrqu)); | ||
1678 | memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); | ||
1679 | wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); | ||
1680 | |||
1681 | return res; | ||
1682 | } | ||
1683 | |||
1684 | static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, | ||
1685 | struct ieee80211_if_sta *ifsta, | ||
1686 | struct ieee80211_bss *bss) | ||
1687 | { | ||
1688 | return __ieee80211_sta_join_ibss(sdata, ifsta, | ||
1689 | bss->cbss.bssid, | ||
1690 | bss->cbss.beacon_interval, | ||
1691 | bss->cbss.channel->center_freq, | ||
1692 | bss->supp_rates_len, bss->supp_rates, | ||
1693 | bss->cbss.capability); | ||
1694 | } | ||
1695 | |||
1696 | static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, | 1353 | static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, |
1697 | struct ieee80211_mgmt *mgmt, | 1354 | struct ieee80211_mgmt *mgmt, |
1698 | size_t len, | 1355 | size_t len, |
@@ -1703,11 +1360,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, | |||
1703 | struct ieee80211_local *local = sdata->local; | 1360 | struct ieee80211_local *local = sdata->local; |
1704 | int freq; | 1361 | int freq; |
1705 | struct ieee80211_bss *bss; | 1362 | struct ieee80211_bss *bss; |
1706 | struct sta_info *sta; | ||
1707 | struct ieee80211_channel *channel; | 1363 | struct ieee80211_channel *channel; |
1708 | u64 beacon_timestamp, rx_timestamp; | ||
1709 | u32 supp_rates = 0; | ||
1710 | enum ieee80211_band band = rx_status->band; | ||
1711 | 1364 | ||
1712 | if (elems->ds_params && elems->ds_params_len == 1) | 1365 | if (elems->ds_params && elems->ds_params_len == 1) |
1713 | freq = ieee80211_channel_to_frequency(elems->ds_params[0]); | 1366 | freq = ieee80211_channel_to_frequency(elems->ds_params[0]); |
@@ -1719,133 +1372,18 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, | |||
1719 | if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) | 1372 | if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) |
1720 | return; | 1373 | return; |
1721 | 1374 | ||
1722 | if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates && | ||
1723 | memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) { | ||
1724 | supp_rates = ieee80211_sta_get_rates(local, elems, band); | ||
1725 | |||
1726 | rcu_read_lock(); | ||
1727 | |||
1728 | sta = sta_info_get(local, mgmt->sa); | ||
1729 | if (sta) { | ||
1730 | u32 prev_rates; | ||
1731 | |||
1732 | prev_rates = sta->sta.supp_rates[band]; | ||
1733 | /* make sure mandatory rates are always added */ | ||
1734 | sta->sta.supp_rates[band] = supp_rates | | ||
1735 | ieee80211_mandatory_rates(local, band); | ||
1736 | |||
1737 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
1738 | if (sta->sta.supp_rates[band] != prev_rates) | ||
1739 | printk(KERN_DEBUG "%s: updated supp_rates set " | ||
1740 | "for %pM based on beacon info (0x%llx | " | ||
1741 | "0x%llx -> 0x%llx)\n", | ||
1742 | sdata->dev->name, | ||
1743 | sta->sta.addr, | ||
1744 | (unsigned long long) prev_rates, | ||
1745 | (unsigned long long) supp_rates, | ||
1746 | (unsigned long long) sta->sta.supp_rates[band]); | ||
1747 | #endif | ||
1748 | } else { | ||
1749 | ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); | ||
1750 | } | ||
1751 | |||
1752 | rcu_read_unlock(); | ||
1753 | } | ||
1754 | |||
1755 | bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, | 1375 | bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, |
1756 | channel, beacon); | 1376 | channel, beacon); |
1757 | if (!bss) | 1377 | if (!bss) |
1758 | return; | 1378 | return; |
1759 | 1379 | ||
1760 | if (elems->ch_switch_elem && (elems->ch_switch_elem_len == 3) && | 1380 | if (elems->ch_switch_elem && (elems->ch_switch_elem_len == 3) && |
1761 | (memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0)) { | 1381 | (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN) == 0)) { |
1762 | struct ieee80211_channel_sw_ie *sw_elem = | 1382 | struct ieee80211_channel_sw_ie *sw_elem = |
1763 | (struct ieee80211_channel_sw_ie *)elems->ch_switch_elem; | 1383 | (struct ieee80211_channel_sw_ie *)elems->ch_switch_elem; |
1764 | ieee80211_process_chanswitch(sdata, sw_elem, bss); | 1384 | ieee80211_process_chanswitch(sdata, sw_elem, bss); |
1765 | } | 1385 | } |
1766 | 1386 | ||
1767 | /* was just updated in ieee80211_bss_info_update */ | ||
1768 | beacon_timestamp = bss->cbss.tsf; | ||
1769 | |||
1770 | if (sdata->vif.type != NL80211_IFTYPE_ADHOC) | ||
1771 | goto put_bss; | ||
1772 | |||
1773 | /* check if we need to merge IBSS */ | ||
1774 | |||
1775 | /* merge only on beacons (???) */ | ||
1776 | if (!beacon) | ||
1777 | goto put_bss; | ||
1778 | |||
1779 | /* we use a fixed BSSID */ | ||
1780 | if (sdata->u.sta.flags & IEEE80211_STA_BSSID_SET) | ||
1781 | goto put_bss; | ||
1782 | |||
1783 | /* not an IBSS */ | ||
1784 | if (!(bss->cbss.capability & WLAN_CAPABILITY_IBSS)) | ||
1785 | goto put_bss; | ||
1786 | |||
1787 | /* different channel */ | ||
1788 | if (bss->cbss.channel != local->oper_channel) | ||
1789 | goto put_bss; | ||
1790 | |||
1791 | /* different SSID */ | ||
1792 | if (elems->ssid_len != sdata->u.sta.ssid_len || | ||
1793 | memcmp(elems->ssid, sdata->u.sta.ssid, | ||
1794 | sdata->u.sta.ssid_len)) | ||
1795 | goto put_bss; | ||
1796 | |||
1797 | if (rx_status->flag & RX_FLAG_TSFT) { | ||
1798 | /* | ||
1799 | * For correct IBSS merging we need mactime; since mactime is | ||
1800 | * defined as the time the first data symbol of the frame hits | ||
1801 | * the PHY, and the timestamp of the beacon is defined as "the | ||
1802 | * time that the data symbol containing the first bit of the | ||
1803 | * timestamp is transmitted to the PHY plus the transmitting | ||
1804 | * STA's delays through its local PHY from the MAC-PHY | ||
1805 | * interface to its interface with the WM" (802.11 11.1.2) | ||
1806 | * - equals the time this bit arrives at the receiver - we have | ||
1807 | * to take into account the offset between the two. | ||
1808 | * | ||
1809 | * E.g. at 1 MBit that means mactime is 192 usec earlier | ||
1810 | * (=24 bytes * 8 usecs/byte) than the beacon timestamp. | ||
1811 | */ | ||
1812 | int rate; | ||
1813 | |||
1814 | if (rx_status->flag & RX_FLAG_HT) | ||
1815 | rate = 65; /* TODO: HT rates */ | ||
1816 | else | ||
1817 | rate = local->hw.wiphy->bands[band]-> | ||
1818 | bitrates[rx_status->rate_idx].bitrate; | ||
1819 | |||
1820 | rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate); | ||
1821 | } else if (local && local->ops && local->ops->get_tsf) | ||
1822 | /* second best option: get current TSF */ | ||
1823 | rx_timestamp = local->ops->get_tsf(local_to_hw(local)); | ||
1824 | else | ||
1825 | /* can't merge without knowing the TSF */ | ||
1826 | rx_timestamp = -1LLU; | ||
1827 | |||
1828 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
1829 | printk(KERN_DEBUG "RX beacon SA=%pM BSSID=" | ||
1830 | "%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", | ||
1831 | mgmt->sa, mgmt->bssid, | ||
1832 | (unsigned long long)rx_timestamp, | ||
1833 | (unsigned long long)beacon_timestamp, | ||
1834 | (unsigned long long)(rx_timestamp - beacon_timestamp), | ||
1835 | jiffies); | ||
1836 | #endif | ||
1837 | |||
1838 | if (beacon_timestamp > rx_timestamp) { | ||
1839 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
1840 | printk(KERN_DEBUG "%s: beacon TSF higher than " | ||
1841 | "local TSF - IBSS merge with BSSID %pM\n", | ||
1842 | sdata->dev->name, mgmt->bssid); | ||
1843 | #endif | ||
1844 | ieee80211_sta_join_ibss(sdata, &sdata->u.sta, bss); | ||
1845 | ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); | ||
1846 | } | ||
1847 | |||
1848 | put_bss: | ||
1849 | ieee80211_rx_bss_put(local, bss); | 1387 | ieee80211_rx_bss_put(local, bss); |
1850 | } | 1388 | } |
1851 | 1389 | ||
@@ -1857,7 +1395,6 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, | |||
1857 | { | 1395 | { |
1858 | size_t baselen; | 1396 | size_t baselen; |
1859 | struct ieee802_11_elems elems; | 1397 | struct ieee802_11_elems elems; |
1860 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | ||
1861 | 1398 | ||
1862 | if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) | 1399 | if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) |
1863 | return; /* ignore ProbeResp to foreign address */ | 1400 | return; /* ignore ProbeResp to foreign address */ |
@@ -1873,20 +1410,19 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, | |||
1873 | 1410 | ||
1874 | /* direct probe may be part of the association flow */ | 1411 | /* direct probe may be part of the association flow */ |
1875 | if (test_and_clear_bit(IEEE80211_STA_REQ_DIRECT_PROBE, | 1412 | if (test_and_clear_bit(IEEE80211_STA_REQ_DIRECT_PROBE, |
1876 | &ifsta->request)) { | 1413 | &sdata->u.mgd.request)) { |
1877 | printk(KERN_DEBUG "%s direct probe responded\n", | 1414 | printk(KERN_DEBUG "%s direct probe responded\n", |
1878 | sdata->dev->name); | 1415 | sdata->dev->name); |
1879 | ieee80211_authenticate(sdata, ifsta); | 1416 | ieee80211_authenticate(sdata); |
1880 | } | 1417 | } |
1881 | } | 1418 | } |
1882 | 1419 | ||
1883 | |||
1884 | static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, | 1420 | static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, |
1885 | struct ieee80211_mgmt *mgmt, | 1421 | struct ieee80211_mgmt *mgmt, |
1886 | size_t len, | 1422 | size_t len, |
1887 | struct ieee80211_rx_status *rx_status) | 1423 | struct ieee80211_rx_status *rx_status) |
1888 | { | 1424 | { |
1889 | struct ieee80211_if_sta *ifsta; | 1425 | struct ieee80211_if_managed *ifmgd; |
1890 | size_t baselen; | 1426 | size_t baselen; |
1891 | struct ieee802_11_elems elems; | 1427 | struct ieee802_11_elems elems; |
1892 | struct ieee80211_local *local = sdata->local; | 1428 | struct ieee80211_local *local = sdata->local; |
@@ -1905,21 +1441,21 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, | |||
1905 | 1441 | ||
1906 | if (sdata->vif.type != NL80211_IFTYPE_STATION) | 1442 | if (sdata->vif.type != NL80211_IFTYPE_STATION) |
1907 | return; | 1443 | return; |
1908 | ifsta = &sdata->u.sta; | ||
1909 | 1444 | ||
1910 | if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED) || | 1445 | ifmgd = &sdata->u.mgd; |
1911 | memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) | 1446 | |
1447 | if (!(ifmgd->flags & IEEE80211_STA_ASSOCIATED) || | ||
1448 | memcmp(ifmgd->bssid, mgmt->bssid, ETH_ALEN) != 0) | ||
1912 | return; | 1449 | return; |
1913 | 1450 | ||
1914 | if (rx_status->freq != local->hw.conf.channel->center_freq) | 1451 | if (rx_status->freq != local->hw.conf.channel->center_freq) |
1915 | return; | 1452 | return; |
1916 | 1453 | ||
1917 | ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, | 1454 | ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param, |
1918 | elems.wmm_param_len); | 1455 | elems.wmm_param_len); |
1919 | 1456 | ||
1920 | if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK && | 1457 | if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) { |
1921 | local->hw.conf.flags & IEEE80211_CONF_PS) { | 1458 | directed_tim = ieee80211_check_tim(&elems, ifmgd->aid); |
1922 | directed_tim = ieee80211_check_tim(&elems, ifsta->aid); | ||
1923 | 1459 | ||
1924 | if (directed_tim) { | 1460 | if (directed_tim) { |
1925 | if (local->hw.conf.dynamic_ps_timeout > 0) { | 1461 | if (local->hw.conf.dynamic_ps_timeout > 0) { |
@@ -1954,14 +1490,15 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, | |||
1954 | erp_valid, erp_value); | 1490 | erp_valid, erp_value); |
1955 | 1491 | ||
1956 | 1492 | ||
1957 | if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param) { | 1493 | if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param && |
1494 | !(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED)) { | ||
1958 | struct sta_info *sta; | 1495 | struct sta_info *sta; |
1959 | struct ieee80211_supported_band *sband; | 1496 | struct ieee80211_supported_band *sband; |
1960 | u16 ap_ht_cap_flags; | 1497 | u16 ap_ht_cap_flags; |
1961 | 1498 | ||
1962 | rcu_read_lock(); | 1499 | rcu_read_lock(); |
1963 | 1500 | ||
1964 | sta = sta_info_get(local, ifsta->bssid); | 1501 | sta = sta_info_get(local, ifmgd->bssid); |
1965 | if (!sta) { | 1502 | if (!sta) { |
1966 | rcu_read_unlock(); | 1503 | rcu_read_unlock(); |
1967 | return; | 1504 | return; |
@@ -1997,85 +1534,16 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, | |||
1997 | ieee80211_bss_info_change_notify(sdata, changed); | 1534 | ieee80211_bss_info_change_notify(sdata, changed); |
1998 | } | 1535 | } |
1999 | 1536 | ||
2000 | 1537 | ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, | |
2001 | static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, | 1538 | struct sk_buff *skb, |
2002 | struct ieee80211_if_sta *ifsta, | 1539 | struct ieee80211_rx_status *rx_status) |
2003 | struct ieee80211_mgmt *mgmt, | ||
2004 | size_t len) | ||
2005 | { | 1540 | { |
2006 | struct ieee80211_local *local = sdata->local; | 1541 | struct ieee80211_local *local = sdata->local; |
2007 | int tx_last_beacon; | ||
2008 | struct sk_buff *skb; | ||
2009 | struct ieee80211_mgmt *resp; | ||
2010 | u8 *pos, *end; | ||
2011 | |||
2012 | if (ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED || | ||
2013 | len < 24 + 2 || !ifsta->probe_resp) | ||
2014 | return; | ||
2015 | |||
2016 | if (local->ops->tx_last_beacon) | ||
2017 | tx_last_beacon = local->ops->tx_last_beacon(local_to_hw(local)); | ||
2018 | else | ||
2019 | tx_last_beacon = 1; | ||
2020 | |||
2021 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
2022 | printk(KERN_DEBUG "%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM" | ||
2023 | " (tx_last_beacon=%d)\n", | ||
2024 | sdata->dev->name, mgmt->sa, mgmt->da, | ||
2025 | mgmt->bssid, tx_last_beacon); | ||
2026 | #endif /* CONFIG_MAC80211_IBSS_DEBUG */ | ||
2027 | |||
2028 | if (!tx_last_beacon) | ||
2029 | return; | ||
2030 | |||
2031 | if (memcmp(mgmt->bssid, ifsta->bssid, ETH_ALEN) != 0 && | ||
2032 | memcmp(mgmt->bssid, "\xff\xff\xff\xff\xff\xff", ETH_ALEN) != 0) | ||
2033 | return; | ||
2034 | |||
2035 | end = ((u8 *) mgmt) + len; | ||
2036 | pos = mgmt->u.probe_req.variable; | ||
2037 | if (pos[0] != WLAN_EID_SSID || | ||
2038 | pos + 2 + pos[1] > end) { | ||
2039 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
2040 | printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " | ||
2041 | "from %pM\n", | ||
2042 | sdata->dev->name, mgmt->sa); | ||
2043 | #endif | ||
2044 | return; | ||
2045 | } | ||
2046 | if (pos[1] != 0 && | ||
2047 | (pos[1] != ifsta->ssid_len || | ||
2048 | memcmp(pos + 2, ifsta->ssid, ifsta->ssid_len) != 0)) { | ||
2049 | /* Ignore ProbeReq for foreign SSID */ | ||
2050 | return; | ||
2051 | } | ||
2052 | |||
2053 | /* Reply with ProbeResp */ | ||
2054 | skb = skb_copy(ifsta->probe_resp, GFP_KERNEL); | ||
2055 | if (!skb) | ||
2056 | return; | ||
2057 | |||
2058 | resp = (struct ieee80211_mgmt *) skb->data; | ||
2059 | memcpy(resp->da, mgmt->sa, ETH_ALEN); | ||
2060 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
2061 | printk(KERN_DEBUG "%s: Sending ProbeResp to %pM\n", | ||
2062 | sdata->dev->name, resp->da); | ||
2063 | #endif /* CONFIG_MAC80211_IBSS_DEBUG */ | ||
2064 | ieee80211_tx_skb(sdata, skb, 0); | ||
2065 | } | ||
2066 | |||
2067 | void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, | ||
2068 | struct ieee80211_rx_status *rx_status) | ||
2069 | { | ||
2070 | struct ieee80211_local *local = sdata->local; | ||
2071 | struct ieee80211_if_sta *ifsta; | ||
2072 | struct ieee80211_mgmt *mgmt; | 1542 | struct ieee80211_mgmt *mgmt; |
2073 | u16 fc; | 1543 | u16 fc; |
2074 | 1544 | ||
2075 | if (skb->len < 24) | 1545 | if (skb->len < 24) |
2076 | goto fail; | 1546 | return RX_DROP_MONITOR; |
2077 | |||
2078 | ifsta = &sdata->u.sta; | ||
2079 | 1547 | ||
2080 | mgmt = (struct ieee80211_mgmt *) skb->data; | 1548 | mgmt = (struct ieee80211_mgmt *) skb->data; |
2081 | fc = le16_to_cpu(mgmt->frame_control); | 1549 | fc = le16_to_cpu(mgmt->frame_control); |
@@ -2090,147 +1558,68 @@ void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff * | |||
2090 | case IEEE80211_STYPE_REASSOC_RESP: | 1558 | case IEEE80211_STYPE_REASSOC_RESP: |
2091 | case IEEE80211_STYPE_DEAUTH: | 1559 | case IEEE80211_STYPE_DEAUTH: |
2092 | case IEEE80211_STYPE_DISASSOC: | 1560 | case IEEE80211_STYPE_DISASSOC: |
2093 | skb_queue_tail(&ifsta->skb_queue, skb); | 1561 | skb_queue_tail(&sdata->u.mgd.skb_queue, skb); |
2094 | queue_work(local->hw.workqueue, &ifsta->work); | 1562 | queue_work(local->hw.workqueue, &sdata->u.mgd.work); |
2095 | return; | 1563 | return RX_QUEUED; |
2096 | } | 1564 | } |
2097 | 1565 | ||
2098 | fail: | 1566 | return RX_DROP_MONITOR; |
2099 | kfree_skb(skb); | ||
2100 | } | 1567 | } |
2101 | 1568 | ||
2102 | static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, | 1569 | static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, |
2103 | struct sk_buff *skb) | 1570 | struct sk_buff *skb) |
2104 | { | 1571 | { |
2105 | struct ieee80211_rx_status *rx_status; | 1572 | struct ieee80211_rx_status *rx_status; |
2106 | struct ieee80211_if_sta *ifsta; | ||
2107 | struct ieee80211_mgmt *mgmt; | 1573 | struct ieee80211_mgmt *mgmt; |
2108 | u16 fc; | 1574 | u16 fc; |
2109 | 1575 | ||
2110 | ifsta = &sdata->u.sta; | ||
2111 | |||
2112 | rx_status = (struct ieee80211_rx_status *) skb->cb; | 1576 | rx_status = (struct ieee80211_rx_status *) skb->cb; |
2113 | mgmt = (struct ieee80211_mgmt *) skb->data; | 1577 | mgmt = (struct ieee80211_mgmt *) skb->data; |
2114 | fc = le16_to_cpu(mgmt->frame_control); | 1578 | fc = le16_to_cpu(mgmt->frame_control); |
2115 | 1579 | ||
2116 | if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { | 1580 | switch (fc & IEEE80211_FCTL_STYPE) { |
2117 | switch (fc & IEEE80211_FCTL_STYPE) { | 1581 | case IEEE80211_STYPE_PROBE_RESP: |
2118 | case IEEE80211_STYPE_PROBE_REQ: | 1582 | ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, |
2119 | ieee80211_rx_mgmt_probe_req(sdata, ifsta, mgmt, | 1583 | rx_status); |
2120 | skb->len); | 1584 | break; |
2121 | break; | 1585 | case IEEE80211_STYPE_BEACON: |
2122 | case IEEE80211_STYPE_PROBE_RESP: | 1586 | ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, |
2123 | ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, | 1587 | rx_status); |
2124 | rx_status); | 1588 | break; |
2125 | break; | 1589 | case IEEE80211_STYPE_AUTH: |
2126 | case IEEE80211_STYPE_BEACON: | 1590 | ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len); |
2127 | ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, | 1591 | break; |
2128 | rx_status); | 1592 | case IEEE80211_STYPE_ASSOC_RESP: |
2129 | break; | 1593 | ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, 0); |
2130 | case IEEE80211_STYPE_AUTH: | 1594 | break; |
2131 | ieee80211_rx_mgmt_auth_ibss(sdata, ifsta, mgmt, | 1595 | case IEEE80211_STYPE_REASSOC_RESP: |
2132 | skb->len); | 1596 | ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, 1); |
2133 | break; | 1597 | break; |
2134 | } | 1598 | case IEEE80211_STYPE_DEAUTH: |
2135 | } else { /* NL80211_IFTYPE_STATION */ | 1599 | ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len); |
2136 | switch (fc & IEEE80211_FCTL_STYPE) { | 1600 | break; |
2137 | case IEEE80211_STYPE_PROBE_RESP: | 1601 | case IEEE80211_STYPE_DISASSOC: |
2138 | ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, | 1602 | ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); |
2139 | rx_status); | 1603 | break; |
2140 | break; | ||
2141 | case IEEE80211_STYPE_BEACON: | ||
2142 | ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, | ||
2143 | rx_status); | ||
2144 | break; | ||
2145 | case IEEE80211_STYPE_AUTH: | ||
2146 | ieee80211_rx_mgmt_auth(sdata, ifsta, mgmt, skb->len); | ||
2147 | break; | ||
2148 | case IEEE80211_STYPE_ASSOC_RESP: | ||
2149 | ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, | ||
2150 | skb->len, 0); | ||
2151 | break; | ||
2152 | case IEEE80211_STYPE_REASSOC_RESP: | ||
2153 | ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, | ||
2154 | skb->len, 1); | ||
2155 | break; | ||
2156 | case IEEE80211_STYPE_DEAUTH: | ||
2157 | ieee80211_rx_mgmt_deauth(sdata, ifsta, mgmt, skb->len); | ||
2158 | break; | ||
2159 | case IEEE80211_STYPE_DISASSOC: | ||
2160 | ieee80211_rx_mgmt_disassoc(sdata, ifsta, mgmt, | ||
2161 | skb->len); | ||
2162 | break; | ||
2163 | } | ||
2164 | } | 1604 | } |
2165 | 1605 | ||
2166 | kfree_skb(skb); | 1606 | kfree_skb(skb); |
2167 | } | 1607 | } |
2168 | 1608 | ||
2169 | |||
2170 | static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) | ||
2171 | { | ||
2172 | struct ieee80211_local *local = sdata->local; | ||
2173 | int active = 0; | ||
2174 | struct sta_info *sta; | ||
2175 | |||
2176 | rcu_read_lock(); | ||
2177 | |||
2178 | list_for_each_entry_rcu(sta, &local->sta_list, list) { | ||
2179 | if (sta->sdata == sdata && | ||
2180 | time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL, | ||
2181 | jiffies)) { | ||
2182 | active++; | ||
2183 | break; | ||
2184 | } | ||
2185 | } | ||
2186 | |||
2187 | rcu_read_unlock(); | ||
2188 | |||
2189 | return active; | ||
2190 | } | ||
2191 | |||
2192 | |||
2193 | static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata, | ||
2194 | struct ieee80211_if_sta *ifsta) | ||
2195 | { | ||
2196 | mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); | ||
2197 | |||
2198 | ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT); | ||
2199 | if (ieee80211_sta_active_ibss(sdata)) | ||
2200 | return; | ||
2201 | |||
2202 | if ((sdata->u.sta.flags & IEEE80211_STA_BSSID_SET) && | ||
2203 | (!(sdata->u.sta.flags & IEEE80211_STA_AUTO_CHANNEL_SEL))) | ||
2204 | return; | ||
2205 | |||
2206 | printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other " | ||
2207 | "IBSS networks with same SSID (merge)\n", sdata->dev->name); | ||
2208 | |||
2209 | /* XXX maybe racy? */ | ||
2210 | if (sdata->local->scan_req) | ||
2211 | return; | ||
2212 | |||
2213 | memcpy(sdata->local->int_scan_req.ssids[0].ssid, | ||
2214 | ifsta->ssid, IEEE80211_MAX_SSID_LEN); | ||
2215 | sdata->local->int_scan_req.ssids[0].ssid_len = ifsta->ssid_len; | ||
2216 | ieee80211_request_scan(sdata, &sdata->local->int_scan_req); | ||
2217 | } | ||
2218 | |||
2219 | |||
2220 | static void ieee80211_sta_timer(unsigned long data) | 1609 | static void ieee80211_sta_timer(unsigned long data) |
2221 | { | 1610 | { |
2222 | struct ieee80211_sub_if_data *sdata = | 1611 | struct ieee80211_sub_if_data *sdata = |
2223 | (struct ieee80211_sub_if_data *) data; | 1612 | (struct ieee80211_sub_if_data *) data; |
2224 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | 1613 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
2225 | struct ieee80211_local *local = sdata->local; | 1614 | struct ieee80211_local *local = sdata->local; |
2226 | 1615 | ||
2227 | set_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); | 1616 | set_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request); |
2228 | queue_work(local->hw.workqueue, &ifsta->work); | 1617 | queue_work(local->hw.workqueue, &ifmgd->work); |
2229 | } | 1618 | } |
2230 | 1619 | ||
2231 | static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata, | 1620 | static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata) |
2232 | struct ieee80211_if_sta *ifsta) | ||
2233 | { | 1621 | { |
1622 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
2234 | struct ieee80211_local *local = sdata->local; | 1623 | struct ieee80211_local *local = sdata->local; |
2235 | 1624 | ||
2236 | if (local->ops->reset_tsf) { | 1625 | if (local->ops->reset_tsf) { |
@@ -2238,191 +1627,39 @@ static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata, | |||
2238 | local->ops->reset_tsf(local_to_hw(local)); | 1627 | local->ops->reset_tsf(local_to_hw(local)); |
2239 | } | 1628 | } |
2240 | 1629 | ||
2241 | ifsta->wmm_last_param_set = -1; /* allow any WMM update */ | 1630 | ifmgd->wmm_last_param_set = -1; /* allow any WMM update */ |
2242 | 1631 | ||
2243 | 1632 | ||
2244 | if (ifsta->auth_algs & IEEE80211_AUTH_ALG_OPEN) | 1633 | if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_OPEN) |
2245 | ifsta->auth_alg = WLAN_AUTH_OPEN; | 1634 | ifmgd->auth_alg = WLAN_AUTH_OPEN; |
2246 | else if (ifsta->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY) | 1635 | else if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY) |
2247 | ifsta->auth_alg = WLAN_AUTH_SHARED_KEY; | 1636 | ifmgd->auth_alg = WLAN_AUTH_SHARED_KEY; |
2248 | else if (ifsta->auth_algs & IEEE80211_AUTH_ALG_LEAP) | 1637 | else if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_LEAP) |
2249 | ifsta->auth_alg = WLAN_AUTH_LEAP; | 1638 | ifmgd->auth_alg = WLAN_AUTH_LEAP; |
2250 | else | 1639 | else |
2251 | ifsta->auth_alg = WLAN_AUTH_OPEN; | 1640 | ifmgd->auth_alg = WLAN_AUTH_OPEN; |
2252 | ifsta->auth_transaction = -1; | 1641 | ifmgd->auth_transaction = -1; |
2253 | ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; | 1642 | ifmgd->flags &= ~IEEE80211_STA_ASSOCIATED; |
2254 | ifsta->assoc_scan_tries = 0; | 1643 | ifmgd->assoc_scan_tries = 0; |
2255 | ifsta->direct_probe_tries = 0; | 1644 | ifmgd->direct_probe_tries = 0; |
2256 | ifsta->auth_tries = 0; | 1645 | ifmgd->auth_tries = 0; |
2257 | ifsta->assoc_tries = 0; | 1646 | ifmgd->assoc_tries = 0; |
2258 | netif_tx_stop_all_queues(sdata->dev); | 1647 | netif_tx_stop_all_queues(sdata->dev); |
2259 | netif_carrier_off(sdata->dev); | 1648 | netif_carrier_off(sdata->dev); |
2260 | } | 1649 | } |
2261 | 1650 | ||
2262 | static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata, | 1651 | static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata) |
2263 | struct ieee80211_if_sta *ifsta) | ||
2264 | { | ||
2265 | struct ieee80211_local *local = sdata->local; | ||
2266 | struct ieee80211_supported_band *sband; | ||
2267 | u8 *pos; | ||
2268 | u8 bssid[ETH_ALEN]; | ||
2269 | u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; | ||
2270 | u16 capability; | ||
2271 | int i; | ||
2272 | |||
2273 | if (sdata->u.sta.flags & IEEE80211_STA_BSSID_SET) { | ||
2274 | memcpy(bssid, ifsta->bssid, ETH_ALEN); | ||
2275 | } else { | ||
2276 | /* Generate random, not broadcast, locally administered BSSID. Mix in | ||
2277 | * own MAC address to make sure that devices that do not have proper | ||
2278 | * random number generator get different BSSID. */ | ||
2279 | get_random_bytes(bssid, ETH_ALEN); | ||
2280 | for (i = 0; i < ETH_ALEN; i++) | ||
2281 | bssid[i] ^= sdata->dev->dev_addr[i]; | ||
2282 | bssid[0] &= ~0x01; | ||
2283 | bssid[0] |= 0x02; | ||
2284 | } | ||
2285 | |||
2286 | printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n", | ||
2287 | sdata->dev->name, bssid); | ||
2288 | |||
2289 | sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; | ||
2290 | |||
2291 | if (local->hw.conf.beacon_int == 0) | ||
2292 | local->hw.conf.beacon_int = 100; | ||
2293 | |||
2294 | capability = WLAN_CAPABILITY_IBSS; | ||
2295 | |||
2296 | if (sdata->default_key) | ||
2297 | capability |= WLAN_CAPABILITY_PRIVACY; | ||
2298 | else | ||
2299 | sdata->drop_unencrypted = 0; | ||
2300 | |||
2301 | pos = supp_rates; | ||
2302 | for (i = 0; i < sband->n_bitrates; i++) { | ||
2303 | int rate = sband->bitrates[i].bitrate; | ||
2304 | *pos++ = (u8) (rate / 5); | ||
2305 | } | ||
2306 | |||
2307 | return __ieee80211_sta_join_ibss(sdata, ifsta, | ||
2308 | bssid, local->hw.conf.beacon_int, | ||
2309 | local->hw.conf.channel->center_freq, | ||
2310 | sband->n_bitrates, supp_rates, | ||
2311 | capability); | ||
2312 | } | ||
2313 | |||
2314 | |||
2315 | static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata, | ||
2316 | struct ieee80211_if_sta *ifsta) | ||
2317 | { | ||
2318 | struct ieee80211_local *local = sdata->local; | ||
2319 | struct ieee80211_bss *bss; | ||
2320 | int active_ibss; | ||
2321 | |||
2322 | if (ifsta->ssid_len == 0) | ||
2323 | return -EINVAL; | ||
2324 | |||
2325 | active_ibss = ieee80211_sta_active_ibss(sdata); | ||
2326 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
2327 | printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n", | ||
2328 | sdata->dev->name, active_ibss); | ||
2329 | #endif /* CONFIG_MAC80211_IBSS_DEBUG */ | ||
2330 | |||
2331 | if (active_ibss) | ||
2332 | return 0; | ||
2333 | |||
2334 | if (ifsta->flags & IEEE80211_STA_BSSID_SET) | ||
2335 | bss = ieee80211_rx_bss_get(local, ifsta->bssid, 0, | ||
2336 | ifsta->ssid, ifsta->ssid_len); | ||
2337 | else | ||
2338 | bss = (void *)cfg80211_get_ibss(local->hw.wiphy, | ||
2339 | NULL, | ||
2340 | ifsta->ssid, ifsta->ssid_len); | ||
2341 | |||
2342 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
2343 | if (bss) | ||
2344 | printk(KERN_DEBUG " sta_find_ibss: selected %pM current " | ||
2345 | "%pM\n", bss->cbss.bssid, ifsta->bssid); | ||
2346 | #endif /* CONFIG_MAC80211_IBSS_DEBUG */ | ||
2347 | |||
2348 | if (bss && | ||
2349 | (!(ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) || | ||
2350 | memcmp(ifsta->bssid, bss->cbss.bssid, ETH_ALEN))) { | ||
2351 | int ret; | ||
2352 | |||
2353 | printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM" | ||
2354 | " based on configured SSID\n", | ||
2355 | sdata->dev->name, bss->cbss.bssid); | ||
2356 | |||
2357 | ret = ieee80211_sta_join_ibss(sdata, ifsta, bss); | ||
2358 | ieee80211_rx_bss_put(local, bss); | ||
2359 | return ret; | ||
2360 | } else if (bss) | ||
2361 | ieee80211_rx_bss_put(local, bss); | ||
2362 | |||
2363 | #ifdef CONFIG_MAC80211_IBSS_DEBUG | ||
2364 | printk(KERN_DEBUG " did not try to join ibss\n"); | ||
2365 | #endif /* CONFIG_MAC80211_IBSS_DEBUG */ | ||
2366 | |||
2367 | /* Selected IBSS not found in current scan results - try to scan */ | ||
2368 | if (ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED && | ||
2369 | !ieee80211_sta_active_ibss(sdata)) { | ||
2370 | mod_timer(&ifsta->timer, jiffies + | ||
2371 | IEEE80211_IBSS_MERGE_INTERVAL); | ||
2372 | } else if (time_after(jiffies, local->last_scan_completed + | ||
2373 | IEEE80211_SCAN_INTERVAL)) { | ||
2374 | printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to " | ||
2375 | "join\n", sdata->dev->name); | ||
2376 | |||
2377 | /* XXX maybe racy? */ | ||
2378 | if (local->scan_req) | ||
2379 | return -EBUSY; | ||
2380 | |||
2381 | memcpy(local->int_scan_req.ssids[0].ssid, | ||
2382 | ifsta->ssid, IEEE80211_MAX_SSID_LEN); | ||
2383 | local->int_scan_req.ssids[0].ssid_len = ifsta->ssid_len; | ||
2384 | return ieee80211_request_scan(sdata, &local->int_scan_req); | ||
2385 | } else if (ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED) { | ||
2386 | int interval = IEEE80211_SCAN_INTERVAL; | ||
2387 | |||
2388 | if (time_after(jiffies, ifsta->ibss_join_req + | ||
2389 | IEEE80211_IBSS_JOIN_TIMEOUT)) { | ||
2390 | if ((ifsta->flags & IEEE80211_STA_CREATE_IBSS) && | ||
2391 | (!(local->oper_channel->flags & | ||
2392 | IEEE80211_CHAN_NO_IBSS))) | ||
2393 | return ieee80211_sta_create_ibss(sdata, ifsta); | ||
2394 | if (ifsta->flags & IEEE80211_STA_CREATE_IBSS) { | ||
2395 | printk(KERN_DEBUG "%s: IBSS not allowed on" | ||
2396 | " %d MHz\n", sdata->dev->name, | ||
2397 | local->hw.conf.channel->center_freq); | ||
2398 | } | ||
2399 | |||
2400 | /* No IBSS found - decrease scan interval and continue | ||
2401 | * scanning. */ | ||
2402 | interval = IEEE80211_SCAN_INTERVAL_SLOW; | ||
2403 | } | ||
2404 | |||
2405 | ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH; | ||
2406 | mod_timer(&ifsta->timer, jiffies + interval); | ||
2407 | return 0; | ||
2408 | } | ||
2409 | |||
2410 | return 0; | ||
2411 | } | ||
2412 | |||
2413 | |||
2414 | static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, | ||
2415 | struct ieee80211_if_sta *ifsta) | ||
2416 | { | 1652 | { |
1653 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
2417 | struct ieee80211_local *local = sdata->local; | 1654 | struct ieee80211_local *local = sdata->local; |
2418 | struct ieee80211_bss *bss; | 1655 | struct ieee80211_bss *bss; |
2419 | u8 *bssid = ifsta->bssid, *ssid = ifsta->ssid; | 1656 | u8 *bssid = ifmgd->bssid, *ssid = ifmgd->ssid; |
2420 | u8 ssid_len = ifsta->ssid_len; | 1657 | u8 ssid_len = ifmgd->ssid_len; |
2421 | u16 capa_mask = WLAN_CAPABILITY_ESS; | 1658 | u16 capa_mask = WLAN_CAPABILITY_ESS; |
2422 | u16 capa_val = WLAN_CAPABILITY_ESS; | 1659 | u16 capa_val = WLAN_CAPABILITY_ESS; |
2423 | struct ieee80211_channel *chan = local->oper_channel; | 1660 | struct ieee80211_channel *chan = local->oper_channel; |
2424 | 1661 | ||
2425 | if (ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | | 1662 | if (ifmgd->flags & (IEEE80211_STA_AUTO_SSID_SEL | |
2426 | IEEE80211_STA_AUTO_BSSID_SEL | | 1663 | IEEE80211_STA_AUTO_BSSID_SEL | |
2427 | IEEE80211_STA_AUTO_CHANNEL_SEL)) { | 1664 | IEEE80211_STA_AUTO_CHANNEL_SEL)) { |
2428 | capa_mask |= WLAN_CAPABILITY_PRIVACY; | 1665 | capa_mask |= WLAN_CAPABILITY_PRIVACY; |
@@ -2430,13 +1667,13 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, | |||
2430 | capa_val |= WLAN_CAPABILITY_PRIVACY; | 1667 | capa_val |= WLAN_CAPABILITY_PRIVACY; |
2431 | } | 1668 | } |
2432 | 1669 | ||
2433 | if (ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) | 1670 | if (ifmgd->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) |
2434 | chan = NULL; | 1671 | chan = NULL; |
2435 | 1672 | ||
2436 | if (ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) | 1673 | if (ifmgd->flags & IEEE80211_STA_AUTO_BSSID_SEL) |
2437 | bssid = NULL; | 1674 | bssid = NULL; |
2438 | 1675 | ||
2439 | if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) { | 1676 | if (ifmgd->flags & IEEE80211_STA_AUTO_SSID_SEL) { |
2440 | ssid = NULL; | 1677 | ssid = NULL; |
2441 | ssid_len = 0; | 1678 | ssid_len = 0; |
2442 | } | 1679 | } |
@@ -2447,16 +1684,16 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, | |||
2447 | 1684 | ||
2448 | if (bss) { | 1685 | if (bss) { |
2449 | ieee80211_set_freq(sdata, bss->cbss.channel->center_freq); | 1686 | ieee80211_set_freq(sdata, bss->cbss.channel->center_freq); |
2450 | if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) | 1687 | if (!(ifmgd->flags & IEEE80211_STA_SSID_SET)) |
2451 | ieee80211_sta_set_ssid(sdata, bss->ssid, | 1688 | ieee80211_sta_set_ssid(sdata, bss->ssid, |
2452 | bss->ssid_len); | 1689 | bss->ssid_len); |
2453 | ieee80211_sta_set_bssid(sdata, bss->cbss.bssid); | 1690 | ieee80211_sta_set_bssid(sdata, bss->cbss.bssid); |
2454 | ieee80211_sta_def_wmm_params(sdata, bss->supp_rates_len, | 1691 | ieee80211_sta_def_wmm_params(sdata, bss->supp_rates_len, |
2455 | bss->supp_rates); | 1692 | bss->supp_rates); |
2456 | if (sdata->u.sta.mfp == IEEE80211_MFP_REQUIRED) | 1693 | if (sdata->u.mgd.mfp == IEEE80211_MFP_REQUIRED) |
2457 | sdata->u.sta.flags |= IEEE80211_STA_MFP_ENABLED; | 1694 | sdata->u.mgd.flags |= IEEE80211_STA_MFP_ENABLED; |
2458 | else | 1695 | else |
2459 | sdata->u.sta.flags &= ~IEEE80211_STA_MFP_ENABLED; | 1696 | sdata->u.mgd.flags &= ~IEEE80211_STA_MFP_ENABLED; |
2460 | 1697 | ||
2461 | /* Send out direct probe if no probe resp was received or | 1698 | /* Send out direct probe if no probe resp was received or |
2462 | * the one we have is outdated | 1699 | * the one we have is outdated |
@@ -2464,31 +1701,34 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, | |||
2464 | if (!bss->last_probe_resp || | 1701 | if (!bss->last_probe_resp || |
2465 | time_after(jiffies, bss->last_probe_resp | 1702 | time_after(jiffies, bss->last_probe_resp |
2466 | + IEEE80211_SCAN_RESULT_EXPIRE)) | 1703 | + IEEE80211_SCAN_RESULT_EXPIRE)) |
2467 | ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; | 1704 | ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE; |
2468 | else | 1705 | else |
2469 | ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; | 1706 | ifmgd->state = IEEE80211_STA_MLME_AUTHENTICATE; |
2470 | 1707 | ||
2471 | ieee80211_rx_bss_put(local, bss); | 1708 | ieee80211_rx_bss_put(local, bss); |
2472 | ieee80211_sta_reset_auth(sdata, ifsta); | 1709 | ieee80211_sta_reset_auth(sdata); |
2473 | return 0; | 1710 | return 0; |
2474 | } else { | 1711 | } else { |
2475 | if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) { | 1712 | if (ifmgd->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) { |
2476 | ifsta->assoc_scan_tries++; | 1713 | ifmgd->assoc_scan_tries++; |
2477 | /* XXX maybe racy? */ | 1714 | /* XXX maybe racy? */ |
2478 | if (local->scan_req) | 1715 | if (local->scan_req) |
2479 | return -1; | 1716 | return -1; |
2480 | memcpy(local->int_scan_req.ssids[0].ssid, | 1717 | memcpy(local->int_scan_req.ssids[0].ssid, |
2481 | ifsta->ssid, IEEE80211_MAX_SSID_LEN); | 1718 | ifmgd->ssid, IEEE80211_MAX_SSID_LEN); |
2482 | if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) | 1719 | if (ifmgd->flags & IEEE80211_STA_AUTO_SSID_SEL) |
2483 | local->int_scan_req.ssids[0].ssid_len = 0; | 1720 | local->int_scan_req.ssids[0].ssid_len = 0; |
2484 | else | 1721 | else |
2485 | local->int_scan_req.ssids[0].ssid_len = ifsta->ssid_len; | 1722 | local->int_scan_req.ssids[0].ssid_len = ifmgd->ssid_len; |
2486 | ieee80211_start_scan(sdata, &local->int_scan_req); | 1723 | |
2487 | ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; | 1724 | if (ieee80211_start_scan(sdata, &local->int_scan_req)) |
2488 | set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); | 1725 | ieee80211_scan_failed(local); |
1726 | |||
1727 | ifmgd->state = IEEE80211_STA_MLME_AUTHENTICATE; | ||
1728 | set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request); | ||
2489 | } else { | 1729 | } else { |
2490 | ifsta->assoc_scan_tries = 0; | 1730 | ifmgd->assoc_scan_tries = 0; |
2491 | ifsta->state = IEEE80211_STA_MLME_DISABLED; | 1731 | ifmgd->state = IEEE80211_STA_MLME_DISABLED; |
2492 | } | 1732 | } |
2493 | } | 1733 | } |
2494 | return -1; | 1734 | return -1; |
@@ -2498,9 +1738,9 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, | |||
2498 | static void ieee80211_sta_work(struct work_struct *work) | 1738 | static void ieee80211_sta_work(struct work_struct *work) |
2499 | { | 1739 | { |
2500 | struct ieee80211_sub_if_data *sdata = | 1740 | struct ieee80211_sub_if_data *sdata = |
2501 | container_of(work, struct ieee80211_sub_if_data, u.sta.work); | 1741 | container_of(work, struct ieee80211_sub_if_data, u.mgd.work); |
2502 | struct ieee80211_local *local = sdata->local; | 1742 | struct ieee80211_local *local = sdata->local; |
2503 | struct ieee80211_if_sta *ifsta; | 1743 | struct ieee80211_if_managed *ifmgd; |
2504 | struct sk_buff *skb; | 1744 | struct sk_buff *skb; |
2505 | 1745 | ||
2506 | if (!netif_running(sdata->dev)) | 1746 | if (!netif_running(sdata->dev)) |
@@ -2509,60 +1749,60 @@ static void ieee80211_sta_work(struct work_struct *work) | |||
2509 | if (local->sw_scanning || local->hw_scanning) | 1749 | if (local->sw_scanning || local->hw_scanning) |
2510 | return; | 1750 | return; |
2511 | 1751 | ||
2512 | if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION && | 1752 | if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) |
2513 | sdata->vif.type != NL80211_IFTYPE_ADHOC)) | ||
2514 | return; | 1753 | return; |
2515 | ifsta = &sdata->u.sta; | 1754 | ifmgd = &sdata->u.mgd; |
2516 | 1755 | ||
2517 | while ((skb = skb_dequeue(&ifsta->skb_queue))) | 1756 | while ((skb = skb_dequeue(&ifmgd->skb_queue))) |
2518 | ieee80211_sta_rx_queued_mgmt(sdata, skb); | 1757 | ieee80211_sta_rx_queued_mgmt(sdata, skb); |
2519 | 1758 | ||
2520 | if (ifsta->state != IEEE80211_STA_MLME_DIRECT_PROBE && | 1759 | if (ifmgd->state != IEEE80211_STA_MLME_DIRECT_PROBE && |
2521 | ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && | 1760 | ifmgd->state != IEEE80211_STA_MLME_AUTHENTICATE && |
2522 | ifsta->state != IEEE80211_STA_MLME_ASSOCIATE && | 1761 | ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE && |
2523 | test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) { | 1762 | test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request)) { |
2524 | ieee80211_start_scan(sdata, local->scan_req); | 1763 | /* |
1764 | * The call to ieee80211_start_scan can fail but ieee80211_request_scan | ||
1765 | * (which queued ieee80211_sta_work) did not return an error. Thus, call | ||
1766 | * ieee80211_scan_failed here if ieee80211_start_scan fails in order to | ||
1767 | * notify the scan requester. | ||
1768 | */ | ||
1769 | if (ieee80211_start_scan(sdata, local->scan_req)) | ||
1770 | ieee80211_scan_failed(local); | ||
2525 | return; | 1771 | return; |
2526 | } | 1772 | } |
2527 | 1773 | ||
2528 | if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) { | 1774 | if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request)) { |
2529 | if (ieee80211_sta_config_auth(sdata, ifsta)) | 1775 | if (ieee80211_sta_config_auth(sdata)) |
2530 | return; | 1776 | return; |
2531 | clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); | 1777 | clear_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request); |
2532 | } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request)) | 1778 | } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request)) |
2533 | return; | 1779 | return; |
2534 | 1780 | ||
2535 | switch (ifsta->state) { | 1781 | switch (ifmgd->state) { |
2536 | case IEEE80211_STA_MLME_DISABLED: | 1782 | case IEEE80211_STA_MLME_DISABLED: |
2537 | break; | 1783 | break; |
2538 | case IEEE80211_STA_MLME_DIRECT_PROBE: | 1784 | case IEEE80211_STA_MLME_DIRECT_PROBE: |
2539 | ieee80211_direct_probe(sdata, ifsta); | 1785 | ieee80211_direct_probe(sdata); |
2540 | break; | 1786 | break; |
2541 | case IEEE80211_STA_MLME_AUTHENTICATE: | 1787 | case IEEE80211_STA_MLME_AUTHENTICATE: |
2542 | ieee80211_authenticate(sdata, ifsta); | 1788 | ieee80211_authenticate(sdata); |
2543 | break; | 1789 | break; |
2544 | case IEEE80211_STA_MLME_ASSOCIATE: | 1790 | case IEEE80211_STA_MLME_ASSOCIATE: |
2545 | ieee80211_associate(sdata, ifsta); | 1791 | ieee80211_associate(sdata); |
2546 | break; | 1792 | break; |
2547 | case IEEE80211_STA_MLME_ASSOCIATED: | 1793 | case IEEE80211_STA_MLME_ASSOCIATED: |
2548 | ieee80211_associated(sdata, ifsta); | 1794 | ieee80211_associated(sdata); |
2549 | break; | ||
2550 | case IEEE80211_STA_MLME_IBSS_SEARCH: | ||
2551 | ieee80211_sta_find_ibss(sdata, ifsta); | ||
2552 | break; | ||
2553 | case IEEE80211_STA_MLME_IBSS_JOINED: | ||
2554 | ieee80211_sta_merge_ibss(sdata, ifsta); | ||
2555 | break; | 1795 | break; |
2556 | default: | 1796 | default: |
2557 | WARN_ON(1); | 1797 | WARN_ON(1); |
2558 | break; | 1798 | break; |
2559 | } | 1799 | } |
2560 | 1800 | ||
2561 | if (ieee80211_privacy_mismatch(sdata, ifsta)) { | 1801 | if (ieee80211_privacy_mismatch(sdata)) { |
2562 | printk(KERN_DEBUG "%s: privacy configuration mismatch and " | 1802 | printk(KERN_DEBUG "%s: privacy configuration mismatch and " |
2563 | "mixed-cell disabled - disassociate\n", sdata->dev->name); | 1803 | "mixed-cell disabled - disassociate\n", sdata->dev->name); |
2564 | 1804 | ||
2565 | ieee80211_set_disassoc(sdata, ifsta, false, true, | 1805 | ieee80211_set_disassoc(sdata, false, true, |
2566 | WLAN_REASON_UNSPECIFIED); | 1806 | WLAN_REASON_UNSPECIFIED); |
2567 | } | 1807 | } |
2568 | } | 1808 | } |
@@ -2571,155 +1811,106 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) | |||
2571 | { | 1811 | { |
2572 | if (sdata->vif.type == NL80211_IFTYPE_STATION) | 1812 | if (sdata->vif.type == NL80211_IFTYPE_STATION) |
2573 | queue_work(sdata->local->hw.workqueue, | 1813 | queue_work(sdata->local->hw.workqueue, |
2574 | &sdata->u.sta.work); | 1814 | &sdata->u.mgd.work); |
2575 | } | 1815 | } |
2576 | 1816 | ||
2577 | /* interface setup */ | 1817 | /* interface setup */ |
2578 | void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) | 1818 | void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) |
2579 | { | 1819 | { |
2580 | struct ieee80211_if_sta *ifsta; | 1820 | struct ieee80211_if_managed *ifmgd; |
2581 | 1821 | ||
2582 | ifsta = &sdata->u.sta; | 1822 | ifmgd = &sdata->u.mgd; |
2583 | INIT_WORK(&ifsta->work, ieee80211_sta_work); | 1823 | INIT_WORK(&ifmgd->work, ieee80211_sta_work); |
2584 | INIT_WORK(&ifsta->chswitch_work, ieee80211_chswitch_work); | 1824 | INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); |
2585 | setup_timer(&ifsta->timer, ieee80211_sta_timer, | 1825 | setup_timer(&ifmgd->timer, ieee80211_sta_timer, |
2586 | (unsigned long) sdata); | 1826 | (unsigned long) sdata); |
2587 | setup_timer(&ifsta->chswitch_timer, ieee80211_chswitch_timer, | 1827 | setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer, |
2588 | (unsigned long) sdata); | 1828 | (unsigned long) sdata); |
2589 | skb_queue_head_init(&ifsta->skb_queue); | 1829 | skb_queue_head_init(&ifmgd->skb_queue); |
2590 | 1830 | ||
2591 | ifsta->capab = WLAN_CAPABILITY_ESS; | 1831 | ifmgd->capab = WLAN_CAPABILITY_ESS; |
2592 | ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN | | 1832 | ifmgd->auth_algs = IEEE80211_AUTH_ALG_OPEN | |
2593 | IEEE80211_AUTH_ALG_SHARED_KEY; | 1833 | IEEE80211_AUTH_ALG_SHARED_KEY; |
2594 | ifsta->flags |= IEEE80211_STA_CREATE_IBSS | | 1834 | ifmgd->flags |= IEEE80211_STA_CREATE_IBSS | |
2595 | IEEE80211_STA_AUTO_BSSID_SEL | | 1835 | IEEE80211_STA_AUTO_BSSID_SEL | |
2596 | IEEE80211_STA_AUTO_CHANNEL_SEL; | 1836 | IEEE80211_STA_AUTO_CHANNEL_SEL; |
2597 | if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4) | 1837 | if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4) |
2598 | ifsta->flags |= IEEE80211_STA_WMM_ENABLED; | 1838 | ifmgd->flags |= IEEE80211_STA_WMM_ENABLED; |
2599 | } | ||
2600 | |||
2601 | /* | ||
2602 | * Add a new IBSS station, will also be called by the RX code when, | ||
2603 | * in IBSS mode, receiving a frame from a yet-unknown station, hence | ||
2604 | * must be callable in atomic context. | ||
2605 | */ | ||
2606 | struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, | ||
2607 | u8 *bssid,u8 *addr, u32 supp_rates) | ||
2608 | { | ||
2609 | struct ieee80211_local *local = sdata->local; | ||
2610 | struct sta_info *sta; | ||
2611 | int band = local->hw.conf.channel->band; | ||
2612 | |||
2613 | /* TODO: Could consider removing the least recently used entry and | ||
2614 | * allow new one to be added. */ | ||
2615 | if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { | ||
2616 | if (net_ratelimit()) { | ||
2617 | printk(KERN_DEBUG "%s: No room for a new IBSS STA " | ||
2618 | "entry %pM\n", sdata->dev->name, addr); | ||
2619 | } | ||
2620 | return NULL; | ||
2621 | } | ||
2622 | |||
2623 | if (compare_ether_addr(bssid, sdata->u.sta.bssid)) | ||
2624 | return NULL; | ||
2625 | |||
2626 | #ifdef CONFIG_MAC80211_VERBOSE_DEBUG | ||
2627 | printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n", | ||
2628 | wiphy_name(local->hw.wiphy), addr, sdata->dev->name); | ||
2629 | #endif | ||
2630 | |||
2631 | sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); | ||
2632 | if (!sta) | ||
2633 | return NULL; | ||
2634 | |||
2635 | set_sta_flags(sta, WLAN_STA_AUTHORIZED); | ||
2636 | |||
2637 | /* make sure mandatory rates are always added */ | ||
2638 | sta->sta.supp_rates[band] = supp_rates | | ||
2639 | ieee80211_mandatory_rates(local, band); | ||
2640 | |||
2641 | rate_control_rate_init(sta); | ||
2642 | |||
2643 | if (sta_info_insert(sta)) | ||
2644 | return NULL; | ||
2645 | |||
2646 | return sta; | ||
2647 | } | 1839 | } |
2648 | 1840 | ||
2649 | /* configuration hooks */ | 1841 | /* configuration hooks */ |
2650 | void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, | 1842 | void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata) |
2651 | struct ieee80211_if_sta *ifsta) | ||
2652 | { | 1843 | { |
1844 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; | ||
2653 | struct ieee80211_local *local = sdata->local; | 1845 | struct ieee80211_local *local = sdata->local; |
2654 | 1846 | ||
2655 | if (sdata->vif.type != NL80211_IFTYPE_STATION) | 1847 | if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) |
2656 | return; | 1848 | return; |
2657 | 1849 | ||
2658 | if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | | 1850 | if ((ifmgd->flags & (IEEE80211_STA_BSSID_SET | |
2659 | IEEE80211_STA_AUTO_BSSID_SEL)) && | 1851 | IEEE80211_STA_AUTO_BSSID_SEL)) && |
2660 | (ifsta->flags & (IEEE80211_STA_SSID_SET | | 1852 | (ifmgd->flags & (IEEE80211_STA_SSID_SET | |
2661 | IEEE80211_STA_AUTO_SSID_SEL))) { | 1853 | IEEE80211_STA_AUTO_SSID_SEL))) { |
2662 | 1854 | ||
2663 | if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) | 1855 | if (ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) |
2664 | ieee80211_set_disassoc(sdata, ifsta, true, true, | 1856 | ieee80211_set_disassoc(sdata, true, true, |
2665 | WLAN_REASON_DEAUTH_LEAVING); | 1857 | WLAN_REASON_DEAUTH_LEAVING); |
2666 | 1858 | ||
2667 | set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); | 1859 | set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request); |
2668 | queue_work(local->hw.workqueue, &ifsta->work); | 1860 | queue_work(local->hw.workqueue, &ifmgd->work); |
2669 | } | 1861 | } |
2670 | } | 1862 | } |
2671 | 1863 | ||
2672 | int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) | 1864 | int ieee80211_sta_commit(struct ieee80211_sub_if_data *sdata) |
2673 | { | 1865 | { |
2674 | struct ieee80211_if_sta *ifsta; | 1866 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
2675 | 1867 | ||
2676 | if (len > IEEE80211_MAX_SSID_LEN) | 1868 | ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET; |
2677 | return -EINVAL; | ||
2678 | 1869 | ||
2679 | ifsta = &sdata->u.sta; | 1870 | if (ifmgd->ssid_len) |
1871 | ifmgd->flags |= IEEE80211_STA_SSID_SET; | ||
1872 | else | ||
1873 | ifmgd->flags &= ~IEEE80211_STA_SSID_SET; | ||
2680 | 1874 | ||
2681 | if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) { | 1875 | return 0; |
2682 | memset(ifsta->ssid, 0, sizeof(ifsta->ssid)); | 1876 | } |
2683 | memcpy(ifsta->ssid, ssid, len); | ||
2684 | ifsta->ssid_len = len; | ||
2685 | } | ||
2686 | 1877 | ||
2687 | ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; | 1878 | int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) |
1879 | { | ||
1880 | struct ieee80211_if_managed *ifmgd; | ||
2688 | 1881 | ||
2689 | if (len) | 1882 | if (len > IEEE80211_MAX_SSID_LEN) |
2690 | ifsta->flags |= IEEE80211_STA_SSID_SET; | 1883 | return -EINVAL; |
2691 | else | 1884 | |
2692 | ifsta->flags &= ~IEEE80211_STA_SSID_SET; | 1885 | ifmgd = &sdata->u.mgd; |
2693 | 1886 | ||
2694 | if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { | 1887 | if (ifmgd->ssid_len != len || memcmp(ifmgd->ssid, ssid, len) != 0) { |
2695 | ifsta->ibss_join_req = jiffies; | 1888 | memset(ifmgd->ssid, 0, sizeof(ifmgd->ssid)); |
2696 | ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH; | 1889 | memcpy(ifmgd->ssid, ssid, len); |
2697 | return ieee80211_sta_find_ibss(sdata, ifsta); | 1890 | ifmgd->ssid_len = len; |
2698 | } | 1891 | } |
2699 | 1892 | ||
2700 | return 0; | 1893 | return ieee80211_sta_commit(sdata); |
2701 | } | 1894 | } |
2702 | 1895 | ||
2703 | int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len) | 1896 | int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len) |
2704 | { | 1897 | { |
2705 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | 1898 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
2706 | memcpy(ssid, ifsta->ssid, ifsta->ssid_len); | 1899 | memcpy(ssid, ifmgd->ssid, ifmgd->ssid_len); |
2707 | *len = ifsta->ssid_len; | 1900 | *len = ifmgd->ssid_len; |
2708 | return 0; | 1901 | return 0; |
2709 | } | 1902 | } |
2710 | 1903 | ||
2711 | int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) | 1904 | int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) |
2712 | { | 1905 | { |
2713 | struct ieee80211_if_sta *ifsta; | 1906 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
2714 | |||
2715 | ifsta = &sdata->u.sta; | ||
2716 | 1907 | ||
2717 | if (is_valid_ether_addr(bssid)) { | 1908 | if (is_valid_ether_addr(bssid)) { |
2718 | memcpy(ifsta->bssid, bssid, ETH_ALEN); | 1909 | memcpy(ifmgd->bssid, bssid, ETH_ALEN); |
2719 | ifsta->flags |= IEEE80211_STA_BSSID_SET; | 1910 | ifmgd->flags |= IEEE80211_STA_BSSID_SET; |
2720 | } else { | 1911 | } else { |
2721 | memset(ifsta->bssid, 0, ETH_ALEN); | 1912 | memset(ifmgd->bssid, 0, ETH_ALEN); |
2722 | ifsta->flags &= ~IEEE80211_STA_BSSID_SET; | 1913 | ifmgd->flags &= ~IEEE80211_STA_BSSID_SET; |
2723 | } | 1914 | } |
2724 | 1915 | ||
2725 | if (netif_running(sdata->dev)) { | 1916 | if (netif_running(sdata->dev)) { |
@@ -2729,47 +1920,44 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) | |||
2729 | } | 1920 | } |
2730 | } | 1921 | } |
2731 | 1922 | ||
2732 | return ieee80211_sta_set_ssid(sdata, ifsta->ssid, ifsta->ssid_len); | 1923 | return ieee80211_sta_commit(sdata); |
2733 | } | 1924 | } |
2734 | 1925 | ||
2735 | int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len) | 1926 | int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len) |
2736 | { | 1927 | { |
2737 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | 1928 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
2738 | 1929 | ||
2739 | kfree(ifsta->extra_ie); | 1930 | kfree(ifmgd->extra_ie); |
2740 | if (len == 0) { | 1931 | if (len == 0) { |
2741 | ifsta->extra_ie = NULL; | 1932 | ifmgd->extra_ie = NULL; |
2742 | ifsta->extra_ie_len = 0; | 1933 | ifmgd->extra_ie_len = 0; |
2743 | return 0; | 1934 | return 0; |
2744 | } | 1935 | } |
2745 | ifsta->extra_ie = kmalloc(len, GFP_KERNEL); | 1936 | ifmgd->extra_ie = kmalloc(len, GFP_KERNEL); |
2746 | if (!ifsta->extra_ie) { | 1937 | if (!ifmgd->extra_ie) { |
2747 | ifsta->extra_ie_len = 0; | 1938 | ifmgd->extra_ie_len = 0; |
2748 | return -ENOMEM; | 1939 | return -ENOMEM; |
2749 | } | 1940 | } |
2750 | memcpy(ifsta->extra_ie, ie, len); | 1941 | memcpy(ifmgd->extra_ie, ie, len); |
2751 | ifsta->extra_ie_len = len; | 1942 | ifmgd->extra_ie_len = len; |
2752 | return 0; | 1943 | return 0; |
2753 | } | 1944 | } |
2754 | 1945 | ||
2755 | int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason) | 1946 | int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason) |
2756 | { | 1947 | { |
2757 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | ||
2758 | |||
2759 | printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n", | 1948 | printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n", |
2760 | sdata->dev->name, reason); | 1949 | sdata->dev->name, reason); |
2761 | 1950 | ||
2762 | if (sdata->vif.type != NL80211_IFTYPE_STATION && | 1951 | if (sdata->vif.type != NL80211_IFTYPE_STATION) |
2763 | sdata->vif.type != NL80211_IFTYPE_ADHOC) | ||
2764 | return -EINVAL; | 1952 | return -EINVAL; |
2765 | 1953 | ||
2766 | ieee80211_set_disassoc(sdata, ifsta, true, true, reason); | 1954 | ieee80211_set_disassoc(sdata, true, true, reason); |
2767 | return 0; | 1955 | return 0; |
2768 | } | 1956 | } |
2769 | 1957 | ||
2770 | int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason) | 1958 | int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason) |
2771 | { | 1959 | { |
2772 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | 1960 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
2773 | 1961 | ||
2774 | printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n", | 1962 | printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n", |
2775 | sdata->dev->name, reason); | 1963 | sdata->dev->name, reason); |
@@ -2777,10 +1965,10 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason) | |||
2777 | if (sdata->vif.type != NL80211_IFTYPE_STATION) | 1965 | if (sdata->vif.type != NL80211_IFTYPE_STATION) |
2778 | return -EINVAL; | 1966 | return -EINVAL; |
2779 | 1967 | ||
2780 | if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED)) | 1968 | if (!(ifmgd->flags & IEEE80211_STA_ASSOCIATED)) |
2781 | return -1; | 1969 | return -ENOLINK; |
2782 | 1970 | ||
2783 | ieee80211_set_disassoc(sdata, ifsta, false, true, reason); | 1971 | ieee80211_set_disassoc(sdata, false, true, reason); |
2784 | return 0; | 1972 | return 0; |
2785 | } | 1973 | } |
2786 | 1974 | ||
@@ -2788,14 +1976,6 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason) | |||
2788 | void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) | 1976 | void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) |
2789 | { | 1977 | { |
2790 | struct ieee80211_sub_if_data *sdata = local->scan_sdata; | 1978 | struct ieee80211_sub_if_data *sdata = local->scan_sdata; |
2791 | struct ieee80211_if_sta *ifsta; | ||
2792 | |||
2793 | if (sdata && sdata->vif.type == NL80211_IFTYPE_ADHOC) { | ||
2794 | ifsta = &sdata->u.sta; | ||
2795 | if ((!(ifsta->flags & IEEE80211_STA_PREV_BSSID_SET)) || | ||
2796 | !ieee80211_sta_active_ibss(sdata)) | ||
2797 | ieee80211_sta_find_ibss(sdata, ifsta); | ||
2798 | } | ||
2799 | 1979 | ||
2800 | /* Restart STA timers */ | 1980 | /* Restart STA timers */ |
2801 | rcu_read_lock(); | 1981 | rcu_read_lock(); |
@@ -2842,3 +2022,36 @@ void ieee80211_dynamic_ps_timer(unsigned long data) | |||
2842 | 2022 | ||
2843 | queue_work(local->hw.workqueue, &local->dynamic_ps_enable_work); | 2023 | queue_work(local->hw.workqueue, &local->dynamic_ps_enable_work); |
2844 | } | 2024 | } |
2025 | |||
2026 | void ieee80211_send_nullfunc(struct ieee80211_local *local, | ||
2027 | struct ieee80211_sub_if_data *sdata, | ||
2028 | int powersave) | ||
2029 | { | ||
2030 | struct sk_buff *skb; | ||
2031 | struct ieee80211_hdr *nullfunc; | ||
2032 | __le16 fc; | ||
2033 | |||
2034 | if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) | ||
2035 | return; | ||
2036 | |||
2037 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24); | ||
2038 | if (!skb) { | ||
2039 | printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " | ||
2040 | "frame\n", sdata->dev->name); | ||
2041 | return; | ||
2042 | } | ||
2043 | skb_reserve(skb, local->hw.extra_tx_headroom); | ||
2044 | |||
2045 | nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24); | ||
2046 | memset(nullfunc, 0, 24); | ||
2047 | fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | | ||
2048 | IEEE80211_FCTL_TODS); | ||
2049 | if (powersave) | ||
2050 | fc |= cpu_to_le16(IEEE80211_FCTL_PM); | ||
2051 | nullfunc->frame_control = fc; | ||
2052 | memcpy(nullfunc->addr1, sdata->u.mgd.bssid, ETH_ALEN); | ||
2053 | memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN); | ||
2054 | memcpy(nullfunc->addr3, sdata->u.mgd.bssid, ETH_ALEN); | ||
2055 | |||
2056 | ieee80211_tx_skb(sdata, skb, 0); | ||
2057 | } | ||
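Editor's note on the hunk above: ieee80211_send_nullfunc() (moved into mlme.c; the old copy is deleted from scan.c further down) builds a 24-byte data/nullfunc header addressed to the current AP, setting ToDS and, when asked, the power-management bit. The following is a minimal userspace sketch of just the frame-control assembly, not kernel code; the constants restate the usual IEEE 802.11 values locally so the example compiles on its own.

    /* Standalone sketch of the frame-control word a nullfunc frame carries. */
    #include <stdint.h>
    #include <stdio.h>
    #include <endian.h>

    #define FTYPE_DATA      0x0008  /* data frame type */
    #define STYPE_NULLFUNC  0x0040  /* null function (no data) subtype */
    #define FCTL_TODS       0x0100  /* frame goes to the distribution system (AP) */
    #define FCTL_PM         0x1000  /* STA is entering power-save mode */

    static uint16_t nullfunc_fc(int powersave)
    {
        uint16_t fc = FTYPE_DATA | STYPE_NULLFUNC | FCTL_TODS;

        if (powersave)
            fc |= FCTL_PM;      /* announce power save to the AP */
        return htole16(fc);     /* 802.11 header fields are little endian */
    }

    int main(void)
    {
        printf("awake:      0x%04x\n", le16toh(nullfunc_fc(0)));  /* 0x0148 */
        printf("power save: 0x%04x\n", le16toh(nullfunc_fc(1)));  /* 0x1148 */
        return 0;
    }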
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 928da625e281..b9164c9a9563 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h | |||
@@ -62,6 +62,18 @@ static inline void rate_control_rate_init(struct sta_info *sta) | |||
62 | ref->ops->rate_init(ref->priv, sband, ista, priv_sta); | 62 | ref->ops->rate_init(ref->priv, sband, ista, priv_sta); |
63 | } | 63 | } |
64 | 64 | ||
65 | static inline void rate_control_rate_update(struct ieee80211_local *local, | ||
66 | struct ieee80211_supported_band *sband, | ||
67 | struct sta_info *sta, u32 changed) | ||
68 | { | ||
69 | struct rate_control_ref *ref = local->rate_ctrl; | ||
70 | struct ieee80211_sta *ista = &sta->sta; | ||
71 | void *priv_sta = sta->rate_ctrl_priv; | ||
72 | |||
73 | if (ref->ops->rate_update) | ||
74 | ref->ops->rate_update(ref->priv, sband, ista, | ||
75 | priv_sta, changed); | ||
76 | } | ||
65 | 77 | ||
66 | static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, | 78 | static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, |
67 | struct ieee80211_sta *sta, | 79 | struct ieee80211_sta *sta, |
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1327d424bf31..66f7ecf51b92 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c | |||
@@ -838,7 +838,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) | |||
838 | if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) { | 838 | if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) { |
839 | u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, | 839 | u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, |
840 | NL80211_IFTYPE_ADHOC); | 840 | NL80211_IFTYPE_ADHOC); |
841 | if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) | 841 | if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0) |
842 | sta->last_rx = jiffies; | 842 | sta->last_rx = jiffies; |
843 | } else | 843 | } else |
844 | if (!is_multicast_ether_addr(hdr->addr1) || | 844 | if (!is_multicast_ether_addr(hdr->addr1) || |
@@ -1702,13 +1702,13 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, | |||
1702 | return; | 1702 | return; |
1703 | } | 1703 | } |
1704 | 1704 | ||
1705 | if (compare_ether_addr(mgmt->sa, sdata->u.sta.bssid) != 0 || | 1705 | if (compare_ether_addr(mgmt->sa, sdata->u.mgd.bssid) != 0 || |
1706 | compare_ether_addr(mgmt->bssid, sdata->u.sta.bssid) != 0) { | 1706 | compare_ether_addr(mgmt->bssid, sdata->u.mgd.bssid) != 0) { |
1707 | /* Not from the current AP. */ | 1707 | /* Not from the current AP. */ |
1708 | return; | 1708 | return; |
1709 | } | 1709 | } |
1710 | 1710 | ||
1711 | if (sdata->u.sta.state == IEEE80211_STA_MLME_ASSOCIATE) { | 1711 | if (sdata->u.mgd.state == IEEE80211_STA_MLME_ASSOCIATE) { |
1712 | /* Association in progress; ignore SA Query */ | 1712 | /* Association in progress; ignore SA Query */ |
1713 | return; | 1713 | return; |
1714 | } | 1714 | } |
@@ -1727,7 +1727,7 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, | |||
1727 | memset(resp, 0, 24); | 1727 | memset(resp, 0, 24); |
1728 | memcpy(resp->da, mgmt->sa, ETH_ALEN); | 1728 | memcpy(resp->da, mgmt->sa, ETH_ALEN); |
1729 | memcpy(resp->sa, sdata->dev->dev_addr, ETH_ALEN); | 1729 | memcpy(resp->sa, sdata->dev->dev_addr, ETH_ALEN); |
1730 | memcpy(resp->bssid, sdata->u.sta.bssid, ETH_ALEN); | 1730 | memcpy(resp->bssid, sdata->u.mgd.bssid, ETH_ALEN); |
1731 | resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | | 1731 | resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | |
1732 | IEEE80211_STYPE_ACTION); | 1732 | IEEE80211_STYPE_ACTION); |
1733 | skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query)); | 1733 | skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query)); |
@@ -1745,7 +1745,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) | |||
1745 | { | 1745 | { |
1746 | struct ieee80211_local *local = rx->local; | 1746 | struct ieee80211_local *local = rx->local; |
1747 | struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); | 1747 | struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); |
1748 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | ||
1749 | struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; | 1748 | struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; |
1750 | struct ieee80211_bss *bss; | 1749 | struct ieee80211_bss *bss; |
1751 | int len = rx->skb->len; | 1750 | int len = rx->skb->len; |
@@ -1803,6 +1802,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) | |||
1803 | case WLAN_CATEGORY_SPECTRUM_MGMT: | 1802 | case WLAN_CATEGORY_SPECTRUM_MGMT: |
1804 | if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) | 1803 | if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) |
1805 | return RX_DROP_MONITOR; | 1804 | return RX_DROP_MONITOR; |
1805 | |||
1806 | if (sdata->vif.type != NL80211_IFTYPE_STATION) | ||
1807 | return RX_DROP_MONITOR; | ||
1808 | |||
1806 | switch (mgmt->u.action.u.measurement.action_code) { | 1809 | switch (mgmt->u.action.u.measurement.action_code) { |
1807 | case WLAN_ACTION_SPCT_MSR_REQ: | 1810 | case WLAN_ACTION_SPCT_MSR_REQ: |
1808 | if (len < (IEEE80211_MIN_ACTION_SIZE + | 1811 | if (len < (IEEE80211_MIN_ACTION_SIZE + |
@@ -1815,12 +1818,13 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) | |||
1815 | sizeof(mgmt->u.action.u.chan_switch))) | 1818 | sizeof(mgmt->u.action.u.chan_switch))) |
1816 | return RX_DROP_MONITOR; | 1819 | return RX_DROP_MONITOR; |
1817 | 1820 | ||
1818 | if (memcmp(mgmt->bssid, ifsta->bssid, ETH_ALEN) != 0) | 1821 | if (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN)) |
1819 | return RX_DROP_MONITOR; | 1822 | return RX_DROP_MONITOR; |
1820 | 1823 | ||
1821 | bss = ieee80211_rx_bss_get(local, ifsta->bssid, | 1824 | bss = ieee80211_rx_bss_get(local, sdata->u.mgd.bssid, |
1822 | local->hw.conf.channel->center_freq, | 1825 | local->hw.conf.channel->center_freq, |
1823 | ifsta->ssid, ifsta->ssid_len); | 1826 | sdata->u.mgd.ssid, |
1827 | sdata->u.mgd.ssid_len); | ||
1824 | if (!bss) | 1828 | if (!bss) |
1825 | return RX_DROP_MONITOR; | 1829 | return RX_DROP_MONITOR; |
1826 | 1830 | ||
@@ -1876,11 +1880,14 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) | |||
1876 | sdata->vif.type != NL80211_IFTYPE_ADHOC) | 1880 | sdata->vif.type != NL80211_IFTYPE_ADHOC) |
1877 | return RX_DROP_MONITOR; | 1881 | return RX_DROP_MONITOR; |
1878 | 1882 | ||
1879 | if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) | ||
1880 | return RX_DROP_MONITOR; | ||
1881 | 1883 | ||
1882 | ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status); | 1884 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { |
1883 | return RX_QUEUED; | 1885 | if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) |
1886 | return RX_DROP_MONITOR; | ||
1887 | return ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status); | ||
1888 | } | ||
1889 | |||
1890 | return ieee80211_ibss_rx_mgmt(sdata, rx->skb, rx->status); | ||
1884 | } | 1891 | } |
1885 | 1892 | ||
1886 | static void ieee80211_rx_michael_mic_report(struct net_device *dev, | 1893 | static void ieee80211_rx_michael_mic_report(struct net_device *dev, |
@@ -2083,7 +2090,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, | |||
2083 | case NL80211_IFTYPE_STATION: | 2090 | case NL80211_IFTYPE_STATION: |
2084 | if (!bssid) | 2091 | if (!bssid) |
2085 | return 0; | 2092 | return 0; |
2086 | if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { | 2093 | if (!ieee80211_bssid_match(bssid, sdata->u.mgd.bssid)) { |
2087 | if (!(rx->flags & IEEE80211_RX_IN_SCAN)) | 2094 | if (!(rx->flags & IEEE80211_RX_IN_SCAN)) |
2088 | return 0; | 2095 | return 0; |
2089 | rx->flags &= ~IEEE80211_RX_RA_MATCH; | 2096 | rx->flags &= ~IEEE80211_RX_RA_MATCH; |
@@ -2101,7 +2108,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, | |||
2101 | if (ieee80211_is_beacon(hdr->frame_control)) { | 2108 | if (ieee80211_is_beacon(hdr->frame_control)) { |
2102 | return 1; | 2109 | return 1; |
2103 | } | 2110 | } |
2104 | else if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { | 2111 | else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) { |
2105 | if (!(rx->flags & IEEE80211_RX_IN_SCAN)) | 2112 | if (!(rx->flags & IEEE80211_RX_IN_SCAN)) |
2106 | return 0; | 2113 | return 0; |
2107 | rx->flags &= ~IEEE80211_RX_RA_MATCH; | 2114 | rx->flags &= ~IEEE80211_RX_RA_MATCH; |
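Editor's note: with IBSS handling split out of the STA MLME, ieee80211_rx_h_mgmt() above now routes management frames either to the managed-mode handler or to the new IBSS handler, and only station interfaces can defer to a userspace MLME. The sketch below mirrors that dispatch in standalone C; the enum and handler names are illustrative only.

    #include <stdio.h>

    enum iftype { IFTYPE_STATION, IFTYPE_ADHOC };

    static int sta_rx_mgmt(void)  { puts("managed MLME handles frame"); return 0; }
    static int ibss_rx_mgmt(void) { puts("IBSS MLME handles frame");    return 0; }

    /* Station frames may be punted to a userspace MLME; anything else that
     * reaches this point is handled by the in-kernel IBSS code. */
    static int rx_mgmt(enum iftype type, int userspace_mlme)
    {
        if (type == IFTYPE_STATION) {
            if (userspace_mlme)
                return -1;      /* drop here, userspace sees it via monitor */
            return sta_rx_mgmt();
        }
        return ibss_rx_mgmt();
    }

    int main(void)
    {
        rx_mgmt(IFTYPE_STATION, 0);
        rx_mgmt(IFTYPE_ADHOC, 0);
        return 0;
    }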
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index f883ab9f1e6e..5030a3c87509 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c | |||
@@ -63,20 +63,15 @@ ieee80211_bss_info_update(struct ieee80211_local *local, | |||
63 | { | 63 | { |
64 | struct ieee80211_bss *bss; | 64 | struct ieee80211_bss *bss; |
65 | int clen; | 65 | int clen; |
66 | enum cfg80211_signal_type sigtype = CFG80211_SIGNAL_TYPE_NONE; | ||
67 | s32 signal = 0; | 66 | s32 signal = 0; |
68 | 67 | ||
69 | if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { | 68 | if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) |
70 | sigtype = CFG80211_SIGNAL_TYPE_MBM; | ||
71 | signal = rx_status->signal * 100; | 69 | signal = rx_status->signal * 100; |
72 | } else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) { | 70 | else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) |
73 | sigtype = CFG80211_SIGNAL_TYPE_UNSPEC; | ||
74 | signal = (rx_status->signal * 100) / local->hw.max_signal; | 71 | signal = (rx_status->signal * 100) / local->hw.max_signal; |
75 | } | ||
76 | 72 | ||
77 | bss = (void *)cfg80211_inform_bss_frame(local->hw.wiphy, channel, | 73 | bss = (void *)cfg80211_inform_bss_frame(local->hw.wiphy, channel, |
78 | mgmt, len, signal, sigtype, | 74 | mgmt, len, signal, GFP_ATOMIC); |
79 | GFP_ATOMIC); | ||
80 | 75 | ||
81 | if (!bss) | 76 | if (!bss) |
82 | return NULL; | 77 | return NULL; |
@@ -207,34 +202,16 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, | |||
207 | return RX_QUEUED; | 202 | return RX_QUEUED; |
208 | } | 203 | } |
209 | 204 | ||
210 | void ieee80211_send_nullfunc(struct ieee80211_local *local, | 205 | void ieee80211_scan_failed(struct ieee80211_local *local) |
211 | struct ieee80211_sub_if_data *sdata, | ||
212 | int powersave) | ||
213 | { | 206 | { |
214 | struct sk_buff *skb; | 207 | if (WARN_ON(!local->scan_req)) |
215 | struct ieee80211_hdr *nullfunc; | ||
216 | __le16 fc; | ||
217 | |||
218 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24); | ||
219 | if (!skb) { | ||
220 | printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " | ||
221 | "frame\n", sdata->dev->name); | ||
222 | return; | 208 | return; |
223 | } | 209 | |
224 | skb_reserve(skb, local->hw.extra_tx_headroom); | 210 | /* notify cfg80211 about the failed scan */ |
225 | 211 | if (local->scan_req != &local->int_scan_req) | |
226 | nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24); | 212 | cfg80211_scan_done(local->scan_req, true); |
227 | memset(nullfunc, 0, 24); | 213 | |
228 | fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | | 214 | local->scan_req = NULL; |
229 | IEEE80211_FCTL_TODS); | ||
230 | if (powersave) | ||
231 | fc |= cpu_to_le16(IEEE80211_FCTL_PM); | ||
232 | nullfunc->frame_control = fc; | ||
233 | memcpy(nullfunc->addr1, sdata->u.sta.bssid, ETH_ALEN); | ||
234 | memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN); | ||
235 | memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN); | ||
236 | |||
237 | ieee80211_tx_skb(sdata, skb, 0); | ||
238 | } | 215 | } |
239 | 216 | ||
240 | void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) | 217 | void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) |
@@ -280,6 +257,9 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) | |||
280 | netif_addr_unlock(local->mdev); | 257 | netif_addr_unlock(local->mdev); |
281 | netif_tx_unlock_bh(local->mdev); | 258 | netif_tx_unlock_bh(local->mdev); |
282 | 259 | ||
260 | if (local->ops->sw_scan_complete) | ||
261 | local->ops->sw_scan_complete(local_to_hw(local)); | ||
262 | |||
283 | mutex_lock(&local->iflist_mtx); | 263 | mutex_lock(&local->iflist_mtx); |
284 | list_for_each_entry(sdata, &local->interfaces, list) { | 264 | list_for_each_entry(sdata, &local->interfaces, list) { |
285 | if (!netif_running(sdata->dev)) | 265 | if (!netif_running(sdata->dev)) |
@@ -287,7 +267,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) | |||
287 | 267 | ||
288 | /* Tell AP we're back */ | 268 | /* Tell AP we're back */ |
289 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { | 269 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { |
290 | if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { | 270 | if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) { |
291 | ieee80211_send_nullfunc(local, sdata, 0); | 271 | ieee80211_send_nullfunc(local, sdata, 0); |
292 | netif_tx_wake_all_queues(sdata->dev); | 272 | netif_tx_wake_all_queues(sdata->dev); |
293 | } | 273 | } |
@@ -305,6 +285,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) | |||
305 | 285 | ||
306 | done: | 286 | done: |
307 | ieee80211_mlme_notify_scan_completed(local); | 287 | ieee80211_mlme_notify_scan_completed(local); |
288 | ieee80211_ibss_notify_scan_completed(local); | ||
308 | ieee80211_mesh_notify_scan_completed(local); | 289 | ieee80211_mesh_notify_scan_completed(local); |
309 | } | 290 | } |
310 | EXPORT_SYMBOL(ieee80211_scan_completed); | 291 | EXPORT_SYMBOL(ieee80211_scan_completed); |
@@ -367,7 +348,8 @@ void ieee80211_scan_work(struct work_struct *work) | |||
367 | ieee80211_send_probe_req( | 348 | ieee80211_send_probe_req( |
368 | sdata, NULL, | 349 | sdata, NULL, |
369 | local->scan_req->ssids[i].ssid, | 350 | local->scan_req->ssids[i].ssid, |
370 | local->scan_req->ssids[i].ssid_len); | 351 | local->scan_req->ssids[i].ssid_len, |
352 | local->scan_req->ie, local->scan_req->ie_len); | ||
371 | next_delay = IEEE80211_CHANNEL_TIME; | 353 | next_delay = IEEE80211_CHANNEL_TIME; |
372 | break; | 354 | break; |
373 | } | 355 | } |
@@ -428,6 +410,8 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, | |||
428 | } | 410 | } |
429 | 411 | ||
430 | local->sw_scanning = true; | 412 | local->sw_scanning = true; |
413 | if (local->ops->sw_scan_start) | ||
414 | local->ops->sw_scan_start(local_to_hw(local)); | ||
431 | 415 | ||
432 | mutex_lock(&local->iflist_mtx); | 416 | mutex_lock(&local->iflist_mtx); |
433 | list_for_each_entry(sdata, &local->interfaces, list) { | 417 | list_for_each_entry(sdata, &local->interfaces, list) { |
@@ -442,7 +426,7 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, | |||
442 | IEEE80211_IFCC_BEACON_ENABLED); | 426 | IEEE80211_IFCC_BEACON_ENABLED); |
443 | 427 | ||
444 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { | 428 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { |
445 | if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { | 429 | if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) { |
446 | netif_tx_stop_all_queues(sdata->dev); | 430 | netif_tx_stop_all_queues(sdata->dev); |
447 | ieee80211_send_nullfunc(local, sdata, 1); | 431 | ieee80211_send_nullfunc(local, sdata, 1); |
448 | } | 432 | } |
@@ -477,7 +461,7 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, | |||
477 | struct cfg80211_scan_request *req) | 461 | struct cfg80211_scan_request *req) |
478 | { | 462 | { |
479 | struct ieee80211_local *local = sdata->local; | 463 | struct ieee80211_local *local = sdata->local; |
480 | struct ieee80211_if_sta *ifsta; | 464 | struct ieee80211_if_managed *ifmgd; |
481 | 465 | ||
482 | if (!req) | 466 | if (!req) |
483 | return -EINVAL; | 467 | return -EINVAL; |
@@ -502,9 +486,9 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, | |||
502 | return -EBUSY; | 486 | return -EBUSY; |
503 | } | 487 | } |
504 | 488 | ||
505 | ifsta = &sdata->u.sta; | 489 | ifmgd = &sdata->u.mgd; |
506 | set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request); | 490 | set_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request); |
507 | queue_work(local->hw.workqueue, &ifsta->work); | 491 | queue_work(local->hw.workqueue, &ifmgd->work); |
508 | 492 | ||
509 | return 0; | 493 | return 0; |
510 | } | 494 | } |
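Editor's note: the new ieee80211_scan_failed() above reports an aborted scan to cfg80211 only when the request came from userspace; the driver-internal request is dropped silently. A small sketch of that ownership rule, with types and names chosen purely for illustration:

    #include <stdio.h>

    struct scan_request { const char *origin; };

    static struct scan_request int_scan_req = { "internal" };
    static struct scan_request *scan_req;

    static void scan_done(struct scan_request *req, int aborted)
    {
        printf("notify cfg80211: %s scan, aborted=%d\n", req->origin, aborted);
    }

    static void scan_failed(void)
    {
        if (!scan_req)                  /* analogue of WARN_ON(!local->scan_req) */
            return;
        if (scan_req != &int_scan_req)  /* only userspace requests are reported */
            scan_done(scan_req, 1);
        scan_req = NULL;
    }

    int main(void)
    {
        struct scan_request user_req = { "userspace" };

        scan_req = &user_req;
        scan_failed();                  /* reported upward */

        scan_req = &int_scan_req;
        scan_failed();                  /* silently dropped */
        return 0;
    }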
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 47bb2aed2813..5f7a2624ed74 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c | |||
@@ -88,16 +88,16 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, | |||
88 | void ieee80211_chswitch_work(struct work_struct *work) | 88 | void ieee80211_chswitch_work(struct work_struct *work) |
89 | { | 89 | { |
90 | struct ieee80211_sub_if_data *sdata = | 90 | struct ieee80211_sub_if_data *sdata = |
91 | container_of(work, struct ieee80211_sub_if_data, u.sta.chswitch_work); | 91 | container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work); |
92 | struct ieee80211_bss *bss; | 92 | struct ieee80211_bss *bss; |
93 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | 93 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
94 | 94 | ||
95 | if (!netif_running(sdata->dev)) | 95 | if (!netif_running(sdata->dev)) |
96 | return; | 96 | return; |
97 | 97 | ||
98 | bss = ieee80211_rx_bss_get(sdata->local, ifsta->bssid, | 98 | bss = ieee80211_rx_bss_get(sdata->local, ifmgd->bssid, |
99 | sdata->local->hw.conf.channel->center_freq, | 99 | sdata->local->hw.conf.channel->center_freq, |
100 | ifsta->ssid, ifsta->ssid_len); | 100 | ifmgd->ssid, ifmgd->ssid_len); |
101 | if (!bss) | 101 | if (!bss) |
102 | goto exit; | 102 | goto exit; |
103 | 103 | ||
@@ -108,7 +108,7 @@ void ieee80211_chswitch_work(struct work_struct *work) | |||
108 | 108 | ||
109 | ieee80211_rx_bss_put(sdata->local, bss); | 109 | ieee80211_rx_bss_put(sdata->local, bss); |
110 | exit: | 110 | exit: |
111 | ifsta->flags &= ~IEEE80211_STA_CSA_RECEIVED; | 111 | ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; |
112 | ieee80211_wake_queues_by_reason(&sdata->local->hw, | 112 | ieee80211_wake_queues_by_reason(&sdata->local->hw, |
113 | IEEE80211_QUEUE_STOP_REASON_CSA); | 113 | IEEE80211_QUEUE_STOP_REASON_CSA); |
114 | } | 114 | } |
@@ -117,9 +117,9 @@ void ieee80211_chswitch_timer(unsigned long data) | |||
117 | { | 117 | { |
118 | struct ieee80211_sub_if_data *sdata = | 118 | struct ieee80211_sub_if_data *sdata = |
119 | (struct ieee80211_sub_if_data *) data; | 119 | (struct ieee80211_sub_if_data *) data; |
120 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | 120 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
121 | 121 | ||
122 | queue_work(sdata->local->hw.workqueue, &ifsta->chswitch_work); | 122 | queue_work(sdata->local->hw.workqueue, &ifmgd->chswitch_work); |
123 | } | 123 | } |
124 | 124 | ||
125 | void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, | 125 | void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, |
@@ -127,14 +127,14 @@ void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, | |||
127 | struct ieee80211_bss *bss) | 127 | struct ieee80211_bss *bss) |
128 | { | 128 | { |
129 | struct ieee80211_channel *new_ch; | 129 | struct ieee80211_channel *new_ch; |
130 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | 130 | struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; |
131 | int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num); | 131 | int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num); |
132 | 132 | ||
133 | /* FIXME: Handle ADHOC later */ | 133 | /* FIXME: Handle ADHOC later */ |
134 | if (sdata->vif.type != NL80211_IFTYPE_STATION) | 134 | if (sdata->vif.type != NL80211_IFTYPE_STATION) |
135 | return; | 135 | return; |
136 | 136 | ||
137 | if (ifsta->state != IEEE80211_STA_MLME_ASSOCIATED) | 137 | if (ifmgd->state != IEEE80211_STA_MLME_ASSOCIATED) |
138 | return; | 138 | return; |
139 | 139 | ||
140 | if (sdata->local->sw_scanning || sdata->local->hw_scanning) | 140 | if (sdata->local->sw_scanning || sdata->local->hw_scanning) |
@@ -143,7 +143,7 @@ void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, | |||
143 | /* Disregard subsequent beacons if we are already running a timer | 143 | /* Disregard subsequent beacons if we are already running a timer |
144 | processing a CSA */ | 144 | processing a CSA */ |
145 | 145 | ||
146 | if (ifsta->flags & IEEE80211_STA_CSA_RECEIVED) | 146 | if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED) |
147 | return; | 147 | return; |
148 | 148 | ||
149 | new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); | 149 | new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); |
@@ -153,12 +153,12 @@ void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, | |||
153 | sdata->local->csa_channel = new_ch; | 153 | sdata->local->csa_channel = new_ch; |
154 | 154 | ||
155 | if (sw_elem->count <= 1) { | 155 | if (sw_elem->count <= 1) { |
156 | queue_work(sdata->local->hw.workqueue, &ifsta->chswitch_work); | 156 | queue_work(sdata->local->hw.workqueue, &ifmgd->chswitch_work); |
157 | } else { | 157 | } else { |
158 | ieee80211_stop_queues_by_reason(&sdata->local->hw, | 158 | ieee80211_stop_queues_by_reason(&sdata->local->hw, |
159 | IEEE80211_QUEUE_STOP_REASON_CSA); | 159 | IEEE80211_QUEUE_STOP_REASON_CSA); |
160 | ifsta->flags |= IEEE80211_STA_CSA_RECEIVED; | 160 | ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; |
161 | mod_timer(&ifsta->chswitch_timer, | 161 | mod_timer(&ifmgd->chswitch_timer, |
162 | jiffies + | 162 | jiffies + |
163 | msecs_to_jiffies(sw_elem->count * | 163 | msecs_to_jiffies(sw_elem->count * |
164 | bss->cbss.beacon_interval)); | 164 | bss->cbss.beacon_interval)); |
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 634f65c0130e..4ba3c540fcf3 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c | |||
@@ -202,6 +202,18 @@ void sta_info_destroy(struct sta_info *sta) | |||
202 | /* Make sure timer won't free the tid_rx struct, see below */ | 202 | /* Make sure timer won't free the tid_rx struct, see below */ |
203 | if (tid_rx) | 203 | if (tid_rx) |
204 | tid_rx->shutdown = true; | 204 | tid_rx->shutdown = true; |
205 | |||
206 | /* | ||
207 | * The stop callback cannot find this station any more, but | ||
208 | * it didn't complete its work -- start the queue if necessary | ||
209 | */ | ||
210 | if (sta->ampdu_mlme.tid_state_tx[i] & HT_AGG_STATE_INITIATOR_MSK && | ||
211 | sta->ampdu_mlme.tid_state_tx[i] & HT_AGG_STATE_REQ_STOP_BA_MSK && | ||
212 | local->hw.ampdu_queues) | ||
213 | ieee80211_wake_queue_by_reason(&local->hw, | ||
214 | local->hw.queues + sta->tid_to_tx_q[i], | ||
215 | IEEE80211_QUEUE_STOP_REASON_AGGREGATION); | ||
216 | |||
205 | spin_unlock_bh(&sta->lock); | 217 | spin_unlock_bh(&sta->lock); |
206 | 218 | ||
207 | /* | 219 | /* |
@@ -275,8 +287,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, | |||
275 | * enable session_timer's data differentiation. refer to | 287 | * enable session_timer's data differentiation. refer to |
276 | * sta_rx_agg_session_timer_expired for usage */ | 288 | * sta_rx_agg_session_timer_expired for usage */ |
277 | sta->timer_to_tid[i] = i; | 289 | sta->timer_to_tid[i] = i; |
278 | /* tid to tx queue: initialize according to HW (0 is valid) */ | 290 | sta->tid_to_tx_q[i] = -1; |
279 | sta->tid_to_tx_q[i] = ieee80211_num_queues(&local->hw); | ||
280 | /* rx */ | 291 | /* rx */ |
281 | sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE; | 292 | sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE; |
282 | sta->ampdu_mlme.tid_rx[i] = NULL; | 293 | sta->ampdu_mlme.tid_rx[i] = NULL; |
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index d9653231992f..1f45573c580c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h | |||
@@ -90,6 +90,7 @@ struct tid_ampdu_tx { | |||
90 | * @buf_size: buffer size for incoming A-MPDUs | 90 | * @buf_size: buffer size for incoming A-MPDUs |
91 | * @timeout: reset timer value (in TUs). | 91 | * @timeout: reset timer value (in TUs). |
92 | * @dialog_token: dialog token for aggregation session | 92 | * @dialog_token: dialog token for aggregation session |
93 | * @shutdown: this session is being shut down due to STA removal | ||
93 | */ | 94 | */ |
94 | struct tid_ampdu_rx { | 95 | struct tid_ampdu_rx { |
95 | struct sk_buff **reorder_buf; | 96 | struct sk_buff **reorder_buf; |
@@ -200,7 +201,7 @@ struct sta_ampdu_mlme { | |||
200 | * @tid_seq: per-TID sequence numbers for sending to this STA | 201 | * @tid_seq: per-TID sequence numbers for sending to this STA |
201 | * @ampdu_mlme: A-MPDU state machine state | 202 | * @ampdu_mlme: A-MPDU state machine state |
202 | * @timer_to_tid: identity mapping to ID timers | 203 | * @timer_to_tid: identity mapping to ID timers |
203 | * @tid_to_tx_q: map tid to tx queue | 204 | * @tid_to_tx_q: map tid to tx queue (invalid == negative values) |
204 | * @llid: Local link ID | 205 | * @llid: Local link ID |
205 | * @plid: Peer link ID | 206 | * @plid: Peer link ID |
206 | * @reason: Cancel reason on PLINK_HOLDING state | 207 | * @reason: Cancel reason on PLINK_HOLDING state |
@@ -275,7 +276,7 @@ struct sta_info { | |||
275 | */ | 276 | */ |
276 | struct sta_ampdu_mlme ampdu_mlme; | 277 | struct sta_ampdu_mlme ampdu_mlme; |
277 | u8 timer_to_tid[STA_TID_NUM]; | 278 | u8 timer_to_tid[STA_TID_NUM]; |
278 | u8 tid_to_tx_q[STA_TID_NUM]; | 279 | s8 tid_to_tx_q[STA_TID_NUM]; |
279 | 280 | ||
280 | #ifdef CONFIG_MAC80211_MESH | 281 | #ifdef CONFIG_MAC80211_MESH |
281 | /* | 282 | /* |
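Editor's note: tid_to_tx_q changes from u8 to s8 above so that a negative value can mean "no aggregation queue assigned", which is simpler than using the queue count as an out-of-range sentinel. A hedged standalone sketch of the resulting lookup pattern:

    #include <stdio.h>

    #define STA_TID_NUM 16

    int main(void)
    {
        signed char tid_to_tx_q[STA_TID_NUM];
        int tid;

        for (tid = 0; tid < STA_TID_NUM; tid++)
            tid_to_tx_q[tid] = -1;          /* invalid == negative values */

        tid_to_tx_q[5] = 2;                 /* TID 5 aggregates on queue 2 */

        for (tid = 0; tid < STA_TID_NUM; tid++)
            if (tid_to_tx_q[tid] >= 0)      /* only established sessions map */
                printf("TID %d -> ampdu queue %d\n", tid, tid_to_tx_q[tid]);
        return 0;
    }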
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 33926831c648..457238a2f3fc 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c | |||
@@ -784,6 +784,8 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) | |||
784 | skb_copy_queue_mapping(frag, first); | 784 | skb_copy_queue_mapping(frag, first); |
785 | 785 | ||
786 | frag->do_not_encrypt = first->do_not_encrypt; | 786 | frag->do_not_encrypt = first->do_not_encrypt; |
787 | frag->dev = first->dev; | ||
788 | frag->iif = first->iif; | ||
787 | 789 | ||
788 | pos += copylen; | 790 | pos += copylen; |
789 | left -= copylen; | 791 | left -= copylen; |
@@ -876,7 +878,6 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) | |||
876 | return TX_CONTINUE; | 878 | return TX_CONTINUE; |
877 | } | 879 | } |
878 | 880 | ||
879 | |||
880 | /* actual transmit path */ | 881 | /* actual transmit path */ |
881 | 882 | ||
882 | /* | 883 | /* |
@@ -1016,12 +1017,20 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, | |||
1016 | tx->sta = sta_info_get(local, hdr->addr1); | 1017 | tx->sta = sta_info_get(local, hdr->addr1); |
1017 | 1018 | ||
1018 | if (tx->sta && ieee80211_is_data_qos(hdr->frame_control)) { | 1019 | if (tx->sta && ieee80211_is_data_qos(hdr->frame_control)) { |
1020 | unsigned long flags; | ||
1019 | qc = ieee80211_get_qos_ctl(hdr); | 1021 | qc = ieee80211_get_qos_ctl(hdr); |
1020 | tid = *qc & IEEE80211_QOS_CTL_TID_MASK; | 1022 | tid = *qc & IEEE80211_QOS_CTL_TID_MASK; |
1021 | 1023 | ||
1024 | spin_lock_irqsave(&tx->sta->lock, flags); | ||
1022 | state = &tx->sta->ampdu_mlme.tid_state_tx[tid]; | 1025 | state = &tx->sta->ampdu_mlme.tid_state_tx[tid]; |
1023 | if (*state == HT_AGG_STATE_OPERATIONAL) | 1026 | if (*state == HT_AGG_STATE_OPERATIONAL) { |
1024 | info->flags |= IEEE80211_TX_CTL_AMPDU; | 1027 | info->flags |= IEEE80211_TX_CTL_AMPDU; |
1028 | if (local->hw.ampdu_queues) | ||
1029 | skb_set_queue_mapping( | ||
1030 | skb, tx->local->hw.queues + | ||
1031 | tx->sta->tid_to_tx_q[tid]); | ||
1032 | } | ||
1033 | spin_unlock_irqrestore(&tx->sta->lock, flags); | ||
1025 | } | 1034 | } |
1026 | 1035 | ||
1027 | if (is_multicast_ether_addr(hdr->addr1)) { | 1036 | if (is_multicast_ether_addr(hdr->addr1)) { |
@@ -1085,7 +1094,8 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, | |||
1085 | int ret, i; | 1094 | int ret, i; |
1086 | 1095 | ||
1087 | if (skb) { | 1096 | if (skb) { |
1088 | if (netif_subqueue_stopped(local->mdev, skb)) | 1097 | if (ieee80211_queue_stopped(&local->hw, |
1098 | skb_get_queue_mapping(skb))) | ||
1089 | return IEEE80211_TX_PENDING; | 1099 | return IEEE80211_TX_PENDING; |
1090 | 1100 | ||
1091 | ret = local->ops->tx(local_to_hw(local), skb); | 1101 | ret = local->ops->tx(local_to_hw(local), skb); |
@@ -1101,8 +1111,8 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, | |||
1101 | info = IEEE80211_SKB_CB(tx->extra_frag[i]); | 1111 | info = IEEE80211_SKB_CB(tx->extra_frag[i]); |
1102 | info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT | | 1112 | info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT | |
1103 | IEEE80211_TX_CTL_FIRST_FRAGMENT); | 1113 | IEEE80211_TX_CTL_FIRST_FRAGMENT); |
1104 | if (netif_subqueue_stopped(local->mdev, | 1114 | if (ieee80211_queue_stopped(&local->hw, |
1105 | tx->extra_frag[i])) | 1115 | skb_get_queue_mapping(tx->extra_frag[i]))) |
1106 | return IEEE80211_TX_FRAG_AGAIN; | 1116 | return IEEE80211_TX_FRAG_AGAIN; |
1107 | 1117 | ||
1108 | ret = local->ops->tx(local_to_hw(local), | 1118 | ret = local->ops->tx(local_to_hw(local), |
@@ -1625,7 +1635,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, | |||
1625 | case NL80211_IFTYPE_STATION: | 1635 | case NL80211_IFTYPE_STATION: |
1626 | fc |= cpu_to_le16(IEEE80211_FCTL_TODS); | 1636 | fc |= cpu_to_le16(IEEE80211_FCTL_TODS); |
1627 | /* BSSID SA DA */ | 1637 | /* BSSID SA DA */ |
1628 | memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); | 1638 | memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); |
1629 | memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); | 1639 | memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); |
1630 | memcpy(hdr.addr3, skb->data, ETH_ALEN); | 1640 | memcpy(hdr.addr3, skb->data, ETH_ALEN); |
1631 | hdrlen = 24; | 1641 | hdrlen = 24; |
@@ -1634,7 +1644,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, | |||
1634 | /* DA SA BSSID */ | 1644 | /* DA SA BSSID */ |
1635 | memcpy(hdr.addr1, skb->data, ETH_ALEN); | 1645 | memcpy(hdr.addr1, skb->data, ETH_ALEN); |
1636 | memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); | 1646 | memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); |
1637 | memcpy(hdr.addr3, sdata->u.sta.bssid, ETH_ALEN); | 1647 | memcpy(hdr.addr3, sdata->u.ibss.bssid, ETH_ALEN); |
1638 | hdrlen = 24; | 1648 | hdrlen = 24; |
1639 | break; | 1649 | break; |
1640 | default: | 1650 | default: |
@@ -1920,7 +1930,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, | |||
1920 | struct ieee80211_tx_info *info; | 1930 | struct ieee80211_tx_info *info; |
1921 | struct ieee80211_sub_if_data *sdata = NULL; | 1931 | struct ieee80211_sub_if_data *sdata = NULL; |
1922 | struct ieee80211_if_ap *ap = NULL; | 1932 | struct ieee80211_if_ap *ap = NULL; |
1923 | struct ieee80211_if_sta *ifsta = NULL; | ||
1924 | struct beacon_data *beacon; | 1933 | struct beacon_data *beacon; |
1925 | struct ieee80211_supported_band *sband; | 1934 | struct ieee80211_supported_band *sband; |
1926 | enum ieee80211_band band = local->hw.conf.channel->band; | 1935 | enum ieee80211_band band = local->hw.conf.channel->band; |
@@ -1972,13 +1981,13 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, | |||
1972 | } else | 1981 | } else |
1973 | goto out; | 1982 | goto out; |
1974 | } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { | 1983 | } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { |
1984 | struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; | ||
1975 | struct ieee80211_hdr *hdr; | 1985 | struct ieee80211_hdr *hdr; |
1976 | ifsta = &sdata->u.sta; | ||
1977 | 1986 | ||
1978 | if (!ifsta->probe_resp) | 1987 | if (!ifibss->probe_resp) |
1979 | goto out; | 1988 | goto out; |
1980 | 1989 | ||
1981 | skb = skb_copy(ifsta->probe_resp, GFP_ATOMIC); | 1990 | skb = skb_copy(ifibss->probe_resp, GFP_ATOMIC); |
1982 | if (!skb) | 1991 | if (!skb) |
1983 | goto out; | 1992 | goto out; |
1984 | 1993 | ||
diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 73c7d7345abd..e0431a1d218b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c | |||
@@ -344,15 +344,36 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, | |||
344 | { | 344 | { |
345 | struct ieee80211_local *local = hw_to_local(hw); | 345 | struct ieee80211_local *local = hw_to_local(hw); |
346 | 346 | ||
347 | /* we don't need to track ampdu queues */ | 347 | if (queue >= hw->queues) { |
348 | if (queue < ieee80211_num_regular_queues(hw)) { | 348 | if (local->ampdu_ac_queue[queue - hw->queues] < 0) |
349 | __clear_bit(reason, &local->queue_stop_reasons[queue]); | 349 | return; |
350 | |||
351 | /* | ||
352 | * for virtual aggregation queues, we need to refcount the | ||
353 | * internal mac80211 disable (multiple times!), keep track of | ||
354 | * driver disable _and_ make sure the regular queue is | ||
355 | * actually enabled. | ||
356 | */ | ||
357 | if (reason == IEEE80211_QUEUE_STOP_REASON_AGGREGATION) | ||
358 | local->amdpu_ac_stop_refcnt[queue - hw->queues]--; | ||
359 | else | ||
360 | __clear_bit(reason, &local->queue_stop_reasons[queue]); | ||
350 | 361 | ||
351 | if (local->queue_stop_reasons[queue] != 0) | 362 | if (local->queue_stop_reasons[queue] || |
352 | /* someone still has this queue stopped */ | 363 | local->amdpu_ac_stop_refcnt[queue - hw->queues]) |
353 | return; | 364 | return; |
365 | |||
366 | /* now go on to treat the corresponding regular queue */ | ||
367 | queue = local->ampdu_ac_queue[queue - hw->queues]; | ||
368 | reason = IEEE80211_QUEUE_STOP_REASON_AGGREGATION; | ||
354 | } | 369 | } |
355 | 370 | ||
371 | __clear_bit(reason, &local->queue_stop_reasons[queue]); | ||
372 | |||
373 | if (local->queue_stop_reasons[queue] != 0) | ||
374 | /* someone still has this queue stopped */ | ||
375 | return; | ||
376 | |||
356 | if (test_bit(queue, local->queues_pending)) { | 377 | if (test_bit(queue, local->queues_pending)) { |
357 | set_bit(queue, local->queues_pending_run); | 378 | set_bit(queue, local->queues_pending_run); |
358 | tasklet_schedule(&local->tx_pending_tasklet); | 379 | tasklet_schedule(&local->tx_pending_tasklet); |
@@ -361,8 +382,8 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, | |||
361 | } | 382 | } |
362 | } | 383 | } |
363 | 384 | ||
364 | static void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, | 385 | void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, |
365 | enum queue_stop_reason reason) | 386 | enum queue_stop_reason reason) |
366 | { | 387 | { |
367 | struct ieee80211_local *local = hw_to_local(hw); | 388 | struct ieee80211_local *local = hw_to_local(hw); |
368 | unsigned long flags; | 389 | unsigned long flags; |
@@ -384,15 +405,33 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, | |||
384 | { | 405 | { |
385 | struct ieee80211_local *local = hw_to_local(hw); | 406 | struct ieee80211_local *local = hw_to_local(hw); |
386 | 407 | ||
387 | /* we don't need to track ampdu queues */ | 408 | if (queue >= hw->queues) { |
388 | if (queue < ieee80211_num_regular_queues(hw)) | 409 | if (local->ampdu_ac_queue[queue - hw->queues] < 0) |
389 | __set_bit(reason, &local->queue_stop_reasons[queue]); | 410 | return; |
411 | |||
412 | /* | ||
413 | * for virtual aggregation queues, we need to refcount the | ||
414 | * internal mac80211 disable (multiple times!), keep track of | ||
415 | * driver disable _and_ make sure the regular queue is | ||
416 | * actually enabled. | ||
417 | */ | ||
418 | if (reason == IEEE80211_QUEUE_STOP_REASON_AGGREGATION) | ||
419 | local->amdpu_ac_stop_refcnt[queue - hw->queues]++; | ||
420 | else | ||
421 | __set_bit(reason, &local->queue_stop_reasons[queue]); | ||
422 | |||
423 | /* now go on to treat the corresponding regular queue */ | ||
424 | queue = local->ampdu_ac_queue[queue - hw->queues]; | ||
425 | reason = IEEE80211_QUEUE_STOP_REASON_AGGREGATION; | ||
426 | } | ||
427 | |||
428 | __set_bit(reason, &local->queue_stop_reasons[queue]); | ||
390 | 429 | ||
391 | netif_stop_subqueue(local->mdev, queue); | 430 | netif_stop_subqueue(local->mdev, queue); |
392 | } | 431 | } |
393 | 432 | ||
394 | static void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, | 433 | void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, |
395 | enum queue_stop_reason reason) | 434 | enum queue_stop_reason reason) |
396 | { | 435 | { |
397 | struct ieee80211_local *local = hw_to_local(hw); | 436 | struct ieee80211_local *local = hw_to_local(hw); |
398 | unsigned long flags; | 437 | unsigned long flags; |
@@ -418,7 +457,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, | |||
418 | 457 | ||
419 | spin_lock_irqsave(&local->queue_stop_reason_lock, flags); | 458 | spin_lock_irqsave(&local->queue_stop_reason_lock, flags); |
420 | 459 | ||
421 | for (i = 0; i < ieee80211_num_queues(hw); i++) | 460 | for (i = 0; i < hw->queues; i++) |
422 | __ieee80211_stop_queue(hw, i, reason); | 461 | __ieee80211_stop_queue(hw, i, reason); |
423 | 462 | ||
424 | spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); | 463 | spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); |
@@ -434,6 +473,16 @@ EXPORT_SYMBOL(ieee80211_stop_queues); | |||
434 | int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) | 473 | int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) |
435 | { | 474 | { |
436 | struct ieee80211_local *local = hw_to_local(hw); | 475 | struct ieee80211_local *local = hw_to_local(hw); |
476 | unsigned long flags; | ||
477 | |||
478 | if (queue >= hw->queues) { | ||
479 | spin_lock_irqsave(&local->queue_stop_reason_lock, flags); | ||
480 | queue = local->ampdu_ac_queue[queue - hw->queues]; | ||
481 | spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); | ||
482 | if (queue < 0) | ||
483 | return true; | ||
484 | } | ||
485 | |||
437 | return __netif_subqueue_stopped(local->mdev, queue); | 486 | return __netif_subqueue_stopped(local->mdev, queue); |
438 | } | 487 | } |
439 | EXPORT_SYMBOL(ieee80211_queue_stopped); | 488 | EXPORT_SYMBOL(ieee80211_queue_stopped); |
@@ -701,6 +750,27 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) | |||
701 | local->ops->conf_tx(local_to_hw(local), i, &qparam); | 750 | local->ops->conf_tx(local_to_hw(local), i, &qparam); |
702 | } | 751 | } |
703 | 752 | ||
753 | void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, | ||
754 | const size_t supp_rates_len, | ||
755 | const u8 *supp_rates) | ||
756 | { | ||
757 | struct ieee80211_local *local = sdata->local; | ||
758 | int i, have_higher_than_11mbit = 0; | ||
759 | |||
760 | /* cf. IEEE 802.11 9.2.12 */ | ||
761 | for (i = 0; i < supp_rates_len; i++) | ||
762 | if ((supp_rates[i] & 0x7f) * 5 > 110) | ||
763 | have_higher_than_11mbit = 1; | ||
764 | |||
765 | if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && | ||
766 | have_higher_than_11mbit) | ||
767 | sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; | ||
768 | else | ||
769 | sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; | ||
770 | |||
771 | ieee80211_set_wmm_default(sdata); | ||
772 | } | ||
773 | |||
704 | void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, | 774 | void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, |
705 | int encrypt) | 775 | int encrypt) |
706 | { | 776 | { |
@@ -767,3 +837,161 @@ u32 ieee80211_mandatory_rates(struct ieee80211_local *local, | |||
767 | mandatory_rates |= BIT(i); | 837 | mandatory_rates |= BIT(i); |
768 | return mandatory_rates; | 838 | return mandatory_rates; |
769 | } | 839 | } |
840 | |||
841 | void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, | ||
842 | u16 transaction, u16 auth_alg, | ||
843 | u8 *extra, size_t extra_len, | ||
844 | const u8 *bssid, int encrypt) | ||
845 | { | ||
846 | struct ieee80211_local *local = sdata->local; | ||
847 | struct sk_buff *skb; | ||
848 | struct ieee80211_mgmt *mgmt; | ||
849 | const u8 *ie_auth = NULL; | ||
850 | int ie_auth_len = 0; | ||
851 | |||
852 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { | ||
853 | ie_auth_len = sdata->u.mgd.ie_auth_len; | ||
854 | ie_auth = sdata->u.mgd.ie_auth; | ||
855 | } | ||
856 | |||
857 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + | ||
858 | sizeof(*mgmt) + 6 + extra_len + ie_auth_len); | ||
859 | if (!skb) { | ||
860 | printk(KERN_DEBUG "%s: failed to allocate buffer for auth " | ||
861 | "frame\n", sdata->dev->name); | ||
862 | return; | ||
863 | } | ||
864 | skb_reserve(skb, local->hw.extra_tx_headroom); | ||
865 | |||
866 | mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6); | ||
867 | memset(mgmt, 0, 24 + 6); | ||
868 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | | ||
869 | IEEE80211_STYPE_AUTH); | ||
870 | if (encrypt) | ||
871 | mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); | ||
872 | memcpy(mgmt->da, bssid, ETH_ALEN); | ||
873 | memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); | ||
874 | memcpy(mgmt->bssid, bssid, ETH_ALEN); | ||
875 | mgmt->u.auth.auth_alg = cpu_to_le16(auth_alg); | ||
876 | mgmt->u.auth.auth_transaction = cpu_to_le16(transaction); | ||
877 | mgmt->u.auth.status_code = cpu_to_le16(0); | ||
878 | if (extra) | ||
879 | memcpy(skb_put(skb, extra_len), extra, extra_len); | ||
880 | if (ie_auth) | ||
881 | memcpy(skb_put(skb, ie_auth_len), ie_auth, ie_auth_len); | ||
882 | |||
883 | ieee80211_tx_skb(sdata, skb, encrypt); | ||
884 | } | ||
885 | |||
886 | void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, | ||
887 | u8 *ssid, size_t ssid_len, | ||
888 | u8 *ie, size_t ie_len) | ||
889 | { | ||
890 | struct ieee80211_local *local = sdata->local; | ||
891 | struct ieee80211_supported_band *sband; | ||
892 | struct sk_buff *skb; | ||
893 | struct ieee80211_mgmt *mgmt; | ||
894 | u8 *pos, *supp_rates, *esupp_rates = NULL, *extra_preq_ie = NULL; | ||
895 | int i, extra_preq_ie_len = 0; | ||
896 | |||
897 | switch (sdata->vif.type) { | ||
898 | case NL80211_IFTYPE_STATION: | ||
899 | extra_preq_ie_len = sdata->u.mgd.ie_probereq_len; | ||
900 | extra_preq_ie = sdata->u.mgd.ie_probereq; | ||
901 | break; | ||
902 | default: | ||
903 | break; | ||
904 | } | ||
905 | |||
906 | skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + | ||
907 | ie_len + extra_preq_ie_len); | ||
908 | if (!skb) { | ||
909 | printk(KERN_DEBUG "%s: failed to allocate buffer for probe " | ||
910 | "request\n", sdata->dev->name); | ||
911 | return; | ||
912 | } | ||
913 | skb_reserve(skb, local->hw.extra_tx_headroom); | ||
914 | |||
915 | mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); | ||
916 | memset(mgmt, 0, 24); | ||
917 | mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | | ||
918 | IEEE80211_STYPE_PROBE_REQ); | ||
919 | memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); | ||
920 | if (dst) { | ||
921 | memcpy(mgmt->da, dst, ETH_ALEN); | ||
922 | memcpy(mgmt->bssid, dst, ETH_ALEN); | ||
923 | } else { | ||
924 | memset(mgmt->da, 0xff, ETH_ALEN); | ||
925 | memset(mgmt->bssid, 0xff, ETH_ALEN); | ||
926 | } | ||
927 | pos = skb_put(skb, 2 + ssid_len); | ||
928 | *pos++ = WLAN_EID_SSID; | ||
929 | *pos++ = ssid_len; | ||
930 | memcpy(pos, ssid, ssid_len); | ||
931 | |||
932 | supp_rates = skb_put(skb, 2); | ||
933 | supp_rates[0] = WLAN_EID_SUPP_RATES; | ||
934 | supp_rates[1] = 0; | ||
935 | sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; | ||
936 | |||
937 | for (i = 0; i < sband->n_bitrates; i++) { | ||
938 | struct ieee80211_rate *rate = &sband->bitrates[i]; | ||
939 | if (esupp_rates) { | ||
940 | pos = skb_put(skb, 1); | ||
941 | esupp_rates[1]++; | ||
942 | } else if (supp_rates[1] == 8) { | ||
943 | esupp_rates = skb_put(skb, 3); | ||
944 | esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES; | ||
945 | esupp_rates[1] = 1; | ||
946 | pos = &esupp_rates[2]; | ||
947 | } else { | ||
948 | pos = skb_put(skb, 1); | ||
949 | supp_rates[1]++; | ||
950 | } | ||
951 | *pos = rate->bitrate / 5; | ||
952 | } | ||
953 | |||
954 | if (ie) | ||
955 | memcpy(skb_put(skb, ie_len), ie, ie_len); | ||
956 | if (extra_preq_ie) | ||
957 | memcpy(skb_put(skb, extra_preq_ie_len), extra_preq_ie, | ||
958 | extra_preq_ie_len); | ||
959 | |||
960 | ieee80211_tx_skb(sdata, skb, 0); | ||
961 | } | ||
962 | |||
963 | u32 ieee80211_sta_get_rates(struct ieee80211_local *local, | ||
964 | struct ieee802_11_elems *elems, | ||
965 | enum ieee80211_band band) | ||
966 | { | ||
967 | struct ieee80211_supported_band *sband; | ||
968 | struct ieee80211_rate *bitrates; | ||
969 | size_t num_rates; | ||
970 | u32 supp_rates; | ||
971 | int i, j; | ||
972 | sband = local->hw.wiphy->bands[band]; | ||
973 | |||
974 | if (!sband) { | ||
975 | WARN_ON(1); | ||
976 | sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; | ||
977 | } | ||
978 | |||
979 | bitrates = sband->bitrates; | ||
980 | num_rates = sband->n_bitrates; | ||
981 | supp_rates = 0; | ||
982 | for (i = 0; i < elems->supp_rates_len + | ||
983 | elems->ext_supp_rates_len; i++) { | ||
984 | u8 rate = 0; | ||
985 | int own_rate; | ||
986 | if (i < elems->supp_rates_len) | ||
987 | rate = elems->supp_rates[i]; | ||
988 | else if (elems->ext_supp_rates) | ||
989 | rate = elems->ext_supp_rates | ||
990 | [i - elems->supp_rates_len]; | ||
991 | own_rate = 5 * (rate & 0x7f); | ||
992 | for (j = 0; j < num_rates; j++) | ||
993 | if (bitrates[j].bitrate == own_rate) | ||
994 | supp_rates |= BIT(j); | ||
995 | } | ||
996 | return supp_rates; | ||
997 | } | ||
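
ieee80211_send_probe_req above spreads the band's bitrates across two information elements: the first eight go into Supported Rates and the rest into Extended Supported Rates, each octet encoded in 500 kbps units. A self-contained sketch of that split, with an illustrative rate list and flat buffer rather than the mac80211 skb API:

#include <stdio.h>

#define EID_SUPP_RATES      1
#define EID_EXT_SUPP_RATES 50

/* Encode rates (given in 100 kbps units) into SUPP_RATES + EXT_SUPP_RATES
 * IEs. Returns the number of bytes written to buf. */
static size_t encode_rate_ies(unsigned char *buf, const int *rates_100kbps, int n)
{
	unsigned char *supp = buf, *ext = NULL, *pos = buf + 2;
	int i;

	supp[0] = EID_SUPP_RATES;
	supp[1] = 0;

	for (i = 0; i < n; i++) {
		if (ext) {
			ext[1]++;
		} else if (supp[1] == 8) {
			ext = pos;
			ext[0] = EID_EXT_SUPP_RATES;
			ext[1] = 1;
			pos += 2;
		} else {
			supp[1]++;
		}
		*pos++ = rates_100kbps[i] / 5;	/* convert to 500 kbps units */
	}
	return (size_t)(pos - buf);
}

int main(void)
{
	int rates[] = { 10, 20, 55, 110, 60, 90, 120, 180, 240, 360, 480, 540 };
	unsigned char buf[64];
	size_t len = encode_rate_ies(buf, rates, 12);
	size_t i;

	for (i = 0; i < len; i++)
		printf("%02x ", buf[i]);
	printf("\n");
	return 0;
}
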
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 2b023dce8b24..935c63ed3dfa 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c | |||
@@ -132,139 +132,37 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, | |||
132 | if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) | 132 | if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) |
133 | return -EOPNOTSUPP; | 133 | return -EOPNOTSUPP; |
134 | 134 | ||
135 | if (sdata->vif.type == NL80211_IFTYPE_STATION || | 135 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { |
136 | sdata->vif.type == NL80211_IFTYPE_ADHOC) { | ||
137 | int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length); | 136 | int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length); |
138 | if (ret) | 137 | if (ret) |
139 | return ret; | 138 | return ret; |
140 | sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; | 139 | sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; |
141 | ieee80211_sta_req_auth(sdata, &sdata->u.sta); | 140 | ieee80211_sta_req_auth(sdata); |
142 | return 0; | 141 | return 0; |
143 | } | 142 | } |
144 | 143 | ||
145 | return -EOPNOTSUPP; | 144 | return -EOPNOTSUPP; |
146 | } | 145 | } |
147 | 146 | ||
148 | static u8 ieee80211_get_wstats_flags(struct ieee80211_local *local) | ||
149 | { | ||
150 | u8 wstats_flags = 0; | ||
151 | |||
152 | wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC | | ||
153 | IEEE80211_HW_SIGNAL_DBM) ? | ||
154 | IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID; | ||
155 | wstats_flags |= local->hw.flags & IEEE80211_HW_NOISE_DBM ? | ||
156 | IW_QUAL_NOISE_UPDATED : IW_QUAL_NOISE_INVALID; | ||
157 | if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) | ||
158 | wstats_flags |= IW_QUAL_DBM; | ||
159 | |||
160 | return wstats_flags; | ||
161 | } | ||
162 | |||
163 | static int ieee80211_ioctl_giwrange(struct net_device *dev, | ||
164 | struct iw_request_info *info, | ||
165 | struct iw_point *data, char *extra) | ||
166 | { | ||
167 | struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); | ||
168 | struct iw_range *range = (struct iw_range *) extra; | ||
169 | enum ieee80211_band band; | ||
170 | int c = 0; | ||
171 | |||
172 | data->length = sizeof(struct iw_range); | ||
173 | memset(range, 0, sizeof(struct iw_range)); | ||
174 | |||
175 | range->we_version_compiled = WIRELESS_EXT; | ||
176 | range->we_version_source = 21; | ||
177 | range->retry_capa = IW_RETRY_LIMIT; | ||
178 | range->retry_flags = IW_RETRY_LIMIT; | ||
179 | range->min_retry = 0; | ||
180 | range->max_retry = 255; | ||
181 | range->min_rts = 0; | ||
182 | range->max_rts = 2347; | ||
183 | range->min_frag = 256; | ||
184 | range->max_frag = 2346; | ||
185 | |||
186 | range->encoding_size[0] = 5; | ||
187 | range->encoding_size[1] = 13; | ||
188 | range->num_encoding_sizes = 2; | ||
189 | range->max_encoding_tokens = NUM_DEFAULT_KEYS; | ||
190 | |||
191 | /* cfg80211 requires this, and enforces 0..100 */ | ||
192 | if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) | ||
193 | range->max_qual.level = 100; | ||
194 | else if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) | ||
195 | range->max_qual.level = -110; | ||
196 | else | ||
197 | range->max_qual.level = 0; | ||
198 | |||
199 | if (local->hw.flags & IEEE80211_HW_NOISE_DBM) | ||
200 | range->max_qual.noise = -110; | ||
201 | else | ||
202 | range->max_qual.noise = 0; | ||
203 | |||
204 | range->max_qual.qual = 100; | ||
205 | range->max_qual.updated = ieee80211_get_wstats_flags(local); | ||
206 | |||
207 | range->avg_qual.qual = 50; | ||
208 | /* not always true but better than nothing */ | ||
209 | range->avg_qual.level = range->max_qual.level / 2; | ||
210 | range->avg_qual.noise = range->max_qual.noise / 2; | ||
211 | range->avg_qual.updated = ieee80211_get_wstats_flags(local); | ||
212 | |||
213 | range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 | | ||
214 | IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP; | ||
215 | |||
216 | |||
217 | for (band = 0; band < IEEE80211_NUM_BANDS; band ++) { | ||
218 | int i; | ||
219 | struct ieee80211_supported_band *sband; | ||
220 | |||
221 | sband = local->hw.wiphy->bands[band]; | ||
222 | |||
223 | if (!sband) | ||
224 | continue; | ||
225 | |||
226 | for (i = 0; i < sband->n_channels && c < IW_MAX_FREQUENCIES; i++) { | ||
227 | struct ieee80211_channel *chan = &sband->channels[i]; | ||
228 | |||
229 | if (!(chan->flags & IEEE80211_CHAN_DISABLED)) { | ||
230 | range->freq[c].i = | ||
231 | ieee80211_frequency_to_channel( | ||
232 | chan->center_freq); | ||
233 | range->freq[c].m = chan->center_freq; | ||
234 | range->freq[c].e = 6; | ||
235 | c++; | ||
236 | } | ||
237 | } | ||
238 | } | ||
239 | range->num_channels = c; | ||
240 | range->num_frequency = c; | ||
241 | |||
242 | IW_EVENT_CAPA_SET_KERNEL(range->event_capa); | ||
243 | IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); | ||
244 | IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); | ||
245 | |||
246 | range->scan_capa |= IW_SCAN_CAPA_ESSID; | ||
247 | |||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | |||
252 | static int ieee80211_ioctl_siwfreq(struct net_device *dev, | 147 | static int ieee80211_ioctl_siwfreq(struct net_device *dev, |
253 | struct iw_request_info *info, | 148 | struct iw_request_info *info, |
254 | struct iw_freq *freq, char *extra) | 149 | struct iw_freq *freq, char *extra) |
255 | { | 150 | { |
256 | struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); | 151 | struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); |
257 | 152 | ||
258 | if (sdata->vif.type == NL80211_IFTYPE_ADHOC || | 153 | if (sdata->vif.type == NL80211_IFTYPE_ADHOC) |
259 | sdata->vif.type == NL80211_IFTYPE_STATION) | 154 | sdata->u.ibss.flags &= ~IEEE80211_IBSS_AUTO_CHANNEL_SEL; |
260 | sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; | 155 | else if (sdata->vif.type == NL80211_IFTYPE_STATION) |
156 | sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; | ||
261 | 157 | ||
262 | /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ | 158 | /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ |
263 | if (freq->e == 0) { | 159 | if (freq->e == 0) { |
264 | if (freq->m < 0) { | 160 | if (freq->m < 0) { |
265 | if (sdata->vif.type == NL80211_IFTYPE_ADHOC || | 161 | if (sdata->vif.type == NL80211_IFTYPE_ADHOC) |
266 | sdata->vif.type == NL80211_IFTYPE_STATION) | 162 | sdata->u.ibss.flags |= |
267 | sdata->u.sta.flags |= | 163 | IEEE80211_IBSS_AUTO_CHANNEL_SEL; |
164 | else if (sdata->vif.type == NL80211_IFTYPE_STATION) | ||
165 | sdata->u.mgd.flags |= | ||
268 | IEEE80211_STA_AUTO_CHANNEL_SEL; | 166 | IEEE80211_STA_AUTO_CHANNEL_SEL; |
269 | return 0; | 167 | return 0; |
270 | } else | 168 | } else |
@@ -301,32 +199,35 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, | |||
301 | { | 199 | { |
302 | struct ieee80211_sub_if_data *sdata; | 200 | struct ieee80211_sub_if_data *sdata; |
303 | size_t len = data->length; | 201 | size_t len = data->length; |
202 | int ret; | ||
304 | 203 | ||
305 | /* iwconfig uses nul termination in SSID.. */ | 204 | /* iwconfig uses nul termination in SSID.. */ |
306 | if (len > 0 && ssid[len - 1] == '\0') | 205 | if (len > 0 && ssid[len - 1] == '\0') |
307 | len--; | 206 | len--; |
308 | 207 | ||
309 | sdata = IEEE80211_DEV_TO_SUB_IF(dev); | 208 | sdata = IEEE80211_DEV_TO_SUB_IF(dev); |
310 | if (sdata->vif.type == NL80211_IFTYPE_STATION || | 209 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { |
311 | sdata->vif.type == NL80211_IFTYPE_ADHOC) { | ||
312 | int ret; | ||
313 | if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { | 210 | if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { |
314 | if (len > IEEE80211_MAX_SSID_LEN) | 211 | if (len > IEEE80211_MAX_SSID_LEN) |
315 | return -EINVAL; | 212 | return -EINVAL; |
316 | memcpy(sdata->u.sta.ssid, ssid, len); | 213 | memcpy(sdata->u.mgd.ssid, ssid, len); |
317 | sdata->u.sta.ssid_len = len; | 214 | sdata->u.mgd.ssid_len = len; |
318 | return 0; | 215 | return 0; |
319 | } | 216 | } |
217 | |||
320 | if (data->flags) | 218 | if (data->flags) |
321 | sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; | 219 | sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; |
322 | else | 220 | else |
323 | sdata->u.sta.flags |= IEEE80211_STA_AUTO_SSID_SEL; | 221 | sdata->u.mgd.flags |= IEEE80211_STA_AUTO_SSID_SEL; |
222 | |||
324 | ret = ieee80211_sta_set_ssid(sdata, ssid, len); | 223 | ret = ieee80211_sta_set_ssid(sdata, ssid, len); |
325 | if (ret) | 224 | if (ret) |
326 | return ret; | 225 | return ret; |
327 | ieee80211_sta_req_auth(sdata, &sdata->u.sta); | 226 | |
227 | ieee80211_sta_req_auth(sdata); | ||
328 | return 0; | 228 | return 0; |
329 | } | 229 | } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) |
230 | return ieee80211_ibss_set_ssid(sdata, ssid, len); | ||
330 | 231 | ||
331 | return -EOPNOTSUPP; | 232 | return -EOPNOTSUPP; |
332 | } | 233 | } |
@@ -340,8 +241,7 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, | |||
340 | 241 | ||
341 | struct ieee80211_sub_if_data *sdata; | 242 | struct ieee80211_sub_if_data *sdata; |
342 | sdata = IEEE80211_DEV_TO_SUB_IF(dev); | 243 | sdata = IEEE80211_DEV_TO_SUB_IF(dev); |
343 | if (sdata->vif.type == NL80211_IFTYPE_STATION || | 244 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { |
344 | sdata->vif.type == NL80211_IFTYPE_ADHOC) { | ||
345 | int res = ieee80211_sta_get_ssid(sdata, ssid, &len); | 245 | int res = ieee80211_sta_get_ssid(sdata, ssid, &len); |
346 | if (res == 0) { | 246 | if (res == 0) { |
347 | data->length = len; | 247 | data->length = len; |
@@ -349,6 +249,14 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, | |||
349 | } else | 249 | } else |
350 | data->flags = 0; | 250 | data->flags = 0; |
351 | return res; | 251 | return res; |
252 | } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { | ||
253 | int res = ieee80211_ibss_get_ssid(sdata, ssid, &len); | ||
254 | if (res == 0) { | ||
255 | data->length = len; | ||
256 | data->flags = 1; | ||
257 | } else | ||
258 | data->flags = 0; | ||
259 | return res; | ||
352 | } | 260 | } |
353 | 261 | ||
354 | return -EOPNOTSUPP; | 262 | return -EOPNOTSUPP; |
@@ -362,26 +270,35 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, | |||
362 | struct ieee80211_sub_if_data *sdata; | 270 | struct ieee80211_sub_if_data *sdata; |
363 | 271 | ||
364 | sdata = IEEE80211_DEV_TO_SUB_IF(dev); | 272 | sdata = IEEE80211_DEV_TO_SUB_IF(dev); |
365 | if (sdata->vif.type == NL80211_IFTYPE_STATION || | 273 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { |
366 | sdata->vif.type == NL80211_IFTYPE_ADHOC) { | ||
367 | int ret; | 274 | int ret; |
368 | if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { | 275 | if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { |
369 | memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data, | 276 | memcpy(sdata->u.mgd.bssid, (u8 *) &ap_addr->sa_data, |
370 | ETH_ALEN); | 277 | ETH_ALEN); |
371 | return 0; | 278 | return 0; |
372 | } | 279 | } |
373 | if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) | 280 | if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) |
374 | sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL | | 281 | sdata->u.mgd.flags |= IEEE80211_STA_AUTO_BSSID_SEL | |
375 | IEEE80211_STA_AUTO_CHANNEL_SEL; | 282 | IEEE80211_STA_AUTO_CHANNEL_SEL; |
376 | else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) | 283 | else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) |
377 | sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL; | 284 | sdata->u.mgd.flags |= IEEE80211_STA_AUTO_BSSID_SEL; |
378 | else | 285 | else |
379 | sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; | 286 | sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; |
380 | ret = ieee80211_sta_set_bssid(sdata, (u8 *) &ap_addr->sa_data); | 287 | ret = ieee80211_sta_set_bssid(sdata, (u8 *) &ap_addr->sa_data); |
381 | if (ret) | 288 | if (ret) |
382 | return ret; | 289 | return ret; |
383 | ieee80211_sta_req_auth(sdata, &sdata->u.sta); | 290 | ieee80211_sta_req_auth(sdata); |
384 | return 0; | 291 | return 0; |
292 | } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { | ||
293 | if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) | ||
294 | sdata->u.ibss.flags |= IEEE80211_IBSS_AUTO_BSSID_SEL | | ||
295 | IEEE80211_IBSS_AUTO_CHANNEL_SEL; | ||
296 | else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) | ||
297 | sdata->u.ibss.flags |= IEEE80211_IBSS_AUTO_BSSID_SEL; | ||
298 | else | ||
299 | sdata->u.ibss.flags &= ~IEEE80211_IBSS_AUTO_BSSID_SEL; | ||
300 | |||
301 | return ieee80211_ibss_set_bssid(sdata, (u8 *) &ap_addr->sa_data); | ||
385 | } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { | 302 | } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { |
386 | /* | 303 | /* |
387 | * If it is necessary to update the WDS peer address | 304 | * If it is necessary to update the WDS peer address |
@@ -410,17 +327,20 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, | |||
410 | struct ieee80211_sub_if_data *sdata; | 327 | struct ieee80211_sub_if_data *sdata; |
411 | 328 | ||
412 | sdata = IEEE80211_DEV_TO_SUB_IF(dev); | 329 | sdata = IEEE80211_DEV_TO_SUB_IF(dev); |
413 | if (sdata->vif.type == NL80211_IFTYPE_STATION || | 330 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { |
414 | sdata->vif.type == NL80211_IFTYPE_ADHOC) { | 331 | if (sdata->u.mgd.state == IEEE80211_STA_MLME_ASSOCIATED) { |
415 | if (sdata->u.sta.state == IEEE80211_STA_MLME_ASSOCIATED || | ||
416 | sdata->u.sta.state == IEEE80211_STA_MLME_IBSS_JOINED) { | ||
417 | ap_addr->sa_family = ARPHRD_ETHER; | 332 | ap_addr->sa_family = ARPHRD_ETHER; |
418 | memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); | 333 | memcpy(&ap_addr->sa_data, sdata->u.mgd.bssid, ETH_ALEN); |
419 | return 0; | 334 | } else |
420 | } else { | ||
421 | memset(&ap_addr->sa_data, 0, ETH_ALEN); | 335 | memset(&ap_addr->sa_data, 0, ETH_ALEN); |
422 | return 0; | 336 | return 0; |
423 | } | 337 | } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { |
338 | if (sdata->u.ibss.state == IEEE80211_IBSS_MLME_JOINED) { | ||
339 | ap_addr->sa_family = ARPHRD_ETHER; | ||
340 | memcpy(&ap_addr->sa_data, sdata->u.ibss.bssid, ETH_ALEN); | ||
341 | } else | ||
342 | memset(&ap_addr->sa_data, 0, ETH_ALEN); | ||
343 | return 0; | ||
424 | } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { | 344 | } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { |
425 | ap_addr->sa_family = ARPHRD_ETHER; | 345 | ap_addr->sa_family = ARPHRD_ETHER; |
426 | memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); | 346 | memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); |
@@ -486,7 +406,7 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev, | |||
486 | 406 | ||
487 | rcu_read_lock(); | 407 | rcu_read_lock(); |
488 | 408 | ||
489 | sta = sta_info_get(local, sdata->u.sta.bssid); | 409 | sta = sta_info_get(local, sdata->u.mgd.bssid); |
490 | 410 | ||
491 | if (sta && !(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) | 411 | if (sta && !(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) |
492 | rate->value = sband->bitrates[sta->last_tx_rate.idx].bitrate; | 412 | rate->value = sband->bitrates[sta->last_tx_rate.idx].bitrate; |
@@ -687,8 +607,7 @@ static int ieee80211_ioctl_siwmlme(struct net_device *dev, | |||
687 | struct iw_mlme *mlme = (struct iw_mlme *) extra; | 607 | struct iw_mlme *mlme = (struct iw_mlme *) extra; |
688 | 608 | ||
689 | sdata = IEEE80211_DEV_TO_SUB_IF(dev); | 609 | sdata = IEEE80211_DEV_TO_SUB_IF(dev); |
690 | if (sdata->vif.type != NL80211_IFTYPE_STATION && | 610 | if (!(sdata->vif.type == NL80211_IFTYPE_STATION)) |
691 | sdata->vif.type != NL80211_IFTYPE_ADHOC) | ||
692 | return -EINVAL; | 611 | return -EINVAL; |
693 | 612 | ||
694 | switch (mlme->cmd) { | 613 | switch (mlme->cmd) { |
@@ -784,8 +703,7 @@ static int ieee80211_ioctl_giwencode(struct net_device *dev, | |||
784 | erq->flags |= IW_ENCODE_ENABLED; | 703 | erq->flags |= IW_ENCODE_ENABLED; |
785 | 704 | ||
786 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { | 705 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { |
787 | struct ieee80211_if_sta *ifsta = &sdata->u.sta; | 706 | switch (sdata->u.mgd.auth_alg) { |
788 | switch (ifsta->auth_alg) { | ||
789 | case WLAN_AUTH_OPEN: | 707 | case WLAN_AUTH_OPEN: |
790 | case WLAN_AUTH_LEAP: | 708 | case WLAN_AUTH_LEAP: |
791 | erq->flags |= IW_ENCODE_OPEN; | 709 | erq->flags |= IW_ENCODE_OPEN; |
@@ -849,7 +767,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, | |||
849 | ret = ieee80211_hw_config(local, | 767 | ret = ieee80211_hw_config(local, |
850 | IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT); | 768 | IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT); |
851 | 769 | ||
852 | if (!(sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)) | 770 | if (!(sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED)) |
853 | return ret; | 771 | return ret; |
854 | 772 | ||
855 | if (conf->dynamic_ps_timeout > 0 && | 773 | if (conf->dynamic_ps_timeout > 0 && |
@@ -908,10 +826,10 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, | |||
908 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { | 826 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { |
909 | if (data->value & (IW_AUTH_CIPHER_WEP40 | | 827 | if (data->value & (IW_AUTH_CIPHER_WEP40 | |
910 | IW_AUTH_CIPHER_WEP104 | IW_AUTH_CIPHER_TKIP)) | 828 | IW_AUTH_CIPHER_WEP104 | IW_AUTH_CIPHER_TKIP)) |
911 | sdata->u.sta.flags |= | 829 | sdata->u.mgd.flags |= |
912 | IEEE80211_STA_TKIP_WEP_USED; | 830 | IEEE80211_STA_TKIP_WEP_USED; |
913 | else | 831 | else |
914 | sdata->u.sta.flags &= | 832 | sdata->u.mgd.flags &= |
915 | ~IEEE80211_STA_TKIP_WEP_USED; | 833 | ~IEEE80211_STA_TKIP_WEP_USED; |
916 | } | 834 | } |
917 | break; | 835 | break; |
@@ -922,21 +840,20 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, | |||
922 | if (sdata->vif.type != NL80211_IFTYPE_STATION) | 840 | if (sdata->vif.type != NL80211_IFTYPE_STATION) |
923 | ret = -EINVAL; | 841 | ret = -EINVAL; |
924 | else { | 842 | else { |
925 | sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; | 843 | sdata->u.mgd.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; |
926 | /* | 844 | /* |
927 | * Privacy invoked by wpa_supplicant, store the | 845 | * Privacy invoked by wpa_supplicant, store the |
928 | * value and allow associating to a protected | 846 | * value and allow associating to a protected |
929 | * network without having a key up front. | 847 | * network without having a key up front. |
930 | */ | 848 | */ |
931 | if (data->value) | 849 | if (data->value) |
932 | sdata->u.sta.flags |= | 850 | sdata->u.mgd.flags |= |
933 | IEEE80211_STA_PRIVACY_INVOKED; | 851 | IEEE80211_STA_PRIVACY_INVOKED; |
934 | } | 852 | } |
935 | break; | 853 | break; |
936 | case IW_AUTH_80211_AUTH_ALG: | 854 | case IW_AUTH_80211_AUTH_ALG: |
937 | if (sdata->vif.type == NL80211_IFTYPE_STATION || | 855 | if (sdata->vif.type == NL80211_IFTYPE_STATION) |
938 | sdata->vif.type == NL80211_IFTYPE_ADHOC) | 856 | sdata->u.mgd.auth_algs = data->value; |
939 | sdata->u.sta.auth_algs = data->value; | ||
940 | else | 857 | else |
941 | ret = -EOPNOTSUPP; | 858 | ret = -EOPNOTSUPP; |
942 | break; | 859 | break; |
@@ -945,17 +862,16 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, | |||
945 | ret = -EOPNOTSUPP; | 862 | ret = -EOPNOTSUPP; |
946 | break; | 863 | break; |
947 | } | 864 | } |
948 | if (sdata->vif.type == NL80211_IFTYPE_STATION || | 865 | if (sdata->vif.type == NL80211_IFTYPE_STATION) { |
949 | sdata->vif.type == NL80211_IFTYPE_ADHOC) { | ||
950 | switch (data->value) { | 866 | switch (data->value) { |
951 | case IW_AUTH_MFP_DISABLED: | 867 | case IW_AUTH_MFP_DISABLED: |
952 | sdata->u.sta.mfp = IEEE80211_MFP_DISABLED; | 868 | sdata->u.mgd.mfp = IEEE80211_MFP_DISABLED; |
953 | break; | 869 | break; |
954 | case IW_AUTH_MFP_OPTIONAL: | 870 | case IW_AUTH_MFP_OPTIONAL: |
955 | sdata->u.sta.mfp = IEEE80211_MFP_OPTIONAL; | 871 | sdata->u.mgd.mfp = IEEE80211_MFP_OPTIONAL; |
956 | break; | 872 | break; |
957 | case IW_AUTH_MFP_REQUIRED: | 873 | case IW_AUTH_MFP_REQUIRED: |
958 | sdata->u.sta.mfp = IEEE80211_MFP_REQUIRED; | 874 | sdata->u.mgd.mfp = IEEE80211_MFP_REQUIRED; |
959 | break; | 875 | break; |
960 | default: | 876 | default: |
961 | ret = -EINVAL; | 877 | ret = -EINVAL; |
@@ -980,9 +896,9 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev | |||
980 | 896 | ||
981 | rcu_read_lock(); | 897 | rcu_read_lock(); |
982 | 898 | ||
983 | if (sdata->vif.type == NL80211_IFTYPE_STATION || | 899 | if (sdata->vif.type == NL80211_IFTYPE_STATION) |
984 | sdata->vif.type == NL80211_IFTYPE_ADHOC) | 900 | sta = sta_info_get(local, sdata->u.mgd.bssid); |
985 | sta = sta_info_get(local, sdata->u.sta.bssid); | 901 | |
986 | if (!sta) { | 902 | if (!sta) { |
987 | wstats->discard.fragment = 0; | 903 | wstats->discard.fragment = 0; |
988 | wstats->discard.misc = 0; | 904 | wstats->discard.misc = 0; |
@@ -991,10 +907,45 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev | |||
991 | wstats->qual.noise = 0; | 907 | wstats->qual.noise = 0; |
992 | wstats->qual.updated = IW_QUAL_ALL_INVALID; | 908 | wstats->qual.updated = IW_QUAL_ALL_INVALID; |
993 | } else { | 909 | } else { |
994 | wstats->qual.level = sta->last_signal; | 910 | wstats->qual.updated = 0; |
995 | wstats->qual.qual = sta->last_qual; | 911 | /* |
996 | wstats->qual.noise = sta->last_noise; | 912 | * mirror what cfg80211 does for iwrange/scan results, |
997 | wstats->qual.updated = ieee80211_get_wstats_flags(local); | 913 | * otherwise userspace gets confused. |
914 | */ | ||
915 | if (local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC | | ||
916 | IEEE80211_HW_SIGNAL_DBM)) { | ||
917 | wstats->qual.updated |= IW_QUAL_LEVEL_UPDATED; | ||
918 | wstats->qual.updated |= IW_QUAL_QUAL_UPDATED; | ||
919 | } else { | ||
920 | wstats->qual.updated |= IW_QUAL_LEVEL_INVALID; | ||
921 | wstats->qual.updated |= IW_QUAL_QUAL_INVALID; | ||
922 | } | ||
923 | |||
924 | if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) { | ||
925 | wstats->qual.level = sta->last_signal; | ||
926 | wstats->qual.qual = sta->last_signal; | ||
927 | } else if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { | ||
928 | int sig = sta->last_signal; | ||
929 | |||
930 | wstats->qual.updated |= IW_QUAL_DBM; | ||
931 | wstats->qual.level = sig; | ||
932 | if (sig < -110) | ||
933 | sig = -110; | ||
934 | else if (sig > -40) | ||
935 | sig = -40; | ||
936 | wstats->qual.qual = sig + 110; | ||
937 | } | ||
938 | |||
939 | if (local->hw.flags & IEEE80211_HW_NOISE_DBM) { | ||
940 | /* | ||
941 | * This assumes that if driver reports noise, it also | ||
942 | * reports signal in dBm. | ||
943 | */ | ||
944 | wstats->qual.noise = sta->last_noise; | ||
945 | wstats->qual.updated |= IW_QUAL_NOISE_UPDATED; | ||
946 | } else { | ||
947 | wstats->qual.updated |= IW_QUAL_NOISE_INVALID; | ||
948 | } | ||
998 | } | 949 | } |
999 | 950 | ||
1000 | rcu_read_unlock(); | 951 | rcu_read_unlock(); |
@@ -1011,9 +962,8 @@ static int ieee80211_ioctl_giwauth(struct net_device *dev, | |||
1011 | 962 | ||
1012 | switch (data->flags & IW_AUTH_INDEX) { | 963 | switch (data->flags & IW_AUTH_INDEX) { |
1013 | case IW_AUTH_80211_AUTH_ALG: | 964 | case IW_AUTH_80211_AUTH_ALG: |
1014 | if (sdata->vif.type == NL80211_IFTYPE_STATION || | 965 | if (sdata->vif.type == NL80211_IFTYPE_STATION) |
1015 | sdata->vif.type == NL80211_IFTYPE_ADHOC) | 966 | data->value = sdata->u.mgd.auth_algs; |
1016 | data->value = sdata->u.sta.auth_algs; | ||
1017 | else | 967 | else |
1018 | ret = -EOPNOTSUPP; | 968 | ret = -EOPNOTSUPP; |
1019 | break; | 969 | break; |
@@ -1116,7 +1066,7 @@ static const iw_handler ieee80211_handler[] = | |||
1116 | (iw_handler) NULL, /* SIOCSIWSENS */ | 1066 | (iw_handler) NULL, /* SIOCSIWSENS */ |
1117 | (iw_handler) NULL, /* SIOCGIWSENS */ | 1067 | (iw_handler) NULL, /* SIOCGIWSENS */ |
1118 | (iw_handler) NULL /* not used */, /* SIOCSIWRANGE */ | 1068 | (iw_handler) NULL /* not used */, /* SIOCSIWRANGE */ |
1119 | (iw_handler) ieee80211_ioctl_giwrange, /* SIOCGIWRANGE */ | 1069 | (iw_handler) cfg80211_wext_giwrange, /* SIOCGIWRANGE */ |
1120 | (iw_handler) NULL /* not used */, /* SIOCSIWPRIV */ | 1070 | (iw_handler) NULL /* not used */, /* SIOCSIWPRIV */ |
1121 | (iw_handler) NULL /* kernel code */, /* SIOCGIWPRIV */ | 1071 | (iw_handler) NULL /* kernel code */, /* SIOCGIWPRIV */ |
1122 | (iw_handler) NULL /* not used */, /* SIOCSIWSTATS */ | 1072 | (iw_handler) NULL /* not used */, /* SIOCSIWSTATS */ |
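
The rewritten wireless-stats path above maps a dBm signal onto the wext quality scale by clamping to the [-110, -40] dBm window advertised via giwrange and adding 110. A small standalone sketch of that mapping, assuming the same clamp values as the hunk above:

#include <stdio.h>

/* Map a dBm signal level to a wext "quality" value the way the hunk
 * above does: clamp to [-110, -40] dBm, then shift so -110 -> 0 and
 * -40 -> 70. */
static int dbm_to_qual(int sig_dbm)
{
	if (sig_dbm < -110)
		sig_dbm = -110;
	else if (sig_dbm > -40)
		sig_dbm = -40;
	return sig_dbm + 110;
}

int main(void)
{
	int samples[] = { -30, -55, -75, -95, -120 };
	int i;

	for (i = 0; i < 5; i++)
		printf("%4d dBm -> qual %d\n", samples[i], dbm_to_qual(samples[i]));
	return 0;
}
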
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index ac71b38f7cb5..0b8ad1f4ecdd 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c | |||
@@ -99,10 +99,13 @@ static u16 classify80211(struct ieee80211_local *local, struct sk_buff *skb) | |||
99 | /* in case we are a client verify acm is not set for this ac */ | 99 | /* in case we are a client verify acm is not set for this ac */ |
100 | while (unlikely(local->wmm_acm & BIT(skb->priority))) { | 100 | while (unlikely(local->wmm_acm & BIT(skb->priority))) { |
101 | if (wme_downgrade_ac(skb)) { | 101 | if (wme_downgrade_ac(skb)) { |
102 | /* The old code would drop the packet in this | 102 | /* |
103 | * case. | 103 | * This should not really happen. The AP has marked all |
104 | * lower ACs to require admission control, which is not | ||
105 | * a reasonable configuration. Allow the frame to be | ||
106 | * transmitted using AC_BK as a workaround. | ||
104 | */ | 107 | */ |
105 | return 0; | 108 | break; |
106 | } | 109 | } |
107 | } | 110 | } |
108 | 111 | ||
@@ -114,9 +117,7 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) | |||
114 | { | 117 | { |
115 | struct ieee80211_master_priv *mpriv = netdev_priv(dev); | 118 | struct ieee80211_master_priv *mpriv = netdev_priv(dev); |
116 | struct ieee80211_local *local = mpriv->local; | 119 | struct ieee80211_local *local = mpriv->local; |
117 | struct ieee80211_hw *hw = &local->hw; | ||
118 | struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; | 120 | struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; |
119 | struct sta_info *sta; | ||
120 | u16 queue; | 121 | u16 queue; |
121 | u8 tid; | 122 | u8 tid; |
122 | 123 | ||
@@ -124,29 +125,11 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) | |||
124 | if (unlikely(queue >= local->hw.queues)) | 125 | if (unlikely(queue >= local->hw.queues)) |
125 | queue = local->hw.queues - 1; | 126 | queue = local->hw.queues - 1; |
126 | 127 | ||
127 | if (skb->requeue) { | 128 | /* |
128 | if (!hw->ampdu_queues) | 129 | * Now we know the 1d priority, fill in the QoS header if |
129 | return queue; | 130 | * there is one (and we haven't done this before). |
130 | |||
131 | rcu_read_lock(); | ||
132 | sta = sta_info_get(local, hdr->addr1); | ||
133 | tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; | ||
134 | if (sta) { | ||
135 | int ampdu_queue = sta->tid_to_tx_q[tid]; | ||
136 | |||
137 | if ((ampdu_queue < ieee80211_num_queues(hw)) && | ||
138 | test_bit(ampdu_queue, local->queue_pool)) | ||
139 | queue = ampdu_queue; | ||
140 | } | ||
141 | rcu_read_unlock(); | ||
142 | |||
143 | return queue; | ||
144 | } | ||
145 | |||
146 | /* Now we know the 1d priority, fill in the QoS header if | ||
147 | * there is one. | ||
148 | */ | 131 | */ |
149 | if (ieee80211_is_data_qos(hdr->frame_control)) { | 132 | if (!skb->requeue && ieee80211_is_data_qos(hdr->frame_control)) { |
150 | u8 *p = ieee80211_get_qos_ctl(hdr); | 133 | u8 *p = ieee80211_get_qos_ctl(hdr); |
151 | u8 ack_policy = 0; | 134 | u8 ack_policy = 0; |
152 | tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; | 135 | tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; |
@@ -156,140 +139,7 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) | |||
156 | /* qos header is 2 bytes, second reserved */ | 139 | /* qos header is 2 bytes, second reserved */ |
157 | *p++ = ack_policy | tid; | 140 | *p++ = ack_policy | tid; |
158 | *p = 0; | 141 | *p = 0; |
159 | |||
160 | if (!hw->ampdu_queues) | ||
161 | return queue; | ||
162 | |||
163 | rcu_read_lock(); | ||
164 | |||
165 | sta = sta_info_get(local, hdr->addr1); | ||
166 | if (sta) { | ||
167 | int ampdu_queue = sta->tid_to_tx_q[tid]; | ||
168 | |||
169 | if ((ampdu_queue < ieee80211_num_queues(hw)) && | ||
170 | test_bit(ampdu_queue, local->queue_pool)) | ||
171 | queue = ampdu_queue; | ||
172 | } | ||
173 | |||
174 | rcu_read_unlock(); | ||
175 | } | 142 | } |
176 | 143 | ||
177 | return queue; | 144 | return queue; |
178 | } | 145 | } |
179 | |||
180 | int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, | ||
181 | struct sta_info *sta, u16 tid) | ||
182 | { | ||
183 | int i; | ||
184 | |||
185 | /* XXX: currently broken due to cb/requeue use */ | ||
186 | return -EPERM; | ||
187 | |||
188 | /* prepare the filter and save it for the SW queue | ||
189 | * matching the received HW queue */ | ||
190 | |||
191 | if (!local->hw.ampdu_queues) | ||
192 | return -EPERM; | ||
193 | |||
194 | /* try to get a Qdisc from the pool */ | ||
195 | for (i = local->hw.queues; i < ieee80211_num_queues(&local->hw); i++) | ||
196 | if (!test_and_set_bit(i, local->queue_pool)) { | ||
197 | ieee80211_stop_queue(local_to_hw(local), i); | ||
198 | sta->tid_to_tx_q[tid] = i; | ||
199 | |||
200 | /* IF there are already pending packets | ||
201 | * on this tid first we need to drain them | ||
202 | * on the previous queue | ||
203 | * since HT is strict in order */ | ||
204 | #ifdef CONFIG_MAC80211_HT_DEBUG | ||
205 | if (net_ratelimit()) | ||
206 | printk(KERN_DEBUG "allocated aggregation queue" | ||
207 | " %d tid %d addr %pM pool=0x%lX\n", | ||
208 | i, tid, sta->sta.addr, | ||
209 | local->queue_pool[0]); | ||
210 | #endif /* CONFIG_MAC80211_HT_DEBUG */ | ||
211 | return 0; | ||
212 | } | ||
213 | |||
214 | return -EAGAIN; | ||
215 | } | ||
216 | |||
217 | /** | ||
218 | * the caller needs to hold netdev_get_tx_queue(local->mdev, X)->lock | ||
219 | */ | ||
220 | void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, | ||
221 | struct sta_info *sta, u16 tid, | ||
222 | u8 requeue) | ||
223 | { | ||
224 | int agg_queue = sta->tid_to_tx_q[tid]; | ||
225 | struct ieee80211_hw *hw = &local->hw; | ||
226 | |||
227 | /* return the qdisc to the pool */ | ||
228 | clear_bit(agg_queue, local->queue_pool); | ||
229 | sta->tid_to_tx_q[tid] = ieee80211_num_queues(hw); | ||
230 | |||
231 | if (requeue) { | ||
232 | ieee80211_requeue(local, agg_queue); | ||
233 | } else { | ||
234 | struct netdev_queue *txq; | ||
235 | spinlock_t *root_lock; | ||
236 | struct Qdisc *q; | ||
237 | |||
238 | txq = netdev_get_tx_queue(local->mdev, agg_queue); | ||
239 | q = rcu_dereference(txq->qdisc); | ||
240 | root_lock = qdisc_lock(q); | ||
241 | |||
242 | spin_lock_bh(root_lock); | ||
243 | qdisc_reset(q); | ||
244 | spin_unlock_bh(root_lock); | ||
245 | } | ||
246 | } | ||
247 | |||
248 | void ieee80211_requeue(struct ieee80211_local *local, int queue) | ||
249 | { | ||
250 | struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, queue); | ||
251 | struct sk_buff_head list; | ||
252 | spinlock_t *root_lock; | ||
253 | struct Qdisc *qdisc; | ||
254 | u32 len; | ||
255 | |||
256 | rcu_read_lock_bh(); | ||
257 | |||
258 | qdisc = rcu_dereference(txq->qdisc); | ||
259 | if (!qdisc || !qdisc->dequeue) | ||
260 | goto out_unlock; | ||
261 | |||
262 | skb_queue_head_init(&list); | ||
263 | |||
264 | root_lock = qdisc_root_lock(qdisc); | ||
265 | spin_lock(root_lock); | ||
266 | for (len = qdisc->q.qlen; len > 0; len--) { | ||
267 | struct sk_buff *skb = qdisc->dequeue(qdisc); | ||
268 | |||
269 | if (skb) | ||
270 | __skb_queue_tail(&list, skb); | ||
271 | } | ||
272 | spin_unlock(root_lock); | ||
273 | |||
274 | for (len = list.qlen; len > 0; len--) { | ||
275 | struct sk_buff *skb = __skb_dequeue(&list); | ||
276 | u16 new_queue; | ||
277 | |||
278 | BUG_ON(!skb); | ||
279 | new_queue = ieee80211_select_queue(local->mdev, skb); | ||
280 | skb_set_queue_mapping(skb, new_queue); | ||
281 | |||
282 | txq = netdev_get_tx_queue(local->mdev, new_queue); | ||
283 | |||
284 | |||
285 | qdisc = rcu_dereference(txq->qdisc); | ||
286 | root_lock = qdisc_root_lock(qdisc); | ||
287 | |||
288 | spin_lock(root_lock); | ||
289 | qdisc_enqueue_root(skb, qdisc); | ||
290 | spin_unlock(root_lock); | ||
291 | } | ||
292 | |||
293 | out_unlock: | ||
294 | rcu_read_unlock_bh(); | ||
295 | } | ||
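
classify80211 above downgrades the 802.1d priority while the AP's ACM bitmap demands admission control for the chosen access category; after this change a frame whose every AC is ACM-protected is transmitted on AC_BK instead of being dropped. A standalone sketch of the downgrade walk; the priority steps mirror the hunk, but the bitmap and harness are illustrative:

#include <stdio.h>

#define BIT(n) (1U << (n))

/* Downgrade an 802.1d priority by one access category: VO->VI->BE->BK.
 * Returns -1 once we are already at AC_BK (priorities 1 and 2). */
static int downgrade_ac(int *prio)
{
	switch (*prio) {
	case 6: case 7: *prio = 5; return 0;	/* AC_VO -> AC_VI */
	case 4: case 5: *prio = 3; return 0;	/* AC_VI -> AC_BE */
	case 0: case 3: *prio = 2; return 0;	/* AC_BE -> AC_BK */
	default:        return -1;		/* already AC_BK */
	}
}

static int classify(int prio, unsigned int acm_bitmap)
{
	/* Keep downgrading while admission control is required for this
	 * priority; if even AC_BK is marked, transmit there anyway. */
	while (acm_bitmap & BIT(prio))
		if (downgrade_ac(&prio))
			break;
	return prio;
}

int main(void)
{
	printf("%d\n", classify(6, BIT(6) | BIT(7)));	/* voice ACM'd -> AC_VI (5) */
	printf("%d\n", classify(6, 0xff));		/* everything ACM'd -> AC_BK (2) */
	return 0;
}
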
diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h index bc62f28a4d3d..7520d2e014dc 100644 --- a/net/mac80211/wme.h +++ b/net/mac80211/wme.h | |||
@@ -21,11 +21,5 @@ | |||
21 | extern const int ieee802_1d_to_ac[8]; | 21 | extern const int ieee802_1d_to_ac[8]; |
22 | 22 | ||
23 | u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb); | 23 | u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb); |
24 | int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, | ||
25 | struct sta_info *sta, u16 tid); | ||
26 | void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, | ||
27 | struct sta_info *sta, u16 tid, | ||
28 | u8 requeue); | ||
29 | void ieee80211_requeue(struct ieee80211_local *local, int queue); | ||
30 | 24 | ||
31 | #endif /* _WME_H */ | 25 | #endif /* _WME_H */ |
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 55befe59e1c0..dfb447b584da 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c | |||
@@ -728,7 +728,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, | |||
728 | NF_CT_ASSERT(skb->nfct); | 728 | NF_CT_ASSERT(skb->nfct); |
729 | 729 | ||
730 | ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum); | 730 | ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum); |
731 | if (ret < 0) { | 731 | if (ret <= 0) { |
732 | /* Invalid: inverse of the return code tells | 732 | /* Invalid: inverse of the return code tells |
733 | * the netfilter core what to do */ | 733 | * the netfilter core what to do */ |
734 | pr_debug("nf_conntrack_in: Can't track with proto module\n"); | 734 | pr_debug("nf_conntrack_in: Can't track with proto module\n"); |
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1b75c9efb0eb..7a16bd462f82 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c | |||
@@ -1763,6 +1763,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3, u32 pid, int report) | |||
1763 | goto out; | 1763 | goto out; |
1764 | } | 1764 | } |
1765 | 1765 | ||
1766 | exp->class = 0; | ||
1766 | exp->expectfn = NULL; | 1767 | exp->expectfn = NULL; |
1767 | exp->flags = 0; | 1768 | exp->flags = 0; |
1768 | exp->master = ct; | 1769 | exp->master = ct; |
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 7d3944f02ea1..e46f3b79adb3 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c | |||
@@ -861,7 +861,7 @@ static int tcp_packet(struct nf_conn *ct, | |||
861 | */ | 861 | */ |
862 | if (nf_ct_kill(ct)) | 862 | if (nf_ct_kill(ct)) |
863 | return -NF_REPEAT; | 863 | return -NF_REPEAT; |
864 | return -NF_DROP; | 864 | return NF_DROP; |
865 | } | 865 | } |
866 | /* Fall through */ | 866 | /* Fall through */ |
867 | case TCP_CONNTRACK_IGNORE: | 867 | case TCP_CONNTRACK_IGNORE: |
@@ -894,7 +894,7 @@ static int tcp_packet(struct nf_conn *ct, | |||
894 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, | 894 | nf_log_packet(pf, 0, skb, NULL, NULL, NULL, |
895 | "nf_ct_tcp: killing out of sync session "); | 895 | "nf_ct_tcp: killing out of sync session "); |
896 | nf_ct_kill(ct); | 896 | nf_ct_kill(ct); |
897 | return -NF_DROP; | 897 | return NF_DROP; |
898 | } | 898 | } |
899 | ct->proto.tcp.last_index = index; | 899 | ct->proto.tcp.last_index = index; |
900 | ct->proto.tcp.last_dir = dir; | 900 | ct->proto.tcp.last_dir = dir; |
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 3eae3fca29d8..fd326ac27ec8 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c | |||
@@ -39,7 +39,7 @@ | |||
39 | #endif | 39 | #endif |
40 | 40 | ||
41 | #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE | 41 | #define NFULNL_NLBUFSIZ_DEFAULT NLMSG_GOODSIZE |
42 | #define NFULNL_TIMEOUT_DEFAULT HZ /* every second */ | 42 | #define NFULNL_TIMEOUT_DEFAULT 100 /* every second */ |
43 | #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ | 43 | #define NFULNL_QTHRESH_DEFAULT 100 /* 100 packets */ |
44 | #define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */ | 44 | #define NFULNL_COPY_RANGE_MAX 0xFFFF /* max packet size is limited by 16-bit struct nfattr nfa_len field */ |
45 | 45 | ||
@@ -590,8 +590,10 @@ nfulnl_log_packet(u_int8_t pf, | |||
590 | 590 | ||
591 | qthreshold = inst->qthreshold; | 591 | qthreshold = inst->qthreshold; |
592 | /* per-rule qthreshold overrides per-instance */ | 592 | /* per-rule qthreshold overrides per-instance */ |
593 | if (qthreshold > li->u.ulog.qthreshold) | 593 | if (li->u.ulog.qthreshold) |
594 | qthreshold = li->u.ulog.qthreshold; | 594 | if (qthreshold > li->u.ulog.qthreshold) |
595 | qthreshold = li->u.ulog.qthreshold; | ||
596 | |||
595 | 597 | ||
596 | switch (inst->copy_mode) { | 598 | switch (inst->copy_mode) { |
597 | case NFULNL_COPY_META: | 599 | case NFULNL_COPY_META: |
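
The qthreshold hunk above only lets a per-rule threshold override the per-instance one when the rule actually carries a value; previously a rule with an unset (zero) threshold forced the effective threshold down to zero. A tiny sketch of the corrected selection, with made-up names:

#include <stdio.h>

static unsigned int effective_qthreshold(unsigned int instance_qth,
					 unsigned int rule_qth)
{
	/* A per-rule threshold of 0 means "not set": keep the instance value.
	 * Otherwise the smaller of the two wins. */
	if (rule_qth && rule_qth < instance_qth)
		return rule_qth;
	return instance_qth;
}

int main(void)
{
	printf("%u\n", effective_qthreshold(100, 0));	/* 100: rule unset */
	printf("%u\n", effective_qthreshold(100, 20));	/* 20 */
	printf("%u\n", effective_qthreshold(10, 50));	/* 10 */
	return 0;
}
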
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index bfcac92d5563..509a95621f9f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c | |||
@@ -843,59 +843,143 @@ static const struct file_operations xt_table_ops = { | |||
843 | .release = seq_release_net, | 843 | .release = seq_release_net, |
844 | }; | 844 | }; |
845 | 845 | ||
846 | static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos) | 846 | /* |
847 | * Traversal state for ip{,6}_{tables,matches}; lets one seq_file | ||
848 | * pass cross the multi-AF mutexes. | ||
849 | */ | ||
850 | struct nf_mttg_trav { | ||
851 | struct list_head *head, *curr; | ||
852 | uint8_t class, nfproto; | ||
853 | }; | ||
854 | |||
855 | enum { | ||
856 | MTTG_TRAV_INIT, | ||
857 | MTTG_TRAV_NFP_UNSPEC, | ||
858 | MTTG_TRAV_NFP_SPEC, | ||
859 | MTTG_TRAV_DONE, | ||
860 | }; | ||
861 | |||
862 | static void *xt_mttg_seq_next(struct seq_file *seq, void *v, loff_t *ppos, | ||
863 | bool is_target) | ||
847 | { | 864 | { |
848 | struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private; | 865 | static const uint8_t next_class[] = { |
849 | u_int16_t af = (unsigned long)pde->data; | 866 | [MTTG_TRAV_NFP_UNSPEC] = MTTG_TRAV_NFP_SPEC, |
867 | [MTTG_TRAV_NFP_SPEC] = MTTG_TRAV_DONE, | ||
868 | }; | ||
869 | struct nf_mttg_trav *trav = seq->private; | ||
870 | |||
871 | switch (trav->class) { | ||
872 | case MTTG_TRAV_INIT: | ||
873 | trav->class = MTTG_TRAV_NFP_UNSPEC; | ||
874 | mutex_lock(&xt[NFPROTO_UNSPEC].mutex); | ||
875 | trav->head = trav->curr = is_target ? | ||
876 | &xt[NFPROTO_UNSPEC].target : &xt[NFPROTO_UNSPEC].match; | ||
877 | break; | ||
878 | case MTTG_TRAV_NFP_UNSPEC: | ||
879 | trav->curr = trav->curr->next; | ||
880 | if (trav->curr != trav->head) | ||
881 | break; | ||
882 | mutex_unlock(&xt[NFPROTO_UNSPEC].mutex); | ||
883 | mutex_lock(&xt[trav->nfproto].mutex); | ||
884 | trav->head = trav->curr = is_target ? | ||
885 | &xt[trav->nfproto].target : &xt[trav->nfproto].match; | ||
886 | trav->class = next_class[trav->class]; | ||
887 | break; | ||
888 | case MTTG_TRAV_NFP_SPEC: | ||
889 | trav->curr = trav->curr->next; | ||
890 | if (trav->curr != trav->head) | ||
891 | break; | ||
892 | /* fallthru, _stop will unlock */ | ||
893 | default: | ||
894 | return NULL; | ||
895 | } | ||
850 | 896 | ||
851 | mutex_lock(&xt[af].mutex); | 897 | if (ppos != NULL) |
852 | return seq_list_start(&xt[af].match, *pos); | 898 | ++*ppos; |
899 | return trav; | ||
853 | } | 900 | } |
854 | 901 | ||
855 | static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 902 | static void *xt_mttg_seq_start(struct seq_file *seq, loff_t *pos, |
903 | bool is_target) | ||
856 | { | 904 | { |
857 | struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private; | 905 | struct nf_mttg_trav *trav = seq->private; |
858 | u_int16_t af = (unsigned long)pde->data; | 906 | unsigned int j; |
859 | 907 | ||
860 | return seq_list_next(v, &xt[af].match, pos); | 908 | trav->class = MTTG_TRAV_INIT; |
909 | for (j = 0; j < *pos; ++j) | ||
910 | if (xt_mttg_seq_next(seq, NULL, NULL, is_target) == NULL) | ||
911 | return NULL; | ||
912 | return trav; | ||
861 | } | 913 | } |
862 | 914 | ||
863 | static void xt_match_seq_stop(struct seq_file *seq, void *v) | 915 | static void xt_mttg_seq_stop(struct seq_file *seq, void *v) |
864 | { | 916 | { |
865 | struct proc_dir_entry *pde = seq->private; | 917 | struct nf_mttg_trav *trav = seq->private; |
866 | u_int16_t af = (unsigned long)pde->data; | 918 | |
919 | switch (trav->class) { | ||
920 | case MTTG_TRAV_NFP_UNSPEC: | ||
921 | mutex_unlock(&xt[NFPROTO_UNSPEC].mutex); | ||
922 | break; | ||
923 | case MTTG_TRAV_NFP_SPEC: | ||
924 | mutex_unlock(&xt[trav->nfproto].mutex); | ||
925 | break; | ||
926 | } | ||
927 | } | ||
867 | 928 | ||
868 | mutex_unlock(&xt[af].mutex); | 929 | static void *xt_match_seq_start(struct seq_file *seq, loff_t *pos) |
930 | { | ||
931 | return xt_mttg_seq_start(seq, pos, false); | ||
869 | } | 932 | } |
870 | 933 | ||
871 | static int xt_match_seq_show(struct seq_file *seq, void *v) | 934 | static void *xt_match_seq_next(struct seq_file *seq, void *v, loff_t *ppos) |
872 | { | 935 | { |
873 | struct xt_match *match = list_entry(v, struct xt_match, list); | 936 | return xt_mttg_seq_next(seq, v, ppos, false); |
937 | } | ||
874 | 938 | ||
875 | if (strlen(match->name)) | 939 | static int xt_match_seq_show(struct seq_file *seq, void *v) |
876 | return seq_printf(seq, "%s\n", match->name); | 940 | { |
877 | else | 941 | const struct nf_mttg_trav *trav = seq->private; |
878 | return 0; | 942 | const struct xt_match *match; |
943 | |||
944 | switch (trav->class) { | ||
945 | case MTTG_TRAV_NFP_UNSPEC: | ||
946 | case MTTG_TRAV_NFP_SPEC: | ||
947 | if (trav->curr == trav->head) | ||
948 | return 0; | ||
949 | match = list_entry(trav->curr, struct xt_match, list); | ||
950 | return (*match->name == '\0') ? 0 : | ||
951 | seq_printf(seq, "%s\n", match->name); | ||
952 | } | ||
953 | return 0; | ||
879 | } | 954 | } |
880 | 955 | ||
881 | static const struct seq_operations xt_match_seq_ops = { | 956 | static const struct seq_operations xt_match_seq_ops = { |
882 | .start = xt_match_seq_start, | 957 | .start = xt_match_seq_start, |
883 | .next = xt_match_seq_next, | 958 | .next = xt_match_seq_next, |
884 | .stop = xt_match_seq_stop, | 959 | .stop = xt_mttg_seq_stop, |
885 | .show = xt_match_seq_show, | 960 | .show = xt_match_seq_show, |
886 | }; | 961 | }; |
887 | 962 | ||
888 | static int xt_match_open(struct inode *inode, struct file *file) | 963 | static int xt_match_open(struct inode *inode, struct file *file) |
889 | { | 964 | { |
965 | struct seq_file *seq; | ||
966 | struct nf_mttg_trav *trav; | ||
890 | int ret; | 967 | int ret; |
891 | 968 | ||
892 | ret = seq_open(file, &xt_match_seq_ops); | 969 | trav = kmalloc(sizeof(*trav), GFP_KERNEL); |
893 | if (!ret) { | 970 | if (trav == NULL) |
894 | struct seq_file *seq = file->private_data; | 971 | return -ENOMEM; |
895 | 972 | ||
896 | seq->private = PDE(inode); | 973 | ret = seq_open(file, &xt_match_seq_ops); |
974 | if (ret < 0) { | ||
975 | kfree(trav); | ||
976 | return ret; | ||
897 | } | 977 | } |
898 | return ret; | 978 | |
979 | seq = file->private_data; | ||
980 | seq->private = trav; | ||
981 | trav->nfproto = (unsigned long)PDE(inode)->data; | ||
982 | return 0; | ||
899 | } | 983 | } |
900 | 984 | ||
901 | static const struct file_operations xt_match_ops = { | 985 | static const struct file_operations xt_match_ops = { |
@@ -903,62 +987,63 @@ static const struct file_operations xt_match_ops = { | |||
903 | .open = xt_match_open, | 987 | .open = xt_match_open, |
904 | .read = seq_read, | 988 | .read = seq_read, |
905 | .llseek = seq_lseek, | 989 | .llseek = seq_lseek, |
906 | .release = seq_release, | 990 | .release = seq_release_private, |
907 | }; | 991 | }; |
908 | 992 | ||
909 | static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos) | 993 | static void *xt_target_seq_start(struct seq_file *seq, loff_t *pos) |
910 | { | 994 | { |
911 | struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private; | 995 | return xt_mttg_seq_start(seq, pos, true); |
912 | u_int16_t af = (unsigned long)pde->data; | ||
913 | |||
914 | mutex_lock(&xt[af].mutex); | ||
915 | return seq_list_start(&xt[af].target, *pos); | ||
916 | } | 996 | } |
917 | 997 | ||
918 | static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *pos) | 998 | static void *xt_target_seq_next(struct seq_file *seq, void *v, loff_t *ppos) |
919 | { | 999 | { |
920 | struct proc_dir_entry *pde = (struct proc_dir_entry *)seq->private; | 1000 | return xt_mttg_seq_next(seq, v, ppos, true); |
921 | u_int16_t af = (unsigned long)pde->data; | ||
922 | |||
923 | return seq_list_next(v, &xt[af].target, pos); | ||
924 | } | ||
925 | |||
926 | static void xt_target_seq_stop(struct seq_file *seq, void *v) | ||
927 | { | ||
928 | struct proc_dir_entry *pde = seq->private; | ||
929 | u_int16_t af = (unsigned long)pde->data; | ||
930 | |||
931 | mutex_unlock(&xt[af].mutex); | ||
932 | } | 1001 | } |
933 | 1002 | ||
934 | static int xt_target_seq_show(struct seq_file *seq, void *v) | 1003 | static int xt_target_seq_show(struct seq_file *seq, void *v) |
935 | { | 1004 | { |
936 | struct xt_target *target = list_entry(v, struct xt_target, list); | 1005 | const struct nf_mttg_trav *trav = seq->private; |
937 | 1006 | const struct xt_target *target; | |
938 | if (strlen(target->name)) | 1007 | |
939 | return seq_printf(seq, "%s\n", target->name); | 1008 | switch (trav->class) { |
940 | else | 1009 | case MTTG_TRAV_NFP_UNSPEC: |
941 | return 0; | 1010 | case MTTG_TRAV_NFP_SPEC: |
1011 | if (trav->curr == trav->head) | ||
1012 | return 0; | ||
1013 | target = list_entry(trav->curr, struct xt_target, list); | ||
1014 | return (*target->name == '\0') ? 0 : | ||
1015 | seq_printf(seq, "%s\n", target->name); | ||
1016 | } | ||
1017 | return 0; | ||
942 | } | 1018 | } |
943 | 1019 | ||
944 | static const struct seq_operations xt_target_seq_ops = { | 1020 | static const struct seq_operations xt_target_seq_ops = { |
945 | .start = xt_target_seq_start, | 1021 | .start = xt_target_seq_start, |
946 | .next = xt_target_seq_next, | 1022 | .next = xt_target_seq_next, |
947 | .stop = xt_target_seq_stop, | 1023 | .stop = xt_mttg_seq_stop, |
948 | .show = xt_target_seq_show, | 1024 | .show = xt_target_seq_show, |
949 | }; | 1025 | }; |
950 | 1026 | ||
951 | static int xt_target_open(struct inode *inode, struct file *file) | 1027 | static int xt_target_open(struct inode *inode, struct file *file) |
952 | { | 1028 | { |
1029 | struct seq_file *seq; | ||
1030 | struct nf_mttg_trav *trav; | ||
953 | int ret; | 1031 | int ret; |
954 | 1032 | ||
955 | ret = seq_open(file, &xt_target_seq_ops); | 1033 | trav = kmalloc(sizeof(*trav), GFP_KERNEL); |
956 | if (!ret) { | 1034 | if (trav == NULL) |
957 | struct seq_file *seq = file->private_data; | 1035 | return -ENOMEM; |
958 | 1036 | ||
959 | seq->private = PDE(inode); | 1037 | ret = seq_open(file, &xt_target_seq_ops); |
1038 | if (ret < 0) { | ||
1039 | kfree(trav); | ||
1040 | return ret; | ||
960 | } | 1041 | } |
961 | return ret; | 1042 | |
1043 | seq = file->private_data; | ||
1044 | seq->private = trav; | ||
1045 | trav->nfproto = (unsigned long)PDE(inode)->data; | ||
1046 | return 0; | ||
962 | } | 1047 | } |
963 | 1048 | ||
964 | static const struct file_operations xt_target_ops = { | 1049 | static const struct file_operations xt_target_ops = { |
@@ -966,7 +1051,7 @@ static const struct file_operations xt_target_ops = { | |||
966 | .open = xt_target_open, | 1051 | .open = xt_target_open, |
967 | .read = seq_read, | 1052 | .read = seq_read, |
968 | .llseek = seq_lseek, | 1053 | .llseek = seq_lseek, |
969 | .release = seq_release, | 1054 | .release = seq_release_private, |
970 | }; | 1055 | }; |
971 | 1056 | ||
972 | #define FORMAT_TABLES "_tables_names" | 1057 | #define FORMAT_TABLES "_tables_names" |
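
The new nf_mttg_trav code turns the /proc listing of matches and targets into a small state machine: one seq_file pass walks the NFPROTO_UNSPEC list first, then the per-family list, taking only the mutex of the list currently being visited. A simplified userspace sketch of the same two-phase walk over plain arrays (no locking, names invented):

#include <stdio.h>

enum trav_class { TRAV_INIT, TRAV_UNSPEC, TRAV_SPEC, TRAV_DONE };

struct trav {
	enum trav_class class;
	const char *const *list;
	int idx, len;
};

static const char *const unspec_names[] = { "comment", "mark", "limit" };
static const char *const ipv4_names[]   = { "icmp", "ah", "ttl" };

/* Advance to the next entry, crossing from the UNSPEC list to the
 * per-family list when the first one is exhausted. Returns NULL at
 * the end of both lists. */
static const char *trav_next(struct trav *t)
{
	switch (t->class) {
	case TRAV_INIT:
		t->class = TRAV_UNSPEC;
		t->list = unspec_names;
		t->len = 3;
		t->idx = 0;
		break;
	case TRAV_UNSPEC:
		if (++t->idx < t->len)
			break;
		t->class = TRAV_SPEC;
		t->list = ipv4_names;
		t->len = 3;
		t->idx = 0;
		break;
	case TRAV_SPEC:
		if (++t->idx < t->len)
			break;
		t->class = TRAV_DONE;
		return NULL;
	default:
		return NULL;
	}
	return t->list[t->idx];
}

int main(void)
{
	struct trav t = { .class = TRAV_INIT };
	const char *name;

	while ((name = trav_next(&t)) != NULL)
		printf("%s\n", name);
	return 0;
}
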
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index fe80b614a400..791e030ea903 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c | |||
@@ -542,7 +542,7 @@ recent_mt_proc_write(struct file *file, const char __user *input, | |||
542 | struct recent_entry *e; | 542 | struct recent_entry *e; |
543 | char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; | 543 | char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; |
544 | const char *c = buf; | 544 | const char *c = buf; |
545 | union nf_inet_addr addr; | 545 | union nf_inet_addr addr = {}; |
546 | u_int16_t family; | 546 | u_int16_t family; |
547 | bool add, succ; | 547 | bool add, succ; |
548 | 548 | ||
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5b33879c6422..b73d4e61c5ac 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c | |||
@@ -85,6 +85,7 @@ struct netlink_sock { | |||
85 | 85 | ||
86 | #define NETLINK_KERNEL_SOCKET 0x1 | 86 | #define NETLINK_KERNEL_SOCKET 0x1 |
87 | #define NETLINK_RECV_PKTINFO 0x2 | 87 | #define NETLINK_RECV_PKTINFO 0x2 |
88 | #define NETLINK_BROADCAST_SEND_ERROR 0x4 | ||
88 | 89 | ||
89 | static inline struct netlink_sock *nlk_sk(struct sock *sk) | 90 | static inline struct netlink_sock *nlk_sk(struct sock *sk) |
90 | { | 91 | { |
@@ -995,12 +996,15 @@ static inline int do_one_broadcast(struct sock *sk, | |||
995 | netlink_overrun(sk); | 996 | netlink_overrun(sk); |
996 | /* Clone failed. Notify ALL listeners. */ | 997 | /* Clone failed. Notify ALL listeners. */ |
997 | p->failure = 1; | 998 | p->failure = 1; |
999 | if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) | ||
1000 | p->delivery_failure = 1; | ||
998 | } else if (sk_filter(sk, p->skb2)) { | 1001 | } else if (sk_filter(sk, p->skb2)) { |
999 | kfree_skb(p->skb2); | 1002 | kfree_skb(p->skb2); |
1000 | p->skb2 = NULL; | 1003 | p->skb2 = NULL; |
1001 | } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { | 1004 | } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { |
1002 | netlink_overrun(sk); | 1005 | netlink_overrun(sk); |
1003 | p->delivery_failure = 1; | 1006 | if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) |
1007 | p->delivery_failure = 1; | ||
1004 | } else { | 1008 | } else { |
1005 | p->congested |= val; | 1009 | p->congested |= val; |
1006 | p->delivered = 1; | 1010 | p->delivered = 1; |
@@ -1045,10 +1049,9 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, | |||
1045 | 1049 | ||
1046 | netlink_unlock_table(); | 1050 | netlink_unlock_table(); |
1047 | 1051 | ||
1048 | if (info.skb2) | 1052 | kfree_skb(info.skb2); |
1049 | kfree_skb(info.skb2); | ||
1050 | 1053 | ||
1051 | if (info.delivery_failure || info.failure) | 1054 | if (info.delivery_failure) |
1052 | return -ENOBUFS; | 1055 | return -ENOBUFS; |
1053 | 1056 | ||
1054 | if (info.delivered) { | 1057 | if (info.delivered) { |
@@ -1088,6 +1091,13 @@ out: | |||
1088 | return 0; | 1091 | return 0; |
1089 | } | 1092 | } |
1090 | 1093 | ||
1094 | /** | ||
1095 | * netlink_set_err - report error to broadcast listeners | ||
1096 | * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() | ||
1097 | * @pid: the PID of a process that we want to skip (if any) | ||
1098 | * @group: the broadcast group that will notice the error | ||
1099 | * @code: error code, must be negative (as usual in kernelspace) | ||
1100 | */ | ||
1091 | void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) | 1101 | void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) |
1092 | { | 1102 | { |
1093 | struct netlink_set_err_data info; | 1103 | struct netlink_set_err_data info; |
@@ -1097,7 +1107,8 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) | |||
1097 | info.exclude_sk = ssk; | 1107 | info.exclude_sk = ssk; |
1098 | info.pid = pid; | 1108 | info.pid = pid; |
1099 | info.group = group; | 1109 | info.group = group; |
1100 | info.code = code; | 1110 | /* sk->sk_err wants a positive error value */ |
1111 | info.code = -code; | ||
1101 | 1112 | ||
1102 | read_lock(&nl_table_lock); | 1113 | read_lock(&nl_table_lock); |
1103 | 1114 | ||
@@ -1164,6 +1175,13 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, | |||
1164 | err = 0; | 1175 | err = 0; |
1165 | break; | 1176 | break; |
1166 | } | 1177 | } |
1178 | case NETLINK_BROADCAST_ERROR: | ||
1179 | if (val) | ||
1180 | nlk->flags |= NETLINK_BROADCAST_SEND_ERROR; | ||
1181 | else | ||
1182 | nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR; | ||
1183 | err = 0; | ||
1184 | break; | ||
1167 | default: | 1185 | default: |
1168 | err = -ENOPROTOOPT; | 1186 | err = -ENOPROTOOPT; |
1169 | } | 1187 | } |
@@ -1196,6 +1214,16 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, | |||
1196 | return -EFAULT; | 1214 | return -EFAULT; |
1197 | err = 0; | 1215 | err = 0; |
1198 | break; | 1216 | break; |
1217 | case NETLINK_BROADCAST_ERROR: | ||
1218 | if (len < sizeof(int)) | ||
1219 | return -EINVAL; | ||
1220 | len = sizeof(int); | ||
1221 | val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0; | ||
1222 | if (put_user(len, optlen) || | ||
1223 | put_user(val, optval)) | ||
1224 | return -EFAULT; | ||
1225 | err = 0; | ||
1226 | break; | ||
1199 | default: | 1227 | default: |
1200 | err = -ENOPROTOOPT; | 1228 | err = -ENOPROTOOPT; |
1201 | } | 1229 | } |
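
The NETLINK_BROADCAST_ERROR option added in the two hunks above is set by a multicast listener, not by the sender: a listener that enables it allows its delivery failures to be reported, so netlink_broadcast()/nlmsg_notify() on the sending side return -ENOBUFS instead of dropping silently. A minimal, hypothetical userspace sketch of a listener opting in; the constant is assumed to come from <linux/netlink.h> of a kernel carrying this patch, and the SOL_NETLINK fallback value is provided only for older libc headers:

    #include <stdio.h>
    #include <sys/socket.h>
    #include <linux/netlink.h>

    #ifndef SOL_NETLINK
    #define SOL_NETLINK 270         /* fallback for older userspace headers */
    #endif

    int main(void)
    {
            int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
            int on = 1;

            if (fd < 0)
                    return 1;

            /* Opt in to having broadcast delivery failures reported back
             * to senders targeting the groups this socket joins. */
            if (setsockopt(fd, SOL_NETLINK, NETLINK_BROADCAST_ERROR,
                           &on, sizeof(on)) < 0)
                    perror("setsockopt(NETLINK_BROADCAST_ERROR)");

            return 0;
    }
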
@@ -1522,8 +1550,7 @@ EXPORT_SYMBOL(netlink_set_nonroot); | |||
1522 | 1550 | ||
1523 | static void netlink_destroy_callback(struct netlink_callback *cb) | 1551 | static void netlink_destroy_callback(struct netlink_callback *cb) |
1524 | { | 1552 | { |
1525 | if (cb->skb) | 1553 | kfree_skb(cb->skb); |
1526 | kfree_skb(cb->skb); | ||
1527 | kfree(cb); | 1554 | kfree(cb); |
1528 | } | 1555 | } |
1529 | 1556 | ||
@@ -1740,12 +1767,18 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, | |||
1740 | exclude_pid = pid; | 1767 | exclude_pid = pid; |
1741 | } | 1768 | } |
1742 | 1769 | ||
1743 | /* errors reported via destination sk->sk_err */ | 1770 | /* errors reported via destination sk->sk_err, but propagate |
1744 | nlmsg_multicast(sk, skb, exclude_pid, group, flags); | 1771 | * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ |
1772 | err = nlmsg_multicast(sk, skb, exclude_pid, group, flags); | ||
1745 | } | 1773 | } |
1746 | 1774 | ||
1747 | if (report) | 1775 | if (report) { |
1748 | err = nlmsg_unicast(sk, skb, pid); | 1776 | int err2; |
1777 | |||
1778 | err2 = nlmsg_unicast(sk, skb, pid); | ||
1779 | if (!err || err == -ESRCH) | ||
1780 | err = err2; | ||
1781 | } | ||
1749 | 1782 | ||
1750 | return err; | 1783 | return err; |
1751 | } | 1784 | } |
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index cba7849de98e..6d9c58ec56ac 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c | |||
@@ -1037,6 +1037,10 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock, | |||
1037 | unsigned char *asmptr; | 1037 | unsigned char *asmptr; |
1038 | int size; | 1038 | int size; |
1039 | 1039 | ||
1040 | /* NetRom empty data frame has no meaning: don't send */ | ||
1041 | if (len == 0) | ||
1042 | return 0; | ||
1043 | |||
1040 | if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) | 1044 | if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) |
1041 | return -EINVAL; | 1045 | return -EINVAL; |
1042 | 1046 | ||
@@ -1167,6 +1171,11 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1167 | skb_reset_transport_header(skb); | 1171 | skb_reset_transport_header(skb); |
1168 | copied = skb->len; | 1172 | copied = skb->len; |
1169 | 1173 | ||
1174 | /* NetRom empty data frame has no meaning: ignore it */ | ||
1175 | if (copied == 0) { | ||
1176 | goto out; | ||
1177 | } | ||
1178 | |||
1170 | if (copied > size) { | 1179 | if (copied > size) { |
1171 | copied = size; | 1180 | copied = size; |
1172 | msg->msg_flags |= MSG_TRUNC; | 1181 | msg->msg_flags |= MSG_TRUNC; |
@@ -1182,7 +1191,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, | |||
1182 | 1191 | ||
1183 | msg->msg_namelen = sizeof(*sax); | 1192 | msg->msg_namelen = sizeof(*sax); |
1184 | 1193 | ||
1185 | skb_free_datagram(sk, skb); | 1194 | out: skb_free_datagram(sk, skb); |
1186 | 1195 | ||
1187 | release_sock(sk); | 1196 | release_sock(sk); |
1188 | return copied; | 1197 | return copied; |
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1fc4a7885c41..74776de523ec 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c | |||
@@ -584,7 +584,7 @@ drop_n_restore: | |||
584 | skb->len = skb_len; | 584 | skb->len = skb_len; |
585 | } | 585 | } |
586 | drop: | 586 | drop: |
587 | kfree_skb(skb); | 587 | consume_skb(skb); |
588 | return 0; | 588 | return 0; |
589 | } | 589 | } |
590 | 590 | ||
@@ -756,8 +756,7 @@ ring_is_full: | |||
756 | spin_unlock(&sk->sk_receive_queue.lock); | 756 | spin_unlock(&sk->sk_receive_queue.lock); |
757 | 757 | ||
758 | sk->sk_data_ready(sk, 0); | 758 | sk->sk_data_ready(sk, 0); |
759 | if (copy_skb) | 759 | kfree_skb(copy_skb); |
760 | kfree_skb(copy_skb); | ||
761 | goto drop_n_restore; | 760 | goto drop_n_restore; |
762 | } | 761 | } |
763 | 762 | ||
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 81795ea87794..a662e62a99cf 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c | |||
@@ -382,9 +382,8 @@ out: | |||
382 | return NET_RX_DROP; | 382 | return NET_RX_DROP; |
383 | } | 383 | } |
384 | 384 | ||
385 | static struct packet_type phonet_packet_type = { | 385 | static struct packet_type phonet_packet_type __read_mostly = { |
386 | .type = cpu_to_be16(ETH_P_PHONET), | 386 | .type = cpu_to_be16(ETH_P_PHONET), |
387 | .dev = NULL, | ||
388 | .func = phonet_rcv, | 387 | .func = phonet_rcv, |
389 | }; | 388 | }; |
390 | 389 | ||
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 1ceea1f92413..cec4e5951681 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c | |||
@@ -47,8 +47,9 @@ static void rtmsg_notify(int event, struct net_device *dev, u8 addr) | |||
47 | kfree_skb(skb); | 47 | kfree_skb(skb); |
48 | goto errout; | 48 | goto errout; |
49 | } | 49 | } |
50 | err = rtnl_notify(skb, dev_net(dev), 0, | 50 | rtnl_notify(skb, dev_net(dev), 0, |
51 | RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); | 51 | RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); |
52 | return; | ||
52 | errout: | 53 | errout: |
53 | if (err < 0) | 54 | if (err < 0) |
54 | rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); | 55 | rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); |
diff --git a/net/rds/Kconfig b/net/rds/Kconfig new file mode 100644 index 000000000000..796773b5df9b --- /dev/null +++ b/net/rds/Kconfig | |||
@@ -0,0 +1,14 @@ | |||
1 | |||
2 | config RDS | ||
3 | tristate "Reliable Datagram Sockets (RDS) (EXPERIMENTAL)" | ||
4 | depends on INET && INFINIBAND_IPOIB && EXPERIMENTAL | ||
5 | depends on INFINIBAND && INFINIBAND_ADDR_TRANS | ||
6 | ---help--- | ||
7 | RDS provides reliable, sequenced delivery of datagrams | ||
8 | over InfiniBand. | ||
9 | |||
10 | config RDS_DEBUG | ||
11 | bool "Debugging messages" | ||
12 | depends on RDS | ||
13 | default n | ||
14 | |||
diff --git a/net/rds/Makefile b/net/rds/Makefile new file mode 100644 index 000000000000..51f27585fa08 --- /dev/null +++ b/net/rds/Makefile | |||
@@ -0,0 +1,14 @@ | |||
1 | obj-$(CONFIG_RDS) += rds.o | ||
2 | rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \ | ||
3 | recv.o send.o stats.o sysctl.o threads.o transport.o \ | ||
4 | loop.o page.o rdma.o \ | ||
5 | rdma_transport.o \ | ||
6 | ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \ | ||
7 | ib_sysctl.o ib_rdma.o \ | ||
8 | iw.o iw_cm.o iw_recv.o iw_ring.o iw_send.o iw_stats.o \ | ||
9 | iw_sysctl.o iw_rdma.o | ||
10 | |||
11 | ifeq ($(CONFIG_RDS_DEBUG), y) | ||
12 | EXTRA_CFLAGS += -DDEBUG | ||
13 | endif | ||
14 | |||
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c new file mode 100644 index 000000000000..20cf16fc572f --- /dev/null +++ b/net/rds/af_rds.c | |||
@@ -0,0 +1,586 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/module.h> | ||
34 | #include <linux/errno.h> | ||
35 | #include <linux/kernel.h> | ||
36 | #include <linux/in.h> | ||
37 | #include <linux/poll.h> | ||
38 | #include <linux/version.h> | ||
39 | #include <net/sock.h> | ||
40 | |||
41 | #include "rds.h" | ||
42 | #include "rdma.h" | ||
43 | #include "rdma_transport.h" | ||
44 | |||
45 | /* this is just used for stats gathering :/ */ | ||
46 | static DEFINE_SPINLOCK(rds_sock_lock); | ||
47 | static unsigned long rds_sock_count; | ||
48 | static LIST_HEAD(rds_sock_list); | ||
49 | DECLARE_WAIT_QUEUE_HEAD(rds_poll_waitq); | ||
50 | |||
51 | /* | ||
52 | * This is called as the final descriptor referencing this socket is closed. | ||
53 | * We have to unbind the socket so that another socket can be bound to the | ||
54 | * address it was using. | ||
55 | * | ||
56 | * We have to be careful about racing with the incoming path. sock_orphan() | ||
57 | * sets SOCK_DEAD and we use that as an indicator to the rx path that new | ||
58 | * messages shouldn't be queued. | ||
59 | */ | ||
60 | static int rds_release(struct socket *sock) | ||
61 | { | ||
62 | struct sock *sk = sock->sk; | ||
63 | struct rds_sock *rs; | ||
64 | unsigned long flags; | ||
65 | |||
66 | if (sk == NULL) | ||
67 | goto out; | ||
68 | |||
69 | rs = rds_sk_to_rs(sk); | ||
70 | |||
71 | sock_orphan(sk); | ||
72 | /* Note - rds_clear_recv_queue grabs rs_recv_lock, so | ||
73 | * that ensures the recv path has completed messing | ||
74 | * with the socket. */ | ||
75 | rds_clear_recv_queue(rs); | ||
76 | rds_cong_remove_socket(rs); | ||
77 | rds_remove_bound(rs); | ||
78 | rds_send_drop_to(rs, NULL); | ||
79 | rds_rdma_drop_keys(rs); | ||
80 | rds_notify_queue_get(rs, NULL); | ||
81 | |||
82 | spin_lock_irqsave(&rds_sock_lock, flags); | ||
83 | list_del_init(&rs->rs_item); | ||
84 | rds_sock_count--; | ||
85 | spin_unlock_irqrestore(&rds_sock_lock, flags); | ||
86 | |||
87 | sock->sk = NULL; | ||
88 | sock_put(sk); | ||
89 | out: | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * Careful not to race with rds_release -> sock_orphan which clears sk_sleep. | ||
95 | * _bh() isn't OK here, we're called from interrupt handlers. It's probably OK | ||
96 | * to wake the waitqueue after sk_sleep is clear as we hold a sock ref, but | ||
97 | * this seems more conservative. | ||
98 | * NB - normally, one would use sk_callback_lock for this, but we can | ||
99 | * get here from interrupts, whereas the network code grabs sk_callback_lock | ||
100 | * with _lock_bh only - so relying on sk_callback_lock introduces livelocks. | ||
101 | */ | ||
102 | void rds_wake_sk_sleep(struct rds_sock *rs) | ||
103 | { | ||
104 | unsigned long flags; | ||
105 | |||
106 | read_lock_irqsave(&rs->rs_recv_lock, flags); | ||
107 | __rds_wake_sk_sleep(rds_rs_to_sk(rs)); | ||
108 | read_unlock_irqrestore(&rs->rs_recv_lock, flags); | ||
109 | } | ||
110 | |||
111 | static int rds_getname(struct socket *sock, struct sockaddr *uaddr, | ||
112 | int *uaddr_len, int peer) | ||
113 | { | ||
114 | struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; | ||
115 | struct rds_sock *rs = rds_sk_to_rs(sock->sk); | ||
116 | |||
117 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | ||
118 | |||
119 | /* racy, don't care */ | ||
120 | if (peer) { | ||
121 | if (!rs->rs_conn_addr) | ||
122 | return -ENOTCONN; | ||
123 | |||
124 | sin->sin_port = rs->rs_conn_port; | ||
125 | sin->sin_addr.s_addr = rs->rs_conn_addr; | ||
126 | } else { | ||
127 | sin->sin_port = rs->rs_bound_port; | ||
128 | sin->sin_addr.s_addr = rs->rs_bound_addr; | ||
129 | } | ||
130 | |||
131 | sin->sin_family = AF_INET; | ||
132 | |||
133 | *uaddr_len = sizeof(*sin); | ||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | /* | ||
138 | * RDS' poll is without a doubt the least intuitive part of the interface, | ||
139 | * as POLLIN and POLLOUT do not behave entirely as you would expect from | ||
140 | * a network protocol. | ||
141 | * | ||
142 | * POLLIN is asserted if | ||
143 | * - there is data on the receive queue. | ||
144 | * - to signal that a previously congested destination may have become | ||
145 | * uncongested | ||
146 | * - A notification has been queued to the socket (this can be a congestion | ||
147 | * update, or an RDMA completion). | ||
148 | * | ||
149 | * POLLOUT is asserted if there is room on the send queue. This does not mean, | ||
150 | * however, that the next sendmsg() call will succeed. If the application tries | ||
151 | * to send to a congested destination, the system call may still fail (and | ||
152 | * return ENOBUFS). | ||
153 | */ | ||
154 | static unsigned int rds_poll(struct file *file, struct socket *sock, | ||
155 | poll_table *wait) | ||
156 | { | ||
157 | struct sock *sk = sock->sk; | ||
158 | struct rds_sock *rs = rds_sk_to_rs(sk); | ||
159 | unsigned int mask = 0; | ||
160 | unsigned long flags; | ||
161 | |||
162 | poll_wait(file, sk->sk_sleep, wait); | ||
163 | |||
164 | poll_wait(file, &rds_poll_waitq, wait); | ||
165 | |||
166 | read_lock_irqsave(&rs->rs_recv_lock, flags); | ||
167 | if (!rs->rs_cong_monitor) { | ||
168 | /* When a congestion map was updated, we signal POLLIN for | ||
169 | * "historical" reasons. Applications can also poll for | ||
170 | * WRBAND instead. */ | ||
171 | if (rds_cong_updated_since(&rs->rs_cong_track)) | ||
172 | mask |= (POLLIN | POLLRDNORM | POLLWRBAND); | ||
173 | } else { | ||
174 | spin_lock(&rs->rs_lock); | ||
175 | if (rs->rs_cong_notify) | ||
176 | mask |= (POLLIN | POLLRDNORM); | ||
177 | spin_unlock(&rs->rs_lock); | ||
178 | } | ||
179 | if (!list_empty(&rs->rs_recv_queue) | ||
180 | || !list_empty(&rs->rs_notify_queue)) | ||
181 | mask |= (POLLIN | POLLRDNORM); | ||
182 | if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) | ||
183 | mask |= (POLLOUT | POLLWRNORM); | ||
184 | read_unlock_irqrestore(&rs->rs_recv_lock, flags); | ||
185 | |||
186 | return mask; | ||
187 | } | ||
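
Given the semantics spelled out in the comment above (POLLIN may only mean a congestion-map update or a queued notification, and POLLOUT only promises send-queue room, not that the destination is uncongested), callers generally treat ENOBUFS from sendmsg() as "poll again". A rough, hypothetical userspace sketch under those assumptions, for an fd that is already a bound RDS socket:

    #include <errno.h>
    #include <poll.h>
    #include <sys/socket.h>

    /* Hypothetical helper: one poll()/IO round on an already-bound RDS
     * socket 'fd'.  Sketch only; error handling is minimal. */
    static int rds_io_once(int fd, struct msghdr *out, struct msghdr *in)
    {
            struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };

            if (poll(&pfd, 1, -1) < 0)
                    return -1;

            /* POLLIN: real data, a notification, or just a congestion-map
             * update that woke everyone sleeping in rds_poll_waitq. */
            if (pfd.revents & POLLIN)
                    recvmsg(fd, in, MSG_DONTWAIT);

            /* POLLOUT only means local send-queue room; the destination
             * may still be congested, so ENOBUFS just means "try later". */
            if ((pfd.revents & POLLOUT) &&
                sendmsg(fd, out, MSG_DONTWAIT) < 0 &&
                errno != ENOBUFS && errno != EAGAIN)
                    return -1;

            return 0;
    }
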
188 | |||
189 | static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) | ||
190 | { | ||
191 | return -ENOIOCTLCMD; | ||
192 | } | ||
193 | |||
194 | static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, | ||
195 | int len) | ||
196 | { | ||
197 | struct sockaddr_in sin; | ||
198 | int ret = 0; | ||
199 | |||
200 | /* racing with another thread binding seems ok here */ | ||
201 | if (rs->rs_bound_addr == 0) { | ||
202 | ret = -ENOTCONN; /* XXX not a great errno */ | ||
203 | goto out; | ||
204 | } | ||
205 | |||
206 | if (len < sizeof(struct sockaddr_in)) { | ||
207 | ret = -EINVAL; | ||
208 | goto out; | ||
209 | } | ||
210 | |||
211 | if (copy_from_user(&sin, optval, sizeof(sin))) { | ||
212 | ret = -EFAULT; | ||
213 | goto out; | ||
214 | } | ||
215 | |||
216 | rds_send_drop_to(rs, &sin); | ||
217 | out: | ||
218 | return ret; | ||
219 | } | ||
220 | |||
221 | static int rds_set_bool_option(unsigned char *optvar, char __user *optval, | ||
222 | int optlen) | ||
223 | { | ||
224 | int value; | ||
225 | |||
226 | if (optlen < sizeof(int)) | ||
227 | return -EINVAL; | ||
228 | if (get_user(value, (int __user *) optval)) | ||
229 | return -EFAULT; | ||
230 | *optvar = !!value; | ||
231 | return 0; | ||
232 | } | ||
233 | |||
234 | static int rds_cong_monitor(struct rds_sock *rs, char __user *optval, | ||
235 | int optlen) | ||
236 | { | ||
237 | int ret; | ||
238 | |||
239 | ret = rds_set_bool_option(&rs->rs_cong_monitor, optval, optlen); | ||
240 | if (ret == 0) { | ||
241 | if (rs->rs_cong_monitor) { | ||
242 | rds_cong_add_socket(rs); | ||
243 | } else { | ||
244 | rds_cong_remove_socket(rs); | ||
245 | rs->rs_cong_mask = 0; | ||
246 | rs->rs_cong_notify = 0; | ||
247 | } | ||
248 | } | ||
249 | return ret; | ||
250 | } | ||
251 | |||
252 | static int rds_setsockopt(struct socket *sock, int level, int optname, | ||
253 | char __user *optval, int optlen) | ||
254 | { | ||
255 | struct rds_sock *rs = rds_sk_to_rs(sock->sk); | ||
256 | int ret; | ||
257 | |||
258 | if (level != SOL_RDS) { | ||
259 | ret = -ENOPROTOOPT; | ||
260 | goto out; | ||
261 | } | ||
262 | |||
263 | switch (optname) { | ||
264 | case RDS_CANCEL_SENT_TO: | ||
265 | ret = rds_cancel_sent_to(rs, optval, optlen); | ||
266 | break; | ||
267 | case RDS_GET_MR: | ||
268 | ret = rds_get_mr(rs, optval, optlen); | ||
269 | break; | ||
270 | case RDS_FREE_MR: | ||
271 | ret = rds_free_mr(rs, optval, optlen); | ||
272 | break; | ||
273 | case RDS_RECVERR: | ||
274 | ret = rds_set_bool_option(&rs->rs_recverr, optval, optlen); | ||
275 | break; | ||
276 | case RDS_CONG_MONITOR: | ||
277 | ret = rds_cong_monitor(rs, optval, optlen); | ||
278 | break; | ||
279 | default: | ||
280 | ret = -ENOPROTOOPT; | ||
281 | } | ||
282 | out: | ||
283 | return ret; | ||
284 | } | ||
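
All of these options live at the RDS socket level (SOL_RDS) rather than SOL_SOCKET. As one hedged illustration, a process that prefers per-destination congestion notifications over the wake-everyone behaviour used by rds_poll() above can take the rds_setsockopt() -> rds_cong_monitor() path; the option constants are assumed to come from the RDS ABI header that accompanies this code and is not part of this hunk:

    #include <sys/socket.h>
    #include <linux/rds.h>   /* assumed location of SOL_RDS / RDS_CONG_MONITOR */

    static int rds_enable_cong_monitor(int fd)
    {
            int on = 1;

            /* Routes through rds_setsockopt() -> rds_cong_monitor(), which
             * adds the socket to the rds_cong_monitor list so POLLIN is
             * raised only for ports this socket has seen as congested. */
            return setsockopt(fd, SOL_RDS, RDS_CONG_MONITOR, &on, sizeof(on));
    }
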
285 | |||
286 | static int rds_getsockopt(struct socket *sock, int level, int optname, | ||
287 | char __user *optval, int __user *optlen) | ||
288 | { | ||
289 | struct rds_sock *rs = rds_sk_to_rs(sock->sk); | ||
290 | int ret = -ENOPROTOOPT, len; | ||
291 | |||
292 | if (level != SOL_RDS) | ||
293 | goto out; | ||
294 | |||
295 | if (get_user(len, optlen)) { | ||
296 | ret = -EFAULT; | ||
297 | goto out; | ||
298 | } | ||
299 | |||
300 | switch (optname) { | ||
301 | case RDS_INFO_FIRST ... RDS_INFO_LAST: | ||
302 | ret = rds_info_getsockopt(sock, optname, optval, | ||
303 | optlen); | ||
304 | break; | ||
305 | |||
306 | case RDS_RECVERR: | ||
307 | if (len < sizeof(int)) | ||
308 | ret = -EINVAL; | ||
309 | else | ||
310 | if (put_user(rs->rs_recverr, (int __user *) optval) | ||
311 | || put_user(sizeof(int), optlen)) | ||
312 | ret = -EFAULT; | ||
313 | else | ||
314 | ret = 0; | ||
315 | break; | ||
316 | default: | ||
317 | break; | ||
318 | } | ||
319 | |||
320 | out: | ||
321 | return ret; | ||
322 | |||
323 | } | ||
324 | |||
325 | static int rds_connect(struct socket *sock, struct sockaddr *uaddr, | ||
326 | int addr_len, int flags) | ||
327 | { | ||
328 | struct sock *sk = sock->sk; | ||
329 | struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; | ||
330 | struct rds_sock *rs = rds_sk_to_rs(sk); | ||
331 | int ret = 0; | ||
332 | |||
333 | lock_sock(sk); | ||
334 | |||
335 | if (addr_len != sizeof(struct sockaddr_in)) { | ||
336 | ret = -EINVAL; | ||
337 | goto out; | ||
338 | } | ||
339 | |||
340 | if (sin->sin_family != AF_INET) { | ||
341 | ret = -EAFNOSUPPORT; | ||
342 | goto out; | ||
343 | } | ||
344 | |||
345 | if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { | ||
346 | ret = -EDESTADDRREQ; | ||
347 | goto out; | ||
348 | } | ||
349 | |||
350 | rs->rs_conn_addr = sin->sin_addr.s_addr; | ||
351 | rs->rs_conn_port = sin->sin_port; | ||
352 | |||
353 | out: | ||
354 | release_sock(sk); | ||
355 | return ret; | ||
356 | } | ||
357 | |||
358 | static struct proto rds_proto = { | ||
359 | .name = "RDS", | ||
360 | .owner = THIS_MODULE, | ||
361 | .obj_size = sizeof(struct rds_sock), | ||
362 | }; | ||
363 | |||
364 | static struct proto_ops rds_proto_ops = { | ||
365 | .family = AF_RDS, | ||
366 | .owner = THIS_MODULE, | ||
367 | .release = rds_release, | ||
368 | .bind = rds_bind, | ||
369 | .connect = rds_connect, | ||
370 | .socketpair = sock_no_socketpair, | ||
371 | .accept = sock_no_accept, | ||
372 | .getname = rds_getname, | ||
373 | .poll = rds_poll, | ||
374 | .ioctl = rds_ioctl, | ||
375 | .listen = sock_no_listen, | ||
376 | .shutdown = sock_no_shutdown, | ||
377 | .setsockopt = rds_setsockopt, | ||
378 | .getsockopt = rds_getsockopt, | ||
379 | .sendmsg = rds_sendmsg, | ||
380 | .recvmsg = rds_recvmsg, | ||
381 | .mmap = sock_no_mmap, | ||
382 | .sendpage = sock_no_sendpage, | ||
383 | }; | ||
384 | |||
385 | static int __rds_create(struct socket *sock, struct sock *sk, int protocol) | ||
386 | { | ||
387 | unsigned long flags; | ||
388 | struct rds_sock *rs; | ||
389 | |||
390 | sock_init_data(sock, sk); | ||
391 | sock->ops = &rds_proto_ops; | ||
392 | sk->sk_protocol = protocol; | ||
393 | |||
394 | rs = rds_sk_to_rs(sk); | ||
395 | spin_lock_init(&rs->rs_lock); | ||
396 | rwlock_init(&rs->rs_recv_lock); | ||
397 | INIT_LIST_HEAD(&rs->rs_send_queue); | ||
398 | INIT_LIST_HEAD(&rs->rs_recv_queue); | ||
399 | INIT_LIST_HEAD(&rs->rs_notify_queue); | ||
400 | INIT_LIST_HEAD(&rs->rs_cong_list); | ||
401 | spin_lock_init(&rs->rs_rdma_lock); | ||
402 | rs->rs_rdma_keys = RB_ROOT; | ||
403 | |||
404 | spin_lock_irqsave(&rds_sock_lock, flags); | ||
405 | list_add_tail(&rs->rs_item, &rds_sock_list); | ||
406 | rds_sock_count++; | ||
407 | spin_unlock_irqrestore(&rds_sock_lock, flags); | ||
408 | |||
409 | return 0; | ||
410 | } | ||
411 | |||
412 | static int rds_create(struct net *net, struct socket *sock, int protocol) | ||
413 | { | ||
414 | struct sock *sk; | ||
415 | |||
416 | if (sock->type != SOCK_SEQPACKET || protocol) | ||
417 | return -ESOCKTNOSUPPORT; | ||
418 | |||
419 | sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto); | ||
420 | if (!sk) | ||
421 | return -ENOMEM; | ||
422 | |||
423 | return __rds_create(sock, sk, protocol); | ||
424 | } | ||
425 | |||
426 | void rds_sock_addref(struct rds_sock *rs) | ||
427 | { | ||
428 | sock_hold(rds_rs_to_sk(rs)); | ||
429 | } | ||
430 | |||
431 | void rds_sock_put(struct rds_sock *rs) | ||
432 | { | ||
433 | sock_put(rds_rs_to_sk(rs)); | ||
434 | } | ||
435 | |||
436 | static struct net_proto_family rds_family_ops = { | ||
437 | .family = AF_RDS, | ||
438 | .create = rds_create, | ||
439 | .owner = THIS_MODULE, | ||
440 | }; | ||
441 | |||
442 | static void rds_sock_inc_info(struct socket *sock, unsigned int len, | ||
443 | struct rds_info_iterator *iter, | ||
444 | struct rds_info_lengths *lens) | ||
445 | { | ||
446 | struct rds_sock *rs; | ||
447 | struct sock *sk; | ||
448 | struct rds_incoming *inc; | ||
449 | unsigned long flags; | ||
450 | unsigned int total = 0; | ||
451 | |||
452 | len /= sizeof(struct rds_info_message); | ||
453 | |||
454 | spin_lock_irqsave(&rds_sock_lock, flags); | ||
455 | |||
456 | list_for_each_entry(rs, &rds_sock_list, rs_item) { | ||
457 | sk = rds_rs_to_sk(rs); | ||
458 | read_lock(&rs->rs_recv_lock); | ||
459 | |||
460 | /* XXX too lazy to maintain counts.. */ | ||
461 | list_for_each_entry(inc, &rs->rs_recv_queue, i_item) { | ||
462 | total++; | ||
463 | if (total <= len) | ||
464 | rds_inc_info_copy(inc, iter, inc->i_saddr, | ||
465 | rs->rs_bound_addr, 1); | ||
466 | } | ||
467 | |||
468 | read_unlock(&rs->rs_recv_lock); | ||
469 | } | ||
470 | |||
471 | spin_unlock_irqrestore(&rds_sock_lock, flags); | ||
472 | |||
473 | lens->nr = total; | ||
474 | lens->each = sizeof(struct rds_info_message); | ||
475 | } | ||
476 | |||
477 | static void rds_sock_info(struct socket *sock, unsigned int len, | ||
478 | struct rds_info_iterator *iter, | ||
479 | struct rds_info_lengths *lens) | ||
480 | { | ||
481 | struct rds_info_socket sinfo; | ||
482 | struct rds_sock *rs; | ||
483 | unsigned long flags; | ||
484 | |||
485 | len /= sizeof(struct rds_info_socket); | ||
486 | |||
487 | spin_lock_irqsave(&rds_sock_lock, flags); | ||
488 | |||
489 | if (len < rds_sock_count) | ||
490 | goto out; | ||
491 | |||
492 | list_for_each_entry(rs, &rds_sock_list, rs_item) { | ||
493 | sinfo.sndbuf = rds_sk_sndbuf(rs); | ||
494 | sinfo.rcvbuf = rds_sk_rcvbuf(rs); | ||
495 | sinfo.bound_addr = rs->rs_bound_addr; | ||
496 | sinfo.connected_addr = rs->rs_conn_addr; | ||
497 | sinfo.bound_port = rs->rs_bound_port; | ||
498 | sinfo.connected_port = rs->rs_conn_port; | ||
499 | sinfo.inum = sock_i_ino(rds_rs_to_sk(rs)); | ||
500 | |||
501 | rds_info_copy(iter, &sinfo, sizeof(sinfo)); | ||
502 | } | ||
503 | |||
504 | out: | ||
505 | lens->nr = rds_sock_count; | ||
506 | lens->each = sizeof(struct rds_info_socket); | ||
507 | |||
508 | spin_unlock_irqrestore(&rds_sock_lock, flags); | ||
509 | } | ||
510 | |||
511 | static void __exit rds_exit(void) | ||
512 | { | ||
513 | rds_rdma_exit(); | ||
514 | sock_unregister(rds_family_ops.family); | ||
515 | proto_unregister(&rds_proto); | ||
516 | rds_conn_exit(); | ||
517 | rds_cong_exit(); | ||
518 | rds_sysctl_exit(); | ||
519 | rds_threads_exit(); | ||
520 | rds_stats_exit(); | ||
521 | rds_page_exit(); | ||
522 | rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info); | ||
523 | rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); | ||
524 | } | ||
525 | module_exit(rds_exit); | ||
526 | |||
527 | static int __init rds_init(void) | ||
528 | { | ||
529 | int ret; | ||
530 | |||
531 | ret = rds_conn_init(); | ||
532 | if (ret) | ||
533 | goto out; | ||
534 | ret = rds_threads_init(); | ||
535 | if (ret) | ||
536 | goto out_conn; | ||
537 | ret = rds_sysctl_init(); | ||
538 | if (ret) | ||
539 | goto out_threads; | ||
540 | ret = rds_stats_init(); | ||
541 | if (ret) | ||
542 | goto out_sysctl; | ||
543 | ret = proto_register(&rds_proto, 1); | ||
544 | if (ret) | ||
545 | goto out_stats; | ||
546 | ret = sock_register(&rds_family_ops); | ||
547 | if (ret) | ||
548 | goto out_proto; | ||
549 | |||
550 | rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info); | ||
551 | rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); | ||
552 | |||
553 | /* ib/iwarp transports currently compiled-in */ | ||
554 | ret = rds_rdma_init(); | ||
555 | if (ret) | ||
556 | goto out_sock; | ||
557 | goto out; | ||
558 | |||
559 | out_sock: | ||
560 | sock_unregister(rds_family_ops.family); | ||
561 | out_proto: | ||
562 | proto_unregister(&rds_proto); | ||
563 | out_stats: | ||
564 | rds_stats_exit(); | ||
565 | out_sysctl: | ||
566 | rds_sysctl_exit(); | ||
567 | out_threads: | ||
568 | rds_threads_exit(); | ||
569 | out_conn: | ||
570 | rds_conn_exit(); | ||
571 | rds_cong_exit(); | ||
572 | rds_page_exit(); | ||
573 | out: | ||
574 | return ret; | ||
575 | } | ||
576 | module_init(rds_init); | ||
577 | |||
578 | #define DRV_VERSION "4.0" | ||
579 | #define DRV_RELDATE "Feb 12, 2009" | ||
580 | |||
581 | MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>"); | ||
582 | MODULE_DESCRIPTION("RDS: Reliable Datagram Sockets" | ||
583 | " v" DRV_VERSION " (" DRV_RELDATE ")"); | ||
584 | MODULE_VERSION(DRV_VERSION); | ||
585 | MODULE_LICENSE("Dual BSD/GPL"); | ||
586 | MODULE_ALIAS_NETPROTO(PF_RDS); | ||
diff --git a/net/rds/bind.c b/net/rds/bind.c new file mode 100644 index 000000000000..c17cc39160ce --- /dev/null +++ b/net/rds/bind.c | |||
@@ -0,0 +1,199 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <net/sock.h> | ||
35 | #include <linux/in.h> | ||
36 | #include <linux/if_arp.h> | ||
37 | #include "rds.h" | ||
38 | |||
39 | /* | ||
40 | * XXX this probably still needs more work.. no INADDR_ANY, and rbtrees aren't | ||
41 | * particularly zippy. | ||
42 | * | ||
43 | * This is now called for every incoming frame so we arguably care much more | ||
44 | * about it than we used to. | ||
45 | */ | ||
46 | static DEFINE_SPINLOCK(rds_bind_lock); | ||
47 | static struct rb_root rds_bind_tree = RB_ROOT; | ||
48 | |||
49 | static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port, | ||
50 | struct rds_sock *insert) | ||
51 | { | ||
52 | struct rb_node **p = &rds_bind_tree.rb_node; | ||
53 | struct rb_node *parent = NULL; | ||
54 | struct rds_sock *rs; | ||
55 | u64 cmp; | ||
56 | u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); | ||
57 | |||
58 | while (*p) { | ||
59 | parent = *p; | ||
60 | rs = rb_entry(parent, struct rds_sock, rs_bound_node); | ||
61 | |||
62 | cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | | ||
63 | be16_to_cpu(rs->rs_bound_port); | ||
64 | |||
65 | if (needle < cmp) | ||
66 | p = &(*p)->rb_left; | ||
67 | else if (needle > cmp) | ||
68 | p = &(*p)->rb_right; | ||
69 | else | ||
70 | return rs; | ||
71 | } | ||
72 | |||
73 | if (insert) { | ||
74 | rb_link_node(&insert->rs_bound_node, parent, p); | ||
75 | rb_insert_color(&insert->rs_bound_node, &rds_bind_tree); | ||
76 | } | ||
77 | return NULL; | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * Return the rds_sock bound at the given local address. | ||
82 | * | ||
83 | * The rx path can race with rds_release. We notice if rds_release() has | ||
84 | * marked this socket and don't return a rs ref to the rx path. | ||
85 | */ | ||
86 | struct rds_sock *rds_find_bound(__be32 addr, __be16 port) | ||
87 | { | ||
88 | struct rds_sock *rs; | ||
89 | unsigned long flags; | ||
90 | |||
91 | spin_lock_irqsave(&rds_bind_lock, flags); | ||
92 | rs = rds_bind_tree_walk(addr, port, NULL); | ||
93 | if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) | ||
94 | rds_sock_addref(rs); | ||
95 | else | ||
96 | rs = NULL; | ||
97 | spin_unlock_irqrestore(&rds_bind_lock, flags); | ||
98 | |||
99 | rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, | ||
100 | ntohs(port)); | ||
101 | return rs; | ||
102 | } | ||
103 | |||
104 | /* returns -ve errno on failure, or 0 with the chosen port written to *port */ | ||
105 | static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) | ||
106 | { | ||
107 | unsigned long flags; | ||
108 | int ret = -EADDRINUSE; | ||
109 | u16 rover, last; | ||
110 | |||
111 | if (*port != 0) { | ||
112 | rover = be16_to_cpu(*port); | ||
113 | last = rover; | ||
114 | } else { | ||
115 | rover = max_t(u16, net_random(), 2); | ||
116 | last = rover - 1; | ||
117 | } | ||
118 | |||
119 | spin_lock_irqsave(&rds_bind_lock, flags); | ||
120 | |||
121 | do { | ||
122 | if (rover == 0) | ||
123 | rover++; | ||
124 | if (rds_bind_tree_walk(addr, cpu_to_be16(rover), rs) == NULL) { | ||
125 | *port = cpu_to_be16(rover); | ||
126 | ret = 0; | ||
127 | break; | ||
128 | } | ||
129 | } while (rover++ != last); | ||
130 | |||
131 | if (ret == 0) { | ||
132 | rs->rs_bound_addr = addr; | ||
133 | rs->rs_bound_port = *port; | ||
134 | rds_sock_addref(rs); | ||
135 | |||
136 | rdsdebug("rs %p binding to %pI4:%d\n", | ||
137 | rs, &addr, (int)ntohs(*port)); | ||
138 | } | ||
139 | |||
140 | spin_unlock_irqrestore(&rds_bind_lock, flags); | ||
141 | |||
142 | return ret; | ||
143 | } | ||
144 | |||
145 | void rds_remove_bound(struct rds_sock *rs) | ||
146 | { | ||
147 | unsigned long flags; | ||
148 | |||
149 | spin_lock_irqsave(&rds_bind_lock, flags); | ||
150 | |||
151 | if (rs->rs_bound_addr) { | ||
152 | rdsdebug("rs %p unbinding from %pI4:%d\n", | ||
153 | rs, &rs->rs_bound_addr, | ||
154 | ntohs(rs->rs_bound_port)); | ||
155 | |||
156 | rb_erase(&rs->rs_bound_node, &rds_bind_tree); | ||
157 | rds_sock_put(rs); | ||
158 | rs->rs_bound_addr = 0; | ||
159 | } | ||
160 | |||
161 | spin_unlock_irqrestore(&rds_bind_lock, flags); | ||
162 | } | ||
163 | |||
164 | int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) | ||
165 | { | ||
166 | struct sock *sk = sock->sk; | ||
167 | struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; | ||
168 | struct rds_sock *rs = rds_sk_to_rs(sk); | ||
169 | struct rds_transport *trans; | ||
170 | int ret = 0; | ||
171 | |||
172 | lock_sock(sk); | ||
173 | |||
174 | if (addr_len != sizeof(struct sockaddr_in) || | ||
175 | sin->sin_family != AF_INET || | ||
176 | rs->rs_bound_addr || | ||
177 | sin->sin_addr.s_addr == htonl(INADDR_ANY)) { | ||
178 | ret = -EINVAL; | ||
179 | goto out; | ||
180 | } | ||
181 | |||
182 | ret = rds_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port); | ||
183 | if (ret) | ||
184 | goto out; | ||
185 | |||
186 | trans = rds_trans_get_preferred(sin->sin_addr.s_addr); | ||
187 | if (trans == NULL) { | ||
188 | ret = -EADDRNOTAVAIL; | ||
189 | rds_remove_bound(rs); | ||
190 | goto out; | ||
191 | } | ||
192 | |||
193 | rs->rs_transport = trans; | ||
194 | ret = 0; | ||
195 | |||
196 | out: | ||
197 | release_sock(sk); | ||
198 | return ret; | ||
199 | } | ||
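
rds_bind() above accepts only an exact AF_INET address (no INADDR_ANY) and, via rds_add_bound(), picks a random unused port when sin_port is zero. A hypothetical userspace counterpart, assuming PF_RDS is visible through the socket headers of a kernel carrying this code:

    #include <arpa/inet.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    static int rds_open_bound(const char *local_ip)
    {
            struct sockaddr_in sin;
            int fd = socket(PF_RDS, SOCK_SEQPACKET, 0);  /* see rds_create() */

            if (fd < 0)
                    return -1;

            memset(&sin, 0, sizeof(sin));
            sin.sin_family = AF_INET;                  /* rds_bind() requirement */
            sin.sin_addr.s_addr = inet_addr(local_ip); /* must be a local address
                                                          with an RDS transport */
            sin.sin_port = 0;                          /* 0: kernel picks a port */

            if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
                    close(fd);
                    return -1;
            }
            return fd;
    }
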
diff --git a/net/rds/cong.c b/net/rds/cong.c new file mode 100644 index 000000000000..710e4599d76c --- /dev/null +++ b/net/rds/cong.c | |||
@@ -0,0 +1,404 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2007 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/types.h> | ||
34 | #include <linux/rbtree.h> | ||
35 | |||
36 | #include <asm-generic/bitops/le.h> | ||
37 | |||
38 | #include "rds.h" | ||
39 | |||
40 | /* | ||
41 | * This file implements the receive side of the unconventional congestion | ||
42 | * management in RDS. | ||
43 | * | ||
44 | * Messages waiting in the receive queue on the receiving socket are accounted | ||
45 | * against the socket's SO_RCVBUF option value. Only the payload bytes in the | ||
46 | * message are accounted for. If the number of bytes queued equals or exceeds | ||
47 | * rcvbuf then the socket is congested. All sends attempted to this socket's | ||
48 | * address should block or return -EWOULDBLOCK. | ||
49 | * | ||
50 | * Applications are expected to be reasonably tuned such that this situation | ||
51 | * very rarely occurs. An application encountering this "back-pressure" is | ||
52 | * considered a bug. | ||
53 | * | ||
54 | * This is implemented by having each node maintain bitmaps which indicate | ||
55 | * which ports on bound addresses are congested. As the bitmap changes it is | ||
56 | * sent through all the connections which terminate in the local address of the | ||
57 | * bitmap which changed. | ||
58 | * | ||
59 | * The bitmaps are allocated as connections are brought up. This avoids | ||
60 | * allocation in the interrupt handling path which queues messages on sockets. | ||
61 | * The dense bitmaps let transports send the entire bitmap on any bitmap change | ||
62 | * reasonably efficiently. This is much easier to implement than some | ||
63 | * finer-grained communication of per-port congestion. The sender does a very | ||
64 | * inexpensive bit test to test if the port it's about to send to is congested | ||
65 | * or not. | ||
66 | */ | ||
67 | |||
68 | /* | ||
69 | * Interaction with poll is a tad tricky. We want all processes stuck in | ||
70 | * poll to wake up and check whether a congested destination became uncongested. | ||
71 | * The really sad thing is we have no idea which destinations the application | ||
72 | * wants to send to - we don't even know which rds_connections are involved. | ||
73 | * So until we implement a more flexible rds poll interface, we have to make | ||
74 | * do with this: | ||
75 | * We maintain a global counter that is incremented each time a congestion map | ||
76 | * update is received. Each rds socket tracks this value, and if rds_poll | ||
77 | * finds that the saved generation number is smaller than the global generation | ||
78 | * number, it wakes up the process. | ||
79 | */ | ||
80 | static atomic_t rds_cong_generation = ATOMIC_INIT(0); | ||
81 | |||
82 | /* | ||
83 | * Congestion monitoring | ||
84 | */ | ||
85 | static LIST_HEAD(rds_cong_monitor); | ||
86 | static DEFINE_RWLOCK(rds_cong_monitor_lock); | ||
87 | |||
88 | /* | ||
89 | * Yes, a global lock. It's used so infrequently that it's worth keeping it | ||
90 | * global to simplify the locking. It's only used in the following | ||
91 | * circumstances: | ||
92 | * | ||
93 | * - on connection buildup to associate a conn with its maps | ||
94 | * - on map changes to inform conns of a new map to send | ||
95 | * | ||
96 | * It's sadly ordered under the socket callback lock and the connection lock. | ||
97 | * Receive paths can mark ports congested from interrupt context so the | ||
98 | * lock masks interrupts. | ||
99 | */ | ||
100 | static DEFINE_SPINLOCK(rds_cong_lock); | ||
101 | static struct rb_root rds_cong_tree = RB_ROOT; | ||
102 | |||
103 | static struct rds_cong_map *rds_cong_tree_walk(__be32 addr, | ||
104 | struct rds_cong_map *insert) | ||
105 | { | ||
106 | struct rb_node **p = &rds_cong_tree.rb_node; | ||
107 | struct rb_node *parent = NULL; | ||
108 | struct rds_cong_map *map; | ||
109 | |||
110 | while (*p) { | ||
111 | parent = *p; | ||
112 | map = rb_entry(parent, struct rds_cong_map, m_rb_node); | ||
113 | |||
114 | if (addr < map->m_addr) | ||
115 | p = &(*p)->rb_left; | ||
116 | else if (addr > map->m_addr) | ||
117 | p = &(*p)->rb_right; | ||
118 | else | ||
119 | return map; | ||
120 | } | ||
121 | |||
122 | if (insert) { | ||
123 | rb_link_node(&insert->m_rb_node, parent, p); | ||
124 | rb_insert_color(&insert->m_rb_node, &rds_cong_tree); | ||
125 | } | ||
126 | return NULL; | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * There is only ever one bitmap for any address. Connections try to allocate | ||
131 | * these bitmaps in the process of getting pointers to them. The bitmaps are only | ||
132 | * ever freed as the module is removed after all connections have been freed. | ||
133 | */ | ||
134 | static struct rds_cong_map *rds_cong_from_addr(__be32 addr) | ||
135 | { | ||
136 | struct rds_cong_map *map; | ||
137 | struct rds_cong_map *ret = NULL; | ||
138 | unsigned long zp; | ||
139 | unsigned long i; | ||
140 | unsigned long flags; | ||
141 | |||
142 | map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL); | ||
143 | if (map == NULL) | ||
144 | return NULL; | ||
145 | |||
146 | map->m_addr = addr; | ||
147 | init_waitqueue_head(&map->m_waitq); | ||
148 | INIT_LIST_HEAD(&map->m_conn_list); | ||
149 | |||
150 | for (i = 0; i < RDS_CONG_MAP_PAGES; i++) { | ||
151 | zp = get_zeroed_page(GFP_KERNEL); | ||
152 | if (zp == 0) | ||
153 | goto out; | ||
154 | map->m_page_addrs[i] = zp; | ||
155 | } | ||
156 | |||
157 | spin_lock_irqsave(&rds_cong_lock, flags); | ||
158 | ret = rds_cong_tree_walk(addr, map); | ||
159 | spin_unlock_irqrestore(&rds_cong_lock, flags); | ||
160 | |||
161 | if (ret == NULL) { | ||
162 | ret = map; | ||
163 | map = NULL; | ||
164 | } | ||
165 | |||
166 | out: | ||
167 | if (map) { | ||
168 | for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++) | ||
169 | free_page(map->m_page_addrs[i]); | ||
170 | kfree(map); | ||
171 | } | ||
172 | |||
173 | rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr)); | ||
174 | |||
175 | return ret; | ||
176 | } | ||
177 | |||
178 | /* | ||
179 | * Put the conn on its local map's list. This is called when the conn is | ||
180 | * really added to the hash. It's nested under the rds_conn_lock, sadly. | ||
181 | */ | ||
182 | void rds_cong_add_conn(struct rds_connection *conn) | ||
183 | { | ||
184 | unsigned long flags; | ||
185 | |||
186 | rdsdebug("conn %p now on map %p\n", conn, conn->c_lcong); | ||
187 | spin_lock_irqsave(&rds_cong_lock, flags); | ||
188 | list_add_tail(&conn->c_map_item, &conn->c_lcong->m_conn_list); | ||
189 | spin_unlock_irqrestore(&rds_cong_lock, flags); | ||
190 | } | ||
191 | |||
192 | void rds_cong_remove_conn(struct rds_connection *conn) | ||
193 | { | ||
194 | unsigned long flags; | ||
195 | |||
196 | rdsdebug("removing conn %p from map %p\n", conn, conn->c_lcong); | ||
197 | spin_lock_irqsave(&rds_cong_lock, flags); | ||
198 | list_del_init(&conn->c_map_item); | ||
199 | spin_unlock_irqrestore(&rds_cong_lock, flags); | ||
200 | } | ||
201 | |||
202 | int rds_cong_get_maps(struct rds_connection *conn) | ||
203 | { | ||
204 | conn->c_lcong = rds_cong_from_addr(conn->c_laddr); | ||
205 | conn->c_fcong = rds_cong_from_addr(conn->c_faddr); | ||
206 | |||
207 | if (conn->c_lcong == NULL || conn->c_fcong == NULL) | ||
208 | return -ENOMEM; | ||
209 | |||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | void rds_cong_queue_updates(struct rds_cong_map *map) | ||
214 | { | ||
215 | struct rds_connection *conn; | ||
216 | unsigned long flags; | ||
217 | |||
218 | spin_lock_irqsave(&rds_cong_lock, flags); | ||
219 | |||
220 | list_for_each_entry(conn, &map->m_conn_list, c_map_item) { | ||
221 | if (!test_and_set_bit(0, &conn->c_map_queued)) { | ||
222 | rds_stats_inc(s_cong_update_queued); | ||
223 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); | ||
224 | } | ||
225 | } | ||
226 | |||
227 | spin_unlock_irqrestore(&rds_cong_lock, flags); | ||
228 | } | ||
229 | |||
230 | void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask) | ||
231 | { | ||
232 | rdsdebug("waking map %p for %pI4\n", | ||
233 | map, &map->m_addr); | ||
234 | rds_stats_inc(s_cong_update_received); | ||
235 | atomic_inc(&rds_cong_generation); | ||
236 | if (waitqueue_active(&map->m_waitq)) | ||
237 | wake_up(&map->m_waitq); | ||
238 | if (waitqueue_active(&rds_poll_waitq)) | ||
239 | wake_up_all(&rds_poll_waitq); | ||
240 | |||
241 | if (portmask && !list_empty(&rds_cong_monitor)) { | ||
242 | unsigned long flags; | ||
243 | struct rds_sock *rs; | ||
244 | |||
245 | read_lock_irqsave(&rds_cong_monitor_lock, flags); | ||
246 | list_for_each_entry(rs, &rds_cong_monitor, rs_cong_list) { | ||
247 | spin_lock(&rs->rs_lock); | ||
248 | rs->rs_cong_notify |= (rs->rs_cong_mask & portmask); | ||
249 | rs->rs_cong_mask &= ~portmask; | ||
250 | spin_unlock(&rs->rs_lock); | ||
251 | if (rs->rs_cong_notify) | ||
252 | rds_wake_sk_sleep(rs); | ||
253 | } | ||
254 | read_unlock_irqrestore(&rds_cong_monitor_lock, flags); | ||
255 | } | ||
256 | } | ||
257 | |||
258 | int rds_cong_updated_since(unsigned long *recent) | ||
259 | { | ||
260 | unsigned long gen = atomic_read(&rds_cong_generation); | ||
261 | |||
262 | if (likely(*recent == gen)) | ||
263 | return 0; | ||
264 | *recent = gen; | ||
265 | return 1; | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * We're called under the locking that protects the sockets receive buffer | ||
270 | * consumption. This makes it a lot easier for the caller to only call us | ||
271 | * when it knows that an existing set bit needs to be cleared, and vice versa. | ||
272 | * We can't block and we need to deal with concurrent sockets working against | ||
273 | * the same per-address map. | ||
274 | */ | ||
275 | void rds_cong_set_bit(struct rds_cong_map *map, __be16 port) | ||
276 | { | ||
277 | unsigned long i; | ||
278 | unsigned long off; | ||
279 | |||
280 | rdsdebug("setting congestion for %pI4:%u in map %p\n", | ||
281 | &map->m_addr, ntohs(port), map); | ||
282 | |||
283 | i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; | ||
284 | off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; | ||
285 | |||
286 | generic___set_le_bit(off, (void *)map->m_page_addrs[i]); | ||
287 | } | ||
288 | |||
289 | void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) | ||
290 | { | ||
291 | unsigned long i; | ||
292 | unsigned long off; | ||
293 | |||
294 | rdsdebug("clearing congestion for %pI4:%u in map %p\n", | ||
295 | &map->m_addr, ntohs(port), map); | ||
296 | |||
297 | i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; | ||
298 | off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; | ||
299 | |||
300 | generic___clear_le_bit(off, (void *)map->m_page_addrs[i]); | ||
301 | } | ||
302 | |||
303 | static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) | ||
304 | { | ||
305 | unsigned long i; | ||
306 | unsigned long off; | ||
307 | |||
308 | i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; | ||
309 | off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; | ||
310 | |||
311 | return generic_test_le_bit(off, (void *)map->m_page_addrs[i]); | ||
312 | } | ||
313 | |||
314 | void rds_cong_add_socket(struct rds_sock *rs) | ||
315 | { | ||
316 | unsigned long flags; | ||
317 | |||
318 | write_lock_irqsave(&rds_cong_monitor_lock, flags); | ||
319 | if (list_empty(&rs->rs_cong_list)) | ||
320 | list_add(&rs->rs_cong_list, &rds_cong_monitor); | ||
321 | write_unlock_irqrestore(&rds_cong_monitor_lock, flags); | ||
322 | } | ||
323 | |||
324 | void rds_cong_remove_socket(struct rds_sock *rs) | ||
325 | { | ||
326 | unsigned long flags; | ||
327 | struct rds_cong_map *map; | ||
328 | |||
329 | write_lock_irqsave(&rds_cong_monitor_lock, flags); | ||
330 | list_del_init(&rs->rs_cong_list); | ||
331 | write_unlock_irqrestore(&rds_cong_monitor_lock, flags); | ||
332 | |||
333 | /* update congestion map for now-closed port */ | ||
334 | spin_lock_irqsave(&rds_cong_lock, flags); | ||
335 | map = rds_cong_tree_walk(rs->rs_bound_addr, NULL); | ||
336 | spin_unlock_irqrestore(&rds_cong_lock, flags); | ||
337 | |||
338 | if (map && rds_cong_test_bit(map, rs->rs_bound_port)) { | ||
339 | rds_cong_clear_bit(map, rs->rs_bound_port); | ||
340 | rds_cong_queue_updates(map); | ||
341 | } | ||
342 | } | ||
343 | |||
344 | int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock, | ||
345 | struct rds_sock *rs) | ||
346 | { | ||
347 | if (!rds_cong_test_bit(map, port)) | ||
348 | return 0; | ||
349 | if (nonblock) { | ||
350 | if (rs && rs->rs_cong_monitor) { | ||
351 | unsigned long flags; | ||
352 | |||
353 | /* It would have been nice to have an atomic set_bit on | ||
354 | * a uint64_t. */ | ||
355 | spin_lock_irqsave(&rs->rs_lock, flags); | ||
356 | rs->rs_cong_mask |= RDS_CONG_MONITOR_MASK(ntohs(port)); | ||
357 | spin_unlock_irqrestore(&rs->rs_lock, flags); | ||
358 | |||
359 | /* Test again - a congestion update may have arrived in | ||
360 | * the meantime. */ | ||
361 | if (!rds_cong_test_bit(map, port)) | ||
362 | return 0; | ||
363 | } | ||
364 | rds_stats_inc(s_cong_send_error); | ||
365 | return -ENOBUFS; | ||
366 | } | ||
367 | |||
368 | rds_stats_inc(s_cong_send_blocked); | ||
369 | rdsdebug("waiting on map %p for port %u\n", map, be16_to_cpu(port)); | ||
370 | |||
371 | return wait_event_interruptible(map->m_waitq, | ||
372 | !rds_cong_test_bit(map, port)); | ||
373 | } | ||
374 | |||
375 | void rds_cong_exit(void) | ||
376 | { | ||
377 | struct rb_node *node; | ||
378 | struct rds_cong_map *map; | ||
379 | unsigned long i; | ||
380 | |||
381 | while ((node = rb_first(&rds_cong_tree))) { | ||
382 | map = rb_entry(node, struct rds_cong_map, m_rb_node); | ||
383 | rdsdebug("freeing map %p\n", map); | ||
384 | rb_erase(&map->m_rb_node, &rds_cong_tree); | ||
385 | for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++) | ||
386 | free_page(map->m_page_addrs[i]); | ||
387 | kfree(map); | ||
388 | } | ||
389 | } | ||
390 | |||
391 | /* | ||
392 | * Allocate a RDS message containing a congestion update. | ||
393 | */ | ||
394 | struct rds_message *rds_cong_update_alloc(struct rds_connection *conn) | ||
395 | { | ||
396 | struct rds_cong_map *map = conn->c_lcong; | ||
397 | struct rds_message *rm; | ||
398 | |||
399 | rm = rds_message_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES); | ||
400 | if (!IS_ERR(rm)) | ||
401 | rm->m_inc.i_hdr.h_flags = RDS_FLAG_CONG_BITMAP; | ||
402 | |||
403 | return rm; | ||
404 | } | ||
diff --git a/net/rds/connection.c b/net/rds/connection.c new file mode 100644 index 000000000000..273f064930a8 --- /dev/null +++ b/net/rds/connection.c | |||
@@ -0,0 +1,487 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/list.h> | ||
35 | #include <net/inet_hashtables.h> | ||
36 | |||
37 | #include "rds.h" | ||
38 | #include "loop.h" | ||
39 | #include "rdma.h" | ||
40 | |||
41 | #define RDS_CONNECTION_HASH_BITS 12 | ||
42 | #define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS) | ||
43 | #define RDS_CONNECTION_HASH_MASK (RDS_CONNECTION_HASH_ENTRIES - 1) | ||
44 | |||
45 | /* converting this to RCU is a chore for another day.. */ | ||
46 | static DEFINE_SPINLOCK(rds_conn_lock); | ||
47 | static unsigned long rds_conn_count; | ||
48 | static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES]; | ||
49 | static struct kmem_cache *rds_conn_slab; | ||
50 | |||
51 | static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr) | ||
52 | { | ||
53 | /* Pass NULL, don't need struct net for hash */ | ||
54 | unsigned long hash = inet_ehashfn(NULL, | ||
55 | be32_to_cpu(laddr), 0, | ||
56 | be32_to_cpu(faddr), 0); | ||
57 | return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK]; | ||
58 | } | ||
59 | |||
60 | #define rds_conn_info_set(var, test, suffix) do { \ | ||
61 | if (test) \ | ||
62 | var |= RDS_INFO_CONNECTION_FLAG_##suffix; \ | ||
63 | } while (0) | ||
64 | |||
65 | static inline int rds_conn_is_sending(struct rds_connection *conn) | ||
66 | { | ||
67 | int ret = 0; | ||
68 | |||
69 | if (!mutex_trylock(&conn->c_send_lock)) | ||
70 | ret = 1; | ||
71 | else | ||
72 | mutex_unlock(&conn->c_send_lock); | ||
73 | |||
74 | return ret; | ||
75 | } | ||
76 | |||
77 | static struct rds_connection *rds_conn_lookup(struct hlist_head *head, | ||
78 | __be32 laddr, __be32 faddr, | ||
79 | struct rds_transport *trans) | ||
80 | { | ||
81 | struct rds_connection *conn, *ret = NULL; | ||
82 | struct hlist_node *pos; | ||
83 | |||
84 | hlist_for_each_entry(conn, pos, head, c_hash_node) { | ||
85 | if (conn->c_faddr == faddr && conn->c_laddr == laddr && | ||
86 | conn->c_trans == trans) { | ||
87 | ret = conn; | ||
88 | break; | ||
89 | } | ||
90 | } | ||
91 | rdsdebug("returning conn %p for %pI4 -> %pI4\n", ret, | ||
92 | &laddr, &faddr); | ||
93 | return ret; | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * This is called by transports as they're bringing down a connection. | ||
98 | * It clears partial message state so that the transport can start sending | ||
99 | * and receiving over this connection again in the future. It is up to | ||
100 | * the transport to have serialized this call with its send and recv. | ||
101 | */ | ||
102 | void rds_conn_reset(struct rds_connection *conn) | ||
103 | { | ||
104 | rdsdebug("connection %pI4 to %pI4 reset\n", | ||
105 | &conn->c_laddr, &conn->c_faddr); | ||
106 | |||
107 | rds_stats_inc(s_conn_reset); | ||
108 | rds_send_reset(conn); | ||
109 | conn->c_flags = 0; | ||
110 | |||
111 | /* Do not clear next_rx_seq here, else we cannot distinguish | ||
112 | * retransmitted packets from new packets, and will hand all | ||
113 | * of them to the application. That is not consistent with the | ||
114 | * reliability guarantees of RDS. */ | ||
115 | } | ||
116 | |||
117 | /* | ||
118 | * There is only ever one 'conn' for a given pair of addresses in the | ||
119 | * system at a time. They contain messages to be retransmitted and so | ||
120 | * span the lifetime of the actual underlying transport connections. | ||
121 | * | ||
122 | * For now they are not garbage collected once they're created. They | ||
123 | * are torn down as the module is removed, if ever. | ||
124 | */ | ||
125 | static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, | ||
126 | struct rds_transport *trans, gfp_t gfp, | ||
127 | int is_outgoing) | ||
128 | { | ||
129 | struct rds_connection *conn, *tmp, *parent = NULL; | ||
130 | struct hlist_head *head = rds_conn_bucket(laddr, faddr); | ||
131 | unsigned long flags; | ||
132 | int ret; | ||
133 | |||
134 | spin_lock_irqsave(&rds_conn_lock, flags); | ||
135 | conn = rds_conn_lookup(head, laddr, faddr, trans); | ||
136 | if (conn | ||
137 | && conn->c_loopback | ||
138 | && conn->c_trans != &rds_loop_transport | ||
139 | && !is_outgoing) { | ||
140 | /* This is a looped back IB connection, and we're | ||
141 | * called by the code handling the incoming connect. | ||
142 | * We need a second connection object into which we | ||
143 | * can stick the other QP. */ | ||
144 | parent = conn; | ||
145 | conn = parent->c_passive; | ||
146 | } | ||
147 | spin_unlock_irqrestore(&rds_conn_lock, flags); | ||
148 | if (conn) | ||
149 | goto out; | ||
150 | |||
151 | conn = kmem_cache_alloc(rds_conn_slab, gfp); | ||
152 | if (conn == NULL) { | ||
153 | conn = ERR_PTR(-ENOMEM); | ||
154 | goto out; | ||
155 | } | ||
156 | |||
157 | memset(conn, 0, sizeof(*conn)); | ||
158 | |||
159 | INIT_HLIST_NODE(&conn->c_hash_node); | ||
160 | conn->c_version = RDS_PROTOCOL_3_0; | ||
161 | conn->c_laddr = laddr; | ||
162 | conn->c_faddr = faddr; | ||
163 | spin_lock_init(&conn->c_lock); | ||
164 | conn->c_next_tx_seq = 1; | ||
165 | |||
166 | mutex_init(&conn->c_send_lock); | ||
167 | INIT_LIST_HEAD(&conn->c_send_queue); | ||
168 | INIT_LIST_HEAD(&conn->c_retrans); | ||
169 | |||
170 | ret = rds_cong_get_maps(conn); | ||
171 | if (ret) { | ||
172 | kmem_cache_free(rds_conn_slab, conn); | ||
173 | conn = ERR_PTR(ret); | ||
174 | goto out; | ||
175 | } | ||
176 | |||
177 | /* | ||
178 | * This is where a connection becomes loopback. If *any* RDS socket | ||
179 | * can bind to the destination address then we'd rather have the messages | ||
180 | * flow through loopback than through either transport. | ||
181 | */ | ||
182 | if (rds_trans_get_preferred(faddr)) { | ||
183 | conn->c_loopback = 1; | ||
184 | if (is_outgoing && trans->t_prefer_loopback) { | ||
185 | /* "outgoing" connection - and the transport | ||
186 | * says it wants the connection handled by the | ||
187 | * loopback transport. This is what TCP does. | ||
188 | */ | ||
189 | trans = &rds_loop_transport; | ||
190 | } | ||
191 | } | ||
192 | |||
193 | conn->c_trans = trans; | ||
194 | |||
195 | ret = trans->conn_alloc(conn, gfp); | ||
196 | if (ret) { | ||
197 | kmem_cache_free(rds_conn_slab, conn); | ||
198 | conn = ERR_PTR(ret); | ||
199 | goto out; | ||
200 | } | ||
201 | |||
202 | atomic_set(&conn->c_state, RDS_CONN_DOWN); | ||
203 | conn->c_reconnect_jiffies = 0; | ||
204 | INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker); | ||
205 | INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker); | ||
206 | INIT_DELAYED_WORK(&conn->c_conn_w, rds_connect_worker); | ||
207 | INIT_WORK(&conn->c_down_w, rds_shutdown_worker); | ||
208 | mutex_init(&conn->c_cm_lock); | ||
209 | conn->c_flags = 0; | ||
210 | |||
211 | rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n", | ||
212 | conn, &laddr, &faddr, | ||
213 | trans->t_name ? trans->t_name : "[unknown]", | ||
214 | is_outgoing ? "(outgoing)" : ""); | ||
215 | |||
216 | spin_lock_irqsave(&rds_conn_lock, flags); | ||
217 | if (parent == NULL) { | ||
218 | tmp = rds_conn_lookup(head, laddr, faddr, trans); | ||
219 | if (tmp == NULL) | ||
220 | hlist_add_head(&conn->c_hash_node, head); | ||
221 | } else { | ||
222 | tmp = parent->c_passive; | ||
223 | if (!tmp) | ||
224 | parent->c_passive = conn; | ||
225 | } | ||
226 | |||
227 | if (tmp) { | ||
228 | trans->conn_free(conn->c_transport_data); | ||
229 | kmem_cache_free(rds_conn_slab, conn); | ||
230 | conn = tmp; | ||
231 | } else { | ||
232 | rds_cong_add_conn(conn); | ||
233 | rds_conn_count++; | ||
234 | } | ||
235 | |||
236 | spin_unlock_irqrestore(&rds_conn_lock, flags); | ||
237 | |||
238 | out: | ||
239 | return conn; | ||
240 | } | ||
241 | |||
242 | struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr, | ||
243 | struct rds_transport *trans, gfp_t gfp) | ||
244 | { | ||
245 | return __rds_conn_create(laddr, faddr, trans, gfp, 0); | ||
246 | } | ||
247 | |||
248 | struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr, | ||
249 | struct rds_transport *trans, gfp_t gfp) | ||
250 | { | ||
251 | return __rds_conn_create(laddr, faddr, trans, gfp, 1); | ||
252 | } | ||
253 | |||
254 | void rds_conn_destroy(struct rds_connection *conn) | ||
255 | { | ||
256 | struct rds_message *rm, *rtmp; | ||
257 | |||
258 | rdsdebug("freeing conn %p for %pI4 -> " | ||
259 | "%pI4\n", conn, &conn->c_laddr, | ||
260 | &conn->c_faddr); | ||
261 | |||
262 | hlist_del_init(&conn->c_hash_node); | ||
263 | |||
264 | /* wait for the rds thread to shut it down */ | ||
265 | atomic_set(&conn->c_state, RDS_CONN_ERROR); | ||
266 | cancel_delayed_work(&conn->c_conn_w); | ||
267 | queue_work(rds_wq, &conn->c_down_w); | ||
268 | flush_workqueue(rds_wq); | ||
269 | |||
270 | /* tear down queued messages */ | ||
271 | list_for_each_entry_safe(rm, rtmp, | ||
272 | &conn->c_send_queue, | ||
273 | m_conn_item) { | ||
274 | list_del_init(&rm->m_conn_item); | ||
275 | BUG_ON(!list_empty(&rm->m_sock_item)); | ||
276 | rds_message_put(rm); | ||
277 | } | ||
278 | if (conn->c_xmit_rm) | ||
279 | rds_message_put(conn->c_xmit_rm); | ||
280 | |||
281 | conn->c_trans->conn_free(conn->c_transport_data); | ||
282 | |||
283 | /* | ||
284 | * The congestion maps aren't freed up here. They're | ||
285 | * freed by rds_cong_exit() after all the connections | ||
286 | * have been freed. | ||
287 | */ | ||
288 | rds_cong_remove_conn(conn); | ||
289 | |||
290 | BUG_ON(!list_empty(&conn->c_retrans)); | ||
291 | kmem_cache_free(rds_conn_slab, conn); | ||
292 | |||
293 | rds_conn_count--; | ||
294 | } | ||
295 | |||
296 | static void rds_conn_message_info(struct socket *sock, unsigned int len, | ||
297 | struct rds_info_iterator *iter, | ||
298 | struct rds_info_lengths *lens, | ||
299 | int want_send) | ||
300 | { | ||
301 | struct hlist_head *head; | ||
302 | struct hlist_node *pos; | ||
303 | struct list_head *list; | ||
304 | struct rds_connection *conn; | ||
305 | struct rds_message *rm; | ||
306 | unsigned long flags; | ||
307 | unsigned int total = 0; | ||
308 | size_t i; | ||
309 | |||
310 | len /= sizeof(struct rds_info_message); | ||
311 | |||
312 | spin_lock_irqsave(&rds_conn_lock, flags); | ||
313 | |||
314 | for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); | ||
315 | i++, head++) { | ||
316 | hlist_for_each_entry(conn, pos, head, c_hash_node) { | ||
317 | if (want_send) | ||
318 | list = &conn->c_send_queue; | ||
319 | else | ||
320 | list = &conn->c_retrans; | ||
321 | |||
322 | spin_lock(&conn->c_lock); | ||
323 | |||
324 | /* XXX too lazy to maintain counts.. */ | ||
325 | list_for_each_entry(rm, list, m_conn_item) { | ||
326 | total++; | ||
327 | if (total <= len) | ||
328 | rds_inc_info_copy(&rm->m_inc, iter, | ||
329 | conn->c_laddr, | ||
330 | conn->c_faddr, 0); | ||
331 | } | ||
332 | |||
333 | spin_unlock(&conn->c_lock); | ||
334 | } | ||
335 | } | ||
336 | |||
337 | spin_unlock_irqrestore(&rds_conn_lock, flags); | ||
338 | |||
339 | lens->nr = total; | ||
340 | lens->each = sizeof(struct rds_info_message); | ||
341 | } | ||
342 | |||
343 | static void rds_conn_message_info_send(struct socket *sock, unsigned int len, | ||
344 | struct rds_info_iterator *iter, | ||
345 | struct rds_info_lengths *lens) | ||
346 | { | ||
347 | rds_conn_message_info(sock, len, iter, lens, 1); | ||
348 | } | ||
349 | |||
350 | static void rds_conn_message_info_retrans(struct socket *sock, | ||
351 | unsigned int len, | ||
352 | struct rds_info_iterator *iter, | ||
353 | struct rds_info_lengths *lens) | ||
354 | { | ||
355 | rds_conn_message_info(sock, len, iter, lens, 0); | ||
356 | } | ||
357 | |||
358 | void rds_for_each_conn_info(struct socket *sock, unsigned int len, | ||
359 | struct rds_info_iterator *iter, | ||
360 | struct rds_info_lengths *lens, | ||
361 | int (*visitor)(struct rds_connection *, void *), | ||
362 | size_t item_len) | ||
363 | { | ||
364 | uint64_t buffer[(item_len + 7) / 8]; | ||
365 | struct hlist_head *head; | ||
366 | struct hlist_node *pos; | ||
367 | struct hlist_node *tmp; | ||
368 | struct rds_connection *conn; | ||
369 | unsigned long flags; | ||
370 | size_t i; | ||
371 | |||
372 | spin_lock_irqsave(&rds_conn_lock, flags); | ||
373 | |||
374 | lens->nr = 0; | ||
375 | lens->each = item_len; | ||
376 | |||
377 | for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); | ||
378 | i++, head++) { | ||
379 | hlist_for_each_entry_safe(conn, pos, tmp, head, c_hash_node) { | ||
380 | |||
381 | /* XXX no c_lock usage.. */ | ||
382 | if (!visitor(conn, buffer)) | ||
383 | continue; | ||
384 | |||
385 | /* We copy as much as we can fit in the buffer, | ||
386 | * but we count all items so that the caller | ||
387 | * can resize the buffer. */ | ||
388 | if (len >= item_len) { | ||
389 | rds_info_copy(iter, buffer, item_len); | ||
390 | len -= item_len; | ||
391 | } | ||
392 | lens->nr++; | ||
393 | } | ||
394 | } | ||
395 | |||
396 | spin_unlock_irqrestore(&rds_conn_lock, flags); | ||
397 | } | ||
398 | |||
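A brief note on the on-stack scratch buffer above: the declaration uint64_t buffer[(item_len + 7) / 8] rounds item_len up to whole 64-bit words, so the visitor always gets an 8-byte-aligned area of at least item_len bytes. As an illustrative example (the figure is not taken from the patch), an item_len of 84 bytes yields (84 + 7) / 8 = 11 u64 slots, i.e. 88 bytes of scratch space.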
399 | static int rds_conn_info_visitor(struct rds_connection *conn, | ||
400 | void *buffer) | ||
401 | { | ||
402 | struct rds_info_connection *cinfo = buffer; | ||
403 | |||
404 | cinfo->next_tx_seq = conn->c_next_tx_seq; | ||
405 | cinfo->next_rx_seq = conn->c_next_rx_seq; | ||
406 | cinfo->laddr = conn->c_laddr; | ||
407 | cinfo->faddr = conn->c_faddr; | ||
408 | strncpy(cinfo->transport, conn->c_trans->t_name, | ||
409 | sizeof(cinfo->transport)); | ||
410 | cinfo->flags = 0; | ||
411 | |||
412 | rds_conn_info_set(cinfo->flags, | ||
413 | rds_conn_is_sending(conn), SENDING); | ||
414 | /* XXX Future: return the state rather than these funky bits */ | ||
415 | rds_conn_info_set(cinfo->flags, | ||
416 | atomic_read(&conn->c_state) == RDS_CONN_CONNECTING, | ||
417 | CONNECTING); | ||
418 | rds_conn_info_set(cinfo->flags, | ||
419 | atomic_read(&conn->c_state) == RDS_CONN_UP, | ||
420 | CONNECTED); | ||
421 | return 1; | ||
422 | } | ||
423 | |||
424 | static void rds_conn_info(struct socket *sock, unsigned int len, | ||
425 | struct rds_info_iterator *iter, | ||
426 | struct rds_info_lengths *lens) | ||
427 | { | ||
428 | rds_for_each_conn_info(sock, len, iter, lens, | ||
429 | rds_conn_info_visitor, | ||
430 | sizeof(struct rds_info_connection)); | ||
431 | } | ||
432 | |||
433 | int __init rds_conn_init(void) | ||
434 | { | ||
435 | rds_conn_slab = kmem_cache_create("rds_connection", | ||
436 | sizeof(struct rds_connection), | ||
437 | 0, 0, NULL); | ||
438 | if (rds_conn_slab == NULL) | ||
439 | return -ENOMEM; | ||
440 | |||
441 | rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); | ||
442 | rds_info_register_func(RDS_INFO_SEND_MESSAGES, | ||
443 | rds_conn_message_info_send); | ||
444 | rds_info_register_func(RDS_INFO_RETRANS_MESSAGES, | ||
445 | rds_conn_message_info_retrans); | ||
446 | |||
447 | return 0; | ||
448 | } | ||
449 | |||
450 | void rds_conn_exit(void) | ||
451 | { | ||
452 | rds_loop_exit(); | ||
453 | |||
454 | WARN_ON(!hlist_empty(rds_conn_hash)); | ||
455 | |||
456 | kmem_cache_destroy(rds_conn_slab); | ||
457 | |||
458 | rds_info_deregister_func(RDS_INFO_CONNECTIONS, rds_conn_info); | ||
459 | rds_info_deregister_func(RDS_INFO_SEND_MESSAGES, | ||
460 | rds_conn_message_info_send); | ||
461 | rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES, | ||
462 | rds_conn_message_info_retrans); | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * Force a disconnect | ||
467 | */ | ||
468 | void rds_conn_drop(struct rds_connection *conn) | ||
469 | { | ||
470 | atomic_set(&conn->c_state, RDS_CONN_ERROR); | ||
471 | queue_work(rds_wq, &conn->c_down_w); | ||
472 | } | ||
473 | |||
474 | /* | ||
475 | * An error occurred on the connection | ||
476 | */ | ||
477 | void | ||
478 | __rds_conn_error(struct rds_connection *conn, const char *fmt, ...) | ||
479 | { | ||
480 | va_list ap; | ||
481 | |||
482 | va_start(ap, fmt); | ||
483 | vprintk(fmt, ap); | ||
484 | va_end(ap); | ||
485 | |||
486 | rds_conn_drop(conn); | ||
487 | } | ||
diff --git a/net/rds/ib.c b/net/rds/ib.c new file mode 100644 index 000000000000..06a7b798d9a7 --- /dev/null +++ b/net/rds/ib.c | |||
@@ -0,0 +1,323 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/in.h> | ||
35 | #include <linux/if.h> | ||
36 | #include <linux/netdevice.h> | ||
37 | #include <linux/inetdevice.h> | ||
38 | #include <linux/if_arp.h> | ||
39 | #include <linux/delay.h> | ||
40 | |||
41 | #include "rds.h" | ||
42 | #include "ib.h" | ||
43 | |||
44 | unsigned int fmr_pool_size = RDS_FMR_POOL_SIZE; | ||
45 | unsigned int fmr_message_size = RDS_FMR_SIZE + 1; /* +1 allows for unaligned MRs */ | ||
46 | |||
47 | module_param(fmr_pool_size, int, 0444); | ||
48 | MODULE_PARM_DESC(fmr_pool_size, "Max number of FMRs per HCA"); | ||
49 | module_param(fmr_message_size, int, 0444); | ||
50 | MODULE_PARM_DESC(fmr_message_size, "Max size of an RDMA transfer"); | ||
51 | |||
52 | struct list_head rds_ib_devices; | ||
53 | |||
54 | DEFINE_SPINLOCK(ib_nodev_conns_lock); | ||
55 | LIST_HEAD(ib_nodev_conns); | ||
56 | |||
57 | void rds_ib_add_one(struct ib_device *device) | ||
58 | { | ||
59 | struct rds_ib_device *rds_ibdev; | ||
60 | struct ib_device_attr *dev_attr; | ||
61 | |||
62 | /* Only handle IB (no iWARP) devices */ | ||
63 | if (device->node_type != RDMA_NODE_IB_CA) | ||
64 | return; | ||
65 | |||
66 | dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); | ||
67 | if (!dev_attr) | ||
68 | return; | ||
69 | |||
70 | if (ib_query_device(device, dev_attr)) { | ||
71 | rdsdebug("Query device failed for %s\n", device->name); | ||
72 | goto free_attr; | ||
73 | } | ||
74 | |||
75 | rds_ibdev = kmalloc(sizeof *rds_ibdev, GFP_KERNEL); | ||
76 | if (!rds_ibdev) | ||
77 | goto free_attr; | ||
78 | |||
79 | spin_lock_init(&rds_ibdev->spinlock); | ||
80 | |||
81 | rds_ibdev->max_wrs = dev_attr->max_qp_wr; | ||
82 | rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); | ||
83 | |||
84 | rds_ibdev->fmr_page_shift = max(9, ffs(dev_attr->page_size_cap) - 1); | ||
85 | rds_ibdev->fmr_page_size = 1 << rds_ibdev->fmr_page_shift; | ||
86 | rds_ibdev->fmr_page_mask = ~((u64) rds_ibdev->fmr_page_size - 1); | ||
87 | rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr ?: 32; | ||
88 | rds_ibdev->max_fmrs = dev_attr->max_fmr ? | ||
89 | min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) : | ||
90 | fmr_pool_size; | ||
91 | |||
92 | rds_ibdev->dev = device; | ||
93 | rds_ibdev->pd = ib_alloc_pd(device); | ||
94 | if (IS_ERR(rds_ibdev->pd)) | ||
95 | goto free_dev; | ||
96 | |||
97 | rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, | ||
98 | IB_ACCESS_LOCAL_WRITE); | ||
99 | if (IS_ERR(rds_ibdev->mr)) | ||
100 | goto err_pd; | ||
101 | |||
102 | rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev); | ||
103 | if (IS_ERR(rds_ibdev->mr_pool)) { | ||
104 | rds_ibdev->mr_pool = NULL; | ||
105 | goto err_mr; | ||
106 | } | ||
107 | |||
108 | INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); | ||
109 | INIT_LIST_HEAD(&rds_ibdev->conn_list); | ||
110 | list_add_tail(&rds_ibdev->list, &rds_ib_devices); | ||
111 | |||
112 | ib_set_client_data(device, &rds_ib_client, rds_ibdev); | ||
113 | |||
114 | goto free_attr; | ||
115 | |||
116 | err_mr: | ||
117 | ib_dereg_mr(rds_ibdev->mr); | ||
118 | err_pd: | ||
119 | ib_dealloc_pd(rds_ibdev->pd); | ||
120 | free_dev: | ||
121 | kfree(rds_ibdev); | ||
122 | free_attr: | ||
123 | kfree(dev_attr); | ||
124 | } | ||
125 | |||
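To make the FMR page-geometry arithmetic in rds_ib_add_one() concrete, here is a minimal worked example; the 4 KB capability value is an assumption for illustration only, not something every HCA reports:

    /* Assume the device's page_size_cap has its lowest set bit at 0x1000 (4 KB). */
    int shift = max(9, ffs(0x1000) - 1);   /* ffs() returns 13, so shift = 12 */
    int size  = 1 << shift;                /* 4096-byte FMR pages */
    u64 mask  = ~((u64) size - 1);         /* 0xfffffffffffff000 */

These are the same values that end up in the fmr_page_shift, fmr_page_size and fmr_page_mask fields for such a device.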
126 | void rds_ib_remove_one(struct ib_device *device) | ||
127 | { | ||
128 | struct rds_ib_device *rds_ibdev; | ||
129 | struct rds_ib_ipaddr *i_ipaddr, *i_next; | ||
130 | |||
131 | rds_ibdev = ib_get_client_data(device, &rds_ib_client); | ||
132 | if (!rds_ibdev) | ||
133 | return; | ||
134 | |||
135 | list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { | ||
136 | list_del(&i_ipaddr->list); | ||
137 | kfree(i_ipaddr); | ||
138 | } | ||
139 | |||
140 | rds_ib_remove_conns(rds_ibdev); | ||
141 | |||
142 | if (rds_ibdev->mr_pool) | ||
143 | rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); | ||
144 | |||
145 | ib_dereg_mr(rds_ibdev->mr); | ||
146 | |||
147 | while (ib_dealloc_pd(rds_ibdev->pd)) { | ||
148 | rdsdebug("Failed to dealloc pd %p\n", rds_ibdev->pd); | ||
149 | msleep(1); | ||
150 | } | ||
151 | |||
152 | list_del(&rds_ibdev->list); | ||
153 | kfree(rds_ibdev); | ||
154 | } | ||
155 | |||
156 | struct ib_client rds_ib_client = { | ||
157 | .name = "rds_ib", | ||
158 | .add = rds_ib_add_one, | ||
159 | .remove = rds_ib_remove_one | ||
160 | }; | ||
161 | |||
162 | static int rds_ib_conn_info_visitor(struct rds_connection *conn, | ||
163 | void *buffer) | ||
164 | { | ||
165 | struct rds_info_rdma_connection *iinfo = buffer; | ||
166 | struct rds_ib_connection *ic; | ||
167 | |||
168 | /* We will only ever look at IB transports */ | ||
169 | if (conn->c_trans != &rds_ib_transport) | ||
170 | return 0; | ||
171 | |||
172 | iinfo->src_addr = conn->c_laddr; | ||
173 | iinfo->dst_addr = conn->c_faddr; | ||
174 | |||
175 | memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); | ||
176 | memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); | ||
177 | if (rds_conn_state(conn) == RDS_CONN_UP) { | ||
178 | struct rds_ib_device *rds_ibdev; | ||
179 | struct rdma_dev_addr *dev_addr; | ||
180 | |||
181 | ic = conn->c_transport_data; | ||
182 | dev_addr = &ic->i_cm_id->route.addr.dev_addr; | ||
183 | |||
184 | ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); | ||
185 | ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); | ||
186 | |||
187 | rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); | ||
188 | iinfo->max_send_wr = ic->i_send_ring.w_nr; | ||
189 | iinfo->max_recv_wr = ic->i_recv_ring.w_nr; | ||
190 | iinfo->max_send_sge = rds_ibdev->max_sge; | ||
191 | rds_ib_get_mr_info(rds_ibdev, iinfo); | ||
192 | } | ||
193 | return 1; | ||
194 | } | ||
195 | |||
196 | static void rds_ib_ic_info(struct socket *sock, unsigned int len, | ||
197 | struct rds_info_iterator *iter, | ||
198 | struct rds_info_lengths *lens) | ||
199 | { | ||
200 | rds_for_each_conn_info(sock, len, iter, lens, | ||
201 | rds_ib_conn_info_visitor, | ||
202 | sizeof(struct rds_info_rdma_connection)); | ||
203 | } | ||
204 | |||
205 | |||
206 | /* | ||
207 | * Early RDS/IB was built to only bind to an address if there is an IPoIB | ||
208 | * device with that address set. | ||
209 | * | ||
210 | * If it were me, I'd advocate for something more flexible. Sending and | ||
211 | * receiving should be device-agnostic. Transports would try and maintain | ||
212 | * connections between peers who have messages queued. Userspace would be | ||
213 | * allowed to influence which paths have priority. We could call userspace | ||
214 | * asserting this policy "routing". | ||
215 | */ | ||
216 | static int rds_ib_laddr_check(__be32 addr) | ||
217 | { | ||
218 | int ret; | ||
219 | struct rdma_cm_id *cm_id; | ||
220 | struct sockaddr_in sin; | ||
221 | |||
222 | /* Create a CMA ID and try to bind it. This catches both | ||
223 | * IB and iWARP capable NICs. | ||
224 | */ | ||
225 | cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); | ||
226 | if (!cm_id) | ||
227 | return -EADDRNOTAVAIL; | ||
228 | |||
229 | memset(&sin, 0, sizeof(sin)); | ||
230 | sin.sin_family = AF_INET; | ||
231 | sin.sin_addr.s_addr = addr; | ||
232 | |||
233 | /* rdma_bind_addr will only succeed for IB & iWARP devices */ | ||
234 | ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); | ||
235 | /* due to this, we will claim to support iWARP devices unless we | ||
236 | * check node_type. */ | ||
237 | if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA) | ||
238 | ret = -EADDRNOTAVAIL; | ||
239 | |||
240 | rdsdebug("addr %pI4 ret %d node type %d\n", | ||
241 | &addr, ret, | ||
242 | cm_id->device ? cm_id->device->node_type : -1); | ||
243 | |||
244 | rdma_destroy_id(cm_id); | ||
245 | |||
246 | return ret; | ||
247 | } | ||
248 | |||
249 | void rds_ib_exit(void) | ||
250 | { | ||
251 | rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); | ||
252 | rds_ib_remove_nodev_conns(); | ||
253 | ib_unregister_client(&rds_ib_client); | ||
254 | rds_ib_sysctl_exit(); | ||
255 | rds_ib_recv_exit(); | ||
256 | rds_trans_unregister(&rds_ib_transport); | ||
257 | } | ||
258 | |||
259 | struct rds_transport rds_ib_transport = { | ||
260 | .laddr_check = rds_ib_laddr_check, | ||
261 | .xmit_complete = rds_ib_xmit_complete, | ||
262 | .xmit = rds_ib_xmit, | ||
263 | .xmit_cong_map = NULL, | ||
264 | .xmit_rdma = rds_ib_xmit_rdma, | ||
265 | .recv = rds_ib_recv, | ||
266 | .conn_alloc = rds_ib_conn_alloc, | ||
267 | .conn_free = rds_ib_conn_free, | ||
268 | .conn_connect = rds_ib_conn_connect, | ||
269 | .conn_shutdown = rds_ib_conn_shutdown, | ||
270 | .inc_copy_to_user = rds_ib_inc_copy_to_user, | ||
271 | .inc_purge = rds_ib_inc_purge, | ||
272 | .inc_free = rds_ib_inc_free, | ||
273 | .cm_initiate_connect = rds_ib_cm_initiate_connect, | ||
274 | .cm_handle_connect = rds_ib_cm_handle_connect, | ||
275 | .cm_connect_complete = rds_ib_cm_connect_complete, | ||
276 | .stats_info_copy = rds_ib_stats_info_copy, | ||
277 | .exit = rds_ib_exit, | ||
278 | .get_mr = rds_ib_get_mr, | ||
279 | .sync_mr = rds_ib_sync_mr, | ||
280 | .free_mr = rds_ib_free_mr, | ||
281 | .flush_mrs = rds_ib_flush_mrs, | ||
282 | .t_owner = THIS_MODULE, | ||
283 | .t_name = "infiniband", | ||
284 | }; | ||
285 | |||
286 | int __init rds_ib_init(void) | ||
287 | { | ||
288 | int ret; | ||
289 | |||
290 | INIT_LIST_HEAD(&rds_ib_devices); | ||
291 | |||
292 | ret = ib_register_client(&rds_ib_client); | ||
293 | if (ret) | ||
294 | goto out; | ||
295 | |||
296 | ret = rds_ib_sysctl_init(); | ||
297 | if (ret) | ||
298 | goto out_ibreg; | ||
299 | |||
300 | ret = rds_ib_recv_init(); | ||
301 | if (ret) | ||
302 | goto out_sysctl; | ||
303 | |||
304 | ret = rds_trans_register(&rds_ib_transport); | ||
305 | if (ret) | ||
306 | goto out_recv; | ||
307 | |||
308 | rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); | ||
309 | |||
310 | goto out; | ||
311 | |||
312 | out_recv: | ||
313 | rds_ib_recv_exit(); | ||
314 | out_sysctl: | ||
315 | rds_ib_sysctl_exit(); | ||
316 | out_ibreg: | ||
317 | ib_unregister_client(&rds_ib_client); | ||
318 | out: | ||
319 | return ret; | ||
320 | } | ||
321 | |||
322 | MODULE_LICENSE("GPL"); | ||
323 | |||
diff --git a/net/rds/ib.h b/net/rds/ib.h new file mode 100644 index 000000000000..8be563a1363a --- /dev/null +++ b/net/rds/ib.h | |||
@@ -0,0 +1,367 @@ | |||
1 | #ifndef _RDS_IB_H | ||
2 | #define _RDS_IB_H | ||
3 | |||
4 | #include <rdma/ib_verbs.h> | ||
5 | #include <rdma/rdma_cm.h> | ||
6 | #include "rds.h" | ||
7 | #include "rdma_transport.h" | ||
8 | |||
9 | #define RDS_FMR_SIZE 256 | ||
10 | #define RDS_FMR_POOL_SIZE 4096 | ||
11 | |||
12 | #define RDS_IB_MAX_SGE 8 | ||
13 | #define RDS_IB_RECV_SGE 2 | ||
14 | |||
15 | #define RDS_IB_DEFAULT_RECV_WR 1024 | ||
16 | #define RDS_IB_DEFAULT_SEND_WR 256 | ||
17 | |||
18 | #define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */ | ||
19 | |||
20 | extern struct list_head rds_ib_devices; | ||
21 | |||
22 | /* | ||
23 | * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to | ||
24 | * try to minimize the amount of memory tied up in both the device and | ||
25 | * socket receive queues. | ||
26 | */ | ||
27 | /* page offset of the final full frag that fits in the page */ | ||
28 | #define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE) | ||
29 | struct rds_page_frag { | ||
30 | struct list_head f_item; | ||
31 | struct page *f_page; | ||
32 | unsigned long f_offset; | ||
33 | dma_addr_t f_mapped; | ||
34 | }; | ||
35 | |||
36 | struct rds_ib_incoming { | ||
37 | struct list_head ii_frags; | ||
38 | struct rds_incoming ii_inc; | ||
39 | }; | ||
40 | |||
41 | struct rds_ib_connect_private { | ||
42 | /* Add new fields at the end, and don't permute existing fields. */ | ||
43 | __be32 dp_saddr; | ||
44 | __be32 dp_daddr; | ||
45 | u8 dp_protocol_major; | ||
46 | u8 dp_protocol_minor; | ||
47 | __be16 dp_protocol_minor_mask; /* bitmask */ | ||
48 | __be32 dp_reserved1; | ||
49 | __be64 dp_ack_seq; | ||
50 | __be32 dp_credit; /* non-zero enables flow ctl */ | ||
51 | }; | ||
52 | |||
53 | struct rds_ib_send_work { | ||
54 | struct rds_message *s_rm; | ||
55 | struct rds_rdma_op *s_op; | ||
56 | struct ib_send_wr s_wr; | ||
57 | struct ib_sge s_sge[RDS_IB_MAX_SGE]; | ||
58 | unsigned long s_queued; | ||
59 | }; | ||
60 | |||
61 | struct rds_ib_recv_work { | ||
62 | struct rds_ib_incoming *r_ibinc; | ||
63 | struct rds_page_frag *r_frag; | ||
64 | struct ib_recv_wr r_wr; | ||
65 | struct ib_sge r_sge[2]; | ||
66 | }; | ||
67 | |||
68 | struct rds_ib_work_ring { | ||
69 | u32 w_nr; | ||
70 | u32 w_alloc_ptr; | ||
71 | u32 w_alloc_ctr; | ||
72 | u32 w_free_ptr; | ||
73 | atomic_t w_free_ctr; | ||
74 | }; | ||
75 | |||
76 | struct rds_ib_device; | ||
77 | |||
78 | struct rds_ib_connection { | ||
79 | |||
80 | struct list_head ib_node; | ||
81 | struct rds_ib_device *rds_ibdev; | ||
82 | struct rds_connection *conn; | ||
83 | |||
84 | /* alphabet soup, IBTA style */ | ||
85 | struct rdma_cm_id *i_cm_id; | ||
86 | struct ib_pd *i_pd; | ||
87 | struct ib_mr *i_mr; | ||
88 | struct ib_cq *i_send_cq; | ||
89 | struct ib_cq *i_recv_cq; | ||
90 | |||
91 | /* tx */ | ||
92 | struct rds_ib_work_ring i_send_ring; | ||
93 | struct rds_message *i_rm; | ||
94 | struct rds_header *i_send_hdrs; | ||
95 | u64 i_send_hdrs_dma; | ||
96 | struct rds_ib_send_work *i_sends; | ||
97 | |||
98 | /* rx */ | ||
99 | struct mutex i_recv_mutex; | ||
100 | struct rds_ib_work_ring i_recv_ring; | ||
101 | struct rds_ib_incoming *i_ibinc; | ||
102 | u32 i_recv_data_rem; | ||
103 | struct rds_header *i_recv_hdrs; | ||
104 | u64 i_recv_hdrs_dma; | ||
105 | struct rds_ib_recv_work *i_recvs; | ||
106 | struct rds_page_frag i_frag; | ||
107 | u64 i_ack_recv; /* last ACK received */ | ||
108 | |||
109 | /* sending acks */ | ||
110 | unsigned long i_ack_flags; | ||
111 | u64 i_ack_next; /* next ACK to send */ | ||
112 | struct rds_header *i_ack; | ||
113 | struct ib_send_wr i_ack_wr; | ||
114 | struct ib_sge i_ack_sge; | ||
115 | u64 i_ack_dma; | ||
116 | unsigned long i_ack_queued; | ||
117 | |||
118 | /* Flow control related information | ||
119 | * | ||
120 | * Our algorithm uses a pair of variables that we need to access | ||
121 | * atomically - one for the send credits, and one for the posted | ||
122 | * recv credits we need to transfer to the remote peer. | ||
123 | * Rather than protect them using a slow spinlock, we put both into | ||
124 | * a single atomic_t and update it using cmpxchg | ||
125 | */ | ||
126 | atomic_t i_credits; | ||
127 | |||
128 | /* Protocol version specific information */ | ||
129 | unsigned int i_flowctl:1; /* enable/disable flow ctl */ | ||
130 | |||
131 | /* Batched completions */ | ||
132 | unsigned int i_unsignaled_wrs; | ||
133 | long i_unsignaled_bytes; | ||
134 | }; | ||
135 | |||
136 | /* This assumes that atomic_t is at least 32 bits */ | ||
137 | #define IB_GET_SEND_CREDITS(v) ((v) & 0xffff) | ||
138 | #define IB_GET_POST_CREDITS(v) ((v) >> 16) | ||
139 | #define IB_SET_SEND_CREDITS(v) ((v) & 0xffff) | ||
140 | #define IB_SET_POST_CREDITS(v) ((v) << 16) | ||
141 | |||
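To illustrate how the packed credit counter described above is meant to be updated without a lock, here is a minimal sketch using the accessor macros just defined. It is a hypothetical helper for illustration only, not the actual rds_ib_send_grab_credits() declared later in this header:

    /* Hypothetical example: try to take one send credit from the packed counter. */
    static int example_take_send_credit(atomic_t *credits)
    {
        int oldval, newval;

        do {
            oldval = atomic_read(credits);
            if (IB_GET_SEND_CREDITS(oldval) == 0)
                return 0;               /* no send credit available */
            newval = oldval - 1;        /* send credits live in the low 16 bits */
        } while (atomic_cmpxchg(credits, oldval, newval) != oldval);

        return 1;
    }

The cmpxchg loop simply retries if another CPU modified either half of the counter in the meantime, which is exactly why both values are packed into a single atomic_t.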
142 | struct rds_ib_ipaddr { | ||
143 | struct list_head list; | ||
144 | __be32 ipaddr; | ||
145 | }; | ||
146 | |||
147 | struct rds_ib_device { | ||
148 | struct list_head list; | ||
149 | struct list_head ipaddr_list; | ||
150 | struct list_head conn_list; | ||
151 | struct ib_device *dev; | ||
152 | struct ib_pd *pd; | ||
153 | struct ib_mr *mr; | ||
154 | struct rds_ib_mr_pool *mr_pool; | ||
155 | int fmr_page_shift; | ||
156 | int fmr_page_size; | ||
157 | u64 fmr_page_mask; | ||
158 | unsigned int fmr_max_remaps; | ||
159 | unsigned int max_fmrs; | ||
160 | int max_sge; | ||
161 | unsigned int max_wrs; | ||
162 | spinlock_t spinlock; /* protect the above */ | ||
163 | }; | ||
164 | |||
165 | /* bits for i_ack_flags */ | ||
166 | #define IB_ACK_IN_FLIGHT 0 | ||
167 | #define IB_ACK_REQUESTED 1 | ||
168 | |||
169 | /* Magic WR_ID for ACKs */ | ||
170 | #define RDS_IB_ACK_WR_ID (~(u64) 0) | ||
171 | |||
172 | struct rds_ib_statistics { | ||
173 | uint64_t s_ib_connect_raced; | ||
174 | uint64_t s_ib_listen_closed_stale; | ||
175 | uint64_t s_ib_tx_cq_call; | ||
176 | uint64_t s_ib_tx_cq_event; | ||
177 | uint64_t s_ib_tx_ring_full; | ||
178 | uint64_t s_ib_tx_throttle; | ||
179 | uint64_t s_ib_tx_sg_mapping_failure; | ||
180 | uint64_t s_ib_tx_stalled; | ||
181 | uint64_t s_ib_tx_credit_updates; | ||
182 | uint64_t s_ib_rx_cq_call; | ||
183 | uint64_t s_ib_rx_cq_event; | ||
184 | uint64_t s_ib_rx_ring_empty; | ||
185 | uint64_t s_ib_rx_refill_from_cq; | ||
186 | uint64_t s_ib_rx_refill_from_thread; | ||
187 | uint64_t s_ib_rx_alloc_limit; | ||
188 | uint64_t s_ib_rx_credit_updates; | ||
189 | uint64_t s_ib_ack_sent; | ||
190 | uint64_t s_ib_ack_send_failure; | ||
191 | uint64_t s_ib_ack_send_delayed; | ||
192 | uint64_t s_ib_ack_send_piggybacked; | ||
193 | uint64_t s_ib_ack_received; | ||
194 | uint64_t s_ib_rdma_mr_alloc; | ||
195 | uint64_t s_ib_rdma_mr_free; | ||
196 | uint64_t s_ib_rdma_mr_used; | ||
197 | uint64_t s_ib_rdma_mr_pool_flush; | ||
198 | uint64_t s_ib_rdma_mr_pool_wait; | ||
199 | uint64_t s_ib_rdma_mr_pool_depleted; | ||
200 | }; | ||
201 | |||
202 | extern struct workqueue_struct *rds_ib_wq; | ||
203 | |||
204 | /* | ||
205 | * Fake ib_dma_sync_sg_for_{cpu,device} as long as ib_verbs.h | ||
206 | * doesn't define them. | ||
207 | */ | ||
208 | static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev, | ||
209 | struct scatterlist *sg, unsigned int sg_dma_len, int direction) | ||
210 | { | ||
211 | unsigned int i; | ||
212 | |||
213 | for (i = 0; i < sg_dma_len; ++i) { | ||
214 | ib_dma_sync_single_for_cpu(dev, | ||
215 | ib_sg_dma_address(dev, &sg[i]), | ||
216 | ib_sg_dma_len(dev, &sg[i]), | ||
217 | direction); | ||
218 | } | ||
219 | } | ||
220 | #define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu | ||
221 | |||
222 | static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev, | ||
223 | struct scatterlist *sg, unsigned int sg_dma_len, int direction) | ||
224 | { | ||
225 | unsigned int i; | ||
226 | |||
227 | for (i = 0; i < sg_dma_len; ++i) { | ||
228 | ib_dma_sync_single_for_device(dev, | ||
229 | ib_sg_dma_address(dev, &sg[i]), | ||
230 | ib_sg_dma_len(dev, &sg[i]), | ||
231 | direction); | ||
232 | } | ||
233 | } | ||
234 | #define ib_dma_sync_sg_for_device rds_ib_dma_sync_sg_for_device | ||
235 | |||
236 | |||
237 | /* ib.c */ | ||
238 | extern struct rds_transport rds_ib_transport; | ||
239 | extern void rds_ib_add_one(struct ib_device *device); | ||
240 | extern void rds_ib_remove_one(struct ib_device *device); | ||
241 | extern struct ib_client rds_ib_client; | ||
242 | |||
243 | extern unsigned int fmr_pool_size; | ||
244 | extern unsigned int fmr_message_size; | ||
245 | |||
246 | extern spinlock_t ib_nodev_conns_lock; | ||
247 | extern struct list_head ib_nodev_conns; | ||
248 | |||
249 | /* ib_cm.c */ | ||
250 | int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp); | ||
251 | void rds_ib_conn_free(void *arg); | ||
252 | int rds_ib_conn_connect(struct rds_connection *conn); | ||
253 | void rds_ib_conn_shutdown(struct rds_connection *conn); | ||
254 | void rds_ib_state_change(struct sock *sk); | ||
255 | int __init rds_ib_listen_init(void); | ||
256 | void rds_ib_listen_stop(void); | ||
257 | void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); | ||
258 | int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, | ||
259 | struct rdma_cm_event *event); | ||
260 | int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id); | ||
261 | void rds_ib_cm_connect_complete(struct rds_connection *conn, | ||
262 | struct rdma_cm_event *event); | ||
263 | |||
264 | |||
265 | #define rds_ib_conn_error(conn, fmt...) \ | ||
266 | __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) | ||
267 | |||
268 | /* ib_rdma.c */ | ||
269 | int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); | ||
270 | int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); | ||
271 | void rds_ib_remove_nodev_conns(void); | ||
272 | void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev); | ||
273 | struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); | ||
274 | void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); | ||
275 | void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); | ||
276 | void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, | ||
277 | struct rds_sock *rs, u32 *key_ret); | ||
278 | void rds_ib_sync_mr(void *trans_private, int dir); | ||
279 | void rds_ib_free_mr(void *trans_private, int invalidate); | ||
280 | void rds_ib_flush_mrs(void); | ||
281 | |||
282 | /* ib_recv.c */ | ||
283 | int __init rds_ib_recv_init(void); | ||
284 | void rds_ib_recv_exit(void); | ||
285 | int rds_ib_recv(struct rds_connection *conn); | ||
286 | int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, | ||
287 | gfp_t page_gfp, int prefill); | ||
288 | void rds_ib_inc_purge(struct rds_incoming *inc); | ||
289 | void rds_ib_inc_free(struct rds_incoming *inc); | ||
290 | int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, | ||
291 | size_t size); | ||
292 | void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context); | ||
293 | void rds_ib_recv_init_ring(struct rds_ib_connection *ic); | ||
294 | void rds_ib_recv_clear_ring(struct rds_ib_connection *ic); | ||
295 | void rds_ib_recv_init_ack(struct rds_ib_connection *ic); | ||
296 | void rds_ib_attempt_ack(struct rds_ib_connection *ic); | ||
297 | void rds_ib_ack_send_complete(struct rds_ib_connection *ic); | ||
298 | u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic); | ||
299 | |||
300 | /* ib_ring.c */ | ||
301 | void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr); | ||
302 | void rds_ib_ring_resize(struct rds_ib_work_ring *ring, u32 nr); | ||
303 | u32 rds_ib_ring_alloc(struct rds_ib_work_ring *ring, u32 val, u32 *pos); | ||
304 | void rds_ib_ring_free(struct rds_ib_work_ring *ring, u32 val); | ||
305 | void rds_ib_ring_unalloc(struct rds_ib_work_ring *ring, u32 val); | ||
306 | int rds_ib_ring_empty(struct rds_ib_work_ring *ring); | ||
307 | int rds_ib_ring_low(struct rds_ib_work_ring *ring); | ||
308 | u32 rds_ib_ring_oldest(struct rds_ib_work_ring *ring); | ||
309 | u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest); | ||
310 | extern wait_queue_head_t rds_ib_ring_empty_wait; | ||
311 | |||
312 | /* ib_send.c */ | ||
313 | void rds_ib_xmit_complete(struct rds_connection *conn); | ||
314 | int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, | ||
315 | unsigned int hdr_off, unsigned int sg, unsigned int off); | ||
316 | void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context); | ||
317 | void rds_ib_send_init_ring(struct rds_ib_connection *ic); | ||
318 | void rds_ib_send_clear_ring(struct rds_ib_connection *ic); | ||
319 | int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); | ||
320 | void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits); | ||
321 | void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted); | ||
322 | int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, | ||
323 | u32 *adv_credits, int need_posted); | ||
324 | |||
325 | /* ib_stats.c */ | ||
326 | DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); | ||
327 | #define rds_ib_stats_inc(member) rds_stats_inc_which(rds_ib_stats, member) | ||
328 | unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter, | ||
329 | unsigned int avail); | ||
330 | |||
331 | /* ib_sysctl.c */ | ||
332 | int __init rds_ib_sysctl_init(void); | ||
333 | void rds_ib_sysctl_exit(void); | ||
334 | extern unsigned long rds_ib_sysctl_max_send_wr; | ||
335 | extern unsigned long rds_ib_sysctl_max_recv_wr; | ||
336 | extern unsigned long rds_ib_sysctl_max_unsig_wrs; | ||
337 | extern unsigned long rds_ib_sysctl_max_unsig_bytes; | ||
338 | extern unsigned long rds_ib_sysctl_max_recv_allocation; | ||
339 | extern unsigned int rds_ib_sysctl_flow_control; | ||
340 | extern ctl_table rds_ib_sysctl_table[]; | ||
341 | |||
342 | /* | ||
343 | * Helper functions for getting/setting the header and data SGEs in | ||
344 | * RDS packets (not RDMA) | ||
345 | */ | ||
346 | static inline struct ib_sge * | ||
347 | rds_ib_header_sge(struct rds_ib_connection *ic, struct ib_sge *sge) | ||
348 | { | ||
349 | return &sge[0]; | ||
350 | } | ||
351 | |||
352 | static inline struct ib_sge * | ||
353 | rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge) | ||
354 | { | ||
355 | return &sge[1]; | ||
356 | } | ||
357 | |||
358 | static inline void rds_ib_set_64bit(u64 *ptr, u64 val) | ||
359 | { | ||
360 | #if BITS_PER_LONG == 64 | ||
361 | *ptr = val; | ||
362 | #else | ||
363 | set_64bit(ptr, val); | ||
364 | #endif | ||
365 | } | ||
366 | |||
367 | #endif | ||
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c new file mode 100644 index 000000000000..0532237bd128 --- /dev/null +++ b/net/rds/ib_cm.c | |||
@@ -0,0 +1,726 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/in.h> | ||
35 | #include <linux/vmalloc.h> | ||
36 | |||
37 | #include "rds.h" | ||
38 | #include "ib.h" | ||
39 | |||
40 | /* | ||
41 | * Set the selected protocol version | ||
42 | */ | ||
43 | static void rds_ib_set_protocol(struct rds_connection *conn, unsigned int version) | ||
44 | { | ||
45 | conn->c_version = version; | ||
46 | } | ||
47 | |||
48 | /* | ||
49 | * Set up flow control | ||
50 | */ | ||
51 | static void rds_ib_set_flow_control(struct rds_connection *conn, u32 credits) | ||
52 | { | ||
53 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
54 | |||
55 | if (rds_ib_sysctl_flow_control && credits != 0) { | ||
56 | /* We're doing flow control */ | ||
57 | ic->i_flowctl = 1; | ||
58 | rds_ib_send_add_credits(conn, credits); | ||
59 | } else { | ||
60 | ic->i_flowctl = 0; | ||
61 | } | ||
62 | } | ||
63 | |||
64 | /* | ||
65 | * Tune RNR behavior. Without flow control, we use a rather | ||
66 | * low timeout, but not the absolute minimum - this should | ||
67 | * be tunable. | ||
68 | * | ||
69 | * We already set the RNR retry count to 7 (which is the | ||
70 | * smallest infinite number :-) above. | ||
71 | * If flow control is off, we want to change this back to 0 | ||
72 | * so that we learn quickly when our credit accounting is | ||
73 | * buggy. | ||
74 | * | ||
75 | * Caller passes in a qp_attr pointer - don't waste stack space | ||
76 | * by allocating this twice. | ||
77 | */ | ||
78 | static void | ||
79 | rds_ib_tune_rnr(struct rds_ib_connection *ic, struct ib_qp_attr *attr) | ||
80 | { | ||
81 | int ret; | ||
82 | |||
83 | attr->min_rnr_timer = IB_RNR_TIMER_000_32; | ||
84 | ret = ib_modify_qp(ic->i_cm_id->qp, attr, IB_QP_MIN_RNR_TIMER); | ||
85 | if (ret) | ||
86 | printk(KERN_NOTICE "ib_modify_qp(IB_QP_MIN_RNR_TIMER): err=%d\n", -ret); | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * Connection established. | ||
91 | * We get here for both outgoing and incoming connections. | ||
92 | */ | ||
93 | void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event) | ||
94 | { | ||
95 | const struct rds_ib_connect_private *dp = NULL; | ||
96 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
97 | struct rds_ib_device *rds_ibdev; | ||
98 | struct ib_qp_attr qp_attr; | ||
99 | int err; | ||
100 | |||
101 | if (event->param.conn.private_data_len) { | ||
102 | dp = event->param.conn.private_data; | ||
103 | |||
104 | rds_ib_set_protocol(conn, | ||
105 | RDS_PROTOCOL(dp->dp_protocol_major, | ||
106 | dp->dp_protocol_minor)); | ||
107 | rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); | ||
108 | } | ||
109 | |||
110 | printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n", | ||
111 | &conn->c_laddr, | ||
112 | RDS_PROTOCOL_MAJOR(conn->c_version), | ||
113 | RDS_PROTOCOL_MINOR(conn->c_version), | ||
114 | ic->i_flowctl ? ", flow control" : ""); | ||
115 | |||
116 | /* Tune RNR behavior */ | ||
117 | rds_ib_tune_rnr(ic, &qp_attr); | ||
118 | |||
119 | qp_attr.qp_state = IB_QPS_RTS; | ||
120 | err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE); | ||
121 | if (err) | ||
122 | printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err); | ||
123 | |||
124 | /* update ib_device with this local ipaddr & conn */ | ||
125 | rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); | ||
126 | err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr); | ||
127 | if (err) | ||
128 | printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); | ||
129 | err = rds_ib_add_conn(rds_ibdev, conn); | ||
130 | if (err) | ||
131 | printk(KERN_ERR "rds_ib_add_conn failed (%d)\n", err); | ||
132 | |||
133 | /* If the peer gave us the last packet it saw, process this as if | ||
134 | * we had received a regular ACK. */ | ||
135 | if (dp && dp->dp_ack_seq) | ||
136 | rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); | ||
137 | |||
138 | rds_connect_complete(conn); | ||
139 | } | ||
140 | |||
141 | static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, | ||
142 | struct rdma_conn_param *conn_param, | ||
143 | struct rds_ib_connect_private *dp, | ||
144 | u32 protocol_version) | ||
145 | { | ||
146 | memset(conn_param, 0, sizeof(struct rdma_conn_param)); | ||
147 | /* XXX tune these? */ | ||
148 | conn_param->responder_resources = 1; | ||
149 | conn_param->initiator_depth = 1; | ||
150 | conn_param->retry_count = 7; | ||
151 | conn_param->rnr_retry_count = 7; | ||
152 | |||
153 | if (dp) { | ||
154 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
155 | |||
156 | memset(dp, 0, sizeof(*dp)); | ||
157 | dp->dp_saddr = conn->c_laddr; | ||
158 | dp->dp_daddr = conn->c_faddr; | ||
159 | dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version); | ||
160 | dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version); | ||
161 | dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); | ||
162 | dp->dp_ack_seq = rds_ib_piggyb_ack(ic); | ||
163 | |||
164 | /* Advertise flow control */ | ||
165 | if (ic->i_flowctl) { | ||
166 | unsigned int credits; | ||
167 | |||
168 | credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)); | ||
169 | dp->dp_credit = cpu_to_be32(credits); | ||
170 | atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits); | ||
171 | } | ||
172 | |||
173 | conn_param->private_data = dp; | ||
174 | conn_param->private_data_len = sizeof(*dp); | ||
175 | } | ||
176 | } | ||
177 | |||
178 | static void rds_ib_cq_event_handler(struct ib_event *event, void *data) | ||
179 | { | ||
180 | rdsdebug("event %u data %p\n", event->event, data); | ||
181 | } | ||
182 | |||
183 | static void rds_ib_qp_event_handler(struct ib_event *event, void *data) | ||
184 | { | ||
185 | struct rds_connection *conn = data; | ||
186 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
187 | |||
188 | rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event); | ||
189 | |||
190 | switch (event->event) { | ||
191 | case IB_EVENT_COMM_EST: | ||
192 | rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); | ||
193 | break; | ||
194 | default: | ||
195 | printk(KERN_WARNING "RDS/ib: unhandled QP event %u " | ||
196 | "on connection to %pI4\n", event->event, | ||
197 | &conn->c_faddr); | ||
198 | break; | ||
199 | } | ||
200 | } | ||
201 | |||
202 | /* | ||
203 | * This needs to be very careful to not leave IS_ERR pointers around for | ||
204 | * cleanup to trip over. | ||
205 | */ | ||
206 | static int rds_ib_setup_qp(struct rds_connection *conn) | ||
207 | { | ||
208 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
209 | struct ib_device *dev = ic->i_cm_id->device; | ||
210 | struct ib_qp_init_attr attr; | ||
211 | struct rds_ib_device *rds_ibdev; | ||
212 | int ret; | ||
213 | |||
214 | /* rds_ib_add_one creates a rds_ib_device object per IB device, | ||
215 | * and allocates a protection domain, memory range and FMR pool | ||
216 | * for each. If that fails for any reason, it will not register | ||
217 | * the rds_ibdev at all. | ||
218 | */ | ||
219 | rds_ibdev = ib_get_client_data(dev, &rds_ib_client); | ||
220 | if (rds_ibdev == NULL) { | ||
221 | if (printk_ratelimit()) | ||
222 | printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n", | ||
223 | dev->name); | ||
224 | return -EOPNOTSUPP; | ||
225 | } | ||
226 | |||
227 | if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) | ||
228 | rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); | ||
229 | if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1) | ||
230 | rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1); | ||
231 | |||
232 | /* Protection domain and memory range */ | ||
233 | ic->i_pd = rds_ibdev->pd; | ||
234 | ic->i_mr = rds_ibdev->mr; | ||
235 | |||
236 | ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler, | ||
237 | rds_ib_cq_event_handler, conn, | ||
238 | ic->i_send_ring.w_nr + 1, 0); | ||
239 | if (IS_ERR(ic->i_send_cq)) { | ||
240 | ret = PTR_ERR(ic->i_send_cq); | ||
241 | ic->i_send_cq = NULL; | ||
242 | rdsdebug("ib_create_cq send failed: %d\n", ret); | ||
243 | goto out; | ||
244 | } | ||
245 | |||
246 | ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler, | ||
247 | rds_ib_cq_event_handler, conn, | ||
248 | ic->i_recv_ring.w_nr, 0); | ||
249 | if (IS_ERR(ic->i_recv_cq)) { | ||
250 | ret = PTR_ERR(ic->i_recv_cq); | ||
251 | ic->i_recv_cq = NULL; | ||
252 | rdsdebug("ib_create_cq recv failed: %d\n", ret); | ||
253 | goto out; | ||
254 | } | ||
255 | |||
256 | ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); | ||
257 | if (ret) { | ||
258 | rdsdebug("ib_req_notify_cq send failed: %d\n", ret); | ||
259 | goto out; | ||
260 | } | ||
261 | |||
262 | ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); | ||
263 | if (ret) { | ||
264 | rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); | ||
265 | goto out; | ||
266 | } | ||
267 | |||
268 | /* XXX negotiate max send/recv with remote? */ | ||
269 | memset(&attr, 0, sizeof(attr)); | ||
270 | attr.event_handler = rds_ib_qp_event_handler; | ||
271 | attr.qp_context = conn; | ||
272 | /* + 1 to allow for the single ack message */ | ||
273 | attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1; | ||
274 | attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1; | ||
275 | attr.cap.max_send_sge = rds_ibdev->max_sge; | ||
276 | attr.cap.max_recv_sge = RDS_IB_RECV_SGE; | ||
277 | attr.sq_sig_type = IB_SIGNAL_REQ_WR; | ||
278 | attr.qp_type = IB_QPT_RC; | ||
279 | attr.send_cq = ic->i_send_cq; | ||
280 | attr.recv_cq = ic->i_recv_cq; | ||
281 | |||
282 | /* | ||
283 | * XXX this can fail if max_*_wr is too large? Are we supposed | ||
284 | * to back off until we get a value that the hardware can support? | ||
285 | */ | ||
286 | ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); | ||
287 | if (ret) { | ||
288 | rdsdebug("rdma_create_qp failed: %d\n", ret); | ||
289 | goto out; | ||
290 | } | ||
291 | |||
292 | ic->i_send_hdrs = ib_dma_alloc_coherent(dev, | ||
293 | ic->i_send_ring.w_nr * | ||
294 | sizeof(struct rds_header), | ||
295 | &ic->i_send_hdrs_dma, GFP_KERNEL); | ||
296 | if (ic->i_send_hdrs == NULL) { | ||
297 | ret = -ENOMEM; | ||
298 | rdsdebug("ib_dma_alloc_coherent send failed\n"); | ||
299 | goto out; | ||
300 | } | ||
301 | |||
302 | ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, | ||
303 | ic->i_recv_ring.w_nr * | ||
304 | sizeof(struct rds_header), | ||
305 | &ic->i_recv_hdrs_dma, GFP_KERNEL); | ||
306 | if (ic->i_recv_hdrs == NULL) { | ||
307 | ret = -ENOMEM; | ||
308 | rdsdebug("ib_dma_alloc_coherent recv failed\n"); | ||
309 | goto out; | ||
310 | } | ||
311 | |||
312 | ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), | ||
313 | &ic->i_ack_dma, GFP_KERNEL); | ||
314 | if (ic->i_ack == NULL) { | ||
315 | ret = -ENOMEM; | ||
316 | rdsdebug("ib_dma_alloc_coherent ack failed\n"); | ||
317 | goto out; | ||
318 | } | ||
319 | |||
320 | ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); | ||
321 | if (ic->i_sends == NULL) { | ||
322 | ret = -ENOMEM; | ||
323 | rdsdebug("send allocation failed\n"); | ||
324 | goto out; | ||
325 | } | ||
326 | rds_ib_send_init_ring(ic); | ||
327 | |||
328 | ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); | ||
329 | if (ic->i_recvs == NULL) { | ||
330 | ret = -ENOMEM; | ||
331 | rdsdebug("recv allocation failed\n"); | ||
332 | goto out; | ||
333 | } | ||
334 | |||
335 | rds_ib_recv_init_ring(ic); | ||
336 | rds_ib_recv_init_ack(ic); | ||
337 | |||
338 | /* Post receive buffers - as a side effect, this will update | ||
339 | * the posted credit count. */ | ||
340 | rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1); | ||
341 | |||
342 | rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr, | ||
343 | ic->i_send_cq, ic->i_recv_cq); | ||
344 | |||
345 | out: | ||
346 | return ret; | ||
347 | } | ||
348 | |||
349 | static u32 rds_ib_protocol_compatible(const struct rds_ib_connect_private *dp) | ||
350 | { | ||
351 | u16 common; | ||
352 | u32 version = 0; | ||
353 | |||
354 | /* rdma_cm private data is odd - when there is any private data in the | ||
355 | * request, we will be given a pretty large buffer without being told the | ||
356 | * original size. The only way to tell the difference is by looking at | ||
357 | * the contents, which are initialized to zero. | ||
358 | * If the protocol version fields aren't set, this is a connection attempt | ||
359 | * from an older version. This could be 3.0 or 2.0 - we can't tell. | ||
360 | * We really should have changed this for OFED 1.3 :-( */ | ||
361 | if (dp->dp_protocol_major == 0) | ||
362 | return RDS_PROTOCOL_3_0; | ||
363 | |||
364 | common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IB_SUPPORTED_PROTOCOLS; | ||
365 | if (dp->dp_protocol_major == 3 && common) { | ||
366 | version = RDS_PROTOCOL_3_0; | ||
367 | while ((common >>= 1) != 0) | ||
368 | version++; | ||
369 | } else if (printk_ratelimit()) { | ||
370 | printk(KERN_NOTICE "RDS: Connection from %pI4 using " | ||
371 | "incompatible protocol version %u.%u\n", | ||
372 | &dp->dp_saddr, | ||
373 | dp->dp_protocol_major, | ||
374 | dp->dp_protocol_minor); | ||
375 | } | ||
376 | return version; | ||
377 | } | ||
378 | |||
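A short worked example of the negotiation above, assuming the usual encoding where bit N of the minor-version mask advertises support for minor version N: with RDS_IB_SUPPORTED_PROTOCOLS defined as 0x0003 in ib.h, a peer that sends dp_protocol_major = 3 and dp_protocol_minor_mask = 0x0007 yields common = 0x0003. The loop shifts common right once (leaving 0x0001, so version is bumped by one) and the next shift reaches zero, so the function returns protocol 3.1 - the highest minor version that both sides claim to support.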
379 | int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, | ||
380 | struct rdma_cm_event *event) | ||
381 | { | ||
382 | __be64 lguid = cm_id->route.path_rec->sgid.global.interface_id; | ||
383 | __be64 fguid = cm_id->route.path_rec->dgid.global.interface_id; | ||
384 | const struct rds_ib_connect_private *dp = event->param.conn.private_data; | ||
385 | struct rds_ib_connect_private dp_rep; | ||
386 | struct rds_connection *conn = NULL; | ||
387 | struct rds_ib_connection *ic = NULL; | ||
388 | struct rdma_conn_param conn_param; | ||
389 | u32 version; | ||
390 | int err, destroy = 1; | ||
391 | |||
392 | /* Check whether the remote protocol version matches ours. */ | ||
393 | version = rds_ib_protocol_compatible(dp); | ||
394 | if (!version) | ||
395 | goto out; | ||
396 | |||
397 | rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u lguid 0x%llx fguid " | ||
398 | "0x%llx\n", &dp->dp_saddr, &dp->dp_daddr, | ||
399 | RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version), | ||
400 | (unsigned long long)be64_to_cpu(lguid), | ||
401 | (unsigned long long)be64_to_cpu(fguid)); | ||
402 | |||
403 | conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport, | ||
404 | GFP_KERNEL); | ||
405 | if (IS_ERR(conn)) { | ||
406 | rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); | ||
407 | conn = NULL; | ||
408 | goto out; | ||
409 | } | ||
410 | |||
411 | /* | ||
412 | * The connection request may occur while the | ||
413 | * previous connection still exists, e.g. in the case of failover. | ||
414 | * But as connections may be initiated simultaneously | ||
415 | * by both hosts, we have a random backoff mechanism - | ||
416 | * see the comment above rds_queue_reconnect() | ||
417 | */ | ||
418 | mutex_lock(&conn->c_cm_lock); | ||
419 | if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { | ||
420 | if (rds_conn_state(conn) == RDS_CONN_UP) { | ||
421 | rdsdebug("incoming connect while connecting\n"); | ||
422 | rds_conn_drop(conn); | ||
423 | rds_ib_stats_inc(s_ib_listen_closed_stale); | ||
424 | } else | ||
425 | if (rds_conn_state(conn) == RDS_CONN_CONNECTING) { | ||
426 | /* Wait and see - our connect may still be succeeding */ | ||
427 | rds_ib_stats_inc(s_ib_connect_raced); | ||
428 | } | ||
429 | mutex_unlock(&conn->c_cm_lock); | ||
430 | goto out; | ||
431 | } | ||
432 | |||
433 | ic = conn->c_transport_data; | ||
434 | |||
435 | rds_ib_set_protocol(conn, version); | ||
436 | rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); | ||
437 | |||
438 | /* If the peer gave us the last packet it saw, process this as if | ||
439 | * we had received a regular ACK. */ | ||
440 | if (dp->dp_ack_seq) | ||
441 | rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); | ||
442 | |||
443 | BUG_ON(cm_id->context); | ||
444 | BUG_ON(ic->i_cm_id); | ||
445 | |||
446 | ic->i_cm_id = cm_id; | ||
447 | cm_id->context = conn; | ||
448 | |||
449 | /* We got halfway through setting up the ib_connection; if we | ||
450 | * fail now, we have to take the long route out of this mess. */ | ||
451 | destroy = 0; | ||
452 | |||
453 | err = rds_ib_setup_qp(conn); | ||
454 | if (err) { | ||
455 | rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err); | ||
456 | goto out; | ||
457 | } | ||
458 | |||
459 | rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version); | ||
460 | |||
461 | /* rdma_accept() calls rdma_reject() internally if it fails */ | ||
462 | err = rdma_accept(cm_id, &conn_param); | ||
463 | mutex_unlock(&conn->c_cm_lock); | ||
464 | if (err) { | ||
465 | rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err); | ||
466 | goto out; | ||
467 | } | ||
468 | |||
469 | return 0; | ||
470 | |||
471 | out: | ||
472 | rdma_reject(cm_id, NULL, 0); | ||
473 | return destroy; | ||
474 | } | ||
475 | |||
476 | |||
477 | int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id) | ||
478 | { | ||
479 | struct rds_connection *conn = cm_id->context; | ||
480 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
481 | struct rdma_conn_param conn_param; | ||
482 | struct rds_ib_connect_private dp; | ||
483 | int ret; | ||
484 | |||
485 | /* If the peer doesn't do protocol negotiation, we must | ||
486 | * default to RDSv3.0 */ | ||
487 | rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0); | ||
488 | ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */ | ||
489 | |||
490 | ret = rds_ib_setup_qp(conn); | ||
491 | if (ret) { | ||
492 | rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", ret); | ||
493 | goto out; | ||
494 | } | ||
495 | |||
496 | rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION); | ||
497 | |||
498 | ret = rdma_connect(cm_id, &conn_param); | ||
499 | if (ret) | ||
500 | rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret); | ||
501 | |||
502 | out: | ||
503 | /* Beware - returning non-zero tells the rdma_cm to destroy | ||
504 | * the cm_id. We should certainly not do it as long as we still | ||
505 | * "own" the cm_id. */ | ||
506 | if (ret) { | ||
507 | if (ic->i_cm_id == cm_id) | ||
508 | ret = 0; | ||
509 | } | ||
510 | return ret; | ||
511 | } | ||
512 | |||
513 | int rds_ib_conn_connect(struct rds_connection *conn) | ||
514 | { | ||
515 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
516 | struct sockaddr_in src, dest; | ||
517 | int ret; | ||
518 | |||
519 | /* XXX I wonder what effect the port space has */ | ||
520 | /* delegate cm event handler to rdma_transport */ | ||
521 | ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, | ||
522 | RDMA_PS_TCP); | ||
523 | if (IS_ERR(ic->i_cm_id)) { | ||
524 | ret = PTR_ERR(ic->i_cm_id); | ||
525 | ic->i_cm_id = NULL; | ||
526 | rdsdebug("rdma_create_id() failed: %d\n", ret); | ||
527 | goto out; | ||
528 | } | ||
529 | |||
530 | rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn); | ||
531 | |||
532 | src.sin_family = AF_INET; | ||
533 | src.sin_addr.s_addr = (__force u32)conn->c_laddr; | ||
534 | src.sin_port = (__force u16)htons(0); | ||
535 | |||
536 | dest.sin_family = AF_INET; | ||
537 | dest.sin_addr.s_addr = (__force u32)conn->c_faddr; | ||
538 | dest.sin_port = (__force u16)htons(RDS_PORT); | ||
539 | |||
540 | ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src, | ||
541 | (struct sockaddr *)&dest, | ||
542 | RDS_RDMA_RESOLVE_TIMEOUT_MS); | ||
543 | if (ret) { | ||
544 | rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id, | ||
545 | ret); | ||
546 | rdma_destroy_id(ic->i_cm_id); | ||
547 | ic->i_cm_id = NULL; | ||
548 | } | ||
549 | |||
550 | out: | ||
551 | return ret; | ||
552 | } | ||
553 | |||
554 | /* | ||
555 | * This is careful to clean up only the resources that were actually built | ||
556 | * up, so that it can be called at any point during startup. In fact it | ||
557 | * can be called multiple times for a given connection. | ||
558 | */ | ||
559 | void rds_ib_conn_shutdown(struct rds_connection *conn) | ||
560 | { | ||
561 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
562 | int err = 0; | ||
563 | |||
564 | rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id, | ||
565 | ic->i_pd, ic->i_send_cq, ic->i_recv_cq, | ||
566 | ic->i_cm_id ? ic->i_cm_id->qp : NULL); | ||
567 | |||
568 | if (ic->i_cm_id) { | ||
569 | struct ib_device *dev = ic->i_cm_id->device; | ||
570 | |||
571 | rdsdebug("disconnecting cm %p\n", ic->i_cm_id); | ||
572 | err = rdma_disconnect(ic->i_cm_id); | ||
573 | if (err) { | ||
574 | /* Actually this may happen quite frequently, when | ||
575 | * an outgoing connect raced with an incoming connect. | ||
576 | */ | ||
577 | rdsdebug("failed to disconnect, cm: %p err %d\n", | ||
578 | ic->i_cm_id, err); | ||
579 | } | ||
580 | |||
581 | wait_event(rds_ib_ring_empty_wait, | ||
582 | rds_ib_ring_empty(&ic->i_send_ring) && | ||
583 | rds_ib_ring_empty(&ic->i_recv_ring)); | ||
584 | |||
585 | if (ic->i_send_hdrs) | ||
586 | ib_dma_free_coherent(dev, | ||
587 | ic->i_send_ring.w_nr * | ||
588 | sizeof(struct rds_header), | ||
589 | ic->i_send_hdrs, | ||
590 | ic->i_send_hdrs_dma); | ||
591 | |||
592 | if (ic->i_recv_hdrs) | ||
593 | ib_dma_free_coherent(dev, | ||
594 | ic->i_recv_ring.w_nr * | ||
595 | sizeof(struct rds_header), | ||
596 | ic->i_recv_hdrs, | ||
597 | ic->i_recv_hdrs_dma); | ||
598 | |||
599 | if (ic->i_ack) | ||
600 | ib_dma_free_coherent(dev, sizeof(struct rds_header), | ||
601 | ic->i_ack, ic->i_ack_dma); | ||
602 | |||
603 | if (ic->i_sends) | ||
604 | rds_ib_send_clear_ring(ic); | ||
605 | if (ic->i_recvs) | ||
606 | rds_ib_recv_clear_ring(ic); | ||
607 | |||
608 | if (ic->i_cm_id->qp) | ||
609 | rdma_destroy_qp(ic->i_cm_id); | ||
610 | if (ic->i_send_cq) | ||
611 | ib_destroy_cq(ic->i_send_cq); | ||
612 | if (ic->i_recv_cq) | ||
613 | ib_destroy_cq(ic->i_recv_cq); | ||
614 | rdma_destroy_id(ic->i_cm_id); | ||
615 | |||
616 | /* | ||
617 | * Move connection back to the nodev list. | ||
618 | */ | ||
619 | if (ic->rds_ibdev) { | ||
620 | |||
621 | spin_lock_irq(&ic->rds_ibdev->spinlock); | ||
622 | BUG_ON(list_empty(&ic->ib_node)); | ||
623 | list_del(&ic->ib_node); | ||
624 | spin_unlock_irq(&ic->rds_ibdev->spinlock); | ||
625 | |||
626 | spin_lock_irq(&ib_nodev_conns_lock); | ||
627 | list_add_tail(&ic->ib_node, &ib_nodev_conns); | ||
628 | spin_unlock_irq(&ib_nodev_conns_lock); | ||
629 | ic->rds_ibdev = NULL; | ||
630 | } | ||
631 | |||
632 | ic->i_cm_id = NULL; | ||
633 | ic->i_pd = NULL; | ||
634 | ic->i_mr = NULL; | ||
635 | ic->i_send_cq = NULL; | ||
636 | ic->i_recv_cq = NULL; | ||
637 | ic->i_send_hdrs = NULL; | ||
638 | ic->i_recv_hdrs = NULL; | ||
639 | ic->i_ack = NULL; | ||
640 | } | ||
641 | BUG_ON(ic->rds_ibdev); | ||
642 | |||
643 | /* Clear pending transmit */ | ||
644 | if (ic->i_rm) { | ||
645 | rds_message_put(ic->i_rm); | ||
646 | ic->i_rm = NULL; | ||
647 | } | ||
648 | |||
649 | /* Clear the ACK state */ | ||
650 | clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); | ||
651 | rds_ib_set_64bit(&ic->i_ack_next, 0); | ||
652 | ic->i_ack_recv = 0; | ||
653 | |||
654 | /* Clear flow control state */ | ||
655 | ic->i_flowctl = 0; | ||
656 | atomic_set(&ic->i_credits, 0); | ||
657 | |||
658 | rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr); | ||
659 | rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr); | ||
660 | |||
661 | if (ic->i_ibinc) { | ||
662 | rds_inc_put(&ic->i_ibinc->ii_inc); | ||
663 | ic->i_ibinc = NULL; | ||
664 | } | ||
665 | |||
666 | vfree(ic->i_sends); | ||
667 | ic->i_sends = NULL; | ||
668 | vfree(ic->i_recvs); | ||
669 | ic->i_recvs = NULL; | ||
670 | } | ||
671 | |||
672 | int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) | ||
673 | { | ||
674 | struct rds_ib_connection *ic; | ||
675 | unsigned long flags; | ||
676 | |||
677 | /* XXX too lazy? */ | ||
678 | ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL); | ||
679 | if (ic == NULL) | ||
680 | return -ENOMEM; | ||
681 | |||
682 | INIT_LIST_HEAD(&ic->ib_node); | ||
683 | mutex_init(&ic->i_recv_mutex); | ||
684 | |||
685 | /* | ||
686 | * rds_ib_conn_shutdown() waits for these to be emptied so they | ||
687 | * must be initialized before it can be called. | ||
688 | */ | ||
689 | rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr); | ||
690 | rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr); | ||
691 | |||
692 | ic->conn = conn; | ||
693 | conn->c_transport_data = ic; | ||
694 | |||
695 | spin_lock_irqsave(&ib_nodev_conns_lock, flags); | ||
696 | list_add_tail(&ic->ib_node, &ib_nodev_conns); | ||
697 | spin_unlock_irqrestore(&ib_nodev_conns_lock, flags); | ||
698 | |||
699 | |||
700 | rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data); | ||
701 | return 0; | ||
702 | } | ||
703 | |||
704 | void rds_ib_conn_free(void *arg) | ||
705 | { | ||
706 | struct rds_ib_connection *ic = arg; | ||
707 | rdsdebug("ic %p\n", ic); | ||
708 | list_del(&ic->ib_node); | ||
709 | kfree(ic); | ||
710 | } | ||
711 | |||
712 | |||
713 | /* | ||
714 | * An error occurred on the connection | ||
715 | */ | ||
716 | void | ||
717 | __rds_ib_conn_error(struct rds_connection *conn, const char *fmt, ...) | ||
718 | { | ||
719 | va_list ap; | ||
720 | |||
721 | rds_conn_drop(conn); | ||
722 | |||
723 | va_start(ap, fmt); | ||
724 | vprintk(fmt, ap); | ||
725 | va_end(ap); | ||
726 | } | ||
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c new file mode 100644 index 000000000000..69a6289ed672 --- /dev/null +++ b/net/rds/ib_rdma.c | |||
@@ -0,0 +1,641 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | |||
35 | #include "rds.h" | ||
36 | #include "rdma.h" | ||
37 | #include "ib.h" | ||
38 | |||
39 | |||
40 | /* | ||
41 | * This is stored as mr->r_trans_private. | ||
42 | */ | ||
43 | struct rds_ib_mr { | ||
44 | struct rds_ib_device *device; | ||
45 | struct rds_ib_mr_pool *pool; | ||
46 | struct ib_fmr *fmr; | ||
47 | struct list_head list; | ||
48 | unsigned int remap_count; | ||
49 | |||
50 | struct scatterlist *sg; | ||
51 | unsigned int sg_len; | ||
52 | u64 *dma; | ||
53 | int sg_dma_len; | ||
54 | }; | ||
55 | |||
56 | /* | ||
57 | * Our own little FMR pool | ||
58 | */ | ||
59 | struct rds_ib_mr_pool { | ||
60 | struct mutex flush_lock; /* serialize fmr invalidate */ | ||
61 | struct work_struct flush_worker; /* flush worker */ | ||
62 | |||
63 | spinlock_t list_lock; /* protect variables below */ | ||
64 | atomic_t item_count; /* total # of MRs */ | ||
65 | atomic_t dirty_count; /* # of dirty MRs */ | ||
66 | struct list_head drop_list; /* MRs that have reached their max_maps limit */ | ||
67 | struct list_head free_list; /* unused MRs */ | ||
68 | struct list_head clean_list; /* unused & unmapped MRs */ | ||
69 | atomic_t free_pinned; /* memory pinned by free MRs */ | ||
70 | unsigned long max_items; | ||
71 | unsigned long max_items_soft; | ||
72 | unsigned long max_free_pinned; | ||
73 | struct ib_fmr_attr fmr_attr; | ||
74 | }; | ||
75 | |||
76 | static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all); | ||
77 | static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr); | ||
78 | static void rds_ib_mr_pool_flush_worker(struct work_struct *work); | ||
79 | |||
80 | static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) | ||
81 | { | ||
82 | struct rds_ib_device *rds_ibdev; | ||
83 | struct rds_ib_ipaddr *i_ipaddr; | ||
84 | |||
85 | list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { | ||
86 | spin_lock_irq(&rds_ibdev->spinlock); | ||
87 | list_for_each_entry(i_ipaddr, &rds_ibdev->ipaddr_list, list) { | ||
88 | if (i_ipaddr->ipaddr == ipaddr) { | ||
89 | spin_unlock_irq(&rds_ibdev->spinlock); | ||
90 | return rds_ibdev; | ||
91 | } | ||
92 | } | ||
93 | spin_unlock_irq(&rds_ibdev->spinlock); | ||
94 | } | ||
95 | |||
96 | return NULL; | ||
97 | } | ||
98 | |||
99 | static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) | ||
100 | { | ||
101 | struct rds_ib_ipaddr *i_ipaddr; | ||
102 | |||
103 | i_ipaddr = kmalloc(sizeof *i_ipaddr, GFP_KERNEL); | ||
104 | if (!i_ipaddr) | ||
105 | return -ENOMEM; | ||
106 | |||
107 | i_ipaddr->ipaddr = ipaddr; | ||
108 | |||
109 | spin_lock_irq(&rds_ibdev->spinlock); | ||
110 | list_add_tail(&i_ipaddr->list, &rds_ibdev->ipaddr_list); | ||
111 | spin_unlock_irq(&rds_ibdev->spinlock); | ||
112 | |||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) | ||
117 | { | ||
118 | struct rds_ib_ipaddr *i_ipaddr, *next; | ||
119 | |||
120 | spin_lock_irq(&rds_ibdev->spinlock); | ||
121 | list_for_each_entry_safe(i_ipaddr, next, &rds_ibdev->ipaddr_list, list) { | ||
122 | if (i_ipaddr->ipaddr == ipaddr) { | ||
123 | list_del(&i_ipaddr->list); | ||
124 | kfree(i_ipaddr); | ||
125 | break; | ||
126 | } | ||
127 | } | ||
128 | spin_unlock_irq(&rds_ibdev->spinlock); | ||
129 | } | ||
130 | |||
131 | int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) | ||
132 | { | ||
133 | struct rds_ib_device *rds_ibdev_old; | ||
134 | |||
135 | rds_ibdev_old = rds_ib_get_device(ipaddr); | ||
136 | if (rds_ibdev_old) | ||
137 | rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr); | ||
138 | |||
139 | return rds_ib_add_ipaddr(rds_ibdev, ipaddr); | ||
140 | } | ||
141 | |||
142 | int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) | ||
143 | { | ||
144 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
145 | |||
146 | /* conn was previously on the nodev_conns_list */ | ||
147 | spin_lock_irq(&ib_nodev_conns_lock); | ||
148 | BUG_ON(list_empty(&ib_nodev_conns)); | ||
149 | BUG_ON(list_empty(&ic->ib_node)); | ||
150 | list_del(&ic->ib_node); | ||
151 | spin_unlock_irq(&ib_nodev_conns_lock); | ||
152 | |||
153 | spin_lock_irq(&rds_ibdev->spinlock); | ||
154 | list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); | ||
155 | spin_unlock_irq(&rds_ibdev->spinlock); | ||
156 | |||
157 | ic->rds_ibdev = rds_ibdev; | ||
158 | |||
159 | return 0; | ||
160 | } | ||
161 | |||
162 | void rds_ib_remove_nodev_conns(void) | ||
163 | { | ||
164 | struct rds_ib_connection *ic, *_ic; | ||
165 | LIST_HEAD(tmp_list); | ||
166 | |||
167 | /* avoid calling conn_destroy with irqs off */ | ||
168 | spin_lock_irq(&ib_nodev_conns_lock); | ||
169 | list_splice(&ib_nodev_conns, &tmp_list); | ||
170 | INIT_LIST_HEAD(&ib_nodev_conns); | ||
171 | spin_unlock_irq(&ib_nodev_conns_lock); | ||
172 | |||
173 | list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { | ||
174 | if (ic->conn->c_passive) | ||
175 | rds_conn_destroy(ic->conn->c_passive); | ||
176 | rds_conn_destroy(ic->conn); | ||
177 | } | ||
178 | } | ||
179 | |||
180 | void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev) | ||
181 | { | ||
182 | struct rds_ib_connection *ic, *_ic; | ||
183 | LIST_HEAD(tmp_list); | ||
184 | |||
185 | /* avoid calling conn_destroy with irqs off */ | ||
186 | spin_lock_irq(&rds_ibdev->spinlock); | ||
187 | list_splice(&rds_ibdev->conn_list, &tmp_list); | ||
188 | INIT_LIST_HEAD(&rds_ibdev->conn_list); | ||
189 | spin_unlock_irq(&rds_ibdev->spinlock); | ||
190 | |||
191 | list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { | ||
192 | if (ic->conn->c_passive) | ||
193 | rds_conn_destroy(ic->conn->c_passive); | ||
194 | rds_conn_destroy(ic->conn); | ||
195 | } | ||
196 | } | ||
197 | |||
198 | struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) | ||
199 | { | ||
200 | struct rds_ib_mr_pool *pool; | ||
201 | |||
202 | pool = kzalloc(sizeof(*pool), GFP_KERNEL); | ||
203 | if (!pool) | ||
204 | return ERR_PTR(-ENOMEM); | ||
205 | |||
206 | INIT_LIST_HEAD(&pool->free_list); | ||
207 | INIT_LIST_HEAD(&pool->drop_list); | ||
208 | INIT_LIST_HEAD(&pool->clean_list); | ||
209 | mutex_init(&pool->flush_lock); | ||
210 | spin_lock_init(&pool->list_lock); | ||
211 | INIT_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); | ||
212 | |||
213 | pool->fmr_attr.max_pages = fmr_message_size; | ||
214 | pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; | ||
215 | pool->fmr_attr.page_shift = rds_ibdev->fmr_page_shift; | ||
216 | pool->max_free_pinned = rds_ibdev->max_fmrs * fmr_message_size / 4; | ||
217 | |||
218 | /* We never allow more than max_items MRs to be allocated. | ||
219 | * When we exceed max_items_soft, we start freeing | ||
220 | * items more aggressively. | ||
221 | * Make sure that max_items > max_items_soft > max_items / 2 | ||
222 | */ | ||
223 | pool->max_items_soft = rds_ibdev->max_fmrs * 3 / 4; | ||
224 | pool->max_items = rds_ibdev->max_fmrs; | ||
225 | |||
226 | return pool; | ||
227 | } | ||
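
The sizing comment above asks for max_items > max_items_soft > max_items / 2, which the *3/4 soft limit satisfies for realistic max_fmrs values. A tiny stand-alone check of that arithmetic, using placeholder values for max_fmrs and fmr_message_size since the real numbers come from the HCA at probe time:

/* Illustrative pool sizing, mirroring rds_ib_create_mr_pool() above.
 * max_fmrs and fmr_message_size are placeholders, not device values. */
#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned long max_fmrs = 512;		/* placeholder: device FMR cap */
	unsigned long fmr_message_size = 256;	/* placeholder: pages per FMR */

	unsigned long max_items = max_fmrs;
	unsigned long max_items_soft = max_fmrs * 3 / 4;
	unsigned long max_free_pinned = max_fmrs * fmr_message_size / 4;

	/* the invariant requested by the comment above */
	assert(max_items > max_items_soft && max_items_soft > max_items / 2);

	printf("hard cap %lu MRs, soft cap %lu, pinned-page cap %lu\n",
	       max_items, max_items_soft, max_free_pinned);
	return 0;
}
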
228 | |||
229 | void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo) | ||
230 | { | ||
231 | struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; | ||
232 | |||
233 | iinfo->rdma_mr_max = pool->max_items; | ||
234 | iinfo->rdma_mr_size = pool->fmr_attr.max_pages; | ||
235 | } | ||
236 | |||
237 | void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) | ||
238 | { | ||
239 | flush_workqueue(rds_wq); | ||
240 | rds_ib_flush_mr_pool(pool, 1); | ||
241 | BUG_ON(atomic_read(&pool->item_count)); | ||
242 | BUG_ON(atomic_read(&pool->free_pinned)); | ||
243 | kfree(pool); | ||
244 | } | ||
245 | |||
246 | static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) | ||
247 | { | ||
248 | struct rds_ib_mr *ibmr = NULL; | ||
249 | unsigned long flags; | ||
250 | |||
251 | spin_lock_irqsave(&pool->list_lock, flags); | ||
252 | if (!list_empty(&pool->clean_list)) { | ||
253 | ibmr = list_entry(pool->clean_list.next, struct rds_ib_mr, list); | ||
254 | list_del_init(&ibmr->list); | ||
255 | } | ||
256 | spin_unlock_irqrestore(&pool->list_lock, flags); | ||
257 | |||
258 | return ibmr; | ||
259 | } | ||
260 | |||
261 | static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) | ||
262 | { | ||
263 | struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; | ||
264 | struct rds_ib_mr *ibmr = NULL; | ||
265 | int err = 0, iter = 0; | ||
266 | |||
267 | while (1) { | ||
268 | ibmr = rds_ib_reuse_fmr(pool); | ||
269 | if (ibmr) | ||
270 | return ibmr; | ||
271 | |||
272 | /* No clean MRs - now we have the choice of either | ||
273 | * allocating a fresh MR up to the limit imposed by the | ||
274 | * driver, or flush any dirty unused MRs. | ||
275 | * We try to avoid stalling in the send path if possible, | ||
276 | * so we allocate as long as we're allowed to. | ||
277 | * | ||
278 | * We're fussy with enforcing the FMR limit, though. If the driver | ||
279 | * tells us we can't use more than N fmrs, we shouldn't start | ||
280 | * arguing with it */ | ||
281 | if (atomic_inc_return(&pool->item_count) <= pool->max_items) | ||
282 | break; | ||
283 | |||
284 | atomic_dec(&pool->item_count); | ||
285 | |||
286 | if (++iter > 2) { | ||
287 | rds_ib_stats_inc(s_ib_rdma_mr_pool_depleted); | ||
288 | return ERR_PTR(-EAGAIN); | ||
289 | } | ||
290 | |||
291 | /* We do have some empty MRs. Flush them out. */ | ||
292 | rds_ib_stats_inc(s_ib_rdma_mr_pool_wait); | ||
293 | rds_ib_flush_mr_pool(pool, 0); | ||
294 | } | ||
295 | |||
296 | ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL); | ||
297 | if (!ibmr) { | ||
298 | err = -ENOMEM; | ||
299 | goto out_no_cigar; | ||
300 | } | ||
301 | |||
302 | ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd, | ||
303 | (IB_ACCESS_LOCAL_WRITE | | ||
304 | IB_ACCESS_REMOTE_READ | | ||
305 | IB_ACCESS_REMOTE_WRITE), | ||
306 | &pool->fmr_attr); | ||
307 | if (IS_ERR(ibmr->fmr)) { | ||
308 | err = PTR_ERR(ibmr->fmr); | ||
309 | ibmr->fmr = NULL; | ||
310 | printk(KERN_WARNING "RDS/IB: ib_alloc_fmr failed (err=%d)\n", err); | ||
311 | goto out_no_cigar; | ||
312 | } | ||
313 | |||
314 | rds_ib_stats_inc(s_ib_rdma_mr_alloc); | ||
315 | return ibmr; | ||
316 | |||
317 | out_no_cigar: | ||
318 | if (ibmr) { | ||
319 | if (ibmr->fmr) | ||
320 | ib_dealloc_fmr(ibmr->fmr); | ||
321 | kfree(ibmr); | ||
322 | } | ||
323 | atomic_dec(&pool->item_count); | ||
324 | return ERR_PTR(err); | ||
325 | } | ||
326 | |||
327 | static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr, | ||
328 | struct scatterlist *sg, unsigned int nents) | ||
329 | { | ||
330 | struct ib_device *dev = rds_ibdev->dev; | ||
331 | struct scatterlist *scat = sg; | ||
332 | u64 io_addr = 0; | ||
333 | u64 *dma_pages; | ||
334 | u32 len; | ||
335 | int page_cnt, sg_dma_len; | ||
336 | int i, j; | ||
337 | int ret; | ||
338 | |||
339 | sg_dma_len = ib_dma_map_sg(dev, sg, nents, | ||
340 | DMA_BIDIRECTIONAL); | ||
341 | if (unlikely(!sg_dma_len)) { | ||
342 | printk(KERN_WARNING "RDS/IB: dma_map_sg failed!\n"); | ||
343 | return -EBUSY; | ||
344 | } | ||
345 | |||
346 | len = 0; | ||
347 | page_cnt = 0; | ||
348 | |||
349 | for (i = 0; i < sg_dma_len; ++i) { | ||
350 | unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]); | ||
351 | u64 dma_addr = ib_sg_dma_address(dev, &scat[i]); | ||
352 | |||
353 | if (dma_addr & ~rds_ibdev->fmr_page_mask) { | ||
354 | if (i > 0) | ||
355 | return -EINVAL; | ||
356 | else | ||
357 | ++page_cnt; | ||
358 | } | ||
359 | if ((dma_addr + dma_len) & ~rds_ibdev->fmr_page_mask) { | ||
360 | if (i < sg_dma_len - 1) | ||
361 | return -EINVAL; | ||
362 | else | ||
363 | ++page_cnt; | ||
364 | } | ||
365 | |||
366 | len += dma_len; | ||
367 | } | ||
368 | |||
369 | page_cnt += len >> rds_ibdev->fmr_page_shift; | ||
370 | if (page_cnt > fmr_message_size) | ||
371 | return -EINVAL; | ||
372 | |||
373 | dma_pages = kmalloc(sizeof(u64) * page_cnt, GFP_ATOMIC); | ||
374 | if (!dma_pages) | ||
375 | return -ENOMEM; | ||
376 | |||
377 | page_cnt = 0; | ||
378 | for (i = 0; i < sg_dma_len; ++i) { | ||
379 | unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]); | ||
380 | u64 dma_addr = ib_sg_dma_address(dev, &scat[i]); | ||
381 | |||
382 | for (j = 0; j < dma_len; j += rds_ibdev->fmr_page_size) | ||
383 | dma_pages[page_cnt++] = | ||
384 | (dma_addr & rds_ibdev->fmr_page_mask) + j; | ||
385 | } | ||
386 | |||
387 | ret = ib_map_phys_fmr(ibmr->fmr, | ||
388 | dma_pages, page_cnt, io_addr); | ||
389 | if (ret) | ||
390 | goto out; | ||
391 | |||
392 | /* Success - we successfully remapped the MR, so we can | ||
393 | * safely tear down the old mapping. */ | ||
394 | rds_ib_teardown_mr(ibmr); | ||
395 | |||
396 | ibmr->sg = scat; | ||
397 | ibmr->sg_len = nents; | ||
398 | ibmr->sg_dma_len = sg_dma_len; | ||
399 | ibmr->remap_count++; | ||
400 | |||
401 | rds_ib_stats_inc(s_ib_rdma_mr_used); | ||
402 | ret = 0; | ||
403 | |||
404 | out: | ||
405 | kfree(dma_pages); | ||
406 | |||
407 | return ret; | ||
408 | } | ||
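
The first loop in rds_ib_map_fmr() enforces that only the first scatterlist entry may start off a page boundary and only the last may end off one, and counts pages with the same arithmetic. Below is a stand-alone model of that check, assuming a 4 KB page size and invented segment addresses; in the kernel the values come from ib_sg_dma_address()/ib_sg_dma_len() on the mapped scatterlist.

/* Stand-alone model of the FMR alignment/page-count check above. */
#include <stdio.h>

#define FMR_PAGE_SHIFT	12
#define FMR_PAGE_MASK	(~((1UL << FMR_PAGE_SHIFT) - 1))

struct seg { unsigned long addr, len; };

static long count_fmr_pages(const struct seg *s, int n)
{
	unsigned long len = 0;
	long page_cnt = 0;
	int i;

	for (i = 0; i < n; i++) {
		/* only the first segment may start off a page boundary */
		if (s[i].addr & ~FMR_PAGE_MASK) {
			if (i > 0)
				return -1;
			page_cnt++;
		}
		/* only the last segment may end off a page boundary */
		if ((s[i].addr + s[i].len) & ~FMR_PAGE_MASK) {
			if (i < n - 1)
				return -1;
			page_cnt++;
		}
		len += s[i].len;
	}
	return page_cnt + (long)(len >> FMR_PAGE_SHIFT);
}

int main(void)
{
	struct seg ok[]  = { { 0x1800, 0x0800 }, { 0x3000, 0x1000 } };
	struct seg bad[] = { { 0x1000, 0x0800 }, { 0x3000, 0x1000 } };

	printf("page-aligned interior: %ld pages\n", count_fmr_pages(ok, 2));
	printf("unaligned interior:    %ld (rejected)\n", count_fmr_pages(bad, 2));
	return 0;
}
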
409 | |||
410 | void rds_ib_sync_mr(void *trans_private, int direction) | ||
411 | { | ||
412 | struct rds_ib_mr *ibmr = trans_private; | ||
413 | struct rds_ib_device *rds_ibdev = ibmr->device; | ||
414 | |||
415 | switch (direction) { | ||
416 | case DMA_FROM_DEVICE: | ||
417 | ib_dma_sync_sg_for_cpu(rds_ibdev->dev, ibmr->sg, | ||
418 | ibmr->sg_dma_len, DMA_BIDIRECTIONAL); | ||
419 | break; | ||
420 | case DMA_TO_DEVICE: | ||
421 | ib_dma_sync_sg_for_device(rds_ibdev->dev, ibmr->sg, | ||
422 | ibmr->sg_dma_len, DMA_BIDIRECTIONAL); | ||
423 | break; | ||
424 | } | ||
425 | } | ||
426 | |||
427 | static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr) | ||
428 | { | ||
429 | struct rds_ib_device *rds_ibdev = ibmr->device; | ||
430 | |||
431 | if (ibmr->sg_dma_len) { | ||
432 | ib_dma_unmap_sg(rds_ibdev->dev, | ||
433 | ibmr->sg, ibmr->sg_len, | ||
434 | DMA_BIDIRECTIONAL); | ||
435 | ibmr->sg_dma_len = 0; | ||
436 | } | ||
437 | |||
438 | /* Release the s/g list */ | ||
439 | if (ibmr->sg_len) { | ||
440 | unsigned int i; | ||
441 | |||
442 | for (i = 0; i < ibmr->sg_len; ++i) { | ||
443 | struct page *page = sg_page(&ibmr->sg[i]); | ||
444 | |||
445 | /* FIXME we need a way to tell a r/w MR | ||
446 | * from a r/o MR */ | ||
447 | set_page_dirty(page); | ||
448 | put_page(page); | ||
449 | } | ||
450 | kfree(ibmr->sg); | ||
451 | |||
452 | ibmr->sg = NULL; | ||
453 | ibmr->sg_len = 0; | ||
454 | } | ||
455 | } | ||
456 | |||
457 | static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr) | ||
458 | { | ||
459 | unsigned int pinned = ibmr->sg_len; | ||
460 | |||
461 | __rds_ib_teardown_mr(ibmr); | ||
462 | if (pinned) { | ||
463 | struct rds_ib_device *rds_ibdev = ibmr->device; | ||
464 | struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; | ||
465 | |||
466 | atomic_sub(pinned, &pool->free_pinned); | ||
467 | } | ||
468 | } | ||
469 | |||
470 | static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int free_all) | ||
471 | { | ||
472 | unsigned int item_count; | ||
473 | |||
474 | item_count = atomic_read(&pool->item_count); | ||
475 | if (free_all) | ||
476 | return item_count; | ||
477 | |||
478 | return 0; | ||
479 | } | ||
480 | |||
481 | /* | ||
482 | * Flush our pool of MRs. | ||
483 | * At a minimum, all currently unused MRs are unmapped. | ||
484 | * If the number of MRs allocated exceeds the limit, we also try | ||
485 | * to free as many MRs as needed to get back to this limit. | ||
486 | */ | ||
487 | static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all) | ||
488 | { | ||
489 | struct rds_ib_mr *ibmr, *next; | ||
490 | LIST_HEAD(unmap_list); | ||
491 | LIST_HEAD(fmr_list); | ||
492 | unsigned long unpinned = 0; | ||
493 | unsigned long flags; | ||
494 | unsigned int nfreed = 0, ncleaned = 0, free_goal; | ||
495 | int ret = 0; | ||
496 | |||
497 | rds_ib_stats_inc(s_ib_rdma_mr_pool_flush); | ||
498 | |||
499 | mutex_lock(&pool->flush_lock); | ||
500 | |||
501 | spin_lock_irqsave(&pool->list_lock, flags); | ||
502 | /* Get the list of all MRs to be dropped. Ordering matters - | ||
503 | * we want to put drop_list ahead of free_list. */ | ||
504 | list_splice_init(&pool->free_list, &unmap_list); | ||
505 | list_splice_init(&pool->drop_list, &unmap_list); | ||
506 | if (free_all) | ||
507 | list_splice_init(&pool->clean_list, &unmap_list); | ||
508 | spin_unlock_irqrestore(&pool->list_lock, flags); | ||
509 | |||
510 | free_goal = rds_ib_flush_goal(pool, free_all); | ||
511 | |||
512 | if (list_empty(&unmap_list)) | ||
513 | goto out; | ||
514 | |||
515 | /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ | ||
516 | list_for_each_entry(ibmr, &unmap_list, list) | ||
517 | list_add(&ibmr->fmr->list, &fmr_list); | ||
518 | ret = ib_unmap_fmr(&fmr_list); | ||
519 | if (ret) | ||
520 | printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret); | ||
521 | |||
522 | /* Now we can destroy the DMA mapping and unpin any pages */ | ||
523 | list_for_each_entry_safe(ibmr, next, &unmap_list, list) { | ||
524 | unpinned += ibmr->sg_len; | ||
525 | __rds_ib_teardown_mr(ibmr); | ||
526 | if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) { | ||
527 | rds_ib_stats_inc(s_ib_rdma_mr_free); | ||
528 | list_del(&ibmr->list); | ||
529 | ib_dealloc_fmr(ibmr->fmr); | ||
530 | kfree(ibmr); | ||
531 | nfreed++; | ||
532 | } | ||
533 | ncleaned++; | ||
534 | } | ||
535 | |||
536 | spin_lock_irqsave(&pool->list_lock, flags); | ||
537 | list_splice(&unmap_list, &pool->clean_list); | ||
538 | spin_unlock_irqrestore(&pool->list_lock, flags); | ||
539 | |||
540 | atomic_sub(unpinned, &pool->free_pinned); | ||
541 | atomic_sub(ncleaned, &pool->dirty_count); | ||
542 | atomic_sub(nfreed, &pool->item_count); | ||
543 | |||
544 | out: | ||
545 | mutex_unlock(&pool->flush_lock); | ||
546 | return ret; | ||
547 | } | ||
548 | |||
549 | static void rds_ib_mr_pool_flush_worker(struct work_struct *work) | ||
550 | { | ||
551 | struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker); | ||
552 | |||
553 | rds_ib_flush_mr_pool(pool, 0); | ||
554 | } | ||
555 | |||
556 | void rds_ib_free_mr(void *trans_private, int invalidate) | ||
557 | { | ||
558 | struct rds_ib_mr *ibmr = trans_private; | ||
559 | struct rds_ib_device *rds_ibdev = ibmr->device; | ||
560 | struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; | ||
561 | unsigned long flags; | ||
562 | |||
563 | rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); | ||
564 | |||
565 | /* Return it to the pool's free list */ | ||
566 | spin_lock_irqsave(&pool->list_lock, flags); | ||
567 | if (ibmr->remap_count >= pool->fmr_attr.max_maps) | ||
568 | list_add(&ibmr->list, &pool->drop_list); | ||
569 | else | ||
570 | list_add(&ibmr->list, &pool->free_list); | ||
571 | |||
572 | atomic_add(ibmr->sg_len, &pool->free_pinned); | ||
573 | atomic_inc(&pool->dirty_count); | ||
574 | spin_unlock_irqrestore(&pool->list_lock, flags); | ||
575 | |||
576 | /* If we've pinned too many pages, request a flush */ | ||
577 | if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned | ||
578 | || atomic_read(&pool->dirty_count) >= pool->max_items / 10) | ||
579 | queue_work(rds_wq, &pool->flush_worker); | ||
580 | |||
581 | if (invalidate) { | ||
582 | if (likely(!in_interrupt())) { | ||
583 | rds_ib_flush_mr_pool(pool, 0); | ||
584 | } else { | ||
585 | /* We get here if the user created a MR marked | ||
586 | * as use_once and invalidate at the same time. */ | ||
587 | queue_work(rds_wq, &pool->flush_worker); | ||
588 | } | ||
589 | } | ||
590 | } | ||
591 | |||
592 | void rds_ib_flush_mrs(void) | ||
593 | { | ||
594 | struct rds_ib_device *rds_ibdev; | ||
595 | |||
596 | list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { | ||
597 | struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; | ||
598 | |||
599 | if (pool) | ||
600 | rds_ib_flush_mr_pool(pool, 0); | ||
601 | } | ||
602 | } | ||
603 | |||
604 | void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, | ||
605 | struct rds_sock *rs, u32 *key_ret) | ||
606 | { | ||
607 | struct rds_ib_device *rds_ibdev; | ||
608 | struct rds_ib_mr *ibmr = NULL; | ||
609 | int ret; | ||
610 | |||
611 | rds_ibdev = rds_ib_get_device(rs->rs_bound_addr); | ||
612 | if (!rds_ibdev) { | ||
613 | ret = -ENODEV; | ||
614 | goto out; | ||
615 | } | ||
616 | |||
617 | if (!rds_ibdev->mr_pool) { | ||
618 | ret = -ENODEV; | ||
619 | goto out; | ||
620 | } | ||
621 | |||
622 | ibmr = rds_ib_alloc_fmr(rds_ibdev); | ||
623 | if (IS_ERR(ibmr)) | ||
624 | return ibmr; | ||
625 | |||
626 | ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents); | ||
627 | if (ret == 0) | ||
628 | *key_ret = ibmr->fmr->rkey; | ||
629 | else | ||
630 | printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret); | ||
631 | |||
632 | ibmr->device = rds_ibdev; | ||
633 | |||
634 | out: | ||
635 | if (ret) { | ||
636 | if (ibmr) | ||
637 | rds_ib_free_mr(ibmr, 0); | ||
638 | ibmr = ERR_PTR(ret); | ||
639 | } | ||
640 | return ibmr; | ||
641 | } | ||
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c new file mode 100644 index 000000000000..5061b5502162 --- /dev/null +++ b/net/rds/ib_recv.c | |||
@@ -0,0 +1,869 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/pci.h> | ||
35 | #include <linux/dma-mapping.h> | ||
36 | #include <rdma/rdma_cm.h> | ||
37 | |||
38 | #include "rds.h" | ||
39 | #include "ib.h" | ||
40 | |||
41 | static struct kmem_cache *rds_ib_incoming_slab; | ||
42 | static struct kmem_cache *rds_ib_frag_slab; | ||
43 | static atomic_t rds_ib_allocation = ATOMIC_INIT(0); | ||
44 | |||
45 | static void rds_ib_frag_drop_page(struct rds_page_frag *frag) | ||
46 | { | ||
47 | rdsdebug("frag %p page %p\n", frag, frag->f_page); | ||
48 | __free_page(frag->f_page); | ||
49 | frag->f_page = NULL; | ||
50 | } | ||
51 | |||
52 | static void rds_ib_frag_free(struct rds_page_frag *frag) | ||
53 | { | ||
54 | rdsdebug("frag %p page %p\n", frag, frag->f_page); | ||
55 | BUG_ON(frag->f_page != NULL); | ||
56 | kmem_cache_free(rds_ib_frag_slab, frag); | ||
57 | } | ||
58 | |||
59 | /* | ||
60 | * We map a page at a time. Its fragments are posted in order. This | ||
61 | * is called in fragment order as the fragments get completion events. | ||
62 | * Only the last frag in the page performs the unmapping. | ||
63 | * | ||
64 | * It's OK for ring cleanup to call this in whatever order it likes because | ||
65 | * DMA is not in flight and so we can unmap while other ring entries still | ||
66 | * hold page references in their frags. | ||
67 | */ | ||
68 | static void rds_ib_recv_unmap_page(struct rds_ib_connection *ic, | ||
69 | struct rds_ib_recv_work *recv) | ||
70 | { | ||
71 | struct rds_page_frag *frag = recv->r_frag; | ||
72 | |||
73 | rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page); | ||
74 | if (frag->f_mapped) | ||
75 | ib_dma_unmap_page(ic->i_cm_id->device, | ||
76 | frag->f_mapped, | ||
77 | RDS_FRAG_SIZE, DMA_FROM_DEVICE); | ||
78 | frag->f_mapped = 0; | ||
79 | } | ||
80 | |||
81 | void rds_ib_recv_init_ring(struct rds_ib_connection *ic) | ||
82 | { | ||
83 | struct rds_ib_recv_work *recv; | ||
84 | u32 i; | ||
85 | |||
86 | for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) { | ||
87 | struct ib_sge *sge; | ||
88 | |||
89 | recv->r_ibinc = NULL; | ||
90 | recv->r_frag = NULL; | ||
91 | |||
92 | recv->r_wr.next = NULL; | ||
93 | recv->r_wr.wr_id = i; | ||
94 | recv->r_wr.sg_list = recv->r_sge; | ||
95 | recv->r_wr.num_sge = RDS_IB_RECV_SGE; | ||
96 | |||
97 | sge = rds_ib_data_sge(ic, recv->r_sge); | ||
98 | sge->addr = 0; | ||
99 | sge->length = RDS_FRAG_SIZE; | ||
100 | sge->lkey = ic->i_mr->lkey; | ||
101 | |||
102 | sge = rds_ib_header_sge(ic, recv->r_sge); | ||
103 | sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); | ||
104 | sge->length = sizeof(struct rds_header); | ||
105 | sge->lkey = ic->i_mr->lkey; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | static void rds_ib_recv_clear_one(struct rds_ib_connection *ic, | ||
110 | struct rds_ib_recv_work *recv) | ||
111 | { | ||
112 | if (recv->r_ibinc) { | ||
113 | rds_inc_put(&recv->r_ibinc->ii_inc); | ||
114 | recv->r_ibinc = NULL; | ||
115 | } | ||
116 | if (recv->r_frag) { | ||
117 | rds_ib_recv_unmap_page(ic, recv); | ||
118 | if (recv->r_frag->f_page) | ||
119 | rds_ib_frag_drop_page(recv->r_frag); | ||
120 | rds_ib_frag_free(recv->r_frag); | ||
121 | recv->r_frag = NULL; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | void rds_ib_recv_clear_ring(struct rds_ib_connection *ic) | ||
126 | { | ||
127 | u32 i; | ||
128 | |||
129 | for (i = 0; i < ic->i_recv_ring.w_nr; i++) | ||
130 | rds_ib_recv_clear_one(ic, &ic->i_recvs[i]); | ||
131 | |||
132 | if (ic->i_frag.f_page) | ||
133 | rds_ib_frag_drop_page(&ic->i_frag); | ||
134 | } | ||
135 | |||
136 | static int rds_ib_recv_refill_one(struct rds_connection *conn, | ||
137 | struct rds_ib_recv_work *recv, | ||
138 | gfp_t kptr_gfp, gfp_t page_gfp) | ||
139 | { | ||
140 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
141 | dma_addr_t dma_addr; | ||
142 | struct ib_sge *sge; | ||
143 | int ret = -ENOMEM; | ||
144 | |||
145 | if (recv->r_ibinc == NULL) { | ||
146 | if (atomic_read(&rds_ib_allocation) >= rds_ib_sysctl_max_recv_allocation) { | ||
147 | rds_ib_stats_inc(s_ib_rx_alloc_limit); | ||
148 | goto out; | ||
149 | } | ||
150 | recv->r_ibinc = kmem_cache_alloc(rds_ib_incoming_slab, | ||
151 | kptr_gfp); | ||
152 | if (recv->r_ibinc == NULL) | ||
153 | goto out; | ||
154 | atomic_inc(&rds_ib_allocation); | ||
155 | INIT_LIST_HEAD(&recv->r_ibinc->ii_frags); | ||
156 | rds_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr); | ||
157 | } | ||
158 | |||
159 | if (recv->r_frag == NULL) { | ||
160 | recv->r_frag = kmem_cache_alloc(rds_ib_frag_slab, kptr_gfp); | ||
161 | if (recv->r_frag == NULL) | ||
162 | goto out; | ||
163 | INIT_LIST_HEAD(&recv->r_frag->f_item); | ||
164 | recv->r_frag->f_page = NULL; | ||
165 | } | ||
166 | |||
167 | if (ic->i_frag.f_page == NULL) { | ||
168 | ic->i_frag.f_page = alloc_page(page_gfp); | ||
169 | if (ic->i_frag.f_page == NULL) | ||
170 | goto out; | ||
171 | ic->i_frag.f_offset = 0; | ||
172 | } | ||
173 | |||
174 | dma_addr = ib_dma_map_page(ic->i_cm_id->device, | ||
175 | ic->i_frag.f_page, | ||
176 | ic->i_frag.f_offset, | ||
177 | RDS_FRAG_SIZE, | ||
178 | DMA_FROM_DEVICE); | ||
179 | if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr)) | ||
180 | goto out; | ||
181 | |||
182 | /* | ||
183 | * Once we get the RDS_PAGE_LAST_OFF frag then rds_ib_recv_unmap_page() | ||
184 | * must be called on this recv. This happens as completions hit | ||
185 | * in order or on connection shutdown. | ||
186 | */ | ||
187 | recv->r_frag->f_page = ic->i_frag.f_page; | ||
188 | recv->r_frag->f_offset = ic->i_frag.f_offset; | ||
189 | recv->r_frag->f_mapped = dma_addr; | ||
190 | |||
191 | sge = rds_ib_data_sge(ic, recv->r_sge); | ||
192 | sge->addr = dma_addr; | ||
193 | sge->length = RDS_FRAG_SIZE; | ||
194 | |||
195 | sge = rds_ib_header_sge(ic, recv->r_sge); | ||
196 | sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); | ||
197 | sge->length = sizeof(struct rds_header); | ||
198 | |||
199 | get_page(recv->r_frag->f_page); | ||
200 | |||
201 | if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) { | ||
202 | ic->i_frag.f_offset += RDS_FRAG_SIZE; | ||
203 | } else { | ||
204 | put_page(ic->i_frag.f_page); | ||
205 | ic->i_frag.f_page = NULL; | ||
206 | ic->i_frag.f_offset = 0; | ||
207 | } | ||
208 | |||
209 | ret = 0; | ||
210 | out: | ||
211 | return ret; | ||
212 | } | ||
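
The tail of rds_ib_recv_refill_one() carves the current page into RDS_FRAG_SIZE pieces: each posted receive takes its own page reference, and the producer's reference (held through ic->i_frag) is dropped only after the last fragment has been handed out. A toy refcount model of that hand-off, with illustrative sizes rather than the kernel's constants:

/* Toy model of the page-carving refcount dance above. */
#include <stdio.h>

#define PAGE_SIZE_BYTES	4096
#define FRAG_SIZE	1024
#define PAGE_LAST_OFF	(PAGE_SIZE_BYTES - FRAG_SIZE)

struct page_model { int refcount; };

int main(void)
{
	struct page_model page = { .refcount = 1 };	/* producer's reference */
	unsigned int offset = 0;
	int producer_owns = 1;

	while (producer_owns) {
		page.refcount++;	/* get_page() taken for the posted recv */
		printf("recv pinned frag at offset %u, refcount now %d\n",
		       offset, page.refcount);
		if (offset < PAGE_LAST_OFF) {
			offset += FRAG_SIZE;	/* more fragments left on this page */
		} else {
			page.refcount--;	/* put_page(): the producer lets go */
			producer_owns = 0;
		}
	}

	printf("page fully carved, %d posted receives still pin it\n",
	       page.refcount);
	return 0;
}
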
213 | |||
214 | /* | ||
215 | * This tries to allocate and post unused work requests after making sure that | ||
216 | * they have all the allocations they need to queue received fragments into | ||
217 | * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc | ||
218 | * pairs don't go unmatched. | ||
219 | * | ||
220 | * -1 is returned if posting fails due to temporary resource exhaustion. | ||
221 | */ | ||
222 | int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, | ||
223 | gfp_t page_gfp, int prefill) | ||
224 | { | ||
225 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
226 | struct rds_ib_recv_work *recv; | ||
227 | struct ib_recv_wr *failed_wr; | ||
228 | unsigned int posted = 0; | ||
229 | int ret = 0; | ||
230 | u32 pos; | ||
231 | |||
232 | while ((prefill || rds_conn_up(conn)) | ||
233 | && rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) { | ||
234 | if (pos >= ic->i_recv_ring.w_nr) { | ||
235 | printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n", | ||
236 | pos); | ||
237 | ret = -EINVAL; | ||
238 | break; | ||
239 | } | ||
240 | |||
241 | recv = &ic->i_recvs[pos]; | ||
242 | ret = rds_ib_recv_refill_one(conn, recv, kptr_gfp, page_gfp); | ||
243 | if (ret) { | ||
244 | ret = -1; | ||
245 | break; | ||
246 | } | ||
247 | |||
248 | /* XXX when can this fail? */ | ||
249 | ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); | ||
250 | rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, | ||
251 | recv->r_ibinc, recv->r_frag->f_page, | ||
252 | (long) recv->r_frag->f_mapped, ret); | ||
253 | if (ret) { | ||
254 | rds_ib_conn_error(conn, "recv post on " | ||
255 | "%pI4 returned %d, disconnecting and " | ||
256 | "reconnecting\n", &conn->c_faddr, | ||
257 | ret); | ||
258 | ret = -1; | ||
259 | break; | ||
260 | } | ||
261 | |||
262 | posted++; | ||
263 | } | ||
264 | |||
265 | /* We're doing flow control - update the window. */ | ||
266 | if (ic->i_flowctl && posted) | ||
267 | rds_ib_advertise_credits(conn, posted); | ||
268 | |||
269 | if (ret) | ||
270 | rds_ib_ring_unalloc(&ic->i_recv_ring, 1); | ||
271 | return ret; | ||
272 | } | ||
273 | |||
274 | void rds_ib_inc_purge(struct rds_incoming *inc) | ||
275 | { | ||
276 | struct rds_ib_incoming *ibinc; | ||
277 | struct rds_page_frag *frag; | ||
278 | struct rds_page_frag *pos; | ||
279 | |||
280 | ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); | ||
281 | rdsdebug("purging ibinc %p inc %p\n", ibinc, inc); | ||
282 | |||
283 | list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) { | ||
284 | list_del_init(&frag->f_item); | ||
285 | rds_ib_frag_drop_page(frag); | ||
286 | rds_ib_frag_free(frag); | ||
287 | } | ||
288 | } | ||
289 | |||
290 | void rds_ib_inc_free(struct rds_incoming *inc) | ||
291 | { | ||
292 | struct rds_ib_incoming *ibinc; | ||
293 | |||
294 | ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); | ||
295 | |||
296 | rds_ib_inc_purge(inc); | ||
297 | rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc); | ||
298 | BUG_ON(!list_empty(&ibinc->ii_frags)); | ||
299 | kmem_cache_free(rds_ib_incoming_slab, ibinc); | ||
300 | atomic_dec(&rds_ib_allocation); | ||
301 | BUG_ON(atomic_read(&rds_ib_allocation) < 0); | ||
302 | } | ||
303 | |||
304 | int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, | ||
305 | size_t size) | ||
306 | { | ||
307 | struct rds_ib_incoming *ibinc; | ||
308 | struct rds_page_frag *frag; | ||
309 | struct iovec *iov = first_iov; | ||
310 | unsigned long to_copy; | ||
311 | unsigned long frag_off = 0; | ||
312 | unsigned long iov_off = 0; | ||
313 | int copied = 0; | ||
314 | int ret; | ||
315 | u32 len; | ||
316 | |||
317 | ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); | ||
318 | frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item); | ||
319 | len = be32_to_cpu(inc->i_hdr.h_len); | ||
320 | |||
321 | while (copied < size && copied < len) { | ||
322 | if (frag_off == RDS_FRAG_SIZE) { | ||
323 | frag = list_entry(frag->f_item.next, | ||
324 | struct rds_page_frag, f_item); | ||
325 | frag_off = 0; | ||
326 | } | ||
327 | while (iov_off == iov->iov_len) { | ||
328 | iov_off = 0; | ||
329 | iov++; | ||
330 | } | ||
331 | |||
332 | to_copy = min(iov->iov_len - iov_off, RDS_FRAG_SIZE - frag_off); | ||
333 | to_copy = min_t(size_t, to_copy, size - copied); | ||
334 | to_copy = min_t(unsigned long, to_copy, len - copied); | ||
335 | |||
336 | rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag " | ||
337 | "[%p, %lu] + %lu\n", | ||
338 | to_copy, iov->iov_base, iov->iov_len, iov_off, | ||
339 | frag->f_page, frag->f_offset, frag_off); | ||
340 | |||
341 | /* XXX needs + offset for multiple recvs per page */ | ||
342 | ret = rds_page_copy_to_user(frag->f_page, | ||
343 | frag->f_offset + frag_off, | ||
344 | iov->iov_base + iov_off, | ||
345 | to_copy); | ||
346 | if (ret) { | ||
347 | copied = ret; | ||
348 | break; | ||
349 | } | ||
350 | |||
351 | iov_off += to_copy; | ||
352 | frag_off += to_copy; | ||
353 | copied += to_copy; | ||
354 | } | ||
355 | |||
356 | return copied; | ||
357 | } | ||
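
The copy loop above advances two cursors, one through the fragment list and one through the user iovec, and sizes each chunk by whichever of the fragment remainder, the iovec remainder and the bytes still wanted is smallest. The same cursor arithmetic, sketched with flat buffers standing in for page fragments and user memory:

/* Two-cursor copy skeleton, same arithmetic as the loop above but with
 * plain buffers instead of page fragments and user iovecs. */
#include <stdio.h>
#include <string.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

struct buf { const char *base; size_t len; };
struct dst { char *base; size_t len; };

static size_t copy_frags(const struct buf *frag, size_t nfrags,
			 struct dst *iov, size_t total)
{
	size_t fi = 0, frag_off = 0, iov_off = 0, copied = 0;

	while (copied < total && fi < nfrags) {
		size_t to_copy;

		if (frag_off == frag[fi].len) {		/* next fragment */
			fi++;
			frag_off = 0;
			continue;
		}
		if (iov_off == iov->len) {		/* next iovec entry */
			iov++;
			iov_off = 0;
			continue;
		}

		to_copy = MIN(frag[fi].len - frag_off, iov->len - iov_off);
		to_copy = MIN(to_copy, total - copied);

		memcpy(iov->base + iov_off, frag[fi].base + frag_off, to_copy);
		frag_off += to_copy;
		iov_off += to_copy;
		copied += to_copy;
	}
	return copied;
}

int main(void)
{
	char out1[4], out2[8];
	struct buf frags[] = { { "hello ", 6 }, { "world", 5 } };
	struct dst iovs[]  = { { out1, sizeof(out1) }, { out2, sizeof(out2) } };
	size_t n = copy_frags(frags, 2, iovs, 11);

	printf("copied %zu bytes: %.4s%.7s\n", n, out1, out2);
	return 0;
}
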
358 | |||
359 | /* ic starts out kzalloc()ed */ | ||
360 | void rds_ib_recv_init_ack(struct rds_ib_connection *ic) | ||
361 | { | ||
362 | struct ib_send_wr *wr = &ic->i_ack_wr; | ||
363 | struct ib_sge *sge = &ic->i_ack_sge; | ||
364 | |||
365 | sge->addr = ic->i_ack_dma; | ||
366 | sge->length = sizeof(struct rds_header); | ||
367 | sge->lkey = ic->i_mr->lkey; | ||
368 | |||
369 | wr->sg_list = sge; | ||
370 | wr->num_sge = 1; | ||
371 | wr->opcode = IB_WR_SEND; | ||
372 | wr->wr_id = RDS_IB_ACK_WR_ID; | ||
373 | wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; | ||
374 | } | ||
375 | |||
376 | /* | ||
377 | * You'd think that with reliable IB connections you wouldn't need to ack | ||
378 | * messages that have been received. The problem is that IB hardware generates | ||
379 | * an ack message before it has DMAed the message into memory. This creates a | ||
380 | * potential message loss if the HCA is disabled for any reason between when it | ||
381 | * sends the ack and before the message is DMAed and processed. This is only a | ||
382 | * potential issue if another HCA is available for fail-over. | ||
383 | * | ||
384 | * When the remote host receives our ack they'll free the sent message from | ||
385 | * their send queue. To decrease the latency of this we always send an ack | ||
386 | * immediately after we've received messages. | ||
387 | * | ||
388 | * For simplicity, we only have one ack in flight at a time. This puts | ||
389 | * pressure on senders to have deep enough send queues to absorb the latency of | ||
390 | * a single ack frame being in flight. This might not be good enough. | ||
391 | * | ||
392 | * This is implemented by having a long-lived send_wr and sge which point to a | ||
393 | * statically allocated ack frame. This ack wr does not fall under the ring | ||
394 | * accounting that the tx and rx wrs do. The QP attribute specifically makes | ||
395 | * room for it beyond the ring size. Send completion notices its special | ||
396 | * wr_id and avoids working with the ring in that case. | ||
397 | */ | ||
398 | static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, | ||
399 | int ack_required) | ||
400 | { | ||
401 | rds_ib_set_64bit(&ic->i_ack_next, seq); | ||
402 | if (ack_required) { | ||
403 | smp_mb__before_clear_bit(); | ||
404 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
405 | } | ||
406 | } | ||
407 | |||
408 | static u64 rds_ib_get_ack(struct rds_ib_connection *ic) | ||
409 | { | ||
410 | clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
411 | smp_mb__after_clear_bit(); | ||
412 | |||
413 | return ic->i_ack_next; | ||
414 | } | ||
415 | |||
416 | static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) | ||
417 | { | ||
418 | struct rds_header *hdr = ic->i_ack; | ||
419 | struct ib_send_wr *failed_wr; | ||
420 | u64 seq; | ||
421 | int ret; | ||
422 | |||
423 | seq = rds_ib_get_ack(ic); | ||
424 | |||
425 | rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq); | ||
426 | rds_message_populate_header(hdr, 0, 0, 0); | ||
427 | hdr->h_ack = cpu_to_be64(seq); | ||
428 | hdr->h_credit = adv_credits; | ||
429 | rds_message_make_checksum(hdr); | ||
430 | ic->i_ack_queued = jiffies; | ||
431 | |||
432 | ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr); | ||
433 | if (unlikely(ret)) { | ||
434 | /* Failed to send. Release the WR, and | ||
435 | * force another ACK. | ||
436 | */ | ||
437 | clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); | ||
438 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
439 | |||
440 | rds_ib_stats_inc(s_ib_ack_send_failure); | ||
441 | /* Need to finesse this later. */ | ||
442 | BUG(); | ||
443 | } else | ||
444 | rds_ib_stats_inc(s_ib_ack_sent); | ||
445 | } | ||
446 | |||
447 | /* | ||
448 | * There are 3 ways of getting acknowledgements to the peer: | ||
449 | * 1. We call rds_ib_attempt_ack from the recv completion handler | ||
450 | * to send an ACK-only frame. | ||
451 | * However, there can be only one such frame in the send queue | ||
452 | * at any time, so we may have to postpone it. | ||
453 | * 2. When another (data) packet is transmitted while there's | ||
454 | * an ACK in the queue, we piggyback the ACK sequence number | ||
455 | * on the data packet. | ||
456 | * 3. If the ACK WR is done sending, we get called from the | ||
457 | * send queue completion handler, and check whether there's | ||
458 | * another ACK pending (postponed because the WR was on the | ||
459 | * queue). If so, we transmit it. | ||
460 | * | ||
461 | * We maintain 2 variables: | ||
462 | * - i_ack_flags, which keeps track of whether the ACK WR | ||
463 | * is currently in the send queue or not (IB_ACK_IN_FLIGHT) | ||
464 | * - i_ack_next, which is the last sequence number we received | ||
465 | * | ||
466 | * Potentially, send queue and receive queue handlers can run concurrently. | ||
467 | * | ||
468 | * Reconnecting complicates this picture just slightly. When we | ||
469 | * reconnect, we may be seeing duplicate packets. The peer | ||
470 | * is retransmitting them, because it hasn't seen an ACK for | ||
471 | * them. It is important that we ACK these. | ||
472 | * | ||
473 | * ACK mitigation adds a header flag "ACK_REQUIRED"; any packet with | ||
474 | * this flag set *MUST* be acknowledged immediately. | ||
475 | */ | ||
476 | |||
477 | /* | ||
478 | * When we get here, we're called from the recv queue handler. | ||
479 | * Check whether we ought to transmit an ACK. | ||
480 | */ | ||
481 | void rds_ib_attempt_ack(struct rds_ib_connection *ic) | ||
482 | { | ||
483 | unsigned int adv_credits; | ||
484 | |||
485 | if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags)) | ||
486 | return; | ||
487 | |||
488 | if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) { | ||
489 | rds_ib_stats_inc(s_ib_ack_send_delayed); | ||
490 | return; | ||
491 | } | ||
492 | |||
493 | /* Can we get a send credit? */ | ||
494 | if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0)) { | ||
495 | rds_ib_stats_inc(s_ib_tx_throttle); | ||
496 | clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); | ||
497 | return; | ||
498 | } | ||
499 | |||
500 | clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
501 | rds_ib_send_ack(ic, adv_credits); | ||
502 | } | ||
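
The scheme laid out in the long comment above boils down to two flags: the receive path sets a "requested" flag, a single "in flight" flag guards the one ack WR, and the send-completion path retries any ack that had to be postponed. A user-space sketch of that interaction, using C11 atomics in place of the kernel's bitops and leaving out the credit check:

/* Minimal model of the IB_ACK_REQUESTED / IB_ACK_IN_FLIGHT interplay
 * documented above; the actual post and the send credits are omitted. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool ack_requested;
static atomic_bool ack_in_flight;

static void send_ack(void)
{
	printf("  ack frame posted\n");
}

/* receive path: a packet asked to be acked */
static void set_ack_required(void)
{
	atomic_store(&ack_requested, true);
}

/* called from the recv completion path */
static void attempt_ack(void)
{
	if (!atomic_load(&ack_requested))
		return;
	if (atomic_exchange(&ack_in_flight, true)) {
		printf("  ack WR busy, postponing\n");
		return;
	}
	atomic_store(&ack_requested, false);
	send_ack();
}

/* called when the ack WR completes */
static void ack_send_complete(void)
{
	atomic_store(&ack_in_flight, false);
	attempt_ack();	/* pick up any ack that was postponed */
}

int main(void)
{
	set_ack_required();
	attempt_ack();		/* posts the first ack */
	set_ack_required();
	attempt_ack();		/* WR still in flight, so this one is postponed */
	ack_send_complete();	/* completion retries and posts it */
	return 0;
}
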
503 | |||
504 | /* | ||
505 | * We get here from the send completion handler, when the | ||
506 | * adapter tells us the ACK frame was sent. | ||
507 | */ | ||
508 | void rds_ib_ack_send_complete(struct rds_ib_connection *ic) | ||
509 | { | ||
510 | clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); | ||
511 | rds_ib_attempt_ack(ic); | ||
512 | } | ||
513 | |||
514 | /* | ||
515 | * This is called by the regular xmit code when it wants to piggyback | ||
516 | * an ACK on an outgoing frame. | ||
517 | */ | ||
518 | u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic) | ||
519 | { | ||
520 | if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags)) | ||
521 | rds_ib_stats_inc(s_ib_ack_send_piggybacked); | ||
522 | return rds_ib_get_ack(ic); | ||
523 | } | ||
524 | |||
525 | /* | ||
526 | * It's kind of lame that we're copying from the posted receive pages into | ||
527 | * long-lived bitmaps. We could have posted the bitmaps and rdma written into | ||
528 | * them. But receiving new congestion bitmaps should be a *rare* event, so | ||
529 | * hopefully we won't need to invest that complexity in making it more | ||
530 | * efficient. By copying we can share a simpler core with TCP which has to | ||
531 | * copy. | ||
532 | */ | ||
533 | static void rds_ib_cong_recv(struct rds_connection *conn, | ||
534 | struct rds_ib_incoming *ibinc) | ||
535 | { | ||
536 | struct rds_cong_map *map; | ||
537 | unsigned int map_off; | ||
538 | unsigned int map_page; | ||
539 | struct rds_page_frag *frag; | ||
540 | unsigned long frag_off; | ||
541 | unsigned long to_copy; | ||
542 | unsigned long copied; | ||
543 | uint64_t uncongested = 0; | ||
544 | void *addr; | ||
545 | |||
546 | /* catch completely corrupt packets */ | ||
547 | if (be32_to_cpu(ibinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES) | ||
548 | return; | ||
549 | |||
550 | map = conn->c_fcong; | ||
551 | map_page = 0; | ||
552 | map_off = 0; | ||
553 | |||
554 | frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item); | ||
555 | frag_off = 0; | ||
556 | |||
557 | copied = 0; | ||
558 | |||
559 | while (copied < RDS_CONG_MAP_BYTES) { | ||
560 | uint64_t *src, *dst; | ||
561 | unsigned int k; | ||
562 | |||
563 | to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); | ||
564 | BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ | ||
565 | |||
566 | addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0); | ||
567 | |||
568 | src = addr + frag_off; | ||
569 | dst = (void *)map->m_page_addrs[map_page] + map_off; | ||
570 | for (k = 0; k < to_copy; k += 8) { | ||
571 | /* Record ports that became uncongested, i.e. | ||
572 | * bits that changed from 1 to 0. */ | ||
573 | uncongested |= ~(*src) & *dst; | ||
574 | *dst++ = *src++; | ||
575 | } | ||
576 | kunmap_atomic(addr, KM_SOFTIRQ0); | ||
577 | |||
578 | copied += to_copy; | ||
579 | |||
580 | map_off += to_copy; | ||
581 | if (map_off == PAGE_SIZE) { | ||
582 | map_off = 0; | ||
583 | map_page++; | ||
584 | } | ||
585 | |||
586 | frag_off += to_copy; | ||
587 | if (frag_off == RDS_FRAG_SIZE) { | ||
588 | frag = list_entry(frag->f_item.next, | ||
589 | struct rds_page_frag, f_item); | ||
590 | frag_off = 0; | ||
591 | } | ||
592 | } | ||
593 | |||
594 | /* the congestion map is in little endian order */ | ||
595 | uncongested = le64_to_cpu(uncongested); | ||
596 | |||
597 | rds_cong_map_updated(map, uncongested); | ||
598 | } | ||
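
The inner loop of rds_ib_cong_recv() installs the incoming words over the stored map while accumulating, per 64-bit word, the bits that were set in the stored map but are clear in the update. A small stand-alone demonstration of that word-wise accumulation, with invented map contents:

/* Word-wise update of a congestion map, mirroring the inner loop above:
 * copy the incoming words over the stored ones while collecting the
 * bits that the update cleared. */
#include <inttypes.h>
#include <stdio.h>

#define MAP_WORDS 4

int main(void)
{
	uint64_t stored[MAP_WORDS]   = { 0xff, 0x0, 0x8001, 0x10 };
	uint64_t incoming[MAP_WORDS] = { 0x0f, 0x1, 0x8000, 0x10 };
	uint64_t changed = 0;
	int k;

	for (k = 0; k < MAP_WORDS; k++) {
		/* bits set in the old word but clear in the new one */
		changed |= ~incoming[k] & stored[k];
		stored[k] = incoming[k];
	}

	printf("bits cleared by this update: 0x%" PRIx64 "\n", changed);
	return 0;
}
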
599 | |||
600 | /* | ||
601 | * Rings are posted with all the allocations they'll need to queue the | ||
602 | * incoming message to the receiving socket so this can't fail. | ||
603 | * All fragments start with a header, so we can make sure we're not receiving | ||
604 | * garbage, and we can tell a small 8 byte fragment from an ACK frame. | ||
605 | */ | ||
606 | struct rds_ib_ack_state { | ||
607 | u64 ack_next; | ||
608 | u64 ack_recv; | ||
609 | unsigned int ack_required:1; | ||
610 | unsigned int ack_next_valid:1; | ||
611 | unsigned int ack_recv_valid:1; | ||
612 | }; | ||
613 | |||
614 | static void rds_ib_process_recv(struct rds_connection *conn, | ||
615 | struct rds_ib_recv_work *recv, u32 byte_len, | ||
616 | struct rds_ib_ack_state *state) | ||
617 | { | ||
618 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
619 | struct rds_ib_incoming *ibinc = ic->i_ibinc; | ||
620 | struct rds_header *ihdr, *hdr; | ||
621 | |||
622 | /* XXX shut down the connection if port 0,0 are seen? */ | ||
623 | |||
624 | rdsdebug("ic %p ibinc %p recv %p byte len %u\n", ic, ibinc, recv, | ||
625 | byte_len); | ||
626 | |||
627 | if (byte_len < sizeof(struct rds_header)) { | ||
628 | rds_ib_conn_error(conn, "incoming message " | ||
629 | "from %pI4 didn't inclue a " | ||
630 | "header, disconnecting and " | ||
631 | "reconnecting\n", | ||
632 | &conn->c_faddr); | ||
633 | return; | ||
634 | } | ||
635 | byte_len -= sizeof(struct rds_header); | ||
636 | |||
637 | ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs]; | ||
638 | |||
639 | /* Validate the checksum. */ | ||
640 | if (!rds_message_verify_checksum(ihdr)) { | ||
641 | rds_ib_conn_error(conn, "incoming message " | ||
642 | "from %pI4 has corrupted header - " | ||
643 | "forcing a reconnect\n", | ||
644 | &conn->c_faddr); | ||
645 | rds_stats_inc(s_recv_drop_bad_checksum); | ||
646 | return; | ||
647 | } | ||
648 | |||
649 | /* Process the ACK sequence which comes with every packet */ | ||
650 | state->ack_recv = be64_to_cpu(ihdr->h_ack); | ||
651 | state->ack_recv_valid = 1; | ||
652 | |||
653 | /* Process the credits update if there was one */ | ||
654 | if (ihdr->h_credit) | ||
655 | rds_ib_send_add_credits(conn, ihdr->h_credit); | ||
656 | |||
657 | if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && byte_len == 0) { | ||
658 | /* This is an ACK-only packet. It gets special | ||
659 | * treatment here because, historically, ACKs | ||
660 | * were rather special beasts. | ||
661 | */ | ||
662 | rds_ib_stats_inc(s_ib_ack_received); | ||
663 | |||
664 | /* | ||
665 | * Usually the frags make their way on to incs and are then freed as | ||
666 | * the inc is freed. We don't go that route, so we have to drop the | ||
667 | * page ref ourselves. We can't just leave the page on the recv | ||
668 | * because that confuses the dma mapping of pages and each recv's use | ||
669 | * of a partial page. We can leave the frag, though; it will be | ||
670 | * reused. | ||
671 | * | ||
672 | * FIXME: Fold this into the code path below. | ||
673 | */ | ||
674 | rds_ib_frag_drop_page(recv->r_frag); | ||
675 | return; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * If we don't already have an inc on the connection then this | ||
680 | * fragment has a header and starts a message. Copy its header | ||
681 | * into the inc and save the inc so we can hang upcoming fragments | ||
682 | * off its list. | ||
683 | */ | ||
684 | if (ibinc == NULL) { | ||
685 | ibinc = recv->r_ibinc; | ||
686 | recv->r_ibinc = NULL; | ||
687 | ic->i_ibinc = ibinc; | ||
688 | |||
689 | hdr = &ibinc->ii_inc.i_hdr; | ||
690 | memcpy(hdr, ihdr, sizeof(*hdr)); | ||
691 | ic->i_recv_data_rem = be32_to_cpu(hdr->h_len); | ||
692 | |||
693 | rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc, | ||
694 | ic->i_recv_data_rem, hdr->h_flags); | ||
695 | } else { | ||
696 | hdr = &ibinc->ii_inc.i_hdr; | ||
697 | /* We can't just use memcmp here; fragments of a | ||
698 | * single message may carry different ACKs */ | ||
699 | if (hdr->h_sequence != ihdr->h_sequence | ||
700 | || hdr->h_len != ihdr->h_len | ||
701 | || hdr->h_sport != ihdr->h_sport | ||
702 | || hdr->h_dport != ihdr->h_dport) { | ||
703 | rds_ib_conn_error(conn, | ||
704 | "fragment header mismatch; forcing reconnect\n"); | ||
705 | return; | ||
706 | } | ||
707 | } | ||
708 | |||
709 | list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags); | ||
710 | recv->r_frag = NULL; | ||
711 | |||
712 | if (ic->i_recv_data_rem > RDS_FRAG_SIZE) | ||
713 | ic->i_recv_data_rem -= RDS_FRAG_SIZE; | ||
714 | else { | ||
715 | ic->i_recv_data_rem = 0; | ||
716 | ic->i_ibinc = NULL; | ||
717 | |||
718 | if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) | ||
719 | rds_ib_cong_recv(conn, ibinc); | ||
720 | else { | ||
721 | rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr, | ||
722 | &ibinc->ii_inc, GFP_ATOMIC, | ||
723 | KM_SOFTIRQ0); | ||
724 | state->ack_next = be64_to_cpu(hdr->h_sequence); | ||
725 | state->ack_next_valid = 1; | ||
726 | } | ||
727 | |||
728 | /* Evaluate the ACK_REQUIRED flag *after* we received | ||
729 | * the complete frame, and after bumping the next_rx | ||
730 | * sequence. */ | ||
731 | if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) { | ||
732 | rds_stats_inc(s_recv_ack_required); | ||
733 | state->ack_required = 1; | ||
734 | } | ||
735 | |||
736 | rds_inc_put(&ibinc->ii_inc); | ||
737 | } | ||
738 | } | ||
739 | |||
740 | /* | ||
741 | * Plucking the oldest entry from the ring can be done concurrently with | ||
742 | * the thread refilling the ring. Each ring operation is protected by | ||
743 | * spinlocks and the transient state of refilling doesn't change the | ||
744 | * recording of which entry is oldest. | ||
745 | * | ||
746 | * This relies on IB only calling one cq comp_handler for each cq so that | ||
747 | * there will only be one caller of rds_recv_incoming() per RDS connection. | ||
748 | */ | ||
749 | void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context) | ||
750 | { | ||
751 | struct rds_connection *conn = context; | ||
752 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
753 | struct ib_wc wc; | ||
754 | struct rds_ib_ack_state state = { 0, }; | ||
755 | struct rds_ib_recv_work *recv; | ||
756 | |||
757 | rdsdebug("conn %p cq %p\n", conn, cq); | ||
758 | |||
759 | rds_ib_stats_inc(s_ib_rx_cq_call); | ||
760 | |||
761 | ib_req_notify_cq(cq, IB_CQ_SOLICITED); | ||
762 | |||
763 | while (ib_poll_cq(cq, 1, &wc) > 0) { | ||
764 | rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", | ||
765 | (unsigned long long)wc.wr_id, wc.status, wc.byte_len, | ||
766 | be32_to_cpu(wc.ex.imm_data)); | ||
767 | rds_ib_stats_inc(s_ib_rx_cq_event); | ||
768 | |||
769 | recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)]; | ||
770 | |||
771 | rds_ib_recv_unmap_page(ic, recv); | ||
772 | |||
773 | /* | ||
774 | * Also process recvs in connecting state because it is possible | ||
775 | * to get a recv completion _before_ the rdmacm ESTABLISHED | ||
776 | * event is processed. | ||
777 | */ | ||
778 | if (rds_conn_up(conn) || rds_conn_connecting(conn)) { | ||
779 | /* We expect errors as the qp is drained during shutdown */ | ||
780 | if (wc.status == IB_WC_SUCCESS) { | ||
781 | rds_ib_process_recv(conn, recv, wc.byte_len, &state); | ||
782 | } else { | ||
783 | rds_ib_conn_error(conn, "recv completion on " | ||
784 | "%pI4 had status %u, disconnecting and " | ||
785 | "reconnecting\n", &conn->c_faddr, | ||
786 | wc.status); | ||
787 | } | ||
788 | } | ||
789 | |||
790 | rds_ib_ring_free(&ic->i_recv_ring, 1); | ||
791 | } | ||
792 | |||
793 | if (state.ack_next_valid) | ||
794 | rds_ib_set_ack(ic, state.ack_next, state.ack_required); | ||
795 | if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) { | ||
796 | rds_send_drop_acked(conn, state.ack_recv, NULL); | ||
797 | ic->i_ack_recv = state.ack_recv; | ||
798 | } | ||
799 | if (rds_conn_up(conn)) | ||
800 | rds_ib_attempt_ack(ic); | ||
801 | |||
802 | /* If we ever end up with a really empty receive ring, we're | ||
803 | * in deep trouble, as the sender will definitely see RNR | ||
804 | * timeouts. */ | ||
805 | if (rds_ib_ring_empty(&ic->i_recv_ring)) | ||
806 | rds_ib_stats_inc(s_ib_rx_ring_empty); | ||
807 | |||
808 | /* | ||
809 | * If the ring is running low, then schedule the thread to refill. | ||
810 | */ | ||
811 | if (rds_ib_ring_low(&ic->i_recv_ring)) | ||
812 | queue_delayed_work(rds_wq, &conn->c_recv_w, 0); | ||
813 | } | ||
814 | |||
815 | int rds_ib_recv(struct rds_connection *conn) | ||
816 | { | ||
817 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
818 | int ret = 0; | ||
819 | |||
820 | rdsdebug("conn %p\n", conn); | ||
821 | |||
822 | /* | ||
823 | * If we get a temporary posting failure in this context then | ||
824 | * we're really low and we want the caller to back off for a bit. | ||
825 | */ | ||
826 | mutex_lock(&ic->i_recv_mutex); | ||
827 | if (rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0)) | ||
828 | ret = -ENOMEM; | ||
829 | else | ||
830 | rds_ib_stats_inc(s_ib_rx_refill_from_thread); | ||
831 | mutex_unlock(&ic->i_recv_mutex); | ||
832 | |||
833 | if (rds_conn_up(conn)) | ||
834 | rds_ib_attempt_ack(ic); | ||
835 | |||
836 | return ret; | ||
837 | } | ||
838 | |||
839 | int __init rds_ib_recv_init(void) | ||
840 | { | ||
841 | struct sysinfo si; | ||
842 | int ret = -ENOMEM; | ||
843 | |||
844 | /* Default to roughly one third of all available RAM for recv memory */ | ||
845 | si_meminfo(&si); | ||
846 | rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE; | ||
847 | |||
848 | rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming", | ||
849 | sizeof(struct rds_ib_incoming), | ||
850 | 0, 0, NULL); | ||
851 | if (rds_ib_incoming_slab == NULL) | ||
852 | goto out; | ||
853 | |||
854 | rds_ib_frag_slab = kmem_cache_create("rds_ib_frag", | ||
855 | sizeof(struct rds_page_frag), | ||
856 | 0, 0, NULL); | ||
857 | if (rds_ib_frag_slab == NULL) | ||
858 | kmem_cache_destroy(rds_ib_incoming_slab); | ||
859 | else | ||
860 | ret = 0; | ||
861 | out: | ||
862 | return ret; | ||
863 | } | ||
864 | |||
865 | void rds_ib_recv_exit(void) | ||
866 | { | ||
867 | kmem_cache_destroy(rds_ib_incoming_slab); | ||
868 | kmem_cache_destroy(rds_ib_frag_slab); | ||
869 | } | ||
diff --git a/net/rds/ib_ring.c b/net/rds/ib_ring.c new file mode 100644 index 000000000000..99a6ccae964c --- /dev/null +++ b/net/rds/ib_ring.c | |||
@@ -0,0 +1,168 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | |||
35 | #include "rds.h" | ||
36 | #include "ib.h" | ||
37 | |||
38 | /* | ||
39 | * Locking for IB rings. | ||
40 | * We assume that allocation is always protected by a mutex | ||
41 | * in the caller (this is a valid assumption for the current | ||
42 | * implementation). | ||
43 | * | ||
44 | * Freeing always happens in an interrupt, and hence only | ||
45 | * races with allocations, but not with other free()s. | ||
46 | * | ||
47 | * The interaction between allocation and freeing is that | ||
48 | * the alloc code has to determine the number of free entries. | ||
49 | * To this end, we maintain two counters; an allocation counter | ||
50 | * and a free counter. Both are allowed to run freely, and wrap | ||
51 | * around. | ||
52 | * The number of used entries is always (alloc_ctr - free_ctr) % NR. | ||
53 | * | ||
54 | * The current implementation makes free_ctr atomic. When the | ||
55 | * caller finds an allocation fails, it should set an "alloc fail" | ||
56 | * bit and retry the allocation. The "alloc fail" bit essentially tells | ||
57 | * the CQ completion handlers to wake it up after freeing some | ||
58 | * more entries. | ||
59 | */ | ||
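A minimal standalone sketch of the counter scheme described above: both counters run freely as 32-bit unsigned values, so the occupancy computed by plain unsigned subtraction stays correct across wrap-around, in the same spirit as __rds_ib_ring_used() below. The ring size and the sample counter values are arbitrary.

	#include <stdint.h>
	#include <assert.h>

	/* Two free-running u32 counters; unsigned subtraction is performed
	 * modulo 2^32, so the difference survives counter wrap-around. */
	struct ring_ctrs {
		uint32_t alloc_ctr;	/* bumped by the (mutex-protected) allocator */
		uint32_t free_ctr;	/* bumped by the completion handler */
		uint32_t nr;		/* ring size */
	};

	static uint32_t ring_used(const struct ring_ctrs *r)
	{
		uint32_t diff = r->alloc_ctr - r->free_ctr;	/* wraps correctly */

		assert(diff <= r->nr);
		return diff;
	}

	int main(void)
	{
		/* Counters straddling the wrap point still report 3 entries in use. */
		struct ring_ctrs r = { .alloc_ctr = 2u, .free_ctr = 0xffffffffu, .nr = 256 };

		assert(ring_used(&r) == 3);
		return 0;
	}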
60 | |||
61 | /* | ||
62 | * This only happens on shutdown. | ||
63 | */ | ||
64 | DECLARE_WAIT_QUEUE_HEAD(rds_ib_ring_empty_wait); | ||
65 | |||
66 | void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr) | ||
67 | { | ||
68 | memset(ring, 0, sizeof(*ring)); | ||
69 | ring->w_nr = nr; | ||
70 | rdsdebug("ring %p nr %u\n", ring, ring->w_nr); | ||
71 | } | ||
72 | |||
73 | static inline u32 __rds_ib_ring_used(struct rds_ib_work_ring *ring) | ||
74 | { | ||
75 | u32 diff; | ||
76 | |||
77 | /* This assumes that atomic_t has at least as many bits as u32 */ | ||
78 | diff = ring->w_alloc_ctr - (u32) atomic_read(&ring->w_free_ctr); | ||
79 | BUG_ON(diff > ring->w_nr); | ||
80 | |||
81 | return diff; | ||
82 | } | ||
83 | |||
84 | void rds_ib_ring_resize(struct rds_ib_work_ring *ring, u32 nr) | ||
85 | { | ||
86 | /* We only ever get called from the connection setup code, | ||
87 | * prior to creating the QP. */ | ||
88 | BUG_ON(__rds_ib_ring_used(ring)); | ||
89 | ring->w_nr = nr; | ||
90 | } | ||
91 | |||
92 | static int __rds_ib_ring_empty(struct rds_ib_work_ring *ring) | ||
93 | { | ||
94 | return __rds_ib_ring_used(ring) == 0; | ||
95 | } | ||
96 | |||
97 | u32 rds_ib_ring_alloc(struct rds_ib_work_ring *ring, u32 val, u32 *pos) | ||
98 | { | ||
99 | u32 ret = 0, avail; | ||
100 | |||
101 | avail = ring->w_nr - __rds_ib_ring_used(ring); | ||
102 | |||
103 | rdsdebug("ring %p val %u next %u free %u\n", ring, val, | ||
104 | ring->w_alloc_ptr, avail); | ||
105 | |||
106 | if (val && avail) { | ||
107 | ret = min(val, avail); | ||
108 | *pos = ring->w_alloc_ptr; | ||
109 | |||
110 | ring->w_alloc_ptr = (ring->w_alloc_ptr + ret) % ring->w_nr; | ||
111 | ring->w_alloc_ctr += ret; | ||
112 | } | ||
113 | |||
114 | return ret; | ||
115 | } | ||
116 | |||
117 | void rds_ib_ring_free(struct rds_ib_work_ring *ring, u32 val) | ||
118 | { | ||
119 | ring->w_free_ptr = (ring->w_free_ptr + val) % ring->w_nr; | ||
120 | atomic_add(val, &ring->w_free_ctr); | ||
121 | |||
122 | if (__rds_ib_ring_empty(ring) && | ||
123 | waitqueue_active(&rds_ib_ring_empty_wait)) | ||
124 | wake_up(&rds_ib_ring_empty_wait); | ||
125 | } | ||
126 | |||
127 | void rds_ib_ring_unalloc(struct rds_ib_work_ring *ring, u32 val) | ||
128 | { | ||
129 | ring->w_alloc_ptr = (ring->w_alloc_ptr - val) % ring->w_nr; | ||
130 | ring->w_alloc_ctr -= val; | ||
131 | } | ||
132 | |||
133 | int rds_ib_ring_empty(struct rds_ib_work_ring *ring) | ||
134 | { | ||
135 | return __rds_ib_ring_empty(ring); | ||
136 | } | ||
137 | |||
138 | int rds_ib_ring_low(struct rds_ib_work_ring *ring) | ||
139 | { | ||
140 | return __rds_ib_ring_used(ring) <= (ring->w_nr >> 2); | ||
141 | } | ||
142 | |||
143 | /* | ||
144 | * returns the oldest alloced ring entry. This will be the next one | ||
145 | * freed. This can't be called if there are none allocated. | ||
146 | */ | ||
147 | u32 rds_ib_ring_oldest(struct rds_ib_work_ring *ring) | ||
148 | { | ||
149 | return ring->w_free_ptr; | ||
150 | } | ||
151 | |||
152 | /* | ||
153 | * returns the number of completed work requests. | ||
154 | */ | ||
155 | |||
156 | u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest) | ||
157 | { | ||
158 | u32 ret; | ||
159 | |||
160 | if (oldest <= (unsigned long long)wr_id) | ||
161 | ret = (unsigned long long)wr_id - oldest + 1; | ||
162 | else | ||
163 | ret = ring->w_nr - oldest + (unsigned long long)wr_id + 1; | ||
164 | |||
165 | rdsdebug("ring %p ret %u wr_id %u oldest %u\n", ring, ret, | ||
166 | wr_id, oldest); | ||
167 | return ret; | ||
168 | } | ||
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c new file mode 100644 index 000000000000..cb6c52cb1c4c --- /dev/null +++ b/net/rds/ib_send.c | |||
@@ -0,0 +1,874 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/in.h> | ||
35 | #include <linux/device.h> | ||
36 | #include <linux/dmapool.h> | ||
37 | |||
38 | #include "rds.h" | ||
39 | #include "rdma.h" | ||
40 | #include "ib.h" | ||
41 | |||
42 | static void rds_ib_send_rdma_complete(struct rds_message *rm, | ||
43 | int wc_status) | ||
44 | { | ||
45 | int notify_status; | ||
46 | |||
47 | switch (wc_status) { | ||
48 | case IB_WC_WR_FLUSH_ERR: | ||
49 | return; | ||
50 | |||
51 | case IB_WC_SUCCESS: | ||
52 | notify_status = RDS_RDMA_SUCCESS; | ||
53 | break; | ||
54 | |||
55 | case IB_WC_REM_ACCESS_ERR: | ||
56 | notify_status = RDS_RDMA_REMOTE_ERROR; | ||
57 | break; | ||
58 | |||
59 | default: | ||
60 | notify_status = RDS_RDMA_OTHER_ERROR; | ||
61 | break; | ||
62 | } | ||
63 | rds_rdma_send_complete(rm, notify_status); | ||
64 | } | ||
65 | |||
66 | static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, | ||
67 | struct rds_rdma_op *op) | ||
68 | { | ||
69 | if (op->r_mapped) { | ||
70 | ib_dma_unmap_sg(ic->i_cm_id->device, | ||
71 | op->r_sg, op->r_nents, | ||
72 | op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); | ||
73 | op->r_mapped = 0; | ||
74 | } | ||
75 | } | ||
76 | |||
77 | static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, | ||
78 | struct rds_ib_send_work *send, | ||
79 | int wc_status) | ||
80 | { | ||
81 | struct rds_message *rm = send->s_rm; | ||
82 | |||
83 | rdsdebug("ic %p send %p rm %p\n", ic, send, rm); | ||
84 | |||
85 | ib_dma_unmap_sg(ic->i_cm_id->device, | ||
86 | rm->m_sg, rm->m_nents, | ||
87 | DMA_TO_DEVICE); | ||
88 | |||
89 | if (rm->m_rdma_op != NULL) { | ||
90 | rds_ib_send_unmap_rdma(ic, rm->m_rdma_op); | ||
91 | |||
92 | /* If the user asked for a completion notification on this | ||
93 | * message, we can implement three different semantics: | ||
94 | * 1. Notify when we received the ACK on the RDS message | ||
95 | * that was queued with the RDMA. This provides reliable | ||
96 | * notification of RDMA status at the expense of a one-way | ||
97 | * packet delay. | ||
98 | * 2. Notify when the IB stack gives us the completion event for | ||
99 | * the RDMA operation. | ||
100 | * 3. Notify when the IB stack gives us the completion event for | ||
101 | * the accompanying RDS messages. | ||
102 | * Here, we implement approach #3. To implement approach #2, | ||
103 | * call rds_rdma_send_complete from the cq_handler. To implement #1, | ||
104 | * don't call rds_rdma_send_complete at all, and fall back to the notify | ||
105 | * handling in the ACK processing code. | ||
106 | * | ||
107 | * Note: There's no need to explicitly sync any RDMA buffers using | ||
108 | * ib_dma_sync_sg_for_cpu - the completion for the RDMA | ||
109 | * operation itself unmapped the RDMA buffers, which takes care | ||
110 | * of synching. | ||
111 | */ | ||
112 | rds_ib_send_rdma_complete(rm, wc_status); | ||
113 | |||
114 | if (rm->m_rdma_op->r_write) | ||
115 | rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); | ||
116 | else | ||
117 | rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); | ||
118 | } | ||
119 | |||
120 | /* If anyone waited for this message to get flushed out, wake | ||
121 | * them up now */ | ||
122 | rds_message_unmapped(rm); | ||
123 | |||
124 | rds_message_put(rm); | ||
125 | send->s_rm = NULL; | ||
126 | } | ||
127 | |||
128 | void rds_ib_send_init_ring(struct rds_ib_connection *ic) | ||
129 | { | ||
130 | struct rds_ib_send_work *send; | ||
131 | u32 i; | ||
132 | |||
133 | for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { | ||
134 | struct ib_sge *sge; | ||
135 | |||
136 | send->s_rm = NULL; | ||
137 | send->s_op = NULL; | ||
138 | |||
139 | send->s_wr.wr_id = i; | ||
140 | send->s_wr.sg_list = send->s_sge; | ||
141 | send->s_wr.num_sge = 1; | ||
142 | send->s_wr.opcode = IB_WR_SEND; | ||
143 | send->s_wr.send_flags = 0; | ||
144 | send->s_wr.ex.imm_data = 0; | ||
145 | |||
146 | sge = rds_ib_data_sge(ic, send->s_sge); | ||
147 | sge->lkey = ic->i_mr->lkey; | ||
148 | |||
149 | sge = rds_ib_header_sge(ic, send->s_sge); | ||
150 | sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); | ||
151 | sge->length = sizeof(struct rds_header); | ||
152 | sge->lkey = ic->i_mr->lkey; | ||
153 | } | ||
154 | } | ||
155 | |||
156 | void rds_ib_send_clear_ring(struct rds_ib_connection *ic) | ||
157 | { | ||
158 | struct rds_ib_send_work *send; | ||
159 | u32 i; | ||
160 | |||
161 | for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { | ||
162 | if (send->s_wr.opcode == 0xdead) | ||
163 | continue; | ||
164 | if (send->s_rm) | ||
165 | rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); | ||
166 | if (send->s_op) | ||
167 | rds_ib_send_unmap_rdma(ic, send->s_op); | ||
168 | } | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * The _oldest/_free ring operations here race cleanly with the alloc/unalloc | ||
173 | * operations performed in the send path. As the sender allocs and potentially | ||
174 | * unallocs the next free entry in the ring, it doesn't alter which is | ||
175 | * the next to be freed, which is what this is concerned with. | ||
176 | */ | ||
177 | void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) | ||
178 | { | ||
179 | struct rds_connection *conn = context; | ||
180 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
181 | struct ib_wc wc; | ||
182 | struct rds_ib_send_work *send; | ||
183 | u32 completed; | ||
184 | u32 oldest; | ||
185 | u32 i = 0; | ||
186 | int ret; | ||
187 | |||
188 | rdsdebug("cq %p conn %p\n", cq, conn); | ||
189 | rds_ib_stats_inc(s_ib_tx_cq_call); | ||
190 | ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); | ||
191 | if (ret) | ||
192 | rdsdebug("ib_req_notify_cq send failed: %d\n", ret); | ||
193 | |||
194 | while (ib_poll_cq(cq, 1, &wc) > 0) { | ||
195 | rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", | ||
196 | (unsigned long long)wc.wr_id, wc.status, wc.byte_len, | ||
197 | be32_to_cpu(wc.ex.imm_data)); | ||
198 | rds_ib_stats_inc(s_ib_tx_cq_event); | ||
199 | |||
200 | if (wc.wr_id == RDS_IB_ACK_WR_ID) { | ||
201 | if (ic->i_ack_queued + HZ/2 < jiffies) | ||
202 | rds_ib_stats_inc(s_ib_tx_stalled); | ||
203 | rds_ib_ack_send_complete(ic); | ||
204 | continue; | ||
205 | } | ||
206 | |||
207 | oldest = rds_ib_ring_oldest(&ic->i_send_ring); | ||
208 | |||
209 | completed = rds_ib_ring_completed(&ic->i_send_ring, wc.wr_id, oldest); | ||
210 | |||
211 | for (i = 0; i < completed; i++) { | ||
212 | send = &ic->i_sends[oldest]; | ||
213 | |||
214 | /* In the error case, wc.opcode sometimes contains garbage */ | ||
215 | switch (send->s_wr.opcode) { | ||
216 | case IB_WR_SEND: | ||
217 | if (send->s_rm) | ||
218 | rds_ib_send_unmap_rm(ic, send, wc.status); | ||
219 | break; | ||
220 | case IB_WR_RDMA_WRITE: | ||
221 | case IB_WR_RDMA_READ: | ||
222 | /* Nothing to be done - the SG list will be unmapped | ||
223 | * when the SEND completes. */ | ||
224 | break; | ||
225 | default: | ||
226 | if (printk_ratelimit()) | ||
227 | printk(KERN_NOTICE | ||
228 | "RDS/IB: %s: unexpected opcode 0x%x in WR!\n", | ||
229 | __func__, send->s_wr.opcode); | ||
230 | break; | ||
231 | } | ||
232 | |||
233 | send->s_wr.opcode = 0xdead; | ||
234 | send->s_wr.num_sge = 1; | ||
235 | if (send->s_queued + HZ/2 < jiffies) | ||
236 | rds_ib_stats_inc(s_ib_tx_stalled); | ||
237 | |||
238 | /* If an RDMA operation produced an error, signal this right | ||
239 | * away. If we don't, the subsequent SEND that goes with this | ||
240 | * RDMA will be canceled with ERR_WFLUSH, and the application | ||
241 | * will never learn that the RDMA failed. */ | ||
242 | if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) { | ||
243 | struct rds_message *rm; | ||
244 | |||
245 | rm = rds_send_get_message(conn, send->s_op); | ||
246 | if (rm) | ||
247 | rds_ib_send_rdma_complete(rm, wc.status); | ||
248 | } | ||
249 | |||
250 | oldest = (oldest + 1) % ic->i_send_ring.w_nr; | ||
251 | } | ||
252 | |||
253 | rds_ib_ring_free(&ic->i_send_ring, completed); | ||
254 | |||
255 | if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) | ||
256 | || test_bit(0, &conn->c_map_queued)) | ||
257 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); | ||
258 | |||
259 | /* We expect errors as the qp is drained during shutdown */ | ||
260 | if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) { | ||
261 | rds_ib_conn_error(conn, | ||
262 | "send completion on %pI4 " | ||
263 | "had status %u, disconnecting and reconnecting\n", | ||
264 | &conn->c_faddr, wc.status); | ||
265 | } | ||
266 | } | ||
267 | } | ||
268 | |||
269 | /* | ||
270 | * This is the main function for allocating credits when sending | ||
271 | * messages. | ||
272 | * | ||
273 | * Conceptually, we have two counters: | ||
274 | * - send credits: this tells us how many WRs we're allowed | ||
275 | * to submit without overrunning the receiver's queue. For | ||
276 | * each SEND WR we post, we decrement this by one. | ||
277 | * | ||
278 | * - posted credits: this tells us how many WRs we recently | ||
279 | * posted to the receive queue. This value is transferred | ||
280 | * to the peer as a "credit update" in a RDS header field. | ||
281 | * Every time we transmit credits to the peer, we subtract | ||
282 | * the amount of transferred credits from this counter. | ||
283 | * | ||
284 | * It is essential that we avoid situations where both sides have | ||
285 | * exhausted their send credits, and are unable to send new credits | ||
286 | * to the peer. We achieve this by requiring that we send at least | ||
287 | * one credit update to the peer before exhausting our credits. | ||
288 | * When new credits arrive, we subtract one credit that is withheld | ||
289 | * until we've posted new buffers and are ready to transmit these | ||
290 | * credits (see rds_ib_send_add_credits below). | ||
291 | * | ||
292 | * The RDS send code is essentially single-threaded; rds_send_xmit | ||
293 | * grabs c_send_lock to ensure exclusive access to the send ring. | ||
294 | * However, the ACK sending code is independent and can race with | ||
295 | * message SENDs. | ||
296 | * | ||
297 | * In the send path, we need to update the counters for send credits | ||
298 | * and the counter of posted buffers atomically - when we use the | ||
299 | * last available credit, we cannot allow another thread to race us | ||
300 | * and grab the posted credits counter. Hence, we have to use a | ||
301 | * spinlock to protect the credit counter, or use atomics. | ||
302 | * | ||
303 | * Spinlocks shared between the send and the receive path are bad, | ||
304 | * because they create unnecessary delays. An early implementation | ||
305 | * using a spinlock showed a 5% degradation in throughput at some | ||
306 | * loads. | ||
307 | * | ||
308 | * This implementation avoids spinlocks completely, putting both | ||
309 | * counters into a single atomic, and updating that atomic using | ||
310 | * atomic_add (in the receive path, when receiving fresh credits), | ||
311 | * and using atomic_cmpxchg when updating the two counters. | ||
312 | */ | ||
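A simplified user-space sketch of the single-atomic scheme this comment describes: send credits and posted credits packed into one word and consumed with a compare-and-swap retry loop. The 16/16 bit split and the helper names here are stand-ins assumed for illustration, not taken from ib.h, and the sketch omits the refinements in rds_ib_send_grab_credits() below (reserving the last credit for a credit update and capping the advertisement at RDS_MAX_ADV_CREDIT).

	#include <stdatomic.h>

	/* Assumed 16/16 packing of the two counters into one word; the driver
	 * uses its own accessor macros for this. */
	#define SEND_CREDITS(v)		((v) & 0xffffu)
	#define POST_CREDITS(v)		((v) >> 16)
	#define PACK(send, post)	(((post) << 16) | (send))

	/* Atomically take up to 'wanted' send credits and claim any posted
	 * credits for advertisement, retrying if another thread raced us. */
	static unsigned int grab_credits(atomic_uint *credits, unsigned int wanted,
					 unsigned int *advertise)
	{
		unsigned int oldval, newval, avail, got;

		do {
			oldval = atomic_load(credits);
			avail = SEND_CREDITS(oldval);
			got = avail < wanted ? avail : wanted;
			*advertise = POST_CREDITS(oldval);
			newval = PACK(avail - got, 0);	/* both counters updated at once */
		} while (!atomic_compare_exchange_weak(credits, &oldval, newval));

		return got;
	}

	int main(void)
	{
		atomic_uint credits = PACK(5u, 3u);	/* 5 send credits, 3 posted */
		unsigned int adv;
		unsigned int got = grab_credits(&credits, 8, &adv);

		/* got == 5, adv == 3, and the shared word is now zero. */
		return (got == 5 && adv == 3) ? 0 : 1;
	}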
313 | int rds_ib_send_grab_credits(struct rds_ib_connection *ic, | ||
314 | u32 wanted, u32 *adv_credits, int need_posted) | ||
315 | { | ||
316 | unsigned int avail, posted, got = 0, advertise; | ||
317 | long oldval, newval; | ||
318 | |||
319 | *adv_credits = 0; | ||
320 | if (!ic->i_flowctl) | ||
321 | return wanted; | ||
322 | |||
323 | try_again: | ||
324 | advertise = 0; | ||
325 | oldval = newval = atomic_read(&ic->i_credits); | ||
326 | posted = IB_GET_POST_CREDITS(oldval); | ||
327 | avail = IB_GET_SEND_CREDITS(oldval); | ||
328 | |||
329 | rdsdebug("rds_ib_send_grab_credits(%u): credits=%u posted=%u\n", | ||
330 | wanted, avail, posted); | ||
331 | |||
332 | /* The last credit must be used to send a credit update. */ | ||
333 | if (avail && !posted) | ||
334 | avail--; | ||
335 | |||
336 | if (avail < wanted) { | ||
337 | struct rds_connection *conn = ic->i_cm_id->context; | ||
338 | |||
339 | /* Oops, there aren't that many credits left! */ | ||
340 | set_bit(RDS_LL_SEND_FULL, &conn->c_flags); | ||
341 | got = avail; | ||
342 | } else { | ||
343 | /* Sometimes you get what you want, lalala. */ | ||
344 | got = wanted; | ||
345 | } | ||
346 | newval -= IB_SET_SEND_CREDITS(got); | ||
347 | |||
348 | /* | ||
349 | * If need_posted is non-zero, then the caller wants | ||
350 | * the posted credits regardless of whether any send credits are | ||
351 | * available. | ||
352 | */ | ||
353 | if (posted && (got || need_posted)) { | ||
354 | advertise = min_t(unsigned int, posted, RDS_MAX_ADV_CREDIT); | ||
355 | newval -= IB_SET_POST_CREDITS(advertise); | ||
356 | } | ||
357 | |||
358 | /* Finally bill everything */ | ||
359 | if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval) | ||
360 | goto try_again; | ||
361 | |||
362 | *adv_credits = advertise; | ||
363 | return got; | ||
364 | } | ||
365 | |||
366 | void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits) | ||
367 | { | ||
368 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
369 | |||
370 | if (credits == 0) | ||
371 | return; | ||
372 | |||
373 | rdsdebug("rds_ib_send_add_credits(%u): current=%u%s\n", | ||
374 | credits, | ||
375 | IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)), | ||
376 | test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : ""); | ||
377 | |||
378 | atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits); | ||
379 | if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags)) | ||
380 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); | ||
381 | |||
382 | WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384); | ||
383 | |||
384 | rds_ib_stats_inc(s_ib_rx_credit_updates); | ||
385 | } | ||
386 | |||
387 | void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted) | ||
388 | { | ||
389 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
390 | |||
391 | if (posted == 0) | ||
392 | return; | ||
393 | |||
394 | atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits); | ||
395 | |||
396 | /* Decide whether to send an update to the peer now. | ||
397 | * If we would send a credit update for every single buffer we | ||
398 | * post, we would end up with an ACK storm (ACK arrives, | ||
399 | * consumes buffer, we refill the ring, send ACK to remote | ||
400 | * advertising the newly posted buffer... ad inf) | ||
401 | * | ||
402 | * Performance pretty much depends on how often we send | ||
403 | * credit updates - too frequent updates mean lots of ACKs. | ||
404 | * Too infrequent updates, and the peer will run out of | ||
405 | * credits and have to throttle. | ||
406 | * For the time being, 16 seems to be a good compromise. | ||
407 | */ | ||
408 | if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16) | ||
409 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
410 | } | ||
411 | |||
412 | static inline void | ||
413 | rds_ib_xmit_populate_wr(struct rds_ib_connection *ic, | ||
414 | struct rds_ib_send_work *send, unsigned int pos, | ||
415 | unsigned long buffer, unsigned int length, | ||
416 | int send_flags) | ||
417 | { | ||
418 | struct ib_sge *sge; | ||
419 | |||
420 | WARN_ON(pos != send - ic->i_sends); | ||
421 | |||
422 | send->s_wr.send_flags = send_flags; | ||
423 | send->s_wr.opcode = IB_WR_SEND; | ||
424 | send->s_wr.num_sge = 2; | ||
425 | send->s_wr.next = NULL; | ||
426 | send->s_queued = jiffies; | ||
427 | send->s_op = NULL; | ||
428 | |||
429 | if (length != 0) { | ||
430 | sge = rds_ib_data_sge(ic, send->s_sge); | ||
431 | sge->addr = buffer; | ||
432 | sge->length = length; | ||
433 | sge->lkey = ic->i_mr->lkey; | ||
434 | |||
435 | sge = rds_ib_header_sge(ic, send->s_sge); | ||
436 | } else { | ||
437 | /* We're sending a packet with no payload. There is only | ||
438 | * one SGE */ | ||
439 | send->s_wr.num_sge = 1; | ||
440 | sge = &send->s_sge[0]; | ||
441 | } | ||
442 | |||
443 | sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header)); | ||
444 | sge->length = sizeof(struct rds_header); | ||
445 | sge->lkey = ic->i_mr->lkey; | ||
446 | } | ||
447 | |||
448 | /* | ||
449 | * This can be called multiple times for a given message. The first time | ||
450 | * we see a message we map its scatterlist into the IB device so that | ||
451 | * we can provide that mapped address to the IB scatter gather entries | ||
452 | * in the IB work requests. We translate the scatterlist into a series | ||
453 | * of work requests that fragment the message. These work requests complete | ||
454 | * in order so we pass ownership of the message to the completion handler | ||
455 | * once we send the final fragment. | ||
456 | * | ||
457 | * The RDS core uses the c_send_lock to only enter this function once | ||
458 | * per connection. This makes sure that the tx ring alloc/unalloc pairs | ||
459 | * don't get out of sync and confuse the ring. | ||
460 | */ | ||
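A tiny standalone sketch of the work-request count computed at the top of rds_ib_xmit() below, assuming RDS_FRAG_SIZE is 4096 bytes and that ceil() is ordinary ceiling division; a zero-length message still needs one WR so its header can be sent.

	#include <stdio.h>

	#define FRAG_SIZE 4096u		/* assumed stand-in for RDS_FRAG_SIZE */

	/* Number of send WRs needed for a payload of 'len' bytes. */
	static unsigned int send_wrs_needed(unsigned int len)
	{
		if (len == 0)
			return 1;	/* header-only message still takes one WR */
		return (len + FRAG_SIZE - 1) / FRAG_SIZE;
	}

	int main(void)
	{
		printf("%u %u %u\n", send_wrs_needed(0),
		       send_wrs_needed(4096), send_wrs_needed(8200));	/* 1 1 3 */
		return 0;
	}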
461 | int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, | ||
462 | unsigned int hdr_off, unsigned int sg, unsigned int off) | ||
463 | { | ||
464 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
465 | struct ib_device *dev = ic->i_cm_id->device; | ||
466 | struct rds_ib_send_work *send = NULL; | ||
467 | struct rds_ib_send_work *first; | ||
468 | struct rds_ib_send_work *prev; | ||
469 | struct ib_send_wr *failed_wr; | ||
470 | struct scatterlist *scat; | ||
471 | u32 pos; | ||
472 | u32 i; | ||
473 | u32 work_alloc; | ||
474 | u32 credit_alloc; | ||
475 | u32 posted; | ||
476 | u32 adv_credits = 0; | ||
477 | int send_flags = 0; | ||
478 | int sent; | ||
479 | int ret; | ||
480 | int flow_controlled = 0; | ||
481 | |||
482 | BUG_ON(off % RDS_FRAG_SIZE); | ||
483 | BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); | ||
484 | |||
485 | /* FIXME we may overallocate here */ | ||
486 | if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) | ||
487 | i = 1; | ||
488 | else | ||
489 | i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE); | ||
490 | |||
491 | work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); | ||
492 | if (work_alloc == 0) { | ||
493 | set_bit(RDS_LL_SEND_FULL, &conn->c_flags); | ||
494 | rds_ib_stats_inc(s_ib_tx_ring_full); | ||
495 | ret = -ENOMEM; | ||
496 | goto out; | ||
497 | } | ||
498 | |||
499 | credit_alloc = work_alloc; | ||
500 | if (ic->i_flowctl) { | ||
501 | credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0); | ||
502 | adv_credits += posted; | ||
503 | if (credit_alloc < work_alloc) { | ||
504 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc); | ||
505 | work_alloc = credit_alloc; | ||
506 | flow_controlled++; | ||
507 | } | ||
508 | if (work_alloc == 0) { | ||
509 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); | ||
510 | rds_ib_stats_inc(s_ib_tx_throttle); | ||
511 | ret = -ENOMEM; | ||
512 | goto out; | ||
513 | } | ||
514 | } | ||
515 | |||
516 | /* map the message the first time we see it */ | ||
517 | if (ic->i_rm == NULL) { | ||
518 | /* | ||
519 | printk(KERN_NOTICE "rds_ib_xmit prep msg dport=%u flags=0x%x len=%d\n", | ||
520 | be16_to_cpu(rm->m_inc.i_hdr.h_dport), | ||
521 | rm->m_inc.i_hdr.h_flags, | ||
522 | be32_to_cpu(rm->m_inc.i_hdr.h_len)); | ||
523 | */ | ||
524 | if (rm->m_nents) { | ||
525 | rm->m_count = ib_dma_map_sg(dev, | ||
526 | rm->m_sg, rm->m_nents, DMA_TO_DEVICE); | ||
527 | rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count); | ||
528 | if (rm->m_count == 0) { | ||
529 | rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); | ||
530 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); | ||
531 | ret = -ENOMEM; /* XXX ? */ | ||
532 | goto out; | ||
533 | } | ||
534 | } else { | ||
535 | rm->m_count = 0; | ||
536 | } | ||
537 | |||
538 | ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; | ||
539 | ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes; | ||
540 | rds_message_addref(rm); | ||
541 | ic->i_rm = rm; | ||
542 | |||
543 | /* Finalize the header */ | ||
544 | if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags)) | ||
545 | rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED; | ||
546 | if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) | ||
547 | rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED; | ||
548 | |||
549 | /* If it has a RDMA op, tell the peer we did it. This is | ||
550 | * used by the peer to release use-once RDMA MRs. */ | ||
551 | if (rm->m_rdma_op) { | ||
552 | struct rds_ext_header_rdma ext_hdr; | ||
553 | |||
554 | ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); | ||
555 | rds_message_add_extension(&rm->m_inc.i_hdr, | ||
556 | RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); | ||
557 | } | ||
558 | if (rm->m_rdma_cookie) { | ||
559 | rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr, | ||
560 | rds_rdma_cookie_key(rm->m_rdma_cookie), | ||
561 | rds_rdma_cookie_offset(rm->m_rdma_cookie)); | ||
562 | } | ||
563 | |||
564 | /* Note - rds_ib_piggyb_ack clears the ACK_REQUIRED bit, so | ||
565 | * we should not do this unless we have a chance of at least | ||
566 | * sticking the header into the send ring. Which is why we | ||
567 | * should call rds_ib_ring_alloc first. */ | ||
568 | rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_ib_piggyb_ack(ic)); | ||
569 | rds_message_make_checksum(&rm->m_inc.i_hdr); | ||
570 | |||
571 | /* | ||
572 | * Update adv_credits since we reset the ACK_REQUIRED bit. | ||
573 | */ | ||
574 | rds_ib_send_grab_credits(ic, 0, &posted, 1); | ||
575 | adv_credits += posted; | ||
576 | BUG_ON(adv_credits > 255); | ||
577 | } else if (ic->i_rm != rm) | ||
578 | BUG(); | ||
579 | |||
580 | send = &ic->i_sends[pos]; | ||
581 | first = send; | ||
582 | prev = NULL; | ||
583 | scat = &rm->m_sg[sg]; | ||
584 | sent = 0; | ||
585 | i = 0; | ||
586 | |||
587 | /* Sometimes you want to put a fence between an RDMA | ||
588 | * READ and the following SEND. | ||
589 | * We could either do this all the time | ||
590 | * or when requested by the user. Right now, we let | ||
591 | * the application choose. | ||
592 | */ | ||
593 | if (rm->m_rdma_op && rm->m_rdma_op->r_fence) | ||
594 | send_flags = IB_SEND_FENCE; | ||
595 | |||
596 | /* | ||
597 | * We could be copying the header into the unused tail of the page. | ||
598 | * That would need to be changed in the future when those pages might | ||
599 | * be mapped userspace pages or page cache pages. So instead we always | ||
600 | * use a second sge and our long-lived ring of mapped headers. We send | ||
601 | * the header after the data so that the data payload can be aligned on | ||
602 | * the receiver. | ||
603 | */ | ||
604 | |||
605 | /* handle a 0-len message */ | ||
606 | if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) { | ||
607 | rds_ib_xmit_populate_wr(ic, send, pos, 0, 0, send_flags); | ||
608 | goto add_header; | ||
609 | } | ||
610 | |||
611 | /* if there's data reference it with a chain of work reqs */ | ||
612 | for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { | ||
613 | unsigned int len; | ||
614 | |||
615 | send = &ic->i_sends[pos]; | ||
616 | |||
617 | len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); | ||
618 | rds_ib_xmit_populate_wr(ic, send, pos, | ||
619 | ib_sg_dma_address(dev, scat) + off, len, | ||
620 | send_flags); | ||
621 | |||
622 | /* | ||
623 | * We want to delay signaling completions just enough to get | ||
624 | * the batching benefits but not so much that we create dead time | ||
625 | * on the wire. | ||
626 | */ | ||
627 | if (ic->i_unsignaled_wrs-- == 0) { | ||
628 | ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; | ||
629 | send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; | ||
630 | } | ||
631 | |||
632 | ic->i_unsignaled_bytes -= len; | ||
633 | if (ic->i_unsignaled_bytes <= 0) { | ||
634 | ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes; | ||
635 | send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; | ||
636 | } | ||
637 | |||
638 | /* | ||
639 | * Always signal the last one if we're stopping due to flow control. | ||
640 | */ | ||
641 | if (flow_controlled && i == (work_alloc-1)) | ||
642 | send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; | ||
643 | |||
644 | rdsdebug("send %p wr %p num_sge %u next %p\n", send, | ||
645 | &send->s_wr, send->s_wr.num_sge, send->s_wr.next); | ||
646 | |||
647 | sent += len; | ||
648 | off += len; | ||
649 | if (off == ib_sg_dma_len(dev, scat)) { | ||
650 | scat++; | ||
651 | off = 0; | ||
652 | } | ||
653 | |||
654 | add_header: | ||
655 | /* Tack on the header after the data. The header SGE should already | ||
656 | * have been set up to point to the right header buffer. */ | ||
657 | memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); | ||
658 | |||
659 | if (0) { | ||
660 | struct rds_header *hdr = &ic->i_send_hdrs[pos]; | ||
661 | |||
662 | printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n", | ||
663 | be16_to_cpu(hdr->h_dport), | ||
664 | hdr->h_flags, | ||
665 | be32_to_cpu(hdr->h_len)); | ||
666 | } | ||
667 | if (adv_credits) { | ||
668 | struct rds_header *hdr = &ic->i_send_hdrs[pos]; | ||
669 | |||
670 | /* add credit and redo the header checksum */ | ||
671 | hdr->h_credit = adv_credits; | ||
672 | rds_message_make_checksum(hdr); | ||
673 | adv_credits = 0; | ||
674 | rds_ib_stats_inc(s_ib_tx_credit_updates); | ||
675 | } | ||
676 | |||
677 | if (prev) | ||
678 | prev->s_wr.next = &send->s_wr; | ||
679 | prev = send; | ||
680 | |||
681 | pos = (pos + 1) % ic->i_send_ring.w_nr; | ||
682 | } | ||
683 | |||
684 | /* Account the RDS header in the number of bytes we sent, but just once. | ||
685 | * The caller has no concept of fragmentation. */ | ||
686 | if (hdr_off == 0) | ||
687 | sent += sizeof(struct rds_header); | ||
688 | |||
689 | /* if we finished the message then send completion owns it */ | ||
690 | if (scat == &rm->m_sg[rm->m_count]) { | ||
691 | prev->s_rm = ic->i_rm; | ||
692 | prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; | ||
693 | ic->i_rm = NULL; | ||
694 | } | ||
695 | |||
696 | if (i < work_alloc) { | ||
697 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); | ||
698 | work_alloc = i; | ||
699 | } | ||
700 | if (ic->i_flowctl && i < credit_alloc) | ||
701 | rds_ib_send_add_credits(conn, credit_alloc - i); | ||
702 | |||
703 | /* XXX need to worry about failed_wr and partial sends. */ | ||
704 | failed_wr = &first->s_wr; | ||
705 | ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); | ||
706 | rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, | ||
707 | first, &first->s_wr, ret, failed_wr); | ||
708 | BUG_ON(failed_wr != &first->s_wr); | ||
709 | if (ret) { | ||
710 | printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 " | ||
711 | "returned %d\n", &conn->c_faddr, ret); | ||
712 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); | ||
713 | if (prev->s_rm) { | ||
714 | ic->i_rm = prev->s_rm; | ||
715 | prev->s_rm = NULL; | ||
716 | } | ||
717 | /* Finesse this later */ | ||
718 | BUG(); | ||
719 | goto out; | ||
720 | } | ||
721 | |||
722 | ret = sent; | ||
723 | out: | ||
724 | BUG_ON(adv_credits); | ||
725 | return ret; | ||
726 | } | ||
727 | |||
728 | int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) | ||
729 | { | ||
730 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
731 | struct rds_ib_send_work *send = NULL; | ||
732 | struct rds_ib_send_work *first; | ||
733 | struct rds_ib_send_work *prev; | ||
734 | struct ib_send_wr *failed_wr; | ||
735 | struct rds_ib_device *rds_ibdev; | ||
736 | struct scatterlist *scat; | ||
737 | unsigned long len; | ||
738 | u64 remote_addr = op->r_remote_addr; | ||
739 | u32 pos; | ||
740 | u32 work_alloc; | ||
741 | u32 i; | ||
742 | u32 j; | ||
743 | int sent; | ||
744 | int ret; | ||
745 | int num_sge; | ||
746 | |||
747 | rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); | ||
748 | |||
749 | /* map the message the first time we see it */ | ||
750 | if (!op->r_mapped) { | ||
751 | op->r_count = ib_dma_map_sg(ic->i_cm_id->device, | ||
752 | op->r_sg, op->r_nents, (op->r_write) ? | ||
753 | DMA_TO_DEVICE : DMA_FROM_DEVICE); | ||
754 | rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count); | ||
755 | if (op->r_count == 0) { | ||
756 | rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); | ||
757 | ret = -ENOMEM; /* XXX ? */ | ||
758 | goto out; | ||
759 | } | ||
760 | |||
761 | op->r_mapped = 1; | ||
762 | } | ||
763 | |||
764 | /* | ||
765 | * Instead of knowing how to return a partial rdma read/write we insist that there | ||
766 | * be enough work requests to send the entire message. | ||
767 | */ | ||
768 | i = ceil(op->r_count, rds_ibdev->max_sge); | ||
769 | |||
770 | work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); | ||
771 | if (work_alloc != i) { | ||
772 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); | ||
773 | rds_ib_stats_inc(s_ib_tx_ring_full); | ||
774 | ret = -ENOMEM; | ||
775 | goto out; | ||
776 | } | ||
777 | |||
778 | send = &ic->i_sends[pos]; | ||
779 | first = send; | ||
780 | prev = NULL; | ||
781 | scat = &op->r_sg[0]; | ||
782 | sent = 0; | ||
783 | num_sge = op->r_count; | ||
784 | |||
785 | for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { | ||
786 | send->s_wr.send_flags = 0; | ||
787 | send->s_queued = jiffies; | ||
788 | /* | ||
789 | * We want to delay signaling completions just enough to get | ||
790 | * the batching benefits but not so much that we create dead time on the wire. | ||
791 | */ | ||
792 | if (ic->i_unsignaled_wrs-- == 0) { | ||
793 | ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; | ||
794 | send->s_wr.send_flags = IB_SEND_SIGNALED; | ||
795 | } | ||
796 | |||
797 | send->s_wr.opcode = op->r_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; | ||
798 | send->s_wr.wr.rdma.remote_addr = remote_addr; | ||
799 | send->s_wr.wr.rdma.rkey = op->r_key; | ||
800 | send->s_op = op; | ||
801 | |||
802 | if (num_sge > rds_ibdev->max_sge) { | ||
803 | send->s_wr.num_sge = rds_ibdev->max_sge; | ||
804 | num_sge -= rds_ibdev->max_sge; | ||
805 | } else { | ||
806 | send->s_wr.num_sge = num_sge; | ||
807 | } | ||
808 | |||
809 | send->s_wr.next = NULL; | ||
810 | |||
811 | if (prev) | ||
812 | prev->s_wr.next = &send->s_wr; | ||
813 | |||
814 | for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) { | ||
815 | len = ib_sg_dma_len(ic->i_cm_id->device, scat); | ||
816 | send->s_sge[j].addr = | ||
817 | ib_sg_dma_address(ic->i_cm_id->device, scat); | ||
818 | send->s_sge[j].length = len; | ||
819 | send->s_sge[j].lkey = ic->i_mr->lkey; | ||
820 | |||
821 | sent += len; | ||
822 | rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr); | ||
823 | |||
824 | remote_addr += len; | ||
825 | scat++; | ||
826 | } | ||
827 | |||
828 | rdsdebug("send %p wr %p num_sge %u next %p\n", send, | ||
829 | &send->s_wr, send->s_wr.num_sge, send->s_wr.next); | ||
830 | |||
831 | prev = send; | ||
832 | if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) | ||
833 | send = ic->i_sends; | ||
834 | } | ||
835 | |||
836 | /* if we finished the message then send completion owns it */ | ||
837 | if (scat == &op->r_sg[op->r_count]) | ||
838 | prev->s_wr.send_flags = IB_SEND_SIGNALED; | ||
839 | |||
840 | if (i < work_alloc) { | ||
841 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); | ||
842 | work_alloc = i; | ||
843 | } | ||
844 | |||
845 | failed_wr = &first->s_wr; | ||
846 | ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); | ||
847 | rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, | ||
848 | first, &first->s_wr, ret, failed_wr); | ||
849 | BUG_ON(failed_wr != &first->s_wr); | ||
850 | if (ret) { | ||
851 | printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 " | ||
852 | "returned %d\n", &conn->c_faddr, ret); | ||
853 | rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); | ||
854 | goto out; | ||
855 | } | ||
856 | |||
857 | if (unlikely(failed_wr != &first->s_wr)) { | ||
858 | printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret); | ||
859 | BUG_ON(failed_wr != &first->s_wr); | ||
860 | } | ||
861 | |||
862 | |||
863 | out: | ||
864 | return ret; | ||
865 | } | ||
866 | |||
867 | void rds_ib_xmit_complete(struct rds_connection *conn) | ||
868 | { | ||
869 | struct rds_ib_connection *ic = conn->c_transport_data; | ||
870 | |||
871 | /* We may have a pending ACK or window update we were unable | ||
872 | * to send previously (due to flow control). Try again. */ | ||
873 | rds_ib_attempt_ack(ic); | ||
874 | } | ||
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c new file mode 100644 index 000000000000..02e3e3d50d4a --- /dev/null +++ b/net/rds/ib_stats.c | |||
@@ -0,0 +1,95 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/percpu.h> | ||
34 | #include <linux/seq_file.h> | ||
35 | #include <linux/proc_fs.h> | ||
36 | |||
37 | #include "rds.h" | ||
38 | #include "ib.h" | ||
39 | |||
40 | DEFINE_PER_CPU(struct rds_ib_statistics, rds_ib_stats) ____cacheline_aligned; | ||
41 | |||
42 | static char *rds_ib_stat_names[] = { | ||
43 | "ib_connect_raced", | ||
44 | "ib_listen_closed_stale", | ||
45 | "ib_tx_cq_call", | ||
46 | "ib_tx_cq_event", | ||
47 | "ib_tx_ring_full", | ||
48 | "ib_tx_throttle", | ||
49 | "ib_tx_sg_mapping_failure", | ||
50 | "ib_tx_stalled", | ||
51 | "ib_tx_credit_updates", | ||
52 | "ib_rx_cq_call", | ||
53 | "ib_rx_cq_event", | ||
54 | "ib_rx_ring_empty", | ||
55 | "ib_rx_refill_from_cq", | ||
56 | "ib_rx_refill_from_thread", | ||
57 | "ib_rx_alloc_limit", | ||
58 | "ib_rx_credit_updates", | ||
59 | "ib_ack_sent", | ||
60 | "ib_ack_send_failure", | ||
61 | "ib_ack_send_delayed", | ||
62 | "ib_ack_send_piggybacked", | ||
63 | "ib_ack_received", | ||
64 | "ib_rdma_mr_alloc", | ||
65 | "ib_rdma_mr_free", | ||
66 | "ib_rdma_mr_used", | ||
67 | "ib_rdma_mr_pool_flush", | ||
68 | "ib_rdma_mr_pool_wait", | ||
69 | "ib_rdma_mr_pool_depleted", | ||
70 | }; | ||
71 | |||
72 | unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter, | ||
73 | unsigned int avail) | ||
74 | { | ||
75 | struct rds_ib_statistics stats = {0, }; | ||
76 | uint64_t *src; | ||
77 | uint64_t *sum; | ||
78 | size_t i; | ||
79 | int cpu; | ||
80 | |||
81 | if (avail < ARRAY_SIZE(rds_ib_stat_names)) | ||
82 | goto out; | ||
83 | |||
84 | for_each_online_cpu(cpu) { | ||
85 | src = (uint64_t *)&(per_cpu(rds_ib_stats, cpu)); | ||
86 | sum = (uint64_t *)&stats; | ||
87 | for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++) | ||
88 | *(sum++) += *(src++); | ||
89 | } | ||
90 | |||
91 | rds_stats_info_copy(iter, (uint64_t *)&stats, rds_ib_stat_names, | ||
92 | ARRAY_SIZE(rds_ib_stat_names)); | ||
93 | out: | ||
94 | return ARRAY_SIZE(rds_ib_stat_names); | ||
95 | } | ||
diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c new file mode 100644 index 000000000000..d87830db93a0 --- /dev/null +++ b/net/rds/ib_sysctl.c | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/sysctl.h> | ||
35 | #include <linux/proc_fs.h> | ||
36 | |||
37 | #include "ib.h" | ||
38 | |||
39 | static struct ctl_table_header *rds_ib_sysctl_hdr; | ||
40 | |||
41 | unsigned long rds_ib_sysctl_max_send_wr = RDS_IB_DEFAULT_SEND_WR; | ||
42 | unsigned long rds_ib_sysctl_max_recv_wr = RDS_IB_DEFAULT_RECV_WR; | ||
43 | unsigned long rds_ib_sysctl_max_recv_allocation = (128 * 1024 * 1024) / RDS_FRAG_SIZE; | ||
44 | static unsigned long rds_ib_sysctl_max_wr_min = 1; | ||
45 | /* hardware will fail CQ creation long before this */ | ||
46 | static unsigned long rds_ib_sysctl_max_wr_max = (u32)~0; | ||
47 | |||
48 | unsigned long rds_ib_sysctl_max_unsig_wrs = 16; | ||
49 | static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1; | ||
50 | static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64; | ||
51 | |||
52 | unsigned long rds_ib_sysctl_max_unsig_bytes = (16 << 20); | ||
53 | static unsigned long rds_ib_sysctl_max_unsig_bytes_min = 1; | ||
54 | static unsigned long rds_ib_sysctl_max_unsig_bytes_max = ~0UL; | ||
55 | |||
56 | unsigned int rds_ib_sysctl_flow_control = 1; | ||
57 | |||
58 | ctl_table rds_ib_sysctl_table[] = { | ||
59 | { | ||
60 | .ctl_name = CTL_UNNUMBERED, | ||
61 | .procname = "max_send_wr", | ||
62 | .data = &rds_ib_sysctl_max_send_wr, | ||
63 | .maxlen = sizeof(unsigned long), | ||
64 | .mode = 0644, | ||
65 | .proc_handler = &proc_doulongvec_minmax, | ||
66 | .extra1 = &rds_ib_sysctl_max_wr_min, | ||
67 | .extra2 = &rds_ib_sysctl_max_wr_max, | ||
68 | }, | ||
69 | { | ||
70 | .ctl_name = CTL_UNNUMBERED, | ||
71 | .procname = "max_recv_wr", | ||
72 | .data = &rds_ib_sysctl_max_recv_wr, | ||
73 | .maxlen = sizeof(unsigned long), | ||
74 | .mode = 0644, | ||
75 | .proc_handler = &proc_doulongvec_minmax, | ||
76 | .extra1 = &rds_ib_sysctl_max_wr_min, | ||
77 | .extra2 = &rds_ib_sysctl_max_wr_max, | ||
78 | }, | ||
79 | { | ||
80 | .ctl_name = CTL_UNNUMBERED, | ||
81 | .procname = "max_unsignaled_wr", | ||
82 | .data = &rds_ib_sysctl_max_unsig_wrs, | ||
83 | .maxlen = sizeof(unsigned long), | ||
84 | .mode = 0644, | ||
85 | .proc_handler = &proc_doulongvec_minmax, | ||
86 | .extra1 = &rds_ib_sysctl_max_unsig_wr_min, | ||
87 | .extra2 = &rds_ib_sysctl_max_unsig_wr_max, | ||
88 | }, | ||
89 | { | ||
90 | .ctl_name = CTL_UNNUMBERED, | ||
91 | .procname = "max_unsignaled_bytes", | ||
92 | .data = &rds_ib_sysctl_max_unsig_bytes, | ||
93 | .maxlen = sizeof(unsigned long), | ||
94 | .mode = 0644, | ||
95 | .proc_handler = &proc_doulongvec_minmax, | ||
96 | .extra1 = &rds_ib_sysctl_max_unsig_bytes_min, | ||
97 | .extra2 = &rds_ib_sysctl_max_unsig_bytes_max, | ||
98 | }, | ||
99 | { | ||
100 | .ctl_name = CTL_UNNUMBERED, | ||
101 | .procname = "max_recv_allocation", | ||
102 | .data = &rds_ib_sysctl_max_recv_allocation, | ||
103 | .maxlen = sizeof(unsigned long), | ||
104 | .mode = 0644, | ||
105 | .proc_handler = &proc_doulongvec_minmax, | ||
106 | }, | ||
107 | { | ||
108 | .ctl_name = CTL_UNNUMBERED, | ||
109 | .procname = "flow_control", | ||
110 | .data = &rds_ib_sysctl_flow_control, | ||
111 | .maxlen = sizeof(rds_ib_sysctl_flow_control), | ||
112 | .mode = 0644, | ||
113 | .proc_handler = &proc_dointvec, | ||
114 | }, | ||
115 | { .ctl_name = 0} | ||
116 | }; | ||
117 | |||
118 | static struct ctl_path rds_ib_sysctl_path[] = { | ||
119 | { .procname = "net", .ctl_name = CTL_NET, }, | ||
120 | { .procname = "rds", .ctl_name = CTL_UNNUMBERED, }, | ||
121 | { .procname = "ib", .ctl_name = CTL_UNNUMBERED, }, | ||
122 | { } | ||
123 | }; | ||
124 | |||
125 | void rds_ib_sysctl_exit(void) | ||
126 | { | ||
127 | if (rds_ib_sysctl_hdr) | ||
128 | unregister_sysctl_table(rds_ib_sysctl_hdr); | ||
129 | } | ||
130 | |||
131 | int __init rds_ib_sysctl_init(void) | ||
132 | { | ||
133 | rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table); | ||
134 | if (rds_ib_sysctl_hdr == NULL) | ||
135 | return -ENOMEM; | ||
136 | return 0; | ||
137 | } | ||
diff --git a/net/rds/info.c b/net/rds/info.c new file mode 100644 index 000000000000..1d885535214d --- /dev/null +++ b/net/rds/info.c | |||
@@ -0,0 +1,241 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/percpu.h> | ||
34 | #include <linux/seq_file.h> | ||
35 | #include <linux/proc_fs.h> | ||
36 | |||
37 | #include "rds.h" | ||
38 | |||
39 | /* | ||
40 | * This file implements a getsockopt() call which copies a set of fixed | ||
41 | * sized structs into a user-specified buffer as a means of providing | ||
42 | * read-only information about RDS. | ||
43 | * | ||
44 | * For a given information source there are a given number of fixed sized | ||
45 | * structs at a given time. The structs are only copied if the user-specified | ||
46 | * buffer is big enough. The destination pages that make up the buffer | ||
47 | * are pinned for the duration of the copy. | ||
48 | * | ||
49 | * This gives us the following benefits: | ||
50 | * | ||
51 | * - simple implementation, no copy "position" across multiple calls | ||
52 | * - consistent snapshot of an info source | ||
53 | * - atomic copy works well with whatever locking info source has | ||
54 | * - one portable tool to get rds info across implementations | ||
55 | * - long-lived tool can get info without allocating | ||
56 | * | ||
57 | * at the following costs: | ||
58 | * | ||
59 | * - info source copy must be pinned, may be "large" | ||
60 | */ | ||
61 | |||
62 | struct rds_info_iterator { | ||
63 | struct page **pages; | ||
64 | void *addr; | ||
65 | unsigned long offset; | ||
66 | }; | ||
67 | |||
68 | static DEFINE_SPINLOCK(rds_info_lock); | ||
69 | static rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1]; | ||
70 | |||
71 | void rds_info_register_func(int optname, rds_info_func func) | ||
72 | { | ||
73 | int offset = optname - RDS_INFO_FIRST; | ||
74 | |||
75 | BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | ||
76 | |||
77 | spin_lock(&rds_info_lock); | ||
78 | BUG_ON(rds_info_funcs[offset] != NULL); | ||
79 | rds_info_funcs[offset] = func; | ||
80 | spin_unlock(&rds_info_lock); | ||
81 | } | ||
82 | |||
83 | void rds_info_deregister_func(int optname, rds_info_func func) | ||
84 | { | ||
85 | int offset = optname - RDS_INFO_FIRST; | ||
86 | |||
87 | BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); | ||
88 | |||
89 | spin_lock(&rds_info_lock); | ||
90 | BUG_ON(rds_info_funcs[offset] != func); | ||
91 | rds_info_funcs[offset] = NULL; | ||
92 | spin_unlock(&rds_info_lock); | ||
93 | } | ||
94 | |||
95 | /* | ||
96 | * Typically we hold an atomic kmap across multiple rds_info_copy() calls | ||
97 | * because the kmap is so expensive. This must be called before using blocking | ||
98 | * operations while holding the mapping and as the iterator is torn down. | ||
99 | */ | ||
100 | void rds_info_iter_unmap(struct rds_info_iterator *iter) | ||
101 | { | ||
102 | if (iter->addr != NULL) { | ||
103 | kunmap_atomic(iter->addr, KM_USER0); | ||
104 | iter->addr = NULL; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * get_user_pages() called flush_dcache_page() on the pages for us. | ||
110 | */ | ||
111 | void rds_info_copy(struct rds_info_iterator *iter, void *data, | ||
112 | unsigned long bytes) | ||
113 | { | ||
114 | unsigned long this; | ||
115 | |||
116 | while (bytes) { | ||
117 | if (iter->addr == NULL) | ||
118 | iter->addr = kmap_atomic(*iter->pages, KM_USER0); | ||
119 | |||
120 | this = min(bytes, PAGE_SIZE - iter->offset); | ||
121 | |||
122 | rdsdebug("page %p addr %p offset %lu this %lu data %p " | ||
123 | "bytes %lu\n", *iter->pages, iter->addr, | ||
124 | iter->offset, this, data, bytes); | ||
125 | |||
126 | memcpy(iter->addr + iter->offset, data, this); | ||
127 | |||
128 | data += this; | ||
129 | bytes -= this; | ||
130 | iter->offset += this; | ||
131 | |||
132 | if (iter->offset == PAGE_SIZE) { | ||
133 | kunmap_atomic(iter->addr, KM_USER0); | ||
134 | iter->addr = NULL; | ||
135 | iter->offset = 0; | ||
136 | iter->pages++; | ||
137 | } | ||
138 | } | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * @optval points to the userspace buffer that the information snapshot | ||
143 | * will be copied into. | ||
144 | * | ||
145 | * @optlen on input is the size of the buffer in userspace. @optlen | ||
146 | * on output is the size of the requested snapshot in bytes. | ||
147 | * | ||
148 | * This function returns -errno if there is a failure, particularly -ENOSPC | ||
149 | * if the given userspace buffer was not large enough to fit the snapshot. | ||
150 | * On success it returns the positive number of bytes of each array element | ||
151 | * in the snapshot. | ||
152 | */ | ||
153 | int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, | ||
154 | int __user *optlen) | ||
155 | { | ||
156 | struct rds_info_iterator iter; | ||
157 | struct rds_info_lengths lens; | ||
158 | unsigned long nr_pages = 0; | ||
159 | unsigned long start; | ||
160 | unsigned long i; | ||
161 | rds_info_func func; | ||
162 | struct page **pages = NULL; | ||
163 | int ret; | ||
164 | int len; | ||
165 | int total; | ||
166 | |||
167 | if (get_user(len, optlen)) { | ||
168 | ret = -EFAULT; | ||
169 | goto out; | ||
170 | } | ||
171 | |||
172 | /* check for all kinds of wrapping and the like */ | ||
173 | start = (unsigned long)optval; | ||
174 | if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { | ||
175 | ret = -EINVAL; | ||
176 | goto out; | ||
177 | } | ||
178 | |||
179 | /* a 0 len call is just trying to probe its length */ | ||
180 | if (len == 0) | ||
181 | goto call_func; | ||
182 | |||
183 | nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK)) | ||
184 | >> PAGE_SHIFT; | ||
185 | |||
186 | pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); | ||
187 | if (pages == NULL) { | ||
188 | ret = -ENOMEM; | ||
189 | goto out; | ||
190 | } | ||
191 | down_read(¤t->mm->mmap_sem); | ||
192 | ret = get_user_pages(current, current->mm, start, nr_pages, 1, 0, | ||
193 | pages, NULL); | ||
194 | up_read(¤t->mm->mmap_sem); | ||
195 | if (ret != nr_pages) { | ||
196 | if (ret > 0) | ||
197 | nr_pages = ret; | ||
198 | else | ||
199 | nr_pages = 0; | ||
200 | ret = -EAGAIN; /* XXX ? */ | ||
201 | goto out; | ||
202 | } | ||
203 | |||
204 | rdsdebug("len %d nr_pages %lu\n", len, nr_pages); | ||
205 | |||
206 | call_func: | ||
207 | func = rds_info_funcs[optname - RDS_INFO_FIRST]; | ||
208 | if (func == NULL) { | ||
209 | ret = -ENOPROTOOPT; | ||
210 | goto out; | ||
211 | } | ||
212 | |||
213 | iter.pages = pages; | ||
214 | iter.addr = NULL; | ||
215 | iter.offset = start & (PAGE_SIZE - 1); | ||
216 | |||
217 | func(sock, len, &iter, &lens); | ||
218 | BUG_ON(lens.each == 0); | ||
219 | |||
220 | total = lens.nr * lens.each; | ||
221 | |||
222 | rds_info_iter_unmap(&iter); | ||
223 | |||
224 | if (total > len) { | ||
225 | len = total; | ||
226 | ret = -ENOSPC; | ||
227 | } else { | ||
228 | len = total; | ||
229 | ret = lens.each; | ||
230 | } | ||
231 | |||
232 | if (put_user(len, optlen)) | ||
233 | ret = -EFAULT; | ||
234 | |||
235 | out: | ||
236 | for (i = 0; pages != NULL && i < nr_pages; i++) | ||
237 | put_page(pages[i]); | ||
238 | kfree(pages); | ||
239 | |||
240 | return ret; | ||
241 | } | ||
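The snapshot interface above is driven entirely from userspace: a getsockopt() call with a zero-length buffer only reports the required size back through optlen, and a successful call returns the documented per-element size rather than zero. Below is a minimal, hedged userspace sketch of such a caller; the SOL_RDS and RDS_INFO_COUNTERS values are assumptions taken from the uapi headers that accompany this series and should be checked against linux/rds.h, and the descriptor is assumed to be an RDS socket.

    /* Hedged sketch, not part of this patch: fetch one RDS info snapshot.
     * SOL_RDS (276) and RDS_INFO_COUNTERS (10000) are assumed values; verify
     * against linux/rds.h before use. */
    #include <errno.h>
    #include <stdlib.h>
    #include <sys/socket.h>

    #ifndef SOL_RDS
    #define SOL_RDS 276
    #endif
    #ifndef RDS_INFO_COUNTERS
    #define RDS_INFO_COUNTERS 10000
    #endif

    static void *rds_info_snapshot(int fd, int optname, int *each, socklen_t *total)
    {
        socklen_t len = 0;
        void *buf = NULL, *tmp;
        int ret;

        /* Zero-length probe: only writes the needed size back into len. */
        getsockopt(fd, SOL_RDS, optname, NULL, &len);

        for (;;) {
            tmp = realloc(buf, len ? len : 1);
            if (!tmp) {
                free(buf);
                return NULL;
            }
            buf = tmp;
            /* Success returns the per-element size; -1 with ENOSPC means the
             * source grew between calls, so retry with the updated len. */
            ret = getsockopt(fd, SOL_RDS, optname, buf, &len);
            if (ret >= 0) {
                *each = ret;
                *total = len;
                return buf;
            }
            if (errno != ENOSPC) {
                free(buf);
                return NULL;
            }
        }
    }

A caller would divide *total by *each to recover the number of array elements in the snapshot.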
diff --git a/net/rds/info.h b/net/rds/info.h new file mode 100644 index 000000000000..b6c052ca7d22 --- /dev/null +++ b/net/rds/info.h | |||
@@ -0,0 +1,30 @@ | |||
1 | #ifndef _RDS_INFO_H | ||
2 | #define _RDS_INFO_H | ||
3 | |||
4 | struct rds_info_lengths { | ||
5 | unsigned int nr; | ||
6 | unsigned int each; | ||
7 | }; | ||
8 | |||
9 | struct rds_info_iterator; | ||
10 | |||
11 | /* | ||
12 | * These functions must fill in the fields of @lens to reflect the size | ||
13 | * of the available info source. If the snapshot fits in @len then it | ||
14 | * should be copied using @iter. The caller will deduce if it was copied | ||
15 | * or not by comparing the lengths. | ||
16 | */ | ||
17 | typedef void (*rds_info_func)(struct socket *sock, unsigned int len, | ||
18 | struct rds_info_iterator *iter, | ||
19 | struct rds_info_lengths *lens); | ||
20 | |||
21 | void rds_info_register_func(int optname, rds_info_func func); | ||
22 | void rds_info_deregister_func(int optname, rds_info_func func); | ||
23 | int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, | ||
24 | int __user *optlen); | ||
25 | void rds_info_copy(struct rds_info_iterator *iter, void *data, | ||
26 | unsigned long bytes); | ||
27 | void rds_info_iter_unmap(struct rds_info_iterator *iter); | ||
28 | |||
29 | |||
30 | #endif | ||
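The rds_info_func contract documented above leaves the fits-or-not decision to rds_info_getsockopt(): a source always fills in @lens and copies only when the whole snapshot fits in @len. A hypothetical in-kernel source following that contract might look like the sketch below; the struct, the array and the RDS_INFO_DEMO option number are illustrative only, and a real option number must lie between RDS_INFO_FIRST and RDS_INFO_LAST or the registration BUG_ON fires.

    /* Hypothetical info source; assumes kernel context with rds.h included. */
    struct demo_counter {
    	u64 value;
    };

    #define RDS_INFO_DEMO 10042	/* illustrative; must fall in RDS_INFO_FIRST..LAST */

    static struct demo_counter demo_counters[4];

    static void demo_info(struct socket *sock, unsigned int len,
    		      struct rds_info_iterator *iter,
    		      struct rds_info_lengths *lens)
    {
    	unsigned int i;

    	lens->nr = ARRAY_SIZE(demo_counters);
    	lens->each = sizeof(struct demo_counter);

    	/* The caller compares nr * each against len; only copy when it fits. */
    	if (len < lens->nr * lens->each)
    		return;

    	for (i = 0; i < lens->nr; i++)
    		rds_info_copy(iter, &demo_counters[i], sizeof(demo_counters[i]));
    }

    /* Hooked up once at module init and torn down on exit:
     *	rds_info_register_func(RDS_INFO_DEMO, demo_info);
     *	rds_info_deregister_func(RDS_INFO_DEMO, demo_info);
     */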
diff --git a/net/rds/iw.c b/net/rds/iw.c new file mode 100644 index 000000000000..1b56905c4c08 --- /dev/null +++ b/net/rds/iw.c | |||
@@ -0,0 +1,333 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/in.h> | ||
35 | #include <linux/if.h> | ||
36 | #include <linux/netdevice.h> | ||
37 | #include <linux/inetdevice.h> | ||
38 | #include <linux/if_arp.h> | ||
39 | #include <linux/delay.h> | ||
40 | |||
41 | #include "rds.h" | ||
42 | #include "iw.h" | ||
43 | |||
44 | unsigned int fastreg_pool_size = RDS_FASTREG_POOL_SIZE; | ||
45 | unsigned int fastreg_message_size = RDS_FASTREG_SIZE + 1; /* +1 allows for unaligned MRs */ | ||
46 | |||
47 | module_param(fastreg_pool_size, int, 0444); | ||
48 | MODULE_PARM_DESC(fastreg_pool_size, " Max number of fastreg MRs per device"); | ||
49 | module_param(fastreg_message_size, int, 0444); | ||
50 | MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MRs)"); | ||
51 | |||
52 | struct list_head rds_iw_devices; | ||
53 | |||
54 | DEFINE_SPINLOCK(iw_nodev_conns_lock); | ||
55 | LIST_HEAD(iw_nodev_conns); | ||
56 | |||
57 | void rds_iw_add_one(struct ib_device *device) | ||
58 | { | ||
59 | struct rds_iw_device *rds_iwdev; | ||
60 | struct ib_device_attr *dev_attr; | ||
61 | |||
62 | /* Only handle iwarp devices */ | ||
63 | if (device->node_type != RDMA_NODE_RNIC) | ||
64 | return; | ||
65 | |||
66 | dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); | ||
67 | if (!dev_attr) | ||
68 | return; | ||
69 | |||
70 | if (ib_query_device(device, dev_attr)) { | ||
71 | rdsdebug("Query device failed for %s\n", device->name); | ||
72 | goto free_attr; | ||
73 | } | ||
74 | |||
75 | rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL); | ||
76 | if (!rds_iwdev) | ||
77 | goto free_attr; | ||
78 | |||
79 | spin_lock_init(&rds_iwdev->spinlock); | ||
80 | |||
81 | rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY); | ||
82 | rds_iwdev->max_wrs = dev_attr->max_qp_wr; | ||
83 | rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE); | ||
84 | |||
85 | rds_iwdev->page_shift = max(PAGE_SHIFT, ffs(dev_attr->page_size_cap) - 1); | ||
86 | |||
87 | rds_iwdev->dev = device; | ||
88 | rds_iwdev->pd = ib_alloc_pd(device); | ||
89 | if (IS_ERR(rds_iwdev->pd)) | ||
90 | goto free_dev; | ||
91 | |||
92 | if (!rds_iwdev->dma_local_lkey) { | ||
93 | if (device->node_type != RDMA_NODE_RNIC) { | ||
94 | rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd, | ||
95 | IB_ACCESS_LOCAL_WRITE); | ||
96 | } else { | ||
97 | rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd, | ||
98 | IB_ACCESS_REMOTE_READ | | ||
99 | IB_ACCESS_REMOTE_WRITE | | ||
100 | IB_ACCESS_LOCAL_WRITE); | ||
101 | } | ||
102 | if (IS_ERR(rds_iwdev->mr)) | ||
103 | goto err_pd; | ||
104 | } else | ||
105 | rds_iwdev->mr = NULL; | ||
106 | |||
107 | rds_iwdev->mr_pool = rds_iw_create_mr_pool(rds_iwdev); | ||
108 | if (IS_ERR(rds_iwdev->mr_pool)) { | ||
109 | rds_iwdev->mr_pool = NULL; | ||
110 | goto err_mr; | ||
111 | } | ||
112 | |||
113 | INIT_LIST_HEAD(&rds_iwdev->cm_id_list); | ||
114 | INIT_LIST_HEAD(&rds_iwdev->conn_list); | ||
115 | list_add_tail(&rds_iwdev->list, &rds_iw_devices); | ||
116 | |||
117 | ib_set_client_data(device, &rds_iw_client, rds_iwdev); | ||
118 | |||
119 | goto free_attr; | ||
120 | |||
121 | err_mr: | ||
122 | if (rds_iwdev->mr) | ||
123 | ib_dereg_mr(rds_iwdev->mr); | ||
124 | err_pd: | ||
125 | ib_dealloc_pd(rds_iwdev->pd); | ||
126 | free_dev: | ||
127 | kfree(rds_iwdev); | ||
128 | free_attr: | ||
129 | kfree(dev_attr); | ||
130 | } | ||
131 | |||
132 | void rds_iw_remove_one(struct ib_device *device) | ||
133 | { | ||
134 | struct rds_iw_device *rds_iwdev; | ||
135 | struct rds_iw_cm_id *i_cm_id, *next; | ||
136 | |||
137 | rds_iwdev = ib_get_client_data(device, &rds_iw_client); | ||
138 | if (!rds_iwdev) | ||
139 | return; | ||
140 | |||
141 | spin_lock_irq(&rds_iwdev->spinlock); | ||
142 | list_for_each_entry_safe(i_cm_id, next, &rds_iwdev->cm_id_list, list) { | ||
143 | list_del(&i_cm_id->list); | ||
144 | kfree(i_cm_id); | ||
145 | } | ||
146 | spin_unlock_irq(&rds_iwdev->spinlock); | ||
147 | |||
148 | rds_iw_remove_conns(rds_iwdev); | ||
149 | |||
150 | if (rds_iwdev->mr_pool) | ||
151 | rds_iw_destroy_mr_pool(rds_iwdev->mr_pool); | ||
152 | |||
153 | if (rds_iwdev->mr) | ||
154 | ib_dereg_mr(rds_iwdev->mr); | ||
155 | |||
156 | while (ib_dealloc_pd(rds_iwdev->pd)) { | ||
157 | rdsdebug("Failed to dealloc pd %p\n", rds_iwdev->pd); | ||
158 | msleep(1); | ||
159 | } | ||
160 | |||
161 | list_del(&rds_iwdev->list); | ||
162 | kfree(rds_iwdev); | ||
163 | } | ||
164 | |||
165 | struct ib_client rds_iw_client = { | ||
166 | .name = "rds_iw", | ||
167 | .add = rds_iw_add_one, | ||
168 | .remove = rds_iw_remove_one | ||
169 | }; | ||
170 | |||
171 | static int rds_iw_conn_info_visitor(struct rds_connection *conn, | ||
172 | void *buffer) | ||
173 | { | ||
174 | struct rds_info_rdma_connection *iinfo = buffer; | ||
175 | struct rds_iw_connection *ic; | ||
176 | |||
177 | /* We will only ever look at IB transports */ | ||
178 | if (conn->c_trans != &rds_iw_transport) | ||
179 | return 0; | ||
180 | |||
181 | iinfo->src_addr = conn->c_laddr; | ||
182 | iinfo->dst_addr = conn->c_faddr; | ||
183 | |||
184 | memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); | ||
185 | memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); | ||
186 | if (rds_conn_state(conn) == RDS_CONN_UP) { | ||
187 | struct rds_iw_device *rds_iwdev; | ||
188 | struct rdma_dev_addr *dev_addr; | ||
189 | |||
190 | ic = conn->c_transport_data; | ||
191 | dev_addr = &ic->i_cm_id->route.addr.dev_addr; | ||
192 | |||
193 | ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); | ||
194 | ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); | ||
195 | |||
196 | rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); | ||
197 | iinfo->max_send_wr = ic->i_send_ring.w_nr; | ||
198 | iinfo->max_recv_wr = ic->i_recv_ring.w_nr; | ||
199 | iinfo->max_send_sge = rds_iwdev->max_sge; | ||
200 | rds_iw_get_mr_info(rds_iwdev, iinfo); | ||
201 | } | ||
202 | return 1; | ||
203 | } | ||
204 | |||
205 | static void rds_iw_ic_info(struct socket *sock, unsigned int len, | ||
206 | struct rds_info_iterator *iter, | ||
207 | struct rds_info_lengths *lens) | ||
208 | { | ||
209 | rds_for_each_conn_info(sock, len, iter, lens, | ||
210 | rds_iw_conn_info_visitor, | ||
211 | sizeof(struct rds_info_rdma_connection)); | ||
212 | } | ||
213 | |||
214 | |||
215 | /* | ||
216 | * Early RDS/IB was built to only bind to an address if there is an IPoIB | ||
217 | * device with that address set. | ||
218 | * | ||
219 | * If it were me, I'd advocate for something more flexible. Sending and | ||
220 | * receiving should be device-agnostic. Transports would try and maintain | ||
221 | * connections between peers who have messages queued. Userspace would be | ||
222 | * allowed to influence which paths have priority. We could call userspace | ||
223 | * asserting this policy "routing". | ||
224 | */ | ||
225 | static int rds_iw_laddr_check(__be32 addr) | ||
226 | { | ||
227 | int ret; | ||
228 | struct rdma_cm_id *cm_id; | ||
229 | struct sockaddr_in sin; | ||
230 | |||
231 | /* Create a CMA ID and try to bind it. This catches both | ||
232 | * IB and iWARP capable NICs. | ||
233 | */ | ||
234 | cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); | ||
235 | if (!cm_id) | ||
236 | return -EADDRNOTAVAIL; | ||
237 | |||
238 | memset(&sin, 0, sizeof(sin)); | ||
239 | sin.sin_family = AF_INET; | ||
240 | sin.sin_addr.s_addr = addr; | ||
241 | |||
242 | /* rdma_bind_addr will only succeed for IB & iWARP devices */ | ||
243 | ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); | ||
244 | /* due to this, we will claim to support IB devices unless we | ||
245 | check node_type. */ | ||
246 | if (ret || cm_id->device->node_type != RDMA_NODE_RNIC) | ||
247 | ret = -EADDRNOTAVAIL; | ||
248 | |||
249 | rdsdebug("addr %pI4 ret %d node type %d\n", | ||
250 | &addr, ret, | ||
251 | cm_id->device ? cm_id->device->node_type : -1); | ||
252 | |||
253 | rdma_destroy_id(cm_id); | ||
254 | |||
255 | return ret; | ||
256 | } | ||
257 | |||
258 | void rds_iw_exit(void) | ||
259 | { | ||
260 | rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info); | ||
261 | rds_iw_remove_nodev_conns(); | ||
262 | ib_unregister_client(&rds_iw_client); | ||
263 | rds_iw_sysctl_exit(); | ||
264 | rds_iw_recv_exit(); | ||
265 | rds_trans_unregister(&rds_iw_transport); | ||
266 | } | ||
267 | |||
268 | struct rds_transport rds_iw_transport = { | ||
269 | .laddr_check = rds_iw_laddr_check, | ||
270 | .xmit_complete = rds_iw_xmit_complete, | ||
271 | .xmit = rds_iw_xmit, | ||
272 | .xmit_cong_map = NULL, | ||
273 | .xmit_rdma = rds_iw_xmit_rdma, | ||
274 | .recv = rds_iw_recv, | ||
275 | .conn_alloc = rds_iw_conn_alloc, | ||
276 | .conn_free = rds_iw_conn_free, | ||
277 | .conn_connect = rds_iw_conn_connect, | ||
278 | .conn_shutdown = rds_iw_conn_shutdown, | ||
279 | .inc_copy_to_user = rds_iw_inc_copy_to_user, | ||
280 | .inc_purge = rds_iw_inc_purge, | ||
281 | .inc_free = rds_iw_inc_free, | ||
282 | .cm_initiate_connect = rds_iw_cm_initiate_connect, | ||
283 | .cm_handle_connect = rds_iw_cm_handle_connect, | ||
284 | .cm_connect_complete = rds_iw_cm_connect_complete, | ||
285 | .stats_info_copy = rds_iw_stats_info_copy, | ||
286 | .exit = rds_iw_exit, | ||
287 | .get_mr = rds_iw_get_mr, | ||
288 | .sync_mr = rds_iw_sync_mr, | ||
289 | .free_mr = rds_iw_free_mr, | ||
290 | .flush_mrs = rds_iw_flush_mrs, | ||
291 | .t_owner = THIS_MODULE, | ||
292 | .t_name = "iwarp", | ||
293 | .t_prefer_loopback = 1, | ||
294 | }; | ||
295 | |||
296 | int __init rds_iw_init(void) | ||
297 | { | ||
298 | int ret; | ||
299 | |||
300 | INIT_LIST_HEAD(&rds_iw_devices); | ||
301 | |||
302 | ret = ib_register_client(&rds_iw_client); | ||
303 | if (ret) | ||
304 | goto out; | ||
305 | |||
306 | ret = rds_iw_sysctl_init(); | ||
307 | if (ret) | ||
308 | goto out_ibreg; | ||
309 | |||
310 | ret = rds_iw_recv_init(); | ||
311 | if (ret) | ||
312 | goto out_sysctl; | ||
313 | |||
314 | ret = rds_trans_register(&rds_iw_transport); | ||
315 | if (ret) | ||
316 | goto out_recv; | ||
317 | |||
318 | rds_info_register_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info); | ||
319 | |||
320 | goto out; | ||
321 | |||
322 | out_recv: | ||
323 | rds_iw_recv_exit(); | ||
324 | out_sysctl: | ||
325 | rds_iw_sysctl_exit(); | ||
326 | out_ibreg: | ||
327 | ib_unregister_client(&rds_iw_client); | ||
328 | out: | ||
329 | return ret; | ||
330 | } | ||
331 | |||
332 | MODULE_LICENSE("GPL"); | ||
333 | |||
diff --git a/net/rds/iw.h b/net/rds/iw.h new file mode 100644 index 000000000000..0ddda34f2a1c --- /dev/null +++ b/net/rds/iw.h | |||
@@ -0,0 +1,395 @@ | |||
1 | #ifndef _RDS_IW_H | ||
2 | #define _RDS_IW_H | ||
3 | |||
4 | #include <rdma/ib_verbs.h> | ||
5 | #include <rdma/rdma_cm.h> | ||
6 | #include "rds.h" | ||
7 | #include "rdma_transport.h" | ||
8 | |||
9 | #define RDS_FASTREG_SIZE 20 | ||
10 | #define RDS_FASTREG_POOL_SIZE 2048 | ||
11 | |||
12 | #define RDS_IW_MAX_SGE 8 | ||
13 | #define RDS_IW_RECV_SGE 2 | ||
14 | |||
15 | #define RDS_IW_DEFAULT_RECV_WR 1024 | ||
16 | #define RDS_IW_DEFAULT_SEND_WR 256 | ||
17 | |||
18 | #define RDS_IW_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */ | ||
19 | |||
20 | extern struct list_head rds_iw_devices; | ||
21 | |||
22 | /* | ||
23 | * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to | ||
24 | * try to minimize the amount of memory tied up in both the device and | ||

25 | * socket receive queues. | ||
26 | */ | ||
27 | /* page offset of the final full frag that fits in the page */ | ||
28 | #define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE) | ||
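For illustration only: with a hypothetical RDS_FRAG_SIZE of 2048 on a 4096-byte page, RDS_PAGE_LAST_OFF evaluates to ((4096 / 2048) - 1) * 2048 = 2048, i.e. the last full fragment starts halfway into the page; when the fragment size equals the page size it is simply 0.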
29 | struct rds_page_frag { | ||
30 | struct list_head f_item; | ||
31 | struct page *f_page; | ||
32 | unsigned long f_offset; | ||
33 | dma_addr_t f_mapped; | ||
34 | }; | ||
35 | |||
36 | struct rds_iw_incoming { | ||
37 | struct list_head ii_frags; | ||
38 | struct rds_incoming ii_inc; | ||
39 | }; | ||
40 | |||
41 | struct rds_iw_connect_private { | ||
42 | /* Add new fields at the end, and don't permute existing fields. */ | ||
43 | __be32 dp_saddr; | ||
44 | __be32 dp_daddr; | ||
45 | u8 dp_protocol_major; | ||
46 | u8 dp_protocol_minor; | ||
47 | __be16 dp_protocol_minor_mask; /* bitmask */ | ||
48 | __be32 dp_reserved1; | ||
49 | __be64 dp_ack_seq; | ||
50 | __be32 dp_credit; /* non-zero enables flow ctl */ | ||
51 | }; | ||
52 | |||
53 | struct rds_iw_scatterlist { | ||
54 | struct scatterlist *list; | ||
55 | unsigned int len; | ||
56 | int dma_len; | ||
57 | unsigned int dma_npages; | ||
58 | unsigned int bytes; | ||
59 | }; | ||
60 | |||
61 | struct rds_iw_mapping { | ||
62 | spinlock_t m_lock; /* protect the mapping struct */ | ||
63 | struct list_head m_list; | ||
64 | struct rds_iw_mr *m_mr; | ||
65 | uint32_t m_rkey; | ||
66 | struct rds_iw_scatterlist m_sg; | ||
67 | }; | ||
68 | |||
69 | struct rds_iw_send_work { | ||
70 | struct rds_message *s_rm; | ||
71 | |||
72 | /* We should really put these into a union: */ | ||
73 | struct rds_rdma_op *s_op; | ||
74 | struct rds_iw_mapping *s_mapping; | ||
75 | struct ib_mr *s_mr; | ||
76 | struct ib_fast_reg_page_list *s_page_list; | ||
77 | unsigned char s_remap_count; | ||
78 | |||
79 | struct ib_send_wr s_wr; | ||
80 | struct ib_sge s_sge[RDS_IW_MAX_SGE]; | ||
81 | unsigned long s_queued; | ||
82 | }; | ||
83 | |||
84 | struct rds_iw_recv_work { | ||
85 | struct rds_iw_incoming *r_iwinc; | ||
86 | struct rds_page_frag *r_frag; | ||
87 | struct ib_recv_wr r_wr; | ||
88 | struct ib_sge r_sge[2]; | ||
89 | }; | ||
90 | |||
91 | struct rds_iw_work_ring { | ||
92 | u32 w_nr; | ||
93 | u32 w_alloc_ptr; | ||
94 | u32 w_alloc_ctr; | ||
95 | u32 w_free_ptr; | ||
96 | atomic_t w_free_ctr; | ||
97 | }; | ||
98 | |||
99 | struct rds_iw_device; | ||
100 | |||
101 | struct rds_iw_connection { | ||
102 | |||
103 | struct list_head iw_node; | ||
104 | struct rds_iw_device *rds_iwdev; | ||
105 | struct rds_connection *conn; | ||
106 | |||
107 | /* alphabet soup, IBTA style */ | ||
108 | struct rdma_cm_id *i_cm_id; | ||
109 | struct ib_pd *i_pd; | ||
110 | struct ib_mr *i_mr; | ||
111 | struct ib_cq *i_send_cq; | ||
112 | struct ib_cq *i_recv_cq; | ||
113 | |||
114 | /* tx */ | ||
115 | struct rds_iw_work_ring i_send_ring; | ||
116 | struct rds_message *i_rm; | ||
117 | struct rds_header *i_send_hdrs; | ||
118 | u64 i_send_hdrs_dma; | ||
119 | struct rds_iw_send_work *i_sends; | ||
120 | |||
121 | /* rx */ | ||
122 | struct mutex i_recv_mutex; | ||
123 | struct rds_iw_work_ring i_recv_ring; | ||
124 | struct rds_iw_incoming *i_iwinc; | ||
125 | u32 i_recv_data_rem; | ||
126 | struct rds_header *i_recv_hdrs; | ||
127 | u64 i_recv_hdrs_dma; | ||
128 | struct rds_iw_recv_work *i_recvs; | ||
129 | struct rds_page_frag i_frag; | ||
130 | u64 i_ack_recv; /* last ACK received */ | ||
131 | |||
132 | /* sending acks */ | ||
133 | unsigned long i_ack_flags; | ||
134 | u64 i_ack_next; /* next ACK to send */ | ||
135 | struct rds_header *i_ack; | ||
136 | struct ib_send_wr i_ack_wr; | ||
137 | struct ib_sge i_ack_sge; | ||
138 | u64 i_ack_dma; | ||
139 | unsigned long i_ack_queued; | ||
140 | |||
141 | /* Flow control related information | ||
142 | * | ||
143 | * Our algorithm uses a pair of variables that we need to access | ||
144 | * atomically - one for the send credits, and one for the posted | ||
145 | * recv credits we need to transfer to the remote. | ||
146 | * Rather than protect them using a slow spinlock, we put both into | ||
147 | * a single atomic_t and update it using cmpxchg | ||
148 | */ | ||
149 | atomic_t i_credits; | ||
150 | |||
151 | /* Protocol version specific information */ | ||
152 | unsigned int i_flowctl:1; /* enable/disable flow ctl */ | ||
153 | unsigned int i_dma_local_lkey:1; | ||
154 | unsigned int i_fastreg_posted:1; /* fastreg posted on this connection */ | ||
155 | /* Batched completions */ | ||
156 | unsigned int i_unsignaled_wrs; | ||
157 | long i_unsignaled_bytes; | ||
158 | }; | ||
159 | |||
160 | /* This assumes that atomic_t is at least 32 bits */ | ||
161 | #define IB_GET_SEND_CREDITS(v) ((v) & 0xffff) | ||
162 | #define IB_GET_POST_CREDITS(v) ((v) >> 16) | ||
163 | #define IB_SET_SEND_CREDITS(v) ((v) & 0xffff) | ||
164 | #define IB_SET_POST_CREDITS(v) ((v) << 16) | ||
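As the comment in struct rds_iw_connection explains, i_credits packs the peer-granted send credits into the low 16 bits and the not-yet-advertised posted-receive credits into the high 16 bits, which is what the four macros above encode. A standalone sketch of that layout, using a plain unsigned int in place of atomic_t purely for illustration:

    /* Illustrative credit-word layout demo; not part of this patch. */
    #include <stdio.h>

    #define IB_GET_SEND_CREDITS(v)	((v) & 0xffff)
    #define IB_GET_POST_CREDITS(v)	((v) >> 16)
    #define IB_SET_SEND_CREDITS(v)	((v) & 0xffff)
    #define IB_SET_POST_CREDITS(v)	((v) << 16)

    int main(void)
    {
    	/* 100 send credits granted by the peer, 32 freshly posted recv
    	 * buffers waiting to be advertised back to it. */
    	unsigned int credits = IB_SET_SEND_CREDITS(100) | IB_SET_POST_CREDITS(32);

    	printf("send %u post %u\n",
    	       IB_GET_SEND_CREDITS(credits), IB_GET_POST_CREDITS(credits));
    	/* prints: send 100 post 32 */
    	return 0;
    }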
165 | |||
166 | struct rds_iw_cm_id { | ||
167 | struct list_head list; | ||
168 | struct rdma_cm_id *cm_id; | ||
169 | }; | ||
170 | |||
171 | struct rds_iw_device { | ||
172 | struct list_head list; | ||
173 | struct list_head cm_id_list; | ||
174 | struct list_head conn_list; | ||
175 | struct ib_device *dev; | ||
176 | struct ib_pd *pd; | ||
177 | struct ib_mr *mr; | ||
178 | struct rds_iw_mr_pool *mr_pool; | ||
179 | int page_shift; | ||
180 | int max_sge; | ||
181 | unsigned int max_wrs; | ||
182 | unsigned int dma_local_lkey:1; | ||
183 | spinlock_t spinlock; /* protect the above */ | ||
184 | }; | ||
185 | |||
186 | /* bits for i_ack_flags */ | ||
187 | #define IB_ACK_IN_FLIGHT 0 | ||
188 | #define IB_ACK_REQUESTED 1 | ||
189 | |||
190 | /* Magic WR_ID for ACKs */ | ||
191 | #define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL) | ||
192 | #define RDS_IW_FAST_REG_WR_ID ((u64)0xefefefefefefefefULL) | ||
193 | #define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL) | ||
194 | |||
195 | struct rds_iw_statistics { | ||
196 | uint64_t s_iw_connect_raced; | ||
197 | uint64_t s_iw_listen_closed_stale; | ||
198 | uint64_t s_iw_tx_cq_call; | ||
199 | uint64_t s_iw_tx_cq_event; | ||
200 | uint64_t s_iw_tx_ring_full; | ||
201 | uint64_t s_iw_tx_throttle; | ||
202 | uint64_t s_iw_tx_sg_mapping_failure; | ||
203 | uint64_t s_iw_tx_stalled; | ||
204 | uint64_t s_iw_tx_credit_updates; | ||
205 | uint64_t s_iw_rx_cq_call; | ||
206 | uint64_t s_iw_rx_cq_event; | ||
207 | uint64_t s_iw_rx_ring_empty; | ||
208 | uint64_t s_iw_rx_refill_from_cq; | ||
209 | uint64_t s_iw_rx_refill_from_thread; | ||
210 | uint64_t s_iw_rx_alloc_limit; | ||
211 | uint64_t s_iw_rx_credit_updates; | ||
212 | uint64_t s_iw_ack_sent; | ||
213 | uint64_t s_iw_ack_send_failure; | ||
214 | uint64_t s_iw_ack_send_delayed; | ||
215 | uint64_t s_iw_ack_send_piggybacked; | ||
216 | uint64_t s_iw_ack_received; | ||
217 | uint64_t s_iw_rdma_mr_alloc; | ||
218 | uint64_t s_iw_rdma_mr_free; | ||
219 | uint64_t s_iw_rdma_mr_used; | ||
220 | uint64_t s_iw_rdma_mr_pool_flush; | ||
221 | uint64_t s_iw_rdma_mr_pool_wait; | ||
222 | uint64_t s_iw_rdma_mr_pool_depleted; | ||
223 | }; | ||
224 | |||
225 | extern struct workqueue_struct *rds_iw_wq; | ||
226 | |||
227 | /* | ||
228 | * Fake ib_dma_sync_sg_for_{cpu,device} as long as ib_verbs.h | ||
229 | * doesn't define it. | ||
230 | */ | ||
231 | static inline void rds_iw_dma_sync_sg_for_cpu(struct ib_device *dev, | ||
232 | struct scatterlist *sg, unsigned int sg_dma_len, int direction) | ||
233 | { | ||
234 | unsigned int i; | ||
235 | |||
236 | for (i = 0; i < sg_dma_len; ++i) { | ||
237 | ib_dma_sync_single_for_cpu(dev, | ||
238 | ib_sg_dma_address(dev, &sg[i]), | ||
239 | ib_sg_dma_len(dev, &sg[i]), | ||
240 | direction); | ||
241 | } | ||
242 | } | ||
243 | #define ib_dma_sync_sg_for_cpu rds_iw_dma_sync_sg_for_cpu | ||
244 | |||
245 | static inline void rds_iw_dma_sync_sg_for_device(struct ib_device *dev, | ||
246 | struct scatterlist *sg, unsigned int sg_dma_len, int direction) | ||
247 | { | ||
248 | unsigned int i; | ||
249 | |||
250 | for (i = 0; i < sg_dma_len; ++i) { | ||
251 | ib_dma_sync_single_for_device(dev, | ||
252 | ib_sg_dma_address(dev, &sg[i]), | ||
253 | ib_sg_dma_len(dev, &sg[i]), | ||
254 | direction); | ||
255 | } | ||
256 | } | ||
257 | #define ib_dma_sync_sg_for_device rds_iw_dma_sync_sg_for_device | ||
258 | |||
259 | static inline u32 rds_iw_local_dma_lkey(struct rds_iw_connection *ic) | ||
260 | { | ||
261 | return ic->i_dma_local_lkey ? ic->i_cm_id->device->local_dma_lkey : ic->i_mr->lkey; | ||
262 | } | ||
263 | |||
264 | /* ib.c */ | ||
265 | extern struct rds_transport rds_iw_transport; | ||
266 | extern void rds_iw_add_one(struct ib_device *device); | ||
267 | extern void rds_iw_remove_one(struct ib_device *device); | ||
268 | extern struct ib_client rds_iw_client; | ||
269 | |||
270 | extern unsigned int fastreg_pool_size; | ||
271 | extern unsigned int fastreg_message_size; | ||
272 | |||
273 | extern spinlock_t iw_nodev_conns_lock; | ||
274 | extern struct list_head iw_nodev_conns; | ||
275 | |||
276 | /* ib_cm.c */ | ||
277 | int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp); | ||
278 | void rds_iw_conn_free(void *arg); | ||
279 | int rds_iw_conn_connect(struct rds_connection *conn); | ||
280 | void rds_iw_conn_shutdown(struct rds_connection *conn); | ||
281 | void rds_iw_state_change(struct sock *sk); | ||
282 | int __init rds_iw_listen_init(void); | ||
283 | void rds_iw_listen_stop(void); | ||
284 | void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...); | ||
285 | int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, | ||
286 | struct rdma_cm_event *event); | ||
287 | int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id); | ||
288 | void rds_iw_cm_connect_complete(struct rds_connection *conn, | ||
289 | struct rdma_cm_event *event); | ||
290 | |||
291 | |||
292 | #define rds_iw_conn_error(conn, fmt...) \ | ||
293 | __rds_iw_conn_error(conn, KERN_WARNING "RDS/IW: " fmt) | ||
294 | |||
295 | /* ib_rdma.c */ | ||
296 | int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); | ||
297 | int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); | ||
298 | void rds_iw_remove_nodev_conns(void); | ||
299 | void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev); | ||
300 | struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *); | ||
301 | void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo); | ||
302 | void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *); | ||
303 | void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents, | ||
304 | struct rds_sock *rs, u32 *key_ret); | ||
305 | void rds_iw_sync_mr(void *trans_private, int dir); | ||
306 | void rds_iw_free_mr(void *trans_private, int invalidate); | ||
307 | void rds_iw_flush_mrs(void); | ||
308 | void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); | ||
309 | |||
310 | /* ib_recv.c */ | ||
311 | int __init rds_iw_recv_init(void); | ||
312 | void rds_iw_recv_exit(void); | ||
313 | int rds_iw_recv(struct rds_connection *conn); | ||
314 | int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, | ||
315 | gfp_t page_gfp, int prefill); | ||
316 | void rds_iw_inc_purge(struct rds_incoming *inc); | ||
317 | void rds_iw_inc_free(struct rds_incoming *inc); | ||
318 | int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, | ||
319 | size_t size); | ||
320 | void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context); | ||
321 | void rds_iw_recv_init_ring(struct rds_iw_connection *ic); | ||
322 | void rds_iw_recv_clear_ring(struct rds_iw_connection *ic); | ||
323 | void rds_iw_recv_init_ack(struct rds_iw_connection *ic); | ||
324 | void rds_iw_attempt_ack(struct rds_iw_connection *ic); | ||
325 | void rds_iw_ack_send_complete(struct rds_iw_connection *ic); | ||
326 | u64 rds_iw_piggyb_ack(struct rds_iw_connection *ic); | ||
327 | |||
328 | /* ib_ring.c */ | ||
329 | void rds_iw_ring_init(struct rds_iw_work_ring *ring, u32 nr); | ||
330 | void rds_iw_ring_resize(struct rds_iw_work_ring *ring, u32 nr); | ||
331 | u32 rds_iw_ring_alloc(struct rds_iw_work_ring *ring, u32 val, u32 *pos); | ||
332 | void rds_iw_ring_free(struct rds_iw_work_ring *ring, u32 val); | ||
333 | void rds_iw_ring_unalloc(struct rds_iw_work_ring *ring, u32 val); | ||
334 | int rds_iw_ring_empty(struct rds_iw_work_ring *ring); | ||
335 | int rds_iw_ring_low(struct rds_iw_work_ring *ring); | ||
336 | u32 rds_iw_ring_oldest(struct rds_iw_work_ring *ring); | ||
337 | u32 rds_iw_ring_completed(struct rds_iw_work_ring *ring, u32 wr_id, u32 oldest); | ||
338 | extern wait_queue_head_t rds_iw_ring_empty_wait; | ||
339 | |||
340 | /* ib_send.c */ | ||
341 | void rds_iw_xmit_complete(struct rds_connection *conn); | ||
342 | int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, | ||
343 | unsigned int hdr_off, unsigned int sg, unsigned int off); | ||
344 | void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context); | ||
345 | void rds_iw_send_init_ring(struct rds_iw_connection *ic); | ||
346 | void rds_iw_send_clear_ring(struct rds_iw_connection *ic); | ||
347 | int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); | ||
348 | void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits); | ||
349 | void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted); | ||
350 | int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted, | ||
351 | u32 *adv_credits, int need_posted); | ||
352 | |||
353 | /* ib_stats.c */ | ||
354 | DECLARE_PER_CPU(struct rds_iw_statistics, rds_iw_stats); | ||
355 | #define rds_iw_stats_inc(member) rds_stats_inc_which(rds_iw_stats, member) | ||
356 | unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter, | ||
357 | unsigned int avail); | ||
358 | |||
359 | /* ib_sysctl.c */ | ||
360 | int __init rds_iw_sysctl_init(void); | ||
361 | void rds_iw_sysctl_exit(void); | ||
362 | extern unsigned long rds_iw_sysctl_max_send_wr; | ||
363 | extern unsigned long rds_iw_sysctl_max_recv_wr; | ||
364 | extern unsigned long rds_iw_sysctl_max_unsig_wrs; | ||
365 | extern unsigned long rds_iw_sysctl_max_unsig_bytes; | ||
366 | extern unsigned long rds_iw_sysctl_max_recv_allocation; | ||
367 | extern unsigned int rds_iw_sysctl_flow_control; | ||
368 | extern ctl_table rds_iw_sysctl_table[]; | ||
369 | |||
370 | /* | ||
371 | * Helper functions for getting/setting the header and data SGEs in | ||
372 | * RDS packets (not RDMA) | ||
373 | */ | ||
374 | static inline struct ib_sge * | ||
375 | rds_iw_header_sge(struct rds_iw_connection *ic, struct ib_sge *sge) | ||
376 | { | ||
377 | return &sge[0]; | ||
378 | } | ||
379 | |||
380 | static inline struct ib_sge * | ||
381 | rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge) | ||
382 | { | ||
383 | return &sge[1]; | ||
384 | } | ||
385 | |||
386 | static inline void rds_iw_set_64bit(u64 *ptr, u64 val) | ||
387 | { | ||
388 | #if BITS_PER_LONG == 64 | ||
389 | *ptr = val; | ||
390 | #else | ||
391 | set_64bit(ptr, val); | ||
392 | #endif | ||
393 | } | ||
394 | |||
395 | #endif | ||
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c new file mode 100644 index 000000000000..57ecb3d4b8a5 --- /dev/null +++ b/net/rds/iw_cm.c | |||
@@ -0,0 +1,750 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/in.h> | ||
35 | #include <linux/vmalloc.h> | ||
36 | |||
37 | #include "rds.h" | ||
38 | #include "iw.h" | ||
39 | |||
40 | /* | ||
41 | * Set the selected protocol version | ||
42 | */ | ||
43 | static void rds_iw_set_protocol(struct rds_connection *conn, unsigned int version) | ||
44 | { | ||
45 | conn->c_version = version; | ||
46 | } | ||
47 | |||
48 | /* | ||
49 | * Set up flow control | ||
50 | */ | ||
51 | static void rds_iw_set_flow_control(struct rds_connection *conn, u32 credits) | ||
52 | { | ||
53 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
54 | |||
55 | if (rds_iw_sysctl_flow_control && credits != 0) { | ||
56 | /* We're doing flow control */ | ||
57 | ic->i_flowctl = 1; | ||
58 | rds_iw_send_add_credits(conn, credits); | ||
59 | } else { | ||
60 | ic->i_flowctl = 0; | ||
61 | } | ||
62 | } | ||
63 | |||
64 | /* | ||
65 | * Connection established. | ||
66 | * We get here for both outgoing and incoming connection. | ||
67 | */ | ||
68 | void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event) | ||
69 | { | ||
70 | const struct rds_iw_connect_private *dp = NULL; | ||
71 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
72 | struct rds_iw_device *rds_iwdev; | ||
73 | int err; | ||
74 | |||
75 | if (event->param.conn.private_data_len) { | ||
76 | dp = event->param.conn.private_data; | ||
77 | |||
78 | rds_iw_set_protocol(conn, | ||
79 | RDS_PROTOCOL(dp->dp_protocol_major, | ||
80 | dp->dp_protocol_minor)); | ||
81 | rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); | ||
82 | } | ||
83 | |||
84 | /* update ib_device with this local ipaddr & conn */ | ||
85 | rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); | ||
86 | err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id); | ||
87 | if (err) | ||
88 | printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err); | ||
89 | err = rds_iw_add_conn(rds_iwdev, conn); | ||
90 | if (err) | ||
91 | printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err); | ||
92 | |||
93 | /* If the peer gave us the last packet it saw, process this as if | ||
94 | * we had received a regular ACK. */ | ||
95 | if (dp && dp->dp_ack_seq) | ||
96 | rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); | ||
97 | |||
98 | printk(KERN_NOTICE "RDS/IW: connected to %pI4<->%pI4 version %u.%u%s\n", | ||
99 | &conn->c_laddr, &conn->c_faddr, | ||
100 | RDS_PROTOCOL_MAJOR(conn->c_version), | ||
101 | RDS_PROTOCOL_MINOR(conn->c_version), | ||
102 | ic->i_flowctl ? ", flow control" : ""); | ||
103 | |||
104 | rds_connect_complete(conn); | ||
105 | } | ||
106 | |||
107 | static void rds_iw_cm_fill_conn_param(struct rds_connection *conn, | ||
108 | struct rdma_conn_param *conn_param, | ||
109 | struct rds_iw_connect_private *dp, | ||
110 | u32 protocol_version) | ||
111 | { | ||
112 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
113 | |||
114 | memset(conn_param, 0, sizeof(struct rdma_conn_param)); | ||
115 | /* XXX tune these? */ | ||
116 | conn_param->responder_resources = 1; | ||
117 | conn_param->initiator_depth = 1; | ||
118 | |||
119 | if (dp) { | ||
120 | memset(dp, 0, sizeof(*dp)); | ||
121 | dp->dp_saddr = conn->c_laddr; | ||
122 | dp->dp_daddr = conn->c_faddr; | ||
123 | dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version); | ||
124 | dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version); | ||
125 | dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IW_SUPPORTED_PROTOCOLS); | ||
126 | dp->dp_ack_seq = rds_iw_piggyb_ack(ic); | ||
127 | |||
128 | /* Advertise flow control */ | ||
129 | if (ic->i_flowctl) { | ||
130 | unsigned int credits; | ||
131 | |||
132 | credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)); | ||
133 | dp->dp_credit = cpu_to_be32(credits); | ||
134 | atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits); | ||
135 | } | ||
136 | |||
137 | conn_param->private_data = dp; | ||
138 | conn_param->private_data_len = sizeof(*dp); | ||
139 | } | ||
140 | } | ||
141 | |||
142 | static void rds_iw_cq_event_handler(struct ib_event *event, void *data) | ||
143 | { | ||
144 | rdsdebug("event %u data %p\n", event->event, data); | ||
145 | } | ||
146 | |||
147 | static void rds_iw_qp_event_handler(struct ib_event *event, void *data) | ||
148 | { | ||
149 | struct rds_connection *conn = data; | ||
150 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
151 | |||
152 | rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event); | ||
153 | |||
154 | switch (event->event) { | ||
155 | case IB_EVENT_COMM_EST: | ||
156 | rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); | ||
157 | break; | ||
158 | case IB_EVENT_QP_REQ_ERR: | ||
159 | case IB_EVENT_QP_FATAL: | ||
160 | default: | ||
161 | rds_iw_conn_error(conn, "RDS/IW: Fatal QP Event %u - connection %pI4->%pI4...reconnecting\n", | ||
162 | event->event, &conn->c_laddr, | ||
163 | &conn->c_faddr); | ||
164 | break; | ||
165 | } | ||
166 | } | ||
167 | |||
168 | /* | ||
169 | * Create a QP | ||
170 | */ | ||
171 | static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr, | ||
172 | struct rds_iw_device *rds_iwdev, | ||
173 | struct rds_iw_work_ring *send_ring, | ||
174 | void (*send_cq_handler)(struct ib_cq *, void *), | ||
175 | struct rds_iw_work_ring *recv_ring, | ||
176 | void (*recv_cq_handler)(struct ib_cq *, void *), | ||
177 | void *context) | ||
178 | { | ||
179 | struct ib_device *dev = rds_iwdev->dev; | ||
180 | unsigned int send_size, recv_size; | ||
181 | int ret; | ||
182 | |||
183 | /* The offset of 1 is to accommodate the additional ACK WR. */ | ||
184 | send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1); | ||
185 | recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1); | ||
186 | rds_iw_ring_resize(send_ring, send_size - 1); | ||
187 | rds_iw_ring_resize(recv_ring, recv_size - 1); | ||
188 | |||
189 | memset(attr, 0, sizeof(*attr)); | ||
190 | attr->event_handler = rds_iw_qp_event_handler; | ||
191 | attr->qp_context = context; | ||
192 | attr->cap.max_send_wr = send_size; | ||
193 | attr->cap.max_recv_wr = recv_size; | ||
194 | attr->cap.max_send_sge = rds_iwdev->max_sge; | ||
195 | attr->cap.max_recv_sge = RDS_IW_RECV_SGE; | ||
196 | attr->sq_sig_type = IB_SIGNAL_REQ_WR; | ||
197 | attr->qp_type = IB_QPT_RC; | ||
198 | |||
199 | attr->send_cq = ib_create_cq(dev, send_cq_handler, | ||
200 | rds_iw_cq_event_handler, | ||
201 | context, send_size, 0); | ||
202 | if (IS_ERR(attr->send_cq)) { | ||
203 | ret = PTR_ERR(attr->send_cq); | ||
204 | attr->send_cq = NULL; | ||
205 | rdsdebug("ib_create_cq send failed: %d\n", ret); | ||
206 | goto out; | ||
207 | } | ||
208 | |||
209 | attr->recv_cq = ib_create_cq(dev, recv_cq_handler, | ||
210 | rds_iw_cq_event_handler, | ||
211 | context, recv_size, 0); | ||
212 | if (IS_ERR(attr->recv_cq)) { | ||
213 | ret = PTR_ERR(attr->recv_cq); | ||
214 | attr->recv_cq = NULL; | ||
215 | rdsdebug("ib_create_cq send failed: %d\n", ret); | ||
216 | goto out; | ||
217 | } | ||
218 | |||
219 | ret = ib_req_notify_cq(attr->send_cq, IB_CQ_NEXT_COMP); | ||
220 | if (ret) { | ||
221 | rdsdebug("ib_req_notify_cq send failed: %d\n", ret); | ||
222 | goto out; | ||
223 | } | ||
224 | |||
225 | ret = ib_req_notify_cq(attr->recv_cq, IB_CQ_SOLICITED); | ||
226 | if (ret) { | ||
227 | rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); | ||
228 | goto out; | ||
229 | } | ||
230 | |||
231 | out: | ||
232 | if (ret) { | ||
233 | if (attr->send_cq) | ||
234 | ib_destroy_cq(attr->send_cq); | ||
235 | if (attr->recv_cq) | ||
236 | ib_destroy_cq(attr->recv_cq); | ||
237 | } | ||
238 | return ret; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * This needs to be very careful to not leave IS_ERR pointers around for | ||
243 | * cleanup to trip over. | ||
244 | */ | ||
245 | static int rds_iw_setup_qp(struct rds_connection *conn) | ||
246 | { | ||
247 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
248 | struct ib_device *dev = ic->i_cm_id->device; | ||
249 | struct ib_qp_init_attr attr; | ||
250 | struct rds_iw_device *rds_iwdev; | ||
251 | int ret; | ||
252 | |||
253 | /* rds_iw_add_one creates a rds_iw_device object per IB device, | ||
254 | * and allocates a protection domain, memory range and MR pool | ||
255 | * for each. If that fails for any reason, it will not register | ||
256 | * the rds_iwdev at all. | ||
257 | */ | ||
258 | rds_iwdev = ib_get_client_data(dev, &rds_iw_client); | ||
259 | if (rds_iwdev == NULL) { | ||
260 | if (printk_ratelimit()) | ||
261 | printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n", | ||
262 | dev->name); | ||
263 | return -EOPNOTSUPP; | ||
264 | } | ||
265 | |||
266 | /* Protection domain and memory range */ | ||
267 | ic->i_pd = rds_iwdev->pd; | ||
268 | ic->i_mr = rds_iwdev->mr; | ||
269 | |||
270 | ret = rds_iw_init_qp_attrs(&attr, rds_iwdev, | ||
271 | &ic->i_send_ring, rds_iw_send_cq_comp_handler, | ||
272 | &ic->i_recv_ring, rds_iw_recv_cq_comp_handler, | ||
273 | conn); | ||
274 | if (ret < 0) | ||
275 | goto out; | ||
276 | |||
277 | ic->i_send_cq = attr.send_cq; | ||
278 | ic->i_recv_cq = attr.recv_cq; | ||
279 | |||
280 | /* | ||
281 | * XXX this can fail if max_*_wr is too large? Are we supposed | ||
282 | * to back off until we get a value that the hardware can support? | ||
283 | */ | ||
284 | ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); | ||
285 | if (ret) { | ||
286 | rdsdebug("rdma_create_qp failed: %d\n", ret); | ||
287 | goto out; | ||
288 | } | ||
289 | |||
290 | ic->i_send_hdrs = ib_dma_alloc_coherent(dev, | ||
291 | ic->i_send_ring.w_nr * | ||
292 | sizeof(struct rds_header), | ||
293 | &ic->i_send_hdrs_dma, GFP_KERNEL); | ||
294 | if (ic->i_send_hdrs == NULL) { | ||
295 | ret = -ENOMEM; | ||
296 | rdsdebug("ib_dma_alloc_coherent send failed\n"); | ||
297 | goto out; | ||
298 | } | ||
299 | |||
300 | ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, | ||
301 | ic->i_recv_ring.w_nr * | ||
302 | sizeof(struct rds_header), | ||
303 | &ic->i_recv_hdrs_dma, GFP_KERNEL); | ||
304 | if (ic->i_recv_hdrs == NULL) { | ||
305 | ret = -ENOMEM; | ||
306 | rdsdebug("ib_dma_alloc_coherent recv failed\n"); | ||
307 | goto out; | ||
308 | } | ||
309 | |||
310 | ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), | ||
311 | &ic->i_ack_dma, GFP_KERNEL); | ||
312 | if (ic->i_ack == NULL) { | ||
313 | ret = -ENOMEM; | ||
314 | rdsdebug("ib_dma_alloc_coherent ack failed\n"); | ||
315 | goto out; | ||
316 | } | ||
317 | |||
318 | ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work)); | ||
319 | if (ic->i_sends == NULL) { | ||
320 | ret = -ENOMEM; | ||
321 | rdsdebug("send allocation failed\n"); | ||
322 | goto out; | ||
323 | } | ||
324 | rds_iw_send_init_ring(ic); | ||
325 | |||
326 | ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work)); | ||
327 | if (ic->i_recvs == NULL) { | ||
328 | ret = -ENOMEM; | ||
329 | rdsdebug("recv allocation failed\n"); | ||
330 | goto out; | ||
331 | } | ||
332 | |||
333 | rds_iw_recv_init_ring(ic); | ||
334 | rds_iw_recv_init_ack(ic); | ||
335 | |||
336 | /* Post receive buffers - as a side effect, this will update | ||
337 | * the posted credit count. */ | ||
338 | rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1); | ||
339 | |||
340 | rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr, | ||
341 | ic->i_send_cq, ic->i_recv_cq); | ||
342 | |||
343 | out: | ||
344 | return ret; | ||
345 | } | ||
346 | |||
347 | static u32 rds_iw_protocol_compatible(const struct rds_iw_connect_private *dp) | ||
348 | { | ||
349 | u16 common; | ||
350 | u32 version = 0; | ||
351 | |||
352 | /* rdma_cm private data is odd - when there is any private data in the | ||
353 | * request, we will be given a pretty large buffer without being told the | ||
354 | * original size. The only way to tell the difference is by looking at | ||
355 | * the contents, which are initialized to zero. | ||
356 | * If the protocol version fields aren't set, this is a connection attempt | ||
357 | * from an older version. This could be 3.0 or 2.0 - we can't tell. | ||
358 | * We really should have changed this for OFED 1.3 :-( */ | ||
359 | if (dp->dp_protocol_major == 0) | ||
360 | return RDS_PROTOCOL_3_0; | ||
361 | |||
362 | common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IW_SUPPORTED_PROTOCOLS; | ||
363 | if (dp->dp_protocol_major == 3 && common) { | ||
364 | version = RDS_PROTOCOL_3_0; | ||
365 | while ((common >>= 1) != 0) | ||
366 | version++; | ||
367 | } else if (printk_ratelimit()) { | ||
368 | printk(KERN_NOTICE "RDS: Connection from %pI4 using " | ||
369 | "incompatible protocol version %u.%u\n", | ||
370 | &dp->dp_saddr, | ||
371 | dp->dp_protocol_major, | ||
372 | dp->dp_protocol_minor); | ||
373 | } | ||
374 | return version; | ||
375 | } | ||
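A worked example of the negotiation above, assuming both ends support minor versions 0 and 1 (so RDS_IW_SUPPORTED_PROTOCOLS masks dp_protocol_minor_mask down to 0x0003): common starts at 0x0003, the loop shifts it right once before it empties, and version ends one step above RDS_PROTOCOL_3_0; assuming the minor version occupies the low bits of the encoding, that selects protocol 3.1.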
376 | |||
377 | int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, | ||
378 | struct rdma_cm_event *event) | ||
379 | { | ||
380 | const struct rds_iw_connect_private *dp = event->param.conn.private_data; | ||
381 | struct rds_iw_connect_private dp_rep; | ||
382 | struct rds_connection *conn = NULL; | ||
383 | struct rds_iw_connection *ic = NULL; | ||
384 | struct rdma_conn_param conn_param; | ||
385 | struct rds_iw_device *rds_iwdev; | ||
386 | u32 version; | ||
387 | int err, destroy = 1; | ||
388 | |||
389 | /* Check whether the remote protocol version matches ours. */ | ||
390 | version = rds_iw_protocol_compatible(dp); | ||
391 | if (!version) | ||
392 | goto out; | ||
393 | |||
394 | rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u\n", | ||
395 | &dp->dp_saddr, &dp->dp_daddr, | ||
396 | RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version)); | ||
397 | |||
398 | conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_iw_transport, | ||
399 | GFP_KERNEL); | ||
400 | if (IS_ERR(conn)) { | ||
401 | rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); | ||
402 | conn = NULL; | ||
403 | goto out; | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * The connection request may occur while the | ||
408 | * previous connection exists, e.g. in the case of failover. | ||
409 | * But as connections may be initiated simultaneously | ||
410 | * by both hosts, we have a random backoff mechanism - | ||
411 | * see the comment above rds_queue_reconnect() | ||
412 | */ | ||
413 | mutex_lock(&conn->c_cm_lock); | ||
414 | if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { | ||
415 | if (rds_conn_state(conn) == RDS_CONN_UP) { | ||
416 | rdsdebug("incoming connect while connecting\n"); | ||
417 | rds_conn_drop(conn); | ||
418 | rds_iw_stats_inc(s_iw_listen_closed_stale); | ||
419 | } else | ||
420 | if (rds_conn_state(conn) == RDS_CONN_CONNECTING) { | ||
421 | /* Wait and see - our connect may still be succeeding */ | ||
422 | rds_iw_stats_inc(s_iw_connect_raced); | ||
423 | } | ||
424 | mutex_unlock(&conn->c_cm_lock); | ||
425 | goto out; | ||
426 | } | ||
427 | |||
428 | ic = conn->c_transport_data; | ||
429 | |||
430 | rds_iw_set_protocol(conn, version); | ||
431 | rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); | ||
432 | |||
433 | /* If the peer gave us the last packet it saw, process this as if | ||
434 | * we had received a regular ACK. */ | ||
435 | if (dp->dp_ack_seq) | ||
436 | rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); | ||
437 | |||
438 | BUG_ON(cm_id->context); | ||
439 | BUG_ON(ic->i_cm_id); | ||
440 | |||
441 | ic->i_cm_id = cm_id; | ||
442 | cm_id->context = conn; | ||
443 | |||
444 | rds_iwdev = ib_get_client_data(cm_id->device, &rds_iw_client); | ||
445 | ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey; | ||
446 | |||
447 | /* We got halfway through setting up the ib_connection; if we | ||
448 | * fail now, we have to take the long route out of this mess. */ | ||
449 | destroy = 0; | ||
450 | |||
451 | err = rds_iw_setup_qp(conn); | ||
452 | if (err) { | ||
453 | rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err); | ||
454 | goto out; | ||
455 | } | ||
456 | |||
457 | rds_iw_cm_fill_conn_param(conn, &conn_param, &dp_rep, version); | ||
458 | |||
459 | /* rdma_accept() calls rdma_reject() internally if it fails */ | ||
460 | err = rdma_accept(cm_id, &conn_param); | ||
461 | mutex_unlock(&conn->c_cm_lock); | ||
462 | if (err) { | ||
463 | rds_iw_conn_error(conn, "rdma_accept failed (%d)\n", err); | ||
464 | goto out; | ||
465 | } | ||
466 | |||
467 | return 0; | ||
468 | |||
469 | out: | ||
470 | rdma_reject(cm_id, NULL, 0); | ||
471 | return destroy; | ||
472 | } | ||
473 | |||
474 | |||
475 | int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id) | ||
476 | { | ||
477 | struct rds_connection *conn = cm_id->context; | ||
478 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
479 | struct rdma_conn_param conn_param; | ||
480 | struct rds_iw_connect_private dp; | ||
481 | int ret; | ||
482 | |||
483 | /* If the peer doesn't do protocol negotiation, we must | ||
484 | * default to RDSv3.0 */ | ||
485 | rds_iw_set_protocol(conn, RDS_PROTOCOL_3_0); | ||
486 | ic->i_flowctl = rds_iw_sysctl_flow_control; /* advertise flow control */ | ||
487 | |||
488 | ret = rds_iw_setup_qp(conn); | ||
489 | if (ret) { | ||
490 | rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", ret); | ||
491 | goto out; | ||
492 | } | ||
493 | |||
494 | rds_iw_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION); | ||
495 | |||
496 | ret = rdma_connect(cm_id, &conn_param); | ||
497 | if (ret) | ||
498 | rds_iw_conn_error(conn, "rdma_connect failed (%d)\n", ret); | ||
499 | |||
500 | out: | ||
501 | /* Beware - returning non-zero tells the rdma_cm to destroy | ||
502 | * the cm_id. We should certainly not do it as long as we still | ||
503 | * "own" the cm_id. */ | ||
504 | if (ret) { | ||
505 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
506 | |||
507 | if (ic->i_cm_id == cm_id) | ||
508 | ret = 0; | ||
509 | } | ||
510 | return ret; | ||
511 | } | ||
512 | |||
513 | int rds_iw_conn_connect(struct rds_connection *conn) | ||
514 | { | ||
515 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
516 | struct rds_iw_device *rds_iwdev; | ||
517 | struct sockaddr_in src, dest; | ||
518 | int ret; | ||
519 | |||
520 | /* XXX I wonder what effect the port space has */ | ||
521 | /* delegate cm event handler to rdma_transport */ | ||
522 | ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, | ||
523 | RDMA_PS_TCP); | ||
524 | if (IS_ERR(ic->i_cm_id)) { | ||
525 | ret = PTR_ERR(ic->i_cm_id); | ||
526 | ic->i_cm_id = NULL; | ||
527 | rdsdebug("rdma_create_id() failed: %d\n", ret); | ||
528 | goto out; | ||
529 | } | ||
530 | |||
531 | rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn); | ||
532 | |||
533 | src.sin_family = AF_INET; | ||
534 | src.sin_addr.s_addr = (__force u32)conn->c_laddr; | ||
535 | src.sin_port = (__force u16)htons(0); | ||
536 | |||
537 | /* First, bind to the local address and device. */ | ||
538 | ret = rdma_bind_addr(ic->i_cm_id, (struct sockaddr *) &src); | ||
539 | if (ret) { | ||
540 | rdsdebug("rdma_bind_addr(%pI4) failed: %d\n", | ||
541 | &conn->c_laddr, ret); | ||
542 | rdma_destroy_id(ic->i_cm_id); | ||
543 | ic->i_cm_id = NULL; | ||
544 | goto out; | ||
545 | } | ||
546 | |||
547 | rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); | ||
548 | ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey; | ||
549 | |||
550 | dest.sin_family = AF_INET; | ||
551 | dest.sin_addr.s_addr = (__force u32)conn->c_faddr; | ||
552 | dest.sin_port = (__force u16)htons(RDS_PORT); | ||
553 | |||
554 | ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src, | ||
555 | (struct sockaddr *)&dest, | ||
556 | RDS_RDMA_RESOLVE_TIMEOUT_MS); | ||
557 | if (ret) { | ||
558 | rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id, | ||
559 | ret); | ||
560 | rdma_destroy_id(ic->i_cm_id); | ||
561 | ic->i_cm_id = NULL; | ||
562 | } | ||
563 | |||
564 | out: | ||
565 | return ret; | ||
566 | } | ||
567 | |||
568 | /* | ||
569 | * This is so careful about only cleaning up resources that were built up | ||
570 | * so that it can be called at any point during startup. In fact it | ||
571 | * can be called multiple times for a given connection. | ||
572 | */ | ||
573 | void rds_iw_conn_shutdown(struct rds_connection *conn) | ||
574 | { | ||
575 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
576 | int err = 0; | ||
577 | struct ib_qp_attr qp_attr; | ||
578 | |||
579 | rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id, | ||
580 | ic->i_pd, ic->i_send_cq, ic->i_recv_cq, | ||
581 | ic->i_cm_id ? ic->i_cm_id->qp : NULL); | ||
582 | |||
583 | if (ic->i_cm_id) { | ||
584 | struct ib_device *dev = ic->i_cm_id->device; | ||
585 | |||
586 | rdsdebug("disconnecting cm %p\n", ic->i_cm_id); | ||
587 | err = rdma_disconnect(ic->i_cm_id); | ||
588 | if (err) { | ||
589 | /* Actually this may happen quite frequently, when | ||
590 | * an outgoing connect raced with an incoming connect. | ||
591 | */ | ||
592 | rdsdebug("rds_iw_conn_shutdown: failed to disconnect," | ||
593 | " cm: %p err %d\n", ic->i_cm_id, err); | ||
594 | } | ||
595 | |||
596 | if (ic->i_cm_id->qp) { | ||
597 | qp_attr.qp_state = IB_QPS_ERR; | ||
598 | ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE); | ||
599 | } | ||
600 | |||
601 | wait_event(rds_iw_ring_empty_wait, | ||
602 | rds_iw_ring_empty(&ic->i_send_ring) && | ||
603 | rds_iw_ring_empty(&ic->i_recv_ring)); | ||
604 | |||
605 | if (ic->i_send_hdrs) | ||
606 | ib_dma_free_coherent(dev, | ||
607 | ic->i_send_ring.w_nr * | ||
608 | sizeof(struct rds_header), | ||
609 | ic->i_send_hdrs, | ||
610 | ic->i_send_hdrs_dma); | ||
611 | |||
612 | if (ic->i_recv_hdrs) | ||
613 | ib_dma_free_coherent(dev, | ||
614 | ic->i_recv_ring.w_nr * | ||
615 | sizeof(struct rds_header), | ||
616 | ic->i_recv_hdrs, | ||
617 | ic->i_recv_hdrs_dma); | ||
618 | |||
619 | if (ic->i_ack) | ||
620 | ib_dma_free_coherent(dev, sizeof(struct rds_header), | ||
621 | ic->i_ack, ic->i_ack_dma); | ||
622 | |||
623 | if (ic->i_sends) | ||
624 | rds_iw_send_clear_ring(ic); | ||
625 | if (ic->i_recvs) | ||
626 | rds_iw_recv_clear_ring(ic); | ||
627 | |||
628 | if (ic->i_cm_id->qp) | ||
629 | rdma_destroy_qp(ic->i_cm_id); | ||
630 | if (ic->i_send_cq) | ||
631 | ib_destroy_cq(ic->i_send_cq); | ||
632 | if (ic->i_recv_cq) | ||
633 | ib_destroy_cq(ic->i_recv_cq); | ||
634 | |||
635 | /* | ||
636 | * If associated with an rds_iw_device: | ||
637 | * Move connection back to the nodev list. | ||
638 | * Remove cm_id from the device cm_id list. | ||
639 | */ | ||
640 | if (ic->rds_iwdev) { | ||
641 | |||
642 | spin_lock_irq(&ic->rds_iwdev->spinlock); | ||
643 | BUG_ON(list_empty(&ic->iw_node)); | ||
644 | list_del(&ic->iw_node); | ||
645 | spin_unlock_irq(&ic->rds_iwdev->spinlock); | ||
646 | |||
647 | spin_lock_irq(&iw_nodev_conns_lock); | ||
648 | list_add_tail(&ic->iw_node, &iw_nodev_conns); | ||
649 | spin_unlock_irq(&iw_nodev_conns_lock); | ||
650 | rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id); | ||
651 | ic->rds_iwdev = NULL; | ||
652 | } | ||
653 | |||
654 | rdma_destroy_id(ic->i_cm_id); | ||
655 | |||
656 | ic->i_cm_id = NULL; | ||
657 | ic->i_pd = NULL; | ||
658 | ic->i_mr = NULL; | ||
659 | ic->i_send_cq = NULL; | ||
660 | ic->i_recv_cq = NULL; | ||
661 | ic->i_send_hdrs = NULL; | ||
662 | ic->i_recv_hdrs = NULL; | ||
663 | ic->i_ack = NULL; | ||
664 | } | ||
665 | BUG_ON(ic->rds_iwdev); | ||
666 | |||
667 | /* Clear pending transmit */ | ||
668 | if (ic->i_rm) { | ||
669 | rds_message_put(ic->i_rm); | ||
670 | ic->i_rm = NULL; | ||
671 | } | ||
672 | |||
673 | /* Clear the ACK state */ | ||
674 | clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); | ||
675 | rds_iw_set_64bit(&ic->i_ack_next, 0); | ||
676 | ic->i_ack_recv = 0; | ||
677 | |||
678 | /* Clear flow control state */ | ||
679 | ic->i_flowctl = 0; | ||
680 | atomic_set(&ic->i_credits, 0); | ||
681 | |||
682 | rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr); | ||
683 | rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr); | ||
684 | |||
685 | if (ic->i_iwinc) { | ||
686 | rds_inc_put(&ic->i_iwinc->ii_inc); | ||
687 | ic->i_iwinc = NULL; | ||
688 | } | ||
689 | |||
690 | vfree(ic->i_sends); | ||
691 | ic->i_sends = NULL; | ||
692 | vfree(ic->i_recvs); | ||
693 | ic->i_recvs = NULL; | ||
694 | rdsdebug("shutdown complete\n"); | ||
695 | } | ||
696 | |||
697 | int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) | ||
698 | { | ||
699 | struct rds_iw_connection *ic; | ||
700 | unsigned long flags; | ||
701 | |||
702 | /* XXX too lazy? */ | ||
703 | ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL); | ||
704 | if (ic == NULL) | ||
705 | return -ENOMEM; | ||
706 | |||
707 | INIT_LIST_HEAD(&ic->iw_node); | ||
708 | mutex_init(&ic->i_recv_mutex); | ||
709 | |||
710 | /* | ||
711 | * rds_iw_conn_shutdown() waits for these to be emptied so they | ||
712 | * must be initialized before it can be called. | ||
713 | */ | ||
714 | rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr); | ||
715 | rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr); | ||
716 | |||
717 | ic->conn = conn; | ||
718 | conn->c_transport_data = ic; | ||
719 | |||
720 | spin_lock_irqsave(&iw_nodev_conns_lock, flags); | ||
721 | list_add_tail(&ic->iw_node, &iw_nodev_conns); | ||
722 | spin_unlock_irqrestore(&iw_nodev_conns_lock, flags); | ||
723 | |||
724 | |||
725 | rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data); | ||
726 | return 0; | ||
727 | } | ||
728 | |||
729 | void rds_iw_conn_free(void *arg) | ||
730 | { | ||
731 | struct rds_iw_connection *ic = arg; | ||
732 | rdsdebug("ic %p\n", ic); | ||
733 | list_del(&ic->iw_node); | ||
734 | kfree(ic); | ||
735 | } | ||
736 | |||
737 | /* | ||
738 | * An error occurred on the connection | ||
739 | */ | ||
740 | void | ||
741 | __rds_iw_conn_error(struct rds_connection *conn, const char *fmt, ...) | ||
742 | { | ||
743 | va_list ap; | ||
744 | |||
745 | rds_conn_drop(conn); | ||
746 | |||
747 | va_start(ap, fmt); | ||
748 | vprintk(fmt, ap); | ||
749 | va_end(ap); | ||
750 | } | ||
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c new file mode 100644 index 000000000000..1c02a8f952d0 --- /dev/null +++ b/net/rds/iw_rdma.c | |||
@@ -0,0 +1,888 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | |||
35 | #include "rds.h" | ||
36 | #include "rdma.h" | ||
37 | #include "iw.h" | ||
38 | |||
39 | |||
40 | /* | ||
41 | * This is stored as mr->r_trans_private. | ||
42 | */ | ||
43 | struct rds_iw_mr { | ||
44 | struct rds_iw_device *device; | ||
45 | struct rds_iw_mr_pool *pool; | ||
46 | struct rdma_cm_id *cm_id; | ||
47 | |||
48 | struct ib_mr *mr; | ||
49 | struct ib_fast_reg_page_list *page_list; | ||
50 | |||
51 | struct rds_iw_mapping mapping; | ||
52 | unsigned char remap_count; | ||
53 | }; | ||
54 | |||
55 | /* | ||
56 | * Our own little MR pool | ||
57 | */ | ||
58 | struct rds_iw_mr_pool { | ||
59 | struct rds_iw_device *device; /* back ptr to the device that owns us */ | ||
60 | |||
61 | struct mutex flush_lock; /* serialize fmr invalidate */ | ||
62 | struct work_struct flush_worker; /* flush worker */ | ||
63 | |||
64 | spinlock_t list_lock; /* protect variables below */ | ||
65 | atomic_t item_count; /* total # of MRs */ | ||
66 | atomic_t dirty_count; /* # of dirty MRs */ | ||
67 | struct list_head dirty_list; /* dirty mappings */ | ||
68 | struct list_head clean_list; /* unused & unmapped MRs */ | ||
69 | atomic_t free_pinned; /* memory pinned by free MRs */ | ||
70 | unsigned long max_message_size; /* in pages */ | ||
71 | unsigned long max_items; | ||
72 | unsigned long max_items_soft; | ||
73 | unsigned long max_free_pinned; | ||
74 | int max_pages; | ||
75 | }; | ||
76 | |||
77 | static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all); | ||
78 | static void rds_iw_mr_pool_flush_worker(struct work_struct *work); | ||
79 | static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); | ||
80 | static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, | ||
81 | struct rds_iw_mr *ibmr, | ||
82 | struct scatterlist *sg, unsigned int nents); | ||
83 | static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); | ||
84 | static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, | ||
85 | struct list_head *unmap_list, | ||
86 | struct list_head *kill_list); | ||
87 | static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); | ||
88 | |||
89 | static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id) | ||
90 | { | ||
91 | struct rds_iw_device *iwdev; | ||
92 | struct rds_iw_cm_id *i_cm_id; | ||
93 | |||
94 | *rds_iwdev = NULL; | ||
95 | *cm_id = NULL; | ||
96 | |||
97 | list_for_each_entry(iwdev, &rds_iw_devices, list) { | ||
98 | spin_lock_irq(&iwdev->spinlock); | ||
99 | list_for_each_entry(i_cm_id, &iwdev->cm_id_list, list) { | ||
100 | struct sockaddr_in *src_addr, *dst_addr; | ||
101 | |||
102 | src_addr = (struct sockaddr_in *)&i_cm_id->cm_id->route.addr.src_addr; | ||
103 | dst_addr = (struct sockaddr_in *)&i_cm_id->cm_id->route.addr.dst_addr; | ||
104 | |||
105 | rdsdebug("local ipaddr = %x port %d, " | ||
106 | "remote ipaddr = %x port %d" | ||
107 | "..looking for %x port %d, " | ||
108 | "remote ipaddr = %x port %d\n", | ||
109 | src_addr->sin_addr.s_addr, | ||
110 | src_addr->sin_port, | ||
111 | dst_addr->sin_addr.s_addr, | ||
112 | dst_addr->sin_port, | ||
113 | rs->rs_bound_addr, | ||
114 | rs->rs_bound_port, | ||
115 | rs->rs_conn_addr, | ||
116 | rs->rs_conn_port); | ||
117 | #ifdef WORKING_TUPLE_DETECTION | ||
118 | if (src_addr->sin_addr.s_addr == rs->rs_bound_addr && | ||
119 | src_addr->sin_port == rs->rs_bound_port && | ||
120 | dst_addr->sin_addr.s_addr == rs->rs_conn_addr && | ||
121 | dst_addr->sin_port == rs->rs_conn_port) { | ||
122 | #else | ||
123 | /* FIXME - needs to compare the local and remote | ||
124 | * ipaddr/port tuple, but the ipaddr is the only | ||
125 | * available infomation in the rds_sock (as the rest are | ||
126 | * zero'ed. It doesn't appear to be properly populated | ||
127 | * during connection setup... | ||
128 | */ | ||
129 | if (src_addr->sin_addr.s_addr == rs->rs_bound_addr) { | ||
130 | #endif | ||
131 | spin_unlock_irq(&iwdev->spinlock); | ||
132 | *rds_iwdev = iwdev; | ||
133 | *cm_id = i_cm_id->cm_id; | ||
134 | return 0; | ||
135 | } | ||
136 | } | ||
137 | spin_unlock_irq(&iwdev->spinlock); | ||
138 | } | ||
139 | |||
140 | return 1; | ||
141 | } | ||
142 | |||
143 | static int rds_iw_add_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id) | ||
144 | { | ||
145 | struct rds_iw_cm_id *i_cm_id; | ||
146 | |||
147 | i_cm_id = kmalloc(sizeof *i_cm_id, GFP_KERNEL); | ||
148 | if (!i_cm_id) | ||
149 | return -ENOMEM; | ||
150 | |||
151 | i_cm_id->cm_id = cm_id; | ||
152 | |||
153 | spin_lock_irq(&rds_iwdev->spinlock); | ||
154 | list_add_tail(&i_cm_id->list, &rds_iwdev->cm_id_list); | ||
155 | spin_unlock_irq(&rds_iwdev->spinlock); | ||
156 | |||
157 | return 0; | ||
158 | } | ||
159 | |||
160 | void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id) | ||
161 | { | ||
162 | struct rds_iw_cm_id *i_cm_id; | ||
163 | |||
164 | spin_lock_irq(&rds_iwdev->spinlock); | ||
165 | list_for_each_entry(i_cm_id, &rds_iwdev->cm_id_list, list) { | ||
166 | if (i_cm_id->cm_id == cm_id) { | ||
167 | list_del(&i_cm_id->list); | ||
168 | kfree(i_cm_id); | ||
169 | break; | ||
170 | } | ||
171 | } | ||
172 | spin_unlock_irq(&rds_iwdev->spinlock); | ||
173 | } | ||
174 | |||
175 | |||
176 | int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id) | ||
177 | { | ||
178 | struct sockaddr_in *src_addr, *dst_addr; | ||
179 | struct rds_iw_device *rds_iwdev_old; | ||
180 | struct rds_sock rs; | ||
181 | struct rdma_cm_id *pcm_id; | ||
182 | int rc; | ||
183 | |||
184 | src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr; | ||
185 | dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr; | ||
186 | |||
187 | rs.rs_bound_addr = src_addr->sin_addr.s_addr; | ||
188 | rs.rs_bound_port = src_addr->sin_port; | ||
189 | rs.rs_conn_addr = dst_addr->sin_addr.s_addr; | ||
190 | rs.rs_conn_port = dst_addr->sin_port; | ||
191 | |||
192 | rc = rds_iw_get_device(&rs, &rds_iwdev_old, &pcm_id); | ||
193 | if (rc) | ||
194 | rds_iw_remove_cm_id(rds_iwdev, cm_id); | ||
195 | |||
196 | return rds_iw_add_cm_id(rds_iwdev, cm_id); | ||
197 | } | ||
198 | |||
199 | int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) | ||
200 | { | ||
201 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
202 | |||
203 | /* conn was previously on the nodev_conns_list */ | ||
204 | spin_lock_irq(&iw_nodev_conns_lock); | ||
205 | BUG_ON(list_empty(&iw_nodev_conns)); | ||
206 | BUG_ON(list_empty(&ic->iw_node)); | ||
207 | list_del(&ic->iw_node); | ||
208 | spin_unlock_irq(&iw_nodev_conns_lock); | ||
209 | |||
210 | spin_lock_irq(&rds_iwdev->spinlock); | ||
211 | list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); | ||
212 | spin_unlock_irq(&rds_iwdev->spinlock); | ||
213 | |||
214 | ic->rds_iwdev = rds_iwdev; | ||
215 | |||
216 | return 0; | ||
217 | } | ||
218 | |||
219 | void rds_iw_remove_nodev_conns(void) | ||
220 | { | ||
221 | struct rds_iw_connection *ic, *_ic; | ||
222 | LIST_HEAD(tmp_list); | ||
223 | |||
224 | /* avoid calling conn_destroy with irqs off */ | ||
225 | spin_lock_irq(&iw_nodev_conns_lock); | ||
226 | list_splice(&iw_nodev_conns, &tmp_list); | ||
227 | INIT_LIST_HEAD(&iw_nodev_conns); | ||
228 | spin_unlock_irq(&iw_nodev_conns_lock); | ||
229 | |||
230 | list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { | ||
231 | if (ic->conn->c_passive) | ||
232 | rds_conn_destroy(ic->conn->c_passive); | ||
233 | rds_conn_destroy(ic->conn); | ||
234 | } | ||
235 | } | ||
236 | |||
237 | void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev) | ||
238 | { | ||
239 | struct rds_iw_connection *ic, *_ic; | ||
240 | LIST_HEAD(tmp_list); | ||
241 | |||
242 | /* avoid calling conn_destroy with irqs off */ | ||
243 | spin_lock_irq(&rds_iwdev->spinlock); | ||
244 | list_splice(&rds_iwdev->conn_list, &tmp_list); | ||
245 | INIT_LIST_HEAD(&rds_iwdev->conn_list); | ||
246 | spin_unlock_irq(&rds_iwdev->spinlock); | ||
247 | |||
248 | list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { | ||
249 | if (ic->conn->c_passive) | ||
250 | rds_conn_destroy(ic->conn->c_passive); | ||
251 | rds_conn_destroy(ic->conn); | ||
252 | } | ||
253 | } | ||
254 | |||
255 | static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg, | ||
256 | struct scatterlist *list, unsigned int sg_len) | ||
257 | { | ||
258 | sg->list = list; | ||
259 | sg->len = sg_len; | ||
260 | sg->dma_len = 0; | ||
261 | sg->dma_npages = 0; | ||
262 | sg->bytes = 0; | ||
263 | } | ||
264 | |||
265 | static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, | ||
266 | struct rds_iw_scatterlist *sg, | ||
267 | unsigned int dma_page_shift) | ||
268 | { | ||
269 | struct ib_device *dev = rds_iwdev->dev; | ||
270 | u64 *dma_pages = NULL; | ||
271 | u64 dma_mask; | ||
272 | unsigned int dma_page_size; | ||
273 | int i, j, ret; | ||
274 | |||
275 | dma_page_size = 1 << dma_page_shift; | ||
276 | dma_mask = dma_page_size - 1; | ||
277 | |||
278 | WARN_ON(sg->dma_len); | ||
279 | |||
280 | sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL); | ||
281 | if (unlikely(!sg->dma_len)) { | ||
282 | printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n"); | ||
283 | return ERR_PTR(-EBUSY); | ||
284 | } | ||
285 | |||
286 | sg->bytes = 0; | ||
287 | sg->dma_npages = 0; | ||
288 | |||
289 | ret = -EINVAL; | ||
290 | for (i = 0; i < sg->dma_len; ++i) { | ||
291 | unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]); | ||
292 | u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]); | ||
293 | u64 end_addr; | ||
294 | |||
295 | sg->bytes += dma_len; | ||
296 | |||
297 | end_addr = dma_addr + dma_len; | ||
298 | if (dma_addr & dma_mask) { | ||
299 | if (i > 0) | ||
300 | goto out_unmap; | ||
301 | dma_addr &= ~dma_mask; | ||
302 | } | ||
303 | if (end_addr & dma_mask) { | ||
304 | if (i < sg->dma_len - 1) | ||
305 | goto out_unmap; | ||
306 | end_addr = (end_addr + dma_mask) & ~dma_mask; | ||
307 | } | ||
308 | |||
309 | sg->dma_npages += (end_addr - dma_addr) >> dma_page_shift; | ||
310 | } | ||
311 | |||
312 | /* Now gather the dma addrs into one list */ | ||
313 | if (sg->dma_npages > fastreg_message_size) | ||
314 | goto out_unmap; | ||
315 | |||
316 | dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC); | ||
317 | if (!dma_pages) { | ||
318 | ret = -ENOMEM; | ||
319 | goto out_unmap; | ||
320 | } | ||
321 | |||
322 | for (i = j = 0; i < sg->dma_len; ++i) { | ||
323 | unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]); | ||
324 | u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]); | ||
325 | u64 end_addr; | ||
326 | |||
327 | end_addr = dma_addr + dma_len; | ||
328 | dma_addr &= ~dma_mask; | ||
329 | for (; dma_addr < end_addr; dma_addr += dma_page_size) | ||
330 | dma_pages[j++] = dma_addr; | ||
331 | BUG_ON(j > sg->dma_npages); | ||
332 | } | ||
333 | |||
334 | return dma_pages; | ||
335 | |||
336 | out_unmap: | ||
337 | ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL); | ||
338 | sg->dma_len = 0; | ||
339 | kfree(dma_pages); | ||
340 | return ERR_PTR(ret); | ||
341 | } | ||
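rds_iw_map_scatterlist() above only accepts scatterlists whose interior elements are aligned to the DMA page size, and counts pages by rounding each segment's start down and its end up to that size. Below is a minimal sketch of the page-count arithmetic for a single segment, assuming the same shift/mask convention; sketch_dma_npages() is a hypothetical userspace helper, not a kernel symbol.

#include <stdint.h>
#include <stdio.h>

static uint64_t sketch_dma_npages(uint64_t dma_addr, uint64_t dma_len,
				  unsigned int dma_page_shift)
{
	uint64_t dma_mask = (1ULL << dma_page_shift) - 1;
	uint64_t start = dma_addr & ~dma_mask;			/* round start down */
	uint64_t end = (dma_addr + dma_len + dma_mask) & ~dma_mask;	/* round end up */

	return (end - start) >> dma_page_shift;
}

int main(void)
{
	/* A 6000-byte segment starting 512 bytes into a 4KiB page touches
	 * two DMA pages. */
	printf("%llu\n",
	       (unsigned long long)sketch_dma_npages(0x1000 + 512, 6000, 12));
	return 0;
}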
342 | |||
343 | |||
344 | struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *rds_iwdev) | ||
345 | { | ||
346 | struct rds_iw_mr_pool *pool; | ||
347 | |||
348 | pool = kzalloc(sizeof(*pool), GFP_KERNEL); | ||
349 | if (!pool) { | ||
350 | printk(KERN_WARNING "RDS/IW: rds_iw_create_mr_pool alloc error\n"); | ||
351 | return ERR_PTR(-ENOMEM); | ||
352 | } | ||
353 | |||
354 | pool->device = rds_iwdev; | ||
355 | INIT_LIST_HEAD(&pool->dirty_list); | ||
356 | INIT_LIST_HEAD(&pool->clean_list); | ||
357 | mutex_init(&pool->flush_lock); | ||
358 | spin_lock_init(&pool->list_lock); | ||
359 | INIT_WORK(&pool->flush_worker, rds_iw_mr_pool_flush_worker); | ||
360 | |||
361 | pool->max_message_size = fastreg_message_size; | ||
362 | pool->max_items = fastreg_pool_size; | ||
363 | pool->max_free_pinned = pool->max_items * pool->max_message_size / 4; | ||
364 | pool->max_pages = fastreg_message_size; | ||
365 | |||
366 | /* We never allow more than max_items MRs to be allocated. | ||
367 | * When we exceed max_items_soft, we start freeing | ||
368 | * items more aggressively. | ||
369 | * Make sure that max_items > max_items_soft > max_items / 2 | ||
370 | */ | ||
371 | pool->max_items_soft = pool->max_items * 3 / 4; | ||
372 | |||
373 | return pool; | ||
374 | } | ||
375 | |||
376 | void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo) | ||
377 | { | ||
378 | struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool; | ||
379 | |||
380 | iinfo->rdma_mr_max = pool->max_items; | ||
381 | iinfo->rdma_mr_size = pool->max_pages; | ||
382 | } | ||
383 | |||
384 | void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *pool) | ||
385 | { | ||
386 | flush_workqueue(rds_wq); | ||
387 | rds_iw_flush_mr_pool(pool, 1); | ||
388 | BUG_ON(atomic_read(&pool->item_count)); | ||
389 | BUG_ON(atomic_read(&pool->free_pinned)); | ||
390 | kfree(pool); | ||
391 | } | ||
392 | |||
393 | static inline struct rds_iw_mr *rds_iw_reuse_fmr(struct rds_iw_mr_pool *pool) | ||
394 | { | ||
395 | struct rds_iw_mr *ibmr = NULL; | ||
396 | unsigned long flags; | ||
397 | |||
398 | spin_lock_irqsave(&pool->list_lock, flags); | ||
399 | if (!list_empty(&pool->clean_list)) { | ||
400 | ibmr = list_entry(pool->clean_list.next, struct rds_iw_mr, mapping.m_list); | ||
401 | list_del_init(&ibmr->mapping.m_list); | ||
402 | } | ||
403 | spin_unlock_irqrestore(&pool->list_lock, flags); | ||
404 | |||
405 | return ibmr; | ||
406 | } | ||
407 | |||
408 | static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev) | ||
409 | { | ||
410 | struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool; | ||
411 | struct rds_iw_mr *ibmr = NULL; | ||
412 | int err = 0, iter = 0; | ||
413 | |||
414 | while (1) { | ||
415 | ibmr = rds_iw_reuse_fmr(pool); | ||
416 | if (ibmr) | ||
417 | return ibmr; | ||
418 | |||
419 | /* No clean MRs - now we have the choice of either | ||
420 | * allocating a fresh MR up to the limit imposed by the | ||
421 | * driver, or flushing any dirty unused MRs. | ||
422 | * We try to avoid stalling in the send path if possible, | ||
423 | * so we allocate as long as we're allowed to. | ||
424 | * | ||
425 | * We're fussy with enforcing the FMR limit, though. If the driver | ||
426 | * tells us we can't use more than N fmrs, we shouldn't start | ||
427 | * arguing with it */ | ||
428 | if (atomic_inc_return(&pool->item_count) <= pool->max_items) | ||
429 | break; | ||
430 | |||
431 | atomic_dec(&pool->item_count); | ||
432 | |||
433 | if (++iter > 2) { | ||
434 | rds_iw_stats_inc(s_iw_rdma_mr_pool_depleted); | ||
435 | return ERR_PTR(-EAGAIN); | ||
436 | } | ||
437 | |||
438 | /* We do have some empty MRs. Flush them out. */ | ||
439 | rds_iw_stats_inc(s_iw_rdma_mr_pool_wait); | ||
440 | rds_iw_flush_mr_pool(pool, 0); | ||
441 | } | ||
442 | |||
443 | ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL); | ||
444 | if (!ibmr) { | ||
445 | err = -ENOMEM; | ||
446 | goto out_no_cigar; | ||
447 | } | ||
448 | |||
449 | spin_lock_init(&ibmr->mapping.m_lock); | ||
450 | INIT_LIST_HEAD(&ibmr->mapping.m_list); | ||
451 | ibmr->mapping.m_mr = ibmr; | ||
452 | |||
453 | err = rds_iw_init_fastreg(pool, ibmr); | ||
454 | if (err) | ||
455 | goto out_no_cigar; | ||
456 | |||
457 | rds_iw_stats_inc(s_iw_rdma_mr_alloc); | ||
458 | return ibmr; | ||
459 | |||
460 | out_no_cigar: | ||
461 | if (ibmr) { | ||
462 | rds_iw_destroy_fastreg(pool, ibmr); | ||
463 | kfree(ibmr); | ||
464 | } | ||
465 | atomic_dec(&pool->item_count); | ||
466 | return ERR_PTR(err); | ||
467 | } | ||
468 | |||
469 | void rds_iw_sync_mr(void *trans_private, int direction) | ||
470 | { | ||
471 | struct rds_iw_mr *ibmr = trans_private; | ||
472 | struct rds_iw_device *rds_iwdev = ibmr->device; | ||
473 | |||
474 | switch (direction) { | ||
475 | case DMA_FROM_DEVICE: | ||
476 | ib_dma_sync_sg_for_cpu(rds_iwdev->dev, ibmr->mapping.m_sg.list, | ||
477 | ibmr->mapping.m_sg.dma_len, DMA_BIDIRECTIONAL); | ||
478 | break; | ||
479 | case DMA_TO_DEVICE: | ||
480 | ib_dma_sync_sg_for_device(rds_iwdev->dev, ibmr->mapping.m_sg.list, | ||
481 | ibmr->mapping.m_sg.dma_len, DMA_BIDIRECTIONAL); | ||
482 | break; | ||
483 | } | ||
484 | } | ||
485 | |||
486 | static inline unsigned int rds_iw_flush_goal(struct rds_iw_mr_pool *pool, int free_all) | ||
487 | { | ||
488 | unsigned int item_count; | ||
489 | |||
490 | item_count = atomic_read(&pool->item_count); | ||
491 | if (free_all) | ||
492 | return item_count; | ||
493 | |||
494 | return 0; | ||
495 | } | ||
496 | |||
497 | /* | ||
498 | * Flush our pool of MRs. | ||
499 | * At a minimum, all currently unused MRs are unmapped. | ||
500 | * If the number of MRs allocated exceeds the limit, we also try | ||
501 | * to free as many MRs as needed to get back to this limit. | ||
502 | */ | ||
503 | static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) | ||
504 | { | ||
505 | struct rds_iw_mr *ibmr, *next; | ||
506 | LIST_HEAD(unmap_list); | ||
507 | LIST_HEAD(kill_list); | ||
508 | unsigned long flags; | ||
509 | unsigned int nfreed = 0, ncleaned = 0, free_goal; | ||
510 | int ret = 0; | ||
511 | |||
512 | rds_iw_stats_inc(s_iw_rdma_mr_pool_flush); | ||
513 | |||
514 | mutex_lock(&pool->flush_lock); | ||
515 | |||
516 | spin_lock_irqsave(&pool->list_lock, flags); | ||
517 | /* Get the list of all mappings to be destroyed */ | ||
518 | list_splice_init(&pool->dirty_list, &unmap_list); | ||
519 | if (free_all) | ||
520 | list_splice_init(&pool->clean_list, &kill_list); | ||
521 | spin_unlock_irqrestore(&pool->list_lock, flags); | ||
522 | |||
523 | free_goal = rds_iw_flush_goal(pool, free_all); | ||
524 | |||
525 | /* Batched invalidate of dirty MRs. | ||
526 | * For FMR based MRs, the mappings on the unmap list are | ||
527 | * actually members of an ibmr (ibmr->mapping). They either | ||
528 | * migrate to the kill_list, or have been cleaned and should be | ||
529 | * moved to the clean_list. | ||
530 | * For fastregs, they will be dynamically allocated, and | ||
531 | * will be destroyed by the unmap function. | ||
532 | */ | ||
533 | if (!list_empty(&unmap_list)) { | ||
534 | ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, &kill_list); | ||
535 | /* If we've been asked to destroy all MRs, move those | ||
536 | * that were simply cleaned to the kill list */ | ||
537 | if (free_all) | ||
538 | list_splice_init(&unmap_list, &kill_list); | ||
539 | } | ||
540 | |||
541 | /* Destroy any MRs that are past their best before date */ | ||
542 | list_for_each_entry_safe(ibmr, next, &kill_list, mapping.m_list) { | ||
543 | rds_iw_stats_inc(s_iw_rdma_mr_free); | ||
544 | list_del(&ibmr->mapping.m_list); | ||
545 | rds_iw_destroy_fastreg(pool, ibmr); | ||
546 | kfree(ibmr); | ||
547 | nfreed++; | ||
548 | } | ||
549 | |||
550 | /* Any mappings that remain are laundered ibmrs, which we can add | ||
551 | * back to the clean list. */ | ||
552 | if (!list_empty(&unmap_list)) { | ||
553 | spin_lock_irqsave(&pool->list_lock, flags); | ||
554 | list_splice(&unmap_list, &pool->clean_list); | ||
555 | spin_unlock_irqrestore(&pool->list_lock, flags); | ||
556 | } | ||
557 | |||
558 | atomic_sub(ncleaned, &pool->dirty_count); | ||
559 | atomic_sub(nfreed, &pool->item_count); | ||
560 | |||
561 | mutex_unlock(&pool->flush_lock); | ||
562 | return ret; | ||
563 | } | ||
564 | |||
565 | static void rds_iw_mr_pool_flush_worker(struct work_struct *work) | ||
566 | { | ||
567 | struct rds_iw_mr_pool *pool = container_of(work, struct rds_iw_mr_pool, flush_worker); | ||
568 | |||
569 | rds_iw_flush_mr_pool(pool, 0); | ||
570 | } | ||
571 | |||
572 | void rds_iw_free_mr(void *trans_private, int invalidate) | ||
573 | { | ||
574 | struct rds_iw_mr *ibmr = trans_private; | ||
575 | struct rds_iw_mr_pool *pool = ibmr->device->mr_pool; | ||
576 | |||
577 | rdsdebug("RDS/IW: free_mr nents %u\n", ibmr->mapping.m_sg.len); | ||
578 | if (!pool) | ||
579 | return; | ||
580 | |||
581 | /* Return it to the pool's free list */ | ||
582 | rds_iw_free_fastreg(pool, ibmr); | ||
583 | |||
584 | /* If we've pinned too many pages, request a flush */ | ||
585 | if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned | ||
586 | || atomic_read(&pool->dirty_count) >= pool->max_items / 10) | ||
587 | queue_work(rds_wq, &pool->flush_worker); | ||
588 | |||
589 | if (invalidate) { | ||
590 | if (likely(!in_interrupt())) { | ||
591 | rds_iw_flush_mr_pool(pool, 0); | ||
592 | } else { | ||
593 | /* We get here if the user created an MR marked | ||
594 | * as use_once and invalidate at the same time. */ | ||
595 | queue_work(rds_wq, &pool->flush_worker); | ||
596 | } | ||
597 | } | ||
598 | } | ||
599 | |||
600 | void rds_iw_flush_mrs(void) | ||
601 | { | ||
602 | struct rds_iw_device *rds_iwdev; | ||
603 | |||
604 | list_for_each_entry(rds_iwdev, &rds_iw_devices, list) { | ||
605 | struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool; | ||
606 | |||
607 | if (pool) | ||
608 | rds_iw_flush_mr_pool(pool, 0); | ||
609 | } | ||
610 | } | ||
611 | |||
612 | void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents, | ||
613 | struct rds_sock *rs, u32 *key_ret) | ||
614 | { | ||
615 | struct rds_iw_device *rds_iwdev; | ||
616 | struct rds_iw_mr *ibmr = NULL; | ||
617 | struct rdma_cm_id *cm_id; | ||
618 | int ret; | ||
619 | |||
620 | ret = rds_iw_get_device(rs, &rds_iwdev, &cm_id); | ||
621 | if (ret || !cm_id) { | ||
622 | ret = -ENODEV; | ||
623 | goto out; | ||
624 | } | ||
625 | |||
626 | if (!rds_iwdev->mr_pool) { | ||
627 | ret = -ENODEV; | ||
628 | goto out; | ||
629 | } | ||
630 | |||
631 | ibmr = rds_iw_alloc_mr(rds_iwdev); | ||
632 | if (IS_ERR(ibmr)) | ||
633 | return ibmr; | ||
634 | |||
635 | ibmr->cm_id = cm_id; | ||
636 | ibmr->device = rds_iwdev; | ||
637 | |||
638 | ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents); | ||
639 | if (ret == 0) | ||
640 | *key_ret = ibmr->mr->rkey; | ||
641 | else | ||
642 | printk(KERN_WARNING "RDS/IW: failed to map mr (errno=%d)\n", ret); | ||
643 | |||
644 | out: | ||
645 | if (ret) { | ||
646 | if (ibmr) | ||
647 | rds_iw_free_mr(ibmr, 0); | ||
648 | ibmr = ERR_PTR(ret); | ||
649 | } | ||
650 | return ibmr; | ||
651 | } | ||
652 | |||
653 | /* | ||
654 | * iWARP fastreg handling | ||
655 | * | ||
656 | * The life cycle of a fastreg registration is a bit different from | ||
657 | * FMRs. | ||
658 | * The idea behind fastreg is to have one MR, to which we bind different | ||
659 | * mappings over time. To avoid stalling on the expensive map and invalidate | ||
660 | * operations, these operations are pipelined on the same send queue on | ||
661 | * which we want to send the message containing the r_key. | ||
662 | * | ||
663 | * This creates a bit of a problem for us, as we do not have the destination | ||
664 | * IP in GET_MR, so the connection must be set up prior to the GET_MR call for | ||
665 | * RDMA to be correctly set up. If a fastreg request is present, rds_iw_xmit | ||
666 | * will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request | ||
667 | * before queuing the SEND. When completions for these arrive, they are | ||
668 | * dispatched to the MR, which has a bit set showing that RDMA can be performed. | ||
669 | * | ||
670 | * There is another interesting aspect that's related to invalidation. | ||
671 | * The application can request that a mapping is invalidated in FREE_MR. | ||
672 | * The expectation there is that this invalidation step includes ALL | ||
673 | * PREVIOUSLY FREED MRs. | ||
674 | */ | ||
675 | static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, | ||
676 | struct rds_iw_mr *ibmr) | ||
677 | { | ||
678 | struct rds_iw_device *rds_iwdev = pool->device; | ||
679 | struct ib_fast_reg_page_list *page_list = NULL; | ||
680 | struct ib_mr *mr; | ||
681 | int err; | ||
682 | |||
683 | mr = ib_alloc_fast_reg_mr(rds_iwdev->pd, pool->max_message_size); | ||
684 | if (IS_ERR(mr)) { | ||
685 | err = PTR_ERR(mr); | ||
686 | |||
687 | printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed (err=%d)\n", err); | ||
688 | return err; | ||
689 | } | ||
690 | |||
691 | /* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages | ||
692 | * is not filled in. | ||
693 | */ | ||
694 | page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size); | ||
695 | if (IS_ERR(page_list)) { | ||
696 | err = PTR_ERR(page_list); | ||
697 | |||
698 | printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err); | ||
699 | ib_dereg_mr(mr); | ||
700 | return err; | ||
701 | } | ||
702 | |||
703 | ibmr->page_list = page_list; | ||
704 | ibmr->mr = mr; | ||
705 | return 0; | ||
706 | } | ||
707 | |||
708 | static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) | ||
709 | { | ||
710 | struct rds_iw_mr *ibmr = mapping->m_mr; | ||
711 | struct ib_send_wr f_wr, *failed_wr; | ||
712 | int ret; | ||
713 | |||
714 | /* | ||
715 | * Perform a WR for the fast_reg_mr. Each individual page | ||
716 | * in the sg list is added to the fast reg page list and placed | ||
717 | * inside the fast_reg_mr WR. The key used is a rolling 8bit | ||
718 | * counter, which should guarantee uniqueness. | ||
719 | */ | ||
720 | ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++); | ||
721 | mapping->m_rkey = ibmr->mr->rkey; | ||
722 | |||
723 | memset(&f_wr, 0, sizeof(f_wr)); | ||
724 | f_wr.wr_id = RDS_IW_FAST_REG_WR_ID; | ||
725 | f_wr.opcode = IB_WR_FAST_REG_MR; | ||
726 | f_wr.wr.fast_reg.length = mapping->m_sg.bytes; | ||
727 | f_wr.wr.fast_reg.rkey = mapping->m_rkey; | ||
728 | f_wr.wr.fast_reg.page_list = ibmr->page_list; | ||
729 | f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len; | ||
730 | f_wr.wr.fast_reg.page_shift = ibmr->device->page_shift; | ||
731 | f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE | | ||
732 | IB_ACCESS_REMOTE_READ | | ||
733 | IB_ACCESS_REMOTE_WRITE; | ||
734 | f_wr.wr.fast_reg.iova_start = 0; | ||
735 | f_wr.send_flags = IB_SEND_SIGNALED; | ||
736 | |||
737 | failed_wr = &f_wr; | ||
738 | ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr); | ||
739 | BUG_ON(failed_wr != &f_wr); | ||
740 | if (ret && printk_ratelimit()) | ||
741 | printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", | ||
742 | __func__, __LINE__, ret); | ||
743 | return ret; | ||
744 | } | ||
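The fast_reg key above is refreshed on every remap by folding an 8-bit counter into the R_Key. Below is a minimal sketch of that update, assuming ib_update_fast_reg_key() replaces only the low byte of the key (so the counter wraps after 256 remaps); sketch_roll_rkey() is a hypothetical helper, not a kernel symbol.

#include <stdint.h>
#include <stdio.h>

static uint32_t sketch_roll_rkey(uint32_t rkey, uint8_t remap_count)
{
	/* Keep the upper 24 bits of the key, replace the low "key" byte. */
	return (rkey & 0xffffff00u) | remap_count;
}

int main(void)
{
	printf("0x%08x\n", sketch_roll_rkey(0x12345600u, 7));	/* prints 0x12345607 */
	return 0;
}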
745 | |||
746 | static int rds_iw_rdma_fastreg_inv(struct rds_iw_mr *ibmr) | ||
747 | { | ||
748 | struct ib_send_wr s_wr, *failed_wr; | ||
749 | int ret = 0; | ||
750 | |||
751 | if (!ibmr->cm_id->qp || !ibmr->mr) | ||
752 | goto out; | ||
753 | |||
754 | memset(&s_wr, 0, sizeof(s_wr)); | ||
755 | s_wr.wr_id = RDS_IW_LOCAL_INV_WR_ID; | ||
756 | s_wr.opcode = IB_WR_LOCAL_INV; | ||
757 | s_wr.ex.invalidate_rkey = ibmr->mr->rkey; | ||
758 | s_wr.send_flags = IB_SEND_SIGNALED; | ||
759 | |||
760 | failed_wr = &s_wr; | ||
761 | ret = ib_post_send(ibmr->cm_id->qp, &s_wr, &failed_wr); | ||
762 | if (ret && printk_ratelimit()) { | ||
763 | printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", | ||
764 | __func__, __LINE__, ret); | ||
765 | goto out; | ||
766 | } | ||
767 | out: | ||
768 | return ret; | ||
769 | } | ||
770 | |||
771 | static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, | ||
772 | struct rds_iw_mr *ibmr, | ||
773 | struct scatterlist *sg, | ||
774 | unsigned int sg_len) | ||
775 | { | ||
776 | struct rds_iw_device *rds_iwdev = pool->device; | ||
777 | struct rds_iw_mapping *mapping = &ibmr->mapping; | ||
778 | u64 *dma_pages; | ||
779 | int i, ret = 0; | ||
780 | |||
781 | rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len); | ||
782 | |||
783 | dma_pages = rds_iw_map_scatterlist(rds_iwdev, | ||
784 | &mapping->m_sg, | ||
785 | rds_iwdev->page_shift); | ||
786 | if (IS_ERR(dma_pages)) { | ||
787 | ret = PTR_ERR(dma_pages); | ||
788 | dma_pages = NULL; | ||
789 | goto out; | ||
790 | } | ||
791 | |||
792 | if (mapping->m_sg.dma_len > pool->max_message_size) { | ||
793 | ret = -EMSGSIZE; | ||
794 | goto out; | ||
795 | } | ||
796 | |||
797 | for (i = 0; i < mapping->m_sg.dma_npages; ++i) | ||
798 | ibmr->page_list->page_list[i] = dma_pages[i]; | ||
799 | |||
800 | ret = rds_iw_rdma_build_fastreg(mapping); | ||
801 | if (ret) | ||
802 | goto out; | ||
803 | |||
804 | rds_iw_stats_inc(s_iw_rdma_mr_used); | ||
805 | |||
806 | out: | ||
807 | kfree(dma_pages); | ||
808 | |||
809 | return ret; | ||
810 | } | ||
811 | |||
812 | /* | ||
813 | * "Free" a fastreg MR. | ||
814 | */ | ||
815 | static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, | ||
816 | struct rds_iw_mr *ibmr) | ||
817 | { | ||
818 | unsigned long flags; | ||
819 | int ret; | ||
820 | |||
821 | if (!ibmr->mapping.m_sg.dma_len) | ||
822 | return; | ||
823 | |||
824 | ret = rds_iw_rdma_fastreg_inv(ibmr); | ||
825 | if (ret) | ||
826 | return; | ||
827 | |||
828 | /* Try to post the LOCAL_INV WR to the queue. */ | ||
829 | spin_lock_irqsave(&pool->list_lock, flags); | ||
830 | |||
831 | list_add_tail(&ibmr->mapping.m_list, &pool->dirty_list); | ||
832 | atomic_add(ibmr->mapping.m_sg.len, &pool->free_pinned); | ||
833 | atomic_inc(&pool->dirty_count); | ||
834 | |||
835 | spin_unlock_irqrestore(&pool->list_lock, flags); | ||
836 | } | ||
837 | |||
838 | static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, | ||
839 | struct list_head *unmap_list, | ||
840 | struct list_head *kill_list) | ||
841 | { | ||
842 | struct rds_iw_mapping *mapping, *next; | ||
843 | unsigned int ncleaned = 0; | ||
844 | LIST_HEAD(laundered); | ||
845 | |||
846 | /* Batched invalidation of fastreg MRs. | ||
847 | * Why do we do it this way, even though we could pipeline unmap | ||
848 | * and remap? The reason is the application semantics - when the | ||
849 | * application requests an invalidation of MRs, it expects all | ||
850 | * previously released R_Keys to become invalid. | ||
851 | * | ||
852 | * If we implement MR reuse naively, we risk memory corruption | ||
853 | * (this has actually been observed). So the default behavior | ||
855 | * requires that an MR goes through an explicit unmap operation before | ||
855 | * we can reuse it again. | ||
856 | * | ||
857 | * We could probably improve on this a little, by allowing immediate | ||
858 | * reuse of an MR on the same socket (e.g. you could add a small | ||
859 | * cache of unused MRs to struct rds_socket - GET_MR could grab one | ||
860 | * of these without requiring an explicit invalidate). | ||
861 | */ | ||
862 | while (!list_empty(unmap_list)) { | ||
863 | unsigned long flags; | ||
864 | |||
865 | spin_lock_irqsave(&pool->list_lock, flags); | ||
866 | list_for_each_entry_safe(mapping, next, unmap_list, m_list) { | ||
867 | list_move(&mapping->m_list, &laundered); | ||
868 | ncleaned++; | ||
869 | } | ||
870 | spin_unlock_irqrestore(&pool->list_lock, flags); | ||
871 | } | ||
872 | |||
873 | /* Move all laundered mappings back to the unmap list. | ||
874 | * We do not kill any WRs right now - it doesn't seem the | ||
875 | * fastreg API has a max_remap limit. */ | ||
876 | list_splice_init(&laundered, unmap_list); | ||
877 | |||
878 | return ncleaned; | ||
879 | } | ||
880 | |||
881 | static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, | ||
882 | struct rds_iw_mr *ibmr) | ||
883 | { | ||
884 | if (ibmr->page_list) | ||
885 | ib_free_fast_reg_page_list(ibmr->page_list); | ||
886 | if (ibmr->mr) | ||
887 | ib_dereg_mr(ibmr->mr); | ||
888 | } | ||
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c new file mode 100644 index 000000000000..a1931f0027a2 --- /dev/null +++ b/net/rds/iw_recv.c | |||
@@ -0,0 +1,869 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/pci.h> | ||
35 | #include <linux/dma-mapping.h> | ||
36 | #include <rdma/rdma_cm.h> | ||
37 | |||
38 | #include "rds.h" | ||
39 | #include "iw.h" | ||
40 | |||
41 | static struct kmem_cache *rds_iw_incoming_slab; | ||
42 | static struct kmem_cache *rds_iw_frag_slab; | ||
43 | static atomic_t rds_iw_allocation = ATOMIC_INIT(0); | ||
44 | |||
45 | static void rds_iw_frag_drop_page(struct rds_page_frag *frag) | ||
46 | { | ||
47 | rdsdebug("frag %p page %p\n", frag, frag->f_page); | ||
48 | __free_page(frag->f_page); | ||
49 | frag->f_page = NULL; | ||
50 | } | ||
51 | |||
52 | static void rds_iw_frag_free(struct rds_page_frag *frag) | ||
53 | { | ||
54 | rdsdebug("frag %p page %p\n", frag, frag->f_page); | ||
55 | BUG_ON(frag->f_page != NULL); | ||
56 | kmem_cache_free(rds_iw_frag_slab, frag); | ||
57 | } | ||
58 | |||
59 | /* | ||
60 | * We map a page at a time. Its fragments are posted in order. This | ||
61 | * is called in fragment order as the fragments get send completion events. | ||
62 | * Only the last frag in the page performs the unmapping. | ||
63 | * | ||
64 | * It's OK for ring cleanup to call this in whatever order it likes because | ||
65 | * DMA is not in flight and so we can unmap while other ring entries still | ||
66 | * hold page references in their frags. | ||
67 | */ | ||
68 | static void rds_iw_recv_unmap_page(struct rds_iw_connection *ic, | ||
69 | struct rds_iw_recv_work *recv) | ||
70 | { | ||
71 | struct rds_page_frag *frag = recv->r_frag; | ||
72 | |||
73 | rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page); | ||
74 | if (frag->f_mapped) | ||
75 | ib_dma_unmap_page(ic->i_cm_id->device, | ||
76 | frag->f_mapped, | ||
77 | RDS_FRAG_SIZE, DMA_FROM_DEVICE); | ||
78 | frag->f_mapped = 0; | ||
79 | } | ||
80 | |||
81 | void rds_iw_recv_init_ring(struct rds_iw_connection *ic) | ||
82 | { | ||
83 | struct rds_iw_recv_work *recv; | ||
84 | u32 i; | ||
85 | |||
86 | for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) { | ||
87 | struct ib_sge *sge; | ||
88 | |||
89 | recv->r_iwinc = NULL; | ||
90 | recv->r_frag = NULL; | ||
91 | |||
92 | recv->r_wr.next = NULL; | ||
93 | recv->r_wr.wr_id = i; | ||
94 | recv->r_wr.sg_list = recv->r_sge; | ||
95 | recv->r_wr.num_sge = RDS_IW_RECV_SGE; | ||
96 | |||
97 | sge = rds_iw_data_sge(ic, recv->r_sge); | ||
98 | sge->addr = 0; | ||
99 | sge->length = RDS_FRAG_SIZE; | ||
100 | sge->lkey = 0; | ||
101 | |||
102 | sge = rds_iw_header_sge(ic, recv->r_sge); | ||
103 | sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); | ||
104 | sge->length = sizeof(struct rds_header); | ||
105 | sge->lkey = 0; | ||
106 | } | ||
107 | } | ||
108 | |||
109 | static void rds_iw_recv_clear_one(struct rds_iw_connection *ic, | ||
110 | struct rds_iw_recv_work *recv) | ||
111 | { | ||
112 | if (recv->r_iwinc) { | ||
113 | rds_inc_put(&recv->r_iwinc->ii_inc); | ||
114 | recv->r_iwinc = NULL; | ||
115 | } | ||
116 | if (recv->r_frag) { | ||
117 | rds_iw_recv_unmap_page(ic, recv); | ||
118 | if (recv->r_frag->f_page) | ||
119 | rds_iw_frag_drop_page(recv->r_frag); | ||
120 | rds_iw_frag_free(recv->r_frag); | ||
121 | recv->r_frag = NULL; | ||
122 | } | ||
123 | } | ||
124 | |||
125 | void rds_iw_recv_clear_ring(struct rds_iw_connection *ic) | ||
126 | { | ||
127 | u32 i; | ||
128 | |||
129 | for (i = 0; i < ic->i_recv_ring.w_nr; i++) | ||
130 | rds_iw_recv_clear_one(ic, &ic->i_recvs[i]); | ||
131 | |||
132 | if (ic->i_frag.f_page) | ||
133 | rds_iw_frag_drop_page(&ic->i_frag); | ||
134 | } | ||
135 | |||
136 | static int rds_iw_recv_refill_one(struct rds_connection *conn, | ||
137 | struct rds_iw_recv_work *recv, | ||
138 | gfp_t kptr_gfp, gfp_t page_gfp) | ||
139 | { | ||
140 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
141 | dma_addr_t dma_addr; | ||
142 | struct ib_sge *sge; | ||
143 | int ret = -ENOMEM; | ||
144 | |||
145 | if (recv->r_iwinc == NULL) { | ||
146 | if (atomic_read(&rds_iw_allocation) >= rds_iw_sysctl_max_recv_allocation) { | ||
147 | rds_iw_stats_inc(s_iw_rx_alloc_limit); | ||
148 | goto out; | ||
149 | } | ||
150 | recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab, | ||
151 | kptr_gfp); | ||
152 | if (recv->r_iwinc == NULL) | ||
153 | goto out; | ||
154 | atomic_inc(&rds_iw_allocation); | ||
155 | INIT_LIST_HEAD(&recv->r_iwinc->ii_frags); | ||
156 | rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr); | ||
157 | } | ||
158 | |||
159 | if (recv->r_frag == NULL) { | ||
160 | recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp); | ||
161 | if (recv->r_frag == NULL) | ||
162 | goto out; | ||
163 | INIT_LIST_HEAD(&recv->r_frag->f_item); | ||
164 | recv->r_frag->f_page = NULL; | ||
165 | } | ||
166 | |||
167 | if (ic->i_frag.f_page == NULL) { | ||
168 | ic->i_frag.f_page = alloc_page(page_gfp); | ||
169 | if (ic->i_frag.f_page == NULL) | ||
170 | goto out; | ||
171 | ic->i_frag.f_offset = 0; | ||
172 | } | ||
173 | |||
174 | dma_addr = ib_dma_map_page(ic->i_cm_id->device, | ||
175 | ic->i_frag.f_page, | ||
176 | ic->i_frag.f_offset, | ||
177 | RDS_FRAG_SIZE, | ||
178 | DMA_FROM_DEVICE); | ||
179 | if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr)) | ||
180 | goto out; | ||
181 | |||
182 | /* | ||
183 | * Once we get the RDS_PAGE_LAST_OFF frag then rds_iw_frag_unmap() | ||
184 | * must be called on this recv. This happens as completions hit | ||
185 | * in order or on connection shutdown. | ||
186 | */ | ||
187 | recv->r_frag->f_page = ic->i_frag.f_page; | ||
188 | recv->r_frag->f_offset = ic->i_frag.f_offset; | ||
189 | recv->r_frag->f_mapped = dma_addr; | ||
190 | |||
191 | sge = rds_iw_data_sge(ic, recv->r_sge); | ||
192 | sge->addr = dma_addr; | ||
193 | sge->length = RDS_FRAG_SIZE; | ||
194 | |||
195 | sge = rds_iw_header_sge(ic, recv->r_sge); | ||
196 | sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); | ||
197 | sge->length = sizeof(struct rds_header); | ||
198 | |||
199 | get_page(recv->r_frag->f_page); | ||
200 | |||
201 | if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) { | ||
202 | ic->i_frag.f_offset += RDS_FRAG_SIZE; | ||
203 | } else { | ||
204 | put_page(ic->i_frag.f_page); | ||
205 | ic->i_frag.f_page = NULL; | ||
206 | ic->i_frag.f_offset = 0; | ||
207 | } | ||
208 | |||
209 | ret = 0; | ||
210 | out: | ||
211 | return ret; | ||
212 | } | ||
213 | |||
214 | /* | ||
215 | * This tries to allocate and post unused work requests after making sure that | ||
216 | * they have all the allocations they need to queue received fragments into | ||
217 | * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc | ||
218 | * pairs don't go unmatched. | ||
219 | * | ||
220 | * -1 is returned if posting fails due to temporary resource exhaustion. | ||
221 | */ | ||
222 | int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, | ||
223 | gfp_t page_gfp, int prefill) | ||
224 | { | ||
225 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
226 | struct rds_iw_recv_work *recv; | ||
227 | struct ib_recv_wr *failed_wr; | ||
228 | unsigned int posted = 0; | ||
229 | int ret = 0; | ||
230 | u32 pos; | ||
231 | |||
232 | while ((prefill || rds_conn_up(conn)) | ||
233 | && rds_iw_ring_alloc(&ic->i_recv_ring, 1, &pos)) { | ||
234 | if (pos >= ic->i_recv_ring.w_nr) { | ||
235 | printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n", | ||
236 | pos); | ||
237 | ret = -EINVAL; | ||
238 | break; | ||
239 | } | ||
240 | |||
241 | recv = &ic->i_recvs[pos]; | ||
242 | ret = rds_iw_recv_refill_one(conn, recv, kptr_gfp, page_gfp); | ||
243 | if (ret) { | ||
244 | ret = -1; | ||
245 | break; | ||
246 | } | ||
247 | |||
248 | /* XXX when can this fail? */ | ||
249 | ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); | ||
250 | rdsdebug("recv %p iwinc %p page %p addr %lu ret %d\n", recv, | ||
251 | recv->r_iwinc, recv->r_frag->f_page, | ||
252 | (long) recv->r_frag->f_mapped, ret); | ||
253 | if (ret) { | ||
254 | rds_iw_conn_error(conn, "recv post on " | ||
255 | "%pI4 returned %d, disconnecting and " | ||
256 | "reconnecting\n", &conn->c_faddr, | ||
257 | ret); | ||
258 | ret = -1; | ||
259 | break; | ||
260 | } | ||
261 | |||
262 | posted++; | ||
263 | } | ||
264 | |||
265 | /* We're doing flow control - update the window. */ | ||
266 | if (ic->i_flowctl && posted) | ||
267 | rds_iw_advertise_credits(conn, posted); | ||
268 | |||
269 | if (ret) | ||
270 | rds_iw_ring_unalloc(&ic->i_recv_ring, 1); | ||
271 | return ret; | ||
272 | } | ||
273 | |||
274 | void rds_iw_inc_purge(struct rds_incoming *inc) | ||
275 | { | ||
276 | struct rds_iw_incoming *iwinc; | ||
277 | struct rds_page_frag *frag; | ||
278 | struct rds_page_frag *pos; | ||
279 | |||
280 | iwinc = container_of(inc, struct rds_iw_incoming, ii_inc); | ||
281 | rdsdebug("purging iwinc %p inc %p\n", iwinc, inc); | ||
282 | |||
283 | list_for_each_entry_safe(frag, pos, &iwinc->ii_frags, f_item) { | ||
284 | list_del_init(&frag->f_item); | ||
285 | rds_iw_frag_drop_page(frag); | ||
286 | rds_iw_frag_free(frag); | ||
287 | } | ||
288 | } | ||
289 | |||
290 | void rds_iw_inc_free(struct rds_incoming *inc) | ||
291 | { | ||
292 | struct rds_iw_incoming *iwinc; | ||
293 | |||
294 | iwinc = container_of(inc, struct rds_iw_incoming, ii_inc); | ||
295 | |||
296 | rds_iw_inc_purge(inc); | ||
297 | rdsdebug("freeing iwinc %p inc %p\n", iwinc, inc); | ||
298 | BUG_ON(!list_empty(&iwinc->ii_frags)); | ||
299 | kmem_cache_free(rds_iw_incoming_slab, iwinc); | ||
300 | atomic_dec(&rds_iw_allocation); | ||
301 | BUG_ON(atomic_read(&rds_iw_allocation) < 0); | ||
302 | } | ||
303 | |||
304 | int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, | ||
305 | size_t size) | ||
306 | { | ||
307 | struct rds_iw_incoming *iwinc; | ||
308 | struct rds_page_frag *frag; | ||
309 | struct iovec *iov = first_iov; | ||
310 | unsigned long to_copy; | ||
311 | unsigned long frag_off = 0; | ||
312 | unsigned long iov_off = 0; | ||
313 | int copied = 0; | ||
314 | int ret; | ||
315 | u32 len; | ||
316 | |||
317 | iwinc = container_of(inc, struct rds_iw_incoming, ii_inc); | ||
318 | frag = list_entry(iwinc->ii_frags.next, struct rds_page_frag, f_item); | ||
319 | len = be32_to_cpu(inc->i_hdr.h_len); | ||
320 | |||
321 | while (copied < size && copied < len) { | ||
322 | if (frag_off == RDS_FRAG_SIZE) { | ||
323 | frag = list_entry(frag->f_item.next, | ||
324 | struct rds_page_frag, f_item); | ||
325 | frag_off = 0; | ||
326 | } | ||
327 | while (iov_off == iov->iov_len) { | ||
328 | iov_off = 0; | ||
329 | iov++; | ||
330 | } | ||
331 | |||
332 | to_copy = min(iov->iov_len - iov_off, RDS_FRAG_SIZE - frag_off); | ||
333 | to_copy = min_t(size_t, to_copy, size - copied); | ||
334 | to_copy = min_t(unsigned long, to_copy, len - copied); | ||
335 | |||
336 | rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag " | ||
337 | "[%p, %lu] + %lu\n", | ||
338 | to_copy, iov->iov_base, iov->iov_len, iov_off, | ||
339 | frag->f_page, frag->f_offset, frag_off); | ||
340 | |||
341 | /* XXX needs + offset for multiple recvs per page */ | ||
342 | ret = rds_page_copy_to_user(frag->f_page, | ||
343 | frag->f_offset + frag_off, | ||
344 | iov->iov_base + iov_off, | ||
345 | to_copy); | ||
346 | if (ret) { | ||
347 | copied = ret; | ||
348 | break; | ||
349 | } | ||
350 | |||
351 | iov_off += to_copy; | ||
352 | frag_off += to_copy; | ||
353 | copied += to_copy; | ||
354 | } | ||
355 | |||
356 | return copied; | ||
357 | } | ||
358 | |||
359 | /* ic starts out kzalloc()ed */ | ||
360 | void rds_iw_recv_init_ack(struct rds_iw_connection *ic) | ||
361 | { | ||
362 | struct ib_send_wr *wr = &ic->i_ack_wr; | ||
363 | struct ib_sge *sge = &ic->i_ack_sge; | ||
364 | |||
365 | sge->addr = ic->i_ack_dma; | ||
366 | sge->length = sizeof(struct rds_header); | ||
367 | sge->lkey = rds_iw_local_dma_lkey(ic); | ||
368 | |||
369 | wr->sg_list = sge; | ||
370 | wr->num_sge = 1; | ||
371 | wr->opcode = IB_WR_SEND; | ||
372 | wr->wr_id = RDS_IW_ACK_WR_ID; | ||
373 | wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; | ||
374 | } | ||
375 | |||
376 | /* | ||
377 | * You'd think that with reliable IB connections you wouldn't need to ack | ||
378 | * messages that have been received. The problem is that IB hardware generates | ||
379 | * an ack message before it has DMAed the message into memory. This creates a | ||
380 | * potential message loss if the HCA is disabled for any reason between when it | ||
381 | * sends the ack and when the message is DMAed and processed. This is only a | ||
382 | * potential issue if another HCA is available for fail-over. | ||
383 | * | ||
384 | * When the remote host receives our ack they'll free the sent message from | ||
385 | * their send queue. To decrease the latency of this we always send an ack | ||
386 | * immediately after we've received messages. | ||
387 | * | ||
388 | * For simplicity, we only have one ack in flight at a time. This puts | ||
389 | * pressure on senders to have deep enough send queues to absorb the latency of | ||
390 | * a single ack frame being in flight. This might not be good enough. | ||
391 | * | ||
392 | * This is implemented by having a long-lived send_wr and sge which point to a | ||
393 | * statically allocated ack frame. This ack wr does not fall under the ring | ||
394 | * accounting that the tx and rx wrs do. The QP attribute specifically makes | ||
395 | * room for it beyond the ring size. Send completion notices its special | ||
396 | * wr_id and avoids working with the ring in that case. | ||
397 | */ | ||
398 | static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, | ||
399 | int ack_required) | ||
400 | { | ||
401 | rds_iw_set_64bit(&ic->i_ack_next, seq); | ||
402 | if (ack_required) { | ||
403 | smp_mb__before_clear_bit(); | ||
404 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
405 | } | ||
406 | } | ||
407 | |||
408 | static u64 rds_iw_get_ack(struct rds_iw_connection *ic) | ||
409 | { | ||
410 | clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
411 | smp_mb__after_clear_bit(); | ||
412 | |||
413 | return ic->i_ack_next; | ||
414 | } | ||
415 | |||
416 | static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits) | ||
417 | { | ||
418 | struct rds_header *hdr = ic->i_ack; | ||
419 | struct ib_send_wr *failed_wr; | ||
420 | u64 seq; | ||
421 | int ret; | ||
422 | |||
423 | seq = rds_iw_get_ack(ic); | ||
424 | |||
425 | rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq); | ||
426 | rds_message_populate_header(hdr, 0, 0, 0); | ||
427 | hdr->h_ack = cpu_to_be64(seq); | ||
428 | hdr->h_credit = adv_credits; | ||
429 | rds_message_make_checksum(hdr); | ||
430 | ic->i_ack_queued = jiffies; | ||
431 | |||
432 | ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr); | ||
433 | if (unlikely(ret)) { | ||
434 | /* Failed to send. Release the WR, and | ||
435 | * force another ACK. | ||
436 | */ | ||
437 | clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); | ||
438 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
439 | |||
440 | rds_iw_stats_inc(s_iw_ack_send_failure); | ||
441 | /* Need to finesse this later. */ | ||
442 | BUG(); | ||
443 | } else | ||
444 | rds_iw_stats_inc(s_iw_ack_sent); | ||
445 | } | ||
446 | |||
447 | /* | ||
448 | * There are 3 ways of getting acknowledgements to the peer: | ||
449 | * 1. We call rds_iw_attempt_ack from the recv completion handler | ||
450 | * to send an ACK-only frame. | ||
451 | * However, there can be only one such frame in the send queue | ||
452 | * at any time, so we may have to postpone it. | ||
453 | * 2. When another (data) packet is transmitted while there's | ||
454 | * an ACK in the queue, we piggyback the ACK sequence number | ||
455 | * on the data packet. | ||
456 | * 3. If the ACK WR is done sending, we get called from the | ||
457 | * send queue completion handler, and check whether there's | ||
458 | * another ACK pending (postponed because the WR was on the | ||
459 | * queue). If so, we transmit it. | ||
460 | * | ||
461 | * We maintain 2 variables: | ||
462 | * - i_ack_flags, which keeps track of whether the ACK WR | ||
463 | * is currently in the send queue or not (IB_ACK_IN_FLIGHT) | ||
464 | * - i_ack_next, which is the last sequence number we received | ||
465 | * | ||
466 | * Potentially, send queue and receive queue handlers can run concurrently. | ||
467 | * | ||
468 | * Reconnecting complicates this picture just slightly. When we | ||
469 | * reconnect, we may be seeing duplicate packets. The peer | ||
470 | * is retransmitting them, because it hasn't seen an ACK for | ||
471 | * them. It is important that we ACK these. | ||
472 | * | ||
473 | * ACK mitigation adds a header flag "ACK_REQUIRED"; any packet with | ||
474 | * this flag set *MUST* be acknowledged immediately. | ||
475 | */ | ||
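A standalone C sketch of the REQUESTED/IN_FLIGHT handshake described above; the flag constants and helper names are illustrative only, and the real code uses atomic bit operations on i_ack_flags rather than a plain word:

/* Sketch of the one-ack-in-flight handshake; names are illustrative. */
#include <stdio.h>

#define ACK_REQUESTED  0x1   /* a received packet asked for an ack */
#define ACK_IN_FLIGHT  0x2   /* the single ack WR is on the send queue */

static unsigned int ack_flags;

/* Recv path: try to post the one allowed ack frame. */
static void attempt_ack(void)
{
	if (!(ack_flags & ACK_REQUESTED))
		return;                         /* nothing to acknowledge */
	if (ack_flags & ACK_IN_FLIGHT) {
		printf("ack postponed, one already in flight\n");
		return;
	}
	ack_flags |= ACK_IN_FLIGHT;
	ack_flags &= ~ACK_REQUESTED;
	printf("ack frame posted\n");
}

/* Send completion path: the ack WR finished, flush any postponed ack. */
static void ack_send_complete(void)
{
	ack_flags &= ~ACK_IN_FLIGHT;
	attempt_ack();
}

int main(void)
{
	ack_flags |= ACK_REQUESTED;     /* a packet arrived */
	attempt_ack();                  /* posts the ack */
	ack_flags |= ACK_REQUESTED;     /* another packet arrives */
	attempt_ack();                  /* postponed: ack still in flight */
	ack_send_complete();            /* completion re-arms and posts it */
	return 0;
}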
476 | |||
477 | /* | ||
478 | * When we get here, we're called from the recv queue handler. | ||
479 | * Check whether we ought to transmit an ACK. | ||
480 | */ | ||
481 | void rds_iw_attempt_ack(struct rds_iw_connection *ic) | ||
482 | { | ||
483 | unsigned int adv_credits; | ||
484 | |||
485 | if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags)) | ||
486 | return; | ||
487 | |||
488 | if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) { | ||
489 | rds_iw_stats_inc(s_iw_ack_send_delayed); | ||
490 | return; | ||
491 | } | ||
492 | |||
493 | /* Can we get a send credit? */ | ||
494 | if (!rds_iw_send_grab_credits(ic, 1, &adv_credits, 0)) { | ||
495 | rds_iw_stats_inc(s_iw_tx_throttle); | ||
496 | clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); | ||
497 | return; | ||
498 | } | ||
499 | |||
500 | clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
501 | rds_iw_send_ack(ic, adv_credits); | ||
502 | } | ||
503 | |||
504 | /* | ||
505 | * We get here from the send completion handler, when the | ||
506 | * adapter tells us the ACK frame was sent. | ||
507 | */ | ||
508 | void rds_iw_ack_send_complete(struct rds_iw_connection *ic) | ||
509 | { | ||
510 | clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); | ||
511 | rds_iw_attempt_ack(ic); | ||
512 | } | ||
513 | |||
514 | /* | ||
515 | * This is called by the regular xmit code when it wants to piggyback | ||
516 | * an ACK on an outgoing frame. | ||
517 | */ | ||
518 | u64 rds_iw_piggyb_ack(struct rds_iw_connection *ic) | ||
519 | { | ||
520 | if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags)) | ||
521 | rds_iw_stats_inc(s_iw_ack_send_piggybacked); | ||
522 | return rds_iw_get_ack(ic); | ||
523 | } | ||
524 | |||
525 | /* | ||
526 | * It's kind of lame that we're copying from the posted receive pages into | ||
527 | * long-lived bitmaps. We could have posted the bitmaps and rdma written into | ||
528 | * them. But receiving new congestion bitmaps should be a *rare* event, so | ||
529 | * hopefully we won't need to invest that complexity in making it more | ||
530 | * efficient. By copying we can share a simpler core with TCP which has to | ||
531 | * copy. | ||
532 | */ | ||
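For clarity, the bit expression used in the copy loop below can be exercised on its own; the map words here are made-up values, and the expression simply selects bits that are set in the existing map word but clear in the incoming one:

/* Standalone demo of the accumulation done per 64-bit word in the loop. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t old_map = 0xF0F0;      /* hypothetical current map word  */
	uint64_t new_map = 0x30F8;      /* hypothetical incoming map word */
	uint64_t uncongested = 0;

	uncongested |= ~new_map & old_map;
	printf("old      %#018llx\n", (unsigned long long)old_map);
	printf("new      %#018llx\n", (unsigned long long)new_map);
	printf("changed  %#018llx\n", (unsigned long long)uncongested);
	/* bits 0xC000 were set in old and are clear in new */
	return 0;
}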
533 | static void rds_iw_cong_recv(struct rds_connection *conn, | ||
534 | struct rds_iw_incoming *iwinc) | ||
535 | { | ||
536 | struct rds_cong_map *map; | ||
537 | unsigned int map_off; | ||
538 | unsigned int map_page; | ||
539 | struct rds_page_frag *frag; | ||
540 | unsigned long frag_off; | ||
541 | unsigned long to_copy; | ||
542 | unsigned long copied; | ||
543 | uint64_t uncongested = 0; | ||
544 | void *addr; | ||
545 | |||
546 | /* catch completely corrupt packets */ | ||
547 | if (be32_to_cpu(iwinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES) | ||
548 | return; | ||
549 | |||
550 | map = conn->c_fcong; | ||
551 | map_page = 0; | ||
552 | map_off = 0; | ||
553 | |||
554 | frag = list_entry(iwinc->ii_frags.next, struct rds_page_frag, f_item); | ||
555 | frag_off = 0; | ||
556 | |||
557 | copied = 0; | ||
558 | |||
559 | while (copied < RDS_CONG_MAP_BYTES) { | ||
560 | uint64_t *src, *dst; | ||
561 | unsigned int k; | ||
562 | |||
563 | to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); | ||
564 | BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ | ||
565 | |||
566 | addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0); | ||
567 | |||
568 | src = addr + frag_off; | ||
569 | dst = (void *)map->m_page_addrs[map_page] + map_off; | ||
570 | for (k = 0; k < to_copy; k += 8) { | ||
571 | /* Record ports that became uncongested, ie | ||
572 | * bits that changed from 0 to 1. */ | ||
573 | uncongested |= ~(*src) & *dst; | ||
574 | *dst++ = *src++; | ||
575 | } | ||
576 | kunmap_atomic(addr, KM_SOFTIRQ0); | ||
577 | |||
578 | copied += to_copy; | ||
579 | |||
580 | map_off += to_copy; | ||
581 | if (map_off == PAGE_SIZE) { | ||
582 | map_off = 0; | ||
583 | map_page++; | ||
584 | } | ||
585 | |||
586 | frag_off += to_copy; | ||
587 | if (frag_off == RDS_FRAG_SIZE) { | ||
588 | frag = list_entry(frag->f_item.next, | ||
589 | struct rds_page_frag, f_item); | ||
590 | frag_off = 0; | ||
591 | } | ||
592 | } | ||
593 | |||
594 | /* the congestion map is in little endian order */ | ||
595 | uncongested = le64_to_cpu(uncongested); | ||
596 | |||
597 | rds_cong_map_updated(map, uncongested); | ||
598 | } | ||
599 | |||
600 | /* | ||
601 | * Rings are posted with all the allocations they'll need to queue the | ||
602 | * incoming message to the receiving socket so this can't fail. | ||
603 | * All fragments start with a header, so we can make sure we're not receiving | ||
604 | * garbage, and we can tell a small 8 byte fragment from an ACK frame. | ||
605 | */ | ||
606 | struct rds_iw_ack_state { | ||
607 | u64 ack_next; | ||
608 | u64 ack_recv; | ||
609 | unsigned int ack_required:1; | ||
610 | unsigned int ack_next_valid:1; | ||
611 | unsigned int ack_recv_valid:1; | ||
612 | }; | ||
613 | |||
614 | static void rds_iw_process_recv(struct rds_connection *conn, | ||
615 | struct rds_iw_recv_work *recv, u32 byte_len, | ||
616 | struct rds_iw_ack_state *state) | ||
617 | { | ||
618 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
619 | struct rds_iw_incoming *iwinc = ic->i_iwinc; | ||
620 | struct rds_header *ihdr, *hdr; | ||
621 | |||
622 | /* XXX shut down the connection if port 0,0 are seen? */ | ||
623 | |||
624 | rdsdebug("ic %p iwinc %p recv %p byte len %u\n", ic, iwinc, recv, | ||
625 | byte_len); | ||
626 | |||
627 | if (byte_len < sizeof(struct rds_header)) { | ||
628 | rds_iw_conn_error(conn, "incoming message " | ||
629 | "from %pI4 didn't inclue a " | ||
630 | "header, disconnecting and " | ||
631 | "reconnecting\n", | ||
632 | &conn->c_faddr); | ||
633 | return; | ||
634 | } | ||
635 | byte_len -= sizeof(struct rds_header); | ||
636 | |||
637 | ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs]; | ||
638 | |||
639 | /* Validate the checksum. */ | ||
640 | if (!rds_message_verify_checksum(ihdr)) { | ||
641 | rds_iw_conn_error(conn, "incoming message " | ||
642 | "from %pI4 has corrupted header - " | ||
643 | "forcing a reconnect\n", | ||
644 | &conn->c_faddr); | ||
645 | rds_stats_inc(s_recv_drop_bad_checksum); | ||
646 | return; | ||
647 | } | ||
648 | |||
649 | /* Process the ACK sequence which comes with every packet */ | ||
650 | state->ack_recv = be64_to_cpu(ihdr->h_ack); | ||
651 | state->ack_recv_valid = 1; | ||
652 | |||
653 | /* Process the credits update if there was one */ | ||
654 | if (ihdr->h_credit) | ||
655 | rds_iw_send_add_credits(conn, ihdr->h_credit); | ||
656 | |||
657 | if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && byte_len == 0) { | ||
658 | /* This is an ACK-only packet. It gets special | ||
659 | * treatment here because, historically, ACKs | ||
660 | * were rather special beasts. | ||
661 | */ | ||
662 | rds_iw_stats_inc(s_iw_ack_received); | ||
663 | |||
664 | /* | ||
665 | * Usually the frags make their way on to incs and are then freed as | ||
666 | * the inc is freed. We don't go that route, so we have to drop the | ||
667 | * page ref ourselves. We can't just leave the page on the recv | ||
668 | * because that confuses the dma mapping of pages and each recv's use | ||
669 | * of a partial page. We can leave the frag, though, it will be | ||
670 | * reused. | ||
671 | * | ||
672 | * FIXME: Fold this into the code path below. | ||
673 | */ | ||
674 | rds_iw_frag_drop_page(recv->r_frag); | ||
675 | return; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * If we don't already have an inc on the connection then this | ||
680 | * fragment has a header and starts a message. Copy its header | ||
681 | * into the inc and save the inc so we can hang upcoming fragments | ||
682 | * off its list. | ||
683 | */ | ||
684 | if (iwinc == NULL) { | ||
685 | iwinc = recv->r_iwinc; | ||
686 | recv->r_iwinc = NULL; | ||
687 | ic->i_iwinc = iwinc; | ||
688 | |||
689 | hdr = &iwinc->ii_inc.i_hdr; | ||
690 | memcpy(hdr, ihdr, sizeof(*hdr)); | ||
691 | ic->i_recv_data_rem = be32_to_cpu(hdr->h_len); | ||
692 | |||
693 | rdsdebug("ic %p iwinc %p rem %u flag 0x%x\n", ic, iwinc, | ||
694 | ic->i_recv_data_rem, hdr->h_flags); | ||
695 | } else { | ||
696 | hdr = &iwinc->ii_inc.i_hdr; | ||
697 | /* We can't just use memcmp here; fragments of a | ||
698 | * single message may carry different ACKs */ | ||
699 | if (hdr->h_sequence != ihdr->h_sequence | ||
700 | || hdr->h_len != ihdr->h_len | ||
701 | || hdr->h_sport != ihdr->h_sport | ||
702 | || hdr->h_dport != ihdr->h_dport) { | ||
703 | rds_iw_conn_error(conn, | ||
704 | "fragment header mismatch; forcing reconnect\n"); | ||
705 | return; | ||
706 | } | ||
707 | } | ||
708 | |||
709 | list_add_tail(&recv->r_frag->f_item, &iwinc->ii_frags); | ||
710 | recv->r_frag = NULL; | ||
711 | |||
712 | if (ic->i_recv_data_rem > RDS_FRAG_SIZE) | ||
713 | ic->i_recv_data_rem -= RDS_FRAG_SIZE; | ||
714 | else { | ||
715 | ic->i_recv_data_rem = 0; | ||
716 | ic->i_iwinc = NULL; | ||
717 | |||
718 | if (iwinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) | ||
719 | rds_iw_cong_recv(conn, iwinc); | ||
720 | else { | ||
721 | rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr, | ||
722 | &iwinc->ii_inc, GFP_ATOMIC, | ||
723 | KM_SOFTIRQ0); | ||
724 | state->ack_next = be64_to_cpu(hdr->h_sequence); | ||
725 | state->ack_next_valid = 1; | ||
726 | } | ||
727 | |||
728 | /* Evaluate the ACK_REQUIRED flag *after* we received | ||
729 | * the complete frame, and after bumping the next_rx | ||
730 | * sequence. */ | ||
731 | if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) { | ||
732 | rds_stats_inc(s_recv_ack_required); | ||
733 | state->ack_required = 1; | ||
734 | } | ||
735 | |||
736 | rds_inc_put(&iwinc->ii_inc); | ||
737 | } | ||
738 | } | ||
739 | |||
740 | /* | ||
741 | * Plucking the oldest entry from the ring can be done concurrently with | ||
742 | * the thread refilling the ring. Each ring operation is protected by | ||
743 | * spinlocks and the transient state of refilling doesn't change the | ||
744 | * recording of which entry is oldest. | ||
745 | * | ||
746 | * This relies on IB only calling one cq comp_handler for each cq so that | ||
747 | * there will only be one caller of rds_recv_incoming() per RDS connection. | ||
748 | */ | ||
749 | void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context) | ||
750 | { | ||
751 | struct rds_connection *conn = context; | ||
752 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
753 | struct ib_wc wc; | ||
754 | struct rds_iw_ack_state state = { 0, }; | ||
755 | struct rds_iw_recv_work *recv; | ||
756 | |||
757 | rdsdebug("conn %p cq %p\n", conn, cq); | ||
758 | |||
759 | rds_iw_stats_inc(s_iw_rx_cq_call); | ||
760 | |||
761 | ib_req_notify_cq(cq, IB_CQ_SOLICITED); | ||
762 | |||
763 | while (ib_poll_cq(cq, 1, &wc) > 0) { | ||
764 | rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", | ||
765 | (unsigned long long)wc.wr_id, wc.status, wc.byte_len, | ||
766 | be32_to_cpu(wc.ex.imm_data)); | ||
767 | rds_iw_stats_inc(s_iw_rx_cq_event); | ||
768 | |||
769 | recv = &ic->i_recvs[rds_iw_ring_oldest(&ic->i_recv_ring)]; | ||
770 | |||
771 | rds_iw_recv_unmap_page(ic, recv); | ||
772 | |||
773 | /* | ||
774 | * Also process recvs in connecting state because it is possible | ||
775 | * to get a recv completion _before_ the rdmacm ESTABLISHED | ||
776 | * event is processed. | ||
777 | */ | ||
778 | if (rds_conn_up(conn) || rds_conn_connecting(conn)) { | ||
779 | /* We expect errors as the qp is drained during shutdown */ | ||
780 | if (wc.status == IB_WC_SUCCESS) { | ||
781 | rds_iw_process_recv(conn, recv, wc.byte_len, &state); | ||
782 | } else { | ||
783 | rds_iw_conn_error(conn, "recv completion on " | ||
784 | "%pI4 had status %u, disconnecting and " | ||
785 | "reconnecting\n", &conn->c_faddr, | ||
786 | wc.status); | ||
787 | } | ||
788 | } | ||
789 | |||
790 | rds_iw_ring_free(&ic->i_recv_ring, 1); | ||
791 | } | ||
792 | |||
793 | if (state.ack_next_valid) | ||
794 | rds_iw_set_ack(ic, state.ack_next, state.ack_required); | ||
795 | if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) { | ||
796 | rds_send_drop_acked(conn, state.ack_recv, NULL); | ||
797 | ic->i_ack_recv = state.ack_recv; | ||
798 | } | ||
799 | if (rds_conn_up(conn)) | ||
800 | rds_iw_attempt_ack(ic); | ||
801 | |||
802 | /* If we ever end up with a really empty receive ring, we're | ||
803 | * in deep trouble, as the sender will definitely see RNR | ||
804 | * timeouts. */ | ||
805 | if (rds_iw_ring_empty(&ic->i_recv_ring)) | ||
806 | rds_iw_stats_inc(s_iw_rx_ring_empty); | ||
807 | |||
808 | /* | ||
809 | * If the ring is running low, then schedule the thread to refill. | ||
810 | */ | ||
811 | if (rds_iw_ring_low(&ic->i_recv_ring)) | ||
812 | queue_delayed_work(rds_wq, &conn->c_recv_w, 0); | ||
813 | } | ||
814 | |||
815 | int rds_iw_recv(struct rds_connection *conn) | ||
816 | { | ||
817 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
818 | int ret = 0; | ||
819 | |||
820 | rdsdebug("conn %p\n", conn); | ||
821 | |||
822 | /* | ||
823 | * If we get a temporary posting failure in this context then | ||
824 | * we're really low and we want the caller to back off for a bit. | ||
825 | */ | ||
826 | mutex_lock(&ic->i_recv_mutex); | ||
827 | if (rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0)) | ||
828 | ret = -ENOMEM; | ||
829 | else | ||
830 | rds_iw_stats_inc(s_iw_rx_refill_from_thread); | ||
831 | mutex_unlock(&ic->i_recv_mutex); | ||
832 | |||
833 | if (rds_conn_up(conn)) | ||
834 | rds_iw_attempt_ack(ic); | ||
835 | |||
836 | return ret; | ||
837 | } | ||
838 | |||
839 | int __init rds_iw_recv_init(void) | ||
840 | { | ||
841 | struct sysinfo si; | ||
842 | int ret = -ENOMEM; | ||
843 | |||
844 | /* Default to roughly a third of all available RAM for recv memory */ | ||
845 | si_meminfo(&si); | ||
846 | rds_iw_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE; | ||
847 | |||
848 | rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming", | ||
849 | sizeof(struct rds_iw_incoming), | ||
850 | 0, 0, NULL); | ||
851 | if (rds_iw_incoming_slab == NULL) | ||
852 | goto out; | ||
853 | |||
854 | rds_iw_frag_slab = kmem_cache_create("rds_iw_frag", | ||
855 | sizeof(struct rds_page_frag), | ||
856 | 0, 0, NULL); | ||
857 | if (rds_iw_frag_slab == NULL) | ||
858 | kmem_cache_destroy(rds_iw_incoming_slab); | ||
859 | else | ||
860 | ret = 0; | ||
861 | out: | ||
862 | return ret; | ||
863 | } | ||
864 | |||
865 | void rds_iw_recv_exit(void) | ||
866 | { | ||
867 | kmem_cache_destroy(rds_iw_incoming_slab); | ||
868 | kmem_cache_destroy(rds_iw_frag_slab); | ||
869 | } | ||
diff --git a/net/rds/iw_ring.c b/net/rds/iw_ring.c new file mode 100644 index 000000000000..d422d4b5deef --- /dev/null +++ b/net/rds/iw_ring.c | |||
@@ -0,0 +1,169 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | |||
35 | #include "rds.h" | ||
36 | #include "iw.h" | ||
37 | |||
38 | /* | ||
39 | * Locking for IB rings. | ||
40 | * We assume that allocation is always protected by a mutex | ||
41 | * in the caller (this is a valid assumption for the current | ||
42 | * implementation). | ||
43 | * | ||
44 | * Freeing always happens in an interrupt, and hence only | ||
45 | * races with allocations, but not with other free()s. | ||
46 | * | ||
47 | * The interaction between allocation and freeing is that | ||
48 | * the alloc code has to determine the number of free entries. | ||
49 | * To this end, we maintain two counters; an allocation counter | ||
50 | * and a free counter. Both are allowed to run freely, and wrap | ||
51 | * around. | ||
52 | * The number of used entries is always (alloc_ctr - free_ctr) % NR. | ||
53 | * | ||
54 | * The current implementation makes free_ctr atomic. When the | ||
55 | * caller finds an allocation fails, it should set an "alloc fail" | ||
56 | * bit and retry the allocation. The "alloc fail" bit essentially tells | ||
57 | * the CQ completion handlers to wake it up after freeing some | ||
58 | * more entries. | ||
59 | */ | ||
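A standalone sketch of the counter arithmetic described above; because both counters are 32-bit and free-running, unsigned subtraction gives the number of in-use entries even across wrap-around (the values below are arbitrary):

/* Demo of the two free-running ring counters and their wrap-safe difference. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint32_t alloc_ctr = UINT32_MAX - 2;   /* about to wrap          */
	uint32_t free_ctr  = UINT32_MAX - 5;   /* trails by 3 entries    */

	printf("used before wrap: %u\n", alloc_ctr - free_ctr);   /* 3 */

	alloc_ctr += 10;    /* wraps past 0 */
	free_ctr  += 8;     /* also wraps   */

	printf("used after wrap:  %u\n", alloc_ctr - free_ctr);   /* 5 */
	return 0;
}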
60 | |||
61 | /* | ||
62 | * This only happens on shutdown. | ||
63 | */ | ||
64 | DECLARE_WAIT_QUEUE_HEAD(rds_iw_ring_empty_wait); | ||
65 | |||
66 | void rds_iw_ring_init(struct rds_iw_work_ring *ring, u32 nr) | ||
67 | { | ||
68 | memset(ring, 0, sizeof(*ring)); | ||
69 | ring->w_nr = nr; | ||
70 | rdsdebug("ring %p nr %u\n", ring, ring->w_nr); | ||
71 | } | ||
72 | |||
73 | static inline u32 __rds_iw_ring_used(struct rds_iw_work_ring *ring) | ||
74 | { | ||
75 | u32 diff; | ||
76 | |||
77 | /* This assumes that atomic_t has at least as many bits as u32 */ | ||
78 | diff = ring->w_alloc_ctr - (u32) atomic_read(&ring->w_free_ctr); | ||
79 | BUG_ON(diff > ring->w_nr); | ||
80 | |||
81 | return diff; | ||
82 | } | ||
83 | |||
84 | void rds_iw_ring_resize(struct rds_iw_work_ring *ring, u32 nr) | ||
85 | { | ||
86 | /* We only ever get called from the connection setup code, | ||
87 | * prior to creating the QP. */ | ||
88 | BUG_ON(__rds_iw_ring_used(ring)); | ||
89 | ring->w_nr = nr; | ||
90 | } | ||
91 | |||
92 | static int __rds_iw_ring_empty(struct rds_iw_work_ring *ring) | ||
93 | { | ||
94 | return __rds_iw_ring_used(ring) == 0; | ||
95 | } | ||
96 | |||
97 | u32 rds_iw_ring_alloc(struct rds_iw_work_ring *ring, u32 val, u32 *pos) | ||
98 | { | ||
99 | u32 ret = 0, avail; | ||
100 | |||
101 | avail = ring->w_nr - __rds_iw_ring_used(ring); | ||
102 | |||
103 | rdsdebug("ring %p val %u next %u free %u\n", ring, val, | ||
104 | ring->w_alloc_ptr, avail); | ||
105 | |||
106 | if (val && avail) { | ||
107 | ret = min(val, avail); | ||
108 | *pos = ring->w_alloc_ptr; | ||
109 | |||
110 | ring->w_alloc_ptr = (ring->w_alloc_ptr + ret) % ring->w_nr; | ||
111 | ring->w_alloc_ctr += ret; | ||
112 | } | ||
113 | |||
114 | return ret; | ||
115 | } | ||
116 | |||
117 | void rds_iw_ring_free(struct rds_iw_work_ring *ring, u32 val) | ||
118 | { | ||
119 | ring->w_free_ptr = (ring->w_free_ptr + val) % ring->w_nr; | ||
120 | atomic_add(val, &ring->w_free_ctr); | ||
121 | |||
122 | if (__rds_iw_ring_empty(ring) && | ||
123 | waitqueue_active(&rds_iw_ring_empty_wait)) | ||
124 | wake_up(&rds_iw_ring_empty_wait); | ||
125 | } | ||
126 | |||
127 | void rds_iw_ring_unalloc(struct rds_iw_work_ring *ring, u32 val) | ||
128 | { | ||
129 | ring->w_alloc_ptr = (ring->w_alloc_ptr - val) % ring->w_nr; | ||
130 | ring->w_alloc_ctr -= val; | ||
131 | } | ||
132 | |||
133 | int rds_iw_ring_empty(struct rds_iw_work_ring *ring) | ||
134 | { | ||
135 | return __rds_iw_ring_empty(ring); | ||
136 | } | ||
137 | |||
138 | int rds_iw_ring_low(struct rds_iw_work_ring *ring) | ||
139 | { | ||
140 | return __rds_iw_ring_used(ring) <= (ring->w_nr >> 2); | ||
141 | } | ||
142 | |||
143 | |||
144 | /* | ||
145 | * returns the oldest alloced ring entry. This will be the next one | ||
146 | * freed. This can't be called if there are none allocated. | ||
147 | */ | ||
148 | u32 rds_iw_ring_oldest(struct rds_iw_work_ring *ring) | ||
149 | { | ||
150 | return ring->w_free_ptr; | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * returns the number of completed work requests. | ||
155 | */ | ||
156 | |||
157 | u32 rds_iw_ring_completed(struct rds_iw_work_ring *ring, u32 wr_id, u32 oldest) | ||
158 | { | ||
159 | u32 ret; | ||
160 | |||
161 | if (oldest <= (unsigned long long)wr_id) | ||
162 | ret = (unsigned long long)wr_id - oldest + 1; | ||
163 | else | ||
164 | ret = ring->w_nr - oldest + (unsigned long long)wr_id + 1; | ||
165 | |||
166 | rdsdebug("ring %p ret %u wr_id %u oldest %u\n", ring, ret, | ||
167 | wr_id, oldest); | ||
168 | return ret; | ||
169 | } | ||
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c new file mode 100644 index 000000000000..22dd38ffd608 --- /dev/null +++ b/net/rds/iw_send.c | |||
@@ -0,0 +1,975 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/in.h> | ||
35 | #include <linux/device.h> | ||
36 | #include <linux/dmapool.h> | ||
37 | |||
38 | #include "rds.h" | ||
39 | #include "rdma.h" | ||
40 | #include "iw.h" | ||
41 | |||
42 | static void rds_iw_send_rdma_complete(struct rds_message *rm, | ||
43 | int wc_status) | ||
44 | { | ||
45 | int notify_status; | ||
46 | |||
47 | switch (wc_status) { | ||
48 | case IB_WC_WR_FLUSH_ERR: | ||
49 | return; | ||
50 | |||
51 | case IB_WC_SUCCESS: | ||
52 | notify_status = RDS_RDMA_SUCCESS; | ||
53 | break; | ||
54 | |||
55 | case IB_WC_REM_ACCESS_ERR: | ||
56 | notify_status = RDS_RDMA_REMOTE_ERROR; | ||
57 | break; | ||
58 | |||
59 | default: | ||
60 | notify_status = RDS_RDMA_OTHER_ERROR; | ||
61 | break; | ||
62 | } | ||
63 | rds_rdma_send_complete(rm, notify_status); | ||
64 | } | ||
65 | |||
66 | static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic, | ||
67 | struct rds_rdma_op *op) | ||
68 | { | ||
69 | if (op->r_mapped) { | ||
70 | ib_dma_unmap_sg(ic->i_cm_id->device, | ||
71 | op->r_sg, op->r_nents, | ||
72 | op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); | ||
73 | op->r_mapped = 0; | ||
74 | } | ||
75 | } | ||
76 | |||
77 | static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic, | ||
78 | struct rds_iw_send_work *send, | ||
79 | int wc_status) | ||
80 | { | ||
81 | struct rds_message *rm = send->s_rm; | ||
82 | |||
83 | rdsdebug("ic %p send %p rm %p\n", ic, send, rm); | ||
84 | |||
85 | ib_dma_unmap_sg(ic->i_cm_id->device, | ||
86 | rm->m_sg, rm->m_nents, | ||
87 | DMA_TO_DEVICE); | ||
88 | |||
89 | if (rm->m_rdma_op != NULL) { | ||
90 | rds_iw_send_unmap_rdma(ic, rm->m_rdma_op); | ||
91 | |||
92 | /* If the user asked for a completion notification on this | ||
93 | * message, we can implement three different semantics: | ||
94 | * 1. Notify when we received the ACK on the RDS message | ||
95 | * that was queued with the RDMA. This provides reliable | ||
96 | * notification of RDMA status at the expense of a one-way | ||
97 | * packet delay. | ||
98 | * 2. Notify when the IB stack gives us the completion event for | ||
99 | * the RDMA operation. | ||
100 | * 3. Notify when the IB stack gives us the completion event for | ||
101 | * the accompanying RDS messages. | ||
102 | * Here, we implement approach #3. To implement approach #2, | ||
103 | * call rds_rdma_send_complete from the cq_handler. To implement #1, | ||
104 | * don't call rds_rdma_send_complete at all, and fall back to the notify | ||
105 | * handling in the ACK processing code. | ||
106 | * | ||
107 | * Note: There's no need to explicitly sync any RDMA buffers using | ||
108 | * ib_dma_sync_sg_for_cpu - the completion for the RDMA | ||
109 | * operation itself unmapped the RDMA buffers, which takes care | ||
110 | * of synching. | ||
111 | */ | ||
112 | rds_iw_send_rdma_complete(rm, wc_status); | ||
113 | |||
114 | if (rm->m_rdma_op->r_write) | ||
115 | rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); | ||
116 | else | ||
117 | rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); | ||
118 | } | ||
119 | |||
120 | /* If anyone waited for this message to get flushed out, wake | ||
121 | * them up now */ | ||
122 | rds_message_unmapped(rm); | ||
123 | |||
124 | rds_message_put(rm); | ||
125 | send->s_rm = NULL; | ||
126 | } | ||
127 | |||
128 | void rds_iw_send_init_ring(struct rds_iw_connection *ic) | ||
129 | { | ||
130 | struct rds_iw_send_work *send; | ||
131 | u32 i; | ||
132 | |||
133 | for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { | ||
134 | struct ib_sge *sge; | ||
135 | |||
136 | send->s_rm = NULL; | ||
137 | send->s_op = NULL; | ||
138 | send->s_mapping = NULL; | ||
139 | |||
140 | send->s_wr.next = NULL; | ||
141 | send->s_wr.wr_id = i; | ||
142 | send->s_wr.sg_list = send->s_sge; | ||
143 | send->s_wr.num_sge = 1; | ||
144 | send->s_wr.opcode = IB_WR_SEND; | ||
145 | send->s_wr.send_flags = 0; | ||
146 | send->s_wr.ex.imm_data = 0; | ||
147 | |||
148 | sge = rds_iw_data_sge(ic, send->s_sge); | ||
149 | sge->lkey = 0; | ||
150 | |||
151 | sge = rds_iw_header_sge(ic, send->s_sge); | ||
152 | sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); | ||
153 | sge->length = sizeof(struct rds_header); | ||
154 | sge->lkey = 0; | ||
155 | |||
156 | send->s_mr = ib_alloc_fast_reg_mr(ic->i_pd, fastreg_message_size); | ||
157 | if (IS_ERR(send->s_mr)) { | ||
158 | printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed\n"); | ||
159 | break; | ||
160 | } | ||
161 | |||
162 | send->s_page_list = ib_alloc_fast_reg_page_list( | ||
163 | ic->i_cm_id->device, fastreg_message_size); | ||
164 | if (IS_ERR(send->s_page_list)) { | ||
165 | printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n"); | ||
166 | break; | ||
167 | } | ||
168 | } | ||
169 | } | ||
170 | |||
171 | void rds_iw_send_clear_ring(struct rds_iw_connection *ic) | ||
172 | { | ||
173 | struct rds_iw_send_work *send; | ||
174 | u32 i; | ||
175 | |||
176 | for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { | ||
177 | BUG_ON(!send->s_mr); | ||
178 | ib_dereg_mr(send->s_mr); | ||
179 | BUG_ON(!send->s_page_list); | ||
180 | ib_free_fast_reg_page_list(send->s_page_list); | ||
181 | if (send->s_wr.opcode == 0xdead) | ||
182 | continue; | ||
183 | if (send->s_rm) | ||
184 | rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); | ||
185 | if (send->s_op) | ||
186 | rds_iw_send_unmap_rdma(ic, send->s_op); | ||
187 | } | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * The _oldest/_free ring operations here race cleanly with the alloc/unalloc | ||
192 | * operations performed in the send path. As the sender allocs and potentially | ||
193 | * unallocs the next free entry in the ring, it doesn't alter which is | ||
194 | * the next to be freed, which is what this is concerned with. | ||
195 | */ | ||
196 | void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) | ||
197 | { | ||
198 | struct rds_connection *conn = context; | ||
199 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
200 | struct ib_wc wc; | ||
201 | struct rds_iw_send_work *send; | ||
202 | u32 completed; | ||
203 | u32 oldest; | ||
204 | u32 i; | ||
205 | int ret; | ||
206 | |||
207 | rdsdebug("cq %p conn %p\n", cq, conn); | ||
208 | rds_iw_stats_inc(s_iw_tx_cq_call); | ||
209 | ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); | ||
210 | if (ret) | ||
211 | rdsdebug("ib_req_notify_cq send failed: %d\n", ret); | ||
212 | |||
213 | while (ib_poll_cq(cq, 1, &wc) > 0) { | ||
214 | rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", | ||
215 | (unsigned long long)wc.wr_id, wc.status, wc.byte_len, | ||
216 | be32_to_cpu(wc.ex.imm_data)); | ||
217 | rds_iw_stats_inc(s_iw_tx_cq_event); | ||
218 | |||
219 | if (wc.status != IB_WC_SUCCESS) { | ||
220 | printk(KERN_ERR "WC Error: status = %d opcode = %d\n", wc.status, wc.opcode); | ||
221 | break; | ||
222 | } | ||
223 | |||
224 | if (wc.opcode == IB_WC_LOCAL_INV && wc.wr_id == RDS_IW_LOCAL_INV_WR_ID) { | ||
225 | ic->i_fastreg_posted = 0; | ||
226 | continue; | ||
227 | } | ||
228 | |||
229 | if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) { | ||
230 | ic->i_fastreg_posted = 1; | ||
231 | continue; | ||
232 | } | ||
233 | |||
234 | if (wc.wr_id == RDS_IW_ACK_WR_ID) { | ||
235 | if (ic->i_ack_queued + HZ/2 < jiffies) | ||
236 | rds_iw_stats_inc(s_iw_tx_stalled); | ||
237 | rds_iw_ack_send_complete(ic); | ||
238 | continue; | ||
239 | } | ||
240 | |||
241 | oldest = rds_iw_ring_oldest(&ic->i_send_ring); | ||
242 | |||
243 | completed = rds_iw_ring_completed(&ic->i_send_ring, wc.wr_id, oldest); | ||
244 | |||
245 | for (i = 0; i < completed; i++) { | ||
246 | send = &ic->i_sends[oldest]; | ||
247 | |||
248 | /* In the error case, wc.opcode sometimes contains garbage */ | ||
249 | switch (send->s_wr.opcode) { | ||
250 | case IB_WR_SEND: | ||
251 | if (send->s_rm) | ||
252 | rds_iw_send_unmap_rm(ic, send, wc.status); | ||
253 | break; | ||
254 | case IB_WR_FAST_REG_MR: | ||
255 | case IB_WR_RDMA_WRITE: | ||
256 | case IB_WR_RDMA_READ: | ||
257 | case IB_WR_RDMA_READ_WITH_INV: | ||
258 | /* Nothing to be done - the SG list will be unmapped | ||
259 | * when the SEND completes. */ | ||
260 | break; | ||
261 | default: | ||
262 | if (printk_ratelimit()) | ||
263 | printk(KERN_NOTICE | ||
264 | "RDS/IW: %s: unexpected opcode 0x%x in WR!\n", | ||
265 | __func__, send->s_wr.opcode); | ||
266 | break; | ||
267 | } | ||
268 | |||
269 | send->s_wr.opcode = 0xdead; | ||
270 | send->s_wr.num_sge = 1; | ||
271 | if (send->s_queued + HZ/2 < jiffies) | ||
272 | rds_iw_stats_inc(s_iw_tx_stalled); | ||
273 | |||
274 | /* If an RDMA operation produced an error, signal this right | ||
275 | * away. If we don't, the subsequent SEND that goes with this | ||
276 | * RDMA will be canceled with ERR_WFLUSH, and the application | ||
277 | * will never learn that the RDMA failed. */ | ||
278 | if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) { | ||
279 | struct rds_message *rm; | ||
280 | |||
281 | rm = rds_send_get_message(conn, send->s_op); | ||
282 | if (rm) | ||
283 | rds_iw_send_rdma_complete(rm, wc.status); | ||
284 | } | ||
285 | |||
286 | oldest = (oldest + 1) % ic->i_send_ring.w_nr; | ||
287 | } | ||
288 | |||
289 | rds_iw_ring_free(&ic->i_send_ring, completed); | ||
290 | |||
291 | if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) | ||
292 | || test_bit(0, &conn->c_map_queued)) | ||
293 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); | ||
294 | |||
295 | /* We expect errors as the qp is drained during shutdown */ | ||
296 | if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) { | ||
297 | rds_iw_conn_error(conn, | ||
298 | "send completion on %pI4 " | ||
299 | "had status %u, disconnecting and reconnecting\n", | ||
300 | &conn->c_faddr, wc.status); | ||
301 | } | ||
302 | } | ||
303 | } | ||
304 | |||
305 | /* | ||
306 | * This is the main function for allocating credits when sending | ||
307 | * messages. | ||
308 | * | ||
309 | * Conceptually, we have two counters: | ||
310 | * - send credits: this tells us how many WRs we're allowed | ||
311 | * to submit without overrunning the receiver's queue. For | ||
312 | * each SEND WR we post, we decrement this by one. | ||
313 | * | ||
314 | * - posted credits: this tells us how many WRs we recently | ||
315 | * posted to the receive queue. This value is transferred | ||
316 | * to the peer as a "credit update" in a RDS header field. | ||
317 | * Every time we transmit credits to the peer, we subtract | ||
318 | * the amount of transferred credits from this counter. | ||
319 | * | ||
320 | * It is essential that we avoid situations where both sides have | ||
321 | * exhausted their send credits, and are unable to send new credits | ||
322 | * to the peer. We achieve this by requiring that we send at least | ||
323 | * one credit update to the peer before exhausting our credits. | ||
324 | * When new credits arrive, we subtract one credit that is withheld | ||
325 | * until we've posted new buffers and are ready to transmit these | ||
326 | * credits (see rds_iw_send_add_credits below). | ||
327 | * | ||
328 | * The RDS send code is essentially single-threaded; rds_send_xmit | ||
329 | * grabs c_send_lock to ensure exclusive access to the send ring. | ||
330 | * However, the ACK sending code is independent and can race with | ||
331 | * message SENDs. | ||
332 | * | ||
333 | * In the send path, we need to update the counters for send credits | ||
334 | * and the counter of posted buffers atomically - when we use the | ||
335 | * last available credit, we cannot allow another thread to race us | ||
336 | * and grab the posted credits counter. Hence, we have to use a | ||
337 | * spinlock to protect the credit counter, or use atomics. | ||
338 | * | ||
339 | * Spinlocks shared between the send and the receive path are bad, | ||
340 | * because they create unnecessary delays. An early implementation | ||
341 | * using a spinlock showed a 5% degradation in throughput at some | ||
342 | * loads. | ||
343 | * | ||
344 | * This implementation avoids spinlocks completely, putting both | ||
345 | * counters into a single atomic, and updating that atomic using | ||
346 | * atomic_add (in the receive path, when receiving fresh credits), | ||
347 | * and using atomic_cmpxchg when updating the two counters. | ||
348 | */ | ||
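The following standalone sketch shows the shape of that cmpxchg-based update, with both 16-bit counters packed into one 32-bit atomic. The packing macros and helper are illustrative only, and the sketch omits details the real code handles, such as withholding the last send credit for a credit update:

/* Sketch of a cmpxchg-based credit grab; packing and names are illustrative. */
#include <stdio.h>
#include <stdatomic.h>

#define GET_SEND(v)   ((v) & 0xffff)    /* low 16 bits: send credits    */
#define GET_POST(v)   ((v) >> 16)       /* high 16 bits: posted credits */
#define SET_SEND(v)   (v)
#define SET_POST(v)   ((v) << 16)

static atomic_uint credits;

/* Grab up to 'wanted' send credits and harvest any posted credits,
 * retrying if another thread updated the atomic underneath us. */
static unsigned int grab_credits(unsigned int wanted, unsigned int *advertise)
{
	unsigned int oldval, newval, avail, posted, got;

	do {
		oldval = atomic_load(&credits);
		avail  = GET_SEND(oldval);
		posted = GET_POST(oldval);

		got = wanted < avail ? wanted : avail;
		*advertise = posted;

		newval = oldval - SET_SEND(got) - SET_POST(posted);
	} while (!atomic_compare_exchange_weak(&credits, &oldval, newval));

	return got;
}

int main(void)
{
	unsigned int adv;

	atomic_store(&credits, SET_SEND(8) | SET_POST(3));
	printf("got %u send credits, advertising %u posted\n",
	       grab_credits(5, &adv), adv);
	printf("remaining send credits: %u\n", GET_SEND(atomic_load(&credits)));
	return 0;
}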
349 | int rds_iw_send_grab_credits(struct rds_iw_connection *ic, | ||
350 | u32 wanted, u32 *adv_credits, int need_posted) | ||
351 | { | ||
352 | unsigned int avail, posted, got = 0, advertise; | ||
353 | long oldval, newval; | ||
354 | |||
355 | *adv_credits = 0; | ||
356 | if (!ic->i_flowctl) | ||
357 | return wanted; | ||
358 | |||
359 | try_again: | ||
360 | advertise = 0; | ||
361 | oldval = newval = atomic_read(&ic->i_credits); | ||
362 | posted = IB_GET_POST_CREDITS(oldval); | ||
363 | avail = IB_GET_SEND_CREDITS(oldval); | ||
364 | |||
365 | rdsdebug("rds_iw_send_grab_credits(%u): credits=%u posted=%u\n", | ||
366 | wanted, avail, posted); | ||
367 | |||
368 | /* The last credit must be used to send a credit update. */ | ||
369 | if (avail && !posted) | ||
370 | avail--; | ||
371 | |||
372 | if (avail < wanted) { | ||
373 | struct rds_connection *conn = ic->i_cm_id->context; | ||
374 | |||
375 | /* Oops, there aren't that many credits left! */ | ||
376 | set_bit(RDS_LL_SEND_FULL, &conn->c_flags); | ||
377 | got = avail; | ||
378 | } else { | ||
379 | /* Sometimes you get what you want, lalala. */ | ||
380 | got = wanted; | ||
381 | } | ||
382 | newval -= IB_SET_SEND_CREDITS(got); | ||
383 | |||
384 | /* | ||
385 | * If need_posted is non-zero, then the caller wants | ||
386 | * the posted credits regardless of whether any send credits are | ||
387 | * available. | ||
388 | */ | ||
389 | if (posted && (got || need_posted)) { | ||
390 | advertise = min_t(unsigned int, posted, RDS_MAX_ADV_CREDIT); | ||
391 | newval -= IB_SET_POST_CREDITS(advertise); | ||
392 | } | ||
393 | |||
394 | /* Finally bill everything */ | ||
395 | if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval) | ||
396 | goto try_again; | ||
397 | |||
398 | *adv_credits = advertise; | ||
399 | return got; | ||
400 | } | ||
401 | |||
402 | void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits) | ||
403 | { | ||
404 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
405 | |||
406 | if (credits == 0) | ||
407 | return; | ||
408 | |||
409 | rdsdebug("rds_iw_send_add_credits(%u): current=%u%s\n", | ||
410 | credits, | ||
411 | IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)), | ||
412 | test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : ""); | ||
413 | |||
414 | atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits); | ||
415 | if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags)) | ||
416 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); | ||
417 | |||
418 | WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384); | ||
419 | |||
420 | rds_iw_stats_inc(s_iw_rx_credit_updates); | ||
421 | } | ||
422 | |||
423 | void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted) | ||
424 | { | ||
425 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
426 | |||
427 | if (posted == 0) | ||
428 | return; | ||
429 | |||
430 | atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits); | ||
431 | |||
432 | /* Decide whether to send an update to the peer now. | ||
433 | * If we would send a credit update for every single buffer we | ||
434 | * post, we would end up with an ACK storm (ACK arrives, | ||
435 | * consumes buffer, we refill the ring, send ACK to remote | ||
436 | * advertising the newly posted buffer... ad inf) | ||
437 | * | ||
438 | * Performance pretty much depends on how often we send | ||
439 | * credit updates - too frequent updates mean lots of ACKs. | ||
440 | * Too infrequent updates, and the peer will run out of | ||
441 | * credits and have to throttle. | ||
442 | * For the time being, 16 seems to be a good compromise. | ||
443 | */ | ||
444 | if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16) | ||
445 | set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); | ||
446 | } | ||
447 | |||
448 | static inline void | ||
449 | rds_iw_xmit_populate_wr(struct rds_iw_connection *ic, | ||
450 | struct rds_iw_send_work *send, unsigned int pos, | ||
451 | unsigned long buffer, unsigned int length, | ||
452 | int send_flags) | ||
453 | { | ||
454 | struct ib_sge *sge; | ||
455 | |||
456 | WARN_ON(pos != send - ic->i_sends); | ||
457 | |||
458 | send->s_wr.send_flags = send_flags; | ||
459 | send->s_wr.opcode = IB_WR_SEND; | ||
460 | send->s_wr.num_sge = 2; | ||
461 | send->s_wr.next = NULL; | ||
462 | send->s_queued = jiffies; | ||
463 | send->s_op = NULL; | ||
464 | |||
465 | if (length != 0) { | ||
466 | sge = rds_iw_data_sge(ic, send->s_sge); | ||
467 | sge->addr = buffer; | ||
468 | sge->length = length; | ||
469 | sge->lkey = rds_iw_local_dma_lkey(ic); | ||
470 | |||
471 | sge = rds_iw_header_sge(ic, send->s_sge); | ||
472 | } else { | ||
473 | /* We're sending a packet with no payload. There is only | ||
474 | * one SGE */ | ||
475 | send->s_wr.num_sge = 1; | ||
476 | sge = &send->s_sge[0]; | ||
477 | } | ||
478 | |||
479 | sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header)); | ||
480 | sge->length = sizeof(struct rds_header); | ||
481 | sge->lkey = rds_iw_local_dma_lkey(ic); | ||
482 | } | ||
483 | |||
484 | /* | ||
485 | * This can be called multiple times for a given message. The first time | ||
486 | * we see a message we map its scatterlist into the IB device so that | ||
487 | * we can provide that mapped address to the IB scatter gather entries | ||
488 | * in the IB work requests. We translate the scatterlist into a series | ||
489 | * of work requests that fragment the message. These work requests complete | ||
490 | * in order so we pass ownership of the message to the completion handler | ||
491 | * once we send the final fragment. | ||
492 | * | ||
493 | * The RDS core uses the c_send_lock to only enter this function once | ||
494 | * per connection. This makes sure that the tx ring alloc/unalloc pairs | ||
495 | * don't get out of sync and confuse the ring. | ||
496 | */ | ||
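As a quick illustration of the fragmentation math used near the top of this function, a zero-length message still consumes one work request for its header, and anything longer needs one per RDS_FRAG_SIZE chunk; the fragment size below is an assumed illustrative value:

/* Demo of how a message length maps onto send work requests. */
#include <stdio.h>

#define RDS_FRAG_SIZE 4096u   /* illustrative value */

static unsigned int wrs_needed(unsigned int len)
{
	if (len == 0)
		return 1;
	return (len + RDS_FRAG_SIZE - 1) / RDS_FRAG_SIZE;   /* ceil() */
}

int main(void)
{
	unsigned int lens[] = { 0, 1, 4096, 4097, 12288 };
	unsigned int i;

	for (i = 0; i < sizeof(lens) / sizeof(lens[0]); i++)
		printf("len %5u -> %u work request(s)\n", lens[i], wrs_needed(lens[i]));
	return 0;
}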
497 | int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, | ||
498 | unsigned int hdr_off, unsigned int sg, unsigned int off) | ||
499 | { | ||
500 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
501 | struct ib_device *dev = ic->i_cm_id->device; | ||
502 | struct rds_iw_send_work *send = NULL; | ||
503 | struct rds_iw_send_work *first; | ||
504 | struct rds_iw_send_work *prev; | ||
505 | struct ib_send_wr *failed_wr; | ||
506 | struct scatterlist *scat; | ||
507 | u32 pos; | ||
508 | u32 i; | ||
509 | u32 work_alloc; | ||
510 | u32 credit_alloc; | ||
511 | u32 posted; | ||
512 | u32 adv_credits = 0; | ||
513 | int send_flags = 0; | ||
514 | int sent; | ||
515 | int ret; | ||
516 | int flow_controlled = 0; | ||
517 | |||
518 | BUG_ON(off % RDS_FRAG_SIZE); | ||
519 | BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); | ||
520 | |||
521 | /* Fastreg support */ | ||
522 | if (rds_rdma_cookie_key(rm->m_rdma_cookie) | ||
523 | && !ic->i_fastreg_posted) { | ||
524 | ret = -EAGAIN; | ||
525 | goto out; | ||
526 | } | ||
527 | |||
528 | /* FIXME we may overallocate here */ | ||
529 | if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) | ||
530 | i = 1; | ||
531 | else | ||
532 | i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE); | ||
533 | |||
534 | work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos); | ||
535 | if (work_alloc == 0) { | ||
536 | set_bit(RDS_LL_SEND_FULL, &conn->c_flags); | ||
537 | rds_iw_stats_inc(s_iw_tx_ring_full); | ||
538 | ret = -ENOMEM; | ||
539 | goto out; | ||
540 | } | ||
541 | |||
542 | credit_alloc = work_alloc; | ||
543 | if (ic->i_flowctl) { | ||
544 | credit_alloc = rds_iw_send_grab_credits(ic, work_alloc, &posted, 0); | ||
545 | adv_credits += posted; | ||
546 | if (credit_alloc < work_alloc) { | ||
547 | rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc); | ||
548 | work_alloc = credit_alloc; | ||
549 | flow_controlled++; | ||
550 | } | ||
551 | if (work_alloc == 0) { | ||
552 | rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); | ||
553 | rds_iw_stats_inc(s_iw_tx_throttle); | ||
554 | ret = -ENOMEM; | ||
555 | goto out; | ||
556 | } | ||
557 | } | ||
558 | |||
559 | /* map the message the first time we see it */ | ||
560 | if (ic->i_rm == NULL) { | ||
561 | /* | ||
562 | printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n", | ||
563 | be16_to_cpu(rm->m_inc.i_hdr.h_dport), | ||
564 | rm->m_inc.i_hdr.h_flags, | ||
565 | be32_to_cpu(rm->m_inc.i_hdr.h_len)); | ||
566 | */ | ||
567 | if (rm->m_nents) { | ||
568 | rm->m_count = ib_dma_map_sg(dev, | ||
569 | rm->m_sg, rm->m_nents, DMA_TO_DEVICE); | ||
570 | rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count); | ||
571 | if (rm->m_count == 0) { | ||
572 | rds_iw_stats_inc(s_iw_tx_sg_mapping_failure); | ||
573 | rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); | ||
574 | ret = -ENOMEM; /* XXX ? */ | ||
575 | goto out; | ||
576 | } | ||
577 | } else { | ||
578 | rm->m_count = 0; | ||
579 | } | ||
580 | |||
581 | ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; | ||
582 | ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes; | ||
583 | rds_message_addref(rm); | ||
584 | ic->i_rm = rm; | ||
585 | |||
586 | /* Finalize the header */ | ||
587 | if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags)) | ||
588 | rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED; | ||
589 | if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) | ||
590 | rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED; | ||
591 | |||
592 | /* If it has a RDMA op, tell the peer we did it. This is | ||
593 | * used by the peer to release use-once RDMA MRs. */ | ||
594 | if (rm->m_rdma_op) { | ||
595 | struct rds_ext_header_rdma ext_hdr; | ||
596 | |||
597 | ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); | ||
598 | rds_message_add_extension(&rm->m_inc.i_hdr, | ||
599 | RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); | ||
600 | } | ||
601 | if (rm->m_rdma_cookie) { | ||
602 | rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr, | ||
603 | rds_rdma_cookie_key(rm->m_rdma_cookie), | ||
604 | rds_rdma_cookie_offset(rm->m_rdma_cookie)); | ||
605 | } | ||
606 | |||
607 | /* Note - rds_iw_piggyb_ack clears the ACK_REQUIRED bit, so | ||
608 | * we should not do this unless we have a chance of at least | ||
609 | * sticking the header into the send ring. Which is why we | ||
610 | * should call rds_iw_ring_alloc first. */ | ||
611 | rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_iw_piggyb_ack(ic)); | ||
612 | rds_message_make_checksum(&rm->m_inc.i_hdr); | ||
613 | |||
614 | /* | ||
615 | * Update adv_credits since we reset the ACK_REQUIRED bit. | ||
616 | */ | ||
617 | rds_iw_send_grab_credits(ic, 0, &posted, 1); | ||
618 | adv_credits += posted; | ||
619 | BUG_ON(adv_credits > 255); | ||
620 | } else if (ic->i_rm != rm) | ||
621 | BUG(); | ||
622 | |||
623 | send = &ic->i_sends[pos]; | ||
624 | first = send; | ||
625 | prev = NULL; | ||
626 | scat = &rm->m_sg[sg]; | ||
627 | sent = 0; | ||
628 | i = 0; | ||
629 | |||
630 | /* Sometimes you want to put a fence between an RDMA | ||
631 | * READ and the following SEND. | ||
632 | * We could either do this all the time | ||
633 | * or when requested by the user. Right now, we let | ||
634 | * the application choose. | ||
635 | */ | ||
636 | if (rm->m_rdma_op && rm->m_rdma_op->r_fence) | ||
637 | send_flags = IB_SEND_FENCE; | ||
638 | |||
639 | /* | ||
640 | * We could be copying the header into the unused tail of the page. | ||
641 | * That would need to be changed in the future when those pages might | ||
642 | * be mapped userspace pages or page cache pages. So instead we always | ||
643 | * use a second sge and our long-lived ring of mapped headers. We send | ||
644 | * the header after the data so that the data payload can be aligned on | ||
645 | * the receiver. | ||
646 | */ | ||
647 | |||
648 | /* handle a 0-len message */ | ||
649 | if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) { | ||
650 | rds_iw_xmit_populate_wr(ic, send, pos, 0, 0, send_flags); | ||
651 | goto add_header; | ||
652 | } | ||
653 | |||
654 | /* if there's data reference it with a chain of work reqs */ | ||
655 | for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { | ||
656 | unsigned int len; | ||
657 | |||
658 | send = &ic->i_sends[pos]; | ||
659 | |||
660 | len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); | ||
661 | rds_iw_xmit_populate_wr(ic, send, pos, | ||
662 | ib_sg_dma_address(dev, scat) + off, len, | ||
663 | send_flags); | ||
664 | |||
665 | /* | ||
666 | * We want to delay signaling completions just enough to get | ||
667 | * the batching benefits but not so much that we create dead time | ||
668 | * on the wire. | ||
669 | */ | ||
670 | if (ic->i_unsignaled_wrs-- == 0) { | ||
671 | ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; | ||
672 | send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; | ||
673 | } | ||
674 | |||
675 | ic->i_unsignaled_bytes -= len; | ||
676 | if (ic->i_unsignaled_bytes <= 0) { | ||
677 | ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes; | ||
678 | send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; | ||
679 | } | ||
680 | |||
681 | /* | ||
682 | * Always signal the last one if we're stopping due to flow control. | ||
683 | */ | ||
684 | if (flow_controlled && i == (work_alloc-1)) | ||
685 | send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; | ||
686 | |||
687 | rdsdebug("send %p wr %p num_sge %u next %p\n", send, | ||
688 | &send->s_wr, send->s_wr.num_sge, send->s_wr.next); | ||
689 | |||
690 | sent += len; | ||
691 | off += len; | ||
692 | if (off == ib_sg_dma_len(dev, scat)) { | ||
693 | scat++; | ||
694 | off = 0; | ||
695 | } | ||
696 | |||
697 | add_header: | ||
698 | /* Tack on the header after the data. The header SGE should already | ||
699 | * have been set up to point to the right header buffer. */ | ||
700 | memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); | ||
701 | |||
702 | if (0) { | ||
703 | struct rds_header *hdr = &ic->i_send_hdrs[pos]; | ||
704 | |||
705 | printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n", | ||
706 | be16_to_cpu(hdr->h_dport), | ||
707 | hdr->h_flags, | ||
708 | be32_to_cpu(hdr->h_len)); | ||
709 | } | ||
710 | if (adv_credits) { | ||
711 | struct rds_header *hdr = &ic->i_send_hdrs[pos]; | ||
712 | |||
713 | /* add credit and redo the header checksum */ | ||
714 | hdr->h_credit = adv_credits; | ||
715 | rds_message_make_checksum(hdr); | ||
716 | adv_credits = 0; | ||
717 | rds_iw_stats_inc(s_iw_tx_credit_updates); | ||
718 | } | ||
719 | |||
720 | if (prev) | ||
721 | prev->s_wr.next = &send->s_wr; | ||
722 | prev = send; | ||
723 | |||
724 | pos = (pos + 1) % ic->i_send_ring.w_nr; | ||
725 | } | ||
726 | |||
727 | /* Account the RDS header in the number of bytes we sent, but just once. | ||
728 | * The caller has no concept of fragmentation. */ | ||
729 | if (hdr_off == 0) | ||
730 | sent += sizeof(struct rds_header); | ||
731 | |||
732 | /* if we finished the message then send completion owns it */ | ||
733 | if (scat == &rm->m_sg[rm->m_count]) { | ||
734 | prev->s_rm = ic->i_rm; | ||
735 | prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; | ||
736 | ic->i_rm = NULL; | ||
737 | } | ||
738 | |||
739 | if (i < work_alloc) { | ||
740 | rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i); | ||
741 | work_alloc = i; | ||
742 | } | ||
743 | if (ic->i_flowctl && i < credit_alloc) | ||
744 | rds_iw_send_add_credits(conn, credit_alloc - i); | ||
745 | |||
746 | /* XXX need to worry about failed_wr and partial sends. */ | ||
747 | failed_wr = &first->s_wr; | ||
748 | ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); | ||
749 | rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, | ||
750 | first, &first->s_wr, ret, failed_wr); | ||
751 | BUG_ON(failed_wr != &first->s_wr); | ||
752 | if (ret) { | ||
753 | printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 " | ||
754 | "returned %d\n", &conn->c_faddr, ret); | ||
755 | rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); | ||
756 | if (prev->s_rm) { | ||
757 | ic->i_rm = prev->s_rm; | ||
758 | prev->s_rm = NULL; | ||
759 | } | ||
760 | goto out; | ||
761 | } | ||
762 | |||
763 | ret = sent; | ||
764 | out: | ||
765 | BUG_ON(adv_credits); | ||
766 | return ret; | ||
767 | } | ||
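
The completion-signaling policy in the send loop above counts down both unsignaled work requests and unsignaled bytes, and sets IB_SEND_SIGNALED when either limit is reached. A minimal user-space sketch of that policy, assuming the default thresholds of 16 WRs and 16 MB from iw_sysctl.c; the fragment size and loop length are illustrative:

/* Stand-alone model of the completion-signaling policy in the send loop
 * above: a work request is signaled when either the outstanding unsignaled
 * WR count or the unsignaled byte count reaches its limit.  Thresholds
 * mirror the defaults of rds_iw_sysctl_max_unsig_wrs/_bytes; everything
 * else is illustrative.
 */
#include <stdio.h>

#define MAX_UNSIG_WRS	16
#define MAX_UNSIG_BYTES	(16 << 20)

int main(void)
{
	long unsig_wrs = MAX_UNSIG_WRS;
	long unsig_bytes = MAX_UNSIG_BYTES;
	unsigned int frag = 4096;	/* one RDS_FRAG_SIZE-sized fragment per WR */
	int i;

	for (i = 0; i < 64; i++) {
		int signaled = 0;

		if (unsig_wrs-- == 0) {
			unsig_wrs = MAX_UNSIG_WRS;
			signaled = 1;
		}
		unsig_bytes -= frag;
		if (unsig_bytes <= 0) {
			unsig_bytes = MAX_UNSIG_BYTES;
			signaled = 1;
		}
		if (signaled)
			printf("WR %d would carry IB_SEND_SIGNALED\n", i);
	}
	return 0;
}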
768 | |||
769 | static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr) | ||
770 | { | ||
771 | BUG_ON(nent > send->s_page_list->max_page_list_len); | ||
772 | /* | ||
773 | * Perform a WR for the fast_reg_mr. Each individual page | ||
774 | * in the sg list is added to the fast reg page list and placed | ||
775 | * inside the fast_reg_mr WR. | ||
776 | */ | ||
777 | send->s_wr.opcode = IB_WR_FAST_REG_MR; | ||
778 | send->s_wr.wr.fast_reg.length = len; | ||
779 | send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey; | ||
780 | send->s_wr.wr.fast_reg.page_list = send->s_page_list; | ||
781 | send->s_wr.wr.fast_reg.page_list_len = nent; | ||
782 | send->s_wr.wr.fast_reg.page_shift = rds_iwdev->page_shift; | ||
783 | send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE; | ||
784 | send->s_wr.wr.fast_reg.iova_start = sg_addr; | ||
785 | |||
786 | ib_update_fast_reg_key(send->s_mr, send->s_remap_count++); | ||
787 | } | ||
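
For reference, ib_update_fast_reg_key() above varies only the low "key" byte of the MR's rkey, so each registration posted with the incremented s_remap_count carries a distinct key. A hedged user-space model of that convention; the 24-bit-index / 8-bit-key split is the usual verbs layout, not something this diff defines:

/* Model of how the fastreg rkey is varied with the per-send remap count.
 * The bit layout is shown only for illustration.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t update_fast_reg_key(uint32_t rkey, uint8_t newkey)
{
	return (rkey & 0xffffff00u) | newkey;
}

int main(void)
{
	uint32_t rkey = 0x00123400;	/* illustrative starting rkey */
	uint8_t remap_count = 0;
	int i;

	for (i = 0; i < 4; i++)
		printf("registration %d uses rkey 0x%08x\n", i,
		       update_fast_reg_key(rkey, remap_count++));
	return 0;
}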
788 | |||
789 | int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) | ||
790 | { | ||
791 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
792 | struct rds_iw_send_work *send = NULL; | ||
793 | struct rds_iw_send_work *first; | ||
794 | struct rds_iw_send_work *prev; | ||
795 | struct ib_send_wr *failed_wr; | ||
796 | struct rds_iw_device *rds_iwdev; | ||
797 | struct scatterlist *scat; | ||
798 | unsigned long len; | ||
799 | u64 remote_addr = op->r_remote_addr; | ||
800 | u32 pos, fr_pos; | ||
801 | u32 work_alloc; | ||
802 | u32 i; | ||
803 | u32 j; | ||
804 | int sent; | ||
805 | int ret; | ||
806 | int num_sge; | ||
807 | |||
808 | rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); | ||
809 | |||
810 | /* map the message the first time we see it */ | ||
811 | if (!op->r_mapped) { | ||
812 | op->r_count = ib_dma_map_sg(ic->i_cm_id->device, | ||
813 | op->r_sg, op->r_nents, (op->r_write) ? | ||
814 | DMA_TO_DEVICE : DMA_FROM_DEVICE); | ||
815 | rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count); | ||
816 | if (op->r_count == 0) { | ||
817 | rds_iw_stats_inc(s_iw_tx_sg_mapping_failure); | ||
818 | ret = -ENOMEM; /* XXX ? */ | ||
819 | goto out; | ||
820 | } | ||
821 | |||
822 | op->r_mapped = 1; | ||
823 | } | ||
824 | |||
825 | if (!op->r_write) { | ||
826 | /* Alloc space on the send queue for the fastreg */ | ||
827 | work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos); | ||
828 | if (work_alloc != 1) { | ||
829 | rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); | ||
830 | rds_iw_stats_inc(s_iw_tx_ring_full); | ||
831 | ret = -ENOMEM; | ||
832 | goto out; | ||
833 | } | ||
834 | } | ||
835 | |||
836 | /* | ||
837 | * Instead of knowing how to return a partial rdma read/write we insist that there | ||
838 | * be enough work requests to send the entire message. | ||
839 | */ | ||
840 | i = ceil(op->r_count, rds_iwdev->max_sge); | ||
841 | |||
842 | work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos); | ||
843 | if (work_alloc != i) { | ||
844 | rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); | ||
845 | rds_iw_stats_inc(s_iw_tx_ring_full); | ||
846 | ret = -ENOMEM; | ||
847 | goto out; | ||
848 | } | ||
849 | |||
850 | send = &ic->i_sends[pos]; | ||
851 | if (!op->r_write) { | ||
852 | first = prev = &ic->i_sends[fr_pos]; | ||
853 | } else { | ||
854 | first = send; | ||
855 | prev = NULL; | ||
856 | } | ||
857 | scat = &op->r_sg[0]; | ||
858 | sent = 0; | ||
859 | num_sge = op->r_count; | ||
860 | |||
861 | for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { | ||
862 | send->s_wr.send_flags = 0; | ||
863 | send->s_queued = jiffies; | ||
864 | |||
865 | /* | ||
866 | * We want to delay signaling completions just enough to get | ||
867 | * the batching benefits but not so much that we create dead time on the wire. | ||
868 | */ | ||
869 | if (ic->i_unsignaled_wrs-- == 0) { | ||
870 | ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; | ||
871 | send->s_wr.send_flags = IB_SEND_SIGNALED; | ||
872 | } | ||
873 | |||
874 | /* To avoid needing extra plumbing to invalidate the fastreg_mr used for | ||
875 | * local access once RDS is finished with it, we use | ||
876 | * IB_WR_RDMA_READ_WITH_INV, which invalidates it after the read completes. | ||
877 | */ | ||
878 | if (op->r_write) | ||
879 | send->s_wr.opcode = IB_WR_RDMA_WRITE; | ||
880 | else | ||
881 | send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV; | ||
882 | |||
883 | send->s_wr.wr.rdma.remote_addr = remote_addr; | ||
884 | send->s_wr.wr.rdma.rkey = op->r_key; | ||
885 | send->s_op = op; | ||
886 | |||
887 | if (num_sge > rds_iwdev->max_sge) { | ||
888 | send->s_wr.num_sge = rds_iwdev->max_sge; | ||
889 | num_sge -= rds_iwdev->max_sge; | ||
890 | } else | ||
891 | send->s_wr.num_sge = num_sge; | ||
892 | |||
893 | send->s_wr.next = NULL; | ||
894 | |||
895 | if (prev) | ||
896 | prev->s_wr.next = &send->s_wr; | ||
897 | |||
898 | for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) { | ||
899 | len = ib_sg_dma_len(ic->i_cm_id->device, scat); | ||
900 | |||
901 | if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) | ||
902 | send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat); | ||
903 | else { | ||
904 | send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); | ||
905 | send->s_sge[j].length = len; | ||
906 | send->s_sge[j].lkey = rds_iw_local_dma_lkey(ic); | ||
907 | } | ||
908 | |||
909 | sent += len; | ||
910 | rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr); | ||
911 | remote_addr += len; | ||
912 | |||
913 | scat++; | ||
914 | } | ||
915 | |||
916 | if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) { | ||
917 | send->s_wr.num_sge = 1; | ||
918 | send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr; | ||
919 | send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes; | ||
920 | send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey; | ||
921 | } | ||
922 | |||
923 | rdsdebug("send %p wr %p num_sge %u next %p\n", send, | ||
924 | &send->s_wr, send->s_wr.num_sge, send->s_wr.next); | ||
925 | |||
926 | prev = send; | ||
927 | if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) | ||
928 | send = ic->i_sends; | ||
929 | } | ||
930 | |||
931 | /* if we finished the message then send completion owns it */ | ||
932 | if (scat == &op->r_sg[op->r_count]) | ||
933 | first->s_wr.send_flags = IB_SEND_SIGNALED; | ||
934 | |||
935 | if (i < work_alloc) { | ||
936 | rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i); | ||
937 | work_alloc = i; | ||
938 | } | ||
939 | |||
940 | /* On iWARP, local memory access by a remote system (i.e., RDMA Read) is not | ||
941 | * recommended. Putting the lkey on the wire is a security hole, as it can | ||
942 | * give the remote system access to all of this node's memory. Some | ||
943 | * adapters do not allow using the lkey for this at all. To bypass this, use a | ||
944 | * fastreg_mr (or possibly a dma_mr). | ||
945 | */ | ||
946 | if (!op->r_write) { | ||
947 | rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos], | ||
948 | op->r_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr); | ||
949 | work_alloc++; | ||
950 | } | ||
951 | |||
952 | failed_wr = &first->s_wr; | ||
953 | ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); | ||
954 | rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, | ||
955 | first, &first->s_wr, ret, failed_wr); | ||
956 | BUG_ON(failed_wr != &first->s_wr); | ||
957 | if (ret) { | ||
958 | printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 " | ||
959 | "returned %d\n", &conn->c_faddr, ret); | ||
960 | rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); | ||
961 | goto out; | ||
962 | } | ||
963 | |||
964 | out: | ||
965 | return ret; | ||
966 | } | ||
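
The work-request sizing above splits the RDMA operation into ceil(r_count / max_sge) work requests, each carrying at most max_sge scatter/gather entries. A small stand-alone sketch of that arithmetic, with illustrative values:

/* Sketch of the WR sizing used in rds_iw_xmit_rdma().  ceil_div() matches
 * the ceil() helper the RDS code relies on; the counts are illustrative.
 */
#include <stdio.h>

static unsigned int ceil_div(unsigned int val, unsigned int div)
{
	return (val + div - 1) / div;
}

int main(void)
{
	unsigned int r_count = 37;	/* mapped sg entries in the RDMA op */
	unsigned int max_sge = 8;	/* device limit */
	unsigned int wrs = ceil_div(r_count, max_sge);
	unsigned int left = r_count, i;

	printf("%u sg entries over %u WRs\n", r_count, wrs);
	for (i = 0; i < wrs; i++) {
		unsigned int n = left > max_sge ? max_sge : left;

		printf("  WR %u: num_sge=%u\n", i, n);
		left -= n;
	}
	return 0;
}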
967 | |||
968 | void rds_iw_xmit_complete(struct rds_connection *conn) | ||
969 | { | ||
970 | struct rds_iw_connection *ic = conn->c_transport_data; | ||
971 | |||
972 | /* We may have a pending ACK or window update we were unable | ||
973 | * to send previously (due to flow control). Try again. */ | ||
974 | rds_iw_attempt_ack(ic); | ||
975 | } | ||
diff --git a/net/rds/iw_stats.c b/net/rds/iw_stats.c new file mode 100644 index 000000000000..ccc7e8f0bf0e --- /dev/null +++ b/net/rds/iw_stats.c | |||
@@ -0,0 +1,95 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/percpu.h> | ||
34 | #include <linux/seq_file.h> | ||
35 | #include <linux/proc_fs.h> | ||
36 | |||
37 | #include "rds.h" | ||
38 | #include "iw.h" | ||
39 | |||
40 | DEFINE_PER_CPU(struct rds_iw_statistics, rds_iw_stats) ____cacheline_aligned; | ||
41 | |||
42 | static char *rds_iw_stat_names[] = { | ||
43 | "iw_connect_raced", | ||
44 | "iw_listen_closed_stale", | ||
45 | "iw_tx_cq_call", | ||
46 | "iw_tx_cq_event", | ||
47 | "iw_tx_ring_full", | ||
48 | "iw_tx_throttle", | ||
49 | "iw_tx_sg_mapping_failure", | ||
50 | "iw_tx_stalled", | ||
51 | "iw_tx_credit_updates", | ||
52 | "iw_rx_cq_call", | ||
53 | "iw_rx_cq_event", | ||
54 | "iw_rx_ring_empty", | ||
55 | "iw_rx_refill_from_cq", | ||
56 | "iw_rx_refill_from_thread", | ||
57 | "iw_rx_alloc_limit", | ||
58 | "iw_rx_credit_updates", | ||
59 | "iw_ack_sent", | ||
60 | "iw_ack_send_failure", | ||
61 | "iw_ack_send_delayed", | ||
62 | "iw_ack_send_piggybacked", | ||
63 | "iw_ack_received", | ||
64 | "iw_rdma_mr_alloc", | ||
65 | "iw_rdma_mr_free", | ||
66 | "iw_rdma_mr_used", | ||
67 | "iw_rdma_mr_pool_flush", | ||
68 | "iw_rdma_mr_pool_wait", | ||
69 | "iw_rdma_mr_pool_depleted", | ||
70 | }; | ||
71 | |||
72 | unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter, | ||
73 | unsigned int avail) | ||
74 | { | ||
75 | struct rds_iw_statistics stats = {0, }; | ||
76 | uint64_t *src; | ||
77 | uint64_t *sum; | ||
78 | size_t i; | ||
79 | int cpu; | ||
80 | |||
81 | if (avail < ARRAY_SIZE(rds_iw_stat_names)) | ||
82 | goto out; | ||
83 | |||
84 | for_each_online_cpu(cpu) { | ||
85 | src = (uint64_t *)&(per_cpu(rds_iw_stats, cpu)); | ||
86 | sum = (uint64_t *)&stats; | ||
87 | for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++) | ||
88 | *(sum++) += *(src++); | ||
89 | } | ||
90 | |||
91 | rds_stats_info_copy(iter, (uint64_t *)&stats, rds_iw_stat_names, | ||
92 | ARRAY_SIZE(rds_iw_stat_names)); | ||
93 | out: | ||
94 | return ARRAY_SIZE(rds_iw_stat_names); | ||
95 | } | ||
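
rds_iw_stats_info_copy() folds the per-CPU counters by treating each rds_iw_statistics struct as a flat array of u64s and summing element by element. A user-space model of that pattern; the struct and per-CPU array here are stand-ins for the real DEFINE_PER_CPU data:

/* Model of the per-CPU counter summation above. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct stats {
	uint64_t tx_ring_full;
	uint64_t rx_ring_empty;
	uint64_t ack_sent;
};

int main(void)
{
	struct stats percpu[4] = {
		{ 1, 0, 10 }, { 0, 2, 12 }, { 3, 0, 7 }, { 0, 1, 5 },
	};
	struct stats total;
	uint64_t *sum, *src;
	size_t i;
	int cpu;

	memset(&total, 0, sizeof(total));
	for (cpu = 0; cpu < 4; cpu++) {
		src = (uint64_t *)&percpu[cpu];
		sum = (uint64_t *)&total;
		for (i = 0; i < sizeof(total) / sizeof(uint64_t); i++)
			*sum++ += *src++;
	}
	printf("tx_ring_full=%llu rx_ring_empty=%llu ack_sent=%llu\n",
	       (unsigned long long)total.tx_ring_full,
	       (unsigned long long)total.rx_ring_empty,
	       (unsigned long long)total.ack_sent);
	return 0;
}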
diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c new file mode 100644 index 000000000000..9590678cd616 --- /dev/null +++ b/net/rds/iw_sysctl.c | |||
@@ -0,0 +1,137 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/sysctl.h> | ||
35 | #include <linux/proc_fs.h> | ||
36 | |||
37 | #include "iw.h" | ||
38 | |||
39 | static struct ctl_table_header *rds_iw_sysctl_hdr; | ||
40 | |||
41 | unsigned long rds_iw_sysctl_max_send_wr = RDS_IW_DEFAULT_SEND_WR; | ||
42 | unsigned long rds_iw_sysctl_max_recv_wr = RDS_IW_DEFAULT_RECV_WR; | ||
43 | unsigned long rds_iw_sysctl_max_recv_allocation = (128 * 1024 * 1024) / RDS_FRAG_SIZE; | ||
44 | static unsigned long rds_iw_sysctl_max_wr_min = 1; | ||
45 | /* hardware will fail CQ creation long before this */ | ||
46 | static unsigned long rds_iw_sysctl_max_wr_max = (u32)~0; | ||
47 | |||
48 | unsigned long rds_iw_sysctl_max_unsig_wrs = 16; | ||
49 | static unsigned long rds_iw_sysctl_max_unsig_wr_min = 1; | ||
50 | static unsigned long rds_iw_sysctl_max_unsig_wr_max = 64; | ||
51 | |||
52 | unsigned long rds_iw_sysctl_max_unsig_bytes = (16 << 20); | ||
53 | static unsigned long rds_iw_sysctl_max_unsig_bytes_min = 1; | ||
54 | static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL; | ||
55 | |||
56 | unsigned int rds_iw_sysctl_flow_control = 1; | ||
57 | |||
58 | ctl_table rds_iw_sysctl_table[] = { | ||
59 | { | ||
60 | .ctl_name = CTL_UNNUMBERED, | ||
61 | .procname = "max_send_wr", | ||
62 | .data = &rds_iw_sysctl_max_send_wr, | ||
63 | .maxlen = sizeof(unsigned long), | ||
64 | .mode = 0644, | ||
65 | .proc_handler = &proc_doulongvec_minmax, | ||
66 | .extra1 = &rds_iw_sysctl_max_wr_min, | ||
67 | .extra2 = &rds_iw_sysctl_max_wr_max, | ||
68 | }, | ||
69 | { | ||
70 | .ctl_name = CTL_UNNUMBERED, | ||
71 | .procname = "max_recv_wr", | ||
72 | .data = &rds_iw_sysctl_max_recv_wr, | ||
73 | .maxlen = sizeof(unsigned long), | ||
74 | .mode = 0644, | ||
75 | .proc_handler = &proc_doulongvec_minmax, | ||
76 | .extra1 = &rds_iw_sysctl_max_wr_min, | ||
77 | .extra2 = &rds_iw_sysctl_max_wr_max, | ||
78 | }, | ||
79 | { | ||
80 | .ctl_name = CTL_UNNUMBERED, | ||
81 | .procname = "max_unsignaled_wr", | ||
82 | .data = &rds_iw_sysctl_max_unsig_wrs, | ||
83 | .maxlen = sizeof(unsigned long), | ||
84 | .mode = 0644, | ||
85 | .proc_handler = &proc_doulongvec_minmax, | ||
86 | .extra1 = &rds_iw_sysctl_max_unsig_wr_min, | ||
87 | .extra2 = &rds_iw_sysctl_max_unsig_wr_max, | ||
88 | }, | ||
89 | { | ||
90 | .ctl_name = CTL_UNNUMBERED, | ||
91 | .procname = "max_unsignaled_bytes", | ||
92 | .data = &rds_iw_sysctl_max_unsig_bytes, | ||
93 | .maxlen = sizeof(unsigned long), | ||
94 | .mode = 0644, | ||
95 | .proc_handler = &proc_doulongvec_minmax, | ||
96 | .extra1 = &rds_iw_sysctl_max_unsig_bytes_min, | ||
97 | .extra2 = &rds_iw_sysctl_max_unsig_bytes_max, | ||
98 | }, | ||
99 | { | ||
100 | .ctl_name = CTL_UNNUMBERED, | ||
101 | .procname = "max_recv_allocation", | ||
102 | .data = &rds_iw_sysctl_max_recv_allocation, | ||
103 | .maxlen = sizeof(unsigned long), | ||
104 | .mode = 0644, | ||
105 | .proc_handler = &proc_doulongvec_minmax, | ||
106 | }, | ||
107 | { | ||
108 | .ctl_name = CTL_UNNUMBERED, | ||
109 | .procname = "flow_control", | ||
110 | .data = &rds_iw_sysctl_flow_control, | ||
111 | .maxlen = sizeof(rds_iw_sysctl_flow_control), | ||
112 | .mode = 0644, | ||
113 | .proc_handler = &proc_dointvec, | ||
114 | }, | ||
115 | { .ctl_name = 0} | ||
116 | }; | ||
117 | |||
118 | static struct ctl_path rds_iw_sysctl_path[] = { | ||
119 | { .procname = "net", .ctl_name = CTL_NET, }, | ||
120 | { .procname = "rds", .ctl_name = CTL_UNNUMBERED, }, | ||
121 | { .procname = "iw", .ctl_name = CTL_UNNUMBERED, }, | ||
122 | { } | ||
123 | }; | ||
124 | |||
125 | void rds_iw_sysctl_exit(void) | ||
126 | { | ||
127 | if (rds_iw_sysctl_hdr) | ||
128 | unregister_sysctl_table(rds_iw_sysctl_hdr); | ||
129 | } | ||
130 | |||
131 | int __init rds_iw_sysctl_init(void) | ||
132 | { | ||
133 | rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table); | ||
134 | if (rds_iw_sysctl_hdr == NULL) | ||
135 | return -ENOMEM; | ||
136 | return 0; | ||
137 | } | ||
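
The ctl_path above ("net" -> "rds" -> "iw") combined with the table entries surfaces these knobs as files under /proc/sys/net/rds/iw/. A minimal user-space reader, assuming the rds_iw module is loaded so the files exist:

/* Read one of the sysctls registered above. */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/rds/iw/max_unsignaled_wr", "r");
	unsigned long val;

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%lu", &val) == 1)
		printf("max_unsignaled_wr = %lu\n", val);
	fclose(f);
	return 0;
}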
diff --git a/net/rds/loop.c b/net/rds/loop.c new file mode 100644 index 000000000000..4a61997f554d --- /dev/null +++ b/net/rds/loop.c | |||
@@ -0,0 +1,188 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/in.h> | ||
35 | |||
36 | #include "rds.h" | ||
37 | #include "loop.h" | ||
38 | |||
39 | static DEFINE_SPINLOCK(loop_conns_lock); | ||
40 | static LIST_HEAD(loop_conns); | ||
41 | |||
42 | /* | ||
43 | * This 'loopback' transport is a special case for flows that originate | ||
44 | * and terminate on the same machine. | ||
45 | * | ||
46 | * Connection build-up notices if the destination address is thought of | ||
47 | * as a local address by a transport. At that time it decides to use the | ||
48 | * loopback transport instead of the bound transport of the sending socket. | ||
49 | * | ||
50 | * The loopback transport's sending path just hands the sent rds_message | ||
51 | * straight to the receiving path via an embedded rds_incoming. | ||
52 | */ | ||
53 | |||
54 | /* | ||
55 | * Usually a message transits both the sender and receiver's conns as it | ||
56 | * flows to the receiver. In the loopback case, though, the receive path | ||
57 | * is handed the sending conn so the sense of the addresses is reversed. | ||
58 | */ | ||
59 | static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, | ||
60 | unsigned int hdr_off, unsigned int sg, | ||
61 | unsigned int off) | ||
62 | { | ||
63 | BUG_ON(hdr_off || sg || off); | ||
64 | |||
65 | rds_inc_init(&rm->m_inc, conn, conn->c_laddr); | ||
66 | rds_message_addref(rm); /* for the inc */ | ||
67 | |||
68 | rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc, | ||
69 | GFP_KERNEL, KM_USER0); | ||
70 | |||
71 | rds_send_drop_acked(conn, be64_to_cpu(rm->m_inc.i_hdr.h_sequence), | ||
72 | NULL); | ||
73 | |||
74 | rds_inc_put(&rm->m_inc); | ||
75 | |||
76 | return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); | ||
77 | } | ||
78 | |||
79 | static int rds_loop_xmit_cong_map(struct rds_connection *conn, | ||
80 | struct rds_cong_map *map, | ||
81 | unsigned long offset) | ||
82 | { | ||
83 | unsigned long i; | ||
84 | |||
85 | BUG_ON(offset); | ||
86 | BUG_ON(map != conn->c_lcong); | ||
87 | |||
88 | for (i = 0; i < RDS_CONG_MAP_PAGES; i++) { | ||
89 | memcpy((void *)conn->c_fcong->m_page_addrs[i], | ||
90 | (void *)map->m_page_addrs[i], PAGE_SIZE); | ||
91 | } | ||
92 | |||
93 | rds_cong_map_updated(conn->c_fcong, ~(u64) 0); | ||
94 | |||
95 | return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; | ||
96 | } | ||
97 | |||
98 | /* we need to at least give the thread something that succeeds */ | ||
99 | static int rds_loop_recv(struct rds_connection *conn) | ||
100 | { | ||
101 | return 0; | ||
102 | } | ||
103 | |||
104 | struct rds_loop_connection { | ||
105 | struct list_head loop_node; | ||
106 | struct rds_connection *conn; | ||
107 | }; | ||
108 | |||
109 | /* | ||
110 | * Even the loopback transport needs to keep track of its connections, | ||
111 | * so it can call rds_conn_destroy() on them on exit. N.B. there are | ||
112 | * 1+ loopback addresses (127.*.*.*) so it's not a bug to have | ||
113 | * multiple loopback conns allocated, although rather useless. | ||
114 | */ | ||
115 | static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp) | ||
116 | { | ||
117 | struct rds_loop_connection *lc; | ||
118 | unsigned long flags; | ||
119 | |||
120 | lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL); | ||
121 | if (lc == NULL) | ||
122 | return -ENOMEM; | ||
123 | |||
124 | INIT_LIST_HEAD(&lc->loop_node); | ||
125 | lc->conn = conn; | ||
126 | conn->c_transport_data = lc; | ||
127 | |||
128 | spin_lock_irqsave(&loop_conns_lock, flags); | ||
129 | list_add_tail(&lc->loop_node, &loop_conns); | ||
130 | spin_unlock_irqrestore(&loop_conns_lock, flags); | ||
131 | |||
132 | return 0; | ||
133 | } | ||
134 | |||
135 | static void rds_loop_conn_free(void *arg) | ||
136 | { | ||
137 | struct rds_loop_connection *lc = arg; | ||
138 | rdsdebug("lc %p\n", lc); | ||
139 | list_del(&lc->loop_node); | ||
140 | kfree(lc); | ||
141 | } | ||
142 | |||
143 | static int rds_loop_conn_connect(struct rds_connection *conn) | ||
144 | { | ||
145 | rds_connect_complete(conn); | ||
146 | return 0; | ||
147 | } | ||
148 | |||
149 | static void rds_loop_conn_shutdown(struct rds_connection *conn) | ||
150 | { | ||
151 | } | ||
152 | |||
153 | void rds_loop_exit(void) | ||
154 | { | ||
155 | struct rds_loop_connection *lc, *_lc; | ||
156 | LIST_HEAD(tmp_list); | ||
157 | |||
158 | /* avoid calling conn_destroy with irqs off */ | ||
159 | spin_lock_irq(&loop_conns_lock); | ||
160 | list_splice(&loop_conns, &tmp_list); | ||
161 | INIT_LIST_HEAD(&loop_conns); | ||
162 | spin_unlock_irq(&loop_conns_lock); | ||
163 | |||
164 | list_for_each_entry_safe(lc, _lc, &tmp_list, loop_node) { | ||
165 | WARN_ON(lc->conn->c_passive); | ||
166 | rds_conn_destroy(lc->conn); | ||
167 | } | ||
168 | } | ||
169 | |||
170 | /* | ||
171 | * This is missing .xmit_* because loop doesn't go through generic | ||
172 | * rds_send_xmit() and doesn't call rds_recv_incoming(). .listen_stop and | ||
173 | * .laddr_check are missing because transport.c doesn't iterate over | ||
174 | * rds_loop_transport. | ||
175 | */ | ||
176 | struct rds_transport rds_loop_transport = { | ||
177 | .xmit = rds_loop_xmit, | ||
178 | .xmit_cong_map = rds_loop_xmit_cong_map, | ||
179 | .recv = rds_loop_recv, | ||
180 | .conn_alloc = rds_loop_conn_alloc, | ||
181 | .conn_free = rds_loop_conn_free, | ||
182 | .conn_connect = rds_loop_conn_connect, | ||
183 | .conn_shutdown = rds_loop_conn_shutdown, | ||
184 | .inc_copy_to_user = rds_message_inc_copy_to_user, | ||
185 | .inc_purge = rds_message_inc_purge, | ||
186 | .inc_free = rds_message_inc_free, | ||
187 | .t_name = "loopback", | ||
188 | }; | ||
diff --git a/net/rds/loop.h b/net/rds/loop.h new file mode 100644 index 000000000000..f32b0939a04d --- /dev/null +++ b/net/rds/loop.h | |||
@@ -0,0 +1,9 @@ | |||
1 | #ifndef _RDS_LOOP_H | ||
2 | #define _RDS_LOOP_H | ||
3 | |||
4 | /* loop.c */ | ||
5 | extern struct rds_transport rds_loop_transport; | ||
6 | |||
7 | void rds_loop_exit(void); | ||
8 | |||
9 | #endif | ||
diff --git a/net/rds/message.c b/net/rds/message.c new file mode 100644 index 000000000000..5a15dc8d0cd7 --- /dev/null +++ b/net/rds/message.c | |||
@@ -0,0 +1,402 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | |||
35 | #include "rds.h" | ||
36 | #include "rdma.h" | ||
37 | |||
38 | static DECLARE_WAIT_QUEUE_HEAD(rds_message_flush_waitq); | ||
39 | |||
40 | static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = { | ||
41 | [RDS_EXTHDR_NONE] = 0, | ||
42 | [RDS_EXTHDR_VERSION] = sizeof(struct rds_ext_header_version), | ||
43 | [RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma), | ||
44 | [RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest), | ||
45 | }; | ||
46 | |||
47 | |||
48 | void rds_message_addref(struct rds_message *rm) | ||
49 | { | ||
50 | rdsdebug("addref rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); | ||
51 | atomic_inc(&rm->m_refcount); | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * This relies on dma_map_sg() not touching sg[].page during merging. | ||
56 | */ | ||
57 | static void rds_message_purge(struct rds_message *rm) | ||
58 | { | ||
59 | unsigned long i; | ||
60 | |||
61 | if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) | ||
62 | return; | ||
63 | |||
64 | for (i = 0; i < rm->m_nents; i++) { | ||
65 | rdsdebug("putting data page %p\n", (void *)sg_page(&rm->m_sg[i])); | ||
66 | /* XXX will have to put_page for page refs */ | ||
67 | __free_page(sg_page(&rm->m_sg[i])); | ||
68 | } | ||
69 | rm->m_nents = 0; | ||
70 | |||
71 | if (rm->m_rdma_op) | ||
72 | rds_rdma_free_op(rm->m_rdma_op); | ||
73 | if (rm->m_rdma_mr) | ||
74 | rds_mr_put(rm->m_rdma_mr); | ||
75 | } | ||
76 | |||
77 | void rds_message_inc_purge(struct rds_incoming *inc) | ||
78 | { | ||
79 | struct rds_message *rm = container_of(inc, struct rds_message, m_inc); | ||
80 | rds_message_purge(rm); | ||
81 | } | ||
82 | |||
83 | void rds_message_put(struct rds_message *rm) | ||
84 | { | ||
85 | rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); | ||
86 | |||
87 | if (atomic_dec_and_test(&rm->m_refcount)) { | ||
88 | BUG_ON(!list_empty(&rm->m_sock_item)); | ||
89 | BUG_ON(!list_empty(&rm->m_conn_item)); | ||
90 | rds_message_purge(rm); | ||
91 | |||
92 | kfree(rm); | ||
93 | } | ||
94 | } | ||
95 | |||
96 | void rds_message_inc_free(struct rds_incoming *inc) | ||
97 | { | ||
98 | struct rds_message *rm = container_of(inc, struct rds_message, m_inc); | ||
99 | rds_message_put(rm); | ||
100 | } | ||
101 | |||
102 | void rds_message_populate_header(struct rds_header *hdr, __be16 sport, | ||
103 | __be16 dport, u64 seq) | ||
104 | { | ||
105 | hdr->h_flags = 0; | ||
106 | hdr->h_sport = sport; | ||
107 | hdr->h_dport = dport; | ||
108 | hdr->h_sequence = cpu_to_be64(seq); | ||
109 | hdr->h_exthdr[0] = RDS_EXTHDR_NONE; | ||
110 | } | ||
111 | |||
112 | int rds_message_add_extension(struct rds_header *hdr, | ||
113 | unsigned int type, const void *data, unsigned int len) | ||
114 | { | ||
115 | unsigned int ext_len = sizeof(u8) + len; | ||
116 | unsigned char *dst; | ||
117 | |||
118 | /* For now, refuse to add more than one extension header */ | ||
119 | if (hdr->h_exthdr[0] != RDS_EXTHDR_NONE) | ||
120 | return 0; | ||
121 | |||
122 | if (type >= __RDS_EXTHDR_MAX | ||
123 | || len != rds_exthdr_size[type]) | ||
124 | return 0; | ||
125 | |||
126 | if (ext_len >= RDS_HEADER_EXT_SPACE) | ||
127 | return 0; | ||
128 | dst = hdr->h_exthdr; | ||
129 | |||
130 | *dst++ = type; | ||
131 | memcpy(dst, data, len); | ||
132 | |||
133 | dst[len] = RDS_EXTHDR_NONE; | ||
134 | return 1; | ||
135 | } | ||
136 | |||
137 | /* | ||
138 | * If a message has extension headers, retrieve them here. | ||
139 | * Call like this: | ||
140 | * | ||
141 | * unsigned int pos = 0; | ||
142 | * | ||
143 | * while (1) { | ||
144 | * buflen = sizeof(buffer); | ||
145 | * type = rds_message_next_extension(hdr, &pos, buffer, &buflen); | ||
146 | * if (type == RDS_EXTHDR_NONE) | ||
147 | * break; | ||
148 | * ... | ||
149 | * } | ||
150 | */ | ||
151 | int rds_message_next_extension(struct rds_header *hdr, | ||
152 | unsigned int *pos, void *buf, unsigned int *buflen) | ||
153 | { | ||
154 | unsigned int offset, ext_type, ext_len; | ||
155 | u8 *src = hdr->h_exthdr; | ||
156 | |||
157 | offset = *pos; | ||
158 | if (offset >= RDS_HEADER_EXT_SPACE) | ||
159 | goto none; | ||
160 | |||
161 | /* Get the extension type and length. For now, the | ||
162 | * length is implied by the extension type. */ | ||
163 | ext_type = src[offset++]; | ||
164 | |||
165 | if (ext_type == RDS_EXTHDR_NONE || ext_type >= __RDS_EXTHDR_MAX) | ||
166 | goto none; | ||
167 | ext_len = rds_exthdr_size[ext_type]; | ||
168 | if (offset + ext_len > RDS_HEADER_EXT_SPACE) | ||
169 | goto none; | ||
170 | |||
171 | *pos = offset + ext_len; | ||
172 | if (ext_len < *buflen) | ||
173 | *buflen = ext_len; | ||
174 | memcpy(buf, src + offset, *buflen); | ||
175 | return ext_type; | ||
176 | |||
177 | none: | ||
178 | *pos = RDS_HEADER_EXT_SPACE; | ||
179 | *buflen = 0; | ||
180 | return RDS_EXTHDR_NONE; | ||
181 | } | ||
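
A fleshed-out version of the iteration pattern documented in the comment above rds_message_next_extension(); the union buffer and the dispatch comment are illustrative, while the call sequence follows the documented loop:

/* Walk every extension header in a received rds_header (sketch, not part
 * of the tree).  The extension structs are the ones sized in
 * rds_exthdr_size[] above.
 */
static void example_walk_extensions(struct rds_header *hdr)
{
	union {
		struct rds_ext_header_version version;
		struct rds_ext_header_rdma rdma;
		struct rds_ext_header_rdma_dest rdma_dest;
	} buffer;
	unsigned int pos = 0, buflen;
	int type;

	while (1) {
		buflen = sizeof(buffer);
		type = rds_message_next_extension(hdr, &pos, &buffer, &buflen);
		if (type == RDS_EXTHDR_NONE)
			break;
		/* dispatch on 'type' here, e.g. RDS_EXTHDR_VERSION */
	}
}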
182 | |||
183 | int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version) | ||
184 | { | ||
185 | struct rds_ext_header_version ext_hdr; | ||
186 | |||
187 | ext_hdr.h_version = cpu_to_be32(version); | ||
188 | return rds_message_add_extension(hdr, RDS_EXTHDR_VERSION, &ext_hdr, sizeof(ext_hdr)); | ||
189 | } | ||
190 | |||
191 | int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version) | ||
192 | { | ||
193 | struct rds_ext_header_version ext_hdr; | ||
194 | unsigned int pos = 0, len = sizeof(ext_hdr); | ||
195 | |||
196 | /* We assume the version extension is the only one present */ | ||
197 | if (rds_message_next_extension(hdr, &pos, &ext_hdr, &len) != RDS_EXTHDR_VERSION) | ||
198 | return 0; | ||
199 | *version = be32_to_cpu(ext_hdr.h_version); | ||
200 | return 1; | ||
201 | } | ||
202 | |||
203 | int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset) | ||
204 | { | ||
205 | struct rds_ext_header_rdma_dest ext_hdr; | ||
206 | |||
207 | ext_hdr.h_rdma_rkey = cpu_to_be32(r_key); | ||
208 | ext_hdr.h_rdma_offset = cpu_to_be32(offset); | ||
209 | return rds_message_add_extension(hdr, RDS_EXTHDR_RDMA_DEST, &ext_hdr, sizeof(ext_hdr)); | ||
210 | } | ||
211 | |||
212 | struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp) | ||
213 | { | ||
214 | struct rds_message *rm; | ||
215 | |||
216 | rm = kzalloc(sizeof(struct rds_message) + | ||
217 | (nents * sizeof(struct scatterlist)), gfp); | ||
218 | if (!rm) | ||
219 | goto out; | ||
220 | |||
221 | if (nents) | ||
222 | sg_init_table(rm->m_sg, nents); | ||
223 | atomic_set(&rm->m_refcount, 1); | ||
224 | INIT_LIST_HEAD(&rm->m_sock_item); | ||
225 | INIT_LIST_HEAD(&rm->m_conn_item); | ||
226 | spin_lock_init(&rm->m_rs_lock); | ||
227 | |||
228 | out: | ||
229 | return rm; | ||
230 | } | ||
231 | |||
232 | struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len) | ||
233 | { | ||
234 | struct rds_message *rm; | ||
235 | unsigned int i; | ||
236 | |||
237 | rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL); | ||
238 | if (rm == NULL) | ||
239 | return ERR_PTR(-ENOMEM); | ||
240 | |||
241 | set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); | ||
242 | rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); | ||
243 | rm->m_nents = ceil(total_len, PAGE_SIZE); | ||
244 | |||
245 | for (i = 0; i < rm->m_nents; ++i) { | ||
246 | sg_set_page(&rm->m_sg[i], | ||
247 | virt_to_page(page_addrs[i]), | ||
248 | PAGE_SIZE, 0); | ||
249 | } | ||
250 | |||
251 | return rm; | ||
252 | } | ||
253 | |||
254 | struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, | ||
255 | size_t total_len) | ||
256 | { | ||
257 | unsigned long to_copy; | ||
258 | unsigned long iov_off; | ||
259 | unsigned long sg_off; | ||
260 | struct rds_message *rm; | ||
261 | struct iovec *iov; | ||
262 | struct scatterlist *sg; | ||
263 | int ret; | ||
264 | |||
265 | rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL); | ||
266 | if (rm == NULL) { | ||
267 | ret = -ENOMEM; | ||
268 | goto out; | ||
269 | } | ||
270 | |||
271 | rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); | ||
272 | |||
273 | /* | ||
274 | * now allocate and copy in the data payload. | ||
275 | */ | ||
276 | sg = rm->m_sg; | ||
277 | iov = first_iov; | ||
278 | iov_off = 0; | ||
279 | sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ | ||
280 | |||
281 | while (total_len) { | ||
282 | if (sg_page(sg) == NULL) { | ||
283 | ret = rds_page_remainder_alloc(sg, total_len, | ||
284 | GFP_HIGHUSER); | ||
285 | if (ret) | ||
286 | goto out; | ||
287 | rm->m_nents++; | ||
288 | sg_off = 0; | ||
289 | } | ||
290 | |||
291 | while (iov_off == iov->iov_len) { | ||
292 | iov_off = 0; | ||
293 | iov++; | ||
294 | } | ||
295 | |||
296 | to_copy = min(iov->iov_len - iov_off, sg->length - sg_off); | ||
297 | to_copy = min_t(size_t, to_copy, total_len); | ||
298 | |||
299 | rdsdebug("copying %lu bytes from user iov [%p, %zu] + %lu to " | ||
300 | "sg [%p, %u, %u] + %lu\n", | ||
301 | to_copy, iov->iov_base, iov->iov_len, iov_off, | ||
302 | (void *)sg_page(sg), sg->offset, sg->length, sg_off); | ||
303 | |||
304 | ret = rds_page_copy_from_user(sg_page(sg), sg->offset + sg_off, | ||
305 | iov->iov_base + iov_off, | ||
306 | to_copy); | ||
307 | if (ret) | ||
308 | goto out; | ||
309 | |||
310 | iov_off += to_copy; | ||
311 | total_len -= to_copy; | ||
312 | sg_off += to_copy; | ||
313 | |||
314 | if (sg_off == sg->length) | ||
315 | sg++; | ||
316 | } | ||
317 | |||
318 | ret = 0; | ||
319 | out: | ||
320 | if (ret) { | ||
321 | if (rm) | ||
322 | rds_message_put(rm); | ||
323 | rm = ERR_PTR(ret); | ||
324 | } | ||
325 | return rm; | ||
326 | } | ||
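
The copy loop in rds_message_copy_from_user() advances through the user iovecs and the message's page fragments in lockstep, copying the minimum of what remains in the current iovec, the current fragment, and the message. A stand-alone model of that chunking with illustrative sizes:

/* Model of the iovec/fragment chunking above. */
#include <stdio.h>

static size_t min3(size_t a, size_t b, size_t c)
{
	size_t m = a < b ? a : b;
	return m < c ? m : c;
}

int main(void)
{
	size_t iov_len[] = { 1000, 3000, 5000 };	/* user iovec lengths */
	size_t frag = 4096;				/* page fragment size */
	size_t total = 9000, iov = 0, iov_off = 0, sg_off = 0;

	while (total) {
		size_t to_copy;

		while (iov_off == iov_len[iov]) {
			iov_off = 0;
			iov++;
		}
		to_copy = min3(iov_len[iov] - iov_off, frag - sg_off, total);
		printf("copy %zu bytes from iov %zu\n", to_copy, iov);
		iov_off += to_copy;
		sg_off += to_copy;
		total -= to_copy;
		if (sg_off == frag)
			sg_off = 0;
	}
	return 0;
}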
327 | |||
328 | int rds_message_inc_copy_to_user(struct rds_incoming *inc, | ||
329 | struct iovec *first_iov, size_t size) | ||
330 | { | ||
331 | struct rds_message *rm; | ||
332 | struct iovec *iov; | ||
333 | struct scatterlist *sg; | ||
334 | unsigned long to_copy; | ||
335 | unsigned long iov_off; | ||
336 | unsigned long vec_off; | ||
337 | int copied; | ||
338 | int ret; | ||
339 | u32 len; | ||
340 | |||
341 | rm = container_of(inc, struct rds_message, m_inc); | ||
342 | len = be32_to_cpu(rm->m_inc.i_hdr.h_len); | ||
343 | |||
344 | iov = first_iov; | ||
345 | iov_off = 0; | ||
346 | sg = rm->m_sg; | ||
347 | vec_off = 0; | ||
348 | copied = 0; | ||
349 | |||
350 | while (copied < size && copied < len) { | ||
351 | while (iov_off == iov->iov_len) { | ||
352 | iov_off = 0; | ||
353 | iov++; | ||
354 | } | ||
355 | |||
356 | to_copy = min(iov->iov_len - iov_off, sg->length - vec_off); | ||
357 | to_copy = min_t(size_t, to_copy, size - copied); | ||
358 | to_copy = min_t(unsigned long, to_copy, len - copied); | ||
359 | |||
360 | rdsdebug("copying %lu bytes to user iov [%p, %zu] + %lu to " | ||
361 | "sg [%p, %u, %u] + %lu\n", | ||
362 | to_copy, iov->iov_base, iov->iov_len, iov_off, | ||
363 | sg_page(sg), sg->offset, sg->length, vec_off); | ||
364 | |||
365 | ret = rds_page_copy_to_user(sg_page(sg), sg->offset + vec_off, | ||
366 | iov->iov_base + iov_off, | ||
367 | to_copy); | ||
368 | if (ret) { | ||
369 | copied = ret; | ||
370 | break; | ||
371 | } | ||
372 | |||
373 | iov_off += to_copy; | ||
374 | vec_off += to_copy; | ||
375 | copied += to_copy; | ||
376 | |||
377 | if (vec_off == sg->length) { | ||
378 | vec_off = 0; | ||
379 | sg++; | ||
380 | } | ||
381 | } | ||
382 | |||
383 | return copied; | ||
384 | } | ||
385 | |||
386 | /* | ||
387 | * If the message is still on the send queue, wait until the transport | ||
388 | * is done with it. This is particularly important for RDMA operations. | ||
389 | */ | ||
390 | void rds_message_wait(struct rds_message *rm) | ||
391 | { | ||
392 | wait_event(rds_message_flush_waitq, | ||
393 | !test_bit(RDS_MSG_MAPPED, &rm->m_flags)); | ||
394 | } | ||
395 | |||
396 | void rds_message_unmapped(struct rds_message *rm) | ||
397 | { | ||
398 | clear_bit(RDS_MSG_MAPPED, &rm->m_flags); | ||
399 | if (waitqueue_active(&rds_message_flush_waitq)) | ||
400 | wake_up(&rds_message_flush_waitq); | ||
401 | } | ||
402 | |||
diff --git a/net/rds/page.c b/net/rds/page.c new file mode 100644 index 000000000000..c460743a89ad --- /dev/null +++ b/net/rds/page.c | |||
@@ -0,0 +1,221 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/highmem.h> | ||
34 | |||
35 | #include "rds.h" | ||
36 | |||
37 | struct rds_page_remainder { | ||
38 | struct page *r_page; | ||
39 | unsigned long r_offset; | ||
40 | }; | ||
41 | |||
42 | DEFINE_PER_CPU(struct rds_page_remainder, rds_page_remainders) ____cacheline_aligned; | ||
43 | |||
44 | /* | ||
45 | * returns 0 on success or -errno on failure. | ||
46 | * | ||
47 | * We don't have to worry about flush_dcache_page() as this only works | ||
48 | * with private pages. If, say, we were to do directed receive to pinned | ||
49 | * user pages we'd have to worry more about cache coherence. (Though | ||
50 | * the flush_dcache_page() in get_user_pages() would probably be enough). | ||
51 | */ | ||
52 | int rds_page_copy_user(struct page *page, unsigned long offset, | ||
53 | void __user *ptr, unsigned long bytes, | ||
54 | int to_user) | ||
55 | { | ||
56 | unsigned long ret; | ||
57 | void *addr; | ||
58 | |||
59 | if (to_user) | ||
60 | rds_stats_add(s_copy_to_user, bytes); | ||
61 | else | ||
62 | rds_stats_add(s_copy_from_user, bytes); | ||
63 | |||
64 | addr = kmap_atomic(page, KM_USER0); | ||
65 | if (to_user) | ||
66 | ret = __copy_to_user_inatomic(ptr, addr + offset, bytes); | ||
67 | else | ||
68 | ret = __copy_from_user_inatomic(addr + offset, ptr, bytes); | ||
69 | kunmap_atomic(addr, KM_USER0); | ||
70 | |||
71 | if (ret) { | ||
72 | addr = kmap(page); | ||
73 | if (to_user) | ||
74 | ret = copy_to_user(ptr, addr + offset, bytes); | ||
75 | else | ||
76 | ret = copy_from_user(addr + offset, ptr, bytes); | ||
77 | kunmap(page); | ||
78 | if (ret) | ||
79 | return -EFAULT; | ||
80 | } | ||
81 | |||
82 | return 0; | ||
83 | } | ||
84 | |||
85 | /* | ||
86 | * Message allocation uses this to build up regions of a message. | ||
87 | * | ||
88 | * @bytes - the number of bytes needed. | ||
89 | * @gfp - the waiting behaviour of the allocation | ||
90 | * | ||
91 | * @gfp is always ORed with __GFP_HIGHMEM. Callers must be prepared to | ||
92 | * kmap the pages, etc. | ||
93 | * | ||
94 | * If @bytes is at least a full page then this just returns a page from | ||
95 | * alloc_page(). | ||
96 | * | ||
97 | * If @bytes is a partial page then this stores the unused region of the | ||
98 | * page in a per-cpu structure. Future partial-page allocations may be | ||
99 | * satisfied from that cached region. This lets us waste less memory on | ||
100 | * small allocations with minimal complexity. It works because the transmit | ||
101 | * path passes read-only page regions down to devices. They hold a page | ||
102 | * reference until they are done with the region. | ||
103 | */ | ||
104 | int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, | ||
105 | gfp_t gfp) | ||
106 | { | ||
107 | struct rds_page_remainder *rem; | ||
108 | unsigned long flags; | ||
109 | struct page *page; | ||
110 | int ret; | ||
111 | |||
112 | gfp |= __GFP_HIGHMEM; | ||
113 | |||
114 | /* jump straight to allocation if we're trying for a huge page */ | ||
115 | if (bytes >= PAGE_SIZE) { | ||
116 | page = alloc_page(gfp); | ||
117 | if (page == NULL) { | ||
118 | ret = -ENOMEM; | ||
119 | } else { | ||
120 | sg_set_page(scat, page, PAGE_SIZE, 0); | ||
121 | ret = 0; | ||
122 | } | ||
123 | goto out; | ||
124 | } | ||
125 | |||
126 | rem = &per_cpu(rds_page_remainders, get_cpu()); | ||
127 | local_irq_save(flags); | ||
128 | |||
129 | while (1) { | ||
130 | /* avoid a tiny region getting stuck by tossing it */ | ||
131 | if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) { | ||
132 | rds_stats_inc(s_page_remainder_miss); | ||
133 | __free_page(rem->r_page); | ||
134 | rem->r_page = NULL; | ||
135 | } | ||
136 | |||
137 | /* hand out a fragment from the cached page */ | ||
138 | if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) { | ||
139 | sg_set_page(scat, rem->r_page, bytes, rem->r_offset); | ||
140 | get_page(sg_page(scat)); | ||
141 | |||
142 | if (rem->r_offset != 0) | ||
143 | rds_stats_inc(s_page_remainder_hit); | ||
144 | |||
145 | rem->r_offset += bytes; | ||
146 | if (rem->r_offset == PAGE_SIZE) { | ||
147 | __free_page(rem->r_page); | ||
148 | rem->r_page = NULL; | ||
149 | } | ||
150 | ret = 0; | ||
151 | break; | ||
152 | } | ||
153 | |||
154 | /* alloc if there is nothing for us to use */ | ||
155 | local_irq_restore(flags); | ||
156 | put_cpu(); | ||
157 | |||
158 | page = alloc_page(gfp); | ||
159 | |||
160 | rem = &per_cpu(rds_page_remainders, get_cpu()); | ||
161 | local_irq_save(flags); | ||
162 | |||
163 | if (page == NULL) { | ||
164 | ret = -ENOMEM; | ||
165 | break; | ||
166 | } | ||
167 | |||
168 | /* did someone race to fill the remainder before us? */ | ||
169 | if (rem->r_page) { | ||
170 | __free_page(page); | ||
171 | continue; | ||
172 | } | ||
173 | |||
174 | /* otherwise install our page and loop around to alloc */ | ||
175 | rem->r_page = page; | ||
176 | rem->r_offset = 0; | ||
177 | } | ||
178 | |||
179 | local_irq_restore(flags); | ||
180 | put_cpu(); | ||
181 | out: | ||
182 | rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret, | ||
183 | ret ? NULL : sg_page(scat), ret ? 0 : scat->offset, | ||
184 | ret ? 0 : scat->length); | ||
185 | return ret; | ||
186 | } | ||
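
A user-space model of the remainder cache described above: sub-page requests are carved out of one shared page until it is exhausted or too small, and full-page requests bypass the cache entirely. Sizes are illustrative:

/* Model of rds_page_remainder_alloc()'s caching behaviour. */
#include <stdio.h>

#define PAGE 4096

int main(void)
{
	unsigned long r_offset = 0;	/* offset into the cached page */
	unsigned long reqs[] = { 1500, 1500, 2000, 5000 };
	unsigned long i;

	for (i = 0; i < sizeof(reqs) / sizeof(reqs[0]); i++) {
		unsigned long bytes = reqs[i];

		if (bytes >= PAGE) {
			printf("req %lu: %lu bytes -> whole new page\n", i, bytes);
			continue;
		}
		if (bytes > PAGE - r_offset) {
			printf("req %lu: remainder too small, cache a new page\n", i);
			r_offset = 0;
		}
		printf("req %lu: %lu bytes at offset %lu of cached page\n",
		       i, bytes, r_offset);
		r_offset += bytes;
	}
	return 0;
}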
187 | |||
188 | static int rds_page_remainder_cpu_notify(struct notifier_block *self, | ||
189 | unsigned long action, void *hcpu) | ||
190 | { | ||
191 | struct rds_page_remainder *rem; | ||
192 | long cpu = (long)hcpu; | ||
193 | |||
194 | rem = &per_cpu(rds_page_remainders, cpu); | ||
195 | |||
196 | rdsdebug("cpu %ld action 0x%lx\n", cpu, action); | ||
197 | |||
198 | switch (action) { | ||
199 | case CPU_DEAD: | ||
200 | if (rem->r_page) | ||
201 | __free_page(rem->r_page); | ||
202 | rem->r_page = NULL; | ||
203 | break; | ||
204 | } | ||
205 | |||
206 | return 0; | ||
207 | } | ||
208 | |||
209 | static struct notifier_block rds_page_remainder_nb = { | ||
210 | .notifier_call = rds_page_remainder_cpu_notify, | ||
211 | }; | ||
212 | |||
213 | void rds_page_exit(void) | ||
214 | { | ||
215 | int i; | ||
216 | |||
217 | for_each_possible_cpu(i) | ||
218 | rds_page_remainder_cpu_notify(&rds_page_remainder_nb, | ||
219 | (unsigned long)CPU_DEAD, | ||
220 | (void *)(long)i); | ||
221 | } | ||
diff --git a/net/rds/rdma.c b/net/rds/rdma.c new file mode 100644 index 000000000000..eaeeb91e1119 --- /dev/null +++ b/net/rds/rdma.c | |||
@@ -0,0 +1,679 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2007 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/pagemap.h> | ||
34 | #include <linux/rbtree.h> | ||
35 | #include <linux/dma-mapping.h> /* for DMA_*_DEVICE */ | ||
36 | |||
37 | #include "rdma.h" | ||
38 | |||
39 | /* | ||
40 | * XXX | ||
41 | * - build with sparse | ||
42 | * - should we limit the size of a mr region? let transport return failure? | ||
43 | * - should we detect duplicate keys on a socket? hmm. | ||
44 | * - an rdma is an mlock, apply rlimit? | ||
45 | */ | ||
46 | |||
47 | /* | ||
48 | * get the number of pages by looking at the page indices that the start and | ||
49 | * end addresses fall in. | ||
50 | * | ||
51 | * Returns 0 if the vec is invalid. It is invalid if the number of bytes | ||
52 | * causes the address to wrap or overflows an unsigned int. This comes | ||
53 | * from being stored in the 'length' member of 'struct scatterlist'. | ||
54 | */ | ||
55 | static unsigned int rds_pages_in_vec(struct rds_iovec *vec) | ||
56 | { | ||
57 | if ((vec->addr + vec->bytes <= vec->addr) || | ||
58 | (vec->bytes > (u64)UINT_MAX)) | ||
59 | return 0; | ||
60 | |||
61 | return ((vec->addr + vec->bytes + PAGE_SIZE - 1) >> PAGE_SHIFT) - | ||
62 | (vec->addr >> PAGE_SHIFT); | ||
63 | } | ||
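
A worked example of the page count computed above: round the end address up to a page boundary, shift down to a page index, and subtract the page index of the start address. PAGE_SHIFT is taken as 12 (4 KiB pages) for illustration:

/* Stand-alone version of rds_pages_in_vec()'s arithmetic. */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

static unsigned int pages_in_vec(uint64_t addr, uint64_t bytes)
{
	if (addr + bytes <= addr || bytes > (uint64_t)UINT32_MAX)
		return 0;
	return ((addr + bytes + (1 << PAGE_SHIFT) - 1) >> PAGE_SHIFT) -
	       (addr >> PAGE_SHIFT);
}

int main(void)
{
	/* 8 KiB starting 3 bytes into a page spans 3 pages */
	printf("%u\n", pages_in_vec(0x1003, 0x2000));
	return 0;
}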
64 | |||
65 | static struct rds_mr *rds_mr_tree_walk(struct rb_root *root, u64 key, | ||
66 | struct rds_mr *insert) | ||
67 | { | ||
68 | struct rb_node **p = &root->rb_node; | ||
69 | struct rb_node *parent = NULL; | ||
70 | struct rds_mr *mr; | ||
71 | |||
72 | while (*p) { | ||
73 | parent = *p; | ||
74 | mr = rb_entry(parent, struct rds_mr, r_rb_node); | ||
75 | |||
76 | if (key < mr->r_key) | ||
77 | p = &(*p)->rb_left; | ||
78 | else if (key > mr->r_key) | ||
79 | p = &(*p)->rb_right; | ||
80 | else | ||
81 | return mr; | ||
82 | } | ||
83 | |||
84 | if (insert) { | ||
85 | rb_link_node(&insert->r_rb_node, parent, p); | ||
86 | rb_insert_color(&insert->r_rb_node, root); | ||
87 | atomic_inc(&insert->r_refcount); | ||
88 | } | ||
89 | return NULL; | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | * Destroy the transport-specific part of a MR. | ||
94 | */ | ||
95 | static void rds_destroy_mr(struct rds_mr *mr) | ||
96 | { | ||
97 | struct rds_sock *rs = mr->r_sock; | ||
98 | void *trans_private = NULL; | ||
99 | unsigned long flags; | ||
100 | |||
101 | rdsdebug("RDS: destroy mr key is %x refcnt %u\n", | ||
102 | mr->r_key, atomic_read(&mr->r_refcount)); | ||
103 | |||
104 | if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state)) | ||
105 | return; | ||
106 | |||
107 | spin_lock_irqsave(&rs->rs_rdma_lock, flags); | ||
108 | if (!RB_EMPTY_NODE(&mr->r_rb_node)) | ||
109 | rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); | ||
110 | trans_private = mr->r_trans_private; | ||
111 | mr->r_trans_private = NULL; | ||
112 | spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); | ||
113 | |||
114 | if (trans_private) | ||
115 | mr->r_trans->free_mr(trans_private, mr->r_invalidate); | ||
116 | } | ||
117 | |||
118 | void __rds_put_mr_final(struct rds_mr *mr) | ||
119 | { | ||
120 | rds_destroy_mr(mr); | ||
121 | kfree(mr); | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * By the time this is called we can't have any more ioctls called on | ||
126 | * the socket so we don't need to worry about racing with others. | ||
127 | */ | ||
128 | void rds_rdma_drop_keys(struct rds_sock *rs) | ||
129 | { | ||
130 | struct rds_mr *mr; | ||
131 | struct rb_node *node; | ||
132 | |||
133 | /* Release any MRs associated with this socket */ | ||
134 | while ((node = rb_first(&rs->rs_rdma_keys))) { | ||
135 | mr = container_of(node, struct rds_mr, r_rb_node); | ||
136 | if (mr->r_trans == rs->rs_transport) | ||
137 | mr->r_invalidate = 0; | ||
138 | rds_mr_put(mr); | ||
139 | } | ||
140 | |||
141 | if (rs->rs_transport && rs->rs_transport->flush_mrs) | ||
142 | rs->rs_transport->flush_mrs(); | ||
143 | } | ||
144 | |||
145 | /* | ||
146 | * Helper function to pin user pages. | ||
147 | */ | ||
148 | static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages, | ||
149 | struct page **pages, int write) | ||
150 | { | ||
151 | int ret; | ||
152 | |||
153 | down_read(¤t->mm->mmap_sem); | ||
154 | ret = get_user_pages(current, current->mm, user_addr, | ||
155 | nr_pages, write, 0, pages, NULL); | ||
156 | up_read(¤t->mm->mmap_sem); | ||
157 | |||
158 | if (0 <= ret && (unsigned) ret < nr_pages) { | ||
159 | while (ret--) | ||
160 | put_page(pages[ret]); | ||
161 | ret = -EFAULT; | ||
162 | } | ||
163 | |||
164 | return ret; | ||
165 | } | ||
166 | |||
167 | static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, | ||
168 | u64 *cookie_ret, struct rds_mr **mr_ret) | ||
169 | { | ||
170 | struct rds_mr *mr = NULL, *found; | ||
171 | unsigned int nr_pages; | ||
172 | struct page **pages = NULL; | ||
173 | struct scatterlist *sg; | ||
174 | void *trans_private; | ||
175 | unsigned long flags; | ||
176 | rds_rdma_cookie_t cookie; | ||
177 | unsigned int nents; | ||
178 | long i; | ||
179 | int ret; | ||
180 | |||
181 | if (rs->rs_bound_addr == 0) { | ||
182 | ret = -ENOTCONN; /* XXX not a great errno */ | ||
183 | goto out; | ||
184 | } | ||
185 | |||
186 | if (rs->rs_transport->get_mr == NULL) { | ||
187 | ret = -EOPNOTSUPP; | ||
188 | goto out; | ||
189 | } | ||
190 | |||
191 | nr_pages = rds_pages_in_vec(&args->vec); | ||
192 | if (nr_pages == 0) { | ||
193 | ret = -EINVAL; | ||
194 | goto out; | ||
195 | } | ||
196 | |||
197 | rdsdebug("RDS: get_mr addr %llx len %llu nr_pages %u\n", | ||
198 | args->vec.addr, args->vec.bytes, nr_pages); | ||
199 | |||
200 | /* XXX clamp nr_pages to limit the size of this alloc? */ | ||
201 | pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); | ||
202 | if (pages == NULL) { | ||
203 | ret = -ENOMEM; | ||
204 | goto out; | ||
205 | } | ||
206 | |||
207 | mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL); | ||
208 | if (mr == NULL) { | ||
209 | ret = -ENOMEM; | ||
210 | goto out; | ||
211 | } | ||
212 | |||
213 | atomic_set(&mr->r_refcount, 1); | ||
214 | RB_CLEAR_NODE(&mr->r_rb_node); | ||
215 | mr->r_trans = rs->rs_transport; | ||
216 | mr->r_sock = rs; | ||
217 | |||
218 | if (args->flags & RDS_RDMA_USE_ONCE) | ||
219 | mr->r_use_once = 1; | ||
220 | if (args->flags & RDS_RDMA_INVALIDATE) | ||
221 | mr->r_invalidate = 1; | ||
222 | if (args->flags & RDS_RDMA_READWRITE) | ||
223 | mr->r_write = 1; | ||
224 | |||
225 | /* | ||
226 | * Pin the pages that make up the user buffer and transfer the page | ||
227 | * pointers to the mr's sg array. We check to see if we've mapped | ||
228 | * the whole region after transferring the partial page references | ||
229 | * to the sg array so that we can have one page ref cleanup path. | ||
230 | * | ||
231 | * For now we have no flag that tells us whether the mapping is | ||
232 | * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to | ||
233 | * the zero page. | ||
234 | */ | ||
235 | ret = rds_pin_pages(args->vec.addr & PAGE_MASK, nr_pages, pages, 1); | ||
236 | if (ret < 0) | ||
237 | goto out; | ||
238 | |||
239 | nents = ret; | ||
240 | sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL); | ||
241 | if (sg == NULL) { | ||
242 | ret = -ENOMEM; | ||
243 | goto out; | ||
244 | } | ||
245 | WARN_ON(!nents); | ||
246 | sg_init_table(sg, nents); | ||
247 | |||
248 | /* Stick all pages into the scatterlist */ | ||
249 | for (i = 0 ; i < nents; i++) | ||
250 | sg_set_page(&sg[i], pages[i], PAGE_SIZE, 0); | ||
251 | |||
252 | rdsdebug("RDS: trans_private nents is %u\n", nents); | ||
253 | |||
254 | /* Obtain a transport specific MR. If this succeeds, the | ||
255 | * s/g list is now owned by the MR. | ||
256 | * Note that dma_map() implies that pending writes are | ||
257 | * flushed to RAM, so no dma_sync is needed here. */ | ||
258 | trans_private = rs->rs_transport->get_mr(sg, nents, rs, | ||
259 | &mr->r_key); | ||
260 | |||
261 | if (IS_ERR(trans_private)) { | ||
262 | for (i = 0 ; i < nents; i++) | ||
263 | put_page(sg_page(&sg[i])); | ||
264 | kfree(sg); | ||
265 | ret = PTR_ERR(trans_private); | ||
266 | goto out; | ||
267 | } | ||
268 | |||
269 | mr->r_trans_private = trans_private; | ||
270 | |||
271 | rdsdebug("RDS: get_mr put_user key is %x cookie_addr %p\n", | ||
272 | mr->r_key, (void *)(unsigned long) args->cookie_addr); | ||
273 | |||
274 | /* The user may pass us an unaligned address, but we can only | ||
275 | * map page aligned regions. So we keep the offset, and build | ||
276 | * a 64bit cookie containing <R_Key, offset> and pass that | ||
277 | * around. */ | ||
278 | cookie = rds_rdma_make_cookie(mr->r_key, args->vec.addr & ~PAGE_MASK); | ||
279 | if (cookie_ret) | ||
280 | *cookie_ret = cookie; | ||
281 | |||
282 | if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long) args->cookie_addr)) { | ||
283 | ret = -EFAULT; | ||
284 | goto out; | ||
285 | } | ||
286 | |||
287 | /* Inserting the new MR into the rbtree bumps its | ||
288 | * reference count. */ | ||
289 | spin_lock_irqsave(&rs->rs_rdma_lock, flags); | ||
290 | found = rds_mr_tree_walk(&rs->rs_rdma_keys, mr->r_key, mr); | ||
291 | spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); | ||
292 | |||
293 | BUG_ON(found && found != mr); | ||
294 | |||
295 | rdsdebug("RDS: get_mr key is %x\n", mr->r_key); | ||
296 | if (mr_ret) { | ||
297 | atomic_inc(&mr->r_refcount); | ||
298 | *mr_ret = mr; | ||
299 | } | ||
300 | |||
301 | ret = 0; | ||
302 | out: | ||
303 | kfree(pages); | ||
304 | if (mr) | ||
305 | rds_mr_put(mr); | ||
306 | return ret; | ||
307 | } | ||
308 | |||
309 | int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen) | ||
310 | { | ||
311 | struct rds_get_mr_args args; | ||
312 | |||
313 | if (optlen != sizeof(struct rds_get_mr_args)) | ||
314 | return -EINVAL; | ||
315 | |||
316 | if (copy_from_user(&args, (struct rds_get_mr_args __user *)optval, | ||
317 | sizeof(struct rds_get_mr_args))) | ||
318 | return -EFAULT; | ||
319 | |||
320 | return __rds_rdma_map(rs, &args, NULL, NULL); | ||
321 | } | ||
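rds_get_mr() above is the kernel side of a socket option. A hypothetical userspace sketch of driving it follows; it assumes SOL_RDS, RDS_GET_MR, struct rds_get_mr_args, struct rds_iovec and the RDS_RDMA_* flags come from the uapi <linux/rds.h> added by this patch set, and that the setsockopt plumbing in af_rds.c (not shown in this hunk) routes the option here. The field names mirror what the code above reads.

```c
/*
 * Hypothetical userspace sketch (not part of the patch): register a
 * buffer for RDMA through the RDS_GET_MR socket option serviced by
 * rds_get_mr() above.
 */
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/rds.h>

static int register_mr(int rds_fd, void *buf, size_t len, uint64_t *cookie)
{
	struct rds_get_mr_args args;

	memset(&args, 0, sizeof(args));
	args.vec.addr    = (uint64_t)(unsigned long)buf;
	args.vec.bytes   = len;
	/* __rds_rdma_map() writes the <R_Key,offset> cookie back here */
	args.cookie_addr = (uint64_t)(unsigned long)cookie;
	args.flags       = RDS_RDMA_USE_ONCE;	/* MR torn down after first use */

	return setsockopt(rds_fd, SOL_RDS, RDS_GET_MR, &args, sizeof(args));
}
```

The cookie written back is the value built by rds_rdma_make_cookie(); passing a cookie of 0 to RDS_FREE_MR later asks rds_free_mr() above to flush unused MRs rather than free a specific one.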
322 | |||
323 | /* | ||
324 | * Free the MR indicated by the given R_Key | ||
325 | */ | ||
326 | int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen) | ||
327 | { | ||
328 | struct rds_free_mr_args args; | ||
329 | struct rds_mr *mr; | ||
330 | unsigned long flags; | ||
331 | |||
332 | if (optlen != sizeof(struct rds_free_mr_args)) | ||
333 | return -EINVAL; | ||
334 | |||
335 | if (copy_from_user(&args, (struct rds_free_mr_args __user *)optval, | ||
336 | sizeof(struct rds_free_mr_args))) | ||
337 | return -EFAULT; | ||
338 | |||
339 | /* Special case - a null cookie means flush all unused MRs */ | ||
340 | if (args.cookie == 0) { | ||
341 | if (!rs->rs_transport || !rs->rs_transport->flush_mrs) | ||
342 | return -EINVAL; | ||
343 | rs->rs_transport->flush_mrs(); | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | /* Look up the MR given its R_key and remove it from the rbtree | ||
348 | * so nobody else finds it. | ||
349 | * This should also prevent races with rds_rdma_unuse. | ||
350 | */ | ||
351 | spin_lock_irqsave(&rs->rs_rdma_lock, flags); | ||
352 | mr = rds_mr_tree_walk(&rs->rs_rdma_keys, rds_rdma_cookie_key(args.cookie), NULL); | ||
353 | if (mr) { | ||
354 | rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); | ||
355 | RB_CLEAR_NODE(&mr->r_rb_node); | ||
356 | if (args.flags & RDS_RDMA_INVALIDATE) | ||
357 | mr->r_invalidate = 1; | ||
358 | } | ||
359 | spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); | ||
360 | |||
361 | if (!mr) | ||
362 | return -EINVAL; | ||
363 | |||
364 | /* | ||
365 | * call rds_destroy_mr() ourselves so that we're sure it's done by the time | ||
366 | * we return. If we let rds_mr_put() do it it might not happen until | ||
367 | * someone else drops their ref. | ||
368 | */ | ||
369 | rds_destroy_mr(mr); | ||
370 | rds_mr_put(mr); | ||
371 | return 0; | ||
372 | } | ||
373 | |||
374 | /* | ||
375 | * This is called when we receive an extension header that | ||
376 | * tells us this MR was used. It allows us to implement | ||
377 | * use_once semantics | ||
378 | */ | ||
379 | void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force) | ||
380 | { | ||
381 | struct rds_mr *mr; | ||
382 | unsigned long flags; | ||
383 | int zot_me = 0; | ||
384 | |||
385 | spin_lock_irqsave(&rs->rs_rdma_lock, flags); | ||
386 | mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); | ||
387 | if (mr && (mr->r_use_once || force)) { | ||
388 | rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); | ||
389 | RB_CLEAR_NODE(&mr->r_rb_node); | ||
390 | zot_me = 1; | ||
391 | } else if (mr) | ||
392 | atomic_inc(&mr->r_refcount); | ||
393 | spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); | ||
394 | |||
395 | /* May have to issue a dma_sync on this memory region. | ||
396 | * Note we could avoid this if the operation was a RDMA READ, | ||
397 | * but at this point we can't tell. */ | ||
398 | if (mr != NULL) { | ||
399 | if (mr->r_trans->sync_mr) | ||
400 | mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE); | ||
401 | |||
402 | /* If the MR was marked as invalidate, this will | ||
403 | * trigger an async flush. */ | ||
404 | if (zot_me) | ||
405 | rds_destroy_mr(mr); | ||
406 | rds_mr_put(mr); | ||
407 | } | ||
408 | } | ||
409 | |||
410 | void rds_rdma_free_op(struct rds_rdma_op *ro) | ||
411 | { | ||
412 | unsigned int i; | ||
413 | |||
414 | for (i = 0; i < ro->r_nents; i++) { | ||
415 | struct page *page = sg_page(&ro->r_sg[i]); | ||
416 | |||
417 | /* Mark page dirty if it was possibly modified, which | ||
418 | * is the case for a RDMA_READ which copies from remote | ||
419 | * to local memory */ | ||
420 | if (!ro->r_write) | ||
421 | set_page_dirty(page); | ||
422 | put_page(page); | ||
423 | } | ||
424 | |||
425 | kfree(ro->r_notifier); | ||
426 | kfree(ro); | ||
427 | } | ||
428 | |||
429 | /* | ||
430 | * args is a pointer to an in-kernel copy in the sendmsg cmsg. | ||
431 | */ | ||
432 | static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs, | ||
433 | struct rds_rdma_args *args) | ||
434 | { | ||
435 | struct rds_iovec vec; | ||
436 | struct rds_rdma_op *op = NULL; | ||
437 | unsigned int nr_pages; | ||
438 | unsigned int max_pages; | ||
439 | unsigned int nr_bytes; | ||
440 | struct page **pages = NULL; | ||
441 | struct rds_iovec __user *local_vec; | ||
442 | struct scatterlist *sg; | ||
443 | unsigned int nr; | ||
444 | unsigned int i, j; | ||
445 | int ret; | ||
446 | |||
447 | |||
448 | if (rs->rs_bound_addr == 0) { | ||
449 | ret = -ENOTCONN; /* XXX not a great errno */ | ||
450 | goto out; | ||
451 | } | ||
452 | |||
453 | if (args->nr_local > (u64)UINT_MAX) { | ||
454 | ret = -EMSGSIZE; | ||
455 | goto out; | ||
456 | } | ||
457 | |||
458 | nr_pages = 0; | ||
459 | max_pages = 0; | ||
460 | |||
461 | local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; | ||
462 | |||
463 | /* figure out the number of pages in the vector */ | ||
464 | for (i = 0; i < args->nr_local; i++) { | ||
465 | if (copy_from_user(&vec, &local_vec[i], | ||
466 | sizeof(struct rds_iovec))) { | ||
467 | ret = -EFAULT; | ||
468 | goto out; | ||
469 | } | ||
470 | |||
471 | nr = rds_pages_in_vec(&vec); | ||
472 | if (nr == 0) { | ||
473 | ret = -EINVAL; | ||
474 | goto out; | ||
475 | } | ||
476 | |||
477 | max_pages = max(nr, max_pages); | ||
478 | nr_pages += nr; | ||
479 | } | ||
480 | |||
481 | pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL); | ||
482 | if (pages == NULL) { | ||
483 | ret = -ENOMEM; | ||
484 | goto out; | ||
485 | } | ||
486 | |||
487 | op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL); | ||
488 | if (op == NULL) { | ||
489 | ret = -ENOMEM; | ||
490 | goto out; | ||
491 | } | ||
492 | |||
493 | op->r_write = !!(args->flags & RDS_RDMA_READWRITE); | ||
494 | op->r_fence = !!(args->flags & RDS_RDMA_FENCE); | ||
495 | op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); | ||
496 | op->r_recverr = rs->rs_recverr; | ||
497 | WARN_ON(!nr_pages); | ||
498 | sg_init_table(op->r_sg, nr_pages); | ||
499 | |||
500 | if (op->r_notify || op->r_recverr) { | ||
501 | /* We allocate an uninitialized notifier here, because | ||
502 | * we don't want to do that in the completion handler. We | ||
503 | * would have to use GFP_ATOMIC there, and don't want to deal | ||
504 | * with failed allocations. | ||
505 | */ | ||
506 | op->r_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL); | ||
507 | if (!op->r_notifier) { | ||
508 | ret = -ENOMEM; | ||
509 | goto out; | ||
510 | } | ||
511 | op->r_notifier->n_user_token = args->user_token; | ||
512 | op->r_notifier->n_status = RDS_RDMA_SUCCESS; | ||
513 | } | ||
514 | |||
515 | /* The cookie contains the R_Key of the remote memory region, and | ||
516 | * optionally an offset into it. This is how we implement RDMA into | ||
517 | * unaligned memory. | ||
518 | * When setting up the RDMA, we need to add that offset to the | ||
519 | * destination address (which is really an offset into the MR) | ||
520 | * FIXME: We may want to move this into ib_rdma.c | ||
521 | */ | ||
522 | op->r_key = rds_rdma_cookie_key(args->cookie); | ||
523 | op->r_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie); | ||
524 | |||
525 | nr_bytes = 0; | ||
526 | |||
527 | rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n", | ||
528 | (unsigned long long)args->nr_local, | ||
529 | (unsigned long long)args->remote_vec.addr, | ||
530 | op->r_key); | ||
531 | |||
532 | for (i = 0; i < args->nr_local; i++) { | ||
533 | if (copy_from_user(&vec, &local_vec[i], | ||
534 | sizeof(struct rds_iovec))) { | ||
535 | ret = -EFAULT; | ||
536 | goto out; | ||
537 | } | ||
538 | |||
539 | nr = rds_pages_in_vec(&vec); | ||
540 | if (nr == 0) { | ||
541 | ret = -EINVAL; | ||
542 | goto out; | ||
543 | } | ||
544 | |||
545 | rs->rs_user_addr = vec.addr; | ||
546 | rs->rs_user_bytes = vec.bytes; | ||
547 | |||
548 | /* did the user change the vec under us? */ | ||
549 | if (nr > max_pages || op->r_nents + nr > nr_pages) { | ||
550 | ret = -EINVAL; | ||
551 | goto out; | ||
552 | } | ||
553 | /* If it's a WRITE operation, we want to pin the pages for reading. | ||
554 | * If it's a READ operation, we need to pin the pages for writing. | ||
555 | */ | ||
556 | ret = rds_pin_pages(vec.addr & PAGE_MASK, nr, pages, !op->r_write); | ||
557 | if (ret < 0) | ||
558 | goto out; | ||
559 | |||
560 | rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n", | ||
561 | nr_bytes, nr, vec.bytes, vec.addr); | ||
562 | |||
563 | nr_bytes += vec.bytes; | ||
564 | |||
565 | for (j = 0; j < nr; j++) { | ||
566 | unsigned int offset = vec.addr & ~PAGE_MASK; | ||
567 | |||
568 | sg = &op->r_sg[op->r_nents + j]; | ||
569 | sg_set_page(sg, pages[j], | ||
570 | min_t(unsigned int, vec.bytes, PAGE_SIZE - offset), | ||
571 | offset); | ||
572 | |||
573 | rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n", | ||
574 | sg->offset, sg->length, vec.addr, vec.bytes); | ||
575 | |||
576 | vec.addr += sg->length; | ||
577 | vec.bytes -= sg->length; | ||
578 | } | ||
579 | |||
580 | op->r_nents += nr; | ||
581 | } | ||
582 | |||
583 | |||
584 | if (nr_bytes > args->remote_vec.bytes) { | ||
585 | rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n", | ||
586 | nr_bytes, | ||
587 | (unsigned int) args->remote_vec.bytes); | ||
588 | ret = -EINVAL; | ||
589 | goto out; | ||
590 | } | ||
591 | op->r_bytes = nr_bytes; | ||
592 | |||
593 | ret = 0; | ||
594 | out: | ||
595 | kfree(pages); | ||
596 | if (ret) { | ||
597 | if (op) | ||
598 | rds_rdma_free_op(op); | ||
599 | op = ERR_PTR(ret); | ||
600 | } | ||
601 | return op; | ||
602 | } | ||
603 | |||
604 | /* | ||
605 | * The application asks for a RDMA transfer. | ||
606 | * Extract all arguments and set up the rdma_op | ||
607 | */ | ||
608 | int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | ||
609 | struct cmsghdr *cmsg) | ||
610 | { | ||
611 | struct rds_rdma_op *op; | ||
612 | |||
613 | if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) | ||
614 | || rm->m_rdma_op != NULL) | ||
615 | return -EINVAL; | ||
616 | |||
617 | op = rds_rdma_prepare(rs, CMSG_DATA(cmsg)); | ||
618 | if (IS_ERR(op)) | ||
619 | return PTR_ERR(op); | ||
620 | rds_stats_inc(s_send_rdma); | ||
621 | rm->m_rdma_op = op; | ||
622 | return 0; | ||
623 | } | ||
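The argument block consumed above travels in a sendmsg() control message. A hedged userspace sketch of building it follows; RDS_CMSG_RDMA_ARGS and SOL_RDS are assumed to come from the uapi headers, and the dispatch from rds_sendmsg() to this function is not shown in this hunk, but the rds_rdma_args fields are exactly the ones rds_rdma_prepare() reads.

```c
/*
 * Hypothetical userspace sketch (not part of the patch): attach RDMA
 * arguments to sendmsg() so that rds_cmsg_rdma_args() above builds the
 * rds_rdma_op for the message.
 */
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/rds.h>

static ssize_t send_with_rdma(int rds_fd, struct msghdr *msg, uint64_t cookie,
			      struct rds_iovec *local, unsigned int nr_local,
			      uint64_t remote_addr, uint64_t remote_len)
{
	char cbuf[CMSG_SPACE(sizeof(struct rds_rdma_args))];
	struct rds_rdma_args *args;
	struct cmsghdr *cmsg;

	msg->msg_control = cbuf;
	msg->msg_controllen = sizeof(cbuf);

	cmsg = CMSG_FIRSTHDR(msg);
	cmsg->cmsg_level = SOL_RDS;
	cmsg->cmsg_type  = RDS_CMSG_RDMA_ARGS;
	cmsg->cmsg_len   = CMSG_LEN(sizeof(struct rds_rdma_args));

	args = (struct rds_rdma_args *)CMSG_DATA(cmsg);
	memset(args, 0, sizeof(*args));
	args->cookie           = cookie;	/* <R_Key,offset> from the peer's RDS_GET_MR */
	args->remote_vec.addr  = remote_addr;
	args->remote_vec.bytes = remote_len;
	args->local_vec_addr   = (uint64_t)(unsigned long)local;
	args->nr_local         = nr_local;
	args->flags            = RDS_RDMA_READWRITE | RDS_RDMA_NOTIFY_ME;

	return sendmsg(rds_fd, msg, 0);
}
```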
624 | |||
625 | /* | ||
626 | * The application wants us to pass an RDMA destination (aka MR) | ||
627 | * to the remote | ||
628 | */ | ||
629 | int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, | ||
630 | struct cmsghdr *cmsg) | ||
631 | { | ||
632 | unsigned long flags; | ||
633 | struct rds_mr *mr; | ||
634 | u32 r_key; | ||
635 | int err = 0; | ||
636 | |||
637 | if (cmsg->cmsg_len < CMSG_LEN(sizeof(rds_rdma_cookie_t)) | ||
638 | || rm->m_rdma_cookie != 0) | ||
639 | return -EINVAL; | ||
640 | |||
641 | memcpy(&rm->m_rdma_cookie, CMSG_DATA(cmsg), sizeof(rm->m_rdma_cookie)); | ||
642 | |||
643 | /* We are reusing a previously mapped MR here. Most likely, the | ||
644 | * application has written to the buffer, so we need to explicitly | ||
645 | * flush those writes to RAM. Otherwise the HCA may not see them | ||
646 | * when doing a DMA from that buffer. | ||
647 | */ | ||
648 | r_key = rds_rdma_cookie_key(rm->m_rdma_cookie); | ||
649 | |||
650 | spin_lock_irqsave(&rs->rs_rdma_lock, flags); | ||
651 | mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); | ||
652 | if (mr == NULL) | ||
653 | err = -EINVAL; /* invalid r_key */ | ||
654 | else | ||
655 | atomic_inc(&mr->r_refcount); | ||
656 | spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); | ||
657 | |||
658 | if (mr) { | ||
659 | mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE); | ||
660 | rm->m_rdma_mr = mr; | ||
661 | } | ||
662 | return err; | ||
663 | } | ||
664 | |||
665 | /* | ||
666 | * The application passes us an address range it wants to enable RDMA | ||
667 | * to/from. We map the area, and save the <R_Key,offset> pair | ||
668 | * in rm->m_rdma_cookie. This causes it to be sent along to the peer | ||
669 | * in an extension header. | ||
670 | */ | ||
671 | int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, | ||
672 | struct cmsghdr *cmsg) | ||
673 | { | ||
674 | if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_get_mr_args)) | ||
675 | || rm->m_rdma_cookie != 0) | ||
676 | return -EINVAL; | ||
677 | |||
678 | return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr); | ||
679 | } | ||
diff --git a/net/rds/rdma.h b/net/rds/rdma.h new file mode 100644 index 000000000000..425512098b0b --- /dev/null +++ b/net/rds/rdma.h | |||
@@ -0,0 +1,84 @@ | |||
1 | #ifndef _RDS_RDMA_H | ||
2 | #define _RDS_RDMA_H | ||
3 | |||
4 | #include <linux/rbtree.h> | ||
5 | #include <linux/spinlock.h> | ||
6 | #include <linux/scatterlist.h> | ||
7 | |||
8 | #include "rds.h" | ||
9 | |||
10 | struct rds_mr { | ||
11 | struct rb_node r_rb_node; | ||
12 | atomic_t r_refcount; | ||
13 | u32 r_key; | ||
14 | |||
15 | /* A copy of the creation flags */ | ||
16 | unsigned int r_use_once:1; | ||
17 | unsigned int r_invalidate:1; | ||
18 | unsigned int r_write:1; | ||
19 | |||
20 | /* This is for RDS_MR_DEAD. | ||
21 | * It would be nice & consistent to make this part of the above | ||
22 | * bit field here, but we need to use test_and_set_bit. | ||
23 | */ | ||
24 | unsigned long r_state; | ||
25 | struct rds_sock *r_sock; /* back pointer to the socket that owns us */ | ||
26 | struct rds_transport *r_trans; | ||
27 | void *r_trans_private; | ||
28 | }; | ||
29 | |||
30 | /* Flags for mr->r_state */ | ||
31 | #define RDS_MR_DEAD 0 | ||
32 | |||
33 | struct rds_rdma_op { | ||
34 | u32 r_key; | ||
35 | u64 r_remote_addr; | ||
36 | unsigned int r_write:1; | ||
37 | unsigned int r_fence:1; | ||
38 | unsigned int r_notify:1; | ||
39 | unsigned int r_recverr:1; | ||
40 | unsigned int r_mapped:1; | ||
41 | struct rds_notifier *r_notifier; | ||
42 | unsigned int r_bytes; | ||
43 | unsigned int r_nents; | ||
44 | unsigned int r_count; | ||
45 | struct scatterlist r_sg[0]; | ||
46 | }; | ||
47 | |||
48 | static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset) | ||
49 | { | ||
50 | return r_key | (((u64) offset) << 32); | ||
51 | } | ||
52 | |||
53 | static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie) | ||
54 | { | ||
55 | return cookie; | ||
56 | } | ||
57 | |||
58 | static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) | ||
59 | { | ||
60 | return cookie >> 32; | ||
61 | } | ||
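A quick round trip through the three helpers above makes the cookie layout explicit: the R_Key occupies the low 32 bits and the byte offset the high 32 bits.

```c
/* Worked example for the cookie helpers above (values in hex): */
rds_rdma_cookie_t c = rds_rdma_make_cookie(0x1234abcd, 0xffc);

/* c                          == 0x00000ffc1234abcdULL       */
/* rds_rdma_cookie_key(c)     == 0x1234abcd  (low 32 bits)   */
/* rds_rdma_cookie_offset(c)  == 0x00000ffc  (high 32 bits)  */
```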
62 | |||
63 | int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen); | ||
64 | int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen); | ||
65 | void rds_rdma_drop_keys(struct rds_sock *rs); | ||
66 | int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | ||
67 | struct cmsghdr *cmsg); | ||
68 | int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, | ||
69 | struct cmsghdr *cmsg); | ||
70 | int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, | ||
71 | struct cmsghdr *cmsg); | ||
72 | int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, | ||
73 | struct cmsghdr *cmsg); | ||
74 | void rds_rdma_free_op(struct rds_rdma_op *ro); | ||
75 | void rds_rdma_send_complete(struct rds_message *rm, int); | ||
76 | |||
77 | extern void __rds_put_mr_final(struct rds_mr *mr); | ||
78 | static inline void rds_mr_put(struct rds_mr *mr) | ||
79 | { | ||
80 | if (atomic_dec_and_test(&mr->r_refcount)) | ||
81 | __rds_put_mr_final(mr); | ||
82 | } | ||
83 | |||
84 | #endif | ||
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c new file mode 100644 index 000000000000..7b19024f9706 --- /dev/null +++ b/net/rds/rdma_transport.c | |||
@@ -0,0 +1,214 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2009 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <rdma/rdma_cm.h> | ||
34 | |||
35 | #include "rdma_transport.h" | ||
36 | |||
37 | static struct rdma_cm_id *rds_iw_listen_id; | ||
38 | |||
39 | int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, | ||
40 | struct rdma_cm_event *event) | ||
41 | { | ||
42 | /* this can be null in the listening path */ | ||
43 | struct rds_connection *conn = cm_id->context; | ||
44 | struct rds_transport *trans; | ||
45 | int ret = 0; | ||
46 | |||
47 | rdsdebug("conn %p id %p handling event %u\n", conn, cm_id, | ||
48 | event->event); | ||
49 | |||
50 | if (cm_id->device->node_type == RDMA_NODE_RNIC) | ||
51 | trans = &rds_iw_transport; | ||
52 | else | ||
53 | trans = &rds_ib_transport; | ||
54 | |||
55 | /* Prevent shutdown from tearing down the connection | ||
56 | * while we're executing. */ | ||
57 | if (conn) { | ||
58 | mutex_lock(&conn->c_cm_lock); | ||
59 | |||
60 | /* If the connection is being shut down, bail out | ||
61 | * right away. We return 0 so cm_id doesn't get | ||
62 | * destroyed prematurely */ | ||
63 | if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING) { | ||
64 | /* Reject incoming connections while we're tearing | ||
65 | * down an existing one. */ | ||
66 | if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) | ||
67 | ret = 1; | ||
68 | goto out; | ||
69 | } | ||
70 | } | ||
71 | |||
72 | switch (event->event) { | ||
73 | case RDMA_CM_EVENT_CONNECT_REQUEST: | ||
74 | ret = trans->cm_handle_connect(cm_id, event); | ||
75 | break; | ||
76 | |||
77 | case RDMA_CM_EVENT_ADDR_RESOLVED: | ||
78 | /* XXX do we need to clean up if this fails? */ | ||
79 | ret = rdma_resolve_route(cm_id, | ||
80 | RDS_RDMA_RESOLVE_TIMEOUT_MS); | ||
81 | break; | ||
82 | |||
83 | case RDMA_CM_EVENT_ROUTE_RESOLVED: | ||
84 | /* XXX worry about racing with listen acceptance */ | ||
85 | ret = trans->cm_initiate_connect(cm_id); | ||
86 | break; | ||
87 | |||
88 | case RDMA_CM_EVENT_ESTABLISHED: | ||
89 | trans->cm_connect_complete(conn, event); | ||
90 | break; | ||
91 | |||
92 | case RDMA_CM_EVENT_ADDR_ERROR: | ||
93 | case RDMA_CM_EVENT_ROUTE_ERROR: | ||
94 | case RDMA_CM_EVENT_CONNECT_ERROR: | ||
95 | case RDMA_CM_EVENT_UNREACHABLE: | ||
96 | case RDMA_CM_EVENT_REJECTED: | ||
97 | case RDMA_CM_EVENT_DEVICE_REMOVAL: | ||
98 | case RDMA_CM_EVENT_ADDR_CHANGE: | ||
99 | if (conn) | ||
100 | rds_conn_drop(conn); | ||
101 | break; | ||
102 | |||
103 | case RDMA_CM_EVENT_DISCONNECTED: | ||
104 | printk(KERN_WARNING "RDS/IW: DISCONNECT event - dropping connection " | ||
105 | "%pI4->%pI4\n", &conn->c_laddr, | ||
106 | &conn->c_faddr); | ||
107 | rds_conn_drop(conn); | ||
108 | break; | ||
109 | |||
110 | default: | ||
111 | /* things like device disconnect? */ | ||
112 | printk(KERN_ERR "unknown event %u\n", event->event); | ||
113 | BUG(); | ||
114 | break; | ||
115 | } | ||
116 | |||
117 | out: | ||
118 | if (conn) | ||
119 | mutex_unlock(&conn->c_cm_lock); | ||
120 | |||
121 | rdsdebug("id %p event %u handling ret %d\n", cm_id, event->event, ret); | ||
122 | |||
123 | return ret; | ||
124 | } | ||
125 | |||
126 | static int __init rds_rdma_listen_init(void) | ||
127 | { | ||
128 | struct sockaddr_in sin; | ||
129 | struct rdma_cm_id *cm_id; | ||
130 | int ret; | ||
131 | |||
132 | cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP); | ||
133 | if (IS_ERR(cm_id)) { | ||
134 | ret = PTR_ERR(cm_id); | ||
135 | printk(KERN_ERR "RDS/IW: failed to setup listener, " | ||
136 | "rdma_create_id() returned %d\n", ret); | ||
137 | goto out; | ||
138 | } | ||
139 | |||
140 | sin.sin_family = PF_INET; | ||
141 | sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); | ||
142 | sin.sin_port = (__force u16)htons(RDS_PORT); | ||
143 | |||
144 | /* | ||
145 | * XXX I bet this binds the cm_id to a device. If we want to support | ||
146 | * fail-over we'll have to take this into consideration. | ||
147 | */ | ||
148 | ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); | ||
149 | if (ret) { | ||
150 | printk(KERN_ERR "RDS/IW: failed to setup listener, " | ||
151 | "rdma_bind_addr() returned %d\n", ret); | ||
152 | goto out; | ||
153 | } | ||
154 | |||
155 | ret = rdma_listen(cm_id, 128); | ||
156 | if (ret) { | ||
157 | printk(KERN_ERR "RDS/IW: failed to setup listener, " | ||
158 | "rdma_listen() returned %d\n", ret); | ||
159 | goto out; | ||
160 | } | ||
161 | |||
162 | rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT); | ||
163 | |||
164 | rds_iw_listen_id = cm_id; | ||
165 | cm_id = NULL; | ||
166 | out: | ||
167 | if (cm_id) | ||
168 | rdma_destroy_id(cm_id); | ||
169 | return ret; | ||
170 | } | ||
171 | |||
172 | static void rds_rdma_listen_stop(void) | ||
173 | { | ||
174 | if (rds_iw_listen_id) { | ||
175 | rdsdebug("cm %p\n", rds_iw_listen_id); | ||
176 | rdma_destroy_id(rds_iw_listen_id); | ||
177 | rds_iw_listen_id = NULL; | ||
178 | } | ||
179 | } | ||
180 | |||
181 | int __init rds_rdma_init(void) | ||
182 | { | ||
183 | int ret; | ||
184 | |||
185 | ret = rds_rdma_listen_init(); | ||
186 | if (ret) | ||
187 | goto out; | ||
188 | |||
189 | ret = rds_iw_init(); | ||
190 | if (ret) | ||
191 | goto err_iw_init; | ||
192 | |||
193 | ret = rds_ib_init(); | ||
194 | if (ret) | ||
195 | goto err_ib_init; | ||
196 | |||
197 | goto out; | ||
198 | |||
199 | err_ib_init: | ||
200 | rds_iw_exit(); | ||
201 | err_iw_init: | ||
202 | rds_rdma_listen_stop(); | ||
203 | out: | ||
204 | return ret; | ||
205 | } | ||
206 | |||
207 | void rds_rdma_exit(void) | ||
208 | { | ||
209 | /* stop listening first to ensure no new connections are attempted */ | ||
210 | rds_rdma_listen_stop(); | ||
211 | rds_ib_exit(); | ||
212 | rds_iw_exit(); | ||
213 | } | ||
214 | |||
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h new file mode 100644 index 000000000000..2f2c7d976c21 --- /dev/null +++ b/net/rds/rdma_transport.h | |||
@@ -0,0 +1,28 @@ | |||
1 | #ifndef _RDMA_TRANSPORT_H | ||
2 | #define _RDMA_TRANSPORT_H | ||
3 | |||
4 | #include <rdma/ib_verbs.h> | ||
5 | #include <rdma/rdma_cm.h> | ||
6 | #include "rds.h" | ||
7 | |||
8 | #define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000 | ||
9 | |||
10 | int rds_rdma_conn_connect(struct rds_connection *conn); | ||
11 | int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, | ||
12 | struct rdma_cm_event *event); | ||
13 | |||
14 | /* from rdma_transport.c */ | ||
15 | int rds_rdma_init(void); | ||
16 | void rds_rdma_exit(void); | ||
17 | |||
18 | /* from ib.c */ | ||
19 | extern struct rds_transport rds_ib_transport; | ||
20 | int rds_ib_init(void); | ||
21 | void rds_ib_exit(void); | ||
22 | |||
23 | /* from iw.c */ | ||
24 | extern struct rds_transport rds_iw_transport; | ||
25 | int rds_iw_init(void); | ||
26 | void rds_iw_exit(void); | ||
27 | |||
28 | #endif | ||
diff --git a/net/rds/rds.h b/net/rds/rds.h new file mode 100644 index 000000000000..060400704979 --- /dev/null +++ b/net/rds/rds.h | |||
@@ -0,0 +1,686 @@ | |||
1 | #ifndef _RDS_RDS_H | ||
2 | #define _RDS_RDS_H | ||
3 | |||
4 | #include <net/sock.h> | ||
5 | #include <linux/scatterlist.h> | ||
6 | #include <linux/highmem.h> | ||
7 | #include <rdma/rdma_cm.h> | ||
8 | #include <linux/mutex.h> | ||
9 | #include <linux/rds.h> | ||
10 | |||
11 | #include "info.h" | ||
12 | |||
13 | /* | ||
14 | * RDS Network protocol version | ||
15 | */ | ||
16 | #define RDS_PROTOCOL_3_0 0x0300 | ||
17 | #define RDS_PROTOCOL_3_1 0x0301 | ||
18 | #define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 | ||
19 | #define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) | ||
20 | #define RDS_PROTOCOL_MINOR(v) ((v) & 255) | ||
21 | #define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) | ||
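The version macros above pack the major and minor numbers into one byte each, for example:

```c
/* RDS_PROTOCOL(3, 1)         == 0x0301 == RDS_PROTOCOL_3_1 */
/* RDS_PROTOCOL_MAJOR(0x0301) == 0x0301 >> 8  == 3          */
/* RDS_PROTOCOL_MINOR(0x0301) == 0x0301 & 255 == 1          */
```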
22 | |||
23 | /* | ||
24 | * XXX randomly chosen, but at least seems to be unused: | ||
25 | * # 18464-18768 Unassigned | ||
26 | * We should do better. We want a reserved port to discourage unpriv'ed | ||
27 | * userspace from listening. | ||
28 | */ | ||
29 | #define RDS_PORT 18634 | ||
30 | |||
31 | #ifdef DEBUG | ||
32 | #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) | ||
33 | #else | ||
34 | /* sigh, pr_debug() causes unused variable warnings */ | ||
35 | static inline void __attribute__ ((format (printf, 1, 2))) | ||
36 | rdsdebug(char *fmt, ...) | ||
37 | { | ||
38 | } | ||
39 | #endif | ||
40 | |||
41 | /* XXX is there one of these somewhere? */ | ||
42 | #define ceil(x, y) \ | ||
43 | ({ unsigned long __x = (x), __y = (y); (__x + __y - 1) / __y; }) | ||
44 | |||
45 | #define RDS_FRAG_SHIFT 12 | ||
46 | #define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) | ||
47 | |||
48 | #define RDS_CONG_MAP_BYTES (65536 / 8) | ||
49 | #define RDS_CONG_MAP_LONGS (RDS_CONG_MAP_BYTES / sizeof(unsigned long)) | ||
50 | #define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE) | ||
51 | #define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) | ||
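The congestion map holds one bit per 16-bit port; assuming the common case of a 64-bit kernel with 4 KB pages, the macros above work out to:

```c
/*
 * RDS_CONG_MAP_BYTES     = 65536 / 8   = 8192   (one bit per port)
 * RDS_CONG_MAP_LONGS     = 8192 / 8    = 1024   (sizeof(unsigned long) == 8)
 * RDS_CONG_MAP_PAGES     = 8192 / 4096 = 2      (PAGE_SIZE == 4096)
 * RDS_CONG_MAP_PAGE_BITS = 4096 * 8    = 32768  (ports covered per page)
 */
```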
52 | |||
53 | struct rds_cong_map { | ||
54 | struct rb_node m_rb_node; | ||
55 | __be32 m_addr; | ||
56 | wait_queue_head_t m_waitq; | ||
57 | struct list_head m_conn_list; | ||
58 | unsigned long m_page_addrs[RDS_CONG_MAP_PAGES]; | ||
59 | }; | ||
60 | |||
61 | |||
62 | /* | ||
63 | * This is how we will track the connection state: | ||
64 | * A connection is always in one of the following | ||
65 | * states. Updates to the state are atomic and imply | ||
66 | * a memory barrier. | ||
67 | */ | ||
68 | enum { | ||
69 | RDS_CONN_DOWN = 0, | ||
70 | RDS_CONN_CONNECTING, | ||
71 | RDS_CONN_DISCONNECTING, | ||
72 | RDS_CONN_UP, | ||
73 | RDS_CONN_ERROR, | ||
74 | }; | ||
75 | |||
76 | /* Bits for c_flags */ | ||
77 | #define RDS_LL_SEND_FULL 0 | ||
78 | #define RDS_RECONNECT_PENDING 1 | ||
79 | |||
80 | struct rds_connection { | ||
81 | struct hlist_node c_hash_node; | ||
82 | __be32 c_laddr; | ||
83 | __be32 c_faddr; | ||
84 | unsigned int c_loopback:1; | ||
85 | struct rds_connection *c_passive; | ||
86 | |||
87 | struct rds_cong_map *c_lcong; | ||
88 | struct rds_cong_map *c_fcong; | ||
89 | |||
90 | struct mutex c_send_lock; /* protect send ring */ | ||
91 | struct rds_message *c_xmit_rm; | ||
92 | unsigned long c_xmit_sg; | ||
93 | unsigned int c_xmit_hdr_off; | ||
94 | unsigned int c_xmit_data_off; | ||
95 | unsigned int c_xmit_rdma_sent; | ||
96 | |||
97 | spinlock_t c_lock; /* protect msg queues */ | ||
98 | u64 c_next_tx_seq; | ||
99 | struct list_head c_send_queue; | ||
100 | struct list_head c_retrans; | ||
101 | |||
102 | u64 c_next_rx_seq; | ||
103 | |||
104 | struct rds_transport *c_trans; | ||
105 | void *c_transport_data; | ||
106 | |||
107 | atomic_t c_state; | ||
108 | unsigned long c_flags; | ||
109 | unsigned long c_reconnect_jiffies; | ||
110 | struct delayed_work c_send_w; | ||
111 | struct delayed_work c_recv_w; | ||
112 | struct delayed_work c_conn_w; | ||
113 | struct work_struct c_down_w; | ||
114 | struct mutex c_cm_lock; /* protect conn state & cm */ | ||
115 | |||
116 | struct list_head c_map_item; | ||
117 | unsigned long c_map_queued; | ||
118 | unsigned long c_map_offset; | ||
119 | unsigned long c_map_bytes; | ||
120 | |||
121 | unsigned int c_unacked_packets; | ||
122 | unsigned int c_unacked_bytes; | ||
123 | |||
124 | /* Protocol version */ | ||
125 | unsigned int c_version; | ||
126 | }; | ||
127 | |||
128 | #define RDS_FLAG_CONG_BITMAP 0x01 | ||
129 | #define RDS_FLAG_ACK_REQUIRED 0x02 | ||
130 | #define RDS_FLAG_RETRANSMITTED 0x04 | ||
131 | #define RDS_MAX_ADV_CREDIT 127 | ||
132 | |||
133 | /* | ||
134 | * Maximum space available for extension headers. | ||
135 | */ | ||
136 | #define RDS_HEADER_EXT_SPACE 16 | ||
137 | |||
138 | struct rds_header { | ||
139 | __be64 h_sequence; | ||
140 | __be64 h_ack; | ||
141 | __be32 h_len; | ||
142 | __be16 h_sport; | ||
143 | __be16 h_dport; | ||
144 | u8 h_flags; | ||
145 | u8 h_credit; | ||
146 | u8 h_padding[4]; | ||
147 | __sum16 h_csum; | ||
148 | |||
149 | u8 h_exthdr[RDS_HEADER_EXT_SPACE]; | ||
150 | }; | ||
151 | |||
152 | /* | ||
153 | * Reserved - indicates end of extensions | ||
154 | */ | ||
155 | #define RDS_EXTHDR_NONE 0 | ||
156 | |||
157 | /* | ||
158 | * This extension header is included in the very | ||
159 | * first message that is sent on a new connection, | ||
160 | * and identifies the protocol level. This will help | ||
161 | * rolling updates if a future change requires breaking | ||
162 | * the protocol. | ||
163 | * NB: This is no longer true for IB, where we do a version | ||
164 | * negotiation during the connection setup phase (protocol | ||
165 | * version information is included in the RDMA CM private data). | ||
166 | */ | ||
167 | #define RDS_EXTHDR_VERSION 1 | ||
168 | struct rds_ext_header_version { | ||
169 | __be32 h_version; | ||
170 | }; | ||
171 | |||
172 | /* | ||
173 | * This extension header is included in the RDS message | ||
174 | * chasing an RDMA operation. | ||
175 | */ | ||
176 | #define RDS_EXTHDR_RDMA 2 | ||
177 | struct rds_ext_header_rdma { | ||
178 | __be32 h_rdma_rkey; | ||
179 | }; | ||
180 | |||
181 | /* | ||
182 | * This extension header tells the peer about the | ||
183 | * destination <R_Key,offset> of the requested RDMA | ||
184 | * operation. | ||
185 | */ | ||
186 | #define RDS_EXTHDR_RDMA_DEST 3 | ||
187 | struct rds_ext_header_rdma_dest { | ||
188 | __be32 h_rdma_rkey; | ||
189 | __be32 h_rdma_offset; | ||
190 | }; | ||
191 | |||
192 | #define __RDS_EXTHDR_MAX 16 /* for now */ | ||
193 | |||
194 | struct rds_incoming { | ||
195 | atomic_t i_refcount; | ||
196 | struct list_head i_item; | ||
197 | struct rds_connection *i_conn; | ||
198 | struct rds_header i_hdr; | ||
199 | unsigned long i_rx_jiffies; | ||
200 | __be32 i_saddr; | ||
201 | |||
202 | rds_rdma_cookie_t i_rdma_cookie; | ||
203 | }; | ||
204 | |||
205 | /* | ||
206 | * m_sock_item and m_conn_item are on lists that are serialized under | ||
207 | * conn->c_lock. m_sock_item has additional meaning in that once it is empty | ||
208 | * the message will not be put back on the retransmit list after being sent. | ||
209 | * messages that are canceled while being sent rely on this. | ||
210 | * | ||
211 | * m_inc is used by loopback so that it can pass an incoming message straight | ||
212 | * back up into the rx path. It embeds a wire header which is also used by | ||
213 | * the send path, which is kind of awkward. | ||
214 | * | ||
215 | * m_sock_item indicates the message's presence on a socket's send or receive | ||
216 | * queue. m_rs will point to that socket. | ||
217 | * | ||
218 | * m_daddr is used by cancellation to prune messages to a given destination. | ||
219 | * | ||
220 | * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock | ||
221 | * nesting. As paths iterate over messages on a sock, or conn, they must | ||
222 | * also lock the conn, or sock, to remove the message from those lists too. | ||
223 | * Testing the flag to determine if the message is still on the lists lets | ||
224 | * us avoid testing the list_head directly. That means each path can use | ||
225 | * the message's list_head to keep it on a local list while juggling locks | ||
226 | * without confusing the other path. | ||
227 | * | ||
228 | * m_ack_seq is an optional field set by transports who need a different | ||
229 | * sequence number range to invalidate. They can use this in a callback | ||
230 | * that they pass to rds_send_drop_acked() to see if each message has been | ||
231 | * acked. The HAS_ACK_SEQ flag can be used to detect messages which haven't | ||
232 | * had ack_seq set yet. | ||
233 | */ | ||
234 | #define RDS_MSG_ON_SOCK 1 | ||
235 | #define RDS_MSG_ON_CONN 2 | ||
236 | #define RDS_MSG_HAS_ACK_SEQ 3 | ||
237 | #define RDS_MSG_ACK_REQUIRED 4 | ||
238 | #define RDS_MSG_RETRANSMITTED 5 | ||
239 | #define RDS_MSG_MAPPED 6 | ||
240 | #define RDS_MSG_PAGEVEC 7 | ||
241 | |||
242 | struct rds_message { | ||
243 | atomic_t m_refcount; | ||
244 | struct list_head m_sock_item; | ||
245 | struct list_head m_conn_item; | ||
246 | struct rds_incoming m_inc; | ||
247 | u64 m_ack_seq; | ||
248 | __be32 m_daddr; | ||
249 | unsigned long m_flags; | ||
250 | |||
251 | /* Never access m_rs without holding m_rs_lock. | ||
252 | * Lock nesting is | ||
253 | * rm->m_rs_lock | ||
254 | * -> rs->rs_lock | ||
255 | */ | ||
256 | spinlock_t m_rs_lock; | ||
257 | struct rds_sock *m_rs; | ||
258 | struct rds_rdma_op *m_rdma_op; | ||
259 | rds_rdma_cookie_t m_rdma_cookie; | ||
260 | struct rds_mr *m_rdma_mr; | ||
261 | unsigned int m_nents; | ||
262 | unsigned int m_count; | ||
263 | struct scatterlist m_sg[0]; | ||
264 | }; | ||
265 | |||
266 | /* | ||
267 | * The RDS notifier is used (optionally) to tell the application about | ||
268 | * completed RDMA operations. Rather than keeping the whole rds message | ||
269 | * around on the queue, we allocate a small notifier that is put on the | ||
270 | * socket's notifier_list. Notifications are delivered to the application | ||
271 | * through control messages. | ||
272 | */ | ||
273 | struct rds_notifier { | ||
274 | struct list_head n_list; | ||
275 | uint64_t n_user_token; | ||
276 | int n_status; | ||
277 | }; | ||
278 | |||
279 | /** | ||
280 | * struct rds_transport - transport specific behavioural hooks | ||
281 | * | ||
282 | * @xmit: .xmit is called by rds_send_xmit() to tell the transport to send | ||
283 | * part of a message. The caller serializes on the send_sem so this | ||
284 | * doesn't need to be reentrant for a given conn. The header must be | ||
285 | * sent before the data payload. .xmit must be prepared to send a | ||
286 | * message with no data payload. .xmit should return the number of | ||
287 | * bytes that were sent down the connection, including header bytes. | ||
288 | * Returning 0 tells the caller that it doesn't need to perform any | ||
289 | * additional work now. This is usually the case when the transport has | ||
290 | * filled the sending queue for its connection and will handle | ||
291 | * triggering the rds thread to continue the send when space becomes | ||
292 | * available. Returning -EAGAIN tells the caller to retry the send | ||
293 | * immediately. Returning -ENOMEM tells the caller to retry the send at | ||
294 | * some point in the future. | ||
295 | * | ||
296 | * @conn_shutdown: conn_shutdown stops traffic on the given connection. Once | ||
297 | * it returns the connection can not call rds_recv_incoming(). | ||
298 | * This will only be called once after conn_connect returns | ||
299 | * non-zero success. The caller serializes this with | ||
300 | * the send and connecting paths (xmit_* and conn_*). The | ||
301 | * transport is responsible for other serialization, including | ||
302 | * rds_recv_incoming(). This is called in process context but | ||
303 | * should try hard not to block. | ||
304 | * | ||
305 | * @xmit_cong_map: This asks the transport to send the local bitmap down the | ||
306 | * given connection. XXX get a better story about the bitmap | ||
307 | * flag and header. | ||
308 | */ | ||
309 | |||
310 | struct rds_transport { | ||
311 | char t_name[TRANSNAMSIZ]; | ||
312 | struct list_head t_item; | ||
313 | struct module *t_owner; | ||
314 | unsigned int t_prefer_loopback:1; | ||
315 | |||
316 | int (*laddr_check)(__be32 addr); | ||
317 | int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp); | ||
318 | void (*conn_free)(void *data); | ||
319 | int (*conn_connect)(struct rds_connection *conn); | ||
320 | void (*conn_shutdown)(struct rds_connection *conn); | ||
321 | void (*xmit_prepare)(struct rds_connection *conn); | ||
322 | void (*xmit_complete)(struct rds_connection *conn); | ||
323 | int (*xmit)(struct rds_connection *conn, struct rds_message *rm, | ||
324 | unsigned int hdr_off, unsigned int sg, unsigned int off); | ||
325 | int (*xmit_cong_map)(struct rds_connection *conn, | ||
326 | struct rds_cong_map *map, unsigned long offset); | ||
327 | int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op); | ||
328 | int (*recv)(struct rds_connection *conn); | ||
329 | int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov, | ||
330 | size_t size); | ||
331 | void (*inc_purge)(struct rds_incoming *inc); | ||
332 | void (*inc_free)(struct rds_incoming *inc); | ||
333 | |||
334 | int (*cm_handle_connect)(struct rdma_cm_id *cm_id, | ||
335 | struct rdma_cm_event *event); | ||
336 | int (*cm_initiate_connect)(struct rdma_cm_id *cm_id); | ||
337 | void (*cm_connect_complete)(struct rds_connection *conn, | ||
338 | struct rdma_cm_event *event); | ||
339 | |||
340 | unsigned int (*stats_info_copy)(struct rds_info_iterator *iter, | ||
341 | unsigned int avail); | ||
342 | void (*exit)(void); | ||
343 | void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg, | ||
344 | struct rds_sock *rs, u32 *key_ret); | ||
345 | void (*sync_mr)(void *trans_private, int direction); | ||
346 | void (*free_mr)(void *trans_private, int invalidate); | ||
347 | void (*flush_mrs)(void); | ||
348 | }; | ||
349 | |||
350 | struct rds_sock { | ||
351 | struct sock rs_sk; | ||
352 | |||
353 | u64 rs_user_addr; | ||
354 | u64 rs_user_bytes; | ||
355 | |||
356 | /* | ||
357 | * bound_addr used for both incoming and outgoing, no INADDR_ANY | ||
358 | * support. | ||
359 | */ | ||
360 | struct rb_node rs_bound_node; | ||
361 | __be32 rs_bound_addr; | ||
362 | __be32 rs_conn_addr; | ||
363 | __be16 rs_bound_port; | ||
364 | __be16 rs_conn_port; | ||
365 | |||
366 | /* | ||
367 | * This is only used to communicate the transport between bind and | ||
368 | * initiating connections. All other trans use is referenced through | ||
369 | * the connection. | ||
370 | */ | ||
371 | struct rds_transport *rs_transport; | ||
372 | |||
373 | /* | ||
374 | * rds_sendmsg caches the conn it used the last time around. | ||
375 | * This helps avoid costly lookups. | ||
376 | */ | ||
377 | struct rds_connection *rs_conn; | ||
378 | |||
379 | /* flag indicating we were congested or not */ | ||
380 | int rs_congested; | ||
381 | |||
382 | /* rs_lock protects all these adjacent members before the newline */ | ||
383 | spinlock_t rs_lock; | ||
384 | struct list_head rs_send_queue; | ||
385 | u32 rs_snd_bytes; | ||
386 | int rs_rcv_bytes; | ||
387 | struct list_head rs_notify_queue; /* currently used for failed RDMAs */ | ||
388 | |||
389 | /* Congestion wake_up. If rs_cong_monitor is set, we use cong_mask | ||
390 | * to decide whether the application should be woken up. | ||
391 | * If not set, we use rs_cong_track to find out whether a cong map | ||
392 | * update arrived. | ||
393 | */ | ||
394 | uint64_t rs_cong_mask; | ||
395 | uint64_t rs_cong_notify; | ||
396 | struct list_head rs_cong_list; | ||
397 | unsigned long rs_cong_track; | ||
398 | |||
399 | /* | ||
400 | * rs_recv_lock protects the receive queue, and is | ||
401 | * used to serialize with rds_release. | ||
402 | */ | ||
403 | rwlock_t rs_recv_lock; | ||
404 | struct list_head rs_recv_queue; | ||
405 | |||
406 | /* just for stats reporting */ | ||
407 | struct list_head rs_item; | ||
408 | |||
409 | /* these have their own lock */ | ||
410 | spinlock_t rs_rdma_lock; | ||
411 | struct rb_root rs_rdma_keys; | ||
412 | |||
413 | /* Socket options - in case there will be more */ | ||
414 | unsigned char rs_recverr, | ||
415 | rs_cong_monitor; | ||
416 | }; | ||
417 | |||
418 | static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) | ||
419 | { | ||
420 | return container_of(sk, struct rds_sock, rs_sk); | ||
421 | } | ||
422 | static inline struct sock *rds_rs_to_sk(struct rds_sock *rs) | ||
423 | { | ||
424 | return &rs->rs_sk; | ||
425 | } | ||
426 | |||
427 | /* | ||
428 | * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value | ||
429 | * to account for overhead. We don't account for overhead, we just apply | ||
430 | * the number of payload bytes to the specified value. | ||
431 | */ | ||
432 | static inline int rds_sk_sndbuf(struct rds_sock *rs) | ||
433 | { | ||
434 | return rds_rs_to_sk(rs)->sk_sndbuf / 2; | ||
435 | } | ||
436 | static inline int rds_sk_rcvbuf(struct rds_sock *rs) | ||
437 | { | ||
438 | return rds_rs_to_sk(rs)->sk_rcvbuf / 2; | ||
439 | } | ||
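As the comment above notes, the socket core doubles the requested buffer sizes; these helpers undo that so RDS budgets in payload bytes. For example:

```c
/*
 * setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &(int){65536}, sizeof(int))
 *   -> sk->sk_sndbuf == 131072   (the core doubles it for overhead)
 *   -> rds_sk_sndbuf(rs) == 65536, i.e. RDS queues at most the number
 *      of payload bytes the application actually asked for.
 */
```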
440 | |||
441 | struct rds_statistics { | ||
442 | uint64_t s_conn_reset; | ||
443 | uint64_t s_recv_drop_bad_checksum; | ||
444 | uint64_t s_recv_drop_old_seq; | ||
445 | uint64_t s_recv_drop_no_sock; | ||
446 | uint64_t s_recv_drop_dead_sock; | ||
447 | uint64_t s_recv_deliver_raced; | ||
448 | uint64_t s_recv_delivered; | ||
449 | uint64_t s_recv_queued; | ||
450 | uint64_t s_recv_immediate_retry; | ||
451 | uint64_t s_recv_delayed_retry; | ||
452 | uint64_t s_recv_ack_required; | ||
453 | uint64_t s_recv_rdma_bytes; | ||
454 | uint64_t s_recv_ping; | ||
455 | uint64_t s_send_queue_empty; | ||
456 | uint64_t s_send_queue_full; | ||
457 | uint64_t s_send_sem_contention; | ||
458 | uint64_t s_send_sem_queue_raced; | ||
459 | uint64_t s_send_immediate_retry; | ||
460 | uint64_t s_send_delayed_retry; | ||
461 | uint64_t s_send_drop_acked; | ||
462 | uint64_t s_send_ack_required; | ||
463 | uint64_t s_send_queued; | ||
464 | uint64_t s_send_rdma; | ||
465 | uint64_t s_send_rdma_bytes; | ||
466 | uint64_t s_send_pong; | ||
467 | uint64_t s_page_remainder_hit; | ||
468 | uint64_t s_page_remainder_miss; | ||
469 | uint64_t s_copy_to_user; | ||
470 | uint64_t s_copy_from_user; | ||
471 | uint64_t s_cong_update_queued; | ||
472 | uint64_t s_cong_update_received; | ||
473 | uint64_t s_cong_send_error; | ||
474 | uint64_t s_cong_send_blocked; | ||
475 | }; | ||
476 | |||
477 | /* af_rds.c */ | ||
478 | void rds_sock_addref(struct rds_sock *rs); | ||
479 | void rds_sock_put(struct rds_sock *rs); | ||
480 | void rds_wake_sk_sleep(struct rds_sock *rs); | ||
481 | static inline void __rds_wake_sk_sleep(struct sock *sk) | ||
482 | { | ||
483 | wait_queue_head_t *waitq = sk->sk_sleep; | ||
484 | |||
485 | if (!sock_flag(sk, SOCK_DEAD) && waitq) | ||
486 | wake_up(waitq); | ||
487 | } | ||
488 | extern wait_queue_head_t rds_poll_waitq; | ||
489 | |||
490 | |||
491 | /* bind.c */ | ||
492 | int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); | ||
493 | void rds_remove_bound(struct rds_sock *rs); | ||
494 | struct rds_sock *rds_find_bound(__be32 addr, __be16 port); | ||
495 | |||
496 | /* cong.c */ | ||
497 | int rds_cong_get_maps(struct rds_connection *conn); | ||
498 | void rds_cong_add_conn(struct rds_connection *conn); | ||
499 | void rds_cong_remove_conn(struct rds_connection *conn); | ||
500 | void rds_cong_set_bit(struct rds_cong_map *map, __be16 port); | ||
501 | void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port); | ||
502 | int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock, struct rds_sock *rs); | ||
503 | void rds_cong_queue_updates(struct rds_cong_map *map); | ||
504 | void rds_cong_map_updated(struct rds_cong_map *map, uint64_t); | ||
505 | int rds_cong_updated_since(unsigned long *recent); | ||
506 | void rds_cong_add_socket(struct rds_sock *); | ||
507 | void rds_cong_remove_socket(struct rds_sock *); | ||
508 | void rds_cong_exit(void); | ||
509 | struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); | ||
510 | |||
511 | /* conn.c */ | ||
512 | int __init rds_conn_init(void); | ||
513 | void rds_conn_exit(void); | ||
514 | struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr, | ||
515 | struct rds_transport *trans, gfp_t gfp); | ||
516 | struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr, | ||
517 | struct rds_transport *trans, gfp_t gfp); | ||
518 | void rds_conn_destroy(struct rds_connection *conn); | ||
519 | void rds_conn_reset(struct rds_connection *conn); | ||
520 | void rds_conn_drop(struct rds_connection *conn); | ||
521 | void rds_for_each_conn_info(struct socket *sock, unsigned int len, | ||
522 | struct rds_info_iterator *iter, | ||
523 | struct rds_info_lengths *lens, | ||
524 | int (*visitor)(struct rds_connection *, void *), | ||
525 | size_t item_len); | ||
526 | void __rds_conn_error(struct rds_connection *conn, const char *, ...) | ||
527 | __attribute__ ((format (printf, 2, 3))); | ||
528 | #define rds_conn_error(conn, fmt...) \ | ||
529 | __rds_conn_error(conn, KERN_WARNING "RDS: " fmt) | ||
530 | |||
531 | static inline int | ||
532 | rds_conn_transition(struct rds_connection *conn, int old, int new) | ||
533 | { | ||
534 | return atomic_cmpxchg(&conn->c_state, old, new) == old; | ||
535 | } | ||
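rds_conn_transition() is the cmpxchg that lets exactly one caller claim a state change. A sketch of the typical pattern (the real call sites live in conn.c and threads.c, which are not part of this hunk):

```c
/*
 * Only the caller that wins the atomic DOWN -> CONNECTING transition
 * queues the connect work, so concurrent senders cannot start two
 * connection attempts for the same conn.
 */
if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING))
	queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
```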
536 | |||
537 | static inline int | ||
538 | rds_conn_state(struct rds_connection *conn) | ||
539 | { | ||
540 | return atomic_read(&conn->c_state); | ||
541 | } | ||
542 | |||
543 | static inline int | ||
544 | rds_conn_up(struct rds_connection *conn) | ||
545 | { | ||
546 | return atomic_read(&conn->c_state) == RDS_CONN_UP; | ||
547 | } | ||
548 | |||
549 | static inline int | ||
550 | rds_conn_connecting(struct rds_connection *conn) | ||
551 | { | ||
552 | return atomic_read(&conn->c_state) == RDS_CONN_CONNECTING; | ||
553 | } | ||
554 | |||
555 | /* message.c */ | ||
556 | struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); | ||
557 | struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, | ||
558 | size_t total_len); | ||
559 | struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); | ||
560 | void rds_message_populate_header(struct rds_header *hdr, __be16 sport, | ||
561 | __be16 dport, u64 seq); | ||
562 | int rds_message_add_extension(struct rds_header *hdr, | ||
563 | unsigned int type, const void *data, unsigned int len); | ||
564 | int rds_message_next_extension(struct rds_header *hdr, | ||
565 | unsigned int *pos, void *buf, unsigned int *buflen); | ||
566 | int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version); | ||
567 | int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version); | ||
568 | int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset); | ||
569 | int rds_message_inc_copy_to_user(struct rds_incoming *inc, | ||
570 | struct iovec *first_iov, size_t size); | ||
571 | void rds_message_inc_purge(struct rds_incoming *inc); | ||
572 | void rds_message_inc_free(struct rds_incoming *inc); | ||
573 | void rds_message_addref(struct rds_message *rm); | ||
574 | void rds_message_put(struct rds_message *rm); | ||
575 | void rds_message_wait(struct rds_message *rm); | ||
576 | void rds_message_unmapped(struct rds_message *rm); | ||
577 | |||
578 | static inline void rds_message_make_checksum(struct rds_header *hdr) | ||
579 | { | ||
580 | hdr->h_csum = 0; | ||
581 | hdr->h_csum = ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2); | ||
582 | } | ||
583 | |||
584 | static inline int rds_message_verify_checksum(const struct rds_header *hdr) | ||
585 | { | ||
586 | return !hdr->h_csum || ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2) == 0; | ||
587 | } | ||
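For the checksum helpers above: with the fields declared earlier, struct rds_header is 48 bytes, so sizeof(*hdr) >> 2 feeds 12 32-bit words to ip_fast_csum(). Verification accepts a header whose ones-complement sum, including the stored h_csum, folds to zero, or a header with h_csum == 0, which is treated as "no checksum in use". A sketch of the pairing (the actual call sites are in the send and receive paths, not shown in this hunk):

```c
/* Sender side: stamp the header before it goes on the wire. */
rds_message_make_checksum(&rm->m_inc.i_hdr);

/* Receiver side: drop anything that fails verification. */
if (!rds_message_verify_checksum(&inc->i_hdr))
	rds_stats_inc(s_recv_drop_bad_checksum);
```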
588 | |||
589 | |||
590 | /* page.c */ | ||
591 | int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, | ||
592 | gfp_t gfp); | ||
593 | int rds_page_copy_user(struct page *page, unsigned long offset, | ||
594 | void __user *ptr, unsigned long bytes, | ||
595 | int to_user); | ||
596 | #define rds_page_copy_to_user(page, offset, ptr, bytes) \ | ||
597 | rds_page_copy_user(page, offset, ptr, bytes, 1) | ||
598 | #define rds_page_copy_from_user(page, offset, ptr, bytes) \ | ||
599 | rds_page_copy_user(page, offset, ptr, bytes, 0) | ||
600 | void rds_page_exit(void); | ||
601 | |||
602 | /* recv.c */ | ||
603 | void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, | ||
604 | __be32 saddr); | ||
605 | void rds_inc_addref(struct rds_incoming *inc); | ||
606 | void rds_inc_put(struct rds_incoming *inc); | ||
607 | void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, | ||
608 | struct rds_incoming *inc, gfp_t gfp, enum km_type km); | ||
609 | int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | ||
610 | size_t size, int msg_flags); | ||
611 | void rds_clear_recv_queue(struct rds_sock *rs); | ||
612 | int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg); | ||
613 | void rds_inc_info_copy(struct rds_incoming *inc, | ||
614 | struct rds_info_iterator *iter, | ||
615 | __be32 saddr, __be32 daddr, int flip); | ||
616 | |||
617 | /* send.c */ | ||
618 | int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | ||
619 | size_t payload_len); | ||
620 | void rds_send_reset(struct rds_connection *conn); | ||
621 | int rds_send_xmit(struct rds_connection *conn); | ||
622 | struct sockaddr_in; | ||
623 | void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest); | ||
624 | typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack); | ||
625 | void rds_send_drop_acked(struct rds_connection *conn, u64 ack, | ||
626 | is_acked_func is_acked); | ||
627 | int rds_send_acked_before(struct rds_connection *conn, u64 seq); | ||
628 | void rds_send_remove_from_sock(struct list_head *messages, int status); | ||
629 | int rds_send_pong(struct rds_connection *conn, __be16 dport); | ||
630 | struct rds_message *rds_send_get_message(struct rds_connection *, | ||
631 | struct rds_rdma_op *); | ||
632 | |||
633 | /* rdma.c */ | ||
634 | void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); | ||
635 | |||
636 | /* stats.c */ | ||
637 | DECLARE_PER_CPU(struct rds_statistics, rds_stats); | ||
638 | #define rds_stats_inc_which(which, member) do { \ | ||
639 | per_cpu(which, get_cpu()).member++; \ | ||
640 | put_cpu(); \ | ||
641 | } while (0) | ||
642 | #define rds_stats_inc(member) rds_stats_inc_which(rds_stats, member) | ||
643 | #define rds_stats_add_which(which, member, count) do { \ | ||
644 | per_cpu(which, get_cpu()).member += count; \ | ||
645 | put_cpu(); \ | ||
646 | } while (0) | ||
647 | #define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count) | ||
648 | int __init rds_stats_init(void); | ||
649 | void rds_stats_exit(void); | ||
650 | void rds_stats_info_copy(struct rds_info_iterator *iter, | ||
651 | uint64_t *values, char **names, size_t nr); | ||
652 | |||
653 | /* sysctl.c */ | ||
654 | int __init rds_sysctl_init(void); | ||
655 | void rds_sysctl_exit(void); | ||
656 | extern unsigned long rds_sysctl_sndbuf_min; | ||
657 | extern unsigned long rds_sysctl_sndbuf_default; | ||
658 | extern unsigned long rds_sysctl_sndbuf_max; | ||
659 | extern unsigned long rds_sysctl_reconnect_min_jiffies; | ||
660 | extern unsigned long rds_sysctl_reconnect_max_jiffies; | ||
661 | extern unsigned int rds_sysctl_max_unacked_packets; | ||
662 | extern unsigned int rds_sysctl_max_unacked_bytes; | ||
663 | extern unsigned int rds_sysctl_ping_enable; | ||
664 | extern unsigned long rds_sysctl_trace_flags; | ||
665 | extern unsigned int rds_sysctl_trace_level; | ||
666 | |||
667 | /* threads.c */ | ||
668 | int __init rds_threads_init(void); | ||
669 | void rds_threads_exit(void); | ||
670 | extern struct workqueue_struct *rds_wq; | ||
671 | void rds_connect_worker(struct work_struct *); | ||
672 | void rds_shutdown_worker(struct work_struct *); | ||
673 | void rds_send_worker(struct work_struct *); | ||
674 | void rds_recv_worker(struct work_struct *); | ||
675 | void rds_connect_complete(struct rds_connection *conn); | ||
676 | |||
677 | /* transport.c */ | ||
678 | int rds_trans_register(struct rds_transport *trans); | ||
679 | void rds_trans_unregister(struct rds_transport *trans); | ||
680 | struct rds_transport *rds_trans_get_preferred(__be32 addr); | ||
681 | unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, | ||
682 | unsigned int avail); | ||
683 | int __init rds_trans_init(void); | ||
684 | void rds_trans_exit(void); | ||
685 | |||
686 | #endif | ||
diff --git a/net/rds/recv.c b/net/rds/recv.c new file mode 100644 index 000000000000..f2118c51cfa3 --- /dev/null +++ b/net/rds/recv.c | |||
@@ -0,0 +1,542 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <net/sock.h> | ||
35 | #include <linux/in.h> | ||
36 | |||
37 | #include "rds.h" | ||
38 | #include "rdma.h" | ||
39 | |||
40 | void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, | ||
41 | __be32 saddr) | ||
42 | { | ||
43 | atomic_set(&inc->i_refcount, 1); | ||
44 | INIT_LIST_HEAD(&inc->i_item); | ||
45 | inc->i_conn = conn; | ||
46 | inc->i_saddr = saddr; | ||
47 | inc->i_rdma_cookie = 0; | ||
48 | } | ||
49 | |||
50 | void rds_inc_addref(struct rds_incoming *inc) | ||
51 | { | ||
52 | rdsdebug("addref inc %p ref %d\n", inc, atomic_read(&inc->i_refcount)); | ||
53 | atomic_inc(&inc->i_refcount); | ||
54 | } | ||
55 | |||
56 | void rds_inc_put(struct rds_incoming *inc) | ||
57 | { | ||
58 | rdsdebug("put inc %p ref %d\n", inc, atomic_read(&inc->i_refcount)); | ||
59 | if (atomic_dec_and_test(&inc->i_refcount)) { | ||
60 | BUG_ON(!list_empty(&inc->i_item)); | ||
61 | |||
62 | inc->i_conn->c_trans->inc_free(inc); | ||
63 | } | ||
64 | } | ||
65 | |||
66 | static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk, | ||
67 | struct rds_cong_map *map, | ||
68 | int delta, __be16 port) | ||
69 | { | ||
70 | int now_congested; | ||
71 | |||
72 | if (delta == 0) | ||
73 | return; | ||
74 | |||
75 | rs->rs_rcv_bytes += delta; | ||
76 | now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs); | ||
77 | |||
78 | rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d " | ||
79 | "now_cong %d delta %d\n", | ||
80 | rs, &rs->rs_bound_addr, | ||
81 | ntohs(rs->rs_bound_port), rs->rs_rcv_bytes, | ||
82 | rds_sk_rcvbuf(rs), now_congested, delta); | ||
83 | |||
84 | /* wasn't -> am congested */ | ||
85 | if (!rs->rs_congested && now_congested) { | ||
86 | rs->rs_congested = 1; | ||
87 | rds_cong_set_bit(map, port); | ||
88 | rds_cong_queue_updates(map); | ||
89 | } | ||
90 | /* was -> aren't congested */ | ||
91 | /* Require more free space before reporting uncongested to prevent | ||
92 | bouncing cong/uncong state too often */ | ||
93 | else if (rs->rs_congested && (rs->rs_rcv_bytes < (rds_sk_rcvbuf(rs)/2))) { | ||
94 | rs->rs_congested = 0; | ||
95 | rds_cong_clear_bit(map, port); | ||
96 | rds_cong_queue_updates(map); | ||
97 | } | ||
98 | |||
99 | /* do nothing if no change in cong state */ | ||
100 | } | ||
101 | |||
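rds_recv_rcvbuf_delta() above applies a simple hysteresis: a port's congestion bit is set once the queued bytes exceed the socket receive buffer, but it is only cleared again after the backlog drains below half the buffer, so the bit does not bounce on every small delta. A compact sketch of just that threshold logic (the standalone function and its names are illustrative, not kernel API):

/* Hysteresis sketch: set "congested" above limit, clear below limit/2. */
static int demo_update_congestion(int congested, int queued_bytes, int limit)
{
	if (!congested && queued_bytes > limit)
		return 1;		/* wasn't -> am congested */
	if (congested && queued_bytes < limit / 2)
		return 0;		/* was -> aren't congested */
	return congested;		/* no change in cong state */
}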
102 | /* | ||
103 | * Process all extension headers that come with this message. | ||
104 | */ | ||
105 | static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock *rs) | ||
106 | { | ||
107 | struct rds_header *hdr = &inc->i_hdr; | ||
108 | unsigned int pos = 0, type, len; | ||
109 | union { | ||
110 | struct rds_ext_header_version version; | ||
111 | struct rds_ext_header_rdma rdma; | ||
112 | struct rds_ext_header_rdma_dest rdma_dest; | ||
113 | } buffer; | ||
114 | |||
115 | while (1) { | ||
116 | len = sizeof(buffer); | ||
117 | type = rds_message_next_extension(hdr, &pos, &buffer, &len); | ||
118 | if (type == RDS_EXTHDR_NONE) | ||
119 | break; | ||
120 | /* Process extension header here */ | ||
121 | switch (type) { | ||
122 | case RDS_EXTHDR_RDMA: | ||
123 | rds_rdma_unuse(rs, be32_to_cpu(buffer.rdma.h_rdma_rkey), 0); | ||
124 | break; | ||
125 | |||
126 | case RDS_EXTHDR_RDMA_DEST: | ||
127 | /* We ignore the size for now. We could stash it | ||
128 | * somewhere and use it for error checking. */ | ||
129 | inc->i_rdma_cookie = rds_rdma_make_cookie( | ||
130 | be32_to_cpu(buffer.rdma_dest.h_rdma_rkey), | ||
131 | be32_to_cpu(buffer.rdma_dest.h_rdma_offset)); | ||
132 | |||
133 | break; | ||
134 | } | ||
135 | } | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * The transport must make sure that this is serialized against other | ||
140 | * rx and conn reset on this specific conn. | ||
141 | * | ||
142 | * We currently assert that only one fragmented message will be sent | ||
143 | * down a connection at a time. This lets us reassemble in the conn | ||
144 | * instead of per-flow which means that we don't have to go digging through | ||
145 | * flows to tear down partial reassembly progress on conn failure and | ||
146 | * we save flow lookup and locking for each frag arrival. It does mean | ||
147 | * that small messages will wait behind large ones. Fragmenting at all | ||
148 | * is only to reduce the memory consumption of pre-posted buffers. | ||
149 | * | ||
150 | * The caller passes in saddr and daddr instead of us getting it from the | ||
151 | * conn. This lets loopback, who only has one conn for both directions, | ||
152 | * tell us which roles the addrs in the conn are playing for this message. | ||
153 | */ | ||
154 | void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, | ||
155 | struct rds_incoming *inc, gfp_t gfp, enum km_type km) | ||
156 | { | ||
157 | struct rds_sock *rs = NULL; | ||
158 | struct sock *sk; | ||
159 | unsigned long flags; | ||
160 | |||
161 | inc->i_conn = conn; | ||
162 | inc->i_rx_jiffies = jiffies; | ||
163 | |||
164 | rdsdebug("conn %p next %llu inc %p seq %llu len %u sport %u dport %u " | ||
165 | "flags 0x%x rx_jiffies %lu\n", conn, | ||
166 | (unsigned long long)conn->c_next_rx_seq, | ||
167 | inc, | ||
168 | (unsigned long long)be64_to_cpu(inc->i_hdr.h_sequence), | ||
169 | be32_to_cpu(inc->i_hdr.h_len), | ||
170 | be16_to_cpu(inc->i_hdr.h_sport), | ||
171 | be16_to_cpu(inc->i_hdr.h_dport), | ||
172 | inc->i_hdr.h_flags, | ||
173 | inc->i_rx_jiffies); | ||
174 | |||
175 | /* | ||
176 | * Sequence numbers should only increase. Messages get their | ||
177 | * sequence number as they're queued in a sending conn. They | ||
178 | * can be dropped, though, if the sending socket is closed before | ||
179 | * they hit the wire. So sequence numbers can skip forward | ||
180 | * under normal operation. They can also drop back in the conn | ||
181 | * failover case as previously sent messages are resent down the | ||
182 | * new instance of a conn. We drop those, otherwise we have | ||
183 | * to assume that the next valid seq does not come after a | ||
184 | * hole in the fragment stream. | ||
185 | * | ||
186 | * The headers don't give us a way to realize if fragments of | ||
187 | * a message have been dropped. We assume that frags that arrive | ||
188 | * to a flow are part of the current message on the flow that is | ||
189 | * being reassembled. This means that senders can't drop messages | ||
190 | * from the sending conn until all their frags are sent. | ||
191 | * | ||
192 | * XXX we could spend more on the wire to get more robust failure | ||
193 | * detection, arguably worth it to avoid data corruption. | ||
194 | */ | ||
195 | if (be64_to_cpu(inc->i_hdr.h_sequence) < conn->c_next_rx_seq | ||
196 | && (inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED)) { | ||
197 | rds_stats_inc(s_recv_drop_old_seq); | ||
198 | goto out; | ||
199 | } | ||
200 | conn->c_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1; | ||
201 | |||
202 | if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) { | ||
203 | rds_stats_inc(s_recv_ping); | ||
204 | rds_send_pong(conn, inc->i_hdr.h_sport); | ||
205 | goto out; | ||
206 | } | ||
207 | |||
208 | rs = rds_find_bound(daddr, inc->i_hdr.h_dport); | ||
209 | if (rs == NULL) { | ||
210 | rds_stats_inc(s_recv_drop_no_sock); | ||
211 | goto out; | ||
212 | } | ||
213 | |||
214 | /* Process extension headers */ | ||
215 | rds_recv_incoming_exthdrs(inc, rs); | ||
216 | |||
217 | /* We can be racing with rds_release() which marks the socket dead. */ | ||
218 | sk = rds_rs_to_sk(rs); | ||
219 | |||
220 | /* serialize with rds_release -> sock_orphan */ | ||
221 | write_lock_irqsave(&rs->rs_recv_lock, flags); | ||
222 | if (!sock_flag(sk, SOCK_DEAD)) { | ||
223 | rdsdebug("adding inc %p to rs %p's recv queue\n", inc, rs); | ||
224 | rds_stats_inc(s_recv_queued); | ||
225 | rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, | ||
226 | be32_to_cpu(inc->i_hdr.h_len), | ||
227 | inc->i_hdr.h_dport); | ||
228 | rds_inc_addref(inc); | ||
229 | list_add_tail(&inc->i_item, &rs->rs_recv_queue); | ||
230 | __rds_wake_sk_sleep(sk); | ||
231 | } else { | ||
232 | rds_stats_inc(s_recv_drop_dead_sock); | ||
233 | } | ||
234 | write_unlock_irqrestore(&rs->rs_recv_lock, flags); | ||
235 | |||
236 | out: | ||
237 | if (rs) | ||
238 | rds_sock_put(rs); | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * be very careful here. This is called as the condition in | ||
243 | * wait_event_*() and so needs to cope with being called many times. | ||
244 | */ | ||
245 | static int rds_next_incoming(struct rds_sock *rs, struct rds_incoming **inc) | ||
246 | { | ||
247 | unsigned long flags; | ||
248 | |||
249 | if (*inc == NULL) { | ||
250 | read_lock_irqsave(&rs->rs_recv_lock, flags); | ||
251 | if (!list_empty(&rs->rs_recv_queue)) { | ||
252 | *inc = list_entry(rs->rs_recv_queue.next, | ||
253 | struct rds_incoming, | ||
254 | i_item); | ||
255 | rds_inc_addref(*inc); | ||
256 | } | ||
257 | read_unlock_irqrestore(&rs->rs_recv_lock, flags); | ||
258 | } | ||
259 | |||
260 | return *inc != NULL; | ||
261 | } | ||
262 | |||
263 | static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc, | ||
264 | int drop) | ||
265 | { | ||
266 | struct sock *sk = rds_rs_to_sk(rs); | ||
267 | int ret = 0; | ||
268 | unsigned long flags; | ||
269 | |||
270 | write_lock_irqsave(&rs->rs_recv_lock, flags); | ||
271 | if (!list_empty(&inc->i_item)) { | ||
272 | ret = 1; | ||
273 | if (drop) { | ||
274 | /* XXX make sure this i_conn is reliable */ | ||
275 | rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, | ||
276 | -be32_to_cpu(inc->i_hdr.h_len), | ||
277 | inc->i_hdr.h_dport); | ||
278 | list_del_init(&inc->i_item); | ||
279 | rds_inc_put(inc); | ||
280 | } | ||
281 | } | ||
282 | write_unlock_irqrestore(&rs->rs_recv_lock, flags); | ||
283 | |||
284 | rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop); | ||
285 | return ret; | ||
286 | } | ||
287 | |||
288 | /* | ||
289 | * Pull errors off the error queue. | ||
290 | * If msghdr is NULL, we will just purge the error queue. | ||
291 | */ | ||
292 | int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr) | ||
293 | { | ||
294 | struct rds_notifier *notifier; | ||
295 | struct rds_rdma_notify cmsg; | ||
296 | unsigned int count = 0, max_messages = ~0U; | ||
297 | unsigned long flags; | ||
298 | LIST_HEAD(copy); | ||
299 | int err = 0; | ||
300 | |||
301 | |||
302 | /* put_cmsg copies to user space and thus may sleep. We can't do this | ||
303 | * with rs_lock held, so first grab as many notifications as we can stuff | ||
304 | * in the user provided cmsg buffer. We don't try to copy more, to avoid | ||
305 | * losing notifications - except when the buffer is so small that it wouldn't | ||
306 | * even hold a single notification. Then we give the caller as much of this single | ||
307 | * msg as we can squeeze in, and set MSG_CTRUNC. | ||
308 | */ | ||
309 | if (msghdr) { | ||
310 | max_messages = msghdr->msg_controllen / CMSG_SPACE(sizeof(cmsg)); | ||
311 | if (!max_messages) | ||
312 | max_messages = 1; | ||
313 | } | ||
314 | |||
315 | spin_lock_irqsave(&rs->rs_lock, flags); | ||
316 | while (!list_empty(&rs->rs_notify_queue) && count < max_messages) { | ||
317 | notifier = list_entry(rs->rs_notify_queue.next, | ||
318 | struct rds_notifier, n_list); | ||
319 | list_move(¬ifier->n_list, ©); | ||
320 | count++; | ||
321 | } | ||
322 | spin_unlock_irqrestore(&rs->rs_lock, flags); | ||
323 | |||
324 | if (!count) | ||
325 | return 0; | ||
326 | |||
327 | while (!list_empty(©)) { | ||
328 | notifier = list_entry(copy.next, struct rds_notifier, n_list); | ||
329 | |||
330 | if (msghdr) { | ||
331 | cmsg.user_token = notifier->n_user_token; | ||
332 | cmsg.status = notifier->n_status; | ||
333 | |||
334 | err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS, | ||
335 | sizeof(cmsg), &cmsg); | ||
336 | if (err) | ||
337 | break; | ||
338 | } | ||
339 | |||
340 | list_del_init(¬ifier->n_list); | ||
341 | kfree(notifier); | ||
342 | } | ||
343 | |||
344 | /* If we bailed out because of an error in put_cmsg, | ||
345 | * we may be left with one or more notifications that we | ||
346 | * didn't process. Return them to the head of the list. */ | ||
347 | if (!list_empty(©)) { | ||
348 | spin_lock_irqsave(&rs->rs_lock, flags); | ||
349 | list_splice(©, &rs->rs_notify_queue); | ||
350 | spin_unlock_irqrestore(&rs->rs_lock, flags); | ||
351 | } | ||
352 | |||
353 | return err; | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * Queue a congestion notification | ||
358 | */ | ||
359 | static int rds_notify_cong(struct rds_sock *rs, struct msghdr *msghdr) | ||
360 | { | ||
361 | uint64_t notify = rs->rs_cong_notify; | ||
362 | unsigned long flags; | ||
363 | int err; | ||
364 | |||
365 | err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_CONG_UPDATE, | ||
366 | sizeof(notify), ¬ify); | ||
367 | if (err) | ||
368 | return err; | ||
369 | |||
370 | spin_lock_irqsave(&rs->rs_lock, flags); | ||
371 | rs->rs_cong_notify &= ~notify; | ||
372 | spin_unlock_irqrestore(&rs->rs_lock, flags); | ||
373 | |||
374 | return 0; | ||
375 | } | ||
376 | |||
377 | /* | ||
378 | * Receive any control messages. | ||
379 | */ | ||
380 | static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg) | ||
381 | { | ||
382 | int ret = 0; | ||
383 | |||
384 | if (inc->i_rdma_cookie) { | ||
385 | ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST, | ||
386 | sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie); | ||
387 | if (ret) | ||
388 | return ret; | ||
389 | } | ||
390 | |||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | ||
395 | size_t size, int msg_flags) | ||
396 | { | ||
397 | struct sock *sk = sock->sk; | ||
398 | struct rds_sock *rs = rds_sk_to_rs(sk); | ||
399 | long timeo; | ||
400 | int ret = 0, nonblock = msg_flags & MSG_DONTWAIT; | ||
401 | struct sockaddr_in *sin; | ||
402 | struct rds_incoming *inc = NULL; | ||
403 | |||
404 | /* udp_recvmsg()->sock_recvtimeo() gets away without locking too.. */ | ||
405 | timeo = sock_rcvtimeo(sk, nonblock); | ||
406 | |||
407 | rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); | ||
408 | |||
409 | if (msg_flags & MSG_OOB) | ||
410 | goto out; | ||
411 | |||
412 | /* If there are pending notifications, do those - and nothing else */ | ||
413 | if (!list_empty(&rs->rs_notify_queue)) { | ||
414 | ret = rds_notify_queue_get(rs, msg); | ||
415 | goto out; | ||
416 | } | ||
417 | |||
418 | if (rs->rs_cong_notify) { | ||
419 | ret = rds_notify_cong(rs, msg); | ||
420 | goto out; | ||
421 | } | ||
422 | |||
423 | while (1) { | ||
424 | if (!rds_next_incoming(rs, &inc)) { | ||
425 | if (nonblock) { | ||
426 | ret = -EAGAIN; | ||
427 | break; | ||
428 | } | ||
429 | |||
430 | timeo = wait_event_interruptible_timeout(*sk->sk_sleep, | ||
431 | rds_next_incoming(rs, &inc), | ||
432 | timeo); | ||
433 | rdsdebug("recvmsg woke inc %p timeo %ld\n", inc, | ||
434 | timeo); | ||
435 | if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT) | ||
436 | continue; | ||
437 | |||
438 | ret = timeo; | ||
439 | if (ret == 0) | ||
440 | ret = -ETIMEDOUT; | ||
441 | break; | ||
442 | } | ||
443 | |||
444 | rdsdebug("copying inc %p from %pI4:%u to user\n", inc, | ||
445 | &inc->i_conn->c_faddr, | ||
446 | ntohs(inc->i_hdr.h_sport)); | ||
447 | ret = inc->i_conn->c_trans->inc_copy_to_user(inc, msg->msg_iov, | ||
448 | size); | ||
449 | if (ret < 0) | ||
450 | break; | ||
451 | |||
452 | /* | ||
453 | * if the message we just copied isn't at the head of the | ||
454 | * recv queue then someone else raced us to return it, try | ||
455 | * to get the next message. | ||
456 | */ | ||
457 | if (!rds_still_queued(rs, inc, !(msg_flags & MSG_PEEK))) { | ||
458 | rds_inc_put(inc); | ||
459 | inc = NULL; | ||
460 | rds_stats_inc(s_recv_deliver_raced); | ||
461 | continue; | ||
462 | } | ||
463 | |||
464 | if (ret < be32_to_cpu(inc->i_hdr.h_len)) { | ||
465 | if (msg_flags & MSG_TRUNC) | ||
466 | ret = be32_to_cpu(inc->i_hdr.h_len); | ||
467 | msg->msg_flags |= MSG_TRUNC; | ||
468 | } | ||
469 | |||
470 | if (rds_cmsg_recv(inc, msg)) { | ||
471 | ret = -EFAULT; | ||
472 | goto out; | ||
473 | } | ||
474 | |||
475 | rds_stats_inc(s_recv_delivered); | ||
476 | |||
477 | sin = (struct sockaddr_in *)msg->msg_name; | ||
478 | if (sin) { | ||
479 | sin->sin_family = AF_INET; | ||
480 | sin->sin_port = inc->i_hdr.h_sport; | ||
481 | sin->sin_addr.s_addr = inc->i_saddr; | ||
482 | memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); | ||
483 | } | ||
484 | break; | ||
485 | } | ||
486 | |||
487 | if (inc) | ||
488 | rds_inc_put(inc); | ||
489 | |||
490 | out: | ||
491 | return ret; | ||
492 | } | ||
493 | |||
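After rds_recvmsg() returns, any RDMA destination cookie attached by rds_cmsg_recv() arrives as ancillary data. A hedged user-space sketch of draining it with the standard cmsg accessors; only SOL_RDS and RDS_CMSG_RDMA_DEST are taken from the code above, and the cookie is assumed to be carried as a 64-bit value supplied by the RDS uapi header:

#include <stdint.h>
#include <string.h>
#include <sys/socket.h>

/* Walk the control messages returned by recvmsg() on an RDS socket and
 * pull out an RDMA destination cookie, if the sender attached one. */
static int demo_get_rdma_dest_cookie(struct msghdr *msg, uint64_t *cookie)
{
	struct cmsghdr *cmsg;

	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
		if (cmsg->cmsg_level != SOL_RDS ||
		    cmsg->cmsg_type != RDS_CMSG_RDMA_DEST)
			continue;
		memcpy(cookie, CMSG_DATA(cmsg), sizeof(*cookie));
		return 1;	/* found a cookie */
	}
	return 0;		/* plain data message, no RDMA destination */
}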
494 | /* | ||
495 | * The socket is being shut down and we're asked to drop messages that were | ||
496 | * queued for recvmsg. The caller has unbound the socket so the receive path | ||
497 | * won't queue any more incoming fragments or messages on the socket. | ||
498 | */ | ||
499 | void rds_clear_recv_queue(struct rds_sock *rs) | ||
500 | { | ||
501 | struct sock *sk = rds_rs_to_sk(rs); | ||
502 | struct rds_incoming *inc, *tmp; | ||
503 | unsigned long flags; | ||
504 | |||
505 | write_lock_irqsave(&rs->rs_recv_lock, flags); | ||
506 | list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) { | ||
507 | rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, | ||
508 | -be32_to_cpu(inc->i_hdr.h_len), | ||
509 | inc->i_hdr.h_dport); | ||
510 | list_del_init(&inc->i_item); | ||
511 | rds_inc_put(inc); | ||
512 | } | ||
513 | write_unlock_irqrestore(&rs->rs_recv_lock, flags); | ||
514 | } | ||
515 | |||
516 | /* | ||
517 | * inc->i_saddr isn't used here because it is only set in the receive | ||
518 | * path. | ||
519 | */ | ||
520 | void rds_inc_info_copy(struct rds_incoming *inc, | ||
521 | struct rds_info_iterator *iter, | ||
522 | __be32 saddr, __be32 daddr, int flip) | ||
523 | { | ||
524 | struct rds_info_message minfo; | ||
525 | |||
526 | minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence); | ||
527 | minfo.len = be32_to_cpu(inc->i_hdr.h_len); | ||
528 | |||
529 | if (flip) { | ||
530 | minfo.laddr = daddr; | ||
531 | minfo.faddr = saddr; | ||
532 | minfo.lport = inc->i_hdr.h_dport; | ||
533 | minfo.fport = inc->i_hdr.h_sport; | ||
534 | } else { | ||
535 | minfo.laddr = saddr; | ||
536 | minfo.faddr = daddr; | ||
537 | minfo.lport = inc->i_hdr.h_sport; | ||
538 | minfo.fport = inc->i_hdr.h_dport; | ||
539 | } | ||
540 | |||
541 | rds_info_copy(iter, &minfo, sizeof(minfo)); | ||
542 | } | ||
diff --git a/net/rds/send.c b/net/rds/send.c new file mode 100644 index 000000000000..1b37364656f0 --- /dev/null +++ b/net/rds/send.c | |||
@@ -0,0 +1,1003 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <net/sock.h> | ||
35 | #include <linux/in.h> | ||
36 | #include <linux/list.h> | ||
37 | |||
38 | #include "rds.h" | ||
39 | #include "rdma.h" | ||
40 | |||
41 | /* When transmitting messages in rds_send_xmit, we need to emerge from | ||
42 | * time to time and briefly release the CPU. Otherwise the soft lockup | ||
43 | * watchdog will kick our shin. | ||
44 | * Also, it seems fairer to not let one busy connection stall all the | ||
45 | * others. | ||
46 | * | ||
47 | * send_batch_count is the number of times we'll loop in send_xmit. Setting | ||
48 | * it to 0 will restore the old behavior (where we looped until we had | ||
49 | * drained the queue). | ||
50 | */ | ||
51 | static int send_batch_count = 64; | ||
52 | module_param(send_batch_count, int, 0444); | ||
53 | MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue"); | ||
54 | |||
55 | /* | ||
56 | * Reset the send state. Caller must hold c_send_lock when calling here. | ||
57 | */ | ||
58 | void rds_send_reset(struct rds_connection *conn) | ||
59 | { | ||
60 | struct rds_message *rm, *tmp; | ||
61 | unsigned long flags; | ||
62 | |||
63 | if (conn->c_xmit_rm) { | ||
64 | /* Tell the user the RDMA op is no longer mapped by the | ||
65 | * transport. This isn't entirely true (it's flushed out | ||
66 | * independently) but as the connection is down, there's | ||
67 | * no ongoing RDMA to/from that memory */ | ||
68 | rds_message_unmapped(conn->c_xmit_rm); | ||
69 | rds_message_put(conn->c_xmit_rm); | ||
70 | conn->c_xmit_rm = NULL; | ||
71 | } | ||
72 | conn->c_xmit_sg = 0; | ||
73 | conn->c_xmit_hdr_off = 0; | ||
74 | conn->c_xmit_data_off = 0; | ||
75 | conn->c_xmit_rdma_sent = 0; | ||
76 | |||
77 | conn->c_map_queued = 0; | ||
78 | |||
79 | conn->c_unacked_packets = rds_sysctl_max_unacked_packets; | ||
80 | conn->c_unacked_bytes = rds_sysctl_max_unacked_bytes; | ||
81 | |||
82 | /* Mark messages as retransmissions, and move them to the send q */ | ||
83 | spin_lock_irqsave(&conn->c_lock, flags); | ||
84 | list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { | ||
85 | set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); | ||
86 | set_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags); | ||
87 | } | ||
88 | list_splice_init(&conn->c_retrans, &conn->c_send_queue); | ||
89 | spin_unlock_irqrestore(&conn->c_lock, flags); | ||
90 | } | ||
91 | |||
92 | /* | ||
93 | * We're making the conscious trade-off here to only send one message | ||
94 | * down the connection at a time. | ||
95 | * Pro: | ||
96 | * - tx queueing is a simple fifo list | ||
97 | * - reassembly is optional and easily done by transports per conn | ||
98 | * - no per flow rx lookup at all, straight to the socket | ||
99 | * - less per-frag memory and wire overhead | ||
100 | * Con: | ||
101 | * - queued acks can be delayed behind large messages | ||
102 | * Depends: | ||
103 | * - small message latency is higher behind queued large messages | ||
104 | * - large message latency isn't starved by intervening small sends | ||
105 | */ | ||
106 | int rds_send_xmit(struct rds_connection *conn) | ||
107 | { | ||
108 | struct rds_message *rm; | ||
109 | unsigned long flags; | ||
110 | unsigned int tmp; | ||
111 | unsigned int send_quota = send_batch_count; | ||
112 | struct scatterlist *sg; | ||
113 | int ret = 0; | ||
114 | int was_empty = 0; | ||
115 | LIST_HEAD(to_be_dropped); | ||
116 | |||
117 | /* | ||
118 | * sendmsg calls here after having queued its message on the send | ||
119 | * queue. We only have one task feeding the connection at a time. If | ||
120 | * another thread is already feeding the queue then we back off. This | ||
121 | * avoids blocking the caller and trading per-connection data between | ||
122 | * caches per message. | ||
123 | * | ||
124 | * The sem holder will issue a retry if they notice that someone queued | ||
125 | * a message after they stopped walking the send queue but before they | ||
126 | * dropped the sem. | ||
127 | */ | ||
128 | if (!mutex_trylock(&conn->c_send_lock)) { | ||
129 | rds_stats_inc(s_send_sem_contention); | ||
130 | ret = -ENOMEM; | ||
131 | goto out; | ||
132 | } | ||
133 | |||
134 | if (conn->c_trans->xmit_prepare) | ||
135 | conn->c_trans->xmit_prepare(conn); | ||
136 | |||
137 | /* | ||
138 | * spin trying to push headers and data down the connection until | ||
139 | * the connection stops making forward progress. | ||
140 | */ | ||
141 | while (--send_quota) { | ||
142 | /* | ||
143 | * See if we need to send a congestion map update if we're | ||
144 | * between sending messages. The send_sem protects our sole | ||
145 | * use of c_map_offset and _bytes. | ||
146 | * Note this is used only by transports that define a special | ||
147 | * xmit_cong_map function. For all others, we allocate | ||
148 | * a cong_map message and treat it just like any other send. | ||
149 | */ | ||
150 | if (conn->c_map_bytes) { | ||
151 | ret = conn->c_trans->xmit_cong_map(conn, conn->c_lcong, | ||
152 | conn->c_map_offset); | ||
153 | if (ret <= 0) | ||
154 | break; | ||
155 | |||
156 | conn->c_map_offset += ret; | ||
157 | conn->c_map_bytes -= ret; | ||
158 | if (conn->c_map_bytes) | ||
159 | continue; | ||
160 | } | ||
161 | |||
162 | /* If we're done sending the current message, clear the | ||
163 | * offset and S/G temporaries. | ||
164 | */ | ||
165 | rm = conn->c_xmit_rm; | ||
166 | if (rm != NULL && | ||
167 | conn->c_xmit_hdr_off == sizeof(struct rds_header) && | ||
168 | conn->c_xmit_sg == rm->m_nents) { | ||
169 | conn->c_xmit_rm = NULL; | ||
170 | conn->c_xmit_sg = 0; | ||
171 | conn->c_xmit_hdr_off = 0; | ||
172 | conn->c_xmit_data_off = 0; | ||
173 | conn->c_xmit_rdma_sent = 0; | ||
174 | |||
175 | /* Release the reference to the previous message. */ | ||
176 | rds_message_put(rm); | ||
177 | rm = NULL; | ||
178 | } | ||
179 | |||
180 | /* If we're asked to send a cong map update, do so. | ||
181 | */ | ||
182 | if (rm == NULL && test_and_clear_bit(0, &conn->c_map_queued)) { | ||
183 | if (conn->c_trans->xmit_cong_map != NULL) { | ||
184 | conn->c_map_offset = 0; | ||
185 | conn->c_map_bytes = sizeof(struct rds_header) + | ||
186 | RDS_CONG_MAP_BYTES; | ||
187 | continue; | ||
188 | } | ||
189 | |||
190 | rm = rds_cong_update_alloc(conn); | ||
191 | if (IS_ERR(rm)) { | ||
192 | ret = PTR_ERR(rm); | ||
193 | break; | ||
194 | } | ||
195 | |||
196 | conn->c_xmit_rm = rm; | ||
197 | } | ||
198 | |||
199 | /* | ||
200 | * Grab the next message from the send queue, if there is one. | ||
201 | * | ||
202 | * c_xmit_rm holds a ref while we're sending this message down | ||
203 | * the connection. We can use this ref while holding the | ||
204 | * send_sem; rds_send_reset() is serialized with it. | ||
205 | */ | ||
206 | if (rm == NULL) { | ||
207 | unsigned int len; | ||
208 | |||
209 | spin_lock_irqsave(&conn->c_lock, flags); | ||
210 | |||
211 | if (!list_empty(&conn->c_send_queue)) { | ||
212 | rm = list_entry(conn->c_send_queue.next, | ||
213 | struct rds_message, | ||
214 | m_conn_item); | ||
215 | rds_message_addref(rm); | ||
216 | |||
217 | /* | ||
218 | * Move the message from the send queue to the retransmit | ||
219 | * list right away. | ||
220 | */ | ||
221 | list_move_tail(&rm->m_conn_item, &conn->c_retrans); | ||
222 | } | ||
223 | |||
224 | spin_unlock_irqrestore(&conn->c_lock, flags); | ||
225 | |||
226 | if (rm == NULL) { | ||
227 | was_empty = 1; | ||
228 | break; | ||
229 | } | ||
230 | |||
231 | /* Unfortunately, the way Infiniband deals with | ||
232 | * RDMA to a bad MR key is by moving the entire | ||
233 | * queue pair to error state. We could possibly | ||
234 | * recover from that, but right now we drop the | ||
235 | * connection. | ||
236 | * Therefore, we never retransmit messages with RDMA ops. | ||
237 | */ | ||
238 | if (rm->m_rdma_op | ||
239 | && test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) { | ||
240 | spin_lock_irqsave(&conn->c_lock, flags); | ||
241 | if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) | ||
242 | list_move(&rm->m_conn_item, &to_be_dropped); | ||
243 | spin_unlock_irqrestore(&conn->c_lock, flags); | ||
244 | rds_message_put(rm); | ||
245 | continue; | ||
246 | } | ||
247 | |||
248 | /* Require an ACK every once in a while */ | ||
249 | len = ntohl(rm->m_inc.i_hdr.h_len); | ||
250 | if (conn->c_unacked_packets == 0 | ||
251 | || conn->c_unacked_bytes < len) { | ||
252 | __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); | ||
253 | |||
254 | conn->c_unacked_packets = rds_sysctl_max_unacked_packets; | ||
255 | conn->c_unacked_bytes = rds_sysctl_max_unacked_bytes; | ||
256 | rds_stats_inc(s_send_ack_required); | ||
257 | } else { | ||
258 | conn->c_unacked_bytes -= len; | ||
259 | conn->c_unacked_packets--; | ||
260 | } | ||
261 | |||
262 | conn->c_xmit_rm = rm; | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Try and send an rdma message. Let's see if we can | ||
267 | * keep this simple and require that the transport either | ||
268 | * send the whole rdma or none of it. | ||
269 | */ | ||
270 | if (rm->m_rdma_op && !conn->c_xmit_rdma_sent) { | ||
271 | ret = conn->c_trans->xmit_rdma(conn, rm->m_rdma_op); | ||
272 | if (ret) | ||
273 | break; | ||
274 | conn->c_xmit_rdma_sent = 1; | ||
275 | /* The transport owns the mapped memory for now. | ||
276 | * You can't unmap it while it's on the send queue */ | ||
277 | set_bit(RDS_MSG_MAPPED, &rm->m_flags); | ||
278 | } | ||
279 | |||
280 | if (conn->c_xmit_hdr_off < sizeof(struct rds_header) || | ||
281 | conn->c_xmit_sg < rm->m_nents) { | ||
282 | ret = conn->c_trans->xmit(conn, rm, | ||
283 | conn->c_xmit_hdr_off, | ||
284 | conn->c_xmit_sg, | ||
285 | conn->c_xmit_data_off); | ||
286 | if (ret <= 0) | ||
287 | break; | ||
288 | |||
289 | if (conn->c_xmit_hdr_off < sizeof(struct rds_header)) { | ||
290 | tmp = min_t(int, ret, | ||
291 | sizeof(struct rds_header) - | ||
292 | conn->c_xmit_hdr_off); | ||
293 | conn->c_xmit_hdr_off += tmp; | ||
294 | ret -= tmp; | ||
295 | } | ||
296 | |||
297 | sg = &rm->m_sg[conn->c_xmit_sg]; | ||
298 | while (ret) { | ||
299 | tmp = min_t(int, ret, sg->length - | ||
300 | conn->c_xmit_data_off); | ||
301 | conn->c_xmit_data_off += tmp; | ||
302 | ret -= tmp; | ||
303 | if (conn->c_xmit_data_off == sg->length) { | ||
304 | conn->c_xmit_data_off = 0; | ||
305 | sg++; | ||
306 | conn->c_xmit_sg++; | ||
307 | BUG_ON(ret != 0 && | ||
308 | conn->c_xmit_sg == rm->m_nents); | ||
309 | } | ||
310 | } | ||
311 | } | ||
312 | } | ||
313 | |||
314 | /* Nuke any messages we decided not to retransmit. */ | ||
315 | if (!list_empty(&to_be_dropped)) | ||
316 | rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED); | ||
317 | |||
318 | if (conn->c_trans->xmit_complete) | ||
319 | conn->c_trans->xmit_complete(conn); | ||
320 | |||
321 | /* | ||
322 | * We might be racing with another sender who queued a message but | ||
323 | * backed off on noticing that we held the c_send_lock. If we check | ||
324 | * for queued messages after dropping the sem then either we'll | ||
325 | * see the queued message or the queuer will get the sem. If we | ||
326 | * notice the queued message then we trigger an immediate retry. | ||
327 | * | ||
328 | * We need to be careful only to do this when we stopped processing | ||
329 | * the send queue because it was empty. It's the only way we | ||
330 | * stop processing the loop when the transport hasn't taken | ||
331 | * responsibility for forward progress. | ||
332 | */ | ||
333 | mutex_unlock(&conn->c_send_lock); | ||
334 | |||
335 | if (conn->c_map_bytes || (send_quota == 0 && !was_empty)) { | ||
336 | /* We exhausted the send quota, but there's work left to | ||
337 | * do. Return and (re-)schedule the send worker. | ||
338 | */ | ||
339 | ret = -EAGAIN; | ||
340 | } | ||
341 | |||
342 | if (ret == 0 && was_empty) { | ||
343 | /* A simple bit test would be way faster than taking the | ||
344 | * spin lock */ | ||
345 | spin_lock_irqsave(&conn->c_lock, flags); | ||
346 | if (!list_empty(&conn->c_send_queue)) { | ||
347 | rds_stats_inc(s_send_sem_queue_raced); | ||
348 | ret = -EAGAIN; | ||
349 | } | ||
350 | spin_unlock_irqrestore(&conn->c_lock, flags); | ||
351 | } | ||
352 | out: | ||
353 | return ret; | ||
354 | } | ||
355 | |||
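rds_send_xmit() reports -EAGAIN when it runs out of batch quota or loses the queue race, and -ENOMEM when another sender already holds c_send_lock; the retry policy lives in rds_send_worker() (threads.c, not part of this hunk). A hedged sketch of how such a worker might drive it, inferred from the declarations in rds.h and the return codes above rather than copied from the real worker:

/* Illustration only; the real rds_send_worker() may differ in detail.
 * Counter member names are inferred from the rds_stat_names table. */
static void demo_send_worker(struct work_struct *work)
{
	struct rds_connection *conn =
		container_of(work, struct rds_connection, c_send_w.work);
	int ret = rds_send_xmit(conn);

	if (ret == -EAGAIN) {
		/* quota exhausted or queue race: retry immediately */
		rds_stats_inc(s_send_immediate_retry);
		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
	} else if (ret == -ENOMEM) {
		/* another sender holds c_send_lock: back off briefly */
		rds_stats_inc(s_send_delayed_retry);
		queue_delayed_work(rds_wq, &conn->c_send_w, 2);
	}
}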
356 | static void rds_send_sndbuf_remove(struct rds_sock *rs, struct rds_message *rm) | ||
357 | { | ||
358 | u32 len = be32_to_cpu(rm->m_inc.i_hdr.h_len); | ||
359 | |||
360 | assert_spin_locked(&rs->rs_lock); | ||
361 | |||
362 | BUG_ON(rs->rs_snd_bytes < len); | ||
363 | rs->rs_snd_bytes -= len; | ||
364 | |||
365 | if (rs->rs_snd_bytes == 0) | ||
366 | rds_stats_inc(s_send_queue_empty); | ||
367 | } | ||
368 | |||
369 | static inline int rds_send_is_acked(struct rds_message *rm, u64 ack, | ||
370 | is_acked_func is_acked) | ||
371 | { | ||
372 | if (is_acked) | ||
373 | return is_acked(rm, ack); | ||
374 | return be64_to_cpu(rm->m_inc.i_hdr.h_sequence) <= ack; | ||
375 | } | ||
376 | |||
377 | /* | ||
378 | * Returns true if there are no messages on the send and retransmit queues | ||
379 | * which have a sequence number greater than or equal to the given sequence | ||
380 | * number. | ||
381 | */ | ||
382 | int rds_send_acked_before(struct rds_connection *conn, u64 seq) | ||
383 | { | ||
384 | struct rds_message *rm, *tmp; | ||
385 | int ret = 1; | ||
386 | |||
387 | spin_lock(&conn->c_lock); | ||
388 | |||
389 | list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { | ||
390 | if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) | ||
391 | ret = 0; | ||
392 | break; | ||
393 | } | ||
394 | |||
395 | list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { | ||
396 | if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) | ||
397 | ret = 0; | ||
398 | break; | ||
399 | } | ||
400 | |||
401 | spin_unlock(&conn->c_lock); | ||
402 | |||
403 | return ret; | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * This is pretty similar to what happens below in the ACK | ||
408 | * handling code - except that we call here as soon as we get | ||
409 | * the IB send completion on the RDMA op and the accompanying | ||
410 | * message. | ||
411 | */ | ||
412 | void rds_rdma_send_complete(struct rds_message *rm, int status) | ||
413 | { | ||
414 | struct rds_sock *rs = NULL; | ||
415 | struct rds_rdma_op *ro; | ||
416 | struct rds_notifier *notifier; | ||
417 | |||
418 | spin_lock(&rm->m_rs_lock); | ||
419 | |||
420 | ro = rm->m_rdma_op; | ||
421 | if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) | ||
422 | && ro && ro->r_notify && ro->r_notifier) { | ||
423 | notifier = ro->r_notifier; | ||
424 | rs = rm->m_rs; | ||
425 | sock_hold(rds_rs_to_sk(rs)); | ||
426 | |||
427 | notifier->n_status = status; | ||
428 | spin_lock(&rs->rs_lock); | ||
429 | list_add_tail(¬ifier->n_list, &rs->rs_notify_queue); | ||
430 | spin_unlock(&rs->rs_lock); | ||
431 | |||
432 | ro->r_notifier = NULL; | ||
433 | } | ||
434 | |||
435 | spin_unlock(&rm->m_rs_lock); | ||
436 | |||
437 | if (rs) { | ||
438 | rds_wake_sk_sleep(rs); | ||
439 | sock_put(rds_rs_to_sk(rs)); | ||
440 | } | ||
441 | } | ||
442 | |||
443 | /* | ||
444 | * This is the same as rds_rdma_send_complete except we | ||
445 | * don't do any locking - we have all the ingredients (message, | ||
446 | * socket, socket lock) and can just move the notifier. | ||
447 | */ | ||
448 | static inline void | ||
449 | __rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status) | ||
450 | { | ||
451 | struct rds_rdma_op *ro; | ||
452 | |||
453 | ro = rm->m_rdma_op; | ||
454 | if (ro && ro->r_notify && ro->r_notifier) { | ||
455 | ro->r_notifier->n_status = status; | ||
456 | list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue); | ||
457 | ro->r_notifier = NULL; | ||
458 | } | ||
459 | |||
460 | /* No need to wake the app - caller does this */ | ||
461 | } | ||
462 | |||
463 | /* | ||
464 | * This is called from the IB send completion when we detect | ||
465 | * a RDMA operation that failed with remote access error. | ||
466 | * So speed is not an issue here. | ||
467 | */ | ||
468 | struct rds_message *rds_send_get_message(struct rds_connection *conn, | ||
469 | struct rds_rdma_op *op) | ||
470 | { | ||
471 | struct rds_message *rm, *tmp, *found = NULL; | ||
472 | unsigned long flags; | ||
473 | |||
474 | spin_lock_irqsave(&conn->c_lock, flags); | ||
475 | |||
476 | list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { | ||
477 | if (rm->m_rdma_op == op) { | ||
478 | atomic_inc(&rm->m_refcount); | ||
479 | found = rm; | ||
480 | goto out; | ||
481 | } | ||
482 | } | ||
483 | |||
484 | list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { | ||
485 | if (rm->m_rdma_op == op) { | ||
486 | atomic_inc(&rm->m_refcount); | ||
487 | found = rm; | ||
488 | break; | ||
489 | } | ||
490 | } | ||
491 | |||
492 | out: | ||
493 | spin_unlock_irqrestore(&conn->c_lock, flags); | ||
494 | |||
495 | return found; | ||
496 | } | ||
497 | |||
498 | /* | ||
499 | * This removes messages from the socket's list if they're on it. The list | ||
500 | * argument must be private to the caller, we must be able to modify it | ||
501 | * without locks. The messages must have a reference held for their | ||
502 | * position on the list. This function will drop that reference after | ||
503 | * removing the messages from the 'messages' list regardless of if it found | ||
504 | * the messages on the socket list or not. | ||
505 | */ | ||
506 | void rds_send_remove_from_sock(struct list_head *messages, int status) | ||
507 | { | ||
508 | unsigned long flags = 0; /* silence gcc :P */ | ||
509 | struct rds_sock *rs = NULL; | ||
510 | struct rds_message *rm; | ||
511 | |||
512 | local_irq_save(flags); | ||
513 | while (!list_empty(messages)) { | ||
514 | rm = list_entry(messages->next, struct rds_message, | ||
515 | m_conn_item); | ||
516 | list_del_init(&rm->m_conn_item); | ||
517 | |||
518 | /* | ||
519 | * If we see this flag cleared then we're *sure* that someone | ||
520 | * else beat us to removing it from the sock. If we race | ||
521 | * with their flag update we'll get the lock and then really | ||
522 | * see that the flag has been cleared. | ||
523 | * | ||
524 | * The message spinlock makes sure nobody clears rm->m_rs | ||
525 | * while we're messing with it. It does not prevent the | ||
526 | * message from being removed from the socket, though. | ||
527 | */ | ||
528 | spin_lock(&rm->m_rs_lock); | ||
529 | if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) | ||
530 | goto unlock_and_drop; | ||
531 | |||
532 | if (rs != rm->m_rs) { | ||
533 | if (rs) { | ||
534 | spin_unlock(&rs->rs_lock); | ||
535 | rds_wake_sk_sleep(rs); | ||
536 | sock_put(rds_rs_to_sk(rs)); | ||
537 | } | ||
538 | rs = rm->m_rs; | ||
539 | spin_lock(&rs->rs_lock); | ||
540 | sock_hold(rds_rs_to_sk(rs)); | ||
541 | } | ||
542 | |||
543 | if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { | ||
544 | struct rds_rdma_op *ro = rm->m_rdma_op; | ||
545 | struct rds_notifier *notifier; | ||
546 | |||
547 | list_del_init(&rm->m_sock_item); | ||
548 | rds_send_sndbuf_remove(rs, rm); | ||
549 | |||
550 | if (ro && ro->r_notifier | ||
551 | && (status || ro->r_notify)) { | ||
552 | notifier = ro->r_notifier; | ||
553 | list_add_tail(¬ifier->n_list, | ||
554 | &rs->rs_notify_queue); | ||
555 | if (!notifier->n_status) | ||
556 | notifier->n_status = status; | ||
557 | rm->m_rdma_op->r_notifier = NULL; | ||
558 | } | ||
559 | rds_message_put(rm); | ||
560 | rm->m_rs = NULL; | ||
561 | } | ||
562 | |||
563 | unlock_and_drop: | ||
564 | spin_unlock(&rm->m_rs_lock); | ||
565 | rds_message_put(rm); | ||
566 | } | ||
567 | |||
568 | if (rs) { | ||
569 | spin_unlock(&rs->rs_lock); | ||
570 | rds_wake_sk_sleep(rs); | ||
571 | sock_put(rds_rs_to_sk(rs)); | ||
572 | } | ||
573 | local_irq_restore(flags); | ||
574 | } | ||
575 | |||
576 | /* | ||
577 | * Transports call here when they've determined that the receiver queued | ||
578 | * messages up to, and including, the given sequence number. Messages are | ||
579 | * moved to the retrans queue when rds_send_xmit picks them off the send | ||
580 | * queue. This means that in the TCP case, the message may not have been | ||
581 | * assigned the m_ack_seq yet - but that's fine as long as tcp_is_acked | ||
582 | * checks the RDS_MSG_HAS_ACK_SEQ bit. | ||
583 | * | ||
584 | * XXX It's not clear to me how this is safely serialized with socket | ||
585 | * destruction. Maybe it should bail if it sees SOCK_DEAD. | ||
586 | */ | ||
587 | void rds_send_drop_acked(struct rds_connection *conn, u64 ack, | ||
588 | is_acked_func is_acked) | ||
589 | { | ||
590 | struct rds_message *rm, *tmp; | ||
591 | unsigned long flags; | ||
592 | LIST_HEAD(list); | ||
593 | |||
594 | spin_lock_irqsave(&conn->c_lock, flags); | ||
595 | |||
596 | list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { | ||
597 | if (!rds_send_is_acked(rm, ack, is_acked)) | ||
598 | break; | ||
599 | |||
600 | list_move(&rm->m_conn_item, &list); | ||
601 | clear_bit(RDS_MSG_ON_CONN, &rm->m_flags); | ||
602 | } | ||
603 | |||
604 | /* order flag updates with spin locks */ | ||
605 | if (!list_empty(&list)) | ||
606 | smp_mb__after_clear_bit(); | ||
607 | |||
608 | spin_unlock_irqrestore(&conn->c_lock, flags); | ||
609 | |||
610 | /* now remove the messages from the sock list as needed */ | ||
611 | rds_send_remove_from_sock(&list, RDS_RDMA_SUCCESS); | ||
612 | } | ||
613 | |||
614 | void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) | ||
615 | { | ||
616 | struct rds_message *rm, *tmp; | ||
617 | struct rds_connection *conn; | ||
618 | unsigned long flags; | ||
619 | LIST_HEAD(list); | ||
620 | int wake = 0; | ||
621 | |||
622 | /* get all the messages we're dropping under the rs lock */ | ||
623 | spin_lock_irqsave(&rs->rs_lock, flags); | ||
624 | |||
625 | list_for_each_entry_safe(rm, tmp, &rs->rs_send_queue, m_sock_item) { | ||
626 | if (dest && (dest->sin_addr.s_addr != rm->m_daddr || | ||
627 | dest->sin_port != rm->m_inc.i_hdr.h_dport)) | ||
628 | continue; | ||
629 | |||
630 | wake = 1; | ||
631 | list_move(&rm->m_sock_item, &list); | ||
632 | rds_send_sndbuf_remove(rs, rm); | ||
633 | clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); | ||
634 | |||
635 | /* If this is a RDMA operation, notify the app. */ | ||
636 | __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED); | ||
637 | } | ||
638 | |||
639 | /* order flag updates with the rs lock */ | ||
640 | if (wake) | ||
641 | smp_mb__after_clear_bit(); | ||
642 | |||
643 | spin_unlock_irqrestore(&rs->rs_lock, flags); | ||
644 | |||
645 | if (wake) | ||
646 | rds_wake_sk_sleep(rs); | ||
647 | |||
648 | conn = NULL; | ||
649 | |||
650 | /* now remove the messages from the conn list as needed */ | ||
651 | list_for_each_entry(rm, &list, m_sock_item) { | ||
652 | /* We do this here rather than in the loop above, so that | ||
653 | * we don't have to nest m_rs_lock under rs->rs_lock */ | ||
654 | spin_lock(&rm->m_rs_lock); | ||
655 | rm->m_rs = NULL; | ||
656 | spin_unlock(&rm->m_rs_lock); | ||
657 | |||
658 | /* | ||
659 | * If we see this flag cleared then we're *sure* that someone | ||
660 | * else beat us to removing it from the conn. If we race | ||
661 | * with their flag update we'll get the lock and then really | ||
662 | * see that the flag has been cleared. | ||
663 | */ | ||
664 | if (!test_bit(RDS_MSG_ON_CONN, &rm->m_flags)) | ||
665 | continue; | ||
666 | |||
667 | if (conn != rm->m_inc.i_conn) { | ||
668 | if (conn) | ||
669 | spin_unlock_irqrestore(&conn->c_lock, flags); | ||
670 | conn = rm->m_inc.i_conn; | ||
671 | spin_lock_irqsave(&conn->c_lock, flags); | ||
672 | } | ||
673 | |||
674 | if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { | ||
675 | list_del_init(&rm->m_conn_item); | ||
676 | rds_message_put(rm); | ||
677 | } | ||
678 | } | ||
679 | |||
680 | if (conn) | ||
681 | spin_unlock_irqrestore(&conn->c_lock, flags); | ||
682 | |||
683 | while (!list_empty(&list)) { | ||
684 | rm = list_entry(list.next, struct rds_message, m_sock_item); | ||
685 | list_del_init(&rm->m_sock_item); | ||
686 | |||
687 | rds_message_wait(rm); | ||
688 | rds_message_put(rm); | ||
689 | } | ||
690 | } | ||
691 | |||
692 | /* | ||
693 | * We only want this to fire once, so we use the caller's 'queued'. It's | ||
694 | * possible that another thread can race with us and remove the | ||
695 | * message from the flow with RDS_CANCEL_SENT_TO. | ||
696 | */ | ||
697 | static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn, | ||
698 | struct rds_message *rm, __be16 sport, | ||
699 | __be16 dport, int *queued) | ||
700 | { | ||
701 | unsigned long flags; | ||
702 | u32 len; | ||
703 | |||
704 | if (*queued) | ||
705 | goto out; | ||
706 | |||
707 | len = be32_to_cpu(rm->m_inc.i_hdr.h_len); | ||
708 | |||
709 | /* this is the only place which holds both the socket's rs_lock | ||
710 | * and the connection's c_lock */ | ||
711 | spin_lock_irqsave(&rs->rs_lock, flags); | ||
712 | |||
713 | /* | ||
714 | * If there is a little space in sndbuf, we don't queue anything, | ||
715 | * and userspace gets -EAGAIN. But poll() indicates there's send | ||
716 | * room. This can lead to bad behavior (spinning) if snd_bytes isn't | ||
717 | * freed up by incoming acks. So we check the *old* value of | ||
718 | * rs_snd_bytes here to allow the last msg to exceed the buffer, | ||
719 | * and poll() now knows no more data can be sent. | ||
720 | */ | ||
721 | if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) { | ||
722 | rs->rs_snd_bytes += len; | ||
723 | |||
724 | /* let recv side know we are close to send space exhaustion. | ||
725 | * This is probably not the optimal way to do it, as this | ||
726 | * means we set the flag on *all* messages as soon as our | ||
727 | * throughput hits a certain threshold. | ||
728 | */ | ||
729 | if (rs->rs_snd_bytes >= rds_sk_sndbuf(rs) / 2) | ||
730 | __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); | ||
731 | |||
732 | list_add_tail(&rm->m_sock_item, &rs->rs_send_queue); | ||
733 | set_bit(RDS_MSG_ON_SOCK, &rm->m_flags); | ||
734 | rds_message_addref(rm); | ||
735 | rm->m_rs = rs; | ||
736 | |||
737 | /* The code ordering is a little weird, but we're | ||
738 | trying to minimize the time we hold c_lock */ | ||
739 | rds_message_populate_header(&rm->m_inc.i_hdr, sport, dport, 0); | ||
740 | rm->m_inc.i_conn = conn; | ||
741 | rds_message_addref(rm); | ||
742 | |||
743 | spin_lock(&conn->c_lock); | ||
744 | rm->m_inc.i_hdr.h_sequence = cpu_to_be64(conn->c_next_tx_seq++); | ||
745 | list_add_tail(&rm->m_conn_item, &conn->c_send_queue); | ||
746 | set_bit(RDS_MSG_ON_CONN, &rm->m_flags); | ||
747 | spin_unlock(&conn->c_lock); | ||
748 | |||
749 | rdsdebug("queued msg %p len %d, rs %p bytes %d seq %llu\n", | ||
750 | rm, len, rs, rs->rs_snd_bytes, | ||
751 | (unsigned long long)be64_to_cpu(rm->m_inc.i_hdr.h_sequence)); | ||
752 | |||
753 | *queued = 1; | ||
754 | } | ||
755 | |||
756 | spin_unlock_irqrestore(&rs->rs_lock, flags); | ||
757 | out: | ||
758 | return *queued; | ||
759 | } | ||
760 | |||
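The admission test above deliberately compares the old value of rs_snd_bytes with the sndbuf limit, so the last queued message may overshoot the buffer; after that, poll() stops signalling send room and callers block instead of spinning on -EAGAIN. A small sketch of the rule with illustrative numbers (nothing here is kernel API):

/* Mirrors rds_send_queue_rm()'s "check the old value" admission rule. */
static int demo_queue(unsigned int *snd_bytes, unsigned int len,
		      unsigned int sndbuf)
{
	if (*snd_bytes >= sndbuf)	/* old value already at/over the limit */
		return 0;		/* caller waits or sees -EAGAIN */
	*snd_bytes += len;		/* may overshoot sndbuf by one message */
	return 1;
}
/* e.g. sndbuf 64K, snd_bytes 60K, len 16K: queued, snd_bytes becomes 76K,
 * and the next attempt is refused until acks drain snd_bytes below 64K. */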
761 | static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, | ||
762 | struct msghdr *msg, int *allocated_mr) | ||
763 | { | ||
764 | struct cmsghdr *cmsg; | ||
765 | int ret = 0; | ||
766 | |||
767 | for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { | ||
768 | if (!CMSG_OK(msg, cmsg)) | ||
769 | return -EINVAL; | ||
770 | |||
771 | if (cmsg->cmsg_level != SOL_RDS) | ||
772 | continue; | ||
773 | |||
774 | /* As a side effect, RDMA_DEST and RDMA_MAP will set | ||
775 | * rm->m_rdma_cookie and rm->m_rdma_mr. | ||
776 | */ | ||
777 | switch (cmsg->cmsg_type) { | ||
778 | case RDS_CMSG_RDMA_ARGS: | ||
779 | ret = rds_cmsg_rdma_args(rs, rm, cmsg); | ||
780 | break; | ||
781 | |||
782 | case RDS_CMSG_RDMA_DEST: | ||
783 | ret = rds_cmsg_rdma_dest(rs, rm, cmsg); | ||
784 | break; | ||
785 | |||
786 | case RDS_CMSG_RDMA_MAP: | ||
787 | ret = rds_cmsg_rdma_map(rs, rm, cmsg); | ||
788 | if (!ret) | ||
789 | *allocated_mr = 1; | ||
790 | break; | ||
791 | |||
792 | default: | ||
793 | return -EINVAL; | ||
794 | } | ||
795 | |||
796 | if (ret) | ||
797 | break; | ||
798 | } | ||
799 | |||
800 | return ret; | ||
801 | } | ||
802 | |||
803 | int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, | ||
804 | size_t payload_len) | ||
805 | { | ||
806 | struct sock *sk = sock->sk; | ||
807 | struct rds_sock *rs = rds_sk_to_rs(sk); | ||
808 | struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; | ||
809 | __be32 daddr; | ||
810 | __be16 dport; | ||
811 | struct rds_message *rm = NULL; | ||
812 | struct rds_connection *conn; | ||
813 | int ret = 0; | ||
814 | int queued = 0, allocated_mr = 0; | ||
815 | int nonblock = msg->msg_flags & MSG_DONTWAIT; | ||
816 | long timeo = sock_rcvtimeo(sk, nonblock); | ||
817 | |||
818 | /* Mirror Linux UDP's mirroring of BSD error-message compatibility */ | ||
819 | /* XXX: Perhaps MSG_MORE someday */ | ||
820 | if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) { | ||
821 | printk(KERN_INFO "msg_flags 0x%08X\n", msg->msg_flags); | ||
822 | ret = -EOPNOTSUPP; | ||
823 | goto out; | ||
824 | } | ||
825 | |||
826 | if (msg->msg_namelen) { | ||
827 | /* XXX fail non-unicast destination IPs? */ | ||
828 | if (msg->msg_namelen < sizeof(*usin) || usin->sin_family != AF_INET) { | ||
829 | ret = -EINVAL; | ||
830 | goto out; | ||
831 | } | ||
832 | daddr = usin->sin_addr.s_addr; | ||
833 | dport = usin->sin_port; | ||
834 | } else { | ||
835 | /* We only care about consistency with ->connect() */ | ||
836 | lock_sock(sk); | ||
837 | daddr = rs->rs_conn_addr; | ||
838 | dport = rs->rs_conn_port; | ||
839 | release_sock(sk); | ||
840 | } | ||
841 | |||
842 | /* racing with another thread binding seems ok here */ | ||
843 | if (daddr == 0 || rs->rs_bound_addr == 0) { | ||
844 | ret = -ENOTCONN; /* XXX not a great errno */ | ||
845 | goto out; | ||
846 | } | ||
847 | |||
848 | rm = rds_message_copy_from_user(msg->msg_iov, payload_len); | ||
849 | if (IS_ERR(rm)) { | ||
850 | ret = PTR_ERR(rm); | ||
851 | rm = NULL; | ||
852 | goto out; | ||
853 | } | ||
854 | |||
855 | rm->m_daddr = daddr; | ||
856 | |||
857 | /* Parse any control messages the user may have included. */ | ||
858 | ret = rds_cmsg_send(rs, rm, msg, &allocated_mr); | ||
859 | if (ret) | ||
860 | goto out; | ||
861 | |||
862 | /* rds_conn_create has a spinlock that runs with IRQ off. | ||
863 | * Caching the conn in the socket helps a lot. */ | ||
864 | if (rs->rs_conn && rs->rs_conn->c_faddr == daddr) | ||
865 | conn = rs->rs_conn; | ||
866 | else { | ||
867 | conn = rds_conn_create_outgoing(rs->rs_bound_addr, daddr, | ||
868 | rs->rs_transport, | ||
869 | sock->sk->sk_allocation); | ||
870 | if (IS_ERR(conn)) { | ||
871 | ret = PTR_ERR(conn); | ||
872 | goto out; | ||
873 | } | ||
874 | rs->rs_conn = conn; | ||
875 | } | ||
876 | |||
877 | if ((rm->m_rdma_cookie || rm->m_rdma_op) | ||
878 | && conn->c_trans->xmit_rdma == NULL) { | ||
879 | if (printk_ratelimit()) | ||
880 | printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", | ||
881 | rm->m_rdma_op, conn->c_trans->xmit_rdma); | ||
882 | ret = -EOPNOTSUPP; | ||
883 | goto out; | ||
884 | } | ||
885 | |||
886 | /* If the connection is down, trigger a connect. We may | ||
887 | * have scheduled a delayed reconnect however - in this case | ||
888 | * we should not interfere. | ||
889 | */ | ||
890 | if (rds_conn_state(conn) == RDS_CONN_DOWN | ||
891 | && !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) | ||
892 | queue_delayed_work(rds_wq, &conn->c_conn_w, 0); | ||
893 | |||
894 | ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); | ||
895 | if (ret) | ||
896 | goto out; | ||
897 | |||
898 | while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port, | ||
899 | dport, &queued)) { | ||
900 | rds_stats_inc(s_send_queue_full); | ||
901 | /* XXX make sure this is reasonable */ | ||
902 | if (payload_len > rds_sk_sndbuf(rs)) { | ||
903 | ret = -EMSGSIZE; | ||
904 | goto out; | ||
905 | } | ||
906 | if (nonblock) { | ||
907 | ret = -EAGAIN; | ||
908 | goto out; | ||
909 | } | ||
910 | |||
911 | timeo = wait_event_interruptible_timeout(*sk->sk_sleep, | ||
912 | rds_send_queue_rm(rs, conn, rm, | ||
913 | rs->rs_bound_port, | ||
914 | dport, | ||
915 | &queued), | ||
916 | timeo); | ||
917 | rdsdebug("sendmsg woke queued %d timeo %ld\n", queued, timeo); | ||
918 | if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT) | ||
919 | continue; | ||
920 | |||
921 | ret = timeo; | ||
922 | if (ret == 0) | ||
923 | ret = -ETIMEDOUT; | ||
924 | goto out; | ||
925 | } | ||
926 | |||
927 | /* | ||
928 | * By now we've committed to the send. We reuse rds_send_worker() | ||
929 | * to retry sends in the rds thread if the transport asks us to. | ||
930 | */ | ||
931 | rds_stats_inc(s_send_queued); | ||
932 | |||
933 | if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) | ||
934 | rds_send_worker(&conn->c_send_w.work); | ||
935 | |||
936 | rds_message_put(rm); | ||
937 | return payload_len; | ||
938 | |||
939 | out: | ||
940 | /* If the user included a RDMA_MAP cmsg, we allocated an MR on the fly. | ||
941 | * If the sendmsg goes through, we keep the MR. If it fails with EAGAIN | ||
942 | * or in any other way, we need to destroy the MR again */ | ||
943 | if (allocated_mr) | ||
944 | rds_rdma_unuse(rs, rds_rdma_cookie_key(rm->m_rdma_cookie), 1); | ||
945 | |||
946 | if (rm) | ||
947 | rds_message_put(rm); | ||
948 | return ret; | ||
949 | } | ||
950 | |||
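For completeness, a user-space counterpart to the sendmsg() path above, sending one datagram to an explicit sockaddr_in destination. Only the addressing, the MSG_DONTWAIT semantics and the -EAGAIN behaviour come from this hunk; the PF_RDS socket family and its header are assumptions about the surrounding patch set:

#include <arpa/inet.h>
#include <string.h>
#include <sys/socket.h>

/* Hedged sketch: assumes an RDS socket created elsewhere, e.g. with
 * socket(PF_RDS, SOCK_SEQPACKET, 0) once the family is registered. */
static ssize_t demo_rds_send(int fd, const char *payload, size_t len,
			     const char *dst_ip, unsigned short dst_port)
{
	struct sockaddr_in dst;
	struct iovec iov = { .iov_base = (void *)payload, .iov_len = len };
	struct msghdr msg;

	memset(&dst, 0, sizeof(dst));
	dst.sin_family = AF_INET;		/* RDS addresses peers by IPv4 */
	dst.sin_port = htons(dst_port);
	inet_pton(AF_INET, dst_ip, &dst.sin_addr);

	memset(&msg, 0, sizeof(msg));
	msg.msg_name = &dst;			/* maps to usin in rds_sendmsg() */
	msg.msg_namelen = sizeof(dst);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	/* With MSG_DONTWAIT a full send queue returns -EAGAIN instead of
	 * sleeping in rds_send_queue_rm(). */
	return sendmsg(fd, &msg, 0);
}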
951 | /* | ||
952 | * Reply to a ping packet. | ||
953 | */ | ||
954 | int | ||
955 | rds_send_pong(struct rds_connection *conn, __be16 dport) | ||
956 | { | ||
957 | struct rds_message *rm; | ||
958 | unsigned long flags; | ||
959 | int ret = 0; | ||
960 | |||
961 | rm = rds_message_alloc(0, GFP_ATOMIC); | ||
962 | if (rm == NULL) { | ||
963 | ret = -ENOMEM; | ||
964 | goto out; | ||
965 | } | ||
966 | |||
967 | rm->m_daddr = conn->c_faddr; | ||
968 | |||
969 | /* If the connection is down, trigger a connect. We may | ||
970 | * have scheduled a delayed reconnect however - in this case | ||
971 | * we should not interfere. | ||
972 | */ | ||
973 | if (rds_conn_state(conn) == RDS_CONN_DOWN | ||
974 | && !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) | ||
975 | queue_delayed_work(rds_wq, &conn->c_conn_w, 0); | ||
976 | |||
977 | ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL); | ||
978 | if (ret) | ||
979 | goto out; | ||
980 | |||
981 | spin_lock_irqsave(&conn->c_lock, flags); | ||
982 | list_add_tail(&rm->m_conn_item, &conn->c_send_queue); | ||
983 | set_bit(RDS_MSG_ON_CONN, &rm->m_flags); | ||
984 | rds_message_addref(rm); | ||
985 | rm->m_inc.i_conn = conn; | ||
986 | |||
987 | rds_message_populate_header(&rm->m_inc.i_hdr, 0, dport, | ||
988 | conn->c_next_tx_seq); | ||
989 | conn->c_next_tx_seq++; | ||
990 | spin_unlock_irqrestore(&conn->c_lock, flags); | ||
991 | |||
992 | rds_stats_inc(s_send_queued); | ||
993 | rds_stats_inc(s_send_pong); | ||
994 | |||
995 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); | ||
996 | rds_message_put(rm); | ||
997 | return 0; | ||
998 | |||
999 | out: | ||
1000 | if (rm) | ||
1001 | rds_message_put(rm); | ||
1002 | return ret; | ||
1003 | } | ||
diff --git a/net/rds/stats.c b/net/rds/stats.c new file mode 100644 index 000000000000..637146893cf3 --- /dev/null +++ b/net/rds/stats.c | |||
@@ -0,0 +1,148 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/percpu.h> | ||
34 | #include <linux/seq_file.h> | ||
35 | #include <linux/proc_fs.h> | ||
36 | |||
37 | #include "rds.h" | ||
38 | |||
39 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); | ||
40 | |||
41 | /* :.,$s/unsigned long\>.*\<s_\(.*\);/"\1",/g */ | ||
42 | |||
43 | static char *rds_stat_names[] = { | ||
44 | "conn_reset", | ||
45 | "recv_drop_bad_checksum", | ||
46 | "recv_drop_old_seq", | ||
47 | "recv_drop_no_sock", | ||
48 | "recv_drop_dead_sock", | ||
49 | "recv_deliver_raced", | ||
50 | "recv_delivered", | ||
51 | "recv_queued", | ||
52 | "recv_immediate_retry", | ||
53 | "recv_delayed_retry", | ||
54 | "recv_ack_required", | ||
55 | "recv_rdma_bytes", | ||
56 | "recv_ping", | ||
57 | "send_queue_empty", | ||
58 | "send_queue_full", | ||
59 | "send_sem_contention", | ||
60 | "send_sem_queue_raced", | ||
61 | "send_immediate_retry", | ||
62 | "send_delayed_retry", | ||
63 | "send_drop_acked", | ||
64 | "send_ack_required", | ||
65 | "send_queued", | ||
66 | "send_rdma", | ||
67 | "send_rdma_bytes", | ||
68 | "send_pong", | ||
69 | "page_remainder_hit", | ||
70 | "page_remainder_miss", | ||
71 | "copy_to_user", | ||
72 | "copy_from_user", | ||
73 | "cong_update_queued", | ||
74 | "cong_update_received", | ||
75 | "cong_send_error", | ||
76 | "cong_send_blocked", | ||
77 | }; | ||
78 | |||
79 | void rds_stats_info_copy(struct rds_info_iterator *iter, | ||
80 | uint64_t *values, char **names, size_t nr) | ||
81 | { | ||
82 | struct rds_info_counter ctr; | ||
83 | size_t i; | ||
84 | |||
85 | for (i = 0; i < nr; i++) { | ||
86 | BUG_ON(strlen(names[i]) >= sizeof(ctr.name)); | ||
87 | strncpy(ctr.name, names[i], sizeof(ctr.name) - 1); | ||
88 | ctr.value = values[i]; | ||
89 | |||
90 | rds_info_copy(iter, &ctr, sizeof(ctr)); | ||
91 | } | ||
92 | } | ||
93 | |||
94 | /* | ||
95 | * This gives global counters across all the transports. The strings | ||
96 | * are copied in so that the tool doesn't need knowledge of the specific | ||
97 | * stats that we're exporting. Some are pretty implementation dependent | ||
98 | * and may change over time. That doesn't stop them from being useful. | ||
99 | * | ||
100 | * This is the only function in the chain that knows about the byte granular | ||
101 | * length in userspace. It converts it to number of stat entries that the | ||
102 | * rest of the functions operate in. | ||
103 | */ | ||
104 | static void rds_stats_info(struct socket *sock, unsigned int len, | ||
105 | struct rds_info_iterator *iter, | ||
106 | struct rds_info_lengths *lens) | ||
107 | { | ||
108 | struct rds_statistics stats = {0, }; | ||
109 | uint64_t *src; | ||
110 | uint64_t *sum; | ||
111 | size_t i; | ||
112 | int cpu; | ||
113 | unsigned int avail; | ||
114 | |||
115 | avail = len / sizeof(struct rds_info_counter); | ||
116 | |||
117 | if (avail < ARRAY_SIZE(rds_stat_names)) { | ||
118 | avail = 0; | ||
119 | goto trans; | ||
120 | } | ||
121 | |||
122 | for_each_online_cpu(cpu) { | ||
123 | src = (uint64_t *)&(per_cpu(rds_stats, cpu)); | ||
124 | sum = (uint64_t *)&stats; | ||
125 | for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++) | ||
126 | *(sum++) += *(src++); | ||
127 | } | ||
128 | |||
129 | rds_stats_info_copy(iter, (uint64_t *)&stats, rds_stat_names, | ||
130 | ARRAY_SIZE(rds_stat_names)); | ||
131 | avail -= ARRAY_SIZE(rds_stat_names); | ||
132 | |||
133 | trans: | ||
134 | lens->each = sizeof(struct rds_info_counter); | ||
135 | lens->nr = rds_trans_stats_info_copy(iter, avail) + | ||
136 | ARRAY_SIZE(rds_stat_names); | ||
137 | } | ||
138 | |||
139 | void rds_stats_exit(void) | ||
140 | { | ||
141 | rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info); | ||
142 | } | ||
143 | |||
144 | int __init rds_stats_init(void) | ||
145 | { | ||
146 | rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info); | ||
147 | return 0; | ||
148 | } | ||
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c new file mode 100644 index 000000000000..307dc5c1be15 --- /dev/null +++ b/net/rds/sysctl.c | |||
@@ -0,0 +1,122 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/sysctl.h> | ||
35 | #include <linux/proc_fs.h> | ||
36 | |||
37 | #include "rds.h" | ||
38 | |||
39 | static struct ctl_table_header *rds_sysctl_reg_table; | ||
40 | |||
41 | static unsigned long rds_sysctl_reconnect_min = 1; | ||
42 | static unsigned long rds_sysctl_reconnect_max = ~0UL; | ||
43 | |||
44 | unsigned long rds_sysctl_reconnect_min_jiffies; | ||
45 | unsigned long rds_sysctl_reconnect_max_jiffies = HZ; | ||
46 | |||
47 | unsigned int rds_sysctl_max_unacked_packets = 8; | ||
48 | unsigned int rds_sysctl_max_unacked_bytes = (16 << 20); | ||
49 | |||
50 | unsigned int rds_sysctl_ping_enable = 1; | ||
51 | |||
52 | static ctl_table rds_sysctl_rds_table[] = { | ||
53 | { | ||
54 | .ctl_name = CTL_UNNUMBERED, | ||
55 | .procname = "reconnect_min_delay_ms", | ||
56 | .data = &rds_sysctl_reconnect_min_jiffies, | ||
57 | .maxlen = sizeof(unsigned long), | ||
58 | .mode = 0644, | ||
59 | .proc_handler = &proc_doulongvec_ms_jiffies_minmax, | ||
60 | .extra1 = &rds_sysctl_reconnect_min, | ||
61 | .extra2 = &rds_sysctl_reconnect_max_jiffies, | ||
62 | }, | ||
63 | { | ||
64 | .ctl_name = CTL_UNNUMBERED, | ||
65 | .procname = "reconnect_max_delay_ms", | ||
66 | .data = &rds_sysctl_reconnect_max_jiffies, | ||
67 | .maxlen = sizeof(unsigned long), | ||
68 | .mode = 0644, | ||
69 | .proc_handler = &proc_doulongvec_ms_jiffies_minmax, | ||
70 | .extra1 = &rds_sysctl_reconnect_min_jiffies, | ||
71 | .extra2 = &rds_sysctl_reconnect_max, | ||
72 | }, | ||
73 | { | ||
74 | .ctl_name = CTL_UNNUMBERED, | ||
75 | .procname = "max_unacked_packets", | ||
76 | .data = &rds_sysctl_max_unacked_packets, | ||
77 | .maxlen = sizeof(unsigned long), | ||
78 | .mode = 0644, | ||
79 | .proc_handler = &proc_dointvec, | ||
80 | }, | ||
81 | { | ||
82 | .ctl_name = CTL_UNNUMBERED, | ||
83 | .procname = "max_unacked_bytes", | ||
84 | .data = &rds_sysctl_max_unacked_bytes, | ||
85 | .maxlen = sizeof(unsigned long), | ||
86 | .mode = 0644, | ||
87 | .proc_handler = &proc_dointvec, | ||
88 | }, | ||
89 | { | ||
90 | .ctl_name = CTL_UNNUMBERED, | ||
91 | .procname = "ping_enable", | ||
92 | .data = &rds_sysctl_ping_enable, | ||
93 | .maxlen = sizeof(int), | ||
94 | .mode = 0644, | ||
95 | .proc_handler = &proc_dointvec, | ||
96 | }, | ||
97 | { .ctl_name = 0} | ||
98 | }; | ||
99 | |||
100 | static struct ctl_path rds_sysctl_path[] = { | ||
101 | { .procname = "net", .ctl_name = CTL_NET, }, | ||
102 | { .procname = "rds", .ctl_name = CTL_UNNUMBERED, }, | ||
103 | { } | ||
104 | }; | ||
105 | |||
106 | |||
107 | void rds_sysctl_exit(void) | ||
108 | { | ||
109 | if (rds_sysctl_reg_table) | ||
110 | unregister_sysctl_table(rds_sysctl_reg_table); | ||
111 | } | ||
112 | |||
113 | int __init rds_sysctl_init(void) | ||
114 | { | ||
115 | rds_sysctl_reconnect_min = msecs_to_jiffies(1); | ||
116 | rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min; | ||
117 | |||
118 | rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table); | ||
119 | if (rds_sysctl_reg_table == NULL) | ||
120 | return -ENOMEM; | ||
121 | return 0; | ||
122 | } | ||
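With the ctl_path of "net" then "rds" above, each procname in rds_sysctl_rds_table should appear as a file under /proc/sys/net/rds/ once the module is loaded. A small userspace sketch that dumps the tunables (it assumes that path exists and keeps error handling minimal; not kernel code):

#include <stdio.h>

static const char *rds_sysctls[] = {
	"reconnect_min_delay_ms",
	"reconnect_max_delay_ms",
	"max_unacked_packets",
	"max_unacked_bytes",
	"ping_enable",
};

int main(void)
{
	char path[128], value[64];
	size_t i;
	FILE *f;

	for (i = 0; i < sizeof(rds_sysctls) / sizeof(rds_sysctls[0]); i++) {
		snprintf(path, sizeof(path), "/proc/sys/net/rds/%s",
			 rds_sysctls[i]);
		f = fopen(path, "r");
		if (!f) {
			perror(path);
			continue;
		}
		if (fgets(value, sizeof(value), f))
			printf("%-24s = %s", rds_sysctls[i], value);
		fclose(f);
	}
	return 0;
}

Note that the two reconnect entries cross-reference each other through extra1/extra2, so the handler can clamp writes and keep the configured minimum delay at or below the maximum.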
diff --git a/net/rds/threads.c b/net/rds/threads.c new file mode 100644 index 000000000000..828a1bf9ea92 --- /dev/null +++ b/net/rds/threads.c | |||
@@ -0,0 +1,265 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/random.h> | ||
35 | |||
36 | #include "rds.h" | ||
37 | |||
38 | /* | ||
39 | * All of connection management is simplified by serializing it through | ||
40 | * work queues that execute in a connection managing thread. | ||
41 | * | ||
42 | * TCP wants to send acks through sendpage() in response to data_ready(), | ||
43 | * but it needs a process context to do so. | ||
44 | * | ||
45 | * The receive paths need to allocate but can't drop packets (!) so we have | ||
46 | * a thread around to block allocating if the receive fast path sees an | ||
47 | * allocation failure. | ||
48 | */ | ||
49 | |||
50 | /* Grand Unified Theory of connection life cycle: | ||
51 | * At any point in time, the connection can be in one of these states: | ||
52 | * DOWN, CONNECTING, UP, DISCONNECTING, ERROR | ||
53 | * | ||
54 | * The following transitions are possible: | ||
55 | * ANY -> ERROR | ||
56 | * UP -> DISCONNECTING | ||
57 | * ERROR -> DISCONNECTING | ||
58 | * DISCONNECTING -> DOWN | ||
59 | * DOWN -> CONNECTING | ||
60 | * CONNECTING -> UP | ||
61 | * | ||
62 | * Transition to state DISCONNECTING/DOWN: | ||
63 | * - Inside the shutdown worker; synchronizes with xmit path | ||
64 | * through c_send_lock, and with connection management callbacks | ||
65 | * via c_cm_lock. | ||
66 | * | ||
67 | * For receive callbacks, we rely on the underlying transport | ||
68 | * (TCP, IB/RDMA) to provide the necessary synchronisation. | ||
69 | */ | ||
70 | struct workqueue_struct *rds_wq; | ||
71 | |||
72 | void rds_connect_complete(struct rds_connection *conn) | ||
73 | { | ||
74 | if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) { | ||
75 | printk(KERN_WARNING "%s: Cannot transition to state UP, " | ||
76 | "current state is %d\n", | ||
77 | __func__, | ||
78 | atomic_read(&conn->c_state)); | ||
79 | atomic_set(&conn->c_state, RDS_CONN_ERROR); | ||
80 | queue_work(rds_wq, &conn->c_down_w); | ||
81 | return; | ||
82 | } | ||
83 | |||
84 | rdsdebug("conn %p for %pI4 to %pI4 complete\n", | ||
85 | conn, &conn->c_laddr, &conn->c_faddr); | ||
86 | |||
87 | conn->c_reconnect_jiffies = 0; | ||
88 | set_bit(0, &conn->c_map_queued); | ||
89 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); | ||
90 | queue_delayed_work(rds_wq, &conn->c_recv_w, 0); | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * This random exponential backoff is relied on to eventually resolve racing | ||
95 | * connects. | ||
96 | * | ||
97 | * If connect attempts race then both parties drop both connections and come | ||
98 | * here to wait for a random amount of time before trying again. Eventually | ||
99 | * the backoff range will be so much greater than the time it takes to | ||
100 | * establish a connection that one of the pair will establish the connection | ||
101 | * before the other's random delay fires. | ||
102 | * | ||
103 | * Connection attempts that arrive while a connection is already established | ||
104 | * are also considered to be racing connects. This lets a connection from | ||
105 | * a rebooted machine replace an existing stale connection before the transport | ||
106 | * notices that the connection has failed. | ||
107 | * | ||
108 | * We should *always* start with a random backoff; otherwise a broken connection | ||
109 | * will always take several iterations to be re-established. | ||
110 | */ | ||
111 | static void rds_queue_reconnect(struct rds_connection *conn) | ||
112 | { | ||
113 | unsigned long rand; | ||
114 | |||
115 | rdsdebug("conn %p for %pI4 to %pI4 reconnect jiffies %lu\n", | ||
116 | conn, &conn->c_laddr, &conn->c_faddr, | ||
117 | conn->c_reconnect_jiffies); | ||
118 | |||
119 | set_bit(RDS_RECONNECT_PENDING, &conn->c_flags); | ||
120 | if (conn->c_reconnect_jiffies == 0) { | ||
121 | conn->c_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; | ||
122 | queue_delayed_work(rds_wq, &conn->c_conn_w, 0); | ||
123 | return; | ||
124 | } | ||
125 | |||
126 | get_random_bytes(&rand, sizeof(rand)); | ||
127 | rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n", | ||
128 | rand % conn->c_reconnect_jiffies, conn->c_reconnect_jiffies, | ||
129 | conn, &conn->c_laddr, &conn->c_faddr); | ||
130 | queue_delayed_work(rds_wq, &conn->c_conn_w, | ||
131 | rand % conn->c_reconnect_jiffies); | ||
132 | |||
133 | conn->c_reconnect_jiffies = min(conn->c_reconnect_jiffies * 2, | ||
134 | rds_sysctl_reconnect_max_jiffies); | ||
135 | } | ||
136 | |||
137 | void rds_connect_worker(struct work_struct *work) | ||
138 | { | ||
139 | struct rds_connection *conn = container_of(work, struct rds_connection, c_conn_w.work); | ||
140 | int ret; | ||
141 | |||
142 | clear_bit(RDS_RECONNECT_PENDING, &conn->c_flags); | ||
143 | if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { | ||
144 | ret = conn->c_trans->conn_connect(conn); | ||
145 | rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n", | ||
146 | conn, &conn->c_laddr, &conn->c_faddr, ret); | ||
147 | |||
148 | if (ret) { | ||
149 | if (rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_DOWN)) | ||
150 | rds_queue_reconnect(conn); | ||
151 | else | ||
152 | rds_conn_error(conn, "RDS: connect failed\n"); | ||
153 | } | ||
154 | } | ||
155 | } | ||
156 | |||
157 | void rds_shutdown_worker(struct work_struct *work) | ||
158 | { | ||
159 | struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w); | ||
160 | |||
161 | /* shut it down unless it's down already */ | ||
162 | if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) { | ||
163 | /* | ||
164 | * Quiesce the connection mgmt handlers before we start tearing | ||
165 | * things down. We don't hold the mutex for the entire | ||
166 | * duration of the shutdown operation, else we may be | ||
167 | * deadlocking with the CM handler. Instead, the CM event | ||
168 | * handler is supposed to check for state DISCONNECTING | ||
169 | */ | ||
170 | mutex_lock(&conn->c_cm_lock); | ||
171 | if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING) | ||
172 | && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) { | ||
173 | rds_conn_error(conn, "shutdown called in state %d\n", | ||
174 | atomic_read(&conn->c_state)); | ||
175 | mutex_unlock(&conn->c_cm_lock); | ||
176 | return; | ||
177 | } | ||
178 | mutex_unlock(&conn->c_cm_lock); | ||
179 | |||
180 | mutex_lock(&conn->c_send_lock); | ||
181 | conn->c_trans->conn_shutdown(conn); | ||
182 | rds_conn_reset(conn); | ||
183 | mutex_unlock(&conn->c_send_lock); | ||
184 | |||
185 | if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) { | ||
186 | /* This can happen - eg when we're in the middle of tearing | ||
187 | * down the connection, and someone unloads the rds module. | ||
188 | * Quite reproducible with loopback connections. | ||
189 | * Mostly harmless. | ||
190 | */ | ||
191 | rds_conn_error(conn, | ||
192 | "%s: failed to transition to state DOWN, " | ||
193 | "current state is %d\n", | ||
194 | __func__, | ||
195 | atomic_read(&conn->c_state)); | ||
196 | return; | ||
197 | } | ||
198 | } | ||
199 | |||
200 | /* Then reconnect if it's still live. | ||
201 | * The passive side of an IB loopback connection is never added | ||
202 | * to the conn hash, so we never trigger a reconnect on this | ||
203 | * conn - the reconnect is always triggered by the active peer. */ | ||
204 | cancel_delayed_work(&conn->c_conn_w); | ||
205 | if (!hlist_unhashed(&conn->c_hash_node)) | ||
206 | rds_queue_reconnect(conn); | ||
207 | } | ||
208 | |||
209 | void rds_send_worker(struct work_struct *work) | ||
210 | { | ||
211 | struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work); | ||
212 | int ret; | ||
213 | |||
214 | if (rds_conn_state(conn) == RDS_CONN_UP) { | ||
215 | ret = rds_send_xmit(conn); | ||
216 | rdsdebug("conn %p ret %d\n", conn, ret); | ||
217 | switch (ret) { | ||
218 | case -EAGAIN: | ||
219 | rds_stats_inc(s_send_immediate_retry); | ||
220 | queue_delayed_work(rds_wq, &conn->c_send_w, 0); | ||
221 | break; | ||
222 | case -ENOMEM: | ||
223 | rds_stats_inc(s_send_delayed_retry); | ||
224 | queue_delayed_work(rds_wq, &conn->c_send_w, 2); | ||
225 | default: | ||
226 | break; | ||
227 | } | ||
228 | } | ||
229 | } | ||
230 | |||
231 | void rds_recv_worker(struct work_struct *work) | ||
232 | { | ||
233 | struct rds_connection *conn = container_of(work, struct rds_connection, c_recv_w.work); | ||
234 | int ret; | ||
235 | |||
236 | if (rds_conn_state(conn) == RDS_CONN_UP) { | ||
237 | ret = conn->c_trans->recv(conn); | ||
238 | rdsdebug("conn %p ret %d\n", conn, ret); | ||
239 | switch (ret) { | ||
240 | case -EAGAIN: | ||
241 | rds_stats_inc(s_recv_immediate_retry); | ||
242 | queue_delayed_work(rds_wq, &conn->c_recv_w, 0); | ||
243 | break; | ||
244 | case -ENOMEM: | ||
245 | rds_stats_inc(s_recv_delayed_retry); | ||
246 | queue_delayed_work(rds_wq, &conn->c_recv_w, 2); | ||
247 | default: | ||
248 | break; | ||
249 | } | ||
250 | } | ||
251 | } | ||
252 | |||
253 | void rds_threads_exit(void) | ||
254 | { | ||
255 | destroy_workqueue(rds_wq); | ||
256 | } | ||
257 | |||
258 | int __init rds_threads_init(void) | ||
259 | { | ||
260 | rds_wq = create_singlethread_workqueue("krdsd"); | ||
261 | if (rds_wq == NULL) | ||
262 | return -ENOMEM; | ||
263 | |||
264 | return 0; | ||
265 | } | ||
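rds_queue_reconnect() above implements the randomised exponential backoff described in the comment block: the first retry is queued immediately, later retries wait a random amount below the current backoff, and the backoff doubles each time up to the reconnect maximum. A userspace sketch of that policy in milliseconds (constants chosen to echo the sysctl defaults; not kernel code):

#include <stdio.h>
#include <stdlib.h>

#define RECONNECT_MIN_MS	1
#define RECONNECT_MAX_MS	1000	/* roughly HZ jiffies, the default cap */

static unsigned long backoff_ms;

static unsigned long next_reconnect_delay(void)
{
	unsigned long delay;

	if (backoff_ms == 0) {
		backoff_ms = RECONNECT_MIN_MS;
		return 0;			/* first attempt: reconnect now */
	}

	delay = (unsigned long)rand() % backoff_ms;
	backoff_ms *= 2;
	if (backoff_ms > RECONNECT_MAX_MS)
		backoff_ms = RECONNECT_MAX_MS;
	return delay;
}

int main(void)
{
	int attempt;

	srand(42);
	for (attempt = 0; attempt < 8; attempt++)
		printf("attempt %d: delay %lu ms\n",
		       attempt, next_reconnect_delay());
	return 0;
}

Because both peers draw independent random delays, one of a pair of racing connects eventually wins by a wide enough margin, which is the property the comment relies on.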
diff --git a/net/rds/transport.c b/net/rds/transport.c new file mode 100644 index 000000000000..767da61ad2f3 --- /dev/null +++ b/net/rds/transport.c | |||
@@ -0,0 +1,117 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2006 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This software is available to you under a choice of one of two | ||
5 | * licenses. You may choose to be licensed under the terms of the GNU | ||
6 | * General Public License (GPL) Version 2, available from the file | ||
7 | * COPYING in the main directory of this source tree, or the | ||
8 | * OpenIB.org BSD license below: | ||
9 | * | ||
10 | * Redistribution and use in source and binary forms, with or | ||
11 | * without modification, are permitted provided that the following | ||
12 | * conditions are met: | ||
13 | * | ||
14 | * - Redistributions of source code must retain the above | ||
15 | * copyright notice, this list of conditions and the following | ||
16 | * disclaimer. | ||
17 | * | ||
18 | * - Redistributions in binary form must reproduce the above | ||
19 | * copyright notice, this list of conditions and the following | ||
20 | * disclaimer in the documentation and/or other materials | ||
21 | * provided with the distribution. | ||
22 | * | ||
23 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | ||
24 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | ||
25 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | ||
26 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS | ||
27 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN | ||
28 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | ||
29 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
30 | * SOFTWARE. | ||
31 | * | ||
32 | */ | ||
33 | #include <linux/kernel.h> | ||
34 | #include <linux/module.h> | ||
35 | #include <linux/in.h> | ||
36 | |||
37 | #include "rds.h" | ||
38 | #include "loop.h" | ||
39 | |||
40 | static LIST_HEAD(rds_transports); | ||
41 | static DECLARE_RWSEM(rds_trans_sem); | ||
42 | |||
43 | int rds_trans_register(struct rds_transport *trans) | ||
44 | { | ||
45 | BUG_ON(strlen(trans->t_name) + 1 > TRANSNAMSIZ); | ||
46 | |||
47 | down_write(&rds_trans_sem); | ||
48 | |||
49 | list_add_tail(&trans->t_item, &rds_transports); | ||
50 | printk(KERN_INFO "Registered RDS/%s transport\n", trans->t_name); | ||
51 | |||
52 | up_write(&rds_trans_sem); | ||
53 | |||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | void rds_trans_unregister(struct rds_transport *trans) | ||
58 | { | ||
59 | down_write(&rds_trans_sem); | ||
60 | |||
61 | list_del_init(&trans->t_item); | ||
62 | printk(KERN_INFO "Unregistered RDS/%s transport\n", trans->t_name); | ||
63 | |||
64 | up_write(&rds_trans_sem); | ||
65 | } | ||
66 | |||
67 | struct rds_transport *rds_trans_get_preferred(__be32 addr) | ||
68 | { | ||
69 | struct rds_transport *trans; | ||
70 | struct rds_transport *ret = NULL; | ||
71 | |||
72 | if (IN_LOOPBACK(ntohl(addr))) | ||
73 | return &rds_loop_transport; | ||
74 | |||
75 | down_read(&rds_trans_sem); | ||
76 | list_for_each_entry(trans, &rds_transports, t_item) { | ||
77 | if (trans->laddr_check(addr) == 0) { | ||
78 | ret = trans; | ||
79 | break; | ||
80 | } | ||
81 | } | ||
82 | up_read(&rds_trans_sem); | ||
83 | |||
84 | return ret; | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * This returns the number of stats entries in the snapshot and only | ||
89 | * copies them using the iter if there is enough space for them. The | ||
90 | * caller passes in the global stats so that we can size and copy while | ||
91 | * holding the lock. | ||
92 | */ | ||
93 | unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, | ||
94 | unsigned int avail) | ||
95 | |||
96 | { | ||
97 | struct rds_transport *trans; | ||
98 | unsigned int total = 0; | ||
99 | unsigned int part; | ||
100 | |||
101 | rds_info_iter_unmap(iter); | ||
102 | down_read(&rds_trans_sem); | ||
103 | |||
104 | list_for_each_entry(trans, &rds_transports, t_item) { | ||
105 | if (trans->stats_info_copy == NULL) | ||
106 | continue; | ||
107 | |||
108 | part = trans->stats_info_copy(iter, avail); | ||
109 | avail -= min(avail, part); | ||
110 | total += part; | ||
111 | } | ||
112 | |||
113 | up_read(&rds_trans_sem); | ||
114 | |||
115 | return total; | ||
116 | } | ||
117 | |||
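rds_trans_get_preferred() above short-circuits loopback addresses to the loop transport and otherwise walks the registration list under rds_trans_sem, taking the first transport whose laddr_check() claims the local address. A userspace sketch of that selection rule (the transport names and address checks are invented for illustration):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

struct demo_transport {
	const char *name;
	int (*laddr_check)(uint32_t addr_be);	/* 0 means "this address is mine" */
};

static int ib_laddr_check(uint32_t addr_be)
{
	/* Pretend only 192.168.0.0/16 is reachable over IB. */
	return (ntohl(addr_be) & 0xffff0000) == 0xc0a80000 ? 0 : -1;
}

static int tcp_laddr_check(uint32_t addr_be)
{
	(void)addr_be;
	return 0;				/* TCP accepts anything */
}

static const struct demo_transport transports[] = {
	{ "ib",  ib_laddr_check },
	{ "tcp", tcp_laddr_check },
};

static const char *preferred_transport(const char *addr_str)
{
	uint32_t addr_be = inet_addr(addr_str);
	size_t i;

	if ((ntohl(addr_be) >> 24) == 127)	/* IN_LOOPBACK() */
		return "loop";

	for (i = 0; i < sizeof(transports) / sizeof(transports[0]); i++)
		if (transports[i].laddr_check(addr_be) == 0)
			return transports[i].name;
	return "none";
}

int main(void)
{
	printf("127.0.0.1   -> %s\n", preferred_transport("127.0.0.1"));
	printf("192.168.1.5 -> %s\n", preferred_transport("192.168.1.5"));
	printf("10.0.0.1    -> %s\n", preferred_transport("10.0.0.1"));
	return 0;
}

Registration order therefore doubles as a priority order, which is why rds_trans_register() appends with list_add_tail().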
diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 5c72a116b1a4..f8f047b61245 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c | |||
@@ -183,13 +183,6 @@ override: | |||
183 | if (R_tab == NULL) | 183 | if (R_tab == NULL) |
184 | goto failure; | 184 | goto failure; |
185 | 185 | ||
186 | if (!est && (ret == ACT_P_CREATED || | ||
187 | !gen_estimator_active(&police->tcf_bstats, | ||
188 | &police->tcf_rate_est))) { | ||
189 | err = -EINVAL; | ||
190 | goto failure; | ||
191 | } | ||
192 | |||
193 | if (parm->peakrate.rate) { | 186 | if (parm->peakrate.rate) { |
194 | P_tab = qdisc_get_rtab(&parm->peakrate, | 187 | P_tab = qdisc_get_rtab(&parm->peakrate, |
195 | tb[TCA_POLICE_PEAKRATE]); | 188 | tb[TCA_POLICE_PEAKRATE]); |
@@ -205,6 +198,12 @@ override: | |||
205 | &police->tcf_lock, est); | 198 | &police->tcf_lock, est); |
206 | if (err) | 199 | if (err) |
207 | goto failure_unlock; | 200 | goto failure_unlock; |
201 | } else if (tb[TCA_POLICE_AVRATE] && | ||
202 | (ret == ACT_P_CREATED || | ||
203 | !gen_estimator_active(&police->tcf_bstats, | ||
204 | &police->tcf_rate_est))) { | ||
205 | err = -EINVAL; | ||
206 | goto failure_unlock; | ||
208 | } | 207 | } |
209 | 208 | ||
210 | /* No failure allowed after this point */ | 209 | /* No failure allowed after this point */ |
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 9e43ed949167..d728d8111732 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c | |||
@@ -1960,8 +1960,11 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) | |||
1960 | cbq_rmprio(q, cl); | 1960 | cbq_rmprio(q, cl); |
1961 | sch_tree_unlock(sch); | 1961 | sch_tree_unlock(sch); |
1962 | 1962 | ||
1963 | if (--cl->refcnt == 0) | 1963 | BUG_ON(--cl->refcnt == 0); |
1964 | cbq_destroy_class(sch, cl); | 1964 | /* |
1965 | * This shouldn't happen: we "hold" one cops->get() when called | ||
1966 | * from tc_ctl_tclass; the destroy method is done from cops->put(). | ||
1967 | */ | ||
1965 | 1968 | ||
1966 | return 0; | 1969 | return 0; |
1967 | } | 1970 | } |
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index f6b4fa97df70..7597fe146866 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c | |||
@@ -66,11 +66,15 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, | |||
66 | { | 66 | { |
67 | struct drr_sched *q = qdisc_priv(sch); | 67 | struct drr_sched *q = qdisc_priv(sch); |
68 | struct drr_class *cl = (struct drr_class *)*arg; | 68 | struct drr_class *cl = (struct drr_class *)*arg; |
69 | struct nlattr *opt = tca[TCA_OPTIONS]; | ||
69 | struct nlattr *tb[TCA_DRR_MAX + 1]; | 70 | struct nlattr *tb[TCA_DRR_MAX + 1]; |
70 | u32 quantum; | 71 | u32 quantum; |
71 | int err; | 72 | int err; |
72 | 73 | ||
73 | err = nla_parse_nested(tb, TCA_DRR_MAX, tca[TCA_OPTIONS], drr_policy); | 74 | if (!opt) |
75 | return -EINVAL; | ||
76 | |||
77 | err = nla_parse_nested(tb, TCA_DRR_MAX, opt, drr_policy); | ||
74 | if (err < 0) | 78 | if (err < 0) |
75 | return err; | 79 | return err; |
76 | 80 | ||
@@ -151,8 +155,11 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg) | |||
151 | drr_purge_queue(cl); | 155 | drr_purge_queue(cl); |
152 | qdisc_class_hash_remove(&q->clhash, &cl->common); | 156 | qdisc_class_hash_remove(&q->clhash, &cl->common); |
153 | 157 | ||
154 | if (--cl->refcnt == 0) | 158 | BUG_ON(--cl->refcnt == 0); |
155 | drr_destroy_class(sch, cl); | 159 | /* |
160 | * This shouldn't happen: we "hold" one cops->get() when called | ||
161 | * from tc_ctl_tclass; the destroy method is done from cops->put(). | ||
162 | */ | ||
156 | 163 | ||
157 | sch_tree_unlock(sch); | 164 | sch_tree_unlock(sch); |
158 | return 0; | 165 | return 0; |
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 74226b265528..5022f9c1f34b 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c | |||
@@ -1139,8 +1139,11 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) | |||
1139 | hfsc_purge_queue(sch, cl); | 1139 | hfsc_purge_queue(sch, cl); |
1140 | qdisc_class_hash_remove(&q->clhash, &cl->cl_common); | 1140 | qdisc_class_hash_remove(&q->clhash, &cl->cl_common); |
1141 | 1141 | ||
1142 | if (--cl->refcnt == 0) | 1142 | BUG_ON(--cl->refcnt == 0); |
1143 | hfsc_destroy_class(sch, cl); | 1143 | /* |
1144 | * This shouldn't happen: we "hold" one cops->get() when called | ||
1145 | * from tc_ctl_tclass; the destroy method is done from cops->put(). | ||
1146 | */ | ||
1144 | 1147 | ||
1145 | sch_tree_unlock(sch); | 1148 | sch_tree_unlock(sch); |
1146 | return 0; | 1149 | return 0; |
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 355974f610c5..88cd02626621 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c | |||
@@ -1275,8 +1275,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) | |||
1275 | if (last_child) | 1275 | if (last_child) |
1276 | htb_parent_to_leaf(q, cl, new_q); | 1276 | htb_parent_to_leaf(q, cl, new_q); |
1277 | 1277 | ||
1278 | if (--cl->refcnt == 0) | 1278 | BUG_ON(--cl->refcnt == 0); |
1279 | htb_destroy_class(sch, cl); | 1279 | /* |
1280 | * This shouldn't happen: we "hold" one cops->get() when called | ||
1281 | * from tc_ctl_tclass; the destroy method is done from cops->put(). | ||
1282 | */ | ||
1280 | 1283 | ||
1281 | sch_tree_unlock(sch); | 1284 | sch_tree_unlock(sch); |
1282 | return 0; | 1285 | return 0; |
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index a2f93c09f3cc..e22dfe85e43e 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c | |||
@@ -236,7 +236,6 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) | |||
236 | struct tc_tbf_qopt *qopt; | 236 | struct tc_tbf_qopt *qopt; |
237 | struct qdisc_rate_table *rtab = NULL; | 237 | struct qdisc_rate_table *rtab = NULL; |
238 | struct qdisc_rate_table *ptab = NULL; | 238 | struct qdisc_rate_table *ptab = NULL; |
239 | struct qdisc_rate_table *tmp; | ||
240 | struct Qdisc *child = NULL; | 239 | struct Qdisc *child = NULL; |
241 | int max_size,n; | 240 | int max_size,n; |
242 | 241 | ||
@@ -295,13 +294,9 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) | |||
295 | q->tokens = q->buffer; | 294 | q->tokens = q->buffer; |
296 | q->ptokens = q->mtu; | 295 | q->ptokens = q->mtu; |
297 | 296 | ||
298 | tmp = q->R_tab; | 297 | swap(q->R_tab, rtab); |
299 | q->R_tab = rtab; | 298 | swap(q->P_tab, ptab); |
300 | rtab = tmp; | ||
301 | 299 | ||
302 | tmp = q->P_tab; | ||
303 | q->P_tab = ptab; | ||
304 | ptab = tmp; | ||
305 | sch_tree_unlock(sch); | 300 | sch_tree_unlock(sch); |
306 | err = 0; | 301 | err = 0; |
307 | done: | 302 | done: |
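The tbf_change() hunk above installs the freshly looked-up rate tables with the kernel's swap() helper instead of an explicit temporary: afterwards q->R_tab and q->P_tab point at the new tables while the locals rtab and ptab hold the old ones, which the function's common exit path can presumably release as before. swap() is essentially the macro in this userspace sketch:

#include <stdio.h>

#define swap(a, b) \
	do { __typeof__(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

struct rate_table { int id; };

int main(void)
{
	struct rate_table old_r = { 1 }, new_r = { 2 };
	struct rate_table *q_R_tab = &old_r;	/* table currently installed */
	struct rate_table *rtab = &new_r;	/* freshly looked-up table */

	swap(q_R_tab, rtab);

	printf("installed id %d, leftover id %d\n", q_R_tab->id, rtab->id);
	return 0;
}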
diff --git a/net/sctp/debug.c b/net/sctp/debug.c index 67715f4eb849..7ff548a30cfb 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c | |||
@@ -86,6 +86,9 @@ const char *sctp_cname(const sctp_subtype_t cid) | |||
86 | case SCTP_CID_FWD_TSN: | 86 | case SCTP_CID_FWD_TSN: |
87 | return "FWD_TSN"; | 87 | return "FWD_TSN"; |
88 | 88 | ||
89 | case SCTP_CID_AUTH: | ||
90 | return "AUTH"; | ||
91 | |||
89 | default: | 92 | default: |
90 | break; | 93 | break; |
91 | } | 94 | } |
@@ -135,6 +138,7 @@ static const char *sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = { | |||
135 | "PRIMITIVE_ABORT", | 138 | "PRIMITIVE_ABORT", |
136 | "PRIMITIVE_SEND", | 139 | "PRIMITIVE_SEND", |
137 | "PRIMITIVE_REQUESTHEARTBEAT", | 140 | "PRIMITIVE_REQUESTHEARTBEAT", |
141 | "PRIMITIVE_ASCONF", | ||
138 | }; | 142 | }; |
139 | 143 | ||
140 | /* Lookup primitive debug name. */ | 144 | /* Lookup primitive debug name. */ |
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 4c8d9f45ce09..905fda582b92 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c | |||
@@ -111,7 +111,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, | |||
111 | if (sctp_addip_enable) { | 111 | if (sctp_addip_enable) { |
112 | auth_chunks->chunks[0] = SCTP_CID_ASCONF; | 112 | auth_chunks->chunks[0] = SCTP_CID_ASCONF; |
113 | auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; | 113 | auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; |
114 | auth_chunks->param_hdr.length += htons(2); | 114 | auth_chunks->param_hdr.length = |
115 | htons(sizeof(sctp_paramhdr_t) + 2); | ||
115 | } | 116 | } |
116 | } | 117 | } |
117 | 118 | ||
diff --git a/net/sctp/output.c b/net/sctp/output.c index 07d58903a746..7d08f522ec84 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c | |||
@@ -49,13 +49,10 @@ | |||
49 | #include <linux/ipv6.h> | 49 | #include <linux/ipv6.h> |
50 | #include <linux/init.h> | 50 | #include <linux/init.h> |
51 | #include <net/inet_ecn.h> | 51 | #include <net/inet_ecn.h> |
52 | #include <net/ip.h> | ||
52 | #include <net/icmp.h> | 53 | #include <net/icmp.h> |
53 | #include <net/net_namespace.h> | 54 | #include <net/net_namespace.h> |
54 | 55 | ||
55 | #ifndef TEST_FRAME | ||
56 | #include <net/tcp.h> | ||
57 | #endif /* TEST_FRAME (not defined) */ | ||
58 | |||
59 | #include <linux/socket.h> /* for sa_family_t */ | 56 | #include <linux/socket.h> /* for sa_family_t */ |
60 | #include <net/sock.h> | 57 | #include <net/sock.h> |
61 | 58 | ||
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index bc411c896216..d765fc53e74d 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c | |||
@@ -428,7 +428,8 @@ void sctp_retransmit_mark(struct sctp_outq *q, | |||
428 | * retransmitting due to T3 timeout. | 428 | * retransmitting due to T3 timeout. |
429 | */ | 429 | */ |
430 | if (reason == SCTP_RTXR_T3_RTX && | 430 | if (reason == SCTP_RTXR_T3_RTX && |
431 | (jiffies - chunk->sent_at) < transport->last_rto) | 431 | time_before(jiffies, chunk->sent_at + |
432 | transport->last_rto)) | ||
432 | continue; | 433 | continue; |
433 | 434 | ||
434 | /* RFC 2960 6.2.1 Processing a Received SACK | 435 | /* RFC 2960 6.2.1 Processing a Received SACK |
@@ -1757,6 +1758,9 @@ static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn) | |||
1757 | struct sctp_chunk *chunk; | 1758 | struct sctp_chunk *chunk; |
1758 | struct list_head *lchunk, *temp; | 1759 | struct list_head *lchunk, *temp; |
1759 | 1760 | ||
1761 | if (!asoc->peer.prsctp_capable) | ||
1762 | return; | ||
1763 | |||
1760 | /* PR-SCTP C1) Let SackCumAck be the Cumulative TSN ACK carried in the | 1764 | /* PR-SCTP C1) Let SackCumAck be the Cumulative TSN ACK carried in the |
1761 | * received SACK. | 1765 | * received SACK. |
1762 | * | 1766 | * |
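The retransmit-mark hunk above moves the RTO age test onto time_before(), the kernel's standard wraparound-aware jiffies comparison; it reduces to a signed difference, so the test keeps giving sensible answers when the jiffies counter wraps between sent_at and now. A userspace sketch (the macro body mirrors the kernel definition minus its type checking):

#include <stdio.h>

#define time_before(a, b)	((long)((b) - (a)) > 0)

int main(void)
{
	unsigned long sent_at = (unsigned long)-10;	/* just before a wrap */
	unsigned long rto = 100;
	unsigned long now;

	now = 5;					/* 15 ticks later, post-wrap */
	printf("within rto: %d\n", time_before(now, sent_at + rto));	/* 1 */

	now = 200;					/* well past the RTO window */
	printf("within rto: %d\n", time_before(now, sent_at + rto));	/* 0 */
	return 0;
}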
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index c1e316ee7155..cb198af8887c 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c | |||
@@ -692,15 +692,20 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, | |||
692 | static int sctp_ctl_sock_init(void) | 692 | static int sctp_ctl_sock_init(void) |
693 | { | 693 | { |
694 | int err; | 694 | int err; |
695 | sa_family_t family; | 695 | sa_family_t family = PF_INET; |
696 | 696 | ||
697 | if (sctp_get_pf_specific(PF_INET6)) | 697 | if (sctp_get_pf_specific(PF_INET6)) |
698 | family = PF_INET6; | 698 | family = PF_INET6; |
699 | else | ||
700 | family = PF_INET; | ||
701 | 699 | ||
702 | err = inet_ctl_sock_create(&sctp_ctl_sock, family, | 700 | err = inet_ctl_sock_create(&sctp_ctl_sock, family, |
703 | SOCK_SEQPACKET, IPPROTO_SCTP, &init_net); | 701 | SOCK_SEQPACKET, IPPROTO_SCTP, &init_net); |
702 | |||
703 | /* If IPv6 socket could not be created, try the IPv4 socket */ | ||
704 | if (err < 0 && family == PF_INET6) | ||
705 | err = inet_ctl_sock_create(&sctp_ctl_sock, AF_INET, | ||
706 | SOCK_SEQPACKET, IPPROTO_SCTP, | ||
707 | &init_net); | ||
708 | |||
704 | if (err < 0) { | 709 | if (err < 0) { |
705 | printk(KERN_ERR | 710 | printk(KERN_ERR |
706 | "SCTP: Failed to create the SCTP control socket.\n"); | 711 | "SCTP: Failed to create the SCTP control socket.\n"); |
@@ -1297,9 +1302,8 @@ SCTP_STATIC __init int sctp_init(void) | |||
1297 | out: | 1302 | out: |
1298 | return status; | 1303 | return status; |
1299 | err_v6_add_protocol: | 1304 | err_v6_add_protocol: |
1300 | sctp_v6_del_protocol(); | ||
1301 | err_add_protocol: | ||
1302 | sctp_v4_del_protocol(); | 1305 | sctp_v4_del_protocol(); |
1306 | err_add_protocol: | ||
1303 | inet_ctl_sock_destroy(sctp_ctl_sock); | 1307 | inet_ctl_sock_destroy(sctp_ctl_sock); |
1304 | err_ctl_sock_init: | 1308 | err_ctl_sock_init: |
1305 | sctp_v6_protosw_exit(); | 1309 | sctp_v6_protosw_exit(); |
@@ -1310,7 +1314,6 @@ err_protosw_init: | |||
1310 | sctp_v4_pf_exit(); | 1314 | sctp_v4_pf_exit(); |
1311 | sctp_v6_pf_exit(); | 1315 | sctp_v6_pf_exit(); |
1312 | sctp_sysctl_unregister(); | 1316 | sctp_sysctl_unregister(); |
1313 | list_del(&sctp_af_inet.list); | ||
1314 | free_pages((unsigned long)sctp_port_hashtable, | 1317 | free_pages((unsigned long)sctp_port_hashtable, |
1315 | get_order(sctp_port_hashsize * | 1318 | get_order(sctp_port_hashsize * |
1316 | sizeof(struct sctp_bind_hashbucket))); | 1319 | sizeof(struct sctp_bind_hashbucket))); |
@@ -1358,7 +1361,6 @@ SCTP_STATIC __exit void sctp_exit(void) | |||
1358 | sctp_v4_pf_exit(); | 1361 | sctp_v4_pf_exit(); |
1359 | 1362 | ||
1360 | sctp_sysctl_unregister(); | 1363 | sctp_sysctl_unregister(); |
1361 | list_del(&sctp_af_inet.list); | ||
1362 | 1364 | ||
1363 | free_pages((unsigned long)sctp_assoc_hashtable, | 1365 | free_pages((unsigned long)sctp_assoc_hashtable, |
1364 | get_order(sctp_assoc_hashsize * | 1366 | get_order(sctp_assoc_hashsize * |
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index b40e95f9851b..6851ee94e974 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c | |||
@@ -224,7 +224,9 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, | |||
224 | num_ext += 2; | 224 | num_ext += 2; |
225 | } | 225 | } |
226 | 226 | ||
227 | chunksize += sizeof(aiparam); | 227 | if (sp->adaptation_ind) |
228 | chunksize += sizeof(aiparam); | ||
229 | |||
228 | chunksize += vparam_len; | 230 | chunksize += vparam_len; |
229 | 231 | ||
230 | /* Account for AUTH related parameters */ | 232 | /* Account for AUTH related parameters */ |
@@ -304,10 +306,12 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, | |||
304 | if (sctp_prsctp_enable) | 306 | if (sctp_prsctp_enable) |
305 | sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); | 307 | sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); |
306 | 308 | ||
307 | aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; | 309 | if (sp->adaptation_ind) { |
308 | aiparam.param_hdr.length = htons(sizeof(aiparam)); | 310 | aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; |
309 | aiparam.adaptation_ind = htonl(sp->adaptation_ind); | 311 | aiparam.param_hdr.length = htons(sizeof(aiparam)); |
310 | sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); | 312 | aiparam.adaptation_ind = htonl(sp->adaptation_ind); |
313 | sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); | ||
314 | } | ||
311 | 315 | ||
312 | /* Add SCTP-AUTH chunks to the parameter list */ | 316 | /* Add SCTP-AUTH chunks to the parameter list */ |
313 | if (sctp_auth_enable) { | 317 | if (sctp_auth_enable) { |
@@ -332,6 +336,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, | |||
332 | sctp_inithdr_t initack; | 336 | sctp_inithdr_t initack; |
333 | struct sctp_chunk *retval; | 337 | struct sctp_chunk *retval; |
334 | union sctp_params addrs; | 338 | union sctp_params addrs; |
339 | struct sctp_sock *sp; | ||
335 | int addrs_len; | 340 | int addrs_len; |
336 | sctp_cookie_param_t *cookie; | 341 | sctp_cookie_param_t *cookie; |
337 | int cookie_len; | 342 | int cookie_len; |
@@ -366,22 +371,24 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, | |||
366 | /* Calculate the total size of allocation, include the reserved | 371 | /* Calculate the total size of allocation, include the reserved |
367 | * space for reporting unknown parameters if it is specified. | 372 | * space for reporting unknown parameters if it is specified. |
368 | */ | 373 | */ |
374 | sp = sctp_sk(asoc->base.sk); | ||
369 | chunksize = sizeof(initack) + addrs_len + cookie_len + unkparam_len; | 375 | chunksize = sizeof(initack) + addrs_len + cookie_len + unkparam_len; |
370 | 376 | ||
371 | /* Tell peer that we'll do ECN only if peer advertised such cap. */ | 377 | /* Tell peer that we'll do ECN only if peer advertised such cap. */ |
372 | if (asoc->peer.ecn_capable) | 378 | if (asoc->peer.ecn_capable) |
373 | chunksize += sizeof(ecap_param); | 379 | chunksize += sizeof(ecap_param); |
374 | 380 | ||
375 | if (sctp_prsctp_enable) | 381 | if (asoc->peer.prsctp_capable) |
376 | chunksize += sizeof(prsctp_param); | 382 | chunksize += sizeof(prsctp_param); |
377 | 383 | ||
378 | if (sctp_addip_enable) { | 384 | if (asoc->peer.asconf_capable) { |
379 | extensions[num_ext] = SCTP_CID_ASCONF; | 385 | extensions[num_ext] = SCTP_CID_ASCONF; |
380 | extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; | 386 | extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; |
381 | num_ext += 2; | 387 | num_ext += 2; |
382 | } | 388 | } |
383 | 389 | ||
384 | chunksize += sizeof(aiparam); | 390 | if (sp->adaptation_ind) |
391 | chunksize += sizeof(aiparam); | ||
385 | 392 | ||
386 | if (asoc->peer.auth_capable) { | 393 | if (asoc->peer.auth_capable) { |
387 | auth_random = (sctp_paramhdr_t *)asoc->c.auth_random; | 394 | auth_random = (sctp_paramhdr_t *)asoc->c.auth_random; |
@@ -432,10 +439,12 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, | |||
432 | if (asoc->peer.prsctp_capable) | 439 | if (asoc->peer.prsctp_capable) |
433 | sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); | 440 | sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); |
434 | 441 | ||
435 | aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; | 442 | if (sp->adaptation_ind) { |
436 | aiparam.param_hdr.length = htons(sizeof(aiparam)); | 443 | aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; |
437 | aiparam.adaptation_ind = htonl(sctp_sk(asoc->base.sk)->adaptation_ind); | 444 | aiparam.param_hdr.length = htons(sizeof(aiparam)); |
438 | sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); | 445 | aiparam.adaptation_ind = htonl(sp->adaptation_ind); |
446 | sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); | ||
447 | } | ||
439 | 448 | ||
440 | if (asoc->peer.auth_capable) { | 449 | if (asoc->peer.auth_capable) { |
441 | sctp_addto_chunk(retval, ntohs(auth_random->length), | 450 | sctp_addto_chunk(retval, ntohs(auth_random->length), |
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 0146cfb1f182..e2020eb2c8ca 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c | |||
@@ -434,7 +434,8 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = { | |||
434 | * | 434 | * |
435 | */ | 435 | */ |
436 | static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, | 436 | static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, |
437 | struct sctp_transport *transport) | 437 | struct sctp_transport *transport, |
438 | int is_hb) | ||
438 | { | 439 | { |
439 | /* The check for association's overall error counter exceeding the | 440 | /* The check for association's overall error counter exceeding the |
440 | * threshold is done in the state function. | 441 | * threshold is done in the state function. |
@@ -466,7 +467,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, | |||
466 | * The first unacknowleged HB triggers it. We do this with a flag | 467 | * The first unacknowleged HB triggers it. We do this with a flag |
467 | * that indicates that we have an outstanding HB. | 468 | * that indicates that we have an outstanding HB. |
468 | */ | 469 | */ |
469 | if (transport->hb_sent) { | 470 | if (!is_hb || transport->hb_sent) { |
470 | transport->last_rto = transport->rto; | 471 | transport->last_rto = transport->rto; |
471 | transport->rto = min((transport->rto * 2), transport->asoc->rto_max); | 472 | transport->rto = min((transport->rto * 2), transport->asoc->rto_max); |
472 | } | 473 | } |
@@ -657,20 +658,6 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, | |||
657 | sctp_transport_hold(t); | 658 | sctp_transport_hold(t); |
658 | } | 659 | } |
659 | 660 | ||
660 | /* Helper function to do a transport reset at the expiry of the hearbeat | ||
661 | * timer. | ||
662 | */ | ||
663 | static void sctp_cmd_transport_reset(sctp_cmd_seq_t *cmds, | ||
664 | struct sctp_association *asoc, | ||
665 | struct sctp_transport *t) | ||
666 | { | ||
667 | sctp_transport_lower_cwnd(t, SCTP_LOWER_CWND_INACTIVE); | ||
668 | |||
669 | /* Mark one strike against a transport. */ | ||
670 | sctp_do_8_2_transport_strike(asoc, t); | ||
671 | |||
672 | t->hb_sent = 1; | ||
673 | } | ||
674 | 661 | ||
675 | /* Helper function to process the process SACK command. */ | 662 | /* Helper function to process the process SACK command. */ |
676 | static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, | 663 | static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, |
@@ -800,36 +787,48 @@ static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds, | |||
800 | struct sctp_association *asoc, | 787 | struct sctp_association *asoc, |
801 | struct sctp_chunk *chunk) | 788 | struct sctp_chunk *chunk) |
802 | { | 789 | { |
803 | struct sctp_operr_chunk *operr_chunk; | ||
804 | struct sctp_errhdr *err_hdr; | 790 | struct sctp_errhdr *err_hdr; |
791 | struct sctp_ulpevent *ev; | ||
805 | 792 | ||
806 | operr_chunk = (struct sctp_operr_chunk *)chunk->chunk_hdr; | 793 | while (chunk->chunk_end > chunk->skb->data) { |
807 | err_hdr = &operr_chunk->err_hdr; | 794 | err_hdr = (struct sctp_errhdr *)(chunk->skb->data); |
808 | 795 | ||
809 | switch (err_hdr->cause) { | 796 | ev = sctp_ulpevent_make_remote_error(asoc, chunk, 0, |
810 | case SCTP_ERROR_UNKNOWN_CHUNK: | 797 | GFP_ATOMIC); |
811 | { | 798 | if (!ev) |
812 | struct sctp_chunkhdr *unk_chunk_hdr; | 799 | return; |
813 | 800 | ||
814 | unk_chunk_hdr = (struct sctp_chunkhdr *)err_hdr->variable; | 801 | sctp_ulpq_tail_event(&asoc->ulpq, ev); |
815 | switch (unk_chunk_hdr->type) { | 802 | |
816 | /* ADDIP 4.1 A9) If the peer responds to an ASCONF with an | 803 | switch (err_hdr->cause) { |
817 | * ERROR chunk reporting that it did not recognized the ASCONF | 804 | case SCTP_ERROR_UNKNOWN_CHUNK: |
818 | * chunk type, the sender of the ASCONF MUST NOT send any | 805 | { |
819 | * further ASCONF chunks and MUST stop its T-4 timer. | 806 | sctp_chunkhdr_t *unk_chunk_hdr; |
820 | */ | 807 | |
821 | case SCTP_CID_ASCONF: | 808 | unk_chunk_hdr = (sctp_chunkhdr_t *)err_hdr->variable; |
822 | asoc->peer.asconf_capable = 0; | 809 | switch (unk_chunk_hdr->type) { |
823 | sctp_add_cmd_sf(cmds, SCTP_CMD_TIMER_STOP, | 810 | /* ADDIP 4.1 A9) If the peer responds to an ASCONF with |
811 | * an ERROR chunk reporting that it did not recognized | ||
812 | * the ASCONF chunk type, the sender of the ASCONF MUST | ||
813 | * NOT send any further ASCONF chunks and MUST stop its | ||
814 | * T-4 timer. | ||
815 | */ | ||
816 | case SCTP_CID_ASCONF: | ||
817 | if (asoc->peer.asconf_capable == 0) | ||
818 | break; | ||
819 | |||
820 | asoc->peer.asconf_capable = 0; | ||
821 | sctp_add_cmd_sf(cmds, SCTP_CMD_TIMER_STOP, | ||
824 | SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); | 822 | SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); |
823 | break; | ||
824 | default: | ||
825 | break; | ||
826 | } | ||
825 | break; | 827 | break; |
828 | } | ||
826 | default: | 829 | default: |
827 | break; | 830 | break; |
828 | } | 831 | } |
829 | break; | ||
830 | } | ||
831 | default: | ||
832 | break; | ||
833 | } | 832 | } |
834 | } | 833 | } |
835 | 834 | ||
@@ -1459,12 +1458,19 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, | |||
1459 | 1458 | ||
1460 | case SCTP_CMD_STRIKE: | 1459 | case SCTP_CMD_STRIKE: |
1461 | /* Mark one strike against a transport. */ | 1460 | /* Mark one strike against a transport. */ |
1462 | sctp_do_8_2_transport_strike(asoc, cmd->obj.transport); | 1461 | sctp_do_8_2_transport_strike(asoc, cmd->obj.transport, |
1462 | 0); | ||
1463 | break; | ||
1464 | |||
1465 | case SCTP_CMD_TRANSPORT_IDLE: | ||
1466 | t = cmd->obj.transport; | ||
1467 | sctp_transport_lower_cwnd(t, SCTP_LOWER_CWND_INACTIVE); | ||
1463 | break; | 1468 | break; |
1464 | 1469 | ||
1465 | case SCTP_CMD_TRANSPORT_RESET: | 1470 | case SCTP_CMD_TRANSPORT_HB_SENT: |
1466 | t = cmd->obj.transport; | 1471 | t = cmd->obj.transport; |
1467 | sctp_cmd_transport_reset(commands, asoc, t); | 1472 | sctp_do_8_2_transport_strike(asoc, t, 1); |
1473 | t->hb_sent = 1; | ||
1468 | break; | 1474 | break; |
1469 | 1475 | ||
1470 | case SCTP_CMD_TRANSPORT_ON: | 1476 | case SCTP_CMD_TRANSPORT_ON: |
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 3a0cd075914f..55a61aa69662 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c | |||
@@ -988,7 +988,9 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, | |||
988 | /* Set transport error counter and association error counter | 988 | /* Set transport error counter and association error counter |
989 | * when sending heartbeat. | 989 | * when sending heartbeat. |
990 | */ | 990 | */ |
991 | sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_RESET, | 991 | sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE, |
992 | SCTP_TRANSPORT(transport)); | ||
993 | sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT, | ||
992 | SCTP_TRANSPORT(transport)); | 994 | SCTP_TRANSPORT(transport)); |
993 | } | 995 | } |
994 | sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE, | 996 | sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE, |
@@ -3163,7 +3165,6 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, | |||
3163 | sctp_cmd_seq_t *commands) | 3165 | sctp_cmd_seq_t *commands) |
3164 | { | 3166 | { |
3165 | struct sctp_chunk *chunk = arg; | 3167 | struct sctp_chunk *chunk = arg; |
3166 | struct sctp_ulpevent *ev; | ||
3167 | 3168 | ||
3168 | if (!sctp_vtag_verify(chunk, asoc)) | 3169 | if (!sctp_vtag_verify(chunk, asoc)) |
3169 | return sctp_sf_pdiscard(ep, asoc, type, arg, commands); | 3170 | return sctp_sf_pdiscard(ep, asoc, type, arg, commands); |
@@ -3173,21 +3174,10 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, | |||
3173 | return sctp_sf_violation_chunklen(ep, asoc, type, arg, | 3174 | return sctp_sf_violation_chunklen(ep, asoc, type, arg, |
3174 | commands); | 3175 | commands); |
3175 | 3176 | ||
3176 | while (chunk->chunk_end > chunk->skb->data) { | 3177 | sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR, |
3177 | ev = sctp_ulpevent_make_remote_error(asoc, chunk, 0, | 3178 | SCTP_CHUNK(chunk)); |
3178 | GFP_ATOMIC); | ||
3179 | if (!ev) | ||
3180 | goto nomem; | ||
3181 | 3179 | ||
3182 | sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, | ||
3183 | SCTP_ULPEVENT(ev)); | ||
3184 | sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR, | ||
3185 | SCTP_CHUNK(chunk)); | ||
3186 | } | ||
3187 | return SCTP_DISPOSITION_CONSUME; | 3180 | return SCTP_DISPOSITION_CONSUME; |
3188 | |||
3189 | nomem: | ||
3190 | return SCTP_DISPOSITION_NOMEM; | ||
3191 | } | 3181 | } |
3192 | 3182 | ||
3193 | /* | 3183 | /* |
@@ -4967,7 +4957,7 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat( | |||
4967 | * to that address and not acknowledged within one RTO. | 4957 | * to that address and not acknowledged within one RTO. |
4968 | * | 4958 | * |
4969 | */ | 4959 | */ |
4970 | sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_RESET, | 4960 | sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT, |
4971 | SCTP_TRANSPORT(arg)); | 4961 | SCTP_TRANSPORT(arg)); |
4972 | return SCTP_DISPOSITION_CONSUME; | 4962 | return SCTP_DISPOSITION_CONSUME; |
4973 | } | 4963 | } |
diff --git a/net/sctp/socket.c b/net/sctp/socket.c index dea864f5de54..5fb3a8c9792e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c | |||
@@ -3069,9 +3069,6 @@ static int sctp_setsockopt_maxburst(struct sock *sk, | |||
3069 | int val; | 3069 | int val; |
3070 | int assoc_id = 0; | 3070 | int assoc_id = 0; |
3071 | 3071 | ||
3072 | if (optlen < sizeof(int)) | ||
3073 | return -EINVAL; | ||
3074 | |||
3075 | if (optlen == sizeof(int)) { | 3072 | if (optlen == sizeof(int)) { |
3076 | printk(KERN_WARNING | 3073 | printk(KERN_WARNING |
3077 | "SCTP: Use of int in max_burst socket option deprecated\n"); | 3074 | "SCTP: Use of int in max_burst socket option deprecated\n"); |
@@ -5283,16 +5280,14 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, | |||
5283 | struct sctp_sock *sp; | 5280 | struct sctp_sock *sp; |
5284 | struct sctp_association *asoc; | 5281 | struct sctp_association *asoc; |
5285 | 5282 | ||
5286 | if (len < sizeof(int)) | ||
5287 | return -EINVAL; | ||
5288 | |||
5289 | if (len == sizeof(int)) { | 5283 | if (len == sizeof(int)) { |
5290 | printk(KERN_WARNING | 5284 | printk(KERN_WARNING |
5291 | "SCTP: Use of int in max_burst socket option deprecated\n"); | 5285 | "SCTP: Use of int in max_burst socket option deprecated\n"); |
5292 | printk(KERN_WARNING | 5286 | printk(KERN_WARNING |
5293 | "SCTP: Use struct sctp_assoc_value instead\n"); | 5287 | "SCTP: Use struct sctp_assoc_value instead\n"); |
5294 | params.assoc_id = 0; | 5288 | params.assoc_id = 0; |
5295 | } else if (len == sizeof (struct sctp_assoc_value)) { | 5289 | } else if (len >= sizeof(struct sctp_assoc_value)) { |
5290 | len = sizeof(struct sctp_assoc_value); | ||
5296 | if (copy_from_user(¶ms, optval, len)) | 5291 | if (copy_from_user(¶ms, optval, len)) |
5297 | return -EFAULT; | 5292 | return -EFAULT; |
5298 | } else | 5293 | } else |
@@ -5848,37 +5843,28 @@ static int sctp_get_port(struct sock *sk, unsigned short snum) | |||
5848 | } | 5843 | } |
5849 | 5844 | ||
5850 | /* | 5845 | /* |
5851 | * 3.1.3 listen() - UDP Style Syntax | 5846 | * Move a socket to LISTENING state. |
5852 | * | ||
5853 | * By default, new associations are not accepted for UDP style sockets. | ||
5854 | * An application uses listen() to mark a socket as being able to | ||
5855 | * accept new associations. | ||
5856 | */ | 5847 | */ |
5857 | SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) | 5848 | SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) |
5858 | { | 5849 | { |
5859 | struct sctp_sock *sp = sctp_sk(sk); | 5850 | struct sctp_sock *sp = sctp_sk(sk); |
5860 | struct sctp_endpoint *ep = sp->ep; | 5851 | struct sctp_endpoint *ep = sp->ep; |
5852 | struct crypto_hash *tfm = NULL; | ||
5861 | 5853 | ||
5862 | /* Only UDP style sockets that are not peeled off are allowed to | 5854 | /* Allocate HMAC for generating cookie. */ |
5863 | * listen(). | 5855 | if (!sctp_sk(sk)->hmac && sctp_hmac_alg) { |
5864 | */ | 5856 | tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); |
5865 | if (!sctp_style(sk, UDP)) | 5857 | if (IS_ERR(tfm)) { |
5866 | return -EINVAL; | 5858 | if (net_ratelimit()) { |
5867 | 5859 | printk(KERN_INFO | |
5868 | /* If backlog is zero, disable listening. */ | 5860 | "SCTP: failed to load transform for %s: %ld\n", |
5869 | if (!backlog) { | 5861 | sctp_hmac_alg, PTR_ERR(tfm)); |
5870 | if (sctp_sstate(sk, CLOSED)) | 5862 | } |
5871 | return 0; | 5863 | return -ENOSYS; |
5872 | 5864 | } | |
5873 | sctp_unhash_endpoint(ep); | 5865 | sctp_sk(sk)->hmac = tfm; |
5874 | sk->sk_state = SCTP_SS_CLOSED; | ||
5875 | return 0; | ||
5876 | } | 5866 | } |
5877 | 5867 | ||
5878 | /* Return if we are already listening. */ | ||
5879 | if (sctp_sstate(sk, LISTENING)) | ||
5880 | return 0; | ||
5881 | |||
5882 | /* | 5868 | /* |
5883 | * If a bind() or sctp_bindx() is not called prior to a listen() | 5869 | * If a bind() or sctp_bindx() is not called prior to a listen() |
5884 | * call that allows new associations to be accepted, the system | 5870 | * call that allows new associations to be accepted, the system |
@@ -5889,7 +5875,6 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) | |||
5889 | * extensions draft, but follows the practice as seen in TCP | 5875 | * extensions draft, but follows the practice as seen in TCP |
5890 | * sockets. | 5876 | * sockets. |
5891 | * | 5877 | * |
5892 | * Additionally, turn off fastreuse flag since we are not listening | ||
5893 | */ | 5878 | */ |
5894 | sk->sk_state = SCTP_SS_LISTENING; | 5879 | sk->sk_state = SCTP_SS_LISTENING; |
5895 | if (!ep->base.bind_addr.port) { | 5880 | if (!ep->base.bind_addr.port) { |
@@ -5900,113 +5885,71 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) | |||
5900 | sk->sk_state = SCTP_SS_CLOSED; | 5885 | sk->sk_state = SCTP_SS_CLOSED; |
5901 | return -EADDRINUSE; | 5886 | return -EADDRINUSE; |
5902 | } | 5887 | } |
5903 | sctp_sk(sk)->bind_hash->fastreuse = 0; | ||
5904 | } | 5888 | } |
5905 | 5889 | ||
5906 | sctp_hash_endpoint(ep); | ||
5907 | return 0; | ||
5908 | } | ||
5909 | |||
5910 | /* | ||
5911 | * 4.1.3 listen() - TCP Style Syntax | ||
5912 | * | ||
5913 | * Applications uses listen() to ready the SCTP endpoint for accepting | ||
5914 | * inbound associations. | ||
5915 | */ | ||
5916 | SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog) | ||
5917 | { | ||
5918 | struct sctp_sock *sp = sctp_sk(sk); | ||
5919 | struct sctp_endpoint *ep = sp->ep; | ||
5920 | |||
5921 | /* If backlog is zero, disable listening. */ | ||
5922 | if (!backlog) { | ||
5923 | if (sctp_sstate(sk, CLOSED)) | ||
5924 | return 0; | ||
5925 | |||
5926 | sctp_unhash_endpoint(ep); | ||
5927 | sk->sk_state = SCTP_SS_CLOSED; | ||
5928 | return 0; | ||
5929 | } | ||
5930 | |||
5931 | if (sctp_sstate(sk, LISTENING)) | ||
5932 | return 0; | ||
5933 | |||
5934 | /* | ||
5935 | * If a bind() or sctp_bindx() is not called prior to a listen() | ||
5936 | * call that allows new associations to be accepted, the system | ||
5937 | * picks an ephemeral port and will choose an address set equivalent | ||
5938 | * to binding with a wildcard address. | ||
5939 | * | ||
5940 | * This is not currently spelled out in the SCTP sockets | ||
5941 | * extensions draft, but follows the practice as seen in TCP | ||
5942 | * sockets. | ||
5943 | */ | ||
5944 | sk->sk_state = SCTP_SS_LISTENING; | ||
5945 | if (!ep->base.bind_addr.port) { | ||
5946 | if (sctp_autobind(sk)) | ||
5947 | return -EAGAIN; | ||
5948 | } else | ||
5949 | sctp_sk(sk)->bind_hash->fastreuse = 0; | ||
5950 | |||
5951 | sk->sk_max_ack_backlog = backlog; | 5890 | sk->sk_max_ack_backlog = backlog; |
5952 | sctp_hash_endpoint(ep); | 5891 | sctp_hash_endpoint(ep); |
5953 | return 0; | 5892 | return 0; |
5954 | } | 5893 | } |
5955 | 5894 | ||
5956 | /* | 5895 | /* |
5896 | * 4.1.3 / 5.1.3 listen() | ||
5897 | * | ||
5898 | * By default, new associations are not accepted for UDP style sockets. | ||
5899 | * An application uses listen() to mark a socket as being able to | ||
5900 | * accept new associations. | ||
5901 | * | ||
5902 | * On TCP style sockets, applications use listen() to ready the SCTP | ||
5903 | * endpoint for accepting inbound associations. | ||
5904 | * | ||
5905 | * On both types of endpoints a backlog of '0' disables listening. | ||
5906 | * | ||
5957 | * Move a socket to LISTENING state. | 5907 | * Move a socket to LISTENING state. |
5958 | */ | 5908 | */ |
5959 | int sctp_inet_listen(struct socket *sock, int backlog) | 5909 | int sctp_inet_listen(struct socket *sock, int backlog) |
5960 | { | 5910 | { |
5961 | struct sock *sk = sock->sk; | 5911 | struct sock *sk = sock->sk; |
5962 | struct crypto_hash *tfm = NULL; | 5912 | struct sctp_endpoint *ep = sctp_sk(sk)->ep; |
5963 | int err = -EINVAL; | 5913 | int err = -EINVAL; |
5964 | 5914 | ||
5965 | if (unlikely(backlog < 0)) | 5915 | if (unlikely(backlog < 0)) |
5966 | goto out; | 5916 | return err; |
5967 | 5917 | ||
5968 | sctp_lock_sock(sk); | 5918 | sctp_lock_sock(sk); |
5969 | 5919 | ||
5920 | /* Peeled-off sockets are not allowed to listen(). */ | ||
5921 | if (sctp_style(sk, UDP_HIGH_BANDWIDTH)) | ||
5922 | goto out; | ||
5923 | |||
5970 | if (sock->state != SS_UNCONNECTED) | 5924 | if (sock->state != SS_UNCONNECTED) |
5971 | goto out; | 5925 | goto out; |
5972 | 5926 | ||
5973 | /* Allocate HMAC for generating cookie. */ | 5927 | /* If backlog is zero, disable listening. */ |
5974 | if (!sctp_sk(sk)->hmac && sctp_hmac_alg) { | 5928 | if (!backlog) { |
5975 | tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); | 5929 | if (sctp_sstate(sk, CLOSED)) |
5976 | if (IS_ERR(tfm)) { | ||
5977 | if (net_ratelimit()) { | ||
5978 | printk(KERN_INFO | ||
5979 | "SCTP: failed to load transform for %s: %ld\n", | ||
5980 | sctp_hmac_alg, PTR_ERR(tfm)); | ||
5981 | } | ||
5982 | err = -ENOSYS; | ||
5983 | goto out; | 5930 | goto out; |
5984 | } | ||
5985 | } | ||
5986 | 5931 | ||
5987 | switch (sock->type) { | 5932 | err = 0; |
5988 | case SOCK_SEQPACKET: | 5933 | sctp_unhash_endpoint(ep); |
5989 | err = sctp_seqpacket_listen(sk, backlog); | 5934 | sk->sk_state = SCTP_SS_CLOSED; |
5990 | break; | 5935 | if (sk->sk_reuse) |
5991 | case SOCK_STREAM: | 5936 | sctp_sk(sk)->bind_hash->fastreuse = 1; |
5992 | err = sctp_stream_listen(sk, backlog); | 5937 | goto out; |
5993 | break; | ||
5994 | default: | ||
5995 | break; | ||
5996 | } | 5938 | } |
5997 | 5939 | ||
5998 | if (err) | 5940 | /* If we are already listening, just update the backlog */ |
5999 | goto cleanup; | 5941 | if (sctp_sstate(sk, LISTENING)) |
5942 | sk->sk_max_ack_backlog = backlog; | ||
5943 | else { | ||
5944 | err = sctp_listen_start(sk, backlog); | ||
5945 | if (err) | ||
5946 | goto out; | ||
5947 | } | ||
6000 | 5948 | ||
6001 | /* Store away the transform reference. */ | 5949 | err = 0; |
6002 | if (!sctp_sk(sk)->hmac) | ||
6003 | sctp_sk(sk)->hmac = tfm; | ||
6004 | out: | 5950 | out: |
6005 | sctp_release_sock(sk); | 5951 | sctp_release_sock(sk); |
6006 | return err; | 5952 | return err; |
6007 | cleanup: | ||
6008 | crypto_free_hash(tfm); | ||
6009 | goto out; | ||
6010 | } | 5953 | } |
6011 | 5954 | ||
6012 | /* | 5955 | /* |
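Functionally, the listen() rework above preserves the documented semantics for both socket styles: a backlog of 0 tears down the listening endpoint, a repeated listen() on an already-listening socket only updates the backlog, and peeled-off sockets are rejected. A hedged userspace sketch of those semantics (assumes a kernel with SCTP support; error handling trimmed for brevity):

	#include <stdio.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/in.h>

	int main(void)
	{
		/* One-to-many (UDP-style) SCTP endpoint. */
		int sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
		struct sockaddr_in a;

		if (sd < 0) {
			perror("socket");
			return 1;
		}

		memset(&a, 0, sizeof(a));
		a.sin_family = AF_INET;
		a.sin_addr.s_addr = htonl(INADDR_ANY);
		a.sin_port = 0;			/* let the stack autobind a port */
		bind(sd, (struct sockaddr *)&a, sizeof(a));

		listen(sd, 5);	/* start accepting associations */
		listen(sd, 10);	/* already listening: only the backlog changes */
		listen(sd, 0);	/* backlog 0 disables listening again */
		return 0;
	}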
diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 5c29b14ee9af..e5dde45c79d3 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c | |||
@@ -543,8 +543,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, | |||
543 | * congestion indications more than once every window of | 543 | * congestion indications more than once every window of |
544 | * data (or more loosely more than once every round-trip time). | 544 | * data (or more loosely more than once every round-trip time). |
545 | */ | 545 | */ |
546 | if ((jiffies - transport->last_time_ecne_reduced) > | 546 | if (time_after(jiffies, transport->last_time_ecne_reduced + |
547 | transport->rtt) { | 547 | transport->rtt)) { |
548 | transport->ssthresh = max(transport->cwnd/2, | 548 | transport->ssthresh = max(transport->cwnd/2, |
549 | 4*transport->asoc->pathmtu); | 549 | 4*transport->asoc->pathmtu); |
550 | transport->cwnd = transport->ssthresh; | 550 | transport->cwnd = transport->ssthresh; |
@@ -561,7 +561,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, | |||
561 | * to be done every RTO interval, we do it every heartbeat | 561 | * to be done every RTO interval, we do it every heartbeat |
562 | * interval. | 562 | * interval. |
563 | */ | 563 | */ |
564 | if ((jiffies - transport->last_time_used) > transport->rto) | 564 | if (time_after(jiffies, transport->last_time_used + |
565 | transport->rto)) | ||
565 | transport->cwnd = max(transport->cwnd/2, | 566 | transport->cwnd = max(transport->cwnd/2, |
566 | 4*transport->asoc->pathmtu); | 567 | 4*transport->asoc->pathmtu); |
567 | break; | 568 | break; |
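Both conversions above replace open-coded jiffies arithmetic with time_after(), the standard helper that compares via a signed difference so the test stays meaningful when the counter wraps, and that documents the intent explicitly. A hedged userspace sketch of the same trick (plain unsigned long counters, not the real <linux/jiffies.h> macro, which also does type checking):

	#include <stdio.h>

	/* Same idea as the kernel macro: signed comparison of the difference. */
	#define time_after(a, b)	((long)((b) - (a)) < 0)

	int main(void)
	{
		unsigned long last = (unsigned long)-10;	/* counter about to wrap */
		unsigned long now  = 5;				/* counter after wrapping */
		unsigned long rto  = 8;

		if (time_after(now, last + rto))
			printf("deadline passed (wraparound handled)\n");
		else
			printf("deadline not reached\n");
		return 0;
	}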
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 3ddaff42d1bb..a3bfd4064912 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c | |||
@@ -119,7 +119,7 @@ static struct bclink *bclink = NULL; | |||
119 | static struct link *bcl = NULL; | 119 | static struct link *bcl = NULL; |
120 | static DEFINE_SPINLOCK(bc_lock); | 120 | static DEFINE_SPINLOCK(bc_lock); |
121 | 121 | ||
122 | char tipc_bclink_name[] = "multicast-link"; | 122 | const char tipc_bclink_name[] = "multicast-link"; |
123 | 123 | ||
124 | 124 | ||
125 | static u32 buf_seqno(struct sk_buff *buf) | 125 | static u32 buf_seqno(struct sk_buff *buf) |
@@ -800,7 +800,7 @@ int tipc_bclink_init(void) | |||
800 | tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); | 800 | tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); |
801 | bcl->b_ptr = &bcbearer->bearer; | 801 | bcl->b_ptr = &bcbearer->bearer; |
802 | bcl->state = WORKING_WORKING; | 802 | bcl->state = WORKING_WORKING; |
803 | sprintf(bcl->name, tipc_bclink_name); | 803 | strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); |
804 | 804 | ||
805 | if (BCLINK_LOG_BUF_SIZE) { | 805 | if (BCLINK_LOG_BUF_SIZE) { |
806 | char *pb = kmalloc(BCLINK_LOG_BUF_SIZE, GFP_ATOMIC); | 806 | char *pb = kmalloc(BCLINK_LOG_BUF_SIZE, GFP_ATOMIC); |
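sprintf(bcl->name, tipc_bclink_name) passed a variable as the format string and put no bound on the copy; strlcpy() copies at most TIPC_MAX_LINK_NAME - 1 bytes and always NUL-terminates. The printk("%s", ...) change in net/tipc/dbg.c and the strlcpy() in net/tipc/node.c further down fix the same class of bug. A hedged userspace sketch of the safer pattern, using snprintf "%s" as the portable stand-in for strlcpy (TIPC_MAX_LINK_NAME is an assumed local constant here, not pulled from kernel headers):

	#include <stdio.h>

	#define TIPC_MAX_LINK_NAME 60	/* assumption for this sketch */

	int main(void)
	{
		char name[TIPC_MAX_LINK_NAME];
		const char *link = "multicast-link with a stray %s in it";

		/* Unsafe: sprintf(name, link) would interpret the '%s'. */

		/* Bounded, format-safe copy, analogous to strlcpy() in the patch. */
		snprintf(name, sizeof(name), "%s", link);
		printf("%s\n", name);
		return 0;
	}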
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 2f2d731bc1c2..4c1771e95c99 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h | |||
@@ -70,7 +70,7 @@ struct port_list { | |||
70 | 70 | ||
71 | struct tipc_node; | 71 | struct tipc_node; |
72 | 72 | ||
73 | extern char tipc_bclink_name[]; | 73 | extern const char tipc_bclink_name[]; |
74 | 74 | ||
75 | 75 | ||
76 | /** | 76 | /** |
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c index 29ecae851668..1885a7edb0c8 100644 --- a/net/tipc/dbg.c +++ b/net/tipc/dbg.c | |||
@@ -258,7 +258,7 @@ void tipc_printf(struct print_buf *pb, const char *fmt, ...) | |||
258 | } | 258 | } |
259 | 259 | ||
260 | if (pb->echo) | 260 | if (pb->echo) |
261 | printk(print_string); | 261 | printk("%s", print_string); |
262 | 262 | ||
263 | spin_unlock_bh(&print_lock); | 263 | spin_unlock_bh(&print_lock); |
264 | } | 264 | } |
diff --git a/net/tipc/node.c b/net/tipc/node.c index 20d98c56e152..2c24e7d6d950 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c | |||
@@ -703,7 +703,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) | |||
703 | 703 | ||
704 | link_info.dest = htonl(tipc_own_addr & 0xfffff00); | 704 | link_info.dest = htonl(tipc_own_addr & 0xfffff00); |
705 | link_info.up = htonl(1); | 705 | link_info.up = htonl(1); |
706 | sprintf(link_info.str, tipc_bclink_name); | 706 | strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME); |
707 | tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); | 707 | tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); |
708 | 708 | ||
709 | /* Add TLVs for any other links in scope */ | 709 | /* Add TLVs for any other links in scope */ |
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d1b89820ab4f..baac91049b0e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c | |||
@@ -1178,8 +1178,7 @@ out_unlock: | |||
1178 | unix_state_unlock(other); | 1178 | unix_state_unlock(other); |
1179 | 1179 | ||
1180 | out: | 1180 | out: |
1181 | if (skb) | 1181 | kfree_skb(skb); |
1182 | kfree_skb(skb); | ||
1183 | if (newsk) | 1182 | if (newsk) |
1184 | unix_release_sock(newsk, 0); | 1183 | unix_release_sock(newsk, 0); |
1185 | if (other) | 1184 | if (other) |
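kfree_skb() already tolerates a NULL pointer, so the explicit "if (skb)" guard in the error path was redundant; dropping it follows the usual "free functions accept NULL" convention shared by kfree(), kfree_skb() and friends. A hedged userspace sketch of the same convention:

	#include <stdio.h>
	#include <stdlib.h>

	struct buf {
		char data[64];
	};

	/* NULL-tolerant release, like kfree_skb(): callers need no guard. */
	static void buf_free(struct buf *b)
	{
		if (!b)
			return;
		free(b);
	}

	int main(void)
	{
		struct buf *b = NULL;	/* error path never allocated one */

		buf_free(b);		/* safe without an "if (b)" at the call site */
		printf("freed (or not) without a caller-side NULL check\n");
		return 0;
	}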
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 39701dec1dba..466e2d22d256 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c | |||
@@ -86,8 +86,10 @@ static int wanrouter_device_del_if(struct wan_device *wandev, | |||
86 | 86 | ||
87 | static struct wan_device *wanrouter_find_device(char *name); | 87 | static struct wan_device *wanrouter_find_device(char *name); |
88 | static int wanrouter_delete_interface(struct wan_device *wandev, char *name); | 88 | static int wanrouter_delete_interface(struct wan_device *wandev, char *name); |
89 | static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); | 89 | static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) |
90 | static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); | 90 | __acquires(lock); |
91 | static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) | ||
92 | __releases(lock); | ||
91 | 93 | ||
92 | 94 | ||
93 | 95 | ||
@@ -763,12 +765,14 @@ static int wanrouter_delete_interface(struct wan_device *wandev, char *name) | |||
763 | } | 765 | } |
764 | 766 | ||
765 | static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) | 767 | static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) |
768 | __acquires(lock) | ||
766 | { | 769 | { |
767 | spin_lock_irqsave(lock, *smp_flags); | 770 | spin_lock_irqsave(lock, *smp_flags); |
768 | } | 771 | } |
769 | 772 | ||
770 | 773 | ||
771 | static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) | 774 | static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) |
775 | __releases(lock) | ||
772 | { | 776 | { |
773 | spin_unlock_irqrestore(lock, *smp_flags); | 777 | spin_unlock_irqrestore(lock, *smp_flags); |
774 | } | 778 | } |
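__acquires()/__releases() are sparse context annotations: under a checker build they expand (roughly) to __attribute__((context(lock, ...))) so sparse can verify that a function changes the lock balance as declared, and in a normal build they compile away. Keeping them on both the prototypes and the definitions keeps the declarations consistent. A hedged sketch of how such no-op-by-default annotations are typically wired up (simplified, not the exact kernel definitions):

	#include <stdio.h>
	#include <pthread.h>

	/* Under a checker these could expand to context-tracking attributes;
	 * here they are deliberately no-ops, as in a normal build. */
	#define __acquires(x)
	#define __releases(x)

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

	static void lock_adapter(pthread_mutex_t *l) __acquires(l);
	static void unlock_adapter(pthread_mutex_t *l) __releases(l);

	static void lock_adapter(pthread_mutex_t *l) __acquires(l)
	{
		pthread_mutex_lock(l);
	}

	static void unlock_adapter(pthread_mutex_t *l) __releases(l)
	{
		pthread_mutex_unlock(l);
	}

	int main(void)
	{
		lock_adapter(&lock);
		printf("critical section\n");
		unlock_adapter(&lock);
		return 0;
	}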
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index 267f7ff49827..c44d96b3a437 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c | |||
@@ -80,6 +80,7 @@ static struct proc_dir_entry *proc_router; | |||
80 | * Iterator | 80 | * Iterator |
81 | */ | 81 | */ |
82 | static void *r_start(struct seq_file *m, loff_t *pos) | 82 | static void *r_start(struct seq_file *m, loff_t *pos) |
83 | __acquires(kernel_lock) | ||
83 | { | 84 | { |
84 | struct wan_device *wandev; | 85 | struct wan_device *wandev; |
85 | loff_t l = *pos; | 86 | loff_t l = *pos; |
@@ -101,6 +102,7 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos) | |||
101 | } | 102 | } |
102 | 103 | ||
103 | static void r_stop(struct seq_file *m, void *v) | 104 | static void r_stop(struct seq_file *m, void *v) |
105 | __releases(kernel_lock) | ||
104 | { | 106 | { |
105 | unlock_kernel(); | 107 | unlock_kernel(); |
106 | } | 108 | } |
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index e28e2b8fa436..092ae6faccca 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig | |||
@@ -102,3 +102,13 @@ config LIB80211_CRYPT_CCMP | |||
102 | 102 | ||
103 | config LIB80211_CRYPT_TKIP | 103 | config LIB80211_CRYPT_TKIP |
104 | tristate | 104 | tristate |
105 | |||
106 | config LIB80211_DEBUG | ||
107 | bool "lib80211 debugging messages" | ||
108 | depends on LIB80211 | ||
109 | default n | ||
110 | ---help--- | ||
111 | You can enable this if you want verbose debugging messages | ||
112 | from lib80211. | ||
113 | |||
114 | If unsure, say N. | ||
diff --git a/net/wireless/core.c b/net/wireless/core.c index 0668b2bfc1da..17fe39049740 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c | |||
@@ -7,7 +7,6 @@ | |||
7 | #include <linux/if.h> | 7 | #include <linux/if.h> |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/err.h> | 9 | #include <linux/err.h> |
10 | #include <linux/mutex.h> | ||
11 | #include <linux/list.h> | 10 | #include <linux/list.h> |
12 | #include <linux/nl80211.h> | 11 | #include <linux/nl80211.h> |
13 | #include <linux/debugfs.h> | 12 | #include <linux/debugfs.h> |
@@ -31,18 +30,29 @@ MODULE_DESCRIPTION("wireless configuration support"); | |||
31 | * only read the list, and that can happen quite | 30 | * only read the list, and that can happen quite |
32 | * often because we need to do it for each command */ | 31 | * often because we need to do it for each command */ |
33 | LIST_HEAD(cfg80211_drv_list); | 32 | LIST_HEAD(cfg80211_drv_list); |
34 | DEFINE_MUTEX(cfg80211_drv_mutex); | 33 | |
34 | /* | ||
35 | * This is used to protect the cfg80211_drv_list, cfg80211_regdomain, | ||
36 | * country_ie_regdomain, the reg_beacon_list and the last regulatory | ||
37 | * request receipt (last_request). | ||
38 | */ | ||
39 | DEFINE_MUTEX(cfg80211_mutex); | ||
35 | 40 | ||
36 | /* for debugfs */ | 41 | /* for debugfs */ |
37 | static struct dentry *ieee80211_debugfs_dir; | 42 | static struct dentry *ieee80211_debugfs_dir; |
38 | 43 | ||
39 | /* requires cfg80211_drv_mutex to be held! */ | 44 | /* requires cfg80211_mutex to be held! */ |
40 | static struct cfg80211_registered_device *cfg80211_drv_by_wiphy(int wiphy) | 45 | struct cfg80211_registered_device *cfg80211_drv_by_wiphy_idx(int wiphy_idx) |
41 | { | 46 | { |
42 | struct cfg80211_registered_device *result = NULL, *drv; | 47 | struct cfg80211_registered_device *result = NULL, *drv; |
43 | 48 | ||
49 | if (!wiphy_idx_valid(wiphy_idx)) | ||
50 | return NULL; | ||
51 | |||
52 | assert_cfg80211_lock(); | ||
53 | |||
44 | list_for_each_entry(drv, &cfg80211_drv_list, list) { | 54 | list_for_each_entry(drv, &cfg80211_drv_list, list) { |
45 | if (drv->idx == wiphy) { | 55 | if (drv->wiphy_idx == wiphy_idx) { |
46 | result = drv; | 56 | result = drv; |
47 | break; | 57 | break; |
48 | } | 58 | } |
@@ -51,17 +61,44 @@ static struct cfg80211_registered_device *cfg80211_drv_by_wiphy(int wiphy) | |||
51 | return result; | 61 | return result; |
52 | } | 62 | } |
53 | 63 | ||
64 | int get_wiphy_idx(struct wiphy *wiphy) | ||
65 | { | ||
66 | struct cfg80211_registered_device *drv; | ||
67 | if (!wiphy) | ||
68 | return WIPHY_IDX_STALE; | ||
69 | drv = wiphy_to_dev(wiphy); | ||
70 | return drv->wiphy_idx; | ||
71 | } | ||
72 | |||
54 | /* requires cfg80211_drv_mutex to be held! */ | 73 | /* requires cfg80211_drv_mutex to be held! */ |
74 | struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) | ||
75 | { | ||
76 | struct cfg80211_registered_device *drv; | ||
77 | |||
78 | if (!wiphy_idx_valid(wiphy_idx)) | ||
79 | return NULL; | ||
80 | |||
81 | assert_cfg80211_lock(); | ||
82 | |||
83 | drv = cfg80211_drv_by_wiphy_idx(wiphy_idx); | ||
84 | if (!drv) | ||
85 | return NULL; | ||
86 | return &drv->wiphy; | ||
87 | } | ||
88 | |||
89 | /* requires cfg80211_mutex to be held! */ | ||
55 | static struct cfg80211_registered_device * | 90 | static struct cfg80211_registered_device * |
56 | __cfg80211_drv_from_info(struct genl_info *info) | 91 | __cfg80211_drv_from_info(struct genl_info *info) |
57 | { | 92 | { |
58 | int ifindex; | 93 | int ifindex; |
59 | struct cfg80211_registered_device *bywiphy = NULL, *byifidx = NULL; | 94 | struct cfg80211_registered_device *bywiphyidx = NULL, *byifidx = NULL; |
60 | struct net_device *dev; | 95 | struct net_device *dev; |
61 | int err = -EINVAL; | 96 | int err = -EINVAL; |
62 | 97 | ||
98 | assert_cfg80211_lock(); | ||
99 | |||
63 | if (info->attrs[NL80211_ATTR_WIPHY]) { | 100 | if (info->attrs[NL80211_ATTR_WIPHY]) { |
64 | bywiphy = cfg80211_drv_by_wiphy( | 101 | bywiphyidx = cfg80211_drv_by_wiphy_idx( |
65 | nla_get_u32(info->attrs[NL80211_ATTR_WIPHY])); | 102 | nla_get_u32(info->attrs[NL80211_ATTR_WIPHY])); |
66 | err = -ENODEV; | 103 | err = -ENODEV; |
67 | } | 104 | } |
@@ -78,14 +115,14 @@ __cfg80211_drv_from_info(struct genl_info *info) | |||
78 | err = -ENODEV; | 115 | err = -ENODEV; |
79 | } | 116 | } |
80 | 117 | ||
81 | if (bywiphy && byifidx) { | 118 | if (bywiphyidx && byifidx) { |
82 | if (bywiphy != byifidx) | 119 | if (bywiphyidx != byifidx) |
83 | return ERR_PTR(-EINVAL); | 120 | return ERR_PTR(-EINVAL); |
84 | else | 121 | else |
85 | return bywiphy; /* == byifidx */ | 122 | return bywiphyidx; /* == byifidx */ |
86 | } | 123 | } |
87 | if (bywiphy) | 124 | if (bywiphyidx) |
88 | return bywiphy; | 125 | return bywiphyidx; |
89 | 126 | ||
90 | if (byifidx) | 127 | if (byifidx) |
91 | return byifidx; | 128 | return byifidx; |
@@ -98,7 +135,7 @@ cfg80211_get_dev_from_info(struct genl_info *info) | |||
98 | { | 135 | { |
99 | struct cfg80211_registered_device *drv; | 136 | struct cfg80211_registered_device *drv; |
100 | 137 | ||
101 | mutex_lock(&cfg80211_drv_mutex); | 138 | mutex_lock(&cfg80211_mutex); |
102 | drv = __cfg80211_drv_from_info(info); | 139 | drv = __cfg80211_drv_from_info(info); |
103 | 140 | ||
104 | /* if it is not an error we grab the lock on | 141 | /* if it is not an error we grab the lock on |
@@ -107,7 +144,7 @@ cfg80211_get_dev_from_info(struct genl_info *info) | |||
107 | if (!IS_ERR(drv)) | 144 | if (!IS_ERR(drv)) |
108 | mutex_lock(&drv->mtx); | 145 | mutex_lock(&drv->mtx); |
109 | 146 | ||
110 | mutex_unlock(&cfg80211_drv_mutex); | 147 | mutex_unlock(&cfg80211_mutex); |
111 | 148 | ||
112 | return drv; | 149 | return drv; |
113 | } | 150 | } |
@@ -118,7 +155,7 @@ cfg80211_get_dev_from_ifindex(int ifindex) | |||
118 | struct cfg80211_registered_device *drv = ERR_PTR(-ENODEV); | 155 | struct cfg80211_registered_device *drv = ERR_PTR(-ENODEV); |
119 | struct net_device *dev; | 156 | struct net_device *dev; |
120 | 157 | ||
121 | mutex_lock(&cfg80211_drv_mutex); | 158 | mutex_lock(&cfg80211_mutex); |
122 | dev = dev_get_by_index(&init_net, ifindex); | 159 | dev = dev_get_by_index(&init_net, ifindex); |
123 | if (!dev) | 160 | if (!dev) |
124 | goto out; | 161 | goto out; |
@@ -129,7 +166,7 @@ cfg80211_get_dev_from_ifindex(int ifindex) | |||
129 | drv = ERR_PTR(-ENODEV); | 166 | drv = ERR_PTR(-ENODEV); |
130 | dev_put(dev); | 167 | dev_put(dev); |
131 | out: | 168 | out: |
132 | mutex_unlock(&cfg80211_drv_mutex); | 169 | mutex_unlock(&cfg80211_mutex); |
133 | return drv; | 170 | return drv; |
134 | } | 171 | } |
135 | 172 | ||
@@ -143,16 +180,16 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, | |||
143 | char *newname) | 180 | char *newname) |
144 | { | 181 | { |
145 | struct cfg80211_registered_device *drv; | 182 | struct cfg80211_registered_device *drv; |
146 | int idx, taken = -1, result, digits; | 183 | int wiphy_idx, taken = -1, result, digits; |
147 | 184 | ||
148 | mutex_lock(&cfg80211_drv_mutex); | 185 | mutex_lock(&cfg80211_mutex); |
149 | 186 | ||
150 | /* prohibit calling the thing phy%d when %d is not its number */ | 187 | /* prohibit calling the thing phy%d when %d is not its number */ |
151 | sscanf(newname, PHY_NAME "%d%n", &idx, &taken); | 188 | sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken); |
152 | if (taken == strlen(newname) && idx != rdev->idx) { | 189 | if (taken == strlen(newname) && wiphy_idx != rdev->wiphy_idx) { |
153 | /* count number of places needed to print idx */ | 190 | /* count number of places needed to print wiphy_idx */ |
154 | digits = 1; | 191 | digits = 1; |
155 | while (idx /= 10) | 192 | while (wiphy_idx /= 10) |
156 | digits++; | 193 | digits++; |
157 | /* | 194 | /* |
158 | * deny the name if it is phy<idx> where <idx> is printed | 195 | * deny the name if it is phy<idx> where <idx> is printed |
@@ -193,7 +230,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, | |||
193 | 230 | ||
194 | result = 0; | 231 | result = 0; |
195 | out_unlock: | 232 | out_unlock: |
196 | mutex_unlock(&cfg80211_drv_mutex); | 233 | mutex_unlock(&cfg80211_mutex); |
197 | if (result == 0) | 234 | if (result == 0) |
198 | nl80211_notify_dev_rename(rdev); | 235 | nl80211_notify_dev_rename(rdev); |
199 | 236 | ||
@@ -220,22 +257,22 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) | |||
220 | 257 | ||
221 | drv->ops = ops; | 258 | drv->ops = ops; |
222 | 259 | ||
223 | mutex_lock(&cfg80211_drv_mutex); | 260 | mutex_lock(&cfg80211_mutex); |
224 | 261 | ||
225 | drv->idx = wiphy_counter++; | 262 | drv->wiphy_idx = wiphy_counter++; |
226 | 263 | ||
227 | if (unlikely(drv->idx < 0)) { | 264 | if (unlikely(!wiphy_idx_valid(drv->wiphy_idx))) { |
228 | wiphy_counter--; | 265 | wiphy_counter--; |
229 | mutex_unlock(&cfg80211_drv_mutex); | 266 | mutex_unlock(&cfg80211_mutex); |
230 | /* ugh, wrapped! */ | 267 | /* ugh, wrapped! */ |
231 | kfree(drv); | 268 | kfree(drv); |
232 | return NULL; | 269 | return NULL; |
233 | } | 270 | } |
234 | 271 | ||
235 | mutex_unlock(&cfg80211_drv_mutex); | 272 | mutex_unlock(&cfg80211_mutex); |
236 | 273 | ||
237 | /* give it a proper name */ | 274 | /* give it a proper name */ |
238 | dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->idx); | 275 | dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->wiphy_idx); |
239 | 276 | ||
240 | mutex_init(&drv->mtx); | 277 | mutex_init(&drv->mtx); |
241 | mutex_init(&drv->devlist_mtx); | 278 | mutex_init(&drv->devlist_mtx); |
@@ -310,10 +347,10 @@ int wiphy_register(struct wiphy *wiphy) | |||
310 | /* check and set up bitrates */ | 347 | /* check and set up bitrates */ |
311 | ieee80211_set_bitrate_flags(wiphy); | 348 | ieee80211_set_bitrate_flags(wiphy); |
312 | 349 | ||
313 | mutex_lock(&cfg80211_drv_mutex); | 350 | mutex_lock(&cfg80211_mutex); |
314 | 351 | ||
315 | /* set up regulatory info */ | 352 | /* set up regulatory info */ |
316 | wiphy_update_regulatory(wiphy, REGDOM_SET_BY_CORE); | 353 | wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); |
317 | 354 | ||
318 | res = device_add(&drv->wiphy.dev); | 355 | res = device_add(&drv->wiphy.dev); |
319 | if (res) | 356 | if (res) |
@@ -328,9 +365,20 @@ int wiphy_register(struct wiphy *wiphy) | |||
328 | if (IS_ERR(drv->wiphy.debugfsdir)) | 365 | if (IS_ERR(drv->wiphy.debugfsdir)) |
329 | drv->wiphy.debugfsdir = NULL; | 366 | drv->wiphy.debugfsdir = NULL; |
330 | 367 | ||
368 | if (wiphy->custom_regulatory) { | ||
369 | struct regulatory_request request; | ||
370 | |||
371 | request.wiphy_idx = get_wiphy_idx(wiphy); | ||
372 | request.initiator = NL80211_REGDOM_SET_BY_DRIVER; | ||
373 | request.alpha2[0] = '9'; | ||
374 | request.alpha2[1] = '9'; | ||
375 | |||
376 | nl80211_send_reg_change_event(&request); | ||
377 | } | ||
378 | |||
331 | res = 0; | 379 | res = 0; |
332 | out_unlock: | 380 | out_unlock: |
333 | mutex_unlock(&cfg80211_drv_mutex); | 381 | mutex_unlock(&cfg80211_mutex); |
334 | return res; | 382 | return res; |
335 | } | 383 | } |
336 | EXPORT_SYMBOL(wiphy_register); | 384 | EXPORT_SYMBOL(wiphy_register); |
@@ -340,7 +388,7 @@ void wiphy_unregister(struct wiphy *wiphy) | |||
340 | struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy); | 388 | struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy); |
341 | 389 | ||
342 | /* protect the device list */ | 390 | /* protect the device list */ |
343 | mutex_lock(&cfg80211_drv_mutex); | 391 | mutex_lock(&cfg80211_mutex); |
344 | 392 | ||
345 | BUG_ON(!list_empty(&drv->netdev_list)); | 393 | BUG_ON(!list_empty(&drv->netdev_list)); |
346 | 394 | ||
@@ -366,7 +414,7 @@ void wiphy_unregister(struct wiphy *wiphy) | |||
366 | device_del(&drv->wiphy.dev); | 414 | device_del(&drv->wiphy.dev); |
367 | debugfs_remove(drv->wiphy.debugfsdir); | 415 | debugfs_remove(drv->wiphy.debugfsdir); |
368 | 416 | ||
369 | mutex_unlock(&cfg80211_drv_mutex); | 417 | mutex_unlock(&cfg80211_mutex); |
370 | } | 418 | } |
371 | EXPORT_SYMBOL(wiphy_unregister); | 419 | EXPORT_SYMBOL(wiphy_unregister); |
372 | 420 | ||
diff --git a/net/wireless/core.h b/net/wireless/core.h index e29ad4cd464f..6acd483a61f8 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/netdevice.h> | 10 | #include <linux/netdevice.h> |
11 | #include <linux/kref.h> | 11 | #include <linux/kref.h> |
12 | #include <linux/rbtree.h> | 12 | #include <linux/rbtree.h> |
13 | #include <linux/mutex.h> | ||
13 | #include <net/genetlink.h> | 14 | #include <net/genetlink.h> |
14 | #include <net/wireless.h> | 15 | #include <net/wireless.h> |
15 | #include <net/cfg80211.h> | 16 | #include <net/cfg80211.h> |
@@ -37,7 +38,7 @@ struct cfg80211_registered_device { | |||
37 | enum environment_cap env; | 38 | enum environment_cap env; |
38 | 39 | ||
39 | /* wiphy index, internal only */ | 40 | /* wiphy index, internal only */ |
40 | int idx; | 41 | int wiphy_idx; |
41 | 42 | ||
42 | /* associate netdev list */ | 43 | /* associate netdev list */ |
43 | struct mutex devlist_mtx; | 44 | struct mutex devlist_mtx; |
@@ -49,6 +50,7 @@ struct cfg80211_registered_device { | |||
49 | struct rb_root bss_tree; | 50 | struct rb_root bss_tree; |
50 | u32 bss_generation; | 51 | u32 bss_generation; |
51 | struct cfg80211_scan_request *scan_req; /* protected by RTNL */ | 52 | struct cfg80211_scan_request *scan_req; /* protected by RTNL */ |
53 | unsigned long suspend_at; | ||
52 | 54 | ||
53 | /* must be last because of the way we do wiphy_priv(), | 55 | /* must be last because of the way we do wiphy_priv(), |
54 | * and it should at least be aligned to NETDEV_ALIGN */ | 56 | * and it should at least be aligned to NETDEV_ALIGN */ |
@@ -62,9 +64,27 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) | |||
62 | return container_of(wiphy, struct cfg80211_registered_device, wiphy); | 64 | return container_of(wiphy, struct cfg80211_registered_device, wiphy); |
63 | } | 65 | } |
64 | 66 | ||
65 | extern struct mutex cfg80211_drv_mutex; | 67 | /* Note 0 is valid, hence phy0 */ |
68 | static inline | ||
69 | bool wiphy_idx_valid(int wiphy_idx) | ||
70 | { | ||
71 | return (wiphy_idx >= 0); | ||
72 | } | ||
73 | |||
74 | extern struct mutex cfg80211_mutex; | ||
66 | extern struct list_head cfg80211_drv_list; | 75 | extern struct list_head cfg80211_drv_list; |
67 | 76 | ||
77 | static inline void assert_cfg80211_lock(void) | ||
78 | { | ||
79 | WARN_ON(!mutex_is_locked(&cfg80211_mutex)); | ||
80 | } | ||
81 | |||
82 | /* | ||
83 | * You can use this to mark a wiphy_idx as not having an associated wiphy. | ||
84 | * It guarantees cfg80211_drv_by_wiphy_idx(wiphy_idx) will return NULL | ||
85 | */ | ||
86 | #define WIPHY_IDX_STALE -1 | ||
87 | |||
68 | struct cfg80211_internal_bss { | 88 | struct cfg80211_internal_bss { |
69 | struct list_head list; | 89 | struct list_head list; |
70 | struct rb_node rbn; | 90 | struct rb_node rbn; |
@@ -74,6 +94,9 @@ struct cfg80211_internal_bss { | |||
74 | struct cfg80211_bss pub; | 94 | struct cfg80211_bss pub; |
75 | }; | 95 | }; |
76 | 96 | ||
97 | struct cfg80211_registered_device *cfg80211_drv_by_wiphy_idx(int wiphy_idx); | ||
98 | int get_wiphy_idx(struct wiphy *wiphy); | ||
99 | |||
77 | /* | 100 | /* |
78 | * This function returns a pointer to the driver | 101 | * This function returns a pointer to the driver |
79 | * that the genl_info item that is passed refers to. | 102 | * that the genl_info item that is passed refers to. |
@@ -81,13 +104,13 @@ struct cfg80211_internal_bss { | |||
81 | * the driver's mutex! | 104 | * the driver's mutex! |
82 | * | 105 | * |
83 | * This means that you need to call cfg80211_put_dev() | 106 | * This means that you need to call cfg80211_put_dev() |
84 | * before being allowed to acquire &cfg80211_drv_mutex! | 107 | * before being allowed to acquire &cfg80211_mutex! |
85 | * | 108 | * |
86 | * This is necessary because we need to lock the global | 109 | * This is necessary because we need to lock the global |
87 | * mutex to get an item off the list safely, and then | 110 | * mutex to get an item off the list safely, and then |
88 | * we lock the drv mutex so it doesn't go away under us. | 111 | * we lock the drv mutex so it doesn't go away under us. |
89 | * | 112 | * |
90 | * We don't want to keep cfg80211_drv_mutex locked | 113 | * We don't want to keep cfg80211_mutex locked |
91 | * for all the time in order to allow requests on | 114 | * for all the time in order to allow requests on |
92 | * other interfaces to go through at the same time. | 115 | * other interfaces to go through at the same time. |
93 | * | 116 | * |
@@ -97,6 +120,9 @@ struct cfg80211_internal_bss { | |||
97 | extern struct cfg80211_registered_device * | 120 | extern struct cfg80211_registered_device * |
98 | cfg80211_get_dev_from_info(struct genl_info *info); | 121 | cfg80211_get_dev_from_info(struct genl_info *info); |
99 | 122 | ||
123 | /* requires cfg80211_drv_mutex to be held! */ | ||
124 | struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); | ||
125 | |||
100 | /* identical to cfg80211_get_dev_from_info but only operate on ifindex */ | 126 | /* identical to cfg80211_get_dev_from_info but only operate on ifindex */ |
101 | extern struct cfg80211_registered_device * | 127 | extern struct cfg80211_registered_device * |
102 | cfg80211_get_dev_from_ifindex(int ifindex); | 128 | cfg80211_get_dev_from_ifindex(int ifindex); |
@@ -110,8 +136,11 @@ extern int cfg80211_dev_rename(struct cfg80211_registered_device *drv, | |||
110 | char *newname); | 136 | char *newname); |
111 | 137 | ||
112 | void ieee80211_set_bitrate_flags(struct wiphy *wiphy); | 138 | void ieee80211_set_bitrate_flags(struct wiphy *wiphy); |
113 | void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby); | 139 | void wiphy_update_regulatory(struct wiphy *wiphy, |
140 | enum nl80211_reg_initiator setby); | ||
114 | 141 | ||
115 | void cfg80211_bss_expire(struct cfg80211_registered_device *dev); | 142 | void cfg80211_bss_expire(struct cfg80211_registered_device *dev); |
143 | void cfg80211_bss_age(struct cfg80211_registered_device *dev, | ||
144 | unsigned long age_secs); | ||
116 | 145 | ||
117 | #endif /* __NET_WIRELESS_CORE_H */ | 146 | #endif /* __NET_WIRELESS_CORE_H */ |
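The core.h changes introduce two small recurring idioms: a lock assertion (assert_cfg80211_lock() warns at runtime if the global mutex is not held, where previously only a comment enforced it) and a sentinel index (WIPHY_IDX_STALE = -1, with wiphy_idx_valid() guaranteeing that lookups on a stale index fail cleanly). A hedged sketch of the sentinel-index pattern on its own (hypothetical names, plain C):

	#include <stdio.h>
	#include <stdbool.h>
	#include <stddef.h>

	#define IDX_STALE -1	/* "no device behind this index" */

	struct device_entry {
		int idx;
		const char *name;
	};

	static struct device_entry table[] = {
		{ 0, "phy0" },
		{ 1, "phy1" },
	};

	/* Note 0 is valid, hence the >= 0 test (mirrors wiphy_idx_valid()). */
	static bool idx_valid(int idx)
	{
		return idx >= 0;
	}

	static struct device_entry *lookup(int idx)
	{
		size_t i;

		if (!idx_valid(idx))	/* stale indices can never match */
			return NULL;
		for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
			if (table[i].idx == idx)
				return &table[i];
		return NULL;
	}

	int main(void)
	{
		printf("%s\n", lookup(0) ? lookup(0)->name : "(none)");
		printf("%s\n", lookup(IDX_STALE) ? "found" : "(none)");
		return 0;
	}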
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index db428194c16a..2301dc1edc4c 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c | |||
@@ -337,6 +337,7 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) | |||
337 | pos += 8; | 337 | pos += 8; |
338 | 338 | ||
339 | if (ccmp_replay_check(pn, key->rx_pn)) { | 339 | if (ccmp_replay_check(pn, key->rx_pn)) { |
340 | #ifdef CONFIG_LIB80211_DEBUG | ||
340 | if (net_ratelimit()) { | 341 | if (net_ratelimit()) { |
341 | printk(KERN_DEBUG "CCMP: replay detected: STA=%pM " | 342 | printk(KERN_DEBUG "CCMP: replay detected: STA=%pM " |
342 | "previous PN %02x%02x%02x%02x%02x%02x " | 343 | "previous PN %02x%02x%02x%02x%02x%02x " |
@@ -346,6 +347,7 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) | |||
346 | key->rx_pn[3], key->rx_pn[4], key->rx_pn[5], | 347 | key->rx_pn[3], key->rx_pn[4], key->rx_pn[5], |
347 | pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); | 348 | pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); |
348 | } | 349 | } |
350 | #endif | ||
349 | key->dot11RSNAStatsCCMPReplays++; | 351 | key->dot11RSNAStatsCCMPReplays++; |
350 | return -4; | 352 | return -4; |
351 | } | 353 | } |
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index 7e8e22bfed90..c36287399d7e 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c | |||
@@ -465,12 +465,14 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) | |||
465 | pos += 8; | 465 | pos += 8; |
466 | 466 | ||
467 | if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { | 467 | if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { |
468 | #ifdef CONFIG_LIB80211_DEBUG | ||
468 | if (net_ratelimit()) { | 469 | if (net_ratelimit()) { |
469 | printk(KERN_DEBUG "TKIP: replay detected: STA=%pM" | 470 | printk(KERN_DEBUG "TKIP: replay detected: STA=%pM" |
470 | " previous TSC %08x%04x received TSC " | 471 | " previous TSC %08x%04x received TSC " |
471 | "%08x%04x\n", hdr->addr2, | 472 | "%08x%04x\n", hdr->addr2, |
472 | tkey->rx_iv32, tkey->rx_iv16, iv32, iv16); | 473 | tkey->rx_iv32, tkey->rx_iv16, iv32, iv16); |
473 | } | 474 | } |
475 | #endif | ||
474 | tkey->dot11RSNAStatsTKIPReplays++; | 476 | tkey->dot11RSNAStatsTKIPReplays++; |
475 | return -4; | 477 | return -4; |
476 | } | 478 | } |
@@ -505,10 +507,12 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) | |||
505 | * it needs to be recalculated for the next packet. */ | 507 | * it needs to be recalculated for the next packet. */ |
506 | tkey->rx_phase1_done = 0; | 508 | tkey->rx_phase1_done = 0; |
507 | } | 509 | } |
510 | #ifdef CONFIG_LIB80211_DEBUG | ||
508 | if (net_ratelimit()) { | 511 | if (net_ratelimit()) { |
509 | printk(KERN_DEBUG "TKIP: ICV error detected: STA=" | 512 | printk(KERN_DEBUG "TKIP: ICV error detected: STA=" |
510 | "%pM\n", hdr->addr2); | 513 | "%pM\n", hdr->addr2); |
511 | } | 514 | } |
515 | #endif | ||
512 | tkey->dot11RSNAStatsTKIPICVErrors++; | 516 | tkey->dot11RSNAStatsTKIPICVErrors++; |
513 | return -5; | 517 | return -5; |
514 | } | 518 | } |
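Together with the new LIB80211_DEBUG Kconfig option above, these #ifdef blocks keep the per-packet replay and ICV diagnostics out of production kernels: the dot11RSNAStats counters are still updated unconditionally, only the rate-limited printk chatter is compiled out. A hedged sketch of the same compile-time gating pattern:

	#include <stdio.h>

	/* Build with -DLIB80211_DEBUG_DEMO to get the verbose message. */

	static unsigned long replay_counter;

	static void handle_replay(void)
	{
	#ifdef LIB80211_DEBUG_DEMO
		fprintf(stderr, "replay detected (verbose diagnostics enabled)\n");
	#endif
		replay_counter++;	/* accounting happens either way */
	}

	int main(void)
	{
		handle_replay();
		printf("replays so far: %lu\n", replay_counter);
		return 0;
	}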
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 298a4de59948..ab9d8f14e151 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c | |||
@@ -7,7 +7,6 @@ | |||
7 | #include <linux/if.h> | 7 | #include <linux/if.h> |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/err.h> | 9 | #include <linux/err.h> |
10 | #include <linux/mutex.h> | ||
11 | #include <linux/list.h> | 10 | #include <linux/list.h> |
12 | #include <linux/if_ether.h> | 11 | #include <linux/if_ether.h> |
13 | #include <linux/ieee80211.h> | 12 | #include <linux/ieee80211.h> |
@@ -142,7 +141,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, | |||
142 | if (!hdr) | 141 | if (!hdr) |
143 | return -1; | 142 | return -1; |
144 | 143 | ||
145 | NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->idx); | 144 | NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx); |
146 | NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); | 145 | NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); |
147 | NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, | 146 | NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, |
148 | dev->wiphy.max_scan_ssids); | 147 | dev->wiphy.max_scan_ssids); |
@@ -256,7 +255,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) | |||
256 | int start = cb->args[0]; | 255 | int start = cb->args[0]; |
257 | struct cfg80211_registered_device *dev; | 256 | struct cfg80211_registered_device *dev; |
258 | 257 | ||
259 | mutex_lock(&cfg80211_drv_mutex); | 258 | mutex_lock(&cfg80211_mutex); |
260 | list_for_each_entry(dev, &cfg80211_drv_list, list) { | 259 | list_for_each_entry(dev, &cfg80211_drv_list, list) { |
261 | if (++idx <= start) | 260 | if (++idx <= start) |
262 | continue; | 261 | continue; |
@@ -267,7 +266,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) | |||
267 | break; | 266 | break; |
268 | } | 267 | } |
269 | } | 268 | } |
270 | mutex_unlock(&cfg80211_drv_mutex); | 269 | mutex_unlock(&cfg80211_mutex); |
271 | 270 | ||
272 | cb->args[0] = idx; | 271 | cb->args[0] = idx; |
273 | 272 | ||
@@ -470,7 +469,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * | |||
470 | struct cfg80211_registered_device *dev; | 469 | struct cfg80211_registered_device *dev; |
471 | struct wireless_dev *wdev; | 470 | struct wireless_dev *wdev; |
472 | 471 | ||
473 | mutex_lock(&cfg80211_drv_mutex); | 472 | mutex_lock(&cfg80211_mutex); |
474 | list_for_each_entry(dev, &cfg80211_drv_list, list) { | 473 | list_for_each_entry(dev, &cfg80211_drv_list, list) { |
475 | if (wp_idx < wp_start) { | 474 | if (wp_idx < wp_start) { |
476 | wp_idx++; | 475 | wp_idx++; |
@@ -497,7 +496,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * | |||
497 | wp_idx++; | 496 | wp_idx++; |
498 | } | 497 | } |
499 | out: | 498 | out: |
500 | mutex_unlock(&cfg80211_drv_mutex); | 499 | mutex_unlock(&cfg80211_mutex); |
501 | 500 | ||
502 | cb->args[0] = wp_idx; | 501 | cb->args[0] = wp_idx; |
503 | cb->args[1] = if_idx; | 502 | cb->args[1] = if_idx; |
@@ -1206,6 +1205,12 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, | |||
1206 | 1205 | ||
1207 | nla_nest_end(msg, txrate); | 1206 | nla_nest_end(msg, txrate); |
1208 | } | 1207 | } |
1208 | if (sinfo->filled & STATION_INFO_RX_PACKETS) | ||
1209 | NLA_PUT_U32(msg, NL80211_STA_INFO_RX_PACKETS, | ||
1210 | sinfo->rx_packets); | ||
1211 | if (sinfo->filled & STATION_INFO_TX_PACKETS) | ||
1212 | NLA_PUT_U32(msg, NL80211_STA_INFO_TX_PACKETS, | ||
1213 | sinfo->tx_packets); | ||
1209 | nla_nest_end(msg, sinfoattr); | 1214 | nla_nest_end(msg, sinfoattr); |
1210 | 1215 | ||
1211 | return genlmsg_end(msg, hdr); | 1216 | return genlmsg_end(msg, hdr); |
@@ -1900,6 +1905,19 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) | |||
1900 | int r; | 1905 | int r; |
1901 | char *data = NULL; | 1906 | char *data = NULL; |
1902 | 1907 | ||
1908 | /* | ||
1909 | * You should only get this when cfg80211 hasn't yet initialized | ||
1910 | * completely when built into the kernel, in the narrow window | ||
1911 | * between nl80211_init() and regulatory_init(), if that is | ||
1912 | * even possible. | ||
1913 | */ | ||
1914 | mutex_lock(&cfg80211_mutex); | ||
1915 | if (unlikely(!cfg80211_regdomain)) { | ||
1916 | mutex_unlock(&cfg80211_mutex); | ||
1917 | return -EINPROGRESS; | ||
1918 | } | ||
1919 | mutex_unlock(&cfg80211_mutex); | ||
1920 | |||
1903 | if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) | 1921 | if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) |
1904 | return -EINVAL; | 1922 | return -EINVAL; |
1905 | 1923 | ||
@@ -1910,14 +1928,9 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) | |||
1910 | if (is_world_regdom(data)) | 1928 | if (is_world_regdom(data)) |
1911 | return -EINVAL; | 1929 | return -EINVAL; |
1912 | #endif | 1930 | #endif |
1913 | mutex_lock(&cfg80211_drv_mutex); | 1931 | |
1914 | r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, 0, ENVIRON_ANY); | 1932 | r = regulatory_hint_user(data); |
1915 | mutex_unlock(&cfg80211_drv_mutex); | 1933 | |
1916 | /* This means the regulatory domain was already set, however | ||
1917 | * we don't want to confuse userspace with a "successful error" | ||
1918 | * message so lets just treat it as a success */ | ||
1919 | if (r == -EALREADY) | ||
1920 | r = 0; | ||
1921 | return r; | 1934 | return r; |
1922 | } | 1935 | } |
1923 | 1936 | ||
@@ -1937,6 +1950,11 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, | |||
1937 | if (err) | 1950 | if (err) |
1938 | return err; | 1951 | return err; |
1939 | 1952 | ||
1953 | if (!drv->ops->get_mesh_params) { | ||
1954 | err = -EOPNOTSUPP; | ||
1955 | goto out; | ||
1956 | } | ||
1957 | |||
1940 | /* Get the mesh params */ | 1958 | /* Get the mesh params */ |
1941 | rtnl_lock(); | 1959 | rtnl_lock(); |
1942 | err = drv->ops->get_mesh_params(&drv->wiphy, dev, &cur_params); | 1960 | err = drv->ops->get_mesh_params(&drv->wiphy, dev, &cur_params); |
@@ -2046,6 +2064,11 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) | |||
2046 | if (err) | 2064 | if (err) |
2047 | return err; | 2065 | return err; |
2048 | 2066 | ||
2067 | if (!drv->ops->set_mesh_params) { | ||
2068 | err = -EOPNOTSUPP; | ||
2069 | goto out; | ||
2070 | } | ||
2071 | |||
2049 | /* This makes sure that there aren't more than 32 mesh config | 2072 | /* This makes sure that there aren't more than 32 mesh config |
2050 | * parameters (otherwise our bitfield scheme would not work.) */ | 2073 | * parameters (otherwise our bitfield scheme would not work.) */ |
2051 | BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32); | 2074 | BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32); |
@@ -2090,6 +2113,7 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) | |||
2090 | err = drv->ops->set_mesh_params(&drv->wiphy, dev, &cfg, mask); | 2113 | err = drv->ops->set_mesh_params(&drv->wiphy, dev, &cfg, mask); |
2091 | rtnl_unlock(); | 2114 | rtnl_unlock(); |
2092 | 2115 | ||
2116 | out: | ||
2093 | /* cleanup */ | 2117 | /* cleanup */ |
2094 | cfg80211_put_dev(drv); | 2118 | cfg80211_put_dev(drv); |
2095 | dev_put(dev); | 2119 | dev_put(dev); |
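The two mesh-params hunks above guard the calls through drv->ops->get_mesh_params and drv->ops->set_mesh_params, since a cfg80211 driver may leave optional callbacks NULL; without the check the kernel would dereference a NULL function pointer. Returning -EOPNOTSUPP is the conventional answer for an unimplemented operation. A hedged sketch of the ops-table pattern:

	#include <stdio.h>
	#include <errno.h>
	#include <stddef.h>

	struct dev_ops {
		int (*get_params)(int *out);	/* optional: may be NULL */
	};

	static int query_params(const struct dev_ops *ops, int *out)
	{
		if (!ops->get_params)		/* driver did not implement it */
			return -EOPNOTSUPP;
		return ops->get_params(out);
	}

	static int demo_get(int *out)
	{
		*out = 42;
		return 0;
	}

	int main(void)
	{
		struct dev_ops with = { .get_params = demo_get };
		struct dev_ops without = { .get_params = NULL };
		int v = 0;

		printf("with:    %d (v=%d)\n", query_params(&with, &v), v);
		printf("without: %d\n", query_params(&without, &v));
		return 0;
	}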
@@ -2106,7 +2130,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) | |||
2106 | unsigned int i; | 2130 | unsigned int i; |
2107 | int err = -EINVAL; | 2131 | int err = -EINVAL; |
2108 | 2132 | ||
2109 | mutex_lock(&cfg80211_drv_mutex); | 2133 | mutex_lock(&cfg80211_mutex); |
2110 | 2134 | ||
2111 | if (!cfg80211_regdomain) | 2135 | if (!cfg80211_regdomain) |
2112 | goto out; | 2136 | goto out; |
@@ -2169,7 +2193,7 @@ nla_put_failure: | |||
2169 | genlmsg_cancel(msg, hdr); | 2193 | genlmsg_cancel(msg, hdr); |
2170 | err = -EMSGSIZE; | 2194 | err = -EMSGSIZE; |
2171 | out: | 2195 | out: |
2172 | mutex_unlock(&cfg80211_drv_mutex); | 2196 | mutex_unlock(&cfg80211_mutex); |
2173 | return err; | 2197 | return err; |
2174 | } | 2198 | } |
2175 | 2199 | ||
@@ -2228,9 +2252,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) | |||
2228 | 2252 | ||
2229 | BUG_ON(rule_idx != num_rules); | 2253 | BUG_ON(rule_idx != num_rules); |
2230 | 2254 | ||
2231 | mutex_lock(&cfg80211_drv_mutex); | 2255 | mutex_lock(&cfg80211_mutex); |
2232 | r = set_regdom(rd); | 2256 | r = set_regdom(rd); |
2233 | mutex_unlock(&cfg80211_drv_mutex); | 2257 | mutex_unlock(&cfg80211_mutex); |
2234 | return r; | 2258 | return r; |
2235 | 2259 | ||
2236 | bad_reg: | 2260 | bad_reg: |
@@ -2286,6 +2310,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) | |||
2286 | struct wiphy *wiphy; | 2310 | struct wiphy *wiphy; |
2287 | int err, tmp, n_ssids = 0, n_channels = 0, i; | 2311 | int err, tmp, n_ssids = 0, n_channels = 0, i; |
2288 | enum ieee80211_band band; | 2312 | enum ieee80211_band band; |
2313 | size_t ie_len; | ||
2289 | 2314 | ||
2290 | err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); | 2315 | err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); |
2291 | if (err) | 2316 | if (err) |
@@ -2327,9 +2352,15 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) | |||
2327 | goto out_unlock; | 2352 | goto out_unlock; |
2328 | } | 2353 | } |
2329 | 2354 | ||
2355 | if (info->attrs[NL80211_ATTR_IE]) | ||
2356 | ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); | ||
2357 | else | ||
2358 | ie_len = 0; | ||
2359 | |||
2330 | request = kzalloc(sizeof(*request) | 2360 | request = kzalloc(sizeof(*request) |
2331 | + sizeof(*ssid) * n_ssids | 2361 | + sizeof(*ssid) * n_ssids |
2332 | + sizeof(channel) * n_channels, GFP_KERNEL); | 2362 | + sizeof(channel) * n_channels |
2363 | + ie_len, GFP_KERNEL); | ||
2333 | if (!request) { | 2364 | if (!request) { |
2334 | err = -ENOMEM; | 2365 | err = -ENOMEM; |
2335 | goto out_unlock; | 2366 | goto out_unlock; |
@@ -2340,6 +2371,12 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) | |||
2340 | if (n_ssids) | 2371 | if (n_ssids) |
2341 | request->ssids = (void *)(request->channels + n_channels); | 2372 | request->ssids = (void *)(request->channels + n_channels); |
2342 | request->n_ssids = n_ssids; | 2373 | request->n_ssids = n_ssids; |
2374 | if (ie_len) { | ||
2375 | if (request->ssids) | ||
2376 | request->ie = (void *)(request->ssids + n_ssids); | ||
2377 | else | ||
2378 | request->ie = (void *)(request->channels + n_channels); | ||
2379 | } | ||
2343 | 2380 | ||
2344 | if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { | 2381 | if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { |
2345 | /* user specified, bail out if channel not found */ | 2382 | /* user specified, bail out if channel not found */ |
@@ -2380,6 +2417,12 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) | |||
2380 | } | 2417 | } |
2381 | } | 2418 | } |
2382 | 2419 | ||
2420 | if (info->attrs[NL80211_ATTR_IE]) { | ||
2421 | request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); | ||
2422 | memcpy(request->ie, nla_data(info->attrs[NL80211_ATTR_IE]), | ||
2423 | request->ie_len); | ||
2424 | } | ||
2425 | |||
2383 | request->ifidx = dev->ifindex; | 2426 | request->ifidx = dev->ifindex; |
2384 | request->wiphy = &drv->wiphy; | 2427 | request->wiphy = &drv->wiphy; |
2385 | 2428 | ||
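The scan-request hunk extends an existing trick: one kzalloc() sized for the request struct plus the channel array, the SSID array, and now the raw IE bytes, with the sub-regions carved out of the tail by pointer arithmetic. A single allocation means a single kfree() and no partial-failure cleanup. A hedged userspace sketch of the same layout (hypothetical types, calloc instead of kzalloc):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	struct scan_request {
		size_t n_channels;
		size_t n_ssids;
		size_t ie_len;
		int *channels;		/* points into the same allocation */
		char (*ssids)[32];	/* ditto */
		unsigned char *ie;	/* ditto */
	};

	int main(void)
	{
		size_t n_channels = 3, n_ssids = 2, ie_len = 8;
		struct scan_request *req;

		req = calloc(1, sizeof(*req)
				+ n_channels * sizeof(int)
				+ n_ssids * 32
				+ ie_len);
		if (!req)
			return 1;

		/* Carve the trailing regions, in the same order they were sized. */
		req->channels = (int *)(req + 1);
		req->ssids = (char (*)[32])(req->channels + n_channels);
		req->ie = (unsigned char *)(req->ssids + n_ssids);

		req->n_channels = n_channels;
		req->n_ssids = n_ssids;
		req->ie_len = ie_len;
		memset(req->ie, 0xdd, ie_len);

		printf("one block, three regions, one free()\n");
		free(req);		/* releases everything at once */
		return 0;
	}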
@@ -2432,7 +2475,7 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, | |||
2432 | NLA_PUT_U16(msg, NL80211_BSS_CAPABILITY, res->capability); | 2475 | NLA_PUT_U16(msg, NL80211_BSS_CAPABILITY, res->capability); |
2433 | NLA_PUT_U32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq); | 2476 | NLA_PUT_U32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq); |
2434 | 2477 | ||
2435 | switch (res->signal_type) { | 2478 | switch (rdev->wiphy.signal_type) { |
2436 | case CFG80211_SIGNAL_TYPE_MBM: | 2479 | case CFG80211_SIGNAL_TYPE_MBM: |
2437 | NLA_PUT_U32(msg, NL80211_BSS_SIGNAL_MBM, res->signal); | 2480 | NLA_PUT_U32(msg, NL80211_BSS_SIGNAL_MBM, res->signal); |
2438 | break; | 2481 | break; |
@@ -2601,7 +2644,6 @@ static struct genl_ops nl80211_ops[] = { | |||
2601 | .doit = nl80211_get_station, | 2644 | .doit = nl80211_get_station, |
2602 | .dumpit = nl80211_dump_station, | 2645 | .dumpit = nl80211_dump_station, |
2603 | .policy = nl80211_policy, | 2646 | .policy = nl80211_policy, |
2604 | .flags = GENL_ADMIN_PERM, | ||
2605 | }, | 2647 | }, |
2606 | { | 2648 | { |
2607 | .cmd = NL80211_CMD_SET_STATION, | 2649 | .cmd = NL80211_CMD_SET_STATION, |
@@ -2708,6 +2750,9 @@ static struct genl_multicast_group nl80211_config_mcgrp = { | |||
2708 | static struct genl_multicast_group nl80211_scan_mcgrp = { | 2750 | static struct genl_multicast_group nl80211_scan_mcgrp = { |
2709 | .name = "scan", | 2751 | .name = "scan", |
2710 | }; | 2752 | }; |
2753 | static struct genl_multicast_group nl80211_regulatory_mcgrp = { | ||
2754 | .name = "regulatory", | ||
2755 | }; | ||
2711 | 2756 | ||
2712 | /* notification functions */ | 2757 | /* notification functions */ |
2713 | 2758 | ||
@@ -2739,7 +2784,7 @@ static int nl80211_send_scan_donemsg(struct sk_buff *msg, | |||
2739 | if (!hdr) | 2784 | if (!hdr) |
2740 | return -1; | 2785 | return -1; |
2741 | 2786 | ||
2742 | NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->idx); | 2787 | NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); |
2743 | NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); | 2788 | NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); |
2744 | 2789 | ||
2745 | /* XXX: we should probably bounce back the request? */ | 2790 | /* XXX: we should probably bounce back the request? */ |
@@ -2787,6 +2832,61 @@ void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, | |||
2787 | genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL); | 2832 | genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL); |
2788 | } | 2833 | } |
2789 | 2834 | ||
2835 | /* | ||
2836 | * This can happen on global regulatory changes or device specific settings | ||
2837 | * based on custom world regulatory domains. | ||
2838 | */ | ||
2839 | void nl80211_send_reg_change_event(struct regulatory_request *request) | ||
2840 | { | ||
2841 | struct sk_buff *msg; | ||
2842 | void *hdr; | ||
2843 | |||
2844 | msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); | ||
2845 | if (!msg) | ||
2846 | return; | ||
2847 | |||
2848 | hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_REG_CHANGE); | ||
2849 | if (!hdr) { | ||
2850 | nlmsg_free(msg); | ||
2851 | return; | ||
2852 | } | ||
2853 | |||
2854 | /* Userspace can always count on this one being set */ | ||
2855 | NLA_PUT_U8(msg, NL80211_ATTR_REG_INITIATOR, request->initiator); | ||
2856 | |||
2857 | if (request->alpha2[0] == '0' && request->alpha2[1] == '0') | ||
2858 | NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE, | ||
2859 | NL80211_REGDOM_TYPE_WORLD); | ||
2860 | else if (request->alpha2[0] == '9' && request->alpha2[1] == '9') | ||
2861 | NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE, | ||
2862 | NL80211_REGDOM_TYPE_CUSTOM_WORLD); | ||
2863 | else if ((request->alpha2[0] == '9' && request->alpha2[1] == '8') || | ||
2864 | request->intersect) | ||
2865 | NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE, | ||
2866 | NL80211_REGDOM_TYPE_INTERSECTION); | ||
2867 | else { | ||
2868 | NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE, | ||
2869 | NL80211_REGDOM_TYPE_COUNTRY); | ||
2870 | NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2, request->alpha2); | ||
2871 | } | ||
2872 | |||
2873 | if (wiphy_idx_valid(request->wiphy_idx)) | ||
2874 | NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx); | ||
2875 | |||
2876 | if (genlmsg_end(msg, hdr) < 0) { | ||
2877 | nlmsg_free(msg); | ||
2878 | return; | ||
2879 | } | ||
2880 | |||
2881 | genlmsg_multicast(msg, 0, nl80211_regulatory_mcgrp.id, GFP_KERNEL); | ||
2882 | |||
2883 | return; | ||
2884 | |||
2885 | nla_put_failure: | ||
2886 | genlmsg_cancel(msg, hdr); | ||
2887 | nlmsg_free(msg); | ||
2888 | } | ||
2889 | |||
2790 | /* initialisation/exit functions */ | 2890 | /* initialisation/exit functions */ |
2791 | 2891 | ||
2792 | int nl80211_init(void) | 2892 | int nl80211_init(void) |
@@ -2811,6 +2911,10 @@ int nl80211_init(void) | |||
2811 | if (err) | 2911 | if (err) |
2812 | goto err_out; | 2912 | goto err_out; |
2813 | 2913 | ||
2914 | err = genl_register_mc_group(&nl80211_fam, &nl80211_regulatory_mcgrp); | ||
2915 | if (err) | ||
2916 | goto err_out; | ||
2917 | |||
2814 | return 0; | 2918 | return 0; |
2815 | err_out: | 2919 | err_out: |
2816 | genl_unregister_family(&nl80211_fam); | 2920 | genl_unregister_family(&nl80211_fam); |
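The new "regulatory" generic-netlink multicast group lets userspace subscribe to NL80211_CMD_REG_CHANGE events instead of polling the regulatory domain. A hedged sketch of a listener using libnl-genl (assumes libnl 3.x; the function names are the libnl ones as understood here, so treat the exact API as an assumption rather than a verified interface):

	#include <stdio.h>
	#include <netlink/netlink.h>
	#include <netlink/genl/genl.h>
	#include <netlink/genl/ctrl.h>

	int main(void)
	{
		struct nl_sock *sk = nl_socket_alloc();
		int grp;

		if (!sk || genl_connect(sk)) {
			fprintf(stderr, "cannot connect generic netlink\n");
			return 1;
		}

		/* Resolve the "regulatory" group of the nl80211 family ... */
		grp = genl_ctrl_resolve_grp(sk, "nl80211", "regulatory");
		if (grp < 0) {
			fprintf(stderr, "no regulatory multicast group\n");
			return 1;
		}

		/* ... and join it; each regulatory change now arrives as a message. */
		nl_socket_add_membership(sk, grp);
		nl_socket_disable_seq_check(sk);	/* events are unsolicited */

		for (;;)
			nl_recvmsgs_default(sk);	/* default handlers just consume */
		return 0;
	}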
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index b565a5f84e97..e65a3c38c52f 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h | |||
@@ -11,6 +11,7 @@ extern void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, | |||
11 | struct net_device *netdev); | 11 | struct net_device *netdev); |
12 | extern void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, | 12 | extern void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, |
13 | struct net_device *netdev); | 13 | struct net_device *netdev); |
14 | extern void nl80211_send_reg_change_event(struct regulatory_request *request); | ||
14 | #else | 15 | #else |
15 | static inline int nl80211_init(void) | 16 | static inline int nl80211_init(void) |
16 | { | 17 | { |
@@ -27,6 +28,14 @@ static inline void | |||
27 | nl80211_send_scan_done(struct cfg80211_registered_device *rdev, | 28 | nl80211_send_scan_done(struct cfg80211_registered_device *rdev, |
28 | struct net_device *netdev) | 29 | struct net_device *netdev) |
29 | {} | 30 | {} |
31 | static inline void nl80211_send_scan_aborted( | ||
32 | struct cfg80211_registered_device *rdev, | ||
33 | struct net_device *netdev) | ||
34 | {} | ||
35 | static inline void | ||
36 | nl80211_send_reg_change_event(struct regulatory_request *request) | ||
37 | { | ||
38 | } | ||
30 | #endif /* CONFIG_NL80211 */ | 39 | #endif /* CONFIG_NL80211 */ |
31 | 40 | ||
32 | #endif /* __NET_WIRELESS_NL80211_H */ | 41 | #endif /* __NET_WIRELESS_NL80211_H */ |
diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2323644330cd..eb8b8ed16155 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <net/cfg80211.h> | 41 | #include <net/cfg80211.h> |
42 | #include "core.h" | 42 | #include "core.h" |
43 | #include "reg.h" | 43 | #include "reg.h" |
44 | #include "nl80211.h" | ||
44 | 45 | ||
45 | /* Receipt of information from last regulatory request */ | 46 | /* Receipt of information from last regulatory request */ |
46 | static struct regulatory_request *last_request; | 47 | static struct regulatory_request *last_request; |
@@ -54,22 +55,63 @@ static u32 supported_bandwidths[] = { | |||
54 | MHZ_TO_KHZ(20), | 55 | MHZ_TO_KHZ(20), |
55 | }; | 56 | }; |
56 | 57 | ||
57 | /* Central wireless core regulatory domains, we only need two, | 58 | /* |
59 | * Central wireless core regulatory domains, we only need two, | ||
58 | * the current one and a world regulatory domain in case we have no | 60 | * the current one and a world regulatory domain in case we have no |
59 | * information to give us an alpha2 */ | 61 | * information to give us an alpha2 |
62 | */ | ||
60 | const struct ieee80211_regdomain *cfg80211_regdomain; | 63 | const struct ieee80211_regdomain *cfg80211_regdomain; |
61 | 64 | ||
62 | /* We use this as a place for the rd structure built from the | 65 | /* |
66 | * We use this as a place for the rd structure built from the | ||
63 | * last parsed country IE to rest until CRDA gets back to us with | 67 | * last parsed country IE to rest until CRDA gets back to us with |
64 | * what it thinks should apply for the same country */ | 68 | * what it thinks should apply for the same country |
69 | */ | ||
65 | static const struct ieee80211_regdomain *country_ie_regdomain; | 70 | static const struct ieee80211_regdomain *country_ie_regdomain; |
66 | 71 | ||
72 | /* Used to queue up regulatory hints */ | ||
73 | static LIST_HEAD(reg_requests_list); | ||
74 | static spinlock_t reg_requests_lock; | ||
75 | |||
76 | /* Used to queue up beacon hints for review */ | ||
77 | static LIST_HEAD(reg_pending_beacons); | ||
78 | static spinlock_t reg_pending_beacons_lock; | ||
79 | |||
80 | /* Used to keep track of processed beacon hints */ | ||
81 | static LIST_HEAD(reg_beacon_list); | ||
82 | |||
83 | struct reg_beacon { | ||
84 | struct list_head list; | ||
85 | struct ieee80211_channel chan; | ||
86 | }; | ||
87 | |||
67 | /* We keep a static world regulatory domain in case of the absence of CRDA */ | 88 | /* We keep a static world regulatory domain in case of the absence of CRDA */ |
68 | static const struct ieee80211_regdomain world_regdom = { | 89 | static const struct ieee80211_regdomain world_regdom = { |
69 | .n_reg_rules = 1, | 90 | .n_reg_rules = 5, |
70 | .alpha2 = "00", | 91 | .alpha2 = "00", |
71 | .reg_rules = { | 92 | .reg_rules = { |
72 | REG_RULE(2412-10, 2462+10, 40, 6, 20, | 93 | /* IEEE 802.11b/g, channels 1..11 */ |
94 | REG_RULE(2412-10, 2462+10, 40, 6, 20, 0), | ||
95 | /* IEEE 802.11b/g, channels 12..13. No HT40 | ||
96 | * channel fits here. */ | ||
97 | REG_RULE(2467-10, 2472+10, 20, 6, 20, | ||
98 | NL80211_RRF_PASSIVE_SCAN | | ||
99 | NL80211_RRF_NO_IBSS), | ||
100 | /* IEEE 802.11 channel 14 - Only JP enables | ||
101 | * this and for 802.11b only */ | ||
102 | REG_RULE(2484-10, 2484+10, 20, 6, 20, | ||
103 | NL80211_RRF_PASSIVE_SCAN | | ||
104 | NL80211_RRF_NO_IBSS | | ||
105 | NL80211_RRF_NO_OFDM), | ||
106 | /* IEEE 802.11a, channel 36..48 */ | ||
107 | REG_RULE(5180-10, 5240+10, 40, 6, 20, | ||
108 | NL80211_RRF_PASSIVE_SCAN | | ||
109 | NL80211_RRF_NO_IBSS), | ||
110 | |||
111 | /* NB: 5260 MHz - 5700 MHz requires DFS */ | ||
112 | |||
113 | /* IEEE 802.11a, channel 149..165 */ | ||
114 | REG_RULE(5745-10, 5825+10, 40, 6, 20, | ||
73 | NL80211_RRF_PASSIVE_SCAN | | 115 | NL80211_RRF_PASSIVE_SCAN | |
74 | NL80211_RRF_NO_IBSS), | 116 | NL80211_RRF_NO_IBSS), |
75 | } | 117 | } |
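The world regulatory domain above grows from one rule to five. Each REG_RULE() entry packs a frequency range (band edges given in MHz, stored in kHz), the widest permitted channel, an antenna-gain cap and an EIRP limit, plus restriction flags. A sketch of what one entry expands to, assuming the usual cfg80211 helper definitions (MHZ_TO_KHZ multiplies by 1000, DBI_TO_MBI and DBM_TO_MBM by 100):

	/*
	 * Approximate expansion of:
	 *   REG_RULE(2467-10, 2472+10, 20, 6, 20,
	 *            NL80211_RRF_PASSIVE_SCAN | NL80211_RRF_NO_IBSS)
	 * i.e. channels 12..13, passive scan only, no IBSS.
	 */
	static const struct ieee80211_reg_rule chan_12_13_rule = {
		.freq_range = {
			.start_freq_khz = 2457000,	/* (2467 - 10) MHz */
			.end_freq_khz = 2482000,	/* (2472 + 10) MHz */
			.max_bandwidth_khz = 20000,	/* 20 MHz channels only */
		},
		.power_rule = {
			.max_antenna_gain = 600,	/* 6 dBi expressed in mBi */
			.max_eirp = 2000,		/* 20 dBm expressed in mBm */
		},
		.flags = NL80211_RRF_PASSIVE_SCAN | NL80211_RRF_NO_IBSS,
	};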
@@ -83,9 +125,11 @@ static char *ieee80211_regdom = "US"; | |||
83 | module_param(ieee80211_regdom, charp, 0444); | 125 | module_param(ieee80211_regdom, charp, 0444); |
84 | MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); | 126 | MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); |
85 | 127 | ||
86 | /* We assume 40 MHz bandwidth for the old regulatory work. | 128 | /* |
129 | * We assume 40 MHz bandwidth for the old regulatory work. | ||
87 | * We make emphasis we are using the exact same frequencies | 130 | * We make emphasis we are using the exact same frequencies |
88 | * as before */ | 131 | * as before |
132 | */ | ||
89 | 133 | ||
90 | static const struct ieee80211_regdomain us_regdom = { | 134 | static const struct ieee80211_regdomain us_regdom = { |
91 | .n_reg_rules = 6, | 135 | .n_reg_rules = 6, |
@@ -124,8 +168,10 @@ static const struct ieee80211_regdomain jp_regdom = { | |||
124 | 168 | ||
125 | static const struct ieee80211_regdomain eu_regdom = { | 169 | static const struct ieee80211_regdomain eu_regdom = { |
126 | .n_reg_rules = 6, | 170 | .n_reg_rules = 6, |
127 | /* This alpha2 is bogus, we leave it here just for stupid | 171 | /* |
128 | * backward compatibility */ | 172 | * This alpha2 is bogus, we leave it here just for stupid |
173 | * backward compatibility | ||
174 | */ | ||
129 | .alpha2 = "EU", | 175 | .alpha2 = "EU", |
130 | .reg_rules = { | 176 | .reg_rules = { |
131 | /* IEEE 802.11b/g, channels 1..13 */ | 177 | /* IEEE 802.11b/g, channels 1..13 */ |
@@ -194,8 +240,10 @@ static void reset_regdomains(void) | |||
194 | cfg80211_regdomain = NULL; | 240 | cfg80211_regdomain = NULL; |
195 | } | 241 | } |
196 | 242 | ||
197 | /* Dynamic world regulatory domain requested by the wireless | 243 | /* |
198 | * core upon initialization */ | 244 | * Dynamic world regulatory domain requested by the wireless |
245 | * core upon initialization | ||
246 | */ | ||
199 | static void update_world_regdomain(const struct ieee80211_regdomain *rd) | 247 | static void update_world_regdomain(const struct ieee80211_regdomain *rd) |
200 | { | 248 | { |
201 | BUG_ON(!last_request); | 249 | BUG_ON(!last_request); |
@@ -236,8 +284,10 @@ static bool is_unknown_alpha2(const char *alpha2) | |||
236 | { | 284 | { |
237 | if (!alpha2) | 285 | if (!alpha2) |
238 | return false; | 286 | return false; |
239 | /* Special case where regulatory domain was built by driver | 287 | /* |
240 | * but a specific alpha2 cannot be determined */ | 288 | * Special case where regulatory domain was built by driver |
289 | * but a specific alpha2 cannot be determined | ||
290 | */ | ||
241 | if (alpha2[0] == '9' && alpha2[1] == '9') | 291 | if (alpha2[0] == '9' && alpha2[1] == '9') |
242 | return true; | 292 | return true; |
243 | return false; | 293 | return false; |
@@ -247,9 +297,11 @@ static bool is_intersected_alpha2(const char *alpha2) | |||
247 | { | 297 | { |
248 | if (!alpha2) | 298 | if (!alpha2) |
249 | return false; | 299 | return false; |
250 | /* Special case where regulatory domain is the | 300 | /* |
301 | * Special case where regulatory domain is the | ||
251 | * result of an intersection between two regulatory domain | 302 | * result of an intersection between two regulatory domain |
252 | * structures */ | 303 | * structures |
304 | */ | ||
253 | if (alpha2[0] == '9' && alpha2[1] == '8') | 305 | if (alpha2[0] == '9' && alpha2[1] == '8') |
254 | return true; | 306 | return true; |
255 | return false; | 307 | return false; |
@@ -274,8 +326,10 @@ static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) | |||
274 | return false; | 326 | return false; |
275 | } | 327 | } |
276 | 328 | ||
277 | static bool regdom_changed(const char *alpha2) | 329 | static bool regdom_changes(const char *alpha2) |
278 | { | 330 | { |
331 | assert_cfg80211_lock(); | ||
332 | |||
279 | if (!cfg80211_regdomain) | 333 | if (!cfg80211_regdomain) |
280 | return true; | 334 | return true; |
281 | if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) | 335 | if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) |
@@ -302,8 +356,10 @@ static bool country_ie_integrity_changes(u32 checksum) | |||
302 | return false; | 356 | return false; |
303 | } | 357 | } |
304 | 358 | ||
305 | /* This lets us keep regulatory code which is updated on a regulatory | 359 | /* |
306 | * basis in userspace. */ | 360 | * This lets us keep regulatory code which is updated on a regulatory |
361 | * basis in userspace. | ||
362 | */ | ||
307 | static int call_crda(const char *alpha2) | 363 | static int call_crda(const char *alpha2) |
308 | { | 364 | { |
309 | char country_env[9 + 2] = "COUNTRY="; | 365 | char country_env[9 + 2] = "COUNTRY="; |
@@ -348,7 +404,8 @@ static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) | |||
348 | 404 | ||
349 | freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; | 405 | freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; |
350 | 406 | ||
351 | if (freq_diff <= 0 || freq_range->max_bandwidth_khz > freq_diff) | 407 | if (freq_range->end_freq_khz <= freq_range->start_freq_khz || |
408 | freq_range->max_bandwidth_khz > freq_diff) | ||
352 | return false; | 409 | return false; |
353 | 410 | ||
354 | return true; | 411 | return true; |
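The rewritten validity check matters because the frequency fields are unsigned (u32): with end_freq_khz below start_freq_khz the subtraction wraps to a huge positive value instead of going negative, so the old `freq_diff <= 0` test only ever caught exactly equal endpoints. Comparing the endpoints directly catches both the inverted and the empty range. A small standalone illustration of the wrap-around:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint32_t start = 2472000;	/* kHz */
		uint32_t end   = 2457000;	/* kHz, bogus: below start */
		uint32_t diff  = end - start;	/* wraps to 4294952296, not -15000 */

		/* old-style check: never fires for end < start */
		printf("diff <= 0?    %s\n", diff <= 0 ? "yes" : "no");	/* no */

		/* new-style check: catches the inverted range directly */
		printf("end <= start? %s\n", end <= start ? "yes" : "no");	/* yes */
		return 0;
	}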
@@ -414,10 +471,12 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, | |||
414 | #undef ONE_GHZ_IN_KHZ | 471 | #undef ONE_GHZ_IN_KHZ |
415 | } | 472 | } |
416 | 473 | ||
417 | /* Converts a country IE to a regulatory domain. A regulatory domain | 474 | /* |
475 | * Converts a country IE to a regulatory domain. A regulatory domain | ||
418 | * structure has a lot of information which the IE doesn't yet have, | 476 | * structure has a lot of information which the IE doesn't yet have, |
419 | * so for the other values we use upper max values as we will intersect | 477 | * so for the other values we use upper max values as we will intersect |
420 | * with our userspace regulatory agent to get lower bounds. */ | 478 | * with our userspace regulatory agent to get lower bounds. |
479 | */ | ||
421 | static struct ieee80211_regdomain *country_ie_2_rd( | 480 | static struct ieee80211_regdomain *country_ie_2_rd( |
422 | u8 *country_ie, | 481 | u8 *country_ie, |
423 | u8 country_ie_len, | 482 | u8 country_ie_len, |
@@ -462,9 +521,11 @@ static struct ieee80211_regdomain *country_ie_2_rd( | |||
462 | 521 | ||
463 | *checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8); | 522 | *checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8); |
464 | 523 | ||
465 | /* We need to build a reg rule for each triplet, but first we must | 524 | /* |
525 | * We need to build a reg rule for each triplet, but first we must | ||
466 | * calculate the number of reg rules we will need. We will need one | 526 | * calculate the number of reg rules we will need. We will need one |
467 | * for each channel subband */ | 527 | * for each channel subband |
528 | */ | ||
468 | while (country_ie_len >= 3) { | 529 | while (country_ie_len >= 3) { |
469 | int end_channel = 0; | 530 | int end_channel = 0; |
470 | struct ieee80211_country_ie_triplet *triplet = | 531 | struct ieee80211_country_ie_triplet *triplet = |
@@ -502,9 +563,11 @@ static struct ieee80211_regdomain *country_ie_2_rd( | |||
502 | if (cur_sub_max_channel < cur_channel) | 563 | if (cur_sub_max_channel < cur_channel) |
503 | return NULL; | 564 | return NULL; |
504 | 565 | ||
505 | /* Do not allow overlapping channels. Also channels | 566 | /* |
567 | * Do not allow overlapping channels. Also channels | ||
506 | * passed in each subband must be monotonically | 568 | * passed in each subband must be monotonically |
507 | * increasing */ | 569 | * increasing |
570 | */ | ||
508 | if (last_sub_max_channel) { | 571 | if (last_sub_max_channel) { |
509 | if (cur_channel <= last_sub_max_channel) | 572 | if (cur_channel <= last_sub_max_channel) |
510 | return NULL; | 573 | return NULL; |
@@ -512,10 +575,12 @@ static struct ieee80211_regdomain *country_ie_2_rd( | |||
512 | return NULL; | 575 | return NULL; |
513 | } | 576 | } |
514 | 577 | ||
515 | /* When dot11RegulatoryClassesRequired is supported | 578 | /* |
579 | * When dot11RegulatoryClassesRequired is supported | ||
516 | * we can throw ext triplets as part of this soup, | 580 | * we can throw ext triplets as part of this soup, |
517 | * for now we don't care when those change as we | 581 | * for now we don't care when those change as we |
518 | * don't support them */ | 582 | * don't support them |
583 | */ | ||
519 | *checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) | | 584 | *checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) | |
520 | ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) | | 585 | ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) | |
521 | ((triplet->chans.max_power ^ cur_sub_max_channel) << 24); | 586 | ((triplet->chans.max_power ^ cur_sub_max_channel) << 24); |
@@ -526,8 +591,10 @@ static struct ieee80211_regdomain *country_ie_2_rd( | |||
526 | country_ie_len -= 3; | 591 | country_ie_len -= 3; |
527 | num_rules++; | 592 | num_rules++; |
528 | 593 | ||
529 | /* Note: this is not a IEEE requirement but | 594 | /* |
530 | * simply a memory requirement */ | 595 | * Note: this is not an IEEE requirement but |
596 | * simply a memory requirement | ||
597 | */ | ||
531 | if (num_rules > NL80211_MAX_SUPP_REG_RULES) | 598 | if (num_rules > NL80211_MAX_SUPP_REG_RULES) |
532 | return NULL; | 599 | return NULL; |
533 | } | 600 | } |
@@ -555,8 +622,10 @@ static struct ieee80211_regdomain *country_ie_2_rd( | |||
555 | struct ieee80211_freq_range *freq_range = NULL; | 622 | struct ieee80211_freq_range *freq_range = NULL; |
556 | struct ieee80211_power_rule *power_rule = NULL; | 623 | struct ieee80211_power_rule *power_rule = NULL; |
557 | 624 | ||
558 | /* Must parse if dot11RegulatoryClassesRequired is true, | 625 | /* |
559 | * we don't support this yet */ | 626 | * Must parse if dot11RegulatoryClassesRequired is true, |
627 | * we don't support this yet | ||
628 | */ | ||
560 | if (triplet->ext.reg_extension_id >= | 629 | if (triplet->ext.reg_extension_id >= |
561 | IEEE80211_COUNTRY_EXTENSION_ID) { | 630 | IEEE80211_COUNTRY_EXTENSION_ID) { |
562 | country_ie += 3; | 631 | country_ie += 3; |
@@ -578,10 +647,12 @@ static struct ieee80211_regdomain *country_ie_2_rd( | |||
578 | end_channel = triplet->chans.first_channel + | 647 | end_channel = triplet->chans.first_channel + |
579 | (4 * (triplet->chans.num_channels - 1)); | 648 | (4 * (triplet->chans.num_channels - 1)); |
580 | 649 | ||
581 | /* The +10 is since the regulatory domain expects | 650 | /* |
651 | * The +10 is since the regulatory domain expects | ||
582 | * the actual band edge, not the center of freq for | 652 | * the actual band edge, not the center of freq for |
583 | * its start and end freqs, assuming 20 MHz bandwidth on | 653 | * its start and end freqs, assuming 20 MHz bandwidth on |
584 | * the channels passed */ | 654 | * the channels passed |
655 | */ | ||
585 | freq_range->start_freq_khz = | 656 | freq_range->start_freq_khz = |
586 | MHZ_TO_KHZ(ieee80211_channel_to_frequency( | 657 | MHZ_TO_KHZ(ieee80211_channel_to_frequency( |
587 | triplet->chans.first_channel) - 10); | 658 | triplet->chans.first_channel) - 10); |
@@ -589,9 +660,11 @@ static struct ieee80211_regdomain *country_ie_2_rd( | |||
589 | MHZ_TO_KHZ(ieee80211_channel_to_frequency( | 660 | MHZ_TO_KHZ(ieee80211_channel_to_frequency( |
590 | end_channel) + 10); | 661 | end_channel) + 10); |
591 | 662 | ||
592 | /* Large arbitrary values, we intersect later */ | 663 | /* |
593 | /* Increment this if we ever support >= 40 MHz channels | 664 | * These are large arbitrary values we use to intersect later. |
594 | * in IEEE 802.11 */ | 665 | * Increment this if we ever support >= 40 MHz channels |
666 | * in IEEE 802.11 | ||
667 | */ | ||
595 | freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40); | 668 | freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40); |
596 | power_rule->max_antenna_gain = DBI_TO_MBI(100); | 669 | power_rule->max_antenna_gain = DBI_TO_MBI(100); |
597 | power_rule->max_eirp = DBM_TO_MBM(100); | 670 | power_rule->max_eirp = DBM_TO_MBM(100); |
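A worked example of the ±10 MHz adjustment, using a hypothetical 5 GHz triplet (first channel 36, four channels). The spacing of 4 matches the end_channel computation above, and the resulting edges line up with the 5180-10 / 5240+10 rule in the world regulatory domain earlier in this file; the channel-to-frequency mapping is an assumption about ieee80211_channel_to_frequency():

	/* Hypothetical 5 GHz triplet: first_channel = 36, num_channels = 4 */
	int first_channel = 36, num_channels = 4;

	/* 5 GHz channels are spaced 4 apart, as in the hunk above */
	int end_channel = first_channel + (4 * (num_channels - 1));	/* 48 */

	/* Assumed mapping: 5 GHz channel -> 5000 + 5 * channel MHz */
	int start_center = 5000 + 5 * first_channel;	/* 5180 MHz */
	int end_center   = 5000 + 5 * end_channel;	/* 5240 MHz */

	u32 start_freq_khz = MHZ_TO_KHZ(start_center - 10);	/* 5170000 kHz edge */
	u32 end_freq_khz   = MHZ_TO_KHZ(end_center + 10);	/* 5250000 kHz edge */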
@@ -607,8 +680,10 @@ static struct ieee80211_regdomain *country_ie_2_rd( | |||
607 | } | 680 | } |
608 | 681 | ||
609 | 682 | ||
610 | /* Helper for regdom_intersect(), this does the real | 683 | /* |
611 | * mathematical intersection fun */ | 684 | * Helper for regdom_intersect(), this does the real |
685 | * mathematical intersection fun | ||
686 | */ | ||
612 | static int reg_rules_intersect( | 687 | static int reg_rules_intersect( |
613 | const struct ieee80211_reg_rule *rule1, | 688 | const struct ieee80211_reg_rule *rule1, |
614 | const struct ieee80211_reg_rule *rule2, | 689 | const struct ieee80211_reg_rule *rule2, |
@@ -686,11 +761,13 @@ static struct ieee80211_regdomain *regdom_intersect( | |||
686 | if (!rd1 || !rd2) | 761 | if (!rd1 || !rd2) |
687 | return NULL; | 762 | return NULL; |
688 | 763 | ||
689 | /* First we get a count of the rules we'll need, then we actually | 764 | /* |
765 | * First we get a count of the rules we'll need, then we actually | ||
690 | * build them. This is to so we can malloc() and free() a | 766 | * build them. This is so we can malloc() and free() a |
691 | * regdomain once. The reason we use reg_rules_intersect() here | 767 | * regdomain once. The reason we use reg_rules_intersect() here |
692 | * is it will return -EINVAL if the rule computed makes no sense. | 768 | * is it will return -EINVAL if the rule computed makes no sense. |
693 | * All rules that do check out OK are valid. */ | 769 | * All rules that do check out OK are valid. |
770 | */ | ||
694 | 771 | ||
695 | for (x = 0; x < rd1->n_reg_rules; x++) { | 772 | for (x = 0; x < rd1->n_reg_rules; x++) { |
696 | rule1 = &rd1->reg_rules[x]; | 773 | rule1 = &rd1->reg_rules[x]; |
@@ -718,14 +795,18 @@ static struct ieee80211_regdomain *regdom_intersect( | |||
718 | rule1 = &rd1->reg_rules[x]; | 795 | rule1 = &rd1->reg_rules[x]; |
719 | for (y = 0; y < rd2->n_reg_rules; y++) { | 796 | for (y = 0; y < rd2->n_reg_rules; y++) { |
720 | rule2 = &rd2->reg_rules[y]; | 797 | rule2 = &rd2->reg_rules[y]; |
721 | /* This time around instead of using the stack lets | 798 | /* |
799 | * This time around instead of using the stack lets | ||
722 | * write to the target rule directly saving ourselves | 800 | * write to the target rule directly saving ourselves |
723 | * a memcpy() */ | 801 | * a memcpy() |
802 | */ | ||
724 | intersected_rule = &rd->reg_rules[rule_idx]; | 803 | intersected_rule = &rd->reg_rules[rule_idx]; |
725 | r = reg_rules_intersect(rule1, rule2, | 804 | r = reg_rules_intersect(rule1, rule2, |
726 | intersected_rule); | 805 | intersected_rule); |
727 | /* No need to memset here the intersected rule here as | 806 | /* |
728 | * we're not using the stack anymore */ | 807 | * No need to memset the intersected rule here as |
808 | * we're not using the stack anymore | ||
809 | */ | ||
729 | if (r) | 810 | if (r) |
730 | continue; | 811 | continue; |
731 | rule_idx++; | 812 | rule_idx++; |
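The intersection therefore runs in two passes, as the comment above says: count the surviving rules first, then allocate the variable-length regdomain once and write each intersected rule straight into its slot. A simplified sketch of that pattern (not the kernel function itself):

	static struct ieee80211_regdomain *
	intersect_sketch(const struct ieee80211_regdomain *rd1,
			 const struct ieee80211_regdomain *rd2)
	{
		struct ieee80211_regdomain *rd;
		struct ieee80211_reg_rule dummy;
		unsigned int x, y, num_rules = 0, rule_idx = 0;

		/* pass 1: count how many rule pairs intersect cleanly */
		for (x = 0; x < rd1->n_reg_rules; x++)
			for (y = 0; y < rd2->n_reg_rules; y++)
				if (!reg_rules_intersect(&rd1->reg_rules[x],
							 &rd2->reg_rules[y], &dummy))
					num_rules++;

		rd = kzalloc(sizeof(*rd) +
			     num_rules * sizeof(struct ieee80211_reg_rule),
			     GFP_KERNEL);
		if (!rd)
			return NULL;

		/* pass 2: write each surviving rule directly into the target */
		for (x = 0; x < rd1->n_reg_rules; x++)
			for (y = 0; y < rd2->n_reg_rules; y++)
				if (!reg_rules_intersect(&rd1->reg_rules[x],
							 &rd2->reg_rules[y],
							 &rd->reg_rules[rule_idx]))
					rule_idx++;

		rd->n_reg_rules = num_rules;
		/* the "98" intersection alpha2 marker would be set here */
		return rd;
	}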
@@ -744,8 +825,10 @@ static struct ieee80211_regdomain *regdom_intersect( | |||
744 | return rd; | 825 | return rd; |
745 | } | 826 | } |
746 | 827 | ||
747 | /* XXX: add support for the rest of enum nl80211_reg_rule_flags, we may | 828 | /* |
748 | * want to just have the channel structure use these */ | 829 | * XXX: add support for the rest of enum nl80211_reg_rule_flags, we may |
830 | * want to just have the channel structure use these | ||
831 | */ | ||
749 | static u32 map_regdom_flags(u32 rd_flags) | 832 | static u32 map_regdom_flags(u32 rd_flags) |
750 | { | 833 | { |
751 | u32 channel_flags = 0; | 834 | u32 channel_flags = 0; |
@@ -771,10 +854,12 @@ static int freq_reg_info_regd(struct wiphy *wiphy, | |||
771 | 854 | ||
772 | regd = custom_regd ? custom_regd : cfg80211_regdomain; | 855 | regd = custom_regd ? custom_regd : cfg80211_regdomain; |
773 | 856 | ||
774 | /* Follow the driver's regulatory domain, if present, unless a country | 857 | /* |
775 | * IE has been processed or a user wants to help complaince further */ | 858 | * Follow the driver's regulatory domain, if present, unless a country |
776 | if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE && | 859 | * IE has been processed or a user wants to help compliance further |
777 | last_request->initiator != REGDOM_SET_BY_USER && | 860 | */ |
861 | if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && | ||
862 | last_request->initiator != NL80211_REGDOM_SET_BY_USER && | ||
778 | wiphy->regd) | 863 | wiphy->regd) |
779 | regd = wiphy->regd; | 864 | regd = wiphy->regd; |
780 | 865 | ||
@@ -790,9 +875,11 @@ static int freq_reg_info_regd(struct wiphy *wiphy, | |||
790 | fr = &rr->freq_range; | 875 | fr = &rr->freq_range; |
791 | pr = &rr->power_rule; | 876 | pr = &rr->power_rule; |
792 | 877 | ||
793 | /* We only need to know if one frequency rule was | 878 | /* |
879 | * We only need to know if one frequency rule was | ||
794 | * was in center_freq's band, that's enough, so lets | 880 | * in center_freq's band, that's enough, so let's |
795 | * not overwrite it once found */ | 881 | * not overwrite it once found |
882 | */ | ||
796 | if (!band_rule_found) | 883 | if (!band_rule_found) |
797 | band_rule_found = freq_in_rule_band(fr, center_freq); | 884 | band_rule_found = freq_in_rule_band(fr, center_freq); |
798 | 885 | ||
@@ -829,6 +916,11 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, | |||
829 | const struct ieee80211_power_rule *power_rule = NULL; | 916 | const struct ieee80211_power_rule *power_rule = NULL; |
830 | struct ieee80211_supported_band *sband; | 917 | struct ieee80211_supported_band *sband; |
831 | struct ieee80211_channel *chan; | 918 | struct ieee80211_channel *chan; |
919 | struct wiphy *request_wiphy = NULL; | ||
920 | |||
921 | assert_cfg80211_lock(); | ||
922 | |||
923 | request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); | ||
832 | 924 | ||
833 | sband = wiphy->bands[band]; | 925 | sband = wiphy->bands[band]; |
834 | BUG_ON(chan_idx >= sband->n_channels); | 926 | BUG_ON(chan_idx >= sband->n_channels); |
@@ -840,7 +932,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, | |||
840 | &max_bandwidth, &reg_rule); | 932 | &max_bandwidth, &reg_rule); |
841 | 933 | ||
842 | if (r) { | 934 | if (r) { |
843 | /* This means no regulatory rule was found in the country IE | 935 | /* |
936 | * This means no regulatory rule was found in the country IE | ||
844 | * with a frequency range on the center_freq's band, since | 937 | * with a frequency range on the center_freq's band, since |
845 | * IEEE-802.11 allows for a country IE to have a subset of the | 938 | * IEEE-802.11 allows for a country IE to have a subset of the |
846 | * regulatory information provided in a country we ignore | 939 | * regulatory information provided in a country we ignore |
@@ -851,7 +944,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, | |||
851 | * http://tinyurl.com/11d-clarification | 944 | * http://tinyurl.com/11d-clarification |
852 | */ | 945 | */ |
853 | if (r == -ERANGE && | 946 | if (r == -ERANGE && |
854 | last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { | 947 | last_request->initiator == |
948 | NL80211_REGDOM_SET_BY_COUNTRY_IE) { | ||
855 | #ifdef CONFIG_CFG80211_REG_DEBUG | 949 | #ifdef CONFIG_CFG80211_REG_DEBUG |
856 | printk(KERN_DEBUG "cfg80211: Leaving channel %d MHz " | 950 | printk(KERN_DEBUG "cfg80211: Leaving channel %d MHz " |
857 | "intact on %s - no rule found in band on " | 951 | "intact on %s - no rule found in band on " |
@@ -859,10 +953,13 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, | |||
859 | chan->center_freq, wiphy_name(wiphy)); | 953 | chan->center_freq, wiphy_name(wiphy)); |
860 | #endif | 954 | #endif |
861 | } else { | 955 | } else { |
862 | /* In this case we know the country IE has at least one reg rule | 956 | /* |
863 | * for the band so we respect its band definitions */ | 957 | * In this case we know the country IE has at least one reg rule |
958 | * for the band so we respect its band definitions | ||
959 | */ | ||
864 | #ifdef CONFIG_CFG80211_REG_DEBUG | 960 | #ifdef CONFIG_CFG80211_REG_DEBUG |
865 | if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) | 961 | if (last_request->initiator == |
962 | NL80211_REGDOM_SET_BY_COUNTRY_IE) | ||
866 | printk(KERN_DEBUG "cfg80211: Disabling " | 963 | printk(KERN_DEBUG "cfg80211: Disabling " |
867 | "channel %d MHz on %s due to " | 964 | "channel %d MHz on %s due to " |
868 | "Country IE\n", | 965 | "Country IE\n", |
@@ -876,12 +973,14 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, | |||
876 | 973 | ||
877 | power_rule = &reg_rule->power_rule; | 974 | power_rule = &reg_rule->power_rule; |
878 | 975 | ||
879 | if (last_request->initiator == REGDOM_SET_BY_DRIVER && | 976 | if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && |
880 | last_request->wiphy && last_request->wiphy == wiphy && | 977 | request_wiphy && request_wiphy == wiphy && |
881 | last_request->wiphy->strict_regulatory) { | 978 | request_wiphy->strict_regulatory) { |
882 | /* This gaurantees the driver's requested regulatory domain | 979 | /* |
980 | * This gaurantees the driver's requested regulatory domain | ||
883 | * will always be used as a base for further regulatory | 981 | * will always be used as a base for further regulatory |
884 | * settings */ | 982 | * settings |
983 | */ | ||
885 | chan->flags = chan->orig_flags = | 984 | chan->flags = chan->orig_flags = |
886 | map_regdom_flags(reg_rule->flags); | 985 | map_regdom_flags(reg_rule->flags); |
887 | chan->max_antenna_gain = chan->orig_mag = | 986 | chan->max_antenna_gain = chan->orig_mag = |
@@ -915,39 +1014,147 @@ static void handle_band(struct wiphy *wiphy, enum ieee80211_band band) | |||
915 | handle_channel(wiphy, band, i); | 1014 | handle_channel(wiphy, band, i); |
916 | } | 1015 | } |
917 | 1016 | ||
918 | static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby) | 1017 | static bool ignore_reg_update(struct wiphy *wiphy, |
1018 | enum nl80211_reg_initiator initiator) | ||
919 | { | 1019 | { |
920 | if (!last_request) | 1020 | if (!last_request) |
921 | return true; | 1021 | return true; |
922 | if (setby == REGDOM_SET_BY_CORE && | 1022 | if (initiator == NL80211_REGDOM_SET_BY_CORE && |
923 | wiphy->custom_regulatory) | 1023 | wiphy->custom_regulatory) |
924 | return true; | 1024 | return true; |
925 | /* wiphy->regd will be set once the device has its own | 1025 | /* |
926 | * desired regulatory domain set */ | 1026 | * wiphy->regd will be set once the device has its own |
1027 | * desired regulatory domain set | ||
1028 | */ | ||
927 | if (wiphy->strict_regulatory && !wiphy->regd && | 1029 | if (wiphy->strict_regulatory && !wiphy->regd && |
928 | !is_world_regdom(last_request->alpha2)) | 1030 | !is_world_regdom(last_request->alpha2)) |
929 | return true; | 1031 | return true; |
930 | return false; | 1032 | return false; |
931 | } | 1033 | } |
932 | 1034 | ||
933 | static void update_all_wiphy_regulatory(enum reg_set_by setby) | 1035 | static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) |
934 | { | 1036 | { |
935 | struct cfg80211_registered_device *drv; | 1037 | struct cfg80211_registered_device *drv; |
936 | 1038 | ||
937 | list_for_each_entry(drv, &cfg80211_drv_list, list) | 1039 | list_for_each_entry(drv, &cfg80211_drv_list, list) |
938 | wiphy_update_regulatory(&drv->wiphy, setby); | 1040 | wiphy_update_regulatory(&drv->wiphy, initiator); |
939 | } | 1041 | } |
940 | 1042 | ||
941 | void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) | 1043 | static void handle_reg_beacon(struct wiphy *wiphy, |
1044 | unsigned int chan_idx, | ||
1045 | struct reg_beacon *reg_beacon) | ||
942 | { | 1046 | { |
943 | enum ieee80211_band band; | 1047 | #ifdef CONFIG_CFG80211_REG_DEBUG |
1048 | #define REG_DEBUG_BEACON_FLAG(desc) \ | ||
1049 | printk(KERN_DEBUG "cfg80211: Enabling " desc " on " \ | ||
1050 | "frequency: %d MHz (Ch %d) on %s\n", \ | ||
1051 | reg_beacon->chan.center_freq, \ | ||
1052 | ieee80211_frequency_to_channel(reg_beacon->chan.center_freq), \ | ||
1053 | wiphy_name(wiphy)); | ||
1054 | #else | ||
1055 | #define REG_DEBUG_BEACON_FLAG(desc) do {} while (0) | ||
1056 | #endif | ||
1057 | struct ieee80211_supported_band *sband; | ||
1058 | struct ieee80211_channel *chan; | ||
1059 | |||
1060 | assert_cfg80211_lock(); | ||
1061 | |||
1062 | sband = wiphy->bands[reg_beacon->chan.band]; | ||
1063 | chan = &sband->channels[chan_idx]; | ||
1064 | |||
1065 | if (likely(chan->center_freq != reg_beacon->chan.center_freq)) | ||
1066 | return; | ||
1067 | |||
1068 | if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) { | ||
1069 | chan->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN; | ||
1070 | REG_DEBUG_BEACON_FLAG("active scanning"); | ||
1071 | } | ||
1072 | |||
1073 | if (chan->flags & IEEE80211_CHAN_NO_IBSS) { | ||
1074 | chan->flags &= ~IEEE80211_CHAN_NO_IBSS; | ||
1075 | REG_DEBUG_BEACON_FLAG("beaconing"); | ||
1076 | } | ||
1077 | |||
1078 | chan->beacon_found = true; | ||
1079 | #undef REG_DEBUG_BEACON_FLAG | ||
1080 | } | ||
1081 | |||
1082 | /* | ||
1083 | * Called when a scan on a wiphy finds a beacon on | ||
1084 | * a new channel | ||
1085 | */ | ||
1086 | static void wiphy_update_new_beacon(struct wiphy *wiphy, | ||
1087 | struct reg_beacon *reg_beacon) | ||
1088 | { | ||
1089 | unsigned int i; | ||
1090 | struct ieee80211_supported_band *sband; | ||
1091 | |||
1092 | assert_cfg80211_lock(); | ||
944 | 1093 | ||
945 | if (ignore_reg_update(wiphy, setby)) | 1094 | if (!wiphy->bands[reg_beacon->chan.band]) |
946 | return; | 1095 | return; |
1096 | |||
1097 | sband = wiphy->bands[reg_beacon->chan.band]; | ||
1098 | |||
1099 | for (i = 0; i < sband->n_channels; i++) | ||
1100 | handle_reg_beacon(wiphy, i, reg_beacon); | ||
1101 | } | ||
1102 | |||
1103 | /* | ||
1104 | * Called upon reg changes or a new wiphy is added | ||
1105 | */ | ||
1106 | static void wiphy_update_beacon_reg(struct wiphy *wiphy) | ||
1107 | { | ||
1108 | unsigned int i; | ||
1109 | struct ieee80211_supported_band *sband; | ||
1110 | struct reg_beacon *reg_beacon; | ||
1111 | |||
1112 | assert_cfg80211_lock(); | ||
1113 | |||
1114 | if (list_empty(&reg_beacon_list)) | ||
1115 | return; | ||
1116 | |||
1117 | list_for_each_entry(reg_beacon, &reg_beacon_list, list) { | ||
1118 | if (!wiphy->bands[reg_beacon->chan.band]) | ||
1119 | continue; | ||
1120 | sband = wiphy->bands[reg_beacon->chan.band]; | ||
1121 | for (i = 0; i < sband->n_channels; i++) | ||
1122 | handle_reg_beacon(wiphy, i, reg_beacon); | ||
1123 | } | ||
1124 | } | ||
1125 | |||
1126 | static bool reg_is_world_roaming(struct wiphy *wiphy) | ||
1127 | { | ||
1128 | if (is_world_regdom(cfg80211_regdomain->alpha2) || | ||
1129 | (wiphy->regd && is_world_regdom(wiphy->regd->alpha2))) | ||
1130 | return true; | ||
1131 | if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && | ||
1132 | wiphy->custom_regulatory) | ||
1133 | return true; | ||
1134 | return false; | ||
1135 | } | ||
1136 | |||
1137 | /* Reap the advantages of previously found beacons */ | ||
1138 | static void reg_process_beacons(struct wiphy *wiphy) | ||
1139 | { | ||
1140 | if (!reg_is_world_roaming(wiphy)) | ||
1141 | return; | ||
1142 | wiphy_update_beacon_reg(wiphy); | ||
1143 | } | ||
1144 | |||
1145 | void wiphy_update_regulatory(struct wiphy *wiphy, | ||
1146 | enum nl80211_reg_initiator initiator) | ||
1147 | { | ||
1148 | enum ieee80211_band band; | ||
1149 | |||
1150 | if (ignore_reg_update(wiphy, initiator)) | ||
1151 | goto out; | ||
947 | for (band = 0; band < IEEE80211_NUM_BANDS; band++) { | 1152 | for (band = 0; band < IEEE80211_NUM_BANDS; band++) { |
948 | if (wiphy->bands[band]) | 1153 | if (wiphy->bands[band]) |
949 | handle_band(wiphy, band); | 1154 | handle_band(wiphy, band); |
950 | } | 1155 | } |
1156 | out: | ||
1157 | reg_process_beacons(wiphy); | ||
951 | if (wiphy->reg_notifier) | 1158 | if (wiphy->reg_notifier) |
952 | wiphy->reg_notifier(wiphy, last_request); | 1159 | wiphy->reg_notifier(wiphy, last_request); |
953 | } | 1160 | } |
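Concretely, under the expanded world domain the 2.4 GHz channels 12 and 13 start out flagged passive-scan and no-IBSS; once a scan hears a real beacon on one of them, handle_reg_beacon() above lifts those restrictions for that single channel. A small illustration with hypothetical values:

	/* Hypothetical channel 13 as derived from the world regulatory domain */
	struct ieee80211_channel ch = {
		.band		= IEEE80211_BAND_2GHZ,
		.center_freq	= 2472,
		.flags		= IEEE80211_CHAN_PASSIVE_SCAN | IEEE80211_CHAN_NO_IBSS,
	};

	/*
	 * After a beacon is seen on 2472 MHz, handle_reg_beacon() clears both
	 * flags and records the discovery, so active scanning and IBSS become
	 * allowed on this one channel while world-roaming:
	 *
	 *   ch.flags &= ~IEEE80211_CHAN_PASSIVE_SCAN;
	 *   ch.flags &= ~IEEE80211_CHAN_NO_IBSS;
	 *   ch.beacon_found = true;
	 */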
@@ -1033,81 +1240,98 @@ static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, | |||
1033 | return 0; | 1240 | return 0; |
1034 | } | 1241 | } |
1035 | 1242 | ||
1036 | /* Return value which can be used by ignore_request() to indicate | 1243 | /* |
1037 | * it has been determined we should intersect two regulatory domains */ | 1244 | * Return value which can be used by ignore_request() to indicate |
1245 | * it has been determined we should intersect two regulatory domains | ||
1246 | */ | ||
1038 | #define REG_INTERSECT 1 | 1247 | #define REG_INTERSECT 1 |
1039 | 1248 | ||
1040 | /* This has the logic which determines when a new request | 1249 | /* This has the logic which determines when a new request |
1041 | * should be ignored. */ | 1250 | * should be ignored. */ |
1042 | static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, | 1251 | static int ignore_request(struct wiphy *wiphy, |
1043 | const char *alpha2) | 1252 | struct regulatory_request *pending_request) |
1044 | { | 1253 | { |
1254 | struct wiphy *last_wiphy = NULL; | ||
1255 | |||
1256 | assert_cfg80211_lock(); | ||
1257 | |||
1045 | /* All initial requests are respected */ | 1258 | /* All initial requests are respected */ |
1046 | if (!last_request) | 1259 | if (!last_request) |
1047 | return 0; | 1260 | return 0; |
1048 | 1261 | ||
1049 | switch (set_by) { | 1262 | switch (pending_request->initiator) { |
1050 | case REGDOM_SET_BY_INIT: | 1263 | case NL80211_REGDOM_SET_BY_CORE: |
1051 | return -EINVAL; | 1264 | return -EINVAL; |
1052 | case REGDOM_SET_BY_CORE: | 1265 | case NL80211_REGDOM_SET_BY_COUNTRY_IE: |
1053 | /* | 1266 | |
1054 | * Always respect new wireless core hints, should only happen | 1267 | last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); |
1055 | * when updating the world regulatory domain at init. | 1268 | |
1056 | */ | 1269 | if (unlikely(!is_an_alpha2(pending_request->alpha2))) |
1057 | return 0; | ||
1058 | case REGDOM_SET_BY_COUNTRY_IE: | ||
1059 | if (unlikely(!is_an_alpha2(alpha2))) | ||
1060 | return -EINVAL; | 1270 | return -EINVAL; |
1061 | if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { | 1271 | if (last_request->initiator == |
1062 | if (last_request->wiphy != wiphy) { | 1272 | NL80211_REGDOM_SET_BY_COUNTRY_IE) { |
1273 | if (last_wiphy != wiphy) { | ||
1063 | /* | 1274 | /* |
1064 | * Two cards with two APs claiming different | 1275 | * Two cards with two APs claiming different |
1065 | * different Country IE alpha2s. We could | 1276 | * different Country IE alpha2s. We could |
1066 | * intersect them, but that seems unlikely | 1277 | * intersect them, but that seems unlikely |
1067 | * to be correct. Reject second one for now. | 1278 | * to be correct. Reject second one for now. |
1068 | */ | 1279 | */ |
1069 | if (!alpha2_equal(alpha2, | 1280 | if (regdom_changes(pending_request->alpha2)) |
1070 | cfg80211_regdomain->alpha2)) | ||
1071 | return -EOPNOTSUPP; | 1281 | return -EOPNOTSUPP; |
1072 | return -EALREADY; | 1282 | return -EALREADY; |
1073 | } | 1283 | } |
1074 | /* Two consecutive Country IE hints on the same wiphy. | 1284 | /* |
1075 | * This should be picked up early by the driver/stack */ | 1285 | * Two consecutive Country IE hints on the same wiphy. |
1076 | if (WARN_ON(!alpha2_equal(cfg80211_regdomain->alpha2, | 1286 | * This should be picked up early by the driver/stack |
1077 | alpha2))) | 1287 | */ |
1288 | if (WARN_ON(regdom_changes(pending_request->alpha2))) | ||
1078 | return 0; | 1289 | return 0; |
1079 | return -EALREADY; | 1290 | return -EALREADY; |
1080 | } | 1291 | } |
1081 | return REG_INTERSECT; | 1292 | return REG_INTERSECT; |
1082 | case REGDOM_SET_BY_DRIVER: | 1293 | case NL80211_REGDOM_SET_BY_DRIVER: |
1083 | if (last_request->initiator == REGDOM_SET_BY_CORE) { | 1294 | if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE) { |
1084 | if (is_old_static_regdom(cfg80211_regdomain)) | 1295 | if (is_old_static_regdom(cfg80211_regdomain)) |
1085 | return 0; | 1296 | return 0; |
1086 | if (!alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) | 1297 | if (regdom_changes(pending_request->alpha2)) |
1087 | return 0; | 1298 | return 0; |
1088 | return -EALREADY; | 1299 | return -EALREADY; |
1089 | } | 1300 | } |
1301 | |||
1302 | /* | ||
1303 | * This would happen if you unplug and plug your card | ||
1304 | * back in or if you add a new device for which the previously | ||
1305 | * loaded card also agrees on the regulatory domain. | ||
1306 | */ | ||
1307 | if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && | ||
1308 | !regdom_changes(pending_request->alpha2)) | ||
1309 | return -EALREADY; | ||
1310 | |||
1090 | return REG_INTERSECT; | 1311 | return REG_INTERSECT; |
1091 | case REGDOM_SET_BY_USER: | 1312 | case NL80211_REGDOM_SET_BY_USER: |
1092 | if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) | 1313 | if (last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) |
1093 | return REG_INTERSECT; | 1314 | return REG_INTERSECT; |
1094 | /* If the user knows better the user should set the regdom | 1315 | /* |
1095 | * to their country before the IE is picked up */ | 1316 | * If the user knows better the user should set the regdom |
1096 | if (last_request->initiator == REGDOM_SET_BY_USER && | 1317 | * to their country before the IE is picked up |
1318 | */ | ||
1319 | if (last_request->initiator == NL80211_REGDOM_SET_BY_USER && | ||
1097 | last_request->intersect) | 1320 | last_request->intersect) |
1098 | return -EOPNOTSUPP; | 1321 | return -EOPNOTSUPP; |
1099 | /* Process user requests only after previous user/driver/core | 1322 | /* |
1100 | * requests have been processed */ | 1323 | * Process user requests only after previous user/driver/core |
1101 | if (last_request->initiator == REGDOM_SET_BY_CORE || | 1324 | * requests have been processed |
1102 | last_request->initiator == REGDOM_SET_BY_DRIVER || | 1325 | */ |
1103 | last_request->initiator == REGDOM_SET_BY_USER) { | 1326 | if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE || |
1104 | if (!alpha2_equal(last_request->alpha2, | 1327 | last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER || |
1105 | cfg80211_regdomain->alpha2)) | 1328 | last_request->initiator == NL80211_REGDOM_SET_BY_USER) { |
1329 | if (regdom_changes(last_request->alpha2)) | ||
1106 | return -EAGAIN; | 1330 | return -EAGAIN; |
1107 | } | 1331 | } |
1108 | 1332 | ||
1109 | if (!is_old_static_regdom(cfg80211_regdomain) && | 1333 | if (!is_old_static_regdom(cfg80211_regdomain) && |
1110 | alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) | 1334 | !regdom_changes(pending_request->alpha2)) |
1111 | return -EALREADY; | 1335 | return -EALREADY; |
1112 | 1336 | ||
1113 | return 0; | 1337 | return 0; |
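A few example outcomes of ignore_request(), based only on the branches shown above (the sequences themselves are hypothetical):

	/*
	 *  current state                     new hint                result
	 *  --------------------------------  ----------------------  -------------
	 *  core hint "00" (world) applied    driver hints "US"       0 (call CRDA)
	 *  driver "US" already applied       same driver hints "US"  -EALREADY
	 *  country IE "JP" from wiphy A      user hints "DE"         REG_INTERSECT
	 *  country IE "JP" from wiphy A      country IE "US" from    -EOPNOTSUPP
	 *                                    a different wiphy
	 */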
@@ -1116,59 +1340,80 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, | |||
1116 | return -EINVAL; | 1340 | return -EINVAL; |
1117 | } | 1341 | } |
1118 | 1342 | ||
1119 | /* Caller must hold &cfg80211_drv_mutex */ | 1343 | /** |
1120 | int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, | 1344 | * __regulatory_hint - hint to the wireless core a regulatory domain |
1121 | const char *alpha2, | 1345 | * @wiphy: if the hint comes from country information from an AP, this |
1122 | u32 country_ie_checksum, | 1346 | * is required to be set to the wiphy that received the information |
1123 | enum environment_cap env) | 1347 | * @pending_request: the regulatory request currently being processed |
1348 | * | ||
1349 | * The Wireless subsystem can use this function to hint to the wireless core | ||
1350 | * what it believes should be the current regulatory domain. | ||
1351 | * | ||
1352 | * Returns zero if all went fine, %-EALREADY if a regulatory domain had | ||
1353 | * already been set or other standard error codes. | ||
1354 | * | ||
1355 | * Caller must hold &cfg80211_mutex | ||
1356 | */ | ||
1357 | static int __regulatory_hint(struct wiphy *wiphy, | ||
1358 | struct regulatory_request *pending_request) | ||
1124 | { | 1359 | { |
1125 | struct regulatory_request *request; | ||
1126 | bool intersect = false; | 1360 | bool intersect = false; |
1127 | int r = 0; | 1361 | int r = 0; |
1128 | 1362 | ||
1129 | r = ignore_request(wiphy, set_by, alpha2); | 1363 | assert_cfg80211_lock(); |
1364 | |||
1365 | r = ignore_request(wiphy, pending_request); | ||
1130 | 1366 | ||
1131 | if (r == REG_INTERSECT) { | 1367 | if (r == REG_INTERSECT) { |
1132 | if (set_by == REGDOM_SET_BY_DRIVER) { | 1368 | if (pending_request->initiator == |
1369 | NL80211_REGDOM_SET_BY_DRIVER) { | ||
1133 | r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); | 1370 | r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); |
1134 | if (r) | 1371 | if (r) { |
1372 | kfree(pending_request); | ||
1135 | return r; | 1373 | return r; |
1374 | } | ||
1136 | } | 1375 | } |
1137 | intersect = true; | 1376 | intersect = true; |
1138 | } else if (r) { | 1377 | } else if (r) { |
1139 | /* If the regulatory domain being requested by the | 1378 | /* |
1379 | * If the regulatory domain being requested by the | ||
1140 | * driver has already been set just copy it to the | 1380 | * driver has already been set just copy it to the |
1141 | * wiphy */ | 1381 | * wiphy |
1142 | if (r == -EALREADY && set_by == REGDOM_SET_BY_DRIVER) { | 1382 | */ |
1383 | if (r == -EALREADY && | ||
1384 | pending_request->initiator == | ||
1385 | NL80211_REGDOM_SET_BY_DRIVER) { | ||
1143 | r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); | 1386 | r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); |
1144 | if (r) | 1387 | if (r) { |
1388 | kfree(pending_request); | ||
1145 | return r; | 1389 | return r; |
1390 | } | ||
1146 | r = -EALREADY; | 1391 | r = -EALREADY; |
1147 | goto new_request; | 1392 | goto new_request; |
1148 | } | 1393 | } |
1394 | kfree(pending_request); | ||
1149 | return r; | 1395 | return r; |
1150 | } | 1396 | } |
1151 | 1397 | ||
1152 | new_request: | 1398 | new_request: |
1153 | request = kzalloc(sizeof(struct regulatory_request), | 1399 | kfree(last_request); |
1154 | GFP_KERNEL); | ||
1155 | if (!request) | ||
1156 | return -ENOMEM; | ||
1157 | 1400 | ||
1158 | request->alpha2[0] = alpha2[0]; | 1401 | last_request = pending_request; |
1159 | request->alpha2[1] = alpha2[1]; | 1402 | last_request->intersect = intersect; |
1160 | request->initiator = set_by; | ||
1161 | request->wiphy = wiphy; | ||
1162 | request->intersect = intersect; | ||
1163 | request->country_ie_checksum = country_ie_checksum; | ||
1164 | request->country_ie_env = env; | ||
1165 | 1403 | ||
1166 | kfree(last_request); | 1404 | pending_request = NULL; |
1167 | last_request = request; | ||
1168 | 1405 | ||
1169 | /* When r == REG_INTERSECT we do need to call CRDA */ | 1406 | /* When r == REG_INTERSECT we do need to call CRDA */ |
1170 | if (r < 0) | 1407 | if (r < 0) { |
1408 | /* | ||
1409 | * Since CRDA will not be called in this case as we already | ||
1410 | * have applied the requested regulatory domain before we just | ||
1411 | * inform userspace we have processed the request | ||
1412 | */ | ||
1413 | if (r == -EALREADY) | ||
1414 | nl80211_send_reg_change_event(last_request); | ||
1171 | return r; | 1415 | return r; |
1416 | } | ||
1172 | 1417 | ||
1173 | /* | 1418 | /* |
1174 | * Note: When CONFIG_WIRELESS_OLD_REGULATORY is enabled | 1419 | * Note: When CONFIG_WIRELESS_OLD_REGULATORY is enabled |
@@ -1180,34 +1425,194 @@ new_request: | |||
1180 | * | 1425 | * |
1181 | * to intersect with the static rd | 1426 | * to intersect with the static rd |
1182 | */ | 1427 | */ |
1183 | return call_crda(alpha2); | 1428 | return call_crda(last_request->alpha2); |
1184 | } | 1429 | } |
1185 | 1430 | ||
1186 | void regulatory_hint(struct wiphy *wiphy, const char *alpha2) | 1431 | /* This currently only processes user and driver regulatory hints */ |
1432 | static void reg_process_hint(struct regulatory_request *reg_request) | ||
1187 | { | 1433 | { |
1188 | int r; | 1434 | int r = 0; |
1189 | BUG_ON(!alpha2); | 1435 | struct wiphy *wiphy = NULL; |
1436 | |||
1437 | BUG_ON(!reg_request->alpha2); | ||
1438 | |||
1439 | mutex_lock(&cfg80211_mutex); | ||
1440 | |||
1441 | if (wiphy_idx_valid(reg_request->wiphy_idx)) | ||
1442 | wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); | ||
1443 | |||
1444 | if (reg_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && | ||
1445 | !wiphy) { | ||
1446 | kfree(reg_request); | ||
1447 | goto out; | ||
1448 | } | ||
1190 | 1449 | ||
1191 | mutex_lock(&cfg80211_drv_mutex); | 1450 | r = __regulatory_hint(wiphy, reg_request); |
1192 | r = __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, | ||
1193 | alpha2, 0, ENVIRON_ANY); | ||
1194 | /* This is required so that the orig_* parameters are saved */ | 1451 | /* This is required so that the orig_* parameters are saved */ |
1195 | if (r == -EALREADY && wiphy->strict_regulatory) | 1452 | if (r == -EALREADY && wiphy && wiphy->strict_regulatory) |
1196 | wiphy_update_regulatory(wiphy, REGDOM_SET_BY_DRIVER); | 1453 | wiphy_update_regulatory(wiphy, reg_request->initiator); |
1197 | mutex_unlock(&cfg80211_drv_mutex); | 1454 | out: |
1455 | mutex_unlock(&cfg80211_mutex); | ||
1456 | } | ||
1457 | |||
1458 | /* Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* */ | ||
1459 | static void reg_process_pending_hints(void) | ||
1460 | { | ||
1461 | struct regulatory_request *reg_request; | ||
1462 | |||
1463 | spin_lock(&reg_requests_lock); | ||
1464 | while (!list_empty(&reg_requests_list)) { | ||
1465 | reg_request = list_first_entry(&reg_requests_list, | ||
1466 | struct regulatory_request, | ||
1467 | list); | ||
1468 | list_del_init(&reg_request->list); | ||
1469 | |||
1470 | spin_unlock(&reg_requests_lock); | ||
1471 | reg_process_hint(reg_request); | ||
1472 | spin_lock(&reg_requests_lock); | ||
1473 | } | ||
1474 | spin_unlock(&reg_requests_lock); | ||
1475 | } | ||
1476 | |||
1477 | /* Processes beacon hints -- this has nothing to do with country IEs */ | ||
1478 | static void reg_process_pending_beacon_hints(void) | ||
1479 | { | ||
1480 | struct cfg80211_registered_device *drv; | ||
1481 | struct reg_beacon *pending_beacon, *tmp; | ||
1482 | |||
1483 | mutex_lock(&cfg80211_mutex); | ||
1484 | |||
1485 | /* This goes through the _pending_ beacon list */ | ||
1486 | spin_lock_bh(&reg_pending_beacons_lock); | ||
1487 | |||
1488 | if (list_empty(®_pending_beacons)) { | ||
1489 | spin_unlock_bh(&reg_pending_beacons_lock); | ||
1490 | goto out; | ||
1491 | } | ||
1492 | |||
1493 | list_for_each_entry_safe(pending_beacon, tmp, | ||
1494 | &reg_pending_beacons, list) { | ||
1495 | |||
1496 | list_del_init(&pending_beacon->list); | ||
1497 | |||
1498 | /* Applies the beacon hint to current wiphys */ | ||
1499 | list_for_each_entry(drv, &cfg80211_drv_list, list) | ||
1500 | wiphy_update_new_beacon(&drv->wiphy, pending_beacon); | ||
1501 | |||
1502 | /* Remembers the beacon hint for new wiphys or reg changes */ | ||
1503 | list_add_tail(&pending_beacon->list, &reg_beacon_list); | ||
1504 | } | ||
1505 | |||
1506 | spin_unlock_bh(&reg_pending_beacons_lock); | ||
1507 | out: | ||
1508 | mutex_unlock(&cfg80211_mutex); | ||
1509 | } | ||
1510 | |||
1511 | static void reg_todo(struct work_struct *work) | ||
1512 | { | ||
1513 | reg_process_pending_hints(); | ||
1514 | reg_process_pending_beacon_hints(); | ||
1515 | } | ||
1516 | |||
1517 | static DECLARE_WORK(reg_work, reg_todo); | ||
1518 | |||
1519 | static void queue_regulatory_request(struct regulatory_request *request) | ||
1520 | { | ||
1521 | spin_lock(&reg_requests_lock); | ||
1522 | list_add_tail(&request->list, &reg_requests_list); | ||
1523 | spin_unlock(&reg_requests_lock); | ||
1524 | |||
1525 | schedule_work(&reg_work); | ||
1526 | } | ||
1527 | |||
1528 | /* Core regulatory hint -- happens once during cfg80211_init() */ | ||
1529 | static int regulatory_hint_core(const char *alpha2) | ||
1530 | { | ||
1531 | struct regulatory_request *request; | ||
1532 | |||
1533 | BUG_ON(last_request); | ||
1534 | |||
1535 | request = kzalloc(sizeof(struct regulatory_request), | ||
1536 | GFP_KERNEL); | ||
1537 | if (!request) | ||
1538 | return -ENOMEM; | ||
1539 | |||
1540 | request->alpha2[0] = alpha2[0]; | ||
1541 | request->alpha2[1] = alpha2[1]; | ||
1542 | request->initiator = NL80211_REGDOM_SET_BY_CORE; | ||
1543 | |||
1544 | queue_regulatory_request(request); | ||
1545 | |||
1546 | return 0; | ||
1547 | } | ||
1548 | |||
1549 | /* User hints */ | ||
1550 | int regulatory_hint_user(const char *alpha2) | ||
1551 | { | ||
1552 | struct regulatory_request *request; | ||
1553 | |||
1554 | BUG_ON(!alpha2); | ||
1555 | |||
1556 | request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); | ||
1557 | if (!request) | ||
1558 | return -ENOMEM; | ||
1559 | |||
1560 | request->wiphy_idx = WIPHY_IDX_STALE; | ||
1561 | request->alpha2[0] = alpha2[0]; | ||
1562 | request->alpha2[1] = alpha2[1]; | ||
1563 | request->initiator = NL80211_REGDOM_SET_BY_USER; | ||
1564 | |||
1565 | queue_regulatory_request(request); | ||
1566 | |||
1567 | return 0; | ||
1568 | } | ||
1569 | |||
1570 | /* Driver hints */ | ||
1571 | int regulatory_hint(struct wiphy *wiphy, const char *alpha2) | ||
1572 | { | ||
1573 | struct regulatory_request *request; | ||
1574 | |||
1575 | BUG_ON(!alpha2); | ||
1576 | BUG_ON(!wiphy); | ||
1577 | |||
1578 | request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); | ||
1579 | if (!request) | ||
1580 | return -ENOMEM; | ||
1581 | |||
1582 | request->wiphy_idx = get_wiphy_idx(wiphy); | ||
1583 | |||
1584 | /* Must have registered wiphy first */ | ||
1585 | BUG_ON(!wiphy_idx_valid(request->wiphy_idx)); | ||
1586 | |||
1587 | request->alpha2[0] = alpha2[0]; | ||
1588 | request->alpha2[1] = alpha2[1]; | ||
1589 | request->initiator = NL80211_REGDOM_SET_BY_DRIVER; | ||
1590 | |||
1591 | queue_regulatory_request(request); | ||
1592 | |||
1593 | return 0; | ||
1198 | } | 1594 | } |
1199 | EXPORT_SYMBOL(regulatory_hint); | 1595 | EXPORT_SYMBOL(regulatory_hint); |
1200 | 1596 | ||
1201 | static bool reg_same_country_ie_hint(struct wiphy *wiphy, | 1597 | static bool reg_same_country_ie_hint(struct wiphy *wiphy, |
1202 | u32 country_ie_checksum) | 1598 | u32 country_ie_checksum) |
1203 | { | 1599 | { |
1204 | if (!last_request->wiphy) | 1600 | struct wiphy *request_wiphy; |
1601 | |||
1602 | assert_cfg80211_lock(); | ||
1603 | |||
1604 | request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); | ||
1605 | |||
1606 | if (!request_wiphy) | ||
1205 | return false; | 1607 | return false; |
1206 | if (likely(last_request->wiphy != wiphy)) | 1608 | |
1609 | if (likely(request_wiphy != wiphy)) | ||
1207 | return !country_ie_integrity_changes(country_ie_checksum); | 1610 | return !country_ie_integrity_changes(country_ie_checksum); |
1208 | /* We should not have let these through at this point, they | 1611 | /* |
1612 | * We should not have let these through at this point, they | ||
1209 | * should have been picked up earlier by the first alpha2 check | 1613 | * should have been picked up earlier by the first alpha2 check |
1210 | * on the device */ | 1614 | * on the device |
1615 | */ | ||
1211 | if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum))) | 1616 | if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum))) |
1212 | return true; | 1617 | return true; |
1213 | return false; | 1618 | return false; |
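The same hunk adds the queued entry points, regulatory_hint_core(), regulatory_hint_user() and the reworked regulatory_hint(), so a hint now just allocates a request, puts it on reg_requests_list and schedules reg_work. A minimal sketch of the expected driver-side usage; the mydrv_* names and the EEPROM helper are made up for illustration:

	/* Hypothetical driver probe path */
	static int mydrv_register(struct mydrv_priv *priv)
	{
		char alpha2[2];
		int err;

		err = wiphy_register(priv->wiphy);	/* must precede the hint */
		if (err)
			return err;

		/* e.g. a country code read from EEPROM (hypothetical helper) */
		mydrv_read_eeprom_country(priv, alpha2);

		/*
		 * Queues an NL80211_REGDOM_SET_BY_DRIVER request; the regulatory
		 * workqueue calls CRDA and updates channels asynchronously.
		 */
		return regulatory_hint(priv->wiphy, alpha2);
	}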
@@ -1221,11 +1626,14 @@ void regulatory_hint_11d(struct wiphy *wiphy, | |||
1221 | char alpha2[2]; | 1626 | char alpha2[2]; |
1222 | u32 checksum = 0; | 1627 | u32 checksum = 0; |
1223 | enum environment_cap env = ENVIRON_ANY; | 1628 | enum environment_cap env = ENVIRON_ANY; |
1629 | struct regulatory_request *request; | ||
1224 | 1630 | ||
1225 | if (!last_request) | 1631 | mutex_lock(&cfg80211_mutex); |
1226 | return; | ||
1227 | 1632 | ||
1228 | mutex_lock(&cfg80211_drv_mutex); | 1633 | if (unlikely(!last_request)) { |
1634 | mutex_unlock(&cfg80211_mutex); | ||
1635 | return; | ||
1636 | } | ||
1229 | 1637 | ||
1230 | /* IE len must be evenly divisible by 2 */ | 1638 | /* IE len must be evenly divisible by 2 */ |
1231 | if (country_ie_len & 0x01) | 1639 | if (country_ie_len & 0x01) |
@@ -1234,9 +1642,11 @@ void regulatory_hint_11d(struct wiphy *wiphy, | |||
1234 | if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) | 1642 | if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) |
1235 | goto out; | 1643 | goto out; |
1236 | 1644 | ||
1237 | /* Pending country IE processing, this can happen after we | 1645 | /* |
1646 | * Pending country IE processing, this can happen after we | ||
1238 | * call CRDA and wait for a response if a beacon was received before | 1647 | * call CRDA and wait for a response if a beacon was received before |
1239 | * we were able to process the last regulatory_hint_11d() call */ | 1648 | * we were able to process the last regulatory_hint_11d() call |
1649 | */ | ||
1240 | if (country_ie_regdomain) | 1650 | if (country_ie_regdomain) |
1241 | goto out; | 1651 | goto out; |
1242 | 1652 | ||
@@ -1248,33 +1658,44 @@ void regulatory_hint_11d(struct wiphy *wiphy, | |||
1248 | else if (country_ie[2] == 'O') | 1658 | else if (country_ie[2] == 'O') |
1249 | env = ENVIRON_OUTDOOR; | 1659 | env = ENVIRON_OUTDOOR; |
1250 | 1660 | ||
1251 | /* We will run this for *every* beacon processed for the BSSID, so | 1661 | /* |
1662 | * We will run this for *every* beacon processed for the BSSID, so | ||
1252 | * we optimize an early check to exit out early if we don't have to | 1663 | * we optimize an early check to exit out early if we don't have to |
1253 | * do anything */ | 1664 | * do anything |
1254 | if (likely(last_request->wiphy)) { | 1665 | */ |
1666 | if (likely(wiphy_idx_valid(last_request->wiphy_idx))) { | ||
1255 | struct cfg80211_registered_device *drv_last_ie; | 1667 | struct cfg80211_registered_device *drv_last_ie; |
1256 | 1668 | ||
1257 | drv_last_ie = wiphy_to_dev(last_request->wiphy); | 1669 | drv_last_ie = |
1670 | cfg80211_drv_by_wiphy_idx(last_request->wiphy_idx); | ||
1258 | 1671 | ||
1259 | /* Lets keep this simple -- we trust the first AP | 1672 | /* |
1260 | * after we intersect with CRDA */ | 1673 | * Lets keep this simple -- we trust the first AP |
1261 | if (likely(last_request->wiphy == wiphy)) { | 1674 | * after we intersect with CRDA |
1262 | /* Ignore IEs coming in on this wiphy with | 1675 | */ |
1263 | * the same alpha2 and environment cap */ | 1676 | if (likely(&drv_last_ie->wiphy == wiphy)) { |
1677 | /* | ||
1678 | * Ignore IEs coming in on this wiphy with | ||
1679 | * the same alpha2 and environment cap | ||
1680 | */ | ||
1264 | if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, | 1681 | if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, |
1265 | alpha2) && | 1682 | alpha2) && |
1266 | env == drv_last_ie->env)) { | 1683 | env == drv_last_ie->env)) { |
1267 | goto out; | 1684 | goto out; |
1268 | } | 1685 | } |
1269 | /* the wiphy moved on to another BSSID or the AP | 1686 | /* |
1687 | * the wiphy moved on to another BSSID or the AP | ||
1270 | * was reconfigured. XXX: We need to deal with the | 1688 | * was reconfigured. XXX: We need to deal with the |
1271 | * case where the user suspends and goes to goes | 1689 | * case where the user suspends and goes |
1272 | * to another country, and then gets IEs from an | 1690 | * to another country, and then gets IEs from an |
1273 | * AP with different settings */ | 1691 | * AP with different settings |
1692 | */ | ||
1274 | goto out; | 1693 | goto out; |
1275 | } else { | 1694 | } else { |
1276 | /* Ignore IEs coming in on two separate wiphys with | 1695 | /* |
1277 | * the same alpha2 and environment cap */ | 1696 | * Ignore IEs coming in on two separate wiphys with |
1697 | * the same alpha2 and environment cap | ||
1698 | */ | ||
1278 | if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, | 1699 | if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, |
1279 | alpha2) && | 1700 | alpha2) && |
1280 | env == drv_last_ie->env)) { | 1701 | env == drv_last_ie->env)) { |
@@ -1289,28 +1710,97 @@ void regulatory_hint_11d(struct wiphy *wiphy, | |||
1289 | if (!rd) | 1710 | if (!rd) |
1290 | goto out; | 1711 | goto out; |
1291 | 1712 | ||
1292 | /* This will not happen right now but we leave it here for the | 1713 | /* |
1714 | * This will not happen right now but we leave it here for the | ||
1293 | * the future when we want to add suspend/resume support and having | 1715 | * future when we want to add suspend/resume support and having |
1294 | * the user move to another country after doing so, or having the user | 1716 | * the user move to another country after doing so, or having the user |
1295 | * move to another AP. Right now we just trust the first AP. This is why | 1717 | * move to another AP. Right now we just trust the first AP. |
1296 | * this is marked as likley(). If we hit this before we add this support | 1718 | * |
1297 | * we want to be informed of it as it would indicate a mistake in the | 1719 | * If we hit this before we add this support we want to be informed of |
1298 | * current design */ | 1720 | * it as it would indicate a mistake in the current design |
1299 | if (likely(WARN_ON(reg_same_country_ie_hint(wiphy, checksum)))) | 1721 | */ |
1300 | goto out; | 1722 | if (WARN_ON(reg_same_country_ie_hint(wiphy, checksum))) |
1723 | goto free_rd_out; | ||
1724 | |||
1725 | request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); | ||
1726 | if (!request) | ||
1727 | goto free_rd_out; | ||
1301 | 1728 | ||
1302 | /* We keep this around for when CRDA comes back with a response so | 1729 | /* |
1303 | * we can intersect with that */ | 1730 | * We keep this around for when CRDA comes back with a response so |
1731 | * we can intersect with that | ||
1732 | */ | ||
1304 | country_ie_regdomain = rd; | 1733 | country_ie_regdomain = rd; |
1305 | 1734 | ||
1306 | __regulatory_hint(wiphy, REGDOM_SET_BY_COUNTRY_IE, | 1735 | request->wiphy_idx = get_wiphy_idx(wiphy); |
1307 | country_ie_regdomain->alpha2, checksum, env); | 1736 | request->alpha2[0] = rd->alpha2[0]; |
1737 | request->alpha2[1] = rd->alpha2[1]; | ||
1738 | request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE; | ||
1739 | request->country_ie_checksum = checksum; | ||
1740 | request->country_ie_env = env; | ||
1308 | 1741 | ||
1742 | mutex_unlock(&cfg80211_mutex); | ||
1743 | |||
1744 | queue_regulatory_request(request); | ||
1745 | |||
1746 | return; | ||
1747 | |||
1748 | free_rd_out: | ||
1749 | kfree(rd); | ||
1309 | out: | 1750 | out: |
1310 | mutex_unlock(&cfg80211_drv_mutex); | 1751 | mutex_unlock(&cfg80211_mutex); |
1311 | } | 1752 | } |
1312 | EXPORT_SYMBOL(regulatory_hint_11d); | 1753 | EXPORT_SYMBOL(regulatory_hint_11d); |
1313 | 1754 | ||
1755 | static bool freq_is_chan_12_13_14(u16 freq) | ||
1756 | { | ||
1757 | if (freq == ieee80211_channel_to_frequency(12) || | ||
1758 | freq == ieee80211_channel_to_frequency(13) || | ||
1759 | freq == ieee80211_channel_to_frequency(14)) | ||
1760 | return true; | ||
1761 | return false; | ||
1762 | } | ||
1763 | |||
1764 | int regulatory_hint_found_beacon(struct wiphy *wiphy, | ||
1765 | struct ieee80211_channel *beacon_chan, | ||
1766 | gfp_t gfp) | ||
1767 | { | ||
1768 | struct reg_beacon *reg_beacon; | ||
1769 | |||
1770 | if (likely((beacon_chan->beacon_found || | ||
1771 | (beacon_chan->flags & IEEE80211_CHAN_RADAR) || | ||
1772 | (beacon_chan->band == IEEE80211_BAND_2GHZ && | ||
1773 | !freq_is_chan_12_13_14(beacon_chan->center_freq))))) | ||
1774 | return 0; | ||
1775 | |||
1776 | reg_beacon = kzalloc(sizeof(struct reg_beacon), gfp); | ||
1777 | if (!reg_beacon) | ||
1778 | return -ENOMEM; | ||
1779 | |||
1780 | #ifdef CONFIG_CFG80211_REG_DEBUG | ||
1781 | printk(KERN_DEBUG "cfg80211: Found new beacon on " | ||
1782 | "frequency: %d MHz (Ch %d) on %s\n", | ||
1783 | beacon_chan->center_freq, | ||
1784 | ieee80211_frequency_to_channel(beacon_chan->center_freq), | ||
1785 | wiphy_name(wiphy)); | ||
1786 | #endif | ||
1787 | memcpy(&reg_beacon->chan, beacon_chan, ||
1788 | sizeof(struct ieee80211_channel)); | ||
1789 | |||
1790 | |||
1791 | /* | ||
1792 | * Since we can be called from BH or and non-BH context | ||
1793 | * we must use spin_lock_bh() | ||
1794 | */ | ||
1795 | spin_lock_bh(&reg_pending_beacons_lock); ||
1796 | list_add_tail(&reg_beacon->list, &reg_pending_beacons); ||
1797 | spin_unlock_bh(&reg_pending_beacons_lock); ||
1798 | |||
1799 | schedule_work(&reg_work); ||
1800 | |||
1801 | return 0; | ||
1802 | } | ||
1803 | |||
1314 | static void print_rd_rules(const struct ieee80211_regdomain *rd) | 1804 | static void print_rd_rules(const struct ieee80211_regdomain *rd) |
1315 | { | 1805 | { |
1316 | unsigned int i; | 1806 | unsigned int i; |
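
The new regulatory_hint_found_beacon() above only queues a hint for beacons heard on channels that the world regulatory domain does not already enable: 2.4 GHz channels 12-14 and the non-radar 5 GHz channels. The standalone sketch below illustrates the 2.4 GHz channel-to-frequency mapping that freq_is_chan_12_13_14() relies on; chan_to_freq_2ghz() is a local stand-in for the kernel's ieee80211_channel_to_frequency(), not its actual implementation.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative only: in the 2.4 GHz band, channels 1-13 sit at 2407 + 5 * chan MHz,
 * and channel 14 is the Japan-only 2484 MHz channel. */
static int chan_to_freq_2ghz(int chan)
{
    if (chan == 14)
        return 2484;
    if (chan >= 1 && chan <= 13)
        return 2407 + chan * 5;
    return -1;
}

static bool freq_is_chan_12_13_14(int freq)
{
    return freq == chan_to_freq_2ghz(12) ||
           freq == chan_to_freq_2ghz(13) ||
           freq == chan_to_freq_2ghz(14);
}

int main(void)
{
    /* 2467, 2472 and 2484 MHz would trigger a beacon hint; 2412 (channel 1) would not. */
    printf("%d %d %d %d\n",
           freq_is_chan_12_13_14(2467), freq_is_chan_12_13_14(2472),
           freq_is_chan_12_13_14(2484), freq_is_chan_12_13_14(2412));
    return 0;
}
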
@@ -1326,8 +1816,10 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) | |||
1326 | freq_range = &reg_rule->freq_range; | 1816 | freq_range = &reg_rule->freq_range; |
1327 | power_rule = &reg_rule->power_rule; | 1817 | power_rule = &reg_rule->power_rule; |
1328 | 1818 | ||
1329 | /* There may not be documentation for max antenna gain | 1819 | /* |
1330 | * in certain regions */ | 1820 | * There may not be documentation for max antenna gain |
1821 | * in certain regions | ||
1822 | */ | ||
1331 | if (power_rule->max_antenna_gain) | 1823 | if (power_rule->max_antenna_gain) |
1332 | printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), " | 1824 | printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), " |
1333 | "(%d mBi, %d mBm)\n", | 1825 | "(%d mBi, %d mBm)\n", |
@@ -1350,13 +1842,13 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) | |||
1350 | { | 1842 | { |
1351 | 1843 | ||
1352 | if (is_intersected_alpha2(rd->alpha2)) { | 1844 | if (is_intersected_alpha2(rd->alpha2)) { |
1353 | struct wiphy *wiphy = NULL; | ||
1354 | struct cfg80211_registered_device *drv; | ||
1355 | 1845 | ||
1356 | if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { | 1846 | if (last_request->initiator == |
1357 | if (last_request->wiphy) { | 1847 | NL80211_REGDOM_SET_BY_COUNTRY_IE) { |
1358 | wiphy = last_request->wiphy; | 1848 | struct cfg80211_registered_device *drv; |
1359 | drv = wiphy_to_dev(wiphy); | 1849 | drv = cfg80211_drv_by_wiphy_idx( |
1850 | last_request->wiphy_idx); | ||
1851 | if (drv) { | ||
1360 | printk(KERN_INFO "cfg80211: Current regulatory " | 1852 | printk(KERN_INFO "cfg80211: Current regulatory " |
1361 | "domain updated by AP to: %c%c\n", | 1853 | "domain updated by AP to: %c%c\n", |
1362 | drv->country_ie_alpha2[0], | 1854 | drv->country_ie_alpha2[0], |
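
print_regdomain() now resolves the registered device from last_request->wiphy_idx instead of dereferencing a stored wiphy pointer, and tolerates a NULL result. The body of cfg80211_drv_by_wiphy_idx() is not part of this hunk; the sketch below only illustrates the index-based lookup idea, assuming the device list and the wiphy_idx field introduced elsewhere in this series.

/*
 * Hypothetical sketch of an index-based lookup. Holding only the index means
 * a stale request cannot leave a dangling pointer behind: once the device is
 * gone, the lookup simply returns NULL. Callers hold cfg80211_mutex.
 */
static struct cfg80211_registered_device *lookup_by_wiphy_idx(int wiphy_idx)
{
    struct cfg80211_registered_device *drv;

    list_for_each_entry(drv, &cfg80211_drv_list, list) {
        if (drv->wiphy_idx == wiphy_idx)
            return drv;
    }
    return NULL;
}
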
@@ -1422,7 +1914,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) | |||
1422 | { | 1914 | { |
1423 | const struct ieee80211_regdomain *intersected_rd = NULL; | 1915 | const struct ieee80211_regdomain *intersected_rd = NULL; |
1424 | struct cfg80211_registered_device *drv = NULL; | 1916 | struct cfg80211_registered_device *drv = NULL; |
1425 | struct wiphy *wiphy = NULL; | 1917 | struct wiphy *request_wiphy; |
1426 | /* Some basic sanity checks first */ | 1918 | /* Some basic sanity checks first */ |
1427 | 1919 | ||
1428 | if (is_world_regdom(rd->alpha2)) { | 1920 | if (is_world_regdom(rd->alpha2)) { |
@@ -1439,23 +1931,27 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) | |||
1439 | if (!last_request) | 1931 | if (!last_request) |
1440 | return -EINVAL; | 1932 | return -EINVAL; |
1441 | 1933 | ||
1442 | /* Lets only bother proceeding on the same alpha2 if the current | 1934 | /* |
1935 | * Lets only bother proceeding on the same alpha2 if the current | ||
1443 | * rd is non static (it means CRDA was present and was used last) | 1936 | * rd is non static (it means CRDA was present and was used last) |
1444 | * and the pending request came in from a country IE */ | 1937 | * and the pending request came in from a country IE |
1445 | if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) { | 1938 | */ |
1446 | /* If someone else asked us to change the rd lets only bother | 1939 | if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { |
1447 | * checking if the alpha2 changes if CRDA was already called */ | 1940 | /* |
1941 | * If someone else asked us to change the rd lets only bother | ||
1942 | * checking if the alpha2 changes if CRDA was already called | ||
1943 | */ | ||
1448 | if (!is_old_static_regdom(cfg80211_regdomain) && | 1944 | if (!is_old_static_regdom(cfg80211_regdomain) && |
1449 | !regdom_changed(rd->alpha2)) | 1945 | !regdom_changes(rd->alpha2)) |
1450 | return -EINVAL; | 1946 | return -EINVAL; |
1451 | } | 1947 | } |
1452 | 1948 | ||
1453 | wiphy = last_request->wiphy; | 1949 | /* |
1454 | 1950 | * Now lets set the regulatory domain, update all driver channels | |
1455 | /* Now lets set the regulatory domain, update all driver channels | ||
1456 | * and finally inform them of what we have done, in case they want | 1951 | * and finally inform them of what we have done, in case they want |
1457 | * to review or adjust their own settings based on their own | 1952 | * to review or adjust their own settings based on their own |
1458 | * internal EEPROM data */ | 1953 | * internal EEPROM data |
1954 | */ | ||
1459 | 1955 | ||
1460 | if (WARN_ON(!reg_is_valid_request(rd->alpha2))) | 1956 | if (WARN_ON(!reg_is_valid_request(rd->alpha2))) |
1461 | return -EINVAL; | 1957 | return -EINVAL; |
@@ -1467,21 +1963,25 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) | |||
1467 | return -EINVAL; | 1963 | return -EINVAL; |
1468 | } | 1964 | } |
1469 | 1965 | ||
1966 | request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); | ||
1967 | |||
1470 | if (!last_request->intersect) { | 1968 | if (!last_request->intersect) { |
1471 | int r; | 1969 | int r; |
1472 | 1970 | ||
1473 | if (last_request->initiator != REGDOM_SET_BY_DRIVER) { | 1971 | if (last_request->initiator != NL80211_REGDOM_SET_BY_DRIVER) { |
1474 | reset_regdomains(); | 1972 | reset_regdomains(); |
1475 | cfg80211_regdomain = rd; | 1973 | cfg80211_regdomain = rd; |
1476 | return 0; | 1974 | return 0; |
1477 | } | 1975 | } |
1478 | 1976 | ||
1479 | /* For a driver hint, lets copy the regulatory domain the | 1977 | /* |
1480 | * driver wanted to the wiphy to deal with conflicts */ | 1978 | * For a driver hint, lets copy the regulatory domain the |
1979 | * driver wanted to the wiphy to deal with conflicts | ||
1980 | */ | ||
1481 | 1981 | ||
1482 | BUG_ON(last_request->wiphy->regd); | 1982 | BUG_ON(request_wiphy->regd); |
1483 | 1983 | ||
1484 | r = reg_copy_regd(&last_request->wiphy->regd, rd); | 1984 | r = reg_copy_regd(&request_wiphy->regd, rd); |
1485 | if (r) | 1985 | if (r) |
1486 | return r; | 1986 | return r; |
1487 | 1987 | ||
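
For a driver hint the new code copies the regulatory domain onto the wiphy with reg_copy_regd() so later conflicts can be resolved against the driver's own copy. The helper's body is not shown in this hunk; the sketch below is an assumption of what such a copy involves, the key point being that struct ieee80211_regdomain ends in a flexible array of rules, so the allocation has to be sized from n_reg_rules rather than sizeof().

/* A minimal sketch (not the in-tree reg_copy_regd()) of duplicating a regdomain. */
static int regd_copy_sketch(const struct ieee80211_regdomain **dst,
                            const struct ieee80211_regdomain *src)
{
    struct ieee80211_regdomain *regd;
    size_t size = sizeof(*regd) +
                  src->n_reg_rules * sizeof(struct ieee80211_reg_rule);

    regd = kzalloc(size, GFP_KERNEL);
    if (!regd)
        return -ENOMEM;
    memcpy(regd, src, size);    /* header plus all rules in one copy */
    *dst = regd;
    return 0;
}
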
@@ -1492,17 +1992,19 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) | |||
1492 | 1992 | ||
1493 | /* Intersection requires a bit more work */ | 1993 | /* Intersection requires a bit more work */ |
1494 | 1994 | ||
1495 | if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) { | 1995 | if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { |
1496 | 1996 | ||
1497 | intersected_rd = regdom_intersect(rd, cfg80211_regdomain); | 1997 | intersected_rd = regdom_intersect(rd, cfg80211_regdomain); |
1498 | if (!intersected_rd) | 1998 | if (!intersected_rd) |
1499 | return -EINVAL; | 1999 | return -EINVAL; |
1500 | 2000 | ||
1501 | /* We can trash what CRDA provided now. | 2001 | /* |
2002 | * We can trash what CRDA provided now. | ||
1502 | * However if a driver requested this specific regulatory | 2003 | * However if a driver requested this specific regulatory |
1503 | * domain we keep it for its private use */ | 2004 | * domain we keep it for its private use |
1504 | if (last_request->initiator == REGDOM_SET_BY_DRIVER) | 2005 | */ |
1505 | last_request->wiphy->regd = rd; | 2006 | if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) |
2007 | request_wiphy->regd = rd; | ||
1506 | else | 2008 | else |
1507 | kfree(rd); | 2009 | kfree(rd); |
1508 | 2010 | ||
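
regdom_intersect() reconciles what CRDA returned with what is already set by keeping, for every pair of overlapping rules, the common frequency span and the stricter bandwidth and power limits. The standalone example below illustrates that rule-by-rule idea with simplified, made-up structures; the in-tree function additionally walks all rule pairs and treats antenna gain the same way.

#include <stdio.h>

/* Simplified stand-ins for the cfg80211 rule structures; illustrative only. */
struct freq_range { int start_khz, end_khz, max_bw_khz; };
struct power_rule { int max_gain_mbi, max_eirp_mbm; };

static int min(int a, int b) { return a < b ? a : b; }
static int max(int a, int b) { return a > b ? a : b; }

/* Keep the overlap and the stricter limits; fail if the ranges do not overlap. */
static int intersect_rule(const struct freq_range *f1, const struct power_rule *p1,
                          const struct freq_range *f2, const struct power_rule *p2,
                          struct freq_range *fr, struct power_rule *pr)
{
    fr->start_khz = max(f1->start_khz, f2->start_khz);
    fr->end_khz = min(f1->end_khz, f2->end_khz);
    if (fr->start_khz >= fr->end_khz)
        return -1;
    fr->max_bw_khz = min(f1->max_bw_khz, f2->max_bw_khz);
    pr->max_gain_mbi = min(p1->max_gain_mbi, p2->max_gain_mbi);
    pr->max_eirp_mbm = min(p1->max_eirp_mbm, p2->max_eirp_mbm);
    return 0;
}

int main(void)
{
    struct freq_range a = { 2402000, 2482000, 40000 }, b = { 2402000, 2472000, 40000 };
    struct power_rule pa = { 0, 2000 }, pb = { 0, 3000 }, pr;
    struct freq_range fr;

    if (!intersect_rule(&a, &pa, &b, &pb, &fr, &pr))
        printf("%d-%d kHz @ %d kHz, %d mBm\n",
               fr.start_khz, fr.end_khz, fr.max_bw_khz, pr.max_eirp_mbm);
    return 0;
}
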
@@ -1522,8 +2024,10 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) | |||
1522 | BUG_ON(!country_ie_regdomain); | 2024 | BUG_ON(!country_ie_regdomain); |
1523 | 2025 | ||
1524 | if (rd != country_ie_regdomain) { | 2026 | if (rd != country_ie_regdomain) { |
1525 | /* Intersect what CRDA returned and our what we | 2027 | /* |
1526 | * had built from the Country IE received */ | 2028 | * Intersect what CRDA returned and our what we |
2029 | * had built from the Country IE received | ||
2030 | */ | ||
1527 | 2031 | ||
1528 | intersected_rd = regdom_intersect(rd, country_ie_regdomain); | 2032 | intersected_rd = regdom_intersect(rd, country_ie_regdomain); |
1529 | 2033 | ||
@@ -1533,16 +2037,18 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) | |||
1533 | kfree(country_ie_regdomain); | 2037 | kfree(country_ie_regdomain); |
1534 | country_ie_regdomain = NULL; | 2038 | country_ie_regdomain = NULL; |
1535 | } else { | 2039 | } else { |
1536 | /* This would happen when CRDA was not present and | 2040 | /* |
2041 | * This would happen when CRDA was not present and | ||
1537 | * OLD_REGULATORY was enabled. We intersect our Country | 2042 | * OLD_REGULATORY was enabled. We intersect our Country |
1538 | * IE rd and what was set on cfg80211 originally */ | 2043 | * IE rd and what was set on cfg80211 originally |
2044 | */ | ||
1539 | intersected_rd = regdom_intersect(rd, cfg80211_regdomain); | 2045 | intersected_rd = regdom_intersect(rd, cfg80211_regdomain); |
1540 | } | 2046 | } |
1541 | 2047 | ||
1542 | if (!intersected_rd) | 2048 | if (!intersected_rd) |
1543 | return -EINVAL; | 2049 | return -EINVAL; |
1544 | 2050 | ||
1545 | drv = wiphy_to_dev(wiphy); | 2051 | drv = wiphy_to_dev(request_wiphy); |
1546 | 2052 | ||
1547 | drv->country_ie_alpha2[0] = rd->alpha2[0]; | 2053 | drv->country_ie_alpha2[0] = rd->alpha2[0]; |
1548 | drv->country_ie_alpha2[1] = rd->alpha2[1]; | 2054 | drv->country_ie_alpha2[1] = rd->alpha2[1]; |
@@ -1560,13 +2066,17 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) | |||
1560 | } | 2066 | } |
1561 | 2067 | ||
1562 | 2068 | ||
1563 | /* Use this call to set the current regulatory domain. Conflicts with | 2069 | /* |
2070 | * Use this call to set the current regulatory domain. Conflicts with | ||
1564 | * multiple drivers can be ironed out later. Caller must've already | 2071 | * multiple drivers can be ironed out later. Caller must've already |
1565 | * kmalloc'd the rd structure. Caller must hold cfg80211_drv_mutex */ | 2072 | * kmalloc'd the rd structure. Caller must hold cfg80211_mutex |
2073 | */ | ||
1566 | int set_regdom(const struct ieee80211_regdomain *rd) | 2074 | int set_regdom(const struct ieee80211_regdomain *rd) |
1567 | { | 2075 | { |
1568 | int r; | 2076 | int r; |
1569 | 2077 | ||
2078 | assert_cfg80211_lock(); | ||
2079 | |||
1570 | /* Note that this doesn't update the wiphys, this is done below */ | 2080 | /* Note that this doesn't update the wiphys, this is done below */ |
1571 | r = __set_regdom(rd); | 2081 | r = __set_regdom(rd); |
1572 | if (r) { | 2082 | if (r) { |
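
set_regdom() now documents and enforces its locking rule: the caller must hold cfg80211_mutex (the renamed cfg80211_drv_mutex). The exact body of assert_cfg80211_lock() is not in this hunk; a typical shape for such a check, offered here only as an assumption, is:

/* Sketch of the assertion pattern; the in-tree helper's body is assumed. */
static inline void assert_cfg80211_lock(void)
{
    WARN_ON(!mutex_is_locked(&cfg80211_mutex));
}
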
@@ -1583,57 +2093,87 @@ int set_regdom(const struct ieee80211_regdomain *rd) | |||
1583 | 2093 | ||
1584 | print_regdomain(cfg80211_regdomain); | 2094 | print_regdomain(cfg80211_regdomain); |
1585 | 2095 | ||
2096 | nl80211_send_reg_change_event(last_request); | ||
2097 | |||
1586 | return r; | 2098 | return r; |
1587 | } | 2099 | } |
1588 | 2100 | ||
1589 | /* Caller must hold cfg80211_drv_mutex */ | 2101 | /* Caller must hold cfg80211_mutex */ |
1590 | void reg_device_remove(struct wiphy *wiphy) | 2102 | void reg_device_remove(struct wiphy *wiphy) |
1591 | { | 2103 | { |
2104 | struct wiphy *request_wiphy; | ||
2105 | |||
2106 | assert_cfg80211_lock(); | ||
2107 | |||
2108 | request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); | ||
2109 | |||
1592 | kfree(wiphy->regd); | 2110 | kfree(wiphy->regd); |
1593 | if (!last_request || !last_request->wiphy) | 2111 | if (!last_request || !request_wiphy) |
1594 | return; | 2112 | return; |
1595 | if (last_request->wiphy != wiphy) | 2113 | if (request_wiphy != wiphy) |
1596 | return; | 2114 | return; |
1597 | last_request->wiphy = NULL; | 2115 | last_request->wiphy_idx = WIPHY_IDX_STALE; |
1598 | last_request->country_ie_env = ENVIRON_ANY; | 2116 | last_request->country_ie_env = ENVIRON_ANY; |
1599 | } | 2117 | } |
1600 | 2118 | ||
1601 | int regulatory_init(void) | 2119 | int regulatory_init(void) |
1602 | { | 2120 | { |
1603 | int err; | 2121 | int err = 0; |
1604 | 2122 | ||
1605 | reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); | 2123 | reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); |
1606 | if (IS_ERR(reg_pdev)) | 2124 | if (IS_ERR(reg_pdev)) |
1607 | return PTR_ERR(reg_pdev); | 2125 | return PTR_ERR(reg_pdev); |
1608 | 2126 | ||
2127 | spin_lock_init(&reg_requests_lock); ||
2128 | spin_lock_init(&reg_pending_beacons_lock); ||
2129 | |||
1609 | #ifdef CONFIG_WIRELESS_OLD_REGULATORY | 2130 | #ifdef CONFIG_WIRELESS_OLD_REGULATORY |
1610 | cfg80211_regdomain = static_regdom(ieee80211_regdom); | 2131 | cfg80211_regdomain = static_regdom(ieee80211_regdom); |
1611 | 2132 | ||
1612 | printk(KERN_INFO "cfg80211: Using static regulatory domain info\n"); | 2133 | printk(KERN_INFO "cfg80211: Using static regulatory domain info\n"); |
1613 | print_regdomain_info(cfg80211_regdomain); | 2134 | print_regdomain_info(cfg80211_regdomain); |
1614 | /* The old code still requests for a new regdomain and if | 2135 | /* |
2136 | * The old code still requests for a new regdomain and if | ||
1615 | * you have CRDA you get it updated, otherwise you get | 2137 | * you have CRDA you get it updated, otherwise you get |
1616 | * stuck with the static values. We ignore "EU" code as | 2138 | * stuck with the static values. We ignore "EU" code as |
1617 | * that is not a valid ISO / IEC 3166 alpha2 */ | 2139 | * that is not a valid ISO / IEC 3166 alpha2 |
2140 | */ | ||
1618 | if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U') | 2141 | if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U') |
1619 | err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, | 2142 | err = regulatory_hint_core(ieee80211_regdom); |
1620 | ieee80211_regdom, 0, ENVIRON_ANY); | ||
1621 | #else | 2143 | #else |
1622 | cfg80211_regdomain = cfg80211_world_regdom; | 2144 | cfg80211_regdomain = cfg80211_world_regdom; |
1623 | 2145 | ||
1624 | err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", 0, ENVIRON_ANY); | 2146 | err = regulatory_hint_core("00"); |
1625 | if (err) | ||
1626 | printk(KERN_ERR "cfg80211: calling CRDA failed - " | ||
1627 | "unable to update world regulatory domain, " | ||
1628 | "using static definition\n"); | ||
1629 | #endif | 2147 | #endif |
2148 | if (err) { | ||
2149 | if (err == -ENOMEM) | ||
2150 | return err; | ||
2151 | /* | ||
2152 | * N.B. kobject_uevent_env() can fail mainly for when we're out | ||
2153 | * memory which is handled and propagated appropriately above | ||
2154 | * but it can also fail during a netlink_broadcast() or during | ||
2155 | * early boot for call_usermodehelper(). For now treat these | ||
2156 | * errors as non-fatal. | ||
2157 | */ | ||
2158 | printk(KERN_ERR "cfg80211: kobject_uevent_env() was unable " | ||
2159 | "to call CRDA during init"); | ||
2160 | #ifdef CONFIG_CFG80211_REG_DEBUG | ||
2161 | /* We want to find out exactly why when debugging */ | ||
2162 | WARN_ON(err); | ||
2163 | #endif | ||
2164 | } | ||
1630 | 2165 | ||
1631 | return 0; | 2166 | return 0; |
1632 | } | 2167 | } |
1633 | 2168 | ||
1634 | void regulatory_exit(void) | 2169 | void regulatory_exit(void) |
1635 | { | 2170 | { |
1636 | mutex_lock(&cfg80211_drv_mutex); | 2171 | struct regulatory_request *reg_request, *tmp; |
2172 | struct reg_beacon *reg_beacon, *btmp; | ||
2173 | |||
2174 | cancel_work_sync(&reg_work); ||
2175 | |||
2176 | mutex_lock(&cfg80211_mutex); | ||
1637 | 2177 | ||
1638 | reset_regdomains(); | 2178 | reset_regdomains(); |
1639 | 2179 | ||
@@ -1644,5 +2184,33 @@ void regulatory_exit(void) | |||
1644 | 2184 | ||
1645 | platform_device_unregister(reg_pdev); | 2185 | platform_device_unregister(reg_pdev); |
1646 | 2186 | ||
1647 | mutex_unlock(&cfg80211_drv_mutex); | 2187 | spin_lock_bh(&reg_pending_beacons_lock); |
2188 | if (!list_empty(&reg_pending_beacons)) { ||
2189 | list_for_each_entry_safe(reg_beacon, btmp, | ||
2190 | &reg_pending_beacons, list) { ||
2191 | list_del(&reg_beacon->list); ||
2192 | kfree(reg_beacon); | ||
2193 | } | ||
2194 | } | ||
2195 | spin_unlock_bh(&reg_pending_beacons_lock); ||
2196 | |||
2197 | if (!list_empty(&reg_beacon_list)) { ||
2198 | list_for_each_entry_safe(reg_beacon, btmp, | ||
2199 | &reg_beacon_list, list) { ||
2200 | list_del(&reg_beacon->list); ||
2201 | kfree(reg_beacon); | ||
2202 | } | ||
2203 | } | ||
2204 | |||
2205 | spin_lock(&reg_requests_lock); ||
2206 | if (!list_empty(&reg_requests_list)) { ||
2207 | list_for_each_entry_safe(reg_request, tmp, | ||
2208 | &reg_requests_list, list) { ||
2209 | list_del(&reg_request->list); ||
2210 | kfree(reg_request); | ||
2211 | } | ||
2212 | } | ||
2213 | spin_unlock(&reg_requests_lock); ||
2214 | |||
2215 | mutex_unlock(&cfg80211_mutex); | ||
1648 | } | 2216 | } |
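
regulatory_exit() now also drains the pending-beacon and pending-request lists. Because every entry is unlinked and freed inside the loop body, the _safe iterator, which caches the next pointer before the callback runs, is required. A generic sketch of that teardown pattern with a placeholder entry type:

/* Placeholder type; only the embedded list_head matters for the pattern. */
struct pending_item {
    struct list_head list;
    /* payload ... */
};

static void drain_list(struct list_head *head)
{
    struct pending_item *item, *tmp;

    list_for_each_entry_safe(item, tmp, head, list) {
        list_del(&item->list);
        kfree(item);    /* freeing inside the loop is why _safe is needed */
    }
}
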
diff --git a/net/wireless/reg.h b/net/wireless/reg.h index fe8c83f34fb7..e37829a49dc4 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h | |||
@@ -6,6 +6,8 @@ extern const struct ieee80211_regdomain *cfg80211_regdomain; | |||
6 | bool is_world_regdom(const char *alpha2); | 6 | bool is_world_regdom(const char *alpha2); |
7 | bool reg_is_valid_request(const char *alpha2); | 7 | bool reg_is_valid_request(const char *alpha2); |
8 | 8 | ||
9 | int regulatory_hint_user(const char *alpha2); | ||
10 | |||
9 | void reg_device_remove(struct wiphy *wiphy); | 11 | void reg_device_remove(struct wiphy *wiphy); |
10 | 12 | ||
11 | int regulatory_init(void); | 13 | int regulatory_init(void); |
@@ -14,26 +16,24 @@ void regulatory_exit(void); | |||
14 | int set_regdom(const struct ieee80211_regdomain *rd); | 16 | int set_regdom(const struct ieee80211_regdomain *rd); |
15 | 17 | ||
16 | /** | 18 | /** |
17 | * __regulatory_hint - hint to the wireless core a regulatory domain | 19 | * regulatory_hint_found_beacon - hints a beacon was found on a channel |
18 | * @wiphy: if the hint comes from country information from an AP, this | 20 | * @wiphy: the wireless device where the beacon was found on |
19 | * is required to be set to the wiphy that received the information | 21 | * @beacon_chan: the channel on which the beacon was found on |
20 | * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain | 22 | * @gfp: context flags |
21 | * should be in. | ||
22 | * @country_ie_checksum: checksum of processed country IE, set this to 0 | ||
23 | * if the hint did not come from a country IE | ||
24 | * @country_ie_env: the environment the IE told us we are in, %ENVIRON_* | ||
25 | * | ||
26 | * The Wireless subsystem can use this function to hint to the wireless core | ||
27 | * what it believes should be the current regulatory domain by giving it an | ||
28 | * ISO/IEC 3166 alpha2 country code it knows its regulatory domain should be | ||
29 | * in. | ||
30 | * | 23 | * |
31 | * Returns zero if all went fine, %-EALREADY if a regulatory domain had | 24 | * This informs the wireless core that a beacon from an AP was found on |
32 | * already been set or other standard error codes. | 25 | * the channel provided. This allows the wireless core to make educated |
26 | * guesses on regulatory to help with world roaming. This is only used for | ||
27 | * world roaming -- when we do not know our current location. This is | ||
28 | * only useful on channels 12, 13 and 14 on the 2 GHz band as channels | ||
29 | * 1-11 are already enabled by the world regulatory domain; and on | ||
30 | * non-radar 5 GHz channels. | ||
33 | * | 31 | * |
32 | * Drivers do not need to call this, cfg80211 will do it for after a scan | ||
33 | * on a newly found BSS. | ||
34 | */ | 34 | */ |
35 | extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, | 35 | int regulatory_hint_found_beacon(struct wiphy *wiphy, |
36 | const char *alpha2, u32 country_ie_checksum, | 36 | struct ieee80211_channel *beacon_chan, |
37 | enum environment_cap country_ie_env); | 37 | gfp_t gfp); |
38 | 38 | ||
39 | #endif /* __NET_WIRELESS_REG_H */ | 39 | #endif /* __NET_WIRELESS_REG_H */ |
diff --git a/net/wireless/scan.c b/net/wireless/scan.c index b1893c863b97..280dbcd02c15 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c | |||
@@ -62,6 +62,18 @@ static void bss_release(struct kref *ref) | |||
62 | } | 62 | } |
63 | 63 | ||
64 | /* must hold dev->bss_lock! */ | 64 | /* must hold dev->bss_lock! */ |
65 | void cfg80211_bss_age(struct cfg80211_registered_device *dev, | ||
66 | unsigned long age_secs) | ||
67 | { | ||
68 | struct cfg80211_internal_bss *bss; | ||
69 | unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC); | ||
70 | |||
71 | list_for_each_entry(bss, &dev->bss_list, list) { | ||
72 | bss->ts -= age_jiffies; | ||
73 | } | ||
74 | } | ||
75 | |||
76 | /* must hold dev->bss_lock! */ | ||
65 | void cfg80211_bss_expire(struct cfg80211_registered_device *dev) | 77 | void cfg80211_bss_expire(struct cfg80211_registered_device *dev) |
66 | { | 78 | { |
67 | struct cfg80211_internal_bss *bss, *tmp; | 79 | struct cfg80211_internal_bss *bss, *tmp; |
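
cfg80211_bss_age() backdates every cached BSS timestamp by the time spent in suspend, converting seconds to jiffies via msecs_to_jiffies(age_secs * MSEC_PER_SEC), so that cfg80211_bss_expire() later treats stale entries as if the clock had kept running. A small user-space illustration of the same arithmetic, assuming a 1000 Hz tick for simplicity:

#include <stdio.h>

#define HZ 1000
#define MSEC_PER_SEC 1000UL

static unsigned long msecs_to_jiffies_sketch(unsigned long msecs)
{
    return msecs * HZ / MSEC_PER_SEC;
}

int main(void)
{
    unsigned long ts = 500000;          /* BSS last seen at this jiffy */
    unsigned long suspended_secs = 30;
    unsigned long age = msecs_to_jiffies_sketch(suspended_secs * MSEC_PER_SEC);

    /* Moving the timestamp back makes the entry look 30 s older on resume. */
    ts -= age;
    printf("aged timestamp: %lu (moved back by %lu jiffies)\n", ts, age);
    return 0;
}
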
@@ -358,7 +370,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, | |||
358 | found->pub.beacon_interval = res->pub.beacon_interval; | 370 | found->pub.beacon_interval = res->pub.beacon_interval; |
359 | found->pub.tsf = res->pub.tsf; | 371 | found->pub.tsf = res->pub.tsf; |
360 | found->pub.signal = res->pub.signal; | 372 | found->pub.signal = res->pub.signal; |
361 | found->pub.signal_type = res->pub.signal_type; | ||
362 | found->pub.capability = res->pub.capability; | 373 | found->pub.capability = res->pub.capability; |
363 | found->ts = res->ts; | 374 | found->ts = res->ts; |
364 | kref_put(&res->ref, bss_release); | 375 | kref_put(&res->ref, bss_release); |
@@ -380,8 +391,7 @@ struct cfg80211_bss * | |||
380 | cfg80211_inform_bss_frame(struct wiphy *wiphy, | 391 | cfg80211_inform_bss_frame(struct wiphy *wiphy, |
381 | struct ieee80211_channel *channel, | 392 | struct ieee80211_channel *channel, |
382 | struct ieee80211_mgmt *mgmt, size_t len, | 393 | struct ieee80211_mgmt *mgmt, size_t len, |
383 | s32 signal, enum cfg80211_signal_type sigtype, | 394 | s32 signal, gfp_t gfp) |
384 | gfp_t gfp) | ||
385 | { | 395 | { |
386 | struct cfg80211_internal_bss *res; | 396 | struct cfg80211_internal_bss *res; |
387 | size_t ielen = len - offsetof(struct ieee80211_mgmt, | 397 | size_t ielen = len - offsetof(struct ieee80211_mgmt, |
@@ -389,7 +399,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, | |||
389 | bool overwrite; | 399 | bool overwrite; |
390 | size_t privsz = wiphy->bss_priv_size; | 400 | size_t privsz = wiphy->bss_priv_size; |
391 | 401 | ||
392 | if (WARN_ON(sigtype == NL80211_BSS_SIGNAL_UNSPEC && | 402 | if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC && |
393 | (signal < 0 || signal > 100))) | 403 | (signal < 0 || signal > 100))) |
394 | return NULL; | 404 | return NULL; |
395 | 405 | ||
@@ -403,7 +413,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, | |||
403 | 413 | ||
404 | memcpy(res->pub.bssid, mgmt->bssid, ETH_ALEN); | 414 | memcpy(res->pub.bssid, mgmt->bssid, ETH_ALEN); |
405 | res->pub.channel = channel; | 415 | res->pub.channel = channel; |
406 | res->pub.signal_type = sigtype; | ||
407 | res->pub.signal = signal; | 416 | res->pub.signal = signal; |
408 | res->pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); | 417 | res->pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); |
409 | res->pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); | 418 | res->pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); |
@@ -421,6 +430,9 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, | |||
421 | if (!res) | 430 | if (!res) |
422 | return NULL; | 431 | return NULL; |
423 | 432 | ||
433 | if (res->pub.capability & WLAN_CAPABILITY_ESS) | ||
434 | regulatory_hint_found_beacon(wiphy, channel, gfp); | ||
435 | |||
424 | /* cfg80211_bss_update gives us a referenced result */ | 436 | /* cfg80211_bss_update gives us a referenced result */ |
425 | return &res->pub; | 437 | return &res->pub; |
426 | } | 438 | } |
@@ -584,16 +596,25 @@ static void ieee80211_scan_add_ies(struct iw_request_info *info, | |||
584 | } | 596 | } |
585 | } | 597 | } |
586 | 598 | ||
599 | static inline unsigned int elapsed_jiffies_msecs(unsigned long start) | ||
600 | { | ||
601 | unsigned long end = jiffies; | ||
602 | |||
603 | if (end >= start) | ||
604 | return jiffies_to_msecs(end - start); | ||
605 | |||
606 | return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); | ||
607 | } | ||
587 | 608 | ||
588 | static char * | 609 | static char * |
589 | ieee80211_bss(struct iw_request_info *info, | 610 | ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, |
590 | struct cfg80211_internal_bss *bss, | 611 | struct cfg80211_internal_bss *bss, char *current_ev, |
591 | char *current_ev, char *end_buf) | 612 | char *end_buf) |
592 | { | 613 | { |
593 | struct iw_event iwe; | 614 | struct iw_event iwe; |
594 | u8 *buf, *cfg, *p; | 615 | u8 *buf, *cfg, *p; |
595 | u8 *ie = bss->pub.information_elements; | 616 | u8 *ie = bss->pub.information_elements; |
596 | int rem = bss->pub.len_information_elements, i; | 617 | int rem = bss->pub.len_information_elements, i, sig; |
597 | bool ismesh = false; | 618 | bool ismesh = false; |
598 | 619 | ||
599 | memset(&iwe, 0, sizeof(iwe)); | 620 | memset(&iwe, 0, sizeof(iwe)); |
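
elapsed_jiffies_msecs() guards the "Last beacon" age computation against the jiffies counter wrapping: if end is smaller than start, the counter rolled over, and the elapsed time is end plus whatever ran up to the wrap point. The same arithmetic on a tiny 8-bit counter, as a standalone illustration:

#include <stdio.h>

#define MAX_TICK 255u   /* stand-in for MAX_JIFFY_OFFSET on an 8-bit counter */

static unsigned int elapsed_ticks(unsigned int start, unsigned int end)
{
    if (end >= start)
        return end - start;
    /* the counter wrapped between start and end */
    return end + (MAX_TICK - start) + 1;
}

int main(void)
{
    /* started at 250, wrapped through 0 and reached 5: 11 ticks elapsed */
    printf("%u\n", elapsed_ticks(250, 5));
    return 0;
}
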
@@ -617,19 +638,28 @@ ieee80211_bss(struct iw_request_info *info, | |||
617 | current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, | 638 | current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, |
618 | IW_EV_FREQ_LEN); | 639 | IW_EV_FREQ_LEN); |
619 | 640 | ||
620 | if (bss->pub.signal_type != CFG80211_SIGNAL_TYPE_NONE) { | 641 | if (wiphy->signal_type != CFG80211_SIGNAL_TYPE_NONE) { |
621 | memset(&iwe, 0, sizeof(iwe)); | 642 | memset(&iwe, 0, sizeof(iwe)); |
622 | iwe.cmd = IWEVQUAL; | 643 | iwe.cmd = IWEVQUAL; |
623 | iwe.u.qual.updated = IW_QUAL_LEVEL_UPDATED | | 644 | iwe.u.qual.updated = IW_QUAL_LEVEL_UPDATED | |
624 | IW_QUAL_NOISE_INVALID | | 645 | IW_QUAL_NOISE_INVALID | |
625 | IW_QUAL_QUAL_INVALID; | 646 | IW_QUAL_QUAL_UPDATED; |
626 | switch (bss->pub.signal_type) { | 647 | switch (wiphy->signal_type) { |
627 | case CFG80211_SIGNAL_TYPE_MBM: | 648 | case CFG80211_SIGNAL_TYPE_MBM: |
628 | iwe.u.qual.level = bss->pub.signal / 100; | 649 | sig = bss->pub.signal / 100; |
650 | iwe.u.qual.level = sig; | ||
629 | iwe.u.qual.updated |= IW_QUAL_DBM; | 651 | iwe.u.qual.updated |= IW_QUAL_DBM; |
652 | if (sig < -110) /* rather bad */ | ||
653 | sig = -110; | ||
654 | else if (sig > -40) /* perfect */ | ||
655 | sig = -40; | ||
656 | /* will give a range of 0 .. 70 */ | ||
657 | iwe.u.qual.qual = sig + 110; | ||
630 | break; | 658 | break; |
631 | case CFG80211_SIGNAL_TYPE_UNSPEC: | 659 | case CFG80211_SIGNAL_TYPE_UNSPEC: |
632 | iwe.u.qual.level = bss->pub.signal; | 660 | iwe.u.qual.level = bss->pub.signal; |
661 | /* will give range 0 .. 100 */ | ||
662 | iwe.u.qual.qual = bss->pub.signal; | ||
633 | break; | 663 | break; |
634 | default: | 664 | default: |
635 | /* not reached */ | 665 | /* not reached */ |
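
The MBM branch above converts the mBm reading (hundredths of a dBm) to dBm, clamps it to a [-110, -40] dBm window and shifts it into a 0..70 quality value for WEXT; the UNSPEC branch reuses the driver's 0..100 reading directly. A standalone rendition of that mapping:

#include <stdio.h>

/* Clamp a dBm reading into the 0..70 WEXT quality range: -110 dBm -> 0, -40 dBm -> 70. */
static int mbm_to_wext_qual(int signal_mbm)
{
    int sig = signal_mbm / 100;     /* mBm (1/100 dBm) to dBm */

    if (sig < -110)
        sig = -110;
    else if (sig > -40)
        sig = -40;
    return sig + 110;
}

int main(void)
{
    printf("%d %d %d\n",
           mbm_to_wext_qual(-6500),     /* -65 dBm -> 45 */
           mbm_to_wext_qual(-12000),    /* clamped to 0 */
           mbm_to_wext_qual(-3000));    /* clamped to 70 */
    return 0;
}
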
@@ -763,8 +793,8 @@ ieee80211_bss(struct iw_request_info *info, | |||
763 | &iwe, buf); | 793 | &iwe, buf); |
764 | memset(&iwe, 0, sizeof(iwe)); | 794 | memset(&iwe, 0, sizeof(iwe)); |
765 | iwe.cmd = IWEVCUSTOM; | 795 | iwe.cmd = IWEVCUSTOM; |
766 | sprintf(buf, " Last beacon: %dms ago", | 796 | sprintf(buf, " Last beacon: %ums ago", |
767 | jiffies_to_msecs(jiffies - bss->ts)); | 797 | elapsed_jiffies_msecs(bss->ts)); |
768 | iwe.u.data.length = strlen(buf); | 798 | iwe.u.data.length = strlen(buf); |
769 | current_ev = iwe_stream_add_point(info, current_ev, | 799 | current_ev = iwe_stream_add_point(info, current_ev, |
770 | end_buf, &iwe, buf); | 800 | end_buf, &iwe, buf); |
@@ -793,8 +823,8 @@ static int ieee80211_scan_results(struct cfg80211_registered_device *dev, | |||
793 | spin_unlock_bh(&dev->bss_lock); | 823 | spin_unlock_bh(&dev->bss_lock); |
794 | return -E2BIG; | 824 | return -E2BIG; |
795 | } | 825 | } |
796 | current_ev = ieee80211_bss(info, bss, | 826 | current_ev = ieee80211_bss(&dev->wiphy, info, bss, |
797 | current_ev, end_buf); | 827 | current_ev, end_buf); |
798 | } | 828 | } |
799 | spin_unlock_bh(&dev->bss_lock); | 829 | spin_unlock_bh(&dev->bss_lock); |
800 | return current_ev - buf; | 830 | return current_ev - buf; |
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 26a72b0797a0..efe3c5c92b2d 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c | |||
@@ -31,7 +31,7 @@ static ssize_t name ## _show(struct device *dev, \ | |||
31 | return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member); \ | 31 | return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member); \ |
32 | } | 32 | } |
33 | 33 | ||
34 | SHOW_FMT(index, "%d", idx); | 34 | SHOW_FMT(index, "%d", wiphy_idx); |
35 | SHOW_FMT(macaddress, "%pM", wiphy.perm_addr); | 35 | SHOW_FMT(macaddress, "%pM", wiphy.perm_addr); |
36 | 36 | ||
37 | static struct device_attribute ieee80211_dev_attrs[] = { | 37 | static struct device_attribute ieee80211_dev_attrs[] = { |
@@ -60,6 +60,8 @@ static int wiphy_suspend(struct device *dev, pm_message_t state) | |||
60 | struct cfg80211_registered_device *rdev = dev_to_rdev(dev); | 60 | struct cfg80211_registered_device *rdev = dev_to_rdev(dev); |
61 | int ret = 0; | 61 | int ret = 0; |
62 | 62 | ||
63 | rdev->suspend_at = get_seconds(); | ||
64 | |||
63 | if (rdev->ops->suspend) { | 65 | if (rdev->ops->suspend) { |
64 | rtnl_lock(); | 66 | rtnl_lock(); |
65 | ret = rdev->ops->suspend(&rdev->wiphy); | 67 | ret = rdev->ops->suspend(&rdev->wiphy); |
@@ -74,6 +76,11 @@ static int wiphy_resume(struct device *dev) | |||
74 | struct cfg80211_registered_device *rdev = dev_to_rdev(dev); | 76 | struct cfg80211_registered_device *rdev = dev_to_rdev(dev); |
75 | int ret = 0; | 77 | int ret = 0; |
76 | 78 | ||
79 | /* Age scan results with time spent in suspend */ | ||
80 | spin_lock_bh(&rdev->bss_lock); | ||
81 | cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at); | ||
82 | spin_unlock_bh(&rdev->bss_lock); | ||
83 | |||
77 | if (rdev->ops->resume) { | 84 | if (rdev->ops->resume) { |
78 | rtnl_lock(); | 85 | rtnl_lock(); |
79 | ret = rdev->ops->resume(&rdev->wiphy); | 86 | ret = rdev->ops->resume(&rdev->wiphy); |
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 58e489fd4aed..b84a9b4fe96a 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c | |||
@@ -137,3 +137,100 @@ int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, | |||
137 | return 0; | 137 | return 0; |
138 | } | 138 | } |
139 | EXPORT_SYMBOL(cfg80211_wext_giwmode); | 139 | EXPORT_SYMBOL(cfg80211_wext_giwmode); |
140 | |||
141 | |||
142 | int cfg80211_wext_giwrange(struct net_device *dev, | ||
143 | struct iw_request_info *info, | ||
144 | struct iw_point *data, char *extra) | ||
145 | { | ||
146 | struct wireless_dev *wdev = dev->ieee80211_ptr; | ||
147 | struct iw_range *range = (struct iw_range *) extra; | ||
148 | enum ieee80211_band band; | ||
149 | int c = 0; | ||
150 | |||
151 | if (!wdev) | ||
152 | return -EOPNOTSUPP; | ||
153 | |||
154 | data->length = sizeof(struct iw_range); | ||
155 | memset(range, 0, sizeof(struct iw_range)); | ||
156 | |||
157 | range->we_version_compiled = WIRELESS_EXT; | ||
158 | range->we_version_source = 21; | ||
159 | range->retry_capa = IW_RETRY_LIMIT; | ||
160 | range->retry_flags = IW_RETRY_LIMIT; | ||
161 | range->min_retry = 0; | ||
162 | range->max_retry = 255; | ||
163 | range->min_rts = 0; | ||
164 | range->max_rts = 2347; | ||
165 | range->min_frag = 256; | ||
166 | range->max_frag = 2346; | ||
167 | |||
168 | range->encoding_size[0] = 5; | ||
169 | range->encoding_size[1] = 13; | ||
170 | range->num_encoding_sizes = 2; | ||
171 | range->max_encoding_tokens = 4; | ||
172 | |||
173 | range->max_qual.updated = IW_QUAL_NOISE_INVALID; | ||
174 | |||
175 | switch (wdev->wiphy->signal_type) { | ||
176 | case CFG80211_SIGNAL_TYPE_NONE: | ||
177 | break; | ||
178 | case CFG80211_SIGNAL_TYPE_MBM: | ||
179 | range->max_qual.level = -110; | ||
180 | range->max_qual.qual = 70; | ||
181 | range->avg_qual.qual = 35; | ||
182 | range->max_qual.updated |= IW_QUAL_DBM; | ||
183 | range->max_qual.updated |= IW_QUAL_QUAL_UPDATED; | ||
184 | range->max_qual.updated |= IW_QUAL_LEVEL_UPDATED; | ||
185 | break; | ||
186 | case CFG80211_SIGNAL_TYPE_UNSPEC: | ||
187 | range->max_qual.level = 100; | ||
188 | range->max_qual.qual = 100; | ||
189 | range->avg_qual.qual = 50; | ||
190 | range->max_qual.updated |= IW_QUAL_QUAL_UPDATED; | ||
191 | range->max_qual.updated |= IW_QUAL_LEVEL_UPDATED; | ||
192 | break; | ||
193 | } | ||
194 | |||
195 | range->avg_qual.level = range->max_qual.level / 2; | ||
196 | range->avg_qual.noise = range->max_qual.noise / 2; | ||
197 | range->avg_qual.updated = range->max_qual.updated; | ||
198 | |||
199 | range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 | | ||
200 | IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP; | ||
201 | |||
202 | |||
203 | for (band = 0; band < IEEE80211_NUM_BANDS; band ++) { | ||
204 | int i; | ||
205 | struct ieee80211_supported_band *sband; | ||
206 | |||
207 | sband = wdev->wiphy->bands[band]; | ||
208 | |||
209 | if (!sband) | ||
210 | continue; | ||
211 | |||
212 | for (i = 0; i < sband->n_channels && c < IW_MAX_FREQUENCIES; i++) { | ||
213 | struct ieee80211_channel *chan = &sband->channels[i]; | ||
214 | |||
215 | if (!(chan->flags & IEEE80211_CHAN_DISABLED)) { | ||
216 | range->freq[c].i = | ||
217 | ieee80211_frequency_to_channel( | ||
218 | chan->center_freq); | ||
219 | range->freq[c].m = chan->center_freq; | ||
220 | range->freq[c].e = 6; | ||
221 | c++; | ||
222 | } | ||
223 | } | ||
224 | } | ||
225 | range->num_channels = c; | ||
226 | range->num_frequency = c; | ||
227 | |||
228 | IW_EVENT_CAPA_SET_KERNEL(range->event_capa); | ||
229 | IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); | ||
230 | IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); | ||
231 | |||
232 | range->scan_capa |= IW_SCAN_CAPA_ESSID; | ||
233 | |||
234 | return 0; | ||
235 | } | ||
236 | EXPORT_SYMBOL(cfg80211_wext_giwrange); | ||
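
cfg80211_wext_giwrange() reports each enabled channel two ways: .i carries the channel number, while .m and .e encode the frequency as mantissa times 10^exponent in Hz (center_freq is in MHz, hence e = 6). A small standalone check of that encoding:

#include <stdio.h>

int main(void)
{
    int m = 2412;       /* center_freq in MHz, e.g. 2.4 GHz channel 1 */
    int e = 6;          /* exponent used by the code above */
    long long hz = m;
    int i;

    for (i = 0; i < e; i++)
        hz *= 10;
    printf("%d MHz -> %lld Hz\n", m, hz);   /* 2412000000 Hz */
    return 0;
}
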
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 8f76f4009c24..9ca17b1ce52e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c | |||
@@ -951,10 +951,8 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, | |||
951 | /* | 951 | /* |
952 | * Incoming Call User Data. | 952 | * Incoming Call User Data. |
953 | */ | 953 | */ |
954 | if (skb->len >= 0) { | 954 | skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len); |
955 | skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len); | 955 | makex25->calluserdata.cudlength = skb->len; |
956 | makex25->calluserdata.cudlength = skb->len; | ||
957 | } | ||
958 | 956 | ||
959 | sk->sk_ack_backlog++; | 957 | sk->sk_ack_backlog++; |
960 | 958 | ||
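
The removed guard in x25_rx_call_request() could never fail: skb->len is an unsigned int, so "skb->len >= 0" is always true, and the copy of the call user data can simply be done unconditionally. A trivial illustration of why such a comparison is dead code:

#include <stdio.h>

int main(void)
{
    unsigned int len = 0;   /* same type as skb->len */

    /* For an unsigned type this condition can never be false. */
    if (len >= 0)
        printf("always taken\n");
    return 0;
}
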
@@ -1122,8 +1120,9 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, | |||
1122 | if (msg->msg_flags & MSG_OOB) | 1120 | if (msg->msg_flags & MSG_OOB) |
1123 | skb_queue_tail(&x25->interrupt_out_queue, skb); | 1121 | skb_queue_tail(&x25->interrupt_out_queue, skb); |
1124 | else { | 1122 | else { |
1125 | len = x25_output(sk, skb); | 1123 | rc = x25_output(sk, skb); |
1126 | if (len < 0) | 1124 | len = rc; |
1125 | if (rc < 0) | ||
1127 | kfree_skb(skb); | 1126 | kfree_skb(skb); |
1128 | else if (x25->qbitincl) | 1127 | else if (x25->qbitincl) |
1129 | len++; | 1128 | len++; |
@@ -1608,7 +1607,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { | |||
1608 | 1607 | ||
1609 | SOCKOPS_WRAP(x25_proto, AF_X25); | 1608 | SOCKOPS_WRAP(x25_proto, AF_X25); |
1610 | 1609 | ||
1611 | static struct packet_type x25_packet_type = { | 1610 | static struct packet_type x25_packet_type __read_mostly = { |
1612 | .type = cpu_to_be16(ETH_P_X25), | 1611 | .type = cpu_to_be16(ETH_P_X25), |
1613 | .func = x25_lapb_receive_frame, | 1612 | .func = x25_lapb_receive_frame, |
1614 | }; | 1613 | }; |
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index e25ff62ab2a6..62a5425cc6aa 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c | |||
@@ -748,12 +748,51 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision) | |||
748 | schedule_work(&net->xfrm.state_hash_work); | 748 | schedule_work(&net->xfrm.state_hash_work); |
749 | } | 749 | } |
750 | 750 | ||
751 | static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, | ||
752 | struct flowi *fl, unsigned short family, | ||
753 | xfrm_address_t *daddr, xfrm_address_t *saddr, | ||
754 | struct xfrm_state **best, int *acq_in_progress, | ||
755 | int *error) | ||
756 | { | ||
757 | /* Resolution logic: | ||
758 | * 1. There is a valid state with matching selector. Done. | ||
759 | * 2. Valid state with inappropriate selector. Skip. | ||
760 | * | ||
761 | * Entering area of "sysdeps". | ||
762 | * | ||
763 | * 3. If state is not valid, selector is temporary, it selects | ||
764 | * only session which triggered previous resolution. Key | ||
765 | * manager will do something to install a state with proper | ||
766 | * selector. | ||
767 | */ | ||
768 | if (x->km.state == XFRM_STATE_VALID) { | ||
769 | if ((x->sel.family && | ||
770 | !xfrm_selector_match(&x->sel, fl, x->sel.family)) || | ||
771 | !security_xfrm_state_pol_flow_match(x, pol, fl)) | ||
772 | return; | ||
773 | |||
774 | if (!*best || | ||
775 | (*best)->km.dying > x->km.dying || | ||
776 | ((*best)->km.dying == x->km.dying && | ||
777 | (*best)->curlft.add_time < x->curlft.add_time)) | ||
778 | *best = x; | ||
779 | } else if (x->km.state == XFRM_STATE_ACQ) { | ||
780 | *acq_in_progress = 1; | ||
781 | } else if (x->km.state == XFRM_STATE_ERROR || | ||
782 | x->km.state == XFRM_STATE_EXPIRED) { | ||
783 | if (xfrm_selector_match(&x->sel, fl, x->sel.family) && | ||
784 | security_xfrm_state_pol_flow_match(x, pol, fl)) | ||
785 | *error = -ESRCH; | ||
786 | } | ||
787 | } | ||
788 | |||
751 | struct xfrm_state * | 789 | struct xfrm_state * |
752 | xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, | 790 | xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, |
753 | struct flowi *fl, struct xfrm_tmpl *tmpl, | 791 | struct flowi *fl, struct xfrm_tmpl *tmpl, |
754 | struct xfrm_policy *pol, int *err, | 792 | struct xfrm_policy *pol, int *err, |
755 | unsigned short family) | 793 | unsigned short family) |
756 | { | 794 | { |
795 | static xfrm_address_t saddr_wildcard = { }; | ||
757 | struct net *net = xp_net(pol); | 796 | struct net *net = xp_net(pol); |
758 | unsigned int h; | 797 | unsigned int h; |
759 | struct hlist_node *entry; | 798 | struct hlist_node *entry; |
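
The refactored xfrm_state_look_at() keeps the original candidate-selection rule: among valid states whose selector matches, prefer one that is not dying, and among states with the same dying flag prefer the most recently added one. A standalone rendition of just that comparison, with placeholder fields:

#include <stdio.h>

/* Minimal stand-ins for the fields the selection above compares. */
struct state { int dying; long add_time; const char *name; };

/* Prefer a state that is not dying; among equals, prefer the newest one. */
static const struct state *pick_best(const struct state *best, const struct state *x)
{
    if (!best ||
        best->dying > x->dying ||
        (best->dying == x->dying && best->add_time < x->add_time))
        return x;
    return best;
}

int main(void)
{
    struct state a = { 1, 100, "a" }, b = { 0, 50, "b" }, c = { 0, 80, "c" };
    const struct state *best = NULL;

    best = pick_best(best, &a);
    best = pick_best(best, &b);
    best = pick_best(best, &c);
    printf("best: %s\n", best->name);   /* "c": not dying, newest add_time */
    return 0;
}
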
@@ -773,40 +812,27 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, | |||
773 | xfrm_state_addr_check(x, daddr, saddr, family) && | 812 | xfrm_state_addr_check(x, daddr, saddr, family) && |
774 | tmpl->mode == x->props.mode && | 813 | tmpl->mode == x->props.mode && |
775 | tmpl->id.proto == x->id.proto && | 814 | tmpl->id.proto == x->id.proto && |
776 | (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) { | 815 | (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) |
777 | /* Resolution logic: | 816 | xfrm_state_look_at(pol, x, fl, family, daddr, saddr, |
778 | 1. There is a valid state with matching selector. | 817 | &best, &acquire_in_progress, &error); |
779 | Done. | 818 | } |
780 | 2. Valid state with inappropriate selector. Skip. | 819 | if (best) |
781 | 820 | goto found; | |
782 | Entering area of "sysdeps". | 821 | |
783 | 822 | h = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); | |
784 | 3. If state is not valid, selector is temporary, | 823 | hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { |
785 | it selects only session which triggered | 824 | if (x->props.family == family && |
786 | previous resolution. Key manager will do | 825 | x->props.reqid == tmpl->reqid && |
787 | something to install a state with proper | 826 | !(x->props.flags & XFRM_STATE_WILDRECV) && |
788 | selector. | 827 | xfrm_state_addr_check(x, daddr, saddr, family) && |
789 | */ | 828 | tmpl->mode == x->props.mode && |
790 | if (x->km.state == XFRM_STATE_VALID) { | 829 | tmpl->id.proto == x->id.proto && |
791 | if ((x->sel.family && !xfrm_selector_match(&x->sel, fl, x->sel.family)) || | 830 | (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) |
792 | !security_xfrm_state_pol_flow_match(x, pol, fl)) | 831 | xfrm_state_look_at(pol, x, fl, family, daddr, saddr, |
793 | continue; | 832 | &best, &acquire_in_progress, &error); |
794 | if (!best || | ||
795 | best->km.dying > x->km.dying || | ||
796 | (best->km.dying == x->km.dying && | ||
797 | best->curlft.add_time < x->curlft.add_time)) | ||
798 | best = x; | ||
799 | } else if (x->km.state == XFRM_STATE_ACQ) { | ||
800 | acquire_in_progress = 1; | ||
801 | } else if (x->km.state == XFRM_STATE_ERROR || | ||
802 | x->km.state == XFRM_STATE_EXPIRED) { | ||
803 | if (xfrm_selector_match(&x->sel, fl, x->sel.family) && | ||
804 | security_xfrm_state_pol_flow_match(x, pol, fl)) | ||
805 | error = -ESRCH; | ||
806 | } | ||
807 | } | ||
808 | } | 833 | } |
809 | 834 | ||
835 | found: | ||
810 | x = best; | 836 | x = best; |
811 | if (!x && !error && !acquire_in_progress) { | 837 | if (!x && !error && !acquire_in_progress) { |
812 | if (tmpl->id.spi && | 838 | if (tmpl->id.spi && |