author	Ingo Molnar <mingo@elte.hu>	2009-03-30 17:53:32 -0400
committer	Ingo Molnar <mingo@elte.hu>	2009-03-30 17:53:32 -0400
commit	65fb0d23fcddd8697c871047b700c78817bdaa43 (patch)
tree	119e6e5f276622c4c862f6c9b6d795264ba1603a /net
parent	8c083f081d0014057901c68a0a3e0f8ca7ac8d23 (diff)
parent	dfbbe89e197a77f2c8046a51c74e33e35f878080 (diff)
Merge branch 'linus' into cpumask-for-linus

Conflicts:
	arch/x86/kernel/cpu/common.c
Diffstat (limited to 'net')
-rw-r--r--  net/802/psnap.c | 15
-rw-r--r--  net/802/tr.c | 2
-rw-r--r--  net/8021q/vlan.c | 4
-rw-r--r--  net/8021q/vlan_core.c | 53
-rw-r--r--  net/9p/trans_fd.c | 2
-rw-r--r--  net/Kconfig | 18
-rw-r--r--  net/Makefile | 1
-rw-r--r--  net/appletalk/ddp.c | 16
-rw-r--r--  net/appletalk/dev.c | 10
-rw-r--r--  net/atm/br2684.c | 58
-rw-r--r--  net/atm/clip.c | 39
-rw-r--r--  net/atm/lec.c | 66
-rw-r--r--  net/atm/lec.h | 1
-rw-r--r--  net/atm/mpc.c | 32
-rw-r--r--  net/atm/mpc.h | 5
-rw-r--r--  net/ax25/af_ax25.c | 9
-rw-r--r--  net/ax25/ax25_iface.c | 13
-rw-r--r--  net/bluetooth/af_bluetooth.c | 17
-rw-r--r--  net/bluetooth/cmtp/core.c | 3
-rw-r--r--  net/bluetooth/hci_conn.c | 64
-rw-r--r--  net/bluetooth/hci_core.c | 3
-rw-r--r--  net/bluetooth/hci_event.c | 26
-rw-r--r--  net/bluetooth/hci_sysfs.c | 2
-rw-r--r--  net/bluetooth/l2cap.c | 602
-rw-r--r--  net/bluetooth/rfcomm/core.c | 179
-rw-r--r--  net/bluetooth/rfcomm/sock.c | 189
-rw-r--r--  net/bluetooth/rfcomm/tty.c | 5
-rw-r--r--  net/bluetooth/sco.c | 57
-rw-r--r--  net/bridge/br_if.c | 1
-rw-r--r--  net/bridge/br_netfilter.c | 2
-rw-r--r--  net/bridge/br_netlink.c | 3
-rw-r--r--  net/bridge/netfilter/ebt_log.c | 2
-rw-r--r--  net/bridge/netfilter/ebt_ulog.c | 14
-rw-r--r--  net/bridge/netfilter/ebtable_broute.c | 1
-rw-r--r--  net/bridge/netfilter/ebtable_filter.c | 1
-rw-r--r--  net/bridge/netfilter/ebtable_nat.c | 1
-rw-r--r--  net/can/af_can.c | 5
-rw-r--r--  net/can/raw.c | 3
-rw-r--r--  net/compat.c | 19
-rw-r--r--  net/core/Makefile | 3
-rw-r--r--  net/core/datagram.c | 2
-rw-r--r--  net/core/dev.c | 268
-rw-r--r--  net/core/drop_monitor.c | 263
-rw-r--r--  net/core/ethtool.c | 58
-rw-r--r--  net/core/fib_rules.c | 3
-rw-r--r--  net/core/neighbour.c | 15
-rw-r--r--  net/core/net-sysfs.c | 2
-rw-r--r--  net/core/net-traces.c | 29
-rw-r--r--  net/core/pktgen.c | 18
-rw-r--r--  net/core/rtnetlink.c | 9
-rw-r--r--  net/core/skbuff.c | 235
-rw-r--r--  net/core/sock.c | 95
-rw-r--r--  net/core/sysctl_net_core.c | 1
-rw-r--r--  net/core/utils.c | 1
-rw-r--r--  net/dccp/ackvec.h | 3
-rw-r--r--  net/dccp/dccp.h | 26
-rw-r--r--  net/dccp/feat.c | 232
-rw-r--r--  net/dccp/feat.h | 21
-rw-r--r--  net/dccp/minisocks.c | 11
-rw-r--r--  net/dccp/options.c | 8
-rw-r--r--  net/dccp/output.c | 37
-rw-r--r--  net/dccp/proto.c | 2
-rw-r--r--  net/dccp/sysctl.c | 43
-rw-r--r--  net/decnet/af_decnet.c | 25
-rw-r--r--  net/decnet/dn_dev.c | 6
-rw-r--r--  net/decnet/dn_route.c | 8
-rw-r--r--  net/decnet/dn_table.c | 3
-rw-r--r--  net/decnet/sysctl_net_decnet.c | 2
-rw-r--r--  net/dsa/Kconfig | 6
-rw-r--r--  net/dsa/dsa.c | 177
-rw-r--r--  net/dsa/dsa_priv.h | 97
-rw-r--r--  net/dsa/mv88e6060.c | 12
-rw-r--r--  net/dsa/mv88e6123_61_65.c | 92
-rw-r--r--  net/dsa/mv88e6131.c | 98
-rw-r--r--  net/dsa/slave.c | 34
-rw-r--r--  net/dsa/tag_dsa.c | 34
-rw-r--r--  net/dsa/tag_edsa.c | 34
-rw-r--r--  net/dsa/tag_trailer.c | 14
-rw-r--r--  net/econet/af_econet.c | 4
-rw-r--r--  net/ipv4/Kconfig | 52
-rw-r--r--  net/ipv4/af_inet.c | 28
-rw-r--r--  net/ipv4/arp.c | 13
-rw-r--r--  net/ipv4/cipso_ipv4.c | 130
-rw-r--r--  net/ipv4/devinet.c | 12
-rw-r--r--  net/ipv4/fib_frontend.c | 2
-rw-r--r--  net/ipv4/fib_semantics.c | 5
-rw-r--r--  net/ipv4/icmp.c | 2
-rw-r--r--  net/ipv4/inet_connection_sock.c | 42
-rw-r--r--  net/ipv4/inet_fragment.c | 1
-rw-r--r--  net/ipv4/inet_hashtables.c | 12
-rw-r--r--  net/ipv4/ip_fragment.c | 3
-rw-r--r--  net/ipv4/ip_gre.c | 136
-rw-r--r--  net/ipv4/ip_output.c | 6
-rw-r--r--  net/ipv4/ipconfig.c | 8
-rw-r--r--  net/ipv4/ipip.c | 7
-rw-r--r--  net/ipv4/ipmr.c | 464
-rw-r--r--  net/ipv4/netfilter/Kconfig | 30
-rw-r--r--  net/ipv4/netfilter/Makefile | 2
-rw-r--r--  net/ipv4/netfilter/arp_tables.c | 155
-rw-r--r--  net/ipv4/netfilter/arptable_filter.c | 2
-rw-r--r--  net/ipv4/netfilter/ip_queue.c | 2
-rw-r--r--  net/ipv4/netfilter/ip_tables.c | 138
-rw-r--r--  net/ipv4/netfilter/ipt_LOG.c | 2
-rw-r--r--  net/ipv4/netfilter/ipt_TTL.c | 97
-rw-r--r--  net/ipv4/netfilter/ipt_ULOG.c | 2
-rw-r--r--  net/ipv4/netfilter/ipt_ttl.c | 63
-rw-r--r--  net/ipv4/netfilter/iptable_filter.c | 1
-rw-r--r--  net/ipv4/netfilter/iptable_mangle.c | 1
-rw-r--r--  net/ipv4/netfilter/iptable_raw.c | 1
-rw-r--r--  net/ipv4/netfilter/iptable_security.c | 1
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 10
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 63
-rw-r--r--  net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 6
-rw-r--r--  net/ipv4/netfilter/nf_nat_core.c | 2
-rw-r--r--  net/ipv4/netfilter/nf_nat_rule.c | 1
-rw-r--r--  net/ipv4/netfilter/nf_nat_snmp_basic.c | 4
-rw-r--r--  net/ipv4/proc.c | 4
-rw-r--r--  net/ipv4/raw.c | 1
-rw-r--r--  net/ipv4/route.c | 15
-rw-r--r--  net/ipv4/syncookies.c | 9
-rw-r--r--  net/ipv4/tcp.c | 89
-rw-r--r--  net/ipv4/tcp_bic.c | 11
-rw-r--r--  net/ipv4/tcp_cong.c | 21
-rw-r--r--  net/ipv4/tcp_cubic.c | 11
-rw-r--r--  net/ipv4/tcp_htcp.c | 3
-rw-r--r--  net/ipv4/tcp_input.c | 198
-rw-r--r--  net/ipv4/tcp_ipv4.c | 18
-rw-r--r--  net/ipv4/tcp_minisocks.c | 9
-rw-r--r--  net/ipv4/tcp_output.c | 94
-rw-r--r--  net/ipv4/tcp_probe.c | 5
-rw-r--r--  net/ipv4/tcp_scalable.c | 10
-rw-r--r--  net/ipv4/tcp_timer.c | 23
-rw-r--r--  net/ipv4/tcp_veno.c | 7
-rw-r--r--  net/ipv4/tcp_yeah.c | 9
-rw-r--r--  net/ipv4/udp.c | 15
-rw-r--r--  net/ipv4/xfrm4_policy.c | 2
-rw-r--r--  net/ipv6/addrconf.c | 116
-rw-r--r--  net/ipv6/af_inet6.c | 63
-rw-r--r--  net/ipv6/ip6_input.c | 4
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 3
-rw-r--r--  net/ipv6/ndisc.c | 22
-rw-r--r--  net/ipv6/netfilter/Kconfig | 38
-rw-r--r--  net/ipv6/netfilter/Makefile | 2
-rw-r--r--  net/ipv6/netfilter/ip6_queue.c | 1
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 137
-rw-r--r--  net/ipv6/netfilter/ip6t_HL.c | 95
-rw-r--r--  net/ipv6/netfilter/ip6t_LOG.c | 2
-rw-r--r--  net/ipv6/netfilter/ip6t_hl.c | 68
-rw-r--r--  net/ipv6/netfilter/ip6table_filter.c | 1
-rw-r--r--  net/ipv6/netfilter/ip6table_mangle.c | 1
-rw-r--r--  net/ipv6/netfilter/ip6table_raw.c | 1
-rw-r--r--  net/ipv6/netfilter/ip6table_security.c | 1
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 7
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 10
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c | 8
-rw-r--r--  net/ipv6/reassembly.c | 7
-rw-r--r--  net/ipv6/route.c | 9
-rw-r--r--  net/ipv6/sit.c | 7
-rw-r--r--  net/ipv6/tcp_ipv6.c | 8
-rw-r--r--  net/ipv6/udp.c | 28
-rw-r--r--  net/ipv6/xfrm6_policy.c | 2
-rw-r--r--  net/ipv6/xfrm6_state.c | 2
-rw-r--r--  net/ipx/af_ipx.c | 20
-rw-r--r--  net/irda/irda_device.c | 5
-rw-r--r--  net/irda/irlan/irlan_eth.c | 19
-rw-r--r--  net/irda/irmod.c | 4
-rw-r--r--  net/iucv/af_iucv.c | 3
-rw-r--r--  net/key/af_key.c | 6
-rw-r--r--  net/llc/af_llc.c | 6
-rw-r--r--  net/llc/llc_conn.c | 3
-rw-r--r--  net/llc/llc_core.c | 8
-rw-r--r--  net/mac80211/Makefile | 6
-rw-r--r--  net/mac80211/aes_cmac.c | 135
-rw-r--r--  net/mac80211/aes_cmac.h | 19
-rw-r--r--  net/mac80211/agg-rx.c | 310
-rw-r--r--  net/mac80211/agg-tx.c | 695
-rw-r--r--  net/mac80211/cfg.c | 237
-rw-r--r--  net/mac80211/debugfs.c | 83
-rw-r--r--  net/mac80211/debugfs_key.c | 79
-rw-r--r--  net/mac80211/debugfs_key.h | 10
-rw-r--r--  net/mac80211/debugfs_netdev.c | 48
-rw-r--r--  net/mac80211/debugfs_sta.c | 5
-rw-r--r--  net/mac80211/ht.c | 903
-rw-r--r--  net/mac80211/ibss.c | 908
-rw-r--r--  net/mac80211/ieee80211_i.h | 314
-rw-r--r--  net/mac80211/iface.c | 168
-rw-r--r--  net/mac80211/key.c | 115
-rw-r--r--  net/mac80211/key.h | 16
-rw-r--r--  net/mac80211/main.c | 185
-rw-r--r--  net/mac80211/mesh.c | 15
-rw-r--r--  net/mac80211/mesh.h | 10
-rw-r--r--  net/mac80211/mesh_hwmp.c | 7
-rw-r--r--  net/mac80211/mesh_plink.c | 38
-rw-r--r--  net/mac80211/mlme.c | 2026
-rw-r--r--  net/mac80211/pm.c | 179
-rw-r--r--  net/mac80211/rate.c | 6
-rw-r--r--  net/mac80211/rate.h | 16
-rw-r--r--  net/mac80211/rx.c | 423
-rw-r--r--  net/mac80211/scan.c | 763
-rw-r--r--  net/mac80211/spectmgmt.c | 103
-rw-r--r--  net/mac80211/sta_info.c | 45
-rw-r--r--  net/mac80211/sta_info.h | 16
-rw-r--r--  net/mac80211/tx.c | 675
-rw-r--r--  net/mac80211/util.c | 243
-rw-r--r--  net/mac80211/wep.c | 21
-rw-r--r--  net/mac80211/wext.c | 492
-rw-r--r--  net/mac80211/wme.c | 170
-rw-r--r--  net/mac80211/wme.h | 6
-rw-r--r--  net/mac80211/wpa.c | 180
-rw-r--r--  net/mac80211/wpa.h | 5
-rw-r--r--  net/netfilter/Kconfig | 63
-rw-r--r--  net/netfilter/Makefile | 4
-rw-r--r--  net/netfilter/core.c | 2
-rw-r--r--  net/netfilter/ipvs/ip_vs_sync.c | 4
-rw-r--r--  net/netfilter/nf_conntrack_amanda.c | 4
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 141
-rw-r--r--  net/netfilter/nf_conntrack_expect.c | 5
-rw-r--r--  net/netfilter/nf_conntrack_h323_main.c | 8
-rw-r--r--  net/netfilter/nf_conntrack_helper.c | 8
-rw-r--r--  net/netfilter/nf_conntrack_netbios_ns.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 252
-rw-r--r--  net/netfilter/nf_conntrack_pptp.c | 4
-rw-r--r--  net/netfilter/nf_conntrack_proto.c | 37
-rw-r--r--  net/netfilter/nf_conntrack_proto_dccp.c | 154
-rw-r--r--  net/netfilter/nf_conntrack_proto_generic.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_proto_gre.c | 1
-rw-r--r--  net/netfilter/nf_conntrack_proto_sctp.c | 10
-rw-r--r--  net/netfilter/nf_conntrack_proto_tcp.c | 24
-rw-r--r--  net/netfilter/nf_conntrack_proto_udp.c | 4
-rw-r--r--  net/netfilter/nf_conntrack_proto_udplite.c | 1
-rw-r--r--  net/netfilter/nf_conntrack_standalone.c | 57
-rw-r--r--  net/netfilter/nf_log.c | 201
-rw-r--r--  net/netfilter/nf_tproxy_core.c | 1
-rw-r--r--  net/netfilter/nfnetlink.c | 6
-rw-r--r--  net/netfilter/nfnetlink_log.c | 18
-rw-r--r--  net/netfilter/x_tables.c | 26
-rw-r--r--  net/netfilter/xt_HL.c | 171
-rw-r--r--  net/netfilter/xt_LED.c | 161
-rw-r--r--  net/netfilter/xt_cluster.c | 164
-rw-r--r--  net/netfilter/xt_connlimit.c | 6
-rw-r--r--  net/netfilter/xt_hashlimit.c | 7
-rw-r--r--  net/netfilter/xt_hl.c | 108
-rw-r--r--  net/netfilter/xt_limit.c | 40
-rw-r--r--  net/netfilter/xt_physdev.c | 20
-rw-r--r--  net/netfilter/xt_quota.c | 31
-rw-r--r--  net/netfilter/xt_statistic.c | 28
-rw-r--r--  net/netlabel/netlabel_kapi.c | 165
-rw-r--r--  net/netlink/Makefile | 2
-rw-r--r--  net/netlink/af_netlink.c | 88
-rw-r--r--  net/netlink/attr.c | 473
-rw-r--r--  net/netrom/af_netrom.c | 8
-rw-r--r--  net/netrom/nr_dev.c | 26
-rw-r--r--  net/packet/af_packet.c | 5
-rw-r--r--  net/phonet/af_phonet.c | 34
-rw-r--r--  net/phonet/pn_dev.c | 119
-rw-r--r--  net/phonet/pn_netlink.c | 29
-rw-r--r--  net/rds/Kconfig | 14
-rw-r--r--  net/rds/Makefile | 14
-rw-r--r--  net/rds/af_rds.c | 586
-rw-r--r--  net/rds/bind.c | 199
-rw-r--r--  net/rds/cong.c | 404
-rw-r--r--  net/rds/connection.c | 487
-rw-r--r--  net/rds/ib.c | 323
-rw-r--r--  net/rds/ib.h | 367
-rw-r--r--  net/rds/ib_cm.c | 726
-rw-r--r--  net/rds/ib_rdma.c | 641
-rw-r--r--  net/rds/ib_recv.c | 869
-rw-r--r--  net/rds/ib_ring.c | 168
-rw-r--r--  net/rds/ib_send.c | 874
-rw-r--r--  net/rds/ib_stats.c | 95
-rw-r--r--  net/rds/ib_sysctl.c | 137
-rw-r--r--  net/rds/info.c | 241
-rw-r--r--  net/rds/info.h | 30
-rw-r--r--  net/rds/iw.c | 333
-rw-r--r--  net/rds/iw.h | 395
-rw-r--r--  net/rds/iw_cm.c | 750
-rw-r--r--  net/rds/iw_rdma.c | 888
-rw-r--r--  net/rds/iw_recv.c | 869
-rw-r--r--  net/rds/iw_ring.c | 169
-rw-r--r--  net/rds/iw_send.c | 975
-rw-r--r--  net/rds/iw_stats.c | 95
-rw-r--r--  net/rds/iw_sysctl.c | 137
-rw-r--r--  net/rds/loop.c | 188
-rw-r--r--  net/rds/loop.h | 9
-rw-r--r--  net/rds/message.c | 402
-rw-r--r--  net/rds/page.c | 221
-rw-r--r--  net/rds/rdma.c | 679
-rw-r--r--  net/rds/rdma.h | 84
-rw-r--r--  net/rds/rdma_transport.c | 214
-rw-r--r--  net/rds/rdma_transport.h | 28
-rw-r--r--  net/rds/rds.h | 686
-rw-r--r--  net/rds/recv.c | 542
-rw-r--r--  net/rds/send.c | 1003
-rw-r--r--  net/rds/stats.c | 148
-rw-r--r--  net/rds/sysctl.c | 122
-rw-r--r--  net/rds/threads.c | 265
-rw-r--r--  net/rds/transport.c | 117
-rw-r--r--  net/rose/af_rose.c | 7
-rw-r--r--  net/rose/rose_dev.c | 22
-rw-r--r--  net/sched/sch_api.c | 11
-rw-r--r--  net/sched/sch_cbq.c | 7
-rw-r--r--  net/sched/sch_drr.c | 7
-rw-r--r--  net/sched/sch_hfsc.c | 13
-rw-r--r--  net/sched/sch_htb.c | 49
-rw-r--r--  net/sched/sch_multiq.c | 2
-rw-r--r--  net/sched/sch_tbf.c | 9
-rw-r--r--  net/sctp/debug.c | 4
-rw-r--r--  net/sctp/endpointola.c | 3
-rw-r--r--  net/sctp/input.c | 14
-rw-r--r--  net/sctp/ipv6.c | 36
-rw-r--r--  net/sctp/output.c | 21
-rw-r--r--  net/sctp/outqueue.c | 6
-rw-r--r--  net/sctp/protocol.c | 31
-rw-r--r--  net/sctp/sm_make_chunk.c | 37
-rw-r--r--  net/sctp/sm_sideeffect.c | 43
-rw-r--r--  net/sctp/sm_statefuns.c | 6
-rw-r--r--  net/sctp/socket.c | 216
-rw-r--r--  net/sctp/transport.c | 9
-rw-r--r--  net/socket.c | 95
-rw-r--r--  net/sunrpc/rpc_pipe.c | 2
-rw-r--r--  net/sunrpc/sched.c | 33
-rw-r--r--  net/sunrpc/xprt.c | 2
-rw-r--r--  net/sunrpc/xprtsock.c | 76
-rw-r--r--  net/sysctl_net.c | 2
-rw-r--r--  net/tipc/bcast.c | 4
-rw-r--r--  net/tipc/bcast.h | 2
-rw-r--r--  net/tipc/dbg.c | 2
-rw-r--r--  net/tipc/node.c | 2
-rw-r--r--  net/unix/af_unix.c | 3
-rw-r--r--  net/wanrouter/wanmain.c | 8
-rw-r--r--  net/wanrouter/wanproc.c | 2
-rw-r--r--  net/wimax/op-msg.c | 9
-rw-r--r--  net/wimax/stack.c | 12
-rw-r--r--  net/wireless/Kconfig | 60
-rw-r--r--  net/wireless/Makefile | 3
-rw-r--r--  net/wireless/core.c | 162
-rw-r--r--  net/wireless/core.h | 64
-rw-r--r--  net/wireless/lib80211_crypt_ccmp.c | 2
-rw-r--r--  net/wireless/lib80211_crypt_tkip.c | 4
-rw-r--r--  net/wireless/mlme.c | 46
-rw-r--r--  net/wireless/nl80211.c | 1335
-rw-r--r--  net/wireless/nl80211.h | 31
-rw-r--r--  net/wireless/reg.c | 1255
-rw-r--r--  net/wireless/reg.h | 45
-rw-r--r--  net/wireless/scan.c | 891
-rw-r--r--  net/wireless/sysfs.c | 39
-rw-r--r--  net/wireless/util.c | 2
-rw-r--r--  net/wireless/wext-compat.c | 108
-rw-r--r--  net/x25/af_x25.c | 21
-rw-r--r--  net/xfrm/xfrm_state.c | 92
350 files changed, 31016 insertions, 8195 deletions
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 70980baeb682..6fea0750662b 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -29,7 +29,7 @@ static struct llc_sap *snap_sap;
 /*
  * Find a snap client by matching the 5 bytes.
  */
-static struct datalink_proto *find_snap_client(unsigned char *desc)
+static struct datalink_proto *find_snap_client(const unsigned char *desc)
 {
 	struct datalink_proto *proto = NULL, *p;
 
@@ -51,7 +51,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev,
 	int rc = 1;
 	struct datalink_proto *proto;
 	static struct packet_type snap_packet_type = {
-		.type = __constant_htons(ETH_P_SNAP),
+		.type = cpu_to_be16(ETH_P_SNAP),
 	};
 
 	if (unlikely(!pskb_may_pull(skb, 5)))
@@ -95,15 +95,16 @@ static int snap_request(struct datalink_proto *dl,
 EXPORT_SYMBOL(register_snap_client);
 EXPORT_SYMBOL(unregister_snap_client);
 
-static char snap_err_msg[] __initdata =
+static const char snap_err_msg[] __initconst =
 	KERN_CRIT "SNAP - unable to register with 802.2\n";
 
 static int __init snap_init(void)
 {
 	snap_sap = llc_sap_open(0xAA, snap_rcv);
-
-	if (!snap_sap)
+	if (!snap_sap) {
 		printk(snap_err_msg);
+		return -EBUSY;
+	}
 
 	return 0;
 }
@@ -121,7 +122,7 @@ module_exit(snap_exit);
 /*
  * Register SNAP clients. We don't yet use this for IP.
  */
-struct datalink_proto *register_snap_client(unsigned char *desc,
+struct datalink_proto *register_snap_client(const unsigned char *desc,
 					    int (*rcvfunc)(struct sk_buff *,
 							   struct net_device *,
 							   struct packet_type *,
@@ -136,7 +137,7 @@ struct datalink_proto *register_snap_client(unsigned char *desc,
 
 	proto = kmalloc(sizeof(*proto), GFP_ATOMIC);
 	if (proto) {
-		memcpy(proto->type, desc,5);
+		memcpy(proto->type, desc, 5);
 		proto->rcvfunc = rcvfunc;
 		proto->header_length = 5 + 3; /* snap + 802.2 */
 		proto->request = snap_request;
diff --git a/net/802/tr.c b/net/802/tr.c
index f47ae289d83b..e7eb13084d71 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -486,6 +486,7 @@ static struct rif_cache *rif_get_idx(loff_t pos)
 }
 
 static void *rif_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(&rif_lock)
 {
 	spin_lock_irq(&rif_lock);
 
@@ -517,6 +518,7 @@ static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void rif_seq_stop(struct seq_file *seq, void *v)
+	__releases(&rif_lock)
 {
 	spin_unlock_irq(&rif_lock);
 }
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 41e8f65bd3f0..2b7390e377b3 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -51,8 +51,8 @@ const char vlan_version[] = DRV_VERSION;
 static const char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
 static const char vlan_buggyright[] = "David S. Miller <davem@redhat.com>";
 
-static struct packet_type vlan_packet_type = {
-	.type = __constant_htons(ETH_P_8021Q),
+static struct packet_type vlan_packet_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_8021Q),
 	.func = vlan_skb_recv, /* VLAN receive method */
 };
 
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 2886d2fb9ab5..654e45f5719d 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -89,38 +89,27 @@ static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
 		goto drop;
 
 	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow = p->dev == skb->dev;
+		NAPI_GRO_CB(p)->same_flow =
+			p->dev == skb->dev && !compare_ether_header(
+				skb_mac_header(p), skb_gro_mac_header(skb));
 		NAPI_GRO_CB(p)->flush = 0;
 	}
 
 	return dev_gro_receive(napi, skb);
 
 drop:
-	return 2;
+	return GRO_DROP;
 }
 
 int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 		     unsigned int vlan_tci, struct sk_buff *skb)
 {
-	int err = NET_RX_SUCCESS;
+	if (netpoll_rx_on(skb))
+		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci);
 
-	if (netpoll_receive_skb(skb))
-		return NET_RX_DROP;
-
-	switch (vlan_gro_common(napi, grp, vlan_tci, skb)) {
-	case -1:
-		return netif_receive_skb(skb);
+	skb_gro_reset_offset(skb);
 
-	case 2:
-		err = NET_RX_DROP;
-		/* fall through */
-
-	case 1:
-		kfree_skb(skb);
-		break;
-	}
-
-	return err;
+	return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb);
 }
 EXPORT_SYMBOL(vlan_gro_receive);
 
@@ -128,30 +117,14 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 		   unsigned int vlan_tci, struct napi_gro_fraginfo *info)
 {
 	struct sk_buff *skb = napi_fraginfo_skb(napi, info);
-	int err = NET_RX_DROP;
 
 	if (!skb)
-		goto out;
-
-	if (netpoll_receive_skb(skb))
-		goto out;
-
-	err = NET_RX_SUCCESS;
-
-	switch (vlan_gro_common(napi, grp, vlan_tci, skb)) {
-	case -1:
-		return netif_receive_skb(skb);
+		return NET_RX_DROP;
 
-	case 2:
-		err = NET_RX_DROP;
-		/* fall through */
+	if (netpoll_rx_on(skb))
+		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci);
 
-	case 1:
-		napi_reuse_skb(napi, skb);
-		break;
-	}
-
-out:
-	return err;
+	return napi_frags_finish(napi, skb,
+				 vlan_gro_common(napi, grp, vlan_tci, skb));
 }
 EXPORT_SYMBOL(vlan_gro_frags);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 1df0356f242b..c613ed08a5ee 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -417,7 +417,7 @@ static int p9_fd_write(struct p9_client *client, void *v, int len)
 	oldfs = get_fs();
 	set_fs(get_ds());
 	/* The cast to a user pointer is valid due to the set_fs() */
-	ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos);
+	ret = vfs_write(ts->wr, (__force void __user *)v, len, &ts->wr->f_pos);
 	set_fs(oldfs);
 
 	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN)
diff --git a/net/Kconfig b/net/Kconfig
index cdb8fdef6c4a..ec93e7e38b38 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -4,6 +4,7 @@
 
 menuconfig NET
 	bool "Networking support"
+	select NLATTR
 	---help---
 	  Unless you really know what you are doing, you should say Y here.
 	  The reason is that some programs need kernel networking support even
@@ -24,9 +25,6 @@ if NET
 
 menu "Networking options"
 
-config COMPAT_NET_DEV_OPS
-	def_bool y
-
 source "net/packet/Kconfig"
 source "net/unix/Kconfig"
 source "net/xfrm/Kconfig"
@@ -171,6 +169,7 @@ endif
 
 source "net/dccp/Kconfig"
 source "net/sctp/Kconfig"
+source "net/rds/Kconfig"
 source "net/tipc/Kconfig"
 source "net/atm/Kconfig"
 source "net/802/Kconfig"
@@ -185,6 +184,7 @@ source "net/x25/Kconfig"
 source "net/lapb/Kconfig"
 source "net/econet/Kconfig"
 source "net/wanrouter/Kconfig"
+source "net/phonet/Kconfig"
 source "net/sched/Kconfig"
 source "net/dcb/Kconfig"
 
@@ -220,6 +220,17 @@ config NET_TCPPROBE
 	  To compile this code as a module, choose M here: the
 	  module will be called tcp_probe.
 
+config NET_DROP_MONITOR
+	boolean "Network packet drop alerting service"
+	depends on INET && EXPERIMENTAL && TRACEPOINTS
+	---help---
+	  This feature provides an alerting service to userspace in the
+	  event that packets are discarded in the network stack. Alerts
+	  are broadcast via netlink socket to any listening user space
+	  process. If you don't need network drop alerts, or if you are ok
+	  just checking the various proc files and other utilities for
+	  drop statistics, say N here.
+
 endmenu
 
 endmenu
@@ -229,7 +240,6 @@ source "net/can/Kconfig"
 source "net/irda/Kconfig"
 source "net/bluetooth/Kconfig"
 source "net/rxrpc/Kconfig"
-source "net/phonet/Kconfig"
 
 config FIB_RULES
 	bool
diff --git a/net/Makefile b/net/Makefile
index 0fcce89d7169..9e00a55a901b 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -49,6 +49,7 @@ obj-y += 8021q/
 endif
 obj-$(CONFIG_IP_DCCP) += dccp/
 obj-$(CONFIG_IP_SCTP) += sctp/
+obj-$(CONFIG_RDS) += rds/
 obj-y += wireless/
 obj-$(CONFIG_MAC80211) += mac80211/
 obj-$(CONFIG_TIPC) += tipc/
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 5abce07fb50a..d6a9243641af 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1571,14 +1571,10 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
 		    usat->sat_family != AF_APPLETALK)
 			return -EINVAL;
 
-		/* netatalk doesn't implement this check */
+		/* netatalk didn't implement this check */
 		if (usat->sat_addr.s_node == ATADDR_BCAST &&
 		    !sock_flag(sk, SOCK_BROADCAST)) {
-			printk(KERN_INFO "SO_BROADCAST: Fix your netatalk as "
-			       "it will break before 2.2\n");
-#if 0
 			return -EPERM;
-#endif
 		}
 	} else {
 		if (sk->sk_state != TCP_ESTABLISHED)
@@ -1860,13 +1856,13 @@ static struct notifier_block ddp_notifier = {
 	.notifier_call = ddp_device_event,
 };
 
-static struct packet_type ltalk_packet_type = {
-	.type = __constant_htons(ETH_P_LOCALTALK),
+static struct packet_type ltalk_packet_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_LOCALTALK),
 	.func = ltalk_rcv,
 };
 
-static struct packet_type ppptalk_packet_type = {
-	.type = __constant_htons(ETH_P_PPPTALK),
+static struct packet_type ppptalk_packet_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_PPPTALK),
 	.func = atalk_rcv,
 };
 
@@ -1877,7 +1873,7 @@ EXPORT_SYMBOL(aarp_send_ddp);
 EXPORT_SYMBOL(atrtr_get_dev);
 EXPORT_SYMBOL(atalk_find_dev_addr);
 
-static char atalk_err_snap[] __initdata =
+static const char atalk_err_snap[] __initconst =
 	KERN_CRIT "Unable to register DDP with SNAP.\n";
 
 /* Called by proto.c on kernel start up */
diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c
index d856a62ab50f..72277d70c980 100644
--- a/net/appletalk/dev.c
+++ b/net/appletalk/dev.c
@@ -9,22 +9,20 @@
 #include <linux/if_arp.h>
 #include <linux/if_ltalk.h>
 
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
 static int ltalk_change_mtu(struct net_device *dev, int mtu)
 {
 	return -EINVAL;
 }
-
-static int ltalk_mac_addr(struct net_device *dev, void *addr)
-{
-	return -EINVAL;
-}
+#endif
 
 static void ltalk_setup(struct net_device *dev)
 {
 	/* Fill in the fields of the device structure with localtalk-generic values. */
 
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
 	dev->change_mtu = ltalk_change_mtu;
-	dev->set_mac_address = ltalk_mac_addr;
+#endif
 
 	dev->type = ARPHRD_LOCALTLK;
 	dev->hard_header_len = LTALK_HLEN;
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index ea9438fc6855..334fcd4a4ea4 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -83,7 +83,6 @@ struct br2684_dev {
 	struct list_head br2684_devs;
 	int number;
 	struct list_head brvccs; /* one device <=> one vcc (before xmas) */
-	struct net_device_stats stats;
 	int mac_was_set;
 	enum br2684_payload payload;
 };
@@ -148,9 +147,10 @@ static struct net_device *br2684_find_dev(const struct br2684_if_spec *s)
  * the way for multiple vcc's per itf. Returns true if we can send,
  * otherwise false
  */
-static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
+static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
 			   struct br2684_vcc *brvcc)
 {
+	struct br2684_dev *brdev = BRPRIV(dev);
 	struct atm_vcc *atmvcc;
 	int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2;
 
@@ -211,8 +211,8 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev,
 	}
 	atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
 	ATM_SKB(skb)->atm_options = atmvcc->atm_options;
-	brdev->stats.tx_packets++;
-	brdev->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
 	atmvcc->send(atmvcc, skb);
 	return 1;
 }
@@ -233,14 +233,14 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	brvcc = pick_outgoing_vcc(skb, brdev);
 	if (brvcc == NULL) {
 		pr_debug("no vcc attached to dev %s\n", dev->name);
-		brdev->stats.tx_errors++;
-		brdev->stats.tx_carrier_errors++;
+		dev->stats.tx_errors++;
+		dev->stats.tx_carrier_errors++;
 		/* netif_stop_queue(dev); */
 		dev_kfree_skb(skb);
 		read_unlock(&devs_lock);
 		return 0;
 	}
-	if (!br2684_xmit_vcc(skb, brdev, brvcc)) {
+	if (!br2684_xmit_vcc(skb, dev, brvcc)) {
 		/*
 		 * We should probably use netif_*_queue() here, but that
 		 * involves added complication. We need to walk before
@@ -248,27 +248,20 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		 *
 		 * Don't free here! this pointer might be no longer valid!
 		 */
-		brdev->stats.tx_errors++;
-		brdev->stats.tx_fifo_errors++;
+		dev->stats.tx_errors++;
+		dev->stats.tx_fifo_errors++;
 	}
 	read_unlock(&devs_lock);
 	return 0;
 }
 
-static struct net_device_stats *br2684_get_stats(struct net_device *dev)
-{
-	pr_debug("br2684_get_stats\n");
-	return &BRPRIV(dev)->stats;
-}
-
 /*
  * We remember when the MAC gets set, so we don't override it later with
  * the ESI of the ATM card of the first VC
  */
-static int (*my_eth_mac_addr) (struct net_device *, void *);
 static int br2684_mac_addr(struct net_device *dev, void *p)
 {
-	int err = my_eth_mac_addr(dev, p);
+	int err = eth_mac_addr(dev, p);
 	if (!err)
 		BRPRIV(dev)->mac_was_set = 1;
 	return err;
@@ -430,17 +423,17 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
 	/* sigh, interface is down? */
 	if (unlikely(!(net_dev->flags & IFF_UP)))
 		goto dropped;
-	brdev->stats.rx_packets++;
-	brdev->stats.rx_bytes += skb->len;
+	net_dev->stats.rx_packets++;
+	net_dev->stats.rx_bytes += skb->len;
 	memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
 	netif_rx(skb);
 	return;
 
 dropped:
-	brdev->stats.rx_dropped++;
+	net_dev->stats.rx_dropped++;
 	goto free_skb;
 error:
-	brdev->stats.rx_errors++;
+	net_dev->stats.rx_errors++;
 free_skb:
 	dev_kfree_skb(skb);
 	return;
@@ -531,8 +524,8 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 
 		skb->next = skb->prev = NULL;
 		br2684_push(atmvcc, skb);
-		BRPRIV(skb->dev)->stats.rx_bytes -= skb->len;
-		BRPRIV(skb->dev)->stats.rx_packets--;
+		skb->dev->stats.rx_bytes -= skb->len;
+		skb->dev->stats.rx_packets--;
 
 		skb = next;
 	}
@@ -544,17 +537,20 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg)
 	return err;
 }
 
+static const struct net_device_ops br2684_netdev_ops = {
+	.ndo_start_xmit = br2684_start_xmit,
+	.ndo_set_mac_address = br2684_mac_addr,
+	.ndo_change_mtu = eth_change_mtu,
+	.ndo_validate_addr = eth_validate_addr,
+};
+
 static void br2684_setup(struct net_device *netdev)
 {
 	struct br2684_dev *brdev = BRPRIV(netdev);
 
 	ether_setup(netdev);
-	brdev->net_dev = netdev;
 
-	my_eth_mac_addr = netdev->set_mac_address;
-	netdev->set_mac_address = br2684_mac_addr;
-	netdev->hard_start_xmit = br2684_start_xmit;
-	netdev->get_stats = br2684_get_stats;
+	netdev->netdev_ops = &br2684_netdev_ops;
 
 	INIT_LIST_HEAD(&brdev->brvccs);
 }
@@ -565,10 +561,8 @@ static void br2684_setup_routed(struct net_device *netdev)
 	brdev->net_dev = netdev;
 
 	netdev->hard_header_len = 0;
-	my_eth_mac_addr = netdev->set_mac_address;
-	netdev->set_mac_address = br2684_mac_addr;
-	netdev->hard_start_xmit = br2684_start_xmit;
-	netdev->get_stats = br2684_get_stats;
+
+	netdev->netdev_ops = &br2684_netdev_ops;
 	netdev->addr_len = 0;
 	netdev->mtu = 1500;
 	netdev->type = ARPHRD_PPP;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 2d33a83be799..3dc0a3a42a57 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -214,15 +214,15 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb)
 		skb->protocol = ((__be16 *) skb->data)[3];
 		skb_pull(skb, RFC1483LLC_LEN);
 		if (skb->protocol == htons(ETH_P_ARP)) {
-			PRIV(skb->dev)->stats.rx_packets++;
-			PRIV(skb->dev)->stats.rx_bytes += skb->len;
+			skb->dev->stats.rx_packets++;
+			skb->dev->stats.rx_bytes += skb->len;
 			clip_arp_rcv(skb);
 			return;
 		}
 	}
 	clip_vcc->last_use = jiffies;
-	PRIV(skb->dev)->stats.rx_packets++;
-	PRIV(skb->dev)->stats.rx_bytes += skb->len;
+	skb->dev->stats.rx_packets++;
+	skb->dev->stats.rx_bytes += skb->len;
 	memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
 	netif_rx(skb);
 }
@@ -372,7 +372,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (!skb->dst) {
 		printk(KERN_ERR "clip_start_xmit: skb->dst == NULL\n");
 		dev_kfree_skb(skb);
-		clip_priv->stats.tx_dropped++;
+		dev->stats.tx_dropped++;
 		return 0;
 	}
 	if (!skb->dst->neighbour) {
@@ -380,13 +380,13 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		skb->dst->neighbour = clip_find_neighbour(skb->dst, 1);
 		if (!skb->dst->neighbour) {
 			dev_kfree_skb(skb); /* lost that one */
-			clip_priv->stats.tx_dropped++;
+			dev->stats.tx_dropped++;
 			return 0;
 		}
 #endif
 		printk(KERN_ERR "clip_start_xmit: NO NEIGHBOUR !\n");
 		dev_kfree_skb(skb);
-		clip_priv->stats.tx_dropped++;
+		dev->stats.tx_dropped++;
 		return 0;
 	}
 	entry = NEIGH2ENTRY(skb->dst->neighbour);
@@ -400,7 +400,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			skb_queue_tail(&entry->neigh->arp_queue, skb);
 		else {
 			dev_kfree_skb(skb);
-			clip_priv->stats.tx_dropped++;
+			dev->stats.tx_dropped++;
 		}
 		return 0;
 	}
@@ -423,8 +423,8 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
 		printk(KERN_WARNING "clip_start_xmit: XOFF->XOFF transition\n");
 		return 0;
 	}
-	clip_priv->stats.tx_packets++;
-	clip_priv->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
 	vcc->send(vcc, skb);
 	if (atm_may_send(vcc, 0)) {
 		entry->vccs->xoff = 0;
@@ -443,11 +443,6 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
-static struct net_device_stats *clip_get_stats(struct net_device *dev)
-{
-	return &PRIV(dev)->stats;
-}
-
 static int clip_mkip(struct atm_vcc *vcc, int timeout)
 {
 	struct clip_vcc *clip_vcc;
@@ -501,8 +496,8 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout)
 
 			skb_get(skb);
 			clip_push(vcc, skb);
-			PRIV(skb->dev)->stats.rx_packets--;
-			PRIV(skb->dev)->stats.rx_bytes -= len;
+			skb->dev->stats.rx_packets--;
+			skb->dev->stats.rx_bytes -= len;
 			kfree_skb(skb);
 		}
 
@@ -557,11 +552,13 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip)
 	return error;
 }
 
+static const struct net_device_ops clip_netdev_ops = {
+	.ndo_start_xmit = clip_start_xmit,
+};
+
 static void clip_setup(struct net_device *dev)
 {
-	dev->hard_start_xmit = clip_start_xmit;
-	/* sg_xmit ... */
-	dev->get_stats = clip_get_stats;
+	dev->netdev_ops = &clip_netdev_ops;
 	dev->type = ARPHRD_ATM;
 	dev->hard_header_len = RFC1483LLC_LEN;
 	dev->mtu = RFC1626_MTU;
@@ -621,7 +618,7 @@ static int clip_device_event(struct notifier_block *this, unsigned long event,
 	}
 
 	/* ignore non-CLIP devices */
-	if (dev->type != ARPHRD_ATM || dev->hard_start_xmit != clip_start_xmit)
+	if (dev->type != ARPHRD_ATM || dev->netdev_ops != &clip_netdev_ops)
 		return NOTIFY_DONE;
 
 	switch (event) {
diff --git a/net/atm/lec.c b/net/atm/lec.c
index e5e301550e8a..199b6bb79f42 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -62,7 +62,6 @@ static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 };
 static int lec_open(struct net_device *dev);
 static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev);
 static int lec_close(struct net_device *dev);
-static struct net_device_stats *lec_get_stats(struct net_device *dev);
 static void lec_init(struct net_device *dev);
 static struct lec_arp_table *lec_arp_find(struct lec_priv *priv,
 					  const unsigned char *mac_addr);
@@ -218,28 +217,28 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
 
 static int lec_open(struct net_device *dev)
 {
-	struct lec_priv *priv = netdev_priv(dev);
-
 	netif_start_queue(dev);
-	memset(&priv->stats, 0, sizeof(struct net_device_stats));
+	memset(&dev->stats, 0, sizeof(struct net_device_stats));
 
 	return 0;
 }
 
-static __inline__ void
-lec_send(struct atm_vcc *vcc, struct sk_buff *skb, struct lec_priv *priv)
+static void
+lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
 {
+	struct net_device *dev = skb->dev;
+
 	ATM_SKB(skb)->vcc = vcc;
 	ATM_SKB(skb)->atm_options = vcc->atm_options;
 
 	atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
 	if (vcc->send(vcc, skb) < 0) {
-		priv->stats.tx_dropped++;
+		dev->stats.tx_dropped++;
 		return;
 	}
 
-	priv->stats.tx_packets++;
-	priv->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += skb->len;
 }
 
 static void lec_tx_timeout(struct net_device *dev)
@@ -270,7 +269,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	pr_debug("lec_start_xmit called\n");
 	if (!priv->lecd) {
 		printk("%s:No lecd attached\n", dev->name);
-		priv->stats.tx_errors++;
+		dev->stats.tx_errors++;
 		netif_stop_queue(dev);
 		return -EUNATCH;
 	}
@@ -345,7 +344,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
 					    GFP_ATOMIC);
 		dev_kfree_skb(skb);
 		if (skb2 == NULL) {
-			priv->stats.tx_dropped++;
+			dev->stats.tx_dropped++;
 			return 0;
 		}
 		skb = skb2;
@@ -380,7 +379,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			    ("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ",
 			     dev->name);
 			pr_debug("MAC address %pM\n", lec_h->h_dest);
-			priv->stats.tx_dropped++;
+			dev->stats.tx_dropped++;
 			dev_kfree_skb(skb);
 		}
 		goto out;
@@ -392,10 +391,10 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) {
 		pr_debug("lec.c: emptying tx queue, ");
 		pr_debug("MAC address %pM\n", lec_h->h_dest);
-		lec_send(vcc, skb2, priv);
+		lec_send(vcc, skb2);
 	}
 
-	lec_send(vcc, skb, priv);
+	lec_send(vcc, skb);
 
 	if (!atm_may_send(vcc, 0)) {
 		struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc);
@@ -427,15 +426,6 @@ static int lec_close(struct net_device *dev)
 	return 0;
 }
 
-/*
- * Get the current statistics.
- * This may be called with the card open or closed.
- */
-static struct net_device_stats *lec_get_stats(struct net_device *dev)
-{
-	return &((struct lec_priv *)netdev_priv(dev))->stats;
-}
-
 static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
 {
 	unsigned long flags;
@@ -512,7 +502,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
 		priv->lane2_ops = NULL;
 		if (priv->lane_version > 1)
 			priv->lane2_ops = &lane2_ops;
-		if (dev->change_mtu(dev, mesg->content.config.mtu))
+		if (dev_set_mtu(dev, mesg->content.config.mtu))
 			printk("%s: change_mtu to %d failed\n", dev->name,
 			       mesg->content.config.mtu);
 		priv->is_proxy = mesg->content.config.is_proxy;
@@ -677,17 +667,19 @@ static void lec_set_multicast_list(struct net_device *dev)
 	return;
 }
 
+static const struct net_device_ops lec_netdev_ops = {
+	.ndo_open = lec_open,
+	.ndo_stop = lec_close,
+	.ndo_start_xmit = lec_start_xmit,
+	.ndo_change_mtu = lec_change_mtu,
+	.ndo_tx_timeout = lec_tx_timeout,
+	.ndo_set_multicast_list = lec_set_multicast_list,
+};
+
+
 static void lec_init(struct net_device *dev)
 {
-	dev->change_mtu = lec_change_mtu;
-	dev->open = lec_open;
-	dev->stop = lec_close;
-	dev->hard_start_xmit = lec_start_xmit;
-	dev->tx_timeout = lec_tx_timeout;
-
-	dev->get_stats = lec_get_stats;
-	dev->set_multicast_list = lec_set_multicast_list;
-	dev->do_ioctl = NULL;
+	dev->netdev_ops = &lec_netdev_ops;
 	printk("%s: Initialized!\n", dev->name);
 }
 
@@ -810,8 +802,8 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb)
 		else
 #endif
 			skb->protocol = eth_type_trans(skb, dev);
-		priv->stats.rx_packets++;
-		priv->stats.rx_bytes += skb->len;
+		dev->stats.rx_packets++;
+		dev->stats.rx_bytes += skb->len;
 		memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
 		netif_rx(skb);
 	}
@@ -1887,7 +1879,7 @@ restart:
 			lec_arp_hold(entry);
 			spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
 			while ((skb = skb_dequeue(&entry->tx_wait)) != NULL)
-				lec_send(vcc, skb, entry->priv);
+				lec_send(vcc, skb);
 			entry->last_used = jiffies;
 			entry->status = ESI_FORWARD_DIRECT;
 			lec_arp_put(entry);
@@ -2305,7 +2297,7 @@ restart:
 			lec_arp_hold(entry);
 			spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
 			while ((skb = skb_dequeue(&entry->tx_wait)) != NULL)
-				lec_send(vcc, skb, entry->priv);
+				lec_send(vcc, skb);
 			entry->last_used = jiffies;
 			entry->status = ESI_FORWARD_DIRECT;
 			lec_arp_put(entry);
diff --git a/net/atm/lec.h b/net/atm/lec.h
index 0d376682c1a3..9d14d196cc1d 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -69,7 +69,6 @@ struct lane2_ops {
 #define LEC_ARP_TABLE_SIZE 16
 
 struct lec_priv {
-	struct net_device_stats stats;
 	unsigned short lecid; /* Lecid of this client */
 	struct hlist_head lec_arp_empty_ones;
 	/* Used for storing VCC's that don't have a MAC address attached yet */
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 039d5cc72c3d..e5bf11453a18 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -286,33 +286,32 @@ static void start_mpc(struct mpoa_client *mpc, struct net_device *dev)
 {
 
 	dprintk("mpoa: (%s) start_mpc:\n", mpc->dev->name);
-	if (dev->hard_start_xmit == NULL) {
-		printk("mpoa: (%s) start_mpc: dev->hard_start_xmit == NULL, not starting\n",
-			dev->name);
-		return;
+	if (!dev->netdev_ops)
+		printk("mpoa: (%s) start_mpc not starting\n", dev->name);
+	else {
+		mpc->old_ops = dev->netdev_ops;
+		mpc->new_ops = *mpc->old_ops;
+		mpc->new_ops.ndo_start_xmit = mpc_send_packet;
+		dev->netdev_ops = &mpc->new_ops;
 	}
-	mpc->old_hard_start_xmit = dev->hard_start_xmit;
-	dev->hard_start_xmit = mpc_send_packet;
-
-	return;
 }
 
 static void stop_mpc(struct mpoa_client *mpc)
 {
-
+	struct net_device *dev = mpc->dev;
 	dprintk("mpoa: (%s) stop_mpc:", mpc->dev->name);
 
 	/* Lets not nullify lec device's dev->hard_start_xmit */
-	if (mpc->dev->hard_start_xmit != mpc_send_packet) {
+	if (dev->netdev_ops != &mpc->new_ops) {
 		dprintk(" mpc already stopped, not fatal\n");
 		return;
 	}
 	dprintk("\n");
-	mpc->dev->hard_start_xmit = mpc->old_hard_start_xmit;
-	mpc->old_hard_start_xmit = NULL;
-	/* close_shortcuts(mpc); ??? FIXME */
 
-	return;
+	dev->netdev_ops = mpc->old_ops;
+	mpc->old_ops = NULL;
+
+	/* close_shortcuts(mpc); ??? FIXME */
 }
 
 static const char *mpoa_device_type_string(char type) __attribute__ ((unused));
@@ -531,7 +530,6 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
  */
 static int mpc_send_packet(struct sk_buff *skb, struct net_device *dev)
 {
-	int retval;
 	struct mpoa_client *mpc;
 	struct ethhdr *eth;
 	int i = 0;
@@ -561,9 +559,7 @@ static int mpc_send_packet(struct sk_buff *skb, struct net_device *dev)
 	}
 
  non_ip:
-	retval = mpc->old_hard_start_xmit(skb,dev);
-
-	return retval;
+	return mpc->old_ops->ndo_start_xmit(skb,dev);
 }
 
 static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg)
diff --git a/net/atm/mpc.h b/net/atm/mpc.h
index 24c386c35f57..0919a88bbc70 100644
--- a/net/atm/mpc.h
+++ b/net/atm/mpc.h
@@ -15,7 +15,7 @@ struct mpoa_client {
 	struct mpoa_client *next;
 	struct net_device *dev; /* lec in question */
 	int dev_num; /* e.g. 2 for lec2 */
-	int (*old_hard_start_xmit)(struct sk_buff *skb, struct net_device *dev);
+
 	struct atm_vcc *mpoad_vcc; /* control channel to mpoad */
 	uint8_t mps_ctrl_addr[ATM_ESA_LEN]; /* MPS control ATM address */
 	uint8_t our_ctrl_addr[ATM_ESA_LEN]; /* MPC's control ATM address */
@@ -31,6 +31,9 @@ struct mpoa_client {
 	uint8_t *mps_macs; /* array of MPS MAC addresses, >=1 */
 	int number_of_mps_macs; /* number of the above MAC addresses */
 	struct mpc_parameters parameters; /* parameters for this client */
+
+	const struct net_device_ops *old_ops;
+	struct net_device_ops new_ops;
 };
 
 
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 00d9e5e13158..fd9d06f291dc 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1529,10 +1529,8 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock,
 		dp = ax25->digipeat;
 	}
 
-	SOCK_DEBUG(sk, "AX.25: sendto: Addresses built.\n");
-
 	/* Build a packet */
-	SOCK_DEBUG(sk, "AX.25: sendto: building packet.\n");
+	SOCK_DEBUG(sk, "AX.25: sendto: Addresses built. Building packet.\n");
 
 	/* Assume the worst case */
 	size = len + ax25->ax25_dev->dev->hard_header_len;
@@ -1985,9 +1983,8 @@ static const struct proto_ops ax25_proto_ops = {
 /*
  * Called by socket.c on kernel start up
  */
-static struct packet_type ax25_packet_type = {
-	.type = __constant_htons(ETH_P_AX25),
-	.dev = NULL, /* All devices */
+static struct packet_type ax25_packet_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_AX25),
 	.func = ax25_kiss_rcv,
 };
 
diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c
index 8443af57a374..71338f112108 100644
--- a/net/ax25/ax25_iface.c
+++ b/net/ax25/ax25_iface.c
@@ -61,27 +61,24 @@ void ax25_protocol_release(unsigned int pid)
 
 	write_lock_bh(&protocol_list_lock);
 	protocol = protocol_list;
-	if (protocol == NULL) {
-		write_unlock_bh(&protocol_list_lock);
-		return;
-	}
+	if (protocol == NULL)
+		goto out;
 
 	if (protocol->pid == pid) {
 		protocol_list = protocol->next;
-		write_unlock_bh(&protocol_list_lock);
-		return;
+		goto out;
 	}
 
 	while (protocol != NULL && protocol->next != NULL) {
 		if (protocol->next->pid == pid) {
 			s = protocol->next;
 			protocol->next = protocol->next->next;
-			write_unlock_bh(&protocol_list_lock);
-			return;
+			goto out;
 		}
 
 		protocol = protocol->next;
 	}
+out:
 	write_unlock_bh(&protocol_list_lock);
 }
 
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 744ed3f07ef3..02b9baa1930b 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -41,14 +41,13 @@
 
 #include <net/bluetooth/bluetooth.h>
 
-#define VERSION "2.14"
+#define VERSION "2.15"
 
 /* Bluetooth sockets */
 #define BT_MAX_PROTO 8
 static struct net_proto_family *bt_proto[BT_MAX_PROTO];
 static DEFINE_RWLOCK(bt_proto_lock);
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key bt_lock_key[BT_MAX_PROTO];
 static const char *bt_key_strings[BT_MAX_PROTO] = {
 	"sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP",
@@ -86,11 +85,6 @@ static inline void bt_sock_reclassify_lock(struct socket *sock, int proto)
 			bt_slock_key_strings[proto], &bt_slock_key[proto],
 				bt_key_strings[proto], &bt_lock_key[proto]);
 }
-#else
-static inline void bt_sock_reclassify_lock(struct socket *sock, int proto)
-{
-}
-#endif
 
 int bt_sock_register(int proto, struct net_proto_family *ops)
 {
@@ -217,7 +211,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
217 continue; 211 continue;
218 } 212 }
219 213
220 if (sk->sk_state == BT_CONNECTED || !newsock) { 214 if (sk->sk_state == BT_CONNECTED || !newsock ||
215 bt_sk(parent)->defer_setup) {
221 bt_accept_unlink(sk); 216 bt_accept_unlink(sk);
222 if (newsock) 217 if (newsock)
223 sock_graft(sk, newsock); 218 sock_graft(sk, newsock);
@@ -232,7 +227,7 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
232EXPORT_SYMBOL(bt_accept_dequeue); 227EXPORT_SYMBOL(bt_accept_dequeue);
233 228
234int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, 229int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
235 struct msghdr *msg, size_t len, int flags) 230 struct msghdr *msg, size_t len, int flags)
236{ 231{
237 int noblock = flags & MSG_DONTWAIT; 232 int noblock = flags & MSG_DONTWAIT;
238 struct sock *sk = sock->sk; 233 struct sock *sk = sock->sk;
@@ -277,7 +272,9 @@ static inline unsigned int bt_accept_poll(struct sock *parent)
277 272
278 list_for_each_safe(p, n, &bt_sk(parent)->accept_q) { 273 list_for_each_safe(p, n, &bt_sk(parent)->accept_q) {
279 sk = (struct sock *) list_entry(p, struct bt_sock, accept_q); 274 sk = (struct sock *) list_entry(p, struct bt_sock, accept_q);
280 if (sk->sk_state == BT_CONNECTED) 275 if (sk->sk_state == BT_CONNECTED ||
276 (bt_sk(parent)->defer_setup &&
277 sk->sk_state == BT_CONNECT2))
281 return POLLIN | POLLRDNORM; 278 return POLLIN | POLLRDNORM;
282 } 279 }
283 280
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index c9cac7719efe..0073ec8495da 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -126,8 +126,7 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const
126 126
127 session->reassembly[id] = nskb; 127 session->reassembly[id] = nskb;
128 128
129 if (skb) 129 kfree_skb(skb);
130 kfree_skb(skb);
131} 130}
132 131
133static inline int cmtp_recv_frame(struct cmtp_session *session, struct sk_buff *skb) 132static inline int cmtp_recv_frame(struct cmtp_session *session, struct sk_buff *skb)
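The cmtp change drops the `if (skb)` guard because kfree_skb() simply returns for a NULL pointer, much like ISO C free(). A trivial standalone illustration of why such a guard is redundant:

        #include <stdlib.h>

        int main(void)
        {
                char *buf = NULL;

                free(buf);              /* free(NULL) is defined to do nothing */

                buf = malloc(32);
                free(buf);              /* the same unguarded call handles both cases */
                return 0;
        }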
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index a4a789f24c8d..1181db08d9de 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -123,6 +123,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle)
123 conn->state = BT_CONNECT; 123 conn->state = BT_CONNECT;
124 conn->out = 1; 124 conn->out = 1;
125 125
126 conn->attempt++;
127
126 cp.handle = cpu_to_le16(handle); 128 cp.handle = cpu_to_le16(handle);
127 cp.pkt_type = cpu_to_le16(conn->pkt_type); 129 cp.pkt_type = cpu_to_le16(conn->pkt_type);
128 130
@@ -139,6 +141,8 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle)
139 conn->state = BT_CONNECT; 141 conn->state = BT_CONNECT;
140 conn->out = 1; 142 conn->out = 1;
141 143
144 conn->attempt++;
145
142 cp.handle = cpu_to_le16(handle); 146 cp.handle = cpu_to_le16(handle);
143 cp.pkt_type = cpu_to_le16(conn->pkt_type); 147 cp.pkt_type = cpu_to_le16(conn->pkt_type);
144 148
@@ -155,6 +159,7 @@ static void hci_conn_timeout(unsigned long arg)
155{ 159{
156 struct hci_conn *conn = (void *) arg; 160 struct hci_conn *conn = (void *) arg;
157 struct hci_dev *hdev = conn->hdev; 161 struct hci_dev *hdev = conn->hdev;
162 __u8 reason;
158 163
159 BT_DBG("conn %p state %d", conn, conn->state); 164 BT_DBG("conn %p state %d", conn, conn->state);
160 165
@@ -173,7 +178,8 @@ static void hci_conn_timeout(unsigned long arg)
173 break; 178 break;
174 case BT_CONFIG: 179 case BT_CONFIG:
175 case BT_CONNECTED: 180 case BT_CONNECTED:
176 hci_acl_disconn(conn, 0x13); 181 reason = hci_proto_disconn_ind(conn);
182 hci_acl_disconn(conn, reason);
177 break; 183 break;
178 default: 184 default:
179 conn->state = BT_CLOSED; 185 conn->state = BT_CLOSED;
@@ -216,12 +222,13 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst)
216 break; 222 break;
217 case SCO_LINK: 223 case SCO_LINK:
218 if (lmp_esco_capable(hdev)) 224 if (lmp_esco_capable(hdev))
219 conn->pkt_type = hdev->esco_type & SCO_ESCO_MASK; 225 conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
226 (hdev->esco_type & EDR_ESCO_MASK);
220 else 227 else
221 conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK; 228 conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK;
222 break; 229 break;
223 case ESCO_LINK: 230 case ESCO_LINK:
224 conn->pkt_type = hdev->esco_type; 231 conn->pkt_type = hdev->esco_type & ~EDR_ESCO_MASK;
225 break; 232 break;
226 } 233 }
227 234
@@ -280,6 +287,8 @@ int hci_conn_del(struct hci_conn *conn)
280 287
281 skb_queue_purge(&conn->data_q); 288 skb_queue_purge(&conn->data_q);
282 289
290 hci_conn_del_sysfs(conn);
291
283 return 0; 292 return 0;
284} 293}
285 294
@@ -325,7 +334,7 @@ EXPORT_SYMBOL(hci_get_route);
325 334
326/* Create SCO or ACL connection. 335/* Create SCO or ACL connection.
327 * Device _must_ be locked */ 336 * Device _must_ be locked */
328struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 auth_type) 337struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sec_level, __u8 auth_type)
329{ 338{
330 struct hci_conn *acl; 339 struct hci_conn *acl;
331 struct hci_conn *sco; 340 struct hci_conn *sco;
@@ -340,6 +349,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8
340 hci_conn_hold(acl); 349 hci_conn_hold(acl);
341 350
342 if (acl->state == BT_OPEN || acl->state == BT_CLOSED) { 351 if (acl->state == BT_OPEN || acl->state == BT_CLOSED) {
352 acl->sec_level = sec_level;
343 acl->auth_type = auth_type; 353 acl->auth_type = auth_type;
344 hci_acl_connect(acl); 354 hci_acl_connect(acl);
345 } 355 }
@@ -385,51 +395,59 @@ int hci_conn_check_link_mode(struct hci_conn *conn)
385EXPORT_SYMBOL(hci_conn_check_link_mode); 395EXPORT_SYMBOL(hci_conn_check_link_mode);
386 396
387/* Authenticate remote device */ 397/* Authenticate remote device */
388int hci_conn_auth(struct hci_conn *conn) 398static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
389{ 399{
390 BT_DBG("conn %p", conn); 400 BT_DBG("conn %p", conn);
391 401
392 if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) { 402 if (sec_level > conn->sec_level)
393 if (!(conn->auth_type & 0x01)) { 403 conn->sec_level = sec_level;
394 conn->auth_type |= 0x01; 404 else if (conn->link_mode & HCI_LM_AUTH)
395 conn->link_mode &= ~HCI_LM_AUTH;
396 }
397 }
398
399 if (conn->link_mode & HCI_LM_AUTH)
400 return 1; 405 return 1;
401 406
407 conn->auth_type = auth_type;
408
402 if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { 409 if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) {
403 struct hci_cp_auth_requested cp; 410 struct hci_cp_auth_requested cp;
404 cp.handle = cpu_to_le16(conn->handle); 411 cp.handle = cpu_to_le16(conn->handle);
405 hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, 412 hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED,
406 sizeof(cp), &cp); 413 sizeof(cp), &cp);
407 } 414 }
415
408 return 0; 416 return 0;
409} 417}
410EXPORT_SYMBOL(hci_conn_auth);
411 418
412/* Enable encryption */ 419/* Enable security */
413int hci_conn_encrypt(struct hci_conn *conn) 420int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type)
414{ 421{
415 BT_DBG("conn %p", conn); 422 BT_DBG("conn %p", conn);
416 423
424 if (sec_level == BT_SECURITY_SDP)
425 return 1;
426
427 if (sec_level == BT_SECURITY_LOW) {
428 if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0)
429 return hci_conn_auth(conn, sec_level, auth_type);
430 else
431 return 1;
432 }
433
417 if (conn->link_mode & HCI_LM_ENCRYPT) 434 if (conn->link_mode & HCI_LM_ENCRYPT)
418 return hci_conn_auth(conn); 435 return hci_conn_auth(conn, sec_level, auth_type);
419 436
420 if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) 437 if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend))
421 return 0; 438 return 0;
422 439
423 if (hci_conn_auth(conn)) { 440 if (hci_conn_auth(conn, sec_level, auth_type)) {
424 struct hci_cp_set_conn_encrypt cp; 441 struct hci_cp_set_conn_encrypt cp;
425 cp.handle = cpu_to_le16(conn->handle); 442 cp.handle = cpu_to_le16(conn->handle);
426 cp.encrypt = 1; 443 cp.encrypt = 1;
427 hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT, 444 hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT,
428 sizeof(cp), &cp); 445 sizeof(cp), &cp);
429 } 446 }
447
430 return 0; 448 return 0;
431} 449}
432EXPORT_SYMBOL(hci_conn_encrypt); 450EXPORT_SYMBOL(hci_conn_security);
433 451
434/* Change link key */ 452/* Change link key */
435int hci_conn_change_link_key(struct hci_conn *conn) 453int hci_conn_change_link_key(struct hci_conn *conn)
@@ -442,12 +460,13 @@ int hci_conn_change_link_key(struct hci_conn *conn)
442 hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, 460 hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY,
443 sizeof(cp), &cp); 461 sizeof(cp), &cp);
444 } 462 }
463
445 return 0; 464 return 0;
446} 465}
447EXPORT_SYMBOL(hci_conn_change_link_key); 466EXPORT_SYMBOL(hci_conn_change_link_key);
448 467
449/* Switch role */ 468/* Switch role */
450int hci_conn_switch_role(struct hci_conn *conn, uint8_t role) 469int hci_conn_switch_role(struct hci_conn *conn, __u8 role)
451{ 470{
452 BT_DBG("conn %p", conn); 471 BT_DBG("conn %p", conn);
453 472
@@ -460,6 +479,7 @@ int hci_conn_switch_role(struct hci_conn *conn, uint8_t role)
460 cp.role = role; 479 cp.role = role;
461 hci_send_cmd(conn->hdev, HCI_OP_SWITCH_ROLE, sizeof(cp), &cp); 480 hci_send_cmd(conn->hdev, HCI_OP_SWITCH_ROLE, sizeof(cp), &cp);
462 } 481 }
482
463 return 0; 483 return 0;
464} 484}
465EXPORT_SYMBOL(hci_conn_switch_role); 485EXPORT_SYMBOL(hci_conn_switch_role);
@@ -542,9 +562,7 @@ void hci_conn_hash_flush(struct hci_dev *hdev)
542 562
543 c->state = BT_CLOSED; 563 c->state = BT_CLOSED;
544 564
545 hci_conn_del_sysfs(c); 565 hci_proto_disconn_cfm(c, 0x16);
546
547 hci_proto_disconn_ind(c, 0x16);
548 hci_conn_del(c); 566 hci_conn_del(c);
549 } 567 }
550} 568}
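In the hci_conn.c hunks, hci_conn_auth() and hci_conn_encrypt() are merged into hci_conn_security(), which takes the requested security level plus an authentication requirement and decides whether anything needs to be sent at all. The following is a loose userspace restatement of that decision ladder, meant only as a reading aid for the hunk, not as the kernel function (return 1 means the link already satisfies the request, 0 means an HCI command would be queued; the pending-flag bookkeeping is omitted):

        #include <stdbool.h>
        #include <stdio.h>

        enum sec_level { SEC_SDP, SEC_LOW, SEC_MEDIUM, SEC_HIGH };

        struct link {
                bool ssp;               /* both sides support Secure Simple Pairing */
                bool authenticated;     /* HCI_LM_AUTH already set */
                bool encrypted;         /* HCI_LM_ENCRYPT already set */
        };

        static int conn_auth(struct link *l)
        {
                if (l->authenticated)
                        return 1;
                /* ...queue HCI_OP_AUTH_REQUESTED... */
                return 0;
        }

        static int conn_security(struct link *l, enum sec_level level)
        {
                if (level == SEC_SDP)
                        return 1;                       /* SDP traffic never escalates */

                if (level == SEC_LOW)
                        return l->ssp ? conn_auth(l) : 1;

                if (l->encrypted)
                        return conn_auth(l);            /* only authentication missing */

                if (conn_auth(l)) {
                        /* ...queue HCI_OP_SET_CONN_ENCRYPT... */
                }
                return 0;
        }

        int main(void)
        {
                struct link l = { .ssp = true, .authenticated = true, .encrypted = false };

                printf("medium on authenticated link -> %d\n",
                       conn_security(&l, SEC_MEDIUM));
                return 0;
        }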
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index ba78cc1eb8d9..cd061510b6bd 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1565,8 +1565,7 @@ static void hci_cmd_task(unsigned long arg)
1565 1565
1566 /* Send queued commands */ 1566 /* Send queued commands */
1567 if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) { 1567 if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) {
1568 if (hdev->sent_cmd) 1568 kfree_skb(hdev->sent_cmd);
1569 kfree_skb(hdev->sent_cmd);
1570 1569
1571 if ((hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC))) { 1570 if ((hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC))) {
1572 atomic_dec(&hdev->cmd_cnt); 1571 atomic_dec(&hdev->cmd_cnt);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index f91ba690f5d2..55534244c3a0 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -484,6 +484,15 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, struct sk_buff *skb
484 if (hdev->features[4] & LMP_EV5) 484 if (hdev->features[4] & LMP_EV5)
485 hdev->esco_type |= (ESCO_EV5); 485 hdev->esco_type |= (ESCO_EV5);
486 486
487 if (hdev->features[5] & LMP_EDR_ESCO_2M)
488 hdev->esco_type |= (ESCO_2EV3);
489
490 if (hdev->features[5] & LMP_EDR_ESCO_3M)
491 hdev->esco_type |= (ESCO_3EV3);
492
493 if (hdev->features[5] & LMP_EDR_3S_ESCO)
494 hdev->esco_type |= (ESCO_2EV5 | ESCO_3EV5);
495
487 BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name, 496 BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name,
488 hdev->features[0], hdev->features[1], 497 hdev->features[0], hdev->features[1],
489 hdev->features[2], hdev->features[3], 498 hdev->features[2], hdev->features[3],
@@ -914,7 +923,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s
914 if (ev->status) { 923 if (ev->status) {
915 hci_proto_connect_cfm(conn, ev->status); 924 hci_proto_connect_cfm(conn, ev->status);
916 hci_conn_del(conn); 925 hci_conn_del(conn);
917 } 926 } else if (ev->link_type != ACL_LINK)
927 hci_proto_connect_cfm(conn, ev->status);
918 928
919unlock: 929unlock:
920 hci_dev_unlock(hdev); 930 hci_dev_unlock(hdev);
@@ -1009,9 +1019,7 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff
1009 if (conn) { 1019 if (conn) {
1010 conn->state = BT_CLOSED; 1020 conn->state = BT_CLOSED;
1011 1021
1012 hci_conn_del_sysfs(conn); 1022 hci_proto_disconn_cfm(conn, ev->reason);
1013
1014 hci_proto_disconn_ind(conn, ev->reason);
1015 hci_conn_del(conn); 1023 hci_conn_del(conn);
1016 } 1024 }
1017 1025
@@ -1600,7 +1608,8 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b
1600 1608
1601 if (conn->state == BT_CONFIG) { 1609 if (conn->state == BT_CONFIG) {
1602 if (!ev->status && hdev->ssp_mode > 0 && 1610 if (!ev->status && hdev->ssp_mode > 0 &&
1603 conn->ssp_mode > 0 && conn->out) { 1611 conn->ssp_mode > 0 && conn->out &&
1612 conn->sec_level != BT_SECURITY_SDP) {
1604 struct hci_cp_auth_requested cp; 1613 struct hci_cp_auth_requested cp;
1605 cp.handle = ev->handle; 1614 cp.handle = ev->handle;
1606 hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, 1615 hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED,
@@ -1637,6 +1646,13 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu
1637 conn->type = SCO_LINK; 1646 conn->type = SCO_LINK;
1638 } 1647 }
1639 1648
1649 if (conn->out && ev->status == 0x1c && conn->attempt < 2) {
1650 conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) |
1651 (hdev->esco_type & EDR_ESCO_MASK);
1652 hci_setup_sync(conn, conn->link->handle);
1653 goto unlock;
1654 }
1655
1640 if (!ev->status) { 1656 if (!ev->status) {
1641 conn->handle = __le16_to_cpu(ev->handle); 1657 conn->handle = __le16_to_cpu(ev->handle);
1642 conn->state = BT_CONNECTED; 1658 conn->state = BT_CONNECTED;
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 1a1f916be44e..ed82796d4a0f 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -140,7 +140,7 @@ static void del_conn(struct work_struct *work)
140 dev = device_find_child(&conn->dev, NULL, __match_tty); 140 dev = device_find_child(&conn->dev, NULL, __match_tty);
141 if (!dev) 141 if (!dev)
142 break; 142 break;
143 device_move(dev, NULL); 143 device_move(dev, NULL, DPM_ORDER_DEV_LAST);
144 put_device(dev); 144 put_device(dev);
145 } 145 }
146 146
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index b93748e224ff..ca4d3b40d5ce 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -50,9 +50,10 @@
50#include <net/bluetooth/hci_core.h> 50#include <net/bluetooth/hci_core.h>
51#include <net/bluetooth/l2cap.h> 51#include <net/bluetooth/l2cap.h>
52 52
53#define VERSION "2.11" 53#define VERSION "2.13"
54 54
55static u32 l2cap_feat_mask = 0x0000; 55static u32 l2cap_feat_mask = 0x0080;
56static u8 l2cap_fixed_chan[8] = { 0x02, };
56 57
57static const struct proto_ops l2cap_sock_ops; 58static const struct proto_ops l2cap_sock_ops;
58 59
@@ -77,9 +78,10 @@ static void l2cap_sock_timeout(unsigned long arg)
77 78
78 bh_lock_sock(sk); 79 bh_lock_sock(sk);
79 80
80 if (sk->sk_state == BT_CONNECT && 81 if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)
81 (l2cap_pi(sk)->link_mode & (L2CAP_LM_AUTH | 82 reason = ECONNREFUSED;
82 L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE))) 83 else if (sk->sk_state == BT_CONNECT &&
84 l2cap_pi(sk)->sec_level != BT_SECURITY_SDP)
83 reason = ECONNREFUSED; 85 reason = ECONNREFUSED;
84 else 86 else
85 reason = ETIMEDOUT; 87 reason = ETIMEDOUT;
@@ -204,6 +206,8 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so
204 206
205 BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid); 207 BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid);
206 208
209 conn->disc_reason = 0x13;
210
207 l2cap_pi(sk)->conn = conn; 211 l2cap_pi(sk)->conn = conn;
208 212
209 if (sk->sk_type == SOCK_SEQPACKET) { 213 if (sk->sk_type == SOCK_SEQPACKET) {
@@ -259,18 +263,35 @@ static void l2cap_chan_del(struct sock *sk, int err)
259} 263}
260 264
261/* Service level security */ 265/* Service level security */
262static inline int l2cap_check_link_mode(struct sock *sk) 266static inline int l2cap_check_security(struct sock *sk)
263{ 267{
264 struct l2cap_conn *conn = l2cap_pi(sk)->conn; 268 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
269 __u8 auth_type;
265 270
266 if ((l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) || 271 if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) {
267 (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)) 272 if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
268 return hci_conn_encrypt(conn->hcon); 273 auth_type = HCI_AT_NO_BONDING_MITM;
274 else
275 auth_type = HCI_AT_NO_BONDING;
269 276
270 if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH) 277 if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW)
271 return hci_conn_auth(conn->hcon); 278 l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
279 } else {
280 switch (l2cap_pi(sk)->sec_level) {
281 case BT_SECURITY_HIGH:
282 auth_type = HCI_AT_GENERAL_BONDING_MITM;
283 break;
284 case BT_SECURITY_MEDIUM:
285 auth_type = HCI_AT_GENERAL_BONDING;
286 break;
287 default:
288 auth_type = HCI_AT_NO_BONDING;
289 break;
290 }
291 }
272 292
273 return 1; 293 return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level,
294 auth_type);
274} 295}
275 296
276static inline u8 l2cap_get_ident(struct l2cap_conn *conn) 297static inline u8 l2cap_get_ident(struct l2cap_conn *conn)
@@ -312,7 +333,10 @@ static void l2cap_do_start(struct sock *sk)
312 struct l2cap_conn *conn = l2cap_pi(sk)->conn; 333 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
313 334
314 if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { 335 if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) {
315 if (l2cap_check_link_mode(sk)) { 336 if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE))
337 return;
338
339 if (l2cap_check_security(sk)) {
316 struct l2cap_conn_req req; 340 struct l2cap_conn_req req;
317 req.scid = cpu_to_le16(l2cap_pi(sk)->scid); 341 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
318 req.psm = l2cap_pi(sk)->psm; 342 req.psm = l2cap_pi(sk)->psm;
@@ -356,7 +380,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
356 } 380 }
357 381
358 if (sk->sk_state == BT_CONNECT) { 382 if (sk->sk_state == BT_CONNECT) {
359 if (l2cap_check_link_mode(sk)) { 383 if (l2cap_check_security(sk)) {
360 struct l2cap_conn_req req; 384 struct l2cap_conn_req req;
361 req.scid = cpu_to_le16(l2cap_pi(sk)->scid); 385 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
362 req.psm = l2cap_pi(sk)->psm; 386 req.psm = l2cap_pi(sk)->psm;
@@ -371,10 +395,18 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
371 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); 395 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
372 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); 396 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
373 397
374 if (l2cap_check_link_mode(sk)) { 398 if (l2cap_check_security(sk)) {
375 sk->sk_state = BT_CONFIG; 399 if (bt_sk(sk)->defer_setup) {
376 rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); 400 struct sock *parent = bt_sk(sk)->parent;
377 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); 401 rsp.result = cpu_to_le16(L2CAP_CR_PEND);
402 rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND);
403 parent->sk_data_ready(parent, 0);
404
405 } else {
406 sk->sk_state = BT_CONFIG;
407 rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
408 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
409 }
378 } else { 410 } else {
379 rsp.result = cpu_to_le16(L2CAP_CR_PEND); 411 rsp.result = cpu_to_le16(L2CAP_CR_PEND);
380 rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND); 412 rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND);
@@ -426,7 +458,7 @@ static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err)
426 read_lock(&l->lock); 458 read_lock(&l->lock);
427 459
428 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { 460 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
429 if (l2cap_pi(sk)->link_mode & L2CAP_LM_RELIABLE) 461 if (l2cap_pi(sk)->force_reliable)
430 sk->sk_err = err; 462 sk->sk_err = err;
431 } 463 }
432 464
@@ -437,6 +469,7 @@ static void l2cap_info_timeout(unsigned long arg)
437{ 469{
438 struct l2cap_conn *conn = (void *) arg; 470 struct l2cap_conn *conn = (void *) arg;
439 471
472 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
440 conn->info_ident = 0; 473 conn->info_ident = 0;
441 474
442 l2cap_conn_start(conn); 475 l2cap_conn_start(conn);
@@ -470,6 +503,8 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status)
470 spin_lock_init(&conn->lock); 503 spin_lock_init(&conn->lock);
471 rwlock_init(&conn->chan_list.lock); 504 rwlock_init(&conn->chan_list.lock);
472 505
506 conn->disc_reason = 0x13;
507
473 return conn; 508 return conn;
474} 509}
475 510
@@ -483,8 +518,7 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err)
483 518
484 BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); 519 BT_DBG("hcon %p conn %p, err %d", hcon, conn, err);
485 520
486 if (conn->rx_skb) 521 kfree_skb(conn->rx_skb);
487 kfree_skb(conn->rx_skb);
488 522
489 /* Kill channels */ 523 /* Kill channels */
490 while ((sk = conn->chan_list.head)) { 524 while ((sk = conn->chan_list.head)) {
@@ -608,7 +642,6 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
608 642
609 case BT_CONNECTED: 643 case BT_CONNECTED:
610 case BT_CONFIG: 644 case BT_CONFIG:
611 case BT_CONNECT2:
612 if (sk->sk_type == SOCK_SEQPACKET) { 645 if (sk->sk_type == SOCK_SEQPACKET) {
613 struct l2cap_conn *conn = l2cap_pi(sk)->conn; 646 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
614 struct l2cap_disconn_req req; 647 struct l2cap_disconn_req req;
@@ -624,6 +657,27 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
624 l2cap_chan_del(sk, reason); 657 l2cap_chan_del(sk, reason);
625 break; 658 break;
626 659
660 case BT_CONNECT2:
661 if (sk->sk_type == SOCK_SEQPACKET) {
662 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
663 struct l2cap_conn_rsp rsp;
664 __u16 result;
665
666 if (bt_sk(sk)->defer_setup)
667 result = L2CAP_CR_SEC_BLOCK;
668 else
669 result = L2CAP_CR_BAD_PSM;
670
671 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
672 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
673 rsp.result = cpu_to_le16(result);
674 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
675 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
676 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
677 } else
678 l2cap_chan_del(sk, reason);
679 break;
680
627 case BT_CONNECT: 681 case BT_CONNECT:
628 case BT_DISCONN: 682 case BT_DISCONN:
629 l2cap_chan_del(sk, reason); 683 l2cap_chan_del(sk, reason);
@@ -653,13 +707,19 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
653 707
654 if (parent) { 708 if (parent) {
655 sk->sk_type = parent->sk_type; 709 sk->sk_type = parent->sk_type;
710 bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup;
711
656 pi->imtu = l2cap_pi(parent)->imtu; 712 pi->imtu = l2cap_pi(parent)->imtu;
657 pi->omtu = l2cap_pi(parent)->omtu; 713 pi->omtu = l2cap_pi(parent)->omtu;
658 pi->link_mode = l2cap_pi(parent)->link_mode; 714 pi->sec_level = l2cap_pi(parent)->sec_level;
715 pi->role_switch = l2cap_pi(parent)->role_switch;
716 pi->force_reliable = l2cap_pi(parent)->force_reliable;
659 } else { 717 } else {
660 pi->imtu = L2CAP_DEFAULT_MTU; 718 pi->imtu = L2CAP_DEFAULT_MTU;
661 pi->omtu = 0; 719 pi->omtu = 0;
662 pi->link_mode = 0; 720 pi->sec_level = BT_SECURITY_LOW;
721 pi->role_switch = 0;
722 pi->force_reliable = 0;
663 } 723 }
664 724
665 /* Default config options */ 725 /* Default config options */
@@ -723,17 +783,24 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol)
723 return 0; 783 return 0;
724} 784}
725 785
726static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) 786static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
727{ 787{
728 struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
729 struct sock *sk = sock->sk; 788 struct sock *sk = sock->sk;
730 int err = 0; 789 struct sockaddr_l2 la;
790 int len, err = 0;
731 791
732 BT_DBG("sk %p, %s %d", sk, batostr(&la->l2_bdaddr), la->l2_psm); 792 BT_DBG("sk %p", sk);
733 793
734 if (!addr || addr->sa_family != AF_BLUETOOTH) 794 if (!addr || addr->sa_family != AF_BLUETOOTH)
735 return -EINVAL; 795 return -EINVAL;
736 796
797 memset(&la, 0, sizeof(la));
798 len = min_t(unsigned int, sizeof(la), alen);
799 memcpy(&la, addr, len);
800
801 if (la.l2_cid)
802 return -EINVAL;
803
737 lock_sock(sk); 804 lock_sock(sk);
738 805
739 if (sk->sk_state != BT_OPEN) { 806 if (sk->sk_state != BT_OPEN) {
@@ -741,7 +808,7 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_
741 goto done; 808 goto done;
742 } 809 }
743 810
744 if (la->l2_psm && btohs(la->l2_psm) < 0x1001 && 811 if (la.l2_psm && btohs(la.l2_psm) < 0x1001 &&
745 !capable(CAP_NET_BIND_SERVICE)) { 812 !capable(CAP_NET_BIND_SERVICE)) {
746 err = -EACCES; 813 err = -EACCES;
747 goto done; 814 goto done;
@@ -749,14 +816,17 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_
749 816
750 write_lock_bh(&l2cap_sk_list.lock); 817 write_lock_bh(&l2cap_sk_list.lock);
751 818
752 if (la->l2_psm && __l2cap_get_sock_by_addr(la->l2_psm, &la->l2_bdaddr)) { 819 if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) {
753 err = -EADDRINUSE; 820 err = -EADDRINUSE;
754 } else { 821 } else {
755 /* Save source address */ 822 /* Save source address */
756 bacpy(&bt_sk(sk)->src, &la->l2_bdaddr); 823 bacpy(&bt_sk(sk)->src, &la.l2_bdaddr);
757 l2cap_pi(sk)->psm = la->l2_psm; 824 l2cap_pi(sk)->psm = la.l2_psm;
758 l2cap_pi(sk)->sport = la->l2_psm; 825 l2cap_pi(sk)->sport = la.l2_psm;
759 sk->sk_state = BT_BOUND; 826 sk->sk_state = BT_BOUND;
827
828 if (btohs(la.l2_psm) == 0x0001 || btohs(la.l2_psm) == 0x0003)
829 l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
760 } 830 }
761 831
762 write_unlock_bh(&l2cap_sk_list.lock); 832 write_unlock_bh(&l2cap_sk_list.lock);
@@ -776,7 +846,8 @@ static int l2cap_do_connect(struct sock *sk)
776 __u8 auth_type; 846 __u8 auth_type;
777 int err = 0; 847 int err = 0;
778 848
779 BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst), l2cap_pi(sk)->psm); 849 BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst),
850 l2cap_pi(sk)->psm);
780 851
781 if (!(hdev = hci_get_route(dst, src))) 852 if (!(hdev = hci_get_route(dst, src)))
782 return -EHOSTUNREACH; 853 return -EHOSTUNREACH;
@@ -785,21 +856,42 @@ static int l2cap_do_connect(struct sock *sk)
785 856
786 err = -ENOMEM; 857 err = -ENOMEM;
787 858
788 if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH || 859 if (sk->sk_type == SOCK_RAW) {
789 l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT || 860 switch (l2cap_pi(sk)->sec_level) {
790 l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) { 861 case BT_SECURITY_HIGH:
791 if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) 862 auth_type = HCI_AT_DEDICATED_BONDING_MITM;
863 break;
864 case BT_SECURITY_MEDIUM:
865 auth_type = HCI_AT_DEDICATED_BONDING;
866 break;
867 default:
868 auth_type = HCI_AT_NO_BONDING;
869 break;
870 }
871 } else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) {
872 if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
792 auth_type = HCI_AT_NO_BONDING_MITM; 873 auth_type = HCI_AT_NO_BONDING_MITM;
793 else 874 else
794 auth_type = HCI_AT_GENERAL_BONDING_MITM;
795 } else {
796 if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001))
797 auth_type = HCI_AT_NO_BONDING; 875 auth_type = HCI_AT_NO_BONDING;
798 else 876
877 if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW)
878 l2cap_pi(sk)->sec_level = BT_SECURITY_SDP;
879 } else {
880 switch (l2cap_pi(sk)->sec_level) {
881 case BT_SECURITY_HIGH:
882 auth_type = HCI_AT_GENERAL_BONDING_MITM;
883 break;
884 case BT_SECURITY_MEDIUM:
799 auth_type = HCI_AT_GENERAL_BONDING; 885 auth_type = HCI_AT_GENERAL_BONDING;
886 break;
887 default:
888 auth_type = HCI_AT_NO_BONDING;
889 break;
890 }
800 } 891 }
801 892
802 hcon = hci_connect(hdev, ACL_LINK, dst, auth_type); 893 hcon = hci_connect(hdev, ACL_LINK, dst,
894 l2cap_pi(sk)->sec_level, auth_type);
803 if (!hcon) 895 if (!hcon)
804 goto done; 896 goto done;
805 897
@@ -835,20 +927,25 @@ done:
835 927
836static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) 928static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
837{ 929{
838 struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr;
839 struct sock *sk = sock->sk; 930 struct sock *sk = sock->sk;
840 int err = 0; 931 struct sockaddr_l2 la;
841 932 int len, err = 0;
842 lock_sock(sk);
843 933
844 BT_DBG("sk %p", sk); 934 BT_DBG("sk %p", sk);
845 935
846 if (addr->sa_family != AF_BLUETOOTH || alen < sizeof(struct sockaddr_l2)) { 936 if (!addr || addr->sa_family != AF_BLUETOOTH)
847 err = -EINVAL; 937 return -EINVAL;
848 goto done; 938
849 } 939 memset(&la, 0, sizeof(la));
940 len = min_t(unsigned int, sizeof(la), alen);
941 memcpy(&la, addr, len);
942
943 if (la.l2_cid)
944 return -EINVAL;
945
946 lock_sock(sk);
850 947
851 if (sk->sk_type == SOCK_SEQPACKET && !la->l2_psm) { 948 if (sk->sk_type == SOCK_SEQPACKET && !la.l2_psm) {
852 err = -EINVAL; 949 err = -EINVAL;
853 goto done; 950 goto done;
854 } 951 }
@@ -875,8 +972,8 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
875 } 972 }
876 973
877 /* Set destination address and psm */ 974 /* Set destination address and psm */
878 bacpy(&bt_sk(sk)->dst, &la->l2_bdaddr); 975 bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr);
879 l2cap_pi(sk)->psm = la->l2_psm; 976 l2cap_pi(sk)->psm = la.l2_psm;
880 977
881 if ((err = l2cap_do_connect(sk))) 978 if ((err = l2cap_do_connect(sk)))
882 goto done; 979 goto done;
@@ -1000,12 +1097,16 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l
1000 addr->sa_family = AF_BLUETOOTH; 1097 addr->sa_family = AF_BLUETOOTH;
1001 *len = sizeof(struct sockaddr_l2); 1098 *len = sizeof(struct sockaddr_l2);
1002 1099
1003 if (peer) 1100 if (peer) {
1101 la->l2_psm = l2cap_pi(sk)->psm;
1004 bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst); 1102 bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst);
1005 else 1103 la->l2_cid = htobs(l2cap_pi(sk)->dcid);
1104 } else {
1105 la->l2_psm = l2cap_pi(sk)->sport;
1006 bacpy(&la->l2_bdaddr, &bt_sk(sk)->src); 1106 bacpy(&la->l2_bdaddr, &bt_sk(sk)->src);
1107 la->l2_cid = htobs(l2cap_pi(sk)->scid);
1108 }
1007 1109
1008 la->l2_psm = l2cap_pi(sk)->psm;
1009 return 0; 1110 return 0;
1010} 1111}
1011 1112
@@ -1106,11 +1207,38 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
1106 return err; 1207 return err;
1107} 1208}
1108 1209
1109static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) 1210static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags)
1211{
1212 struct sock *sk = sock->sk;
1213
1214 lock_sock(sk);
1215
1216 if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) {
1217 struct l2cap_conn_rsp rsp;
1218
1219 sk->sk_state = BT_CONFIG;
1220
1221 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid);
1222 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid);
1223 rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS);
1224 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO);
1225 l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident,
1226 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
1227
1228 release_sock(sk);
1229 return 0;
1230 }
1231
1232 release_sock(sk);
1233
1234 return bt_sock_recvmsg(iocb, sock, msg, len, flags);
1235}
1236
1237static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, int optlen)
1110{ 1238{
1111 struct sock *sk = sock->sk; 1239 struct sock *sk = sock->sk;
1112 struct l2cap_options opts; 1240 struct l2cap_options opts;
1113 int err = 0, len; 1241 int len, err = 0;
1114 u32 opt; 1242 u32 opt;
1115 1243
1116 BT_DBG("sk %p", sk); 1244 BT_DBG("sk %p", sk);
@@ -1140,7 +1268,15 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch
1140 break; 1268 break;
1141 } 1269 }
1142 1270
1143 l2cap_pi(sk)->link_mode = opt; 1271 if (opt & L2CAP_LM_AUTH)
1272 l2cap_pi(sk)->sec_level = BT_SECURITY_LOW;
1273 if (opt & L2CAP_LM_ENCRYPT)
1274 l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM;
1275 if (opt & L2CAP_LM_SECURE)
1276 l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH;
1277
1278 l2cap_pi(sk)->role_switch = (opt & L2CAP_LM_MASTER);
1279 l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE);
1144 break; 1280 break;
1145 1281
1146 default: 1282 default:
@@ -1152,12 +1288,77 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch
1152 return err; 1288 return err;
1153} 1289}
1154 1290
1155static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) 1291static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1292{
1293 struct sock *sk = sock->sk;
1294 struct bt_security sec;
1295 int len, err = 0;
1296 u32 opt;
1297
1298 BT_DBG("sk %p", sk);
1299
1300 if (level == SOL_L2CAP)
1301 return l2cap_sock_setsockopt_old(sock, optname, optval, optlen);
1302
1303 if (level != SOL_BLUETOOTH)
1304 return -ENOPROTOOPT;
1305
1306 lock_sock(sk);
1307
1308 switch (optname) {
1309 case BT_SECURITY:
1310 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) {
1311 err = -EINVAL;
1312 break;
1313 }
1314
1315 sec.level = BT_SECURITY_LOW;
1316
1317 len = min_t(unsigned int, sizeof(sec), optlen);
1318 if (copy_from_user((char *) &sec, optval, len)) {
1319 err = -EFAULT;
1320 break;
1321 }
1322
1323 if (sec.level < BT_SECURITY_LOW ||
1324 sec.level > BT_SECURITY_HIGH) {
1325 err = -EINVAL;
1326 break;
1327 }
1328
1329 l2cap_pi(sk)->sec_level = sec.level;
1330 break;
1331
1332 case BT_DEFER_SETUP:
1333 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
1334 err = -EINVAL;
1335 break;
1336 }
1337
1338 if (get_user(opt, (u32 __user *) optval)) {
1339 err = -EFAULT;
1340 break;
1341 }
1342
1343 bt_sk(sk)->defer_setup = opt;
1344 break;
1345
1346 default:
1347 err = -ENOPROTOOPT;
1348 break;
1349 }
1350
1351 release_sock(sk);
1352 return err;
1353}
1354
1355static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
1156{ 1356{
1157 struct sock *sk = sock->sk; 1357 struct sock *sk = sock->sk;
1158 struct l2cap_options opts; 1358 struct l2cap_options opts;
1159 struct l2cap_conninfo cinfo; 1359 struct l2cap_conninfo cinfo;
1160 int len, err = 0; 1360 int len, err = 0;
1361 u32 opt;
1161 1362
1162 BT_DBG("sk %p", sk); 1363 BT_DBG("sk %p", sk);
1163 1364
@@ -1180,12 +1381,36 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch
1180 break; 1381 break;
1181 1382
1182 case L2CAP_LM: 1383 case L2CAP_LM:
1183 if (put_user(l2cap_pi(sk)->link_mode, (u32 __user *) optval)) 1384 switch (l2cap_pi(sk)->sec_level) {
1385 case BT_SECURITY_LOW:
1386 opt = L2CAP_LM_AUTH;
1387 break;
1388 case BT_SECURITY_MEDIUM:
1389 opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT;
1390 break;
1391 case BT_SECURITY_HIGH:
1392 opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT |
1393 L2CAP_LM_SECURE;
1394 break;
1395 default:
1396 opt = 0;
1397 break;
1398 }
1399
1400 if (l2cap_pi(sk)->role_switch)
1401 opt |= L2CAP_LM_MASTER;
1402
1403 if (l2cap_pi(sk)->force_reliable)
1404 opt |= L2CAP_LM_RELIABLE;
1405
1406 if (put_user(opt, (u32 __user *) optval))
1184 err = -EFAULT; 1407 err = -EFAULT;
1185 break; 1408 break;
1186 1409
1187 case L2CAP_CONNINFO: 1410 case L2CAP_CONNINFO:
1188 if (sk->sk_state != BT_CONNECTED) { 1411 if (sk->sk_state != BT_CONNECTED &&
1412 !(sk->sk_state == BT_CONNECT2 &&
1413 bt_sk(sk)->defer_setup)) {
1189 err = -ENOTCONN; 1414 err = -ENOTCONN;
1190 break; 1415 break;
1191 } 1416 }
@@ -1208,6 +1433,60 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch
1208 return err; 1433 return err;
1209} 1434}
1210 1435
1436static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
1437{
1438 struct sock *sk = sock->sk;
1439 struct bt_security sec;
1440 int len, err = 0;
1441
1442 BT_DBG("sk %p", sk);
1443
1444 if (level == SOL_L2CAP)
1445 return l2cap_sock_getsockopt_old(sock, optname, optval, optlen);
1446
1447 if (level != SOL_BLUETOOTH)
1448 return -ENOPROTOOPT;
1449
1450 if (get_user(len, optlen))
1451 return -EFAULT;
1452
1453 lock_sock(sk);
1454
1455 switch (optname) {
1456 case BT_SECURITY:
1457 if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) {
1458 err = -EINVAL;
1459 break;
1460 }
1461
1462 sec.level = l2cap_pi(sk)->sec_level;
1463
1464 len = min_t(unsigned int, len, sizeof(sec));
1465 if (copy_to_user(optval, (char *) &sec, len))
1466 err = -EFAULT;
1467
1468 break;
1469
1470 case BT_DEFER_SETUP:
1471 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
1472 err = -EINVAL;
1473 break;
1474 }
1475
1476 if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval))
1477 err = -EFAULT;
1478
1479 break;
1480
1481 default:
1482 err = -ENOPROTOOPT;
1483 break;
1484 }
1485
1486 release_sock(sk);
1487 return err;
1488}
1489
1211static int l2cap_sock_shutdown(struct socket *sock, int how) 1490static int l2cap_sock_shutdown(struct socket *sock, int how)
1212{ 1491{
1213 struct sock *sk = sock->sk; 1492 struct sock *sk = sock->sk;
@@ -1270,11 +1549,6 @@ static void l2cap_chan_ready(struct sock *sk)
1270 */ 1549 */
1271 parent->sk_data_ready(parent, 0); 1550 parent->sk_data_ready(parent, 0);
1272 } 1551 }
1273
1274 if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) {
1275 struct l2cap_conn *conn = l2cap_pi(sk)->conn;
1276 hci_conn_change_link_key(conn->hcon);
1277 }
1278} 1552}
1279 1553
1280/* Copy frame to all raw sockets on that connection */ 1554/* Copy frame to all raw sockets on that connection */
@@ -1549,8 +1823,11 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hd
1549 1823
1550 if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && 1824 if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) &&
1551 cmd->ident == conn->info_ident) { 1825 cmd->ident == conn->info_ident) {
1552 conn->info_ident = 0;
1553 del_timer(&conn->info_timer); 1826 del_timer(&conn->info_timer);
1827
1828 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
1829 conn->info_ident = 0;
1830
1554 l2cap_conn_start(conn); 1831 l2cap_conn_start(conn);
1555 } 1832 }
1556 1833
@@ -1580,6 +1857,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
1580 /* Check if the ACL is secure enough (if not SDP) */ 1857 /* Check if the ACL is secure enough (if not SDP) */
1581 if (psm != cpu_to_le16(0x0001) && 1858 if (psm != cpu_to_le16(0x0001) &&
1582 !hci_conn_check_link_mode(conn->hcon)) { 1859 !hci_conn_check_link_mode(conn->hcon)) {
1860 conn->disc_reason = 0x05;
1583 result = L2CAP_CR_SEC_BLOCK; 1861 result = L2CAP_CR_SEC_BLOCK;
1584 goto response; 1862 goto response;
1585 } 1863 }
@@ -1621,11 +1899,18 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
1621 1899
1622 l2cap_pi(sk)->ident = cmd->ident; 1900 l2cap_pi(sk)->ident = cmd->ident;
1623 1901
1624 if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { 1902 if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) {
1625 if (l2cap_check_link_mode(sk)) { 1903 if (l2cap_check_security(sk)) {
1626 sk->sk_state = BT_CONFIG; 1904 if (bt_sk(sk)->defer_setup) {
1627 result = L2CAP_CR_SUCCESS; 1905 sk->sk_state = BT_CONNECT2;
1628 status = L2CAP_CS_NO_INFO; 1906 result = L2CAP_CR_PEND;
1907 status = L2CAP_CS_AUTHOR_PEND;
1908 parent->sk_data_ready(parent, 0);
1909 } else {
1910 sk->sk_state = BT_CONFIG;
1911 result = L2CAP_CR_SUCCESS;
1912 status = L2CAP_CS_NO_INFO;
1913 }
1629 } else { 1914 } else {
1630 sk->sk_state = BT_CONNECT2; 1915 sk->sk_state = BT_CONNECT2;
1631 result = L2CAP_CR_PEND; 1916 result = L2CAP_CR_PEND;
@@ -1695,11 +1980,14 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd
1695 l2cap_pi(sk)->dcid = dcid; 1980 l2cap_pi(sk)->dcid = dcid;
1696 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; 1981 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
1697 1982
1983 l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND;
1984
1698 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, 1985 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
1699 l2cap_build_conf_req(sk, req), req); 1986 l2cap_build_conf_req(sk, req), req);
1700 break; 1987 break;
1701 1988
1702 case L2CAP_CR_PEND: 1989 case L2CAP_CR_PEND:
1990 l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND;
1703 break; 1991 break;
1704 1992
1705 default: 1993 default:
@@ -1908,6 +2196,14 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm
1908 put_unaligned(cpu_to_le32(l2cap_feat_mask), (__le32 *) rsp->data); 2196 put_unaligned(cpu_to_le32(l2cap_feat_mask), (__le32 *) rsp->data);
1909 l2cap_send_cmd(conn, cmd->ident, 2197 l2cap_send_cmd(conn, cmd->ident,
1910 L2CAP_INFO_RSP, sizeof(buf), buf); 2198 L2CAP_INFO_RSP, sizeof(buf), buf);
2199 } else if (type == L2CAP_IT_FIXED_CHAN) {
2200 u8 buf[12];
2201 struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf;
2202 rsp->type = cpu_to_le16(L2CAP_IT_FIXED_CHAN);
2203 rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS);
2204 memcpy(buf + 4, l2cap_fixed_chan, 8);
2205 l2cap_send_cmd(conn, cmd->ident,
2206 L2CAP_INFO_RSP, sizeof(buf), buf);
1911 } else { 2207 } else {
1912 struct l2cap_info_rsp rsp; 2208 struct l2cap_info_rsp rsp;
1913 rsp.type = cpu_to_le16(type); 2209 rsp.type = cpu_to_le16(type);
@@ -1929,14 +2225,31 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm
1929 2225
1930 BT_DBG("type 0x%4.4x result 0x%2.2x", type, result); 2226 BT_DBG("type 0x%4.4x result 0x%2.2x", type, result);
1931 2227
1932 conn->info_ident = 0;
1933
1934 del_timer(&conn->info_timer); 2228 del_timer(&conn->info_timer);
1935 2229
1936 if (type == L2CAP_IT_FEAT_MASK) 2230 if (type == L2CAP_IT_FEAT_MASK) {
1937 conn->feat_mask = get_unaligned_le32(rsp->data); 2231 conn->feat_mask = get_unaligned_le32(rsp->data);
1938 2232
1939 l2cap_conn_start(conn); 2233 if (conn->feat_mask & 0x0080) {
2234 struct l2cap_info_req req;
2235 req.type = cpu_to_le16(L2CAP_IT_FIXED_CHAN);
2236
2237 conn->info_ident = l2cap_get_ident(conn);
2238
2239 l2cap_send_cmd(conn, conn->info_ident,
2240 L2CAP_INFO_REQ, sizeof(req), &req);
2241 } else {
2242 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
2243 conn->info_ident = 0;
2244
2245 l2cap_conn_start(conn);
2246 }
2247 } else if (type == L2CAP_IT_FIXED_CHAN) {
2248 conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE;
2249 conn->info_ident = 0;
2250
2251 l2cap_conn_start(conn);
2252 }
1940 2253
1941 return 0; 2254 return 0;
1942} 2255}
@@ -2143,10 +2456,15 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type)
2143 continue; 2456 continue;
2144 2457
2145 if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr)) { 2458 if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr)) {
2146 lm1 |= (HCI_LM_ACCEPT | l2cap_pi(sk)->link_mode); 2459 lm1 |= HCI_LM_ACCEPT;
2460 if (l2cap_pi(sk)->role_switch)
2461 lm1 |= HCI_LM_MASTER;
2147 exact++; 2462 exact++;
2148 } else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) 2463 } else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) {
2149 lm2 |= (HCI_LM_ACCEPT | l2cap_pi(sk)->link_mode); 2464 lm2 |= HCI_LM_ACCEPT;
2465 if (l2cap_pi(sk)->role_switch)
2466 lm2 |= HCI_LM_MASTER;
2467 }
2150 } 2468 }
2151 read_unlock(&l2cap_sk_list.lock); 2469 read_unlock(&l2cap_sk_list.lock);
2152 2470
@@ -2172,89 +2490,48 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status)
2172 return 0; 2490 return 0;
2173} 2491}
2174 2492
2175static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason) 2493static int l2cap_disconn_ind(struct hci_conn *hcon)
2176{ 2494{
2177 BT_DBG("hcon %p reason %d", hcon, reason); 2495 struct l2cap_conn *conn = hcon->l2cap_data;
2178 2496
2179 if (hcon->type != ACL_LINK) 2497 BT_DBG("hcon %p", hcon);
2180 return 0;
2181 2498
2182 l2cap_conn_del(hcon, bt_err(reason)); 2499 if (hcon->type != ACL_LINK || !conn)
2500 return 0x13;
2183 2501
2184 return 0; 2502 return conn->disc_reason;
2185} 2503}
2186 2504
2187static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) 2505static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
2188{ 2506{
2189 struct l2cap_chan_list *l; 2507 BT_DBG("hcon %p reason %d", hcon, reason);
2190 struct l2cap_conn *conn = hcon->l2cap_data;
2191 struct sock *sk;
2192 2508
2193 if (!conn) 2509 if (hcon->type != ACL_LINK)
2194 return 0; 2510 return 0;
2195 2511
2196 l = &conn->chan_list; 2512 l2cap_conn_del(hcon, bt_err(reason));
2197
2198 BT_DBG("conn %p", conn);
2199
2200 read_lock(&l->lock);
2201
2202 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
2203 struct l2cap_pinfo *pi = l2cap_pi(sk);
2204
2205 bh_lock_sock(sk);
2206
2207 if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) &&
2208 !(hcon->link_mode & HCI_LM_ENCRYPT) &&
2209 !status) {
2210 bh_unlock_sock(sk);
2211 continue;
2212 }
2213
2214 if (sk->sk_state == BT_CONNECT) {
2215 if (!status) {
2216 struct l2cap_conn_req req;
2217 req.scid = cpu_to_le16(l2cap_pi(sk)->scid);
2218 req.psm = l2cap_pi(sk)->psm;
2219
2220 l2cap_pi(sk)->ident = l2cap_get_ident(conn);
2221
2222 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
2223 L2CAP_CONN_REQ, sizeof(req), &req);
2224 } else {
2225 l2cap_sock_clear_timer(sk);
2226 l2cap_sock_set_timer(sk, HZ / 10);
2227 }
2228 } else if (sk->sk_state == BT_CONNECT2) {
2229 struct l2cap_conn_rsp rsp;
2230 __u16 result;
2231 2513
2232 if (!status) { 2514 return 0;
2233 sk->sk_state = BT_CONFIG; 2515}
2234 result = L2CAP_CR_SUCCESS;
2235 } else {
2236 sk->sk_state = BT_DISCONN;
2237 l2cap_sock_set_timer(sk, HZ / 10);
2238 result = L2CAP_CR_SEC_BLOCK;
2239 }
2240 2516
2241 rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); 2517static inline void l2cap_check_encryption(struct sock *sk, u8 encrypt)
2242 rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); 2518{
2243 rsp.result = cpu_to_le16(result); 2519 if (sk->sk_type != SOCK_SEQPACKET)
2244 rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); 2520 return;
2245 l2cap_send_cmd(conn, l2cap_pi(sk)->ident,
2246 L2CAP_CONN_RSP, sizeof(rsp), &rsp);
2247 }
2248 2521
2249 bh_unlock_sock(sk); 2522 if (encrypt == 0x00) {
2523 if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM) {
2524 l2cap_sock_clear_timer(sk);
2525 l2cap_sock_set_timer(sk, HZ * 5);
2526 } else if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH)
2527 __l2cap_sock_close(sk, ECONNREFUSED);
2528 } else {
2529 if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM)
2530 l2cap_sock_clear_timer(sk);
2250 } 2531 }
2251
2252 read_unlock(&l->lock);
2253
2254 return 0;
2255} 2532}
2256 2533
2257static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) 2534static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
2258{ 2535{
2259 struct l2cap_chan_list *l; 2536 struct l2cap_chan_list *l;
2260 struct l2cap_conn *conn = hcon->l2cap_data; 2537 struct l2cap_conn *conn = hcon->l2cap_data;
@@ -2270,15 +2547,16 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt)
2270 read_lock(&l->lock); 2547 read_lock(&l->lock);
2271 2548
2272 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { 2549 for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
2273 struct l2cap_pinfo *pi = l2cap_pi(sk);
2274
2275 bh_lock_sock(sk); 2550 bh_lock_sock(sk);
2276 2551
2277 if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) && 2552 if (l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND) {
2278 (sk->sk_state == BT_CONNECTED || 2553 bh_unlock_sock(sk);
2279 sk->sk_state == BT_CONFIG) && 2554 continue;
2280 !status && encrypt == 0x00) { 2555 }
2281 __l2cap_sock_close(sk, ECONNREFUSED); 2556
2557 if (!status && (sk->sk_state == BT_CONNECTED ||
2558 sk->sk_state == BT_CONFIG)) {
2559 l2cap_check_encryption(sk, encrypt);
2282 bh_unlock_sock(sk); 2560 bh_unlock_sock(sk);
2283 continue; 2561 continue;
2284 } 2562 }
@@ -2376,7 +2654,7 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
2376 goto drop; 2654 goto drop;
2377 2655
2378 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), 2656 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
2379 skb->len); 2657 skb->len);
2380 conn->rx_len = len - skb->len; 2658 conn->rx_len = len - skb->len;
2381 } else { 2659 } else {
2382 BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len); 2660 BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len);
@@ -2398,7 +2676,7 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
2398 } 2676 }
2399 2677
2400 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), 2678 skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len),
2401 skb->len); 2679 skb->len);
2402 conn->rx_len -= skb->len; 2680 conn->rx_len -= skb->len;
2403 2681
2404 if (!conn->rx_len) { 2682 if (!conn->rx_len) {
@@ -2424,10 +2702,10 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf)
2424 sk_for_each(sk, node, &l2cap_sk_list.head) { 2702 sk_for_each(sk, node, &l2cap_sk_list.head) {
2425 struct l2cap_pinfo *pi = l2cap_pi(sk); 2703 struct l2cap_pinfo *pi = l2cap_pi(sk);
2426 2704
2427 str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d 0x%x\n", 2705 str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n",
2428 batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), 2706 batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst),
2429 sk->sk_state, btohs(pi->psm), pi->scid, pi->dcid, 2707 sk->sk_state, btohs(pi->psm), pi->scid, pi->dcid,
2430 pi->imtu, pi->omtu, pi->link_mode); 2708 pi->imtu, pi->omtu, pi->sec_level);
2431 } 2709 }
2432 2710
2433 read_unlock_bh(&l2cap_sk_list.lock); 2711 read_unlock_bh(&l2cap_sk_list.lock);
@@ -2447,7 +2725,7 @@ static const struct proto_ops l2cap_sock_ops = {
2447 .accept = l2cap_sock_accept, 2725 .accept = l2cap_sock_accept,
2448 .getname = l2cap_sock_getname, 2726 .getname = l2cap_sock_getname,
2449 .sendmsg = l2cap_sock_sendmsg, 2727 .sendmsg = l2cap_sock_sendmsg,
2450 .recvmsg = bt_sock_recvmsg, 2728 .recvmsg = l2cap_sock_recvmsg,
2451 .poll = bt_sock_poll, 2729 .poll = bt_sock_poll,
2452 .ioctl = bt_sock_ioctl, 2730 .ioctl = bt_sock_ioctl,
2453 .mmap = sock_no_mmap, 2731 .mmap = sock_no_mmap,
@@ -2469,8 +2747,8 @@ static struct hci_proto l2cap_hci_proto = {
2469 .connect_ind = l2cap_connect_ind, 2747 .connect_ind = l2cap_connect_ind,
2470 .connect_cfm = l2cap_connect_cfm, 2748 .connect_cfm = l2cap_connect_cfm,
2471 .disconn_ind = l2cap_disconn_ind, 2749 .disconn_ind = l2cap_disconn_ind,
2472 .auth_cfm = l2cap_auth_cfm, 2750 .disconn_cfm = l2cap_disconn_cfm,
2473 .encrypt_cfm = l2cap_encrypt_cfm, 2751 .security_cfm = l2cap_security_cfm,
2474 .recv_acldata = l2cap_recv_acldata 2752 .recv_acldata = l2cap_recv_acldata
2475}; 2753};
2476 2754
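On the socket API side, the l2cap.c changes replace the old L2CAP_LM bit mask with the SOL_BLUETOOTH options BT_SECURITY (a struct carrying the requested level) and BT_DEFER_SETUP (defer the connection response until the first read on the accepted socket, as handled by the new l2cap_sock_recvmsg()). A minimal userspace sketch of how a server might opt in; the numeric constants are repeated here only so the sketch is self-contained and would normally come from the Bluetooth headers:

        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>
        #include <sys/socket.h>

        #ifndef AF_BLUETOOTH
        #define AF_BLUETOOTH    31
        #endif
        #define BTPROTO_L2CAP   0

        #define SOL_BLUETOOTH   274
        #define BT_SECURITY     4       /* struct bt_security */
        #define BT_DEFER_SETUP  7       /* u32 boolean */

        #define BT_SECURITY_MEDIUM 2    /* authentication + encryption */

        struct bt_security {
                unsigned char level;
        };

        int main(void)
        {
                struct bt_security sec;
                unsigned int defer = 1;
                int sk = socket(AF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP);

                if (sk < 0) {
                        perror("socket");
                        return 1;
                }

                memset(&sec, 0, sizeof(sec));
                sec.level = BT_SECURITY_MEDIUM;
                if (setsockopt(sk, SOL_BLUETOOTH, BT_SECURITY, &sec, sizeof(sec)) < 0)
                        perror("BT_SECURITY");

                /* BT_DEFER_SETUP is only accepted on a bound or listening socket,
                 * so on this unbound socket the kernel is expected to return
                 * EINVAL; a real server would bind() and listen() first. */
                if (setsockopt(sk, SOL_BLUETOOTH, BT_DEFER_SETUP, &defer, sizeof(defer)) < 0)
                        perror("BT_DEFER_SETUP");

                close(sk);
                return 0;
        }

With defer_setup enabled, bt_accept_dequeue() hands out sockets that are still in BT_CONNECT2, and the first recvmsg() on such a socket sends the pending L2CAP connect response.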
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index acd84fd524b8..1d0fb0f23c63 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -46,7 +46,7 @@
46#include <net/bluetooth/l2cap.h> 46#include <net/bluetooth/l2cap.h>
47#include <net/bluetooth/rfcomm.h> 47#include <net/bluetooth/rfcomm.h>
48 48
49#define VERSION "1.10" 49#define VERSION "1.11"
50 50
51static int disable_cfc = 0; 51static int disable_cfc = 0;
52static int channel_mtu = -1; 52static int channel_mtu = -1;
@@ -223,19 +223,25 @@ static int rfcomm_l2sock_create(struct socket **sock)
223 return err; 223 return err;
224} 224}
225 225
226static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d) 226static inline int rfcomm_check_security(struct rfcomm_dlc *d)
227{ 227{
228 struct sock *sk = d->session->sock->sk; 228 struct sock *sk = d->session->sock->sk;
229 __u8 auth_type;
229 230
230 if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) { 231 switch (d->sec_level) {
231 if (!hci_conn_encrypt(l2cap_pi(sk)->conn->hcon)) 232 case BT_SECURITY_HIGH:
232 return 1; 233 auth_type = HCI_AT_GENERAL_BONDING_MITM;
233 } else if (d->link_mode & RFCOMM_LM_AUTH) { 234 break;
234 if (!hci_conn_auth(l2cap_pi(sk)->conn->hcon)) 235 case BT_SECURITY_MEDIUM:
235 return 1; 236 auth_type = HCI_AT_GENERAL_BONDING;
237 break;
238 default:
239 auth_type = HCI_AT_NO_BONDING;
240 break;
236 } 241 }
237 242
238 return 0; 243 return hci_conn_security(l2cap_pi(sk)->conn->hcon, d->sec_level,
244 auth_type);
239} 245}
240 246
241/* ---- RFCOMM DLCs ---- */ 247/* ---- RFCOMM DLCs ---- */
@@ -388,10 +394,10 @@ static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst,
388 d->cfc = (s->cfc == RFCOMM_CFC_UNKNOWN) ? 0 : s->cfc; 394 d->cfc = (s->cfc == RFCOMM_CFC_UNKNOWN) ? 0 : s->cfc;
389 395
390 if (s->state == BT_CONNECTED) { 396 if (s->state == BT_CONNECTED) {
391 if (rfcomm_check_link_mode(d)) 397 if (rfcomm_check_security(d))
392 set_bit(RFCOMM_AUTH_PENDING, &d->flags);
393 else
394 rfcomm_send_pn(s, 1, d); 398 rfcomm_send_pn(s, 1, d);
399 else
400 set_bit(RFCOMM_AUTH_PENDING, &d->flags);
395 } 401 }
396 402
397 rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); 403 rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT);
@@ -421,9 +427,16 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
421 d, d->state, d->dlci, err, s); 427 d, d->state, d->dlci, err, s);
422 428
423 switch (d->state) { 429 switch (d->state) {
424 case BT_CONNECTED:
425 case BT_CONFIG:
426 case BT_CONNECT: 430 case BT_CONNECT:
431 case BT_CONFIG:
432 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
433 set_bit(RFCOMM_AUTH_REJECT, &d->flags);
434 rfcomm_schedule(RFCOMM_SCHED_AUTH);
435 break;
436 }
437 /* Fall through */
438
439 case BT_CONNECTED:
427 d->state = BT_DISCONN; 440 d->state = BT_DISCONN;
428 if (skb_queue_empty(&d->tx_queue)) { 441 if (skb_queue_empty(&d->tx_queue)) {
429 rfcomm_send_disc(s, d->dlci); 442 rfcomm_send_disc(s, d->dlci);
@@ -434,6 +447,15 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
434 } 447 }
435 break; 448 break;
436 449
450 case BT_OPEN:
451 case BT_CONNECT2:
452 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
453 set_bit(RFCOMM_AUTH_REJECT, &d->flags);
454 rfcomm_schedule(RFCOMM_SCHED_AUTH);
455 break;
456 }
457 /* Fall through */
458
437 default: 459 default:
438 rfcomm_dlc_clear_timer(d); 460 rfcomm_dlc_clear_timer(d);
439 461
@@ -636,6 +658,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
636 bacpy(&addr.l2_bdaddr, src); 658 bacpy(&addr.l2_bdaddr, src);
637 addr.l2_family = AF_BLUETOOTH; 659 addr.l2_family = AF_BLUETOOTH;
638 addr.l2_psm = 0; 660 addr.l2_psm = 0;
661 addr.l2_cid = 0;
639 *err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); 662 *err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr));
640 if (*err < 0) 663 if (*err < 0)
641 goto failed; 664 goto failed;
@@ -657,6 +680,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst
657 bacpy(&addr.l2_bdaddr, dst); 680 bacpy(&addr.l2_bdaddr, dst);
658 addr.l2_family = AF_BLUETOOTH; 681 addr.l2_family = AF_BLUETOOTH;
659 addr.l2_psm = htobs(RFCOMM_PSM); 682 addr.l2_psm = htobs(RFCOMM_PSM);
683 addr.l2_cid = 0;
660 *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); 684 *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK);
661 if (*err == 0 || *err == -EINPROGRESS) 685 if (*err == 0 || *err == -EINPROGRESS)
662 return s; 686 return s;
@@ -1162,7 +1186,7 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci)
1162 return 0; 1186 return 0;
1163} 1187}
1164 1188
1165static void rfcomm_dlc_accept(struct rfcomm_dlc *d) 1189void rfcomm_dlc_accept(struct rfcomm_dlc *d)
1166{ 1190{
1167 struct sock *sk = d->session->sock->sk; 1191 struct sock *sk = d->session->sock->sk;
1168 1192
@@ -1175,12 +1199,31 @@ static void rfcomm_dlc_accept(struct rfcomm_dlc *d)
1175 d->state_change(d, 0); 1199 d->state_change(d, 0);
1176 rfcomm_dlc_unlock(d); 1200 rfcomm_dlc_unlock(d);
1177 1201
1178 if (d->link_mode & RFCOMM_LM_MASTER) 1202 if (d->role_switch)
1179 hci_conn_switch_role(l2cap_pi(sk)->conn->hcon, 0x00); 1203 hci_conn_switch_role(l2cap_pi(sk)->conn->hcon, 0x00);
1180 1204
1181 rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig); 1205 rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig);
1182} 1206}
1183 1207
1208static void rfcomm_check_accept(struct rfcomm_dlc *d)
1209{
1210 if (rfcomm_check_security(d)) {
1211 if (d->defer_setup) {
1212 set_bit(RFCOMM_DEFER_SETUP, &d->flags);
1213 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
1214
1215 rfcomm_dlc_lock(d);
1216 d->state = BT_CONNECT2;
1217 d->state_change(d, 0);
1218 rfcomm_dlc_unlock(d);
1219 } else
1220 rfcomm_dlc_accept(d);
1221 } else {
1222 set_bit(RFCOMM_AUTH_PENDING, &d->flags);
1223 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
1224 }
1225}
1226
1184static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci) 1227static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci)
1185{ 1228{
1186 struct rfcomm_dlc *d; 1229 struct rfcomm_dlc *d;
@@ -1203,11 +1246,7 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci)
1203 if (d) { 1246 if (d) {
1204 if (d->state == BT_OPEN) { 1247 if (d->state == BT_OPEN) {
1205 /* DLC was previously opened by PN request */ 1248 /* DLC was previously opened by PN request */
1206 if (rfcomm_check_link_mode(d)) { 1249 rfcomm_check_accept(d);
1207 set_bit(RFCOMM_AUTH_PENDING, &d->flags);
1208 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
1209 } else
1210 rfcomm_dlc_accept(d);
1211 } 1250 }
1212 return 0; 1251 return 0;
1213 } 1252 }
@@ -1219,11 +1258,7 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci)
1219 d->addr = __addr(s->initiator, dlci); 1258 d->addr = __addr(s->initiator, dlci);
1220 rfcomm_dlc_link(s, d); 1259 rfcomm_dlc_link(s, d);
1221 1260
1222 if (rfcomm_check_link_mode(d)) { 1261 rfcomm_check_accept(d);
1223 set_bit(RFCOMM_AUTH_PENDING, &d->flags);
1224 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
1225 } else
1226 rfcomm_dlc_accept(d);
1227 } else { 1262 } else {
1228 rfcomm_send_dm(s, dlci); 1263 rfcomm_send_dm(s, dlci);
1229 } 1264 }
@@ -1637,11 +1672,12 @@ static void rfcomm_process_connect(struct rfcomm_session *s)
1637 d = list_entry(p, struct rfcomm_dlc, list); 1672 d = list_entry(p, struct rfcomm_dlc, list);
1638 if (d->state == BT_CONFIG) { 1673 if (d->state == BT_CONFIG) {
1639 d->mtu = s->mtu; 1674 d->mtu = s->mtu;
1640 if (rfcomm_check_link_mode(d)) { 1675 if (rfcomm_check_security(d)) {
1676 rfcomm_send_pn(s, 1, d);
1677 } else {
1641 set_bit(RFCOMM_AUTH_PENDING, &d->flags); 1678 set_bit(RFCOMM_AUTH_PENDING, &d->flags);
1642 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); 1679 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
1643 } else 1680 }
1644 rfcomm_send_pn(s, 1, d);
1645 } 1681 }
1646 } 1682 }
1647} 1683}
@@ -1717,11 +1753,17 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s)
1717 if (d->out) { 1753 if (d->out) {
1718 rfcomm_send_pn(s, 1, d); 1754 rfcomm_send_pn(s, 1, d);
1719 rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); 1755 rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT);
1720 } else 1756 } else {
1721 rfcomm_dlc_accept(d); 1757 if (d->defer_setup) {
1722 if (d->link_mode & RFCOMM_LM_SECURE) { 1758 set_bit(RFCOMM_DEFER_SETUP, &d->flags);
1723 struct sock *sk = s->sock->sk; 1759 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
1724 hci_conn_change_link_key(l2cap_pi(sk)->conn->hcon); 1760
1761 rfcomm_dlc_lock(d);
1762 d->state = BT_CONNECT2;
1763 d->state_change(d, 0);
1764 rfcomm_dlc_unlock(d);
1765 } else
1766 rfcomm_dlc_accept(d);
1725 } 1767 }
1726 continue; 1768 continue;
1727 } else if (test_and_clear_bit(RFCOMM_AUTH_REJECT, &d->flags)) { 1769 } else if (test_and_clear_bit(RFCOMM_AUTH_REJECT, &d->flags)) {
@@ -1734,6 +1776,9 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s)
1734 continue; 1776 continue;
1735 } 1777 }
1736 1778
1779 if (test_bit(RFCOMM_SEC_PENDING, &d->flags))
1780 continue;
1781
1737 if (test_bit(RFCOMM_TX_THROTTLED, &s->flags)) 1782 if (test_bit(RFCOMM_TX_THROTTLED, &s->flags))
1738 continue; 1783 continue;
1739 1784
@@ -1876,6 +1921,7 @@ static int rfcomm_add_listener(bdaddr_t *ba)
1876 bacpy(&addr.l2_bdaddr, ba); 1921 bacpy(&addr.l2_bdaddr, ba);
1877 addr.l2_family = AF_BLUETOOTH; 1922 addr.l2_family = AF_BLUETOOTH;
1878 addr.l2_psm = htobs(RFCOMM_PSM); 1923 addr.l2_psm = htobs(RFCOMM_PSM);
1924 addr.l2_cid = 0;
1879 err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); 1925 err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr));
1880 if (err < 0) { 1926 if (err < 0) {
1881 BT_ERR("Bind failed %d", err); 1927 BT_ERR("Bind failed %d", err);
@@ -1947,42 +1993,7 @@ static int rfcomm_run(void *unused)
1947 return 0; 1993 return 0;
1948} 1994}
1949 1995
1950static void rfcomm_auth_cfm(struct hci_conn *conn, u8 status) 1996static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
1951{
1952 struct rfcomm_session *s;
1953 struct rfcomm_dlc *d;
1954 struct list_head *p, *n;
1955
1956 BT_DBG("conn %p status 0x%02x", conn, status);
1957
1958 s = rfcomm_session_get(&conn->hdev->bdaddr, &conn->dst);
1959 if (!s)
1960 return;
1961
1962 rfcomm_session_hold(s);
1963
1964 list_for_each_safe(p, n, &s->dlcs) {
1965 d = list_entry(p, struct rfcomm_dlc, list);
1966
1967 if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) &&
1968 !(conn->link_mode & HCI_LM_ENCRYPT) && !status)
1969 continue;
1970
1971 if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags))
1972 continue;
1973
1974 if (!status)
1975 set_bit(RFCOMM_AUTH_ACCEPT, &d->flags);
1976 else
1977 set_bit(RFCOMM_AUTH_REJECT, &d->flags);
1978 }
1979
1980 rfcomm_session_put(s);
1981
1982 rfcomm_schedule(RFCOMM_SCHED_AUTH);
1983}
1984
1985static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
1986{ 1997{
1987 struct rfcomm_session *s; 1998 struct rfcomm_session *s;
1988 struct rfcomm_dlc *d; 1999 struct rfcomm_dlc *d;
@@ -1999,18 +2010,29 @@ static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
1999 list_for_each_safe(p, n, &s->dlcs) { 2010 list_for_each_safe(p, n, &s->dlcs) {
2000 d = list_entry(p, struct rfcomm_dlc, list); 2011 d = list_entry(p, struct rfcomm_dlc, list);
2001 2012
2002 if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) && 2013 if (test_and_clear_bit(RFCOMM_SEC_PENDING, &d->flags)) {
2003 (d->state == BT_CONNECTED || 2014 rfcomm_dlc_clear_timer(d);
2004 d->state == BT_CONFIG) && 2015 if (status || encrypt == 0x00) {
2005 !status && encrypt == 0x00) { 2016 __rfcomm_dlc_close(d, ECONNREFUSED);
2006 __rfcomm_dlc_close(d, ECONNREFUSED); 2017 continue;
2007 continue; 2018 }
2019 }
2020
2021 if (d->state == BT_CONNECTED && !status && encrypt == 0x00) {
2022 if (d->sec_level == BT_SECURITY_MEDIUM) {
2023 set_bit(RFCOMM_SEC_PENDING, &d->flags);
2024 rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT);
2025 continue;
2026 } else if (d->sec_level == BT_SECURITY_HIGH) {
2027 __rfcomm_dlc_close(d, ECONNREFUSED);
2028 continue;
2029 }
2008 } 2030 }
2009 2031
2010 if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags)) 2032 if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags))
2011 continue; 2033 continue;
2012 2034
2013 if (!status && encrypt) 2035 if (!status)
2014 set_bit(RFCOMM_AUTH_ACCEPT, &d->flags); 2036 set_bit(RFCOMM_AUTH_ACCEPT, &d->flags);
2015 else 2037 else
2016 set_bit(RFCOMM_AUTH_REJECT, &d->flags); 2038 set_bit(RFCOMM_AUTH_REJECT, &d->flags);
@@ -2023,8 +2045,7 @@ static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
2023 2045
2024static struct hci_cb rfcomm_cb = { 2046static struct hci_cb rfcomm_cb = {
2025 .name = "RFCOMM", 2047 .name = "RFCOMM",
2026 .auth_cfm = rfcomm_auth_cfm, 2048 .security_cfm = rfcomm_security_cfm
2027 .encrypt_cfm = rfcomm_encrypt_cfm
2028}; 2049};
2029 2050
2030static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf) 2051static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf)
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index d3fc6fca38d0..7f482784e9f7 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -261,12 +261,19 @@ static void rfcomm_sock_init(struct sock *sk, struct sock *parent)
261 261
262 if (parent) { 262 if (parent) {
263 sk->sk_type = parent->sk_type; 263 sk->sk_type = parent->sk_type;
264 pi->link_mode = rfcomm_pi(parent)->link_mode; 264 pi->dlc->defer_setup = bt_sk(parent)->defer_setup;
265
266 pi->sec_level = rfcomm_pi(parent)->sec_level;
267 pi->role_switch = rfcomm_pi(parent)->role_switch;
265 } else { 268 } else {
266 pi->link_mode = 0; 269 pi->dlc->defer_setup = 0;
270
271 pi->sec_level = BT_SECURITY_LOW;
272 pi->role_switch = 0;
267 } 273 }
268 274
269 pi->dlc->link_mode = pi->link_mode; 275 pi->dlc->sec_level = pi->sec_level;
276 pi->dlc->role_switch = pi->role_switch;
270} 277}
271 278
272static struct proto rfcomm_proto = { 279static struct proto rfcomm_proto = {
@@ -406,7 +413,8 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a
406 bacpy(&bt_sk(sk)->dst, &sa->rc_bdaddr); 413 bacpy(&bt_sk(sk)->dst, &sa->rc_bdaddr);
407 rfcomm_pi(sk)->channel = sa->rc_channel; 414 rfcomm_pi(sk)->channel = sa->rc_channel;
408 415
409 d->link_mode = rfcomm_pi(sk)->link_mode; 416 d->sec_level = rfcomm_pi(sk)->sec_level;
417 d->role_switch = rfcomm_pi(sk)->role_switch;
410 418
411 err = rfcomm_dlc_open(d, &bt_sk(sk)->src, &sa->rc_bdaddr, sa->rc_channel); 419 err = rfcomm_dlc_open(d, &bt_sk(sk)->src, &sa->rc_bdaddr, sa->rc_channel);
412 if (!err) 420 if (!err)
@@ -554,6 +562,9 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
554 struct sk_buff *skb; 562 struct sk_buff *skb;
555 int sent = 0; 563 int sent = 0;
556 564
565 if (test_bit(RFCOMM_DEFER_SETUP, &d->flags))
566 return -ENOTCONN;
567
557 if (msg->msg_flags & MSG_OOB) 568 if (msg->msg_flags & MSG_OOB)
558 return -EOPNOTSUPP; 569 return -EOPNOTSUPP;
559 570
@@ -570,8 +581,11 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
570 581
571 skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE, 582 skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE,
572 msg->msg_flags & MSG_DONTWAIT, &err); 583 msg->msg_flags & MSG_DONTWAIT, &err);
573 if (!skb) 584 if (!skb) {
585 if (sent == 0)
586 sent = err;
574 break; 587 break;
588 }
575 skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE); 589 skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE);
576 590
577 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 591 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
@@ -630,10 +644,16 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
630 struct msghdr *msg, size_t size, int flags) 644 struct msghdr *msg, size_t size, int flags)
631{ 645{
632 struct sock *sk = sock->sk; 646 struct sock *sk = sock->sk;
647 struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc;
633 int err = 0; 648 int err = 0;
634 size_t target, copied = 0; 649 size_t target, copied = 0;
635 long timeo; 650 long timeo;
636 651
652 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
653 rfcomm_dlc_accept(d);
654 return 0;
655 }
656
637 if (flags & MSG_OOB) 657 if (flags & MSG_OOB)
638 return -EOPNOTSUPP; 658 return -EOPNOTSUPP;
639 659
@@ -710,7 +730,7 @@ out:
710 return copied ? : err; 730 return copied ? : err;
711} 731}
712 732
713static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) 733static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, int optlen)
714{ 734{
715 struct sock *sk = sock->sk; 735 struct sock *sk = sock->sk;
716 int err = 0; 736 int err = 0;
@@ -727,7 +747,14 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c
727 break; 747 break;
728 } 748 }
729 749
730 rfcomm_pi(sk)->link_mode = opt; 750 if (opt & RFCOMM_LM_AUTH)
751 rfcomm_pi(sk)->sec_level = BT_SECURITY_LOW;
752 if (opt & RFCOMM_LM_ENCRYPT)
753 rfcomm_pi(sk)->sec_level = BT_SECURITY_MEDIUM;
754 if (opt & RFCOMM_LM_SECURE)
755 rfcomm_pi(sk)->sec_level = BT_SECURITY_HIGH;
756
757 rfcomm_pi(sk)->role_switch = (opt & RFCOMM_LM_MASTER);
731 break; 758 break;
732 759
733 default: 760 default:
@@ -739,12 +766,76 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c
739 return err; 766 return err;
740} 767}
741 768
742static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) 769static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
770{
771 struct sock *sk = sock->sk;
772 struct bt_security sec;
773 int len, err = 0;
774 u32 opt;
775
776 BT_DBG("sk %p", sk);
777
778 if (level == SOL_RFCOMM)
779 return rfcomm_sock_setsockopt_old(sock, optname, optval, optlen);
780
781 if (level != SOL_BLUETOOTH)
782 return -ENOPROTOOPT;
783
784 lock_sock(sk);
785
786 switch (optname) {
787 case BT_SECURITY:
788 if (sk->sk_type != SOCK_STREAM) {
789 err = -EINVAL;
790 break;
791 }
792
793 sec.level = BT_SECURITY_LOW;
794
795 len = min_t(unsigned int, sizeof(sec), optlen);
796 if (copy_from_user((char *) &sec, optval, len)) {
797 err = -EFAULT;
798 break;
799 }
800
801 if (sec.level > BT_SECURITY_HIGH) {
802 err = -EINVAL;
803 break;
804 }
805
806 rfcomm_pi(sk)->sec_level = sec.level;
807 break;
808
809 case BT_DEFER_SETUP:
810 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
811 err = -EINVAL;
812 break;
813 }
814
815 if (get_user(opt, (u32 __user *) optval)) {
816 err = -EFAULT;
817 break;
818 }
819
820 bt_sk(sk)->defer_setup = opt;
821 break;
822
823 default:
824 err = -ENOPROTOOPT;
825 break;
826 }
827
828 release_sock(sk);
829 return err;
830}
831
832static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
743{ 833{
744 struct sock *sk = sock->sk; 834 struct sock *sk = sock->sk;
745 struct sock *l2cap_sk; 835 struct sock *l2cap_sk;
746 struct rfcomm_conninfo cinfo; 836 struct rfcomm_conninfo cinfo;
747 int len, err = 0; 837 int len, err = 0;
838 u32 opt;
748 839
749 BT_DBG("sk %p", sk); 840 BT_DBG("sk %p", sk);
750 841
@@ -755,12 +846,32 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c
755 846
756 switch (optname) { 847 switch (optname) {
757 case RFCOMM_LM: 848 case RFCOMM_LM:
758 if (put_user(rfcomm_pi(sk)->link_mode, (u32 __user *) optval)) 849 switch (rfcomm_pi(sk)->sec_level) {
850 case BT_SECURITY_LOW:
851 opt = RFCOMM_LM_AUTH;
852 break;
853 case BT_SECURITY_MEDIUM:
854 opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT;
855 break;
856 case BT_SECURITY_HIGH:
857 opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT |
858 RFCOMM_LM_SECURE;
859 break;
860 default:
861 opt = 0;
862 break;
863 }
864
865 if (rfcomm_pi(sk)->role_switch)
866 opt |= RFCOMM_LM_MASTER;
867
868 if (put_user(opt, (u32 __user *) optval))
759 err = -EFAULT; 869 err = -EFAULT;
760 break; 870 break;
761 871
762 case RFCOMM_CONNINFO: 872 case RFCOMM_CONNINFO:
763 if (sk->sk_state != BT_CONNECTED) { 873 if (sk->sk_state != BT_CONNECTED &&
874 !rfcomm_pi(sk)->dlc->defer_setup) {
764 err = -ENOTCONN; 875 err = -ENOTCONN;
765 break; 876 break;
766 } 877 }
@@ -785,6 +896,60 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c
785 return err; 896 return err;
786} 897}
787 898
899static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
900{
901 struct sock *sk = sock->sk;
902 struct bt_security sec;
903 int len, err = 0;
904
905 BT_DBG("sk %p", sk);
906
907 if (level == SOL_RFCOMM)
908 return rfcomm_sock_getsockopt_old(sock, optname, optval, optlen);
909
910 if (level != SOL_BLUETOOTH)
911 return -ENOPROTOOPT;
912
913 if (get_user(len, optlen))
914 return -EFAULT;
915
916 lock_sock(sk);
917
918 switch (optname) {
919 case BT_SECURITY:
920 if (sk->sk_type != SOCK_STREAM) {
921 err = -EINVAL;
922 break;
923 }
924
925 sec.level = rfcomm_pi(sk)->sec_level;
926
927 len = min_t(unsigned int, len, sizeof(sec));
928 if (copy_to_user(optval, (char *) &sec, len))
929 err = -EFAULT;
930
931 break;
932
933 case BT_DEFER_SETUP:
934 if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) {
935 err = -EINVAL;
936 break;
937 }
938
939 if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval))
940 err = -EFAULT;
941
942 break;
943
944 default:
945 err = -ENOPROTOOPT;
946 break;
947 }
948
949 release_sock(sk);
950 return err;
951}
952
788static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) 953static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
789{ 954{
790 struct sock *sk __maybe_unused = sock->sk; 955 struct sock *sk __maybe_unused = sock->sk;
@@ -888,6 +1053,10 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc *
888 1053
889done: 1054done:
890 bh_unlock_sock(parent); 1055 bh_unlock_sock(parent);
1056
1057 if (bt_sk(parent)->defer_setup)
1058 parent->sk_state_change(parent);
1059
891 return result; 1060 return result;
892} 1061}
893 1062
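
The rfcomm/sock.c hunks above replace the old RFCOMM_LM link-mode bitmask with the generic BT_SECURITY levels and add BT_DEFER_SETUP handling at the SOL_BLUETOOTH level. A minimal user-space sketch of the new options follows; it is illustrative only and not part of this commit, and it assumes the usual libbluetooth headers (<bluetooth/bluetooth.h>, <bluetooth/rfcomm.h>) export BT_SECURITY, BT_DEFER_SETUP and struct bt_security on the build host; the RFCOMM channel number is hypothetical.

/* Illustrative sketch only -- not part of this commit. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <bluetooth/bluetooth.h>
#include <bluetooth/rfcomm.h>

static int listen_secure_rfcomm(void)
{
	struct sockaddr_rc la;
	struct bt_security sec;
	uint32_t defer = 1;
	int sk;

	sk = socket(PF_BLUETOOTH, SOCK_STREAM, BTPROTO_RFCOMM);
	if (sk < 0)
		return -1;

	/* BT_SECURITY_MEDIUM corresponds to the old
	 * RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT combination. */
	memset(&sec, 0, sizeof(sec));
	sec.level = BT_SECURITY_MEDIUM;
	if (setsockopt(sk, SOL_BLUETOOTH, BT_SECURITY, &sec, sizeof(sec)) < 0)
		perror("BT_SECURITY");

	memset(&la, 0, sizeof(la));
	la.rc_family = AF_BLUETOOTH;
	bacpy(&la.rc_bdaddr, BDADDR_ANY);
	la.rc_channel = 11;	/* hypothetical channel */
	if (bind(sk, (struct sockaddr *) &la, sizeof(la)) < 0) {
		close(sk);
		return -1;
	}

	/* BT_DEFER_SETUP is only accepted on bound or listening sockets;
	 * with it set, an incoming DLC is not accepted until the
	 * application issues the first recv() on the new socket. */
	if (setsockopt(sk, SOL_BLUETOOTH, BT_DEFER_SETUP, &defer, sizeof(defer)) < 0)
		perror("BT_DEFER_SETUP");

	listen(sk, 1);
	return sk;
}

The legacy RFCOMM_LM getsockopt path shown in the hunk above maps the stored security level back onto the old bitmask, so existing callers keep working.
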
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index d030c69cb5a3..abdc703a11d2 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -731,7 +731,8 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
731 remove_wait_queue(&dev->wait, &wait); 731 remove_wait_queue(&dev->wait, &wait);
732 732
733 if (err == 0) 733 if (err == 0)
734 device_move(dev->tty_dev, rfcomm_get_device(dev)); 734 device_move(dev->tty_dev, rfcomm_get_device(dev),
735 DPM_ORDER_DEV_AFTER_PARENT);
735 736
736 rfcomm_tty_copy_pending(dev); 737 rfcomm_tty_copy_pending(dev);
737 738
@@ -751,7 +752,7 @@ static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp)
751 752
752 if (atomic_dec_and_test(&dev->opened)) { 753 if (atomic_dec_and_test(&dev->opened)) {
753 if (dev->tty_dev->parent) 754 if (dev->tty_dev->parent)
754 device_move(dev->tty_dev, NULL); 755 device_move(dev->tty_dev, NULL, DPM_ORDER_DEV_LAST);
755 756
756 /* Close DLC and dettach TTY */ 757 /* Close DLC and dettach TTY */
757 rfcomm_dlc_close(dev->dlc, 0); 758 rfcomm_dlc_close(dev->dlc, 0);
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 46fd8bf9a690..51ae0c3e470a 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -195,7 +195,7 @@ static int sco_connect(struct sock *sk)
195 else 195 else
196 type = SCO_LINK; 196 type = SCO_LINK;
197 197
198 hcon = hci_connect(hdev, type, dst, HCI_AT_NO_BONDING); 198 hcon = hci_connect(hdev, type, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING);
199 if (!hcon) 199 if (!hcon)
200 goto done; 200 goto done;
201 201
@@ -668,7 +668,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char
668 return err; 668 return err;
669} 669}
670 670
671static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) 671static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen)
672{ 672{
673 struct sock *sk = sock->sk; 673 struct sock *sk = sock->sk;
674 struct sco_options opts; 674 struct sco_options opts;
@@ -723,6 +723,31 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char
723 return err; 723 return err;
724} 724}
725 725
726static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen)
727{
728 struct sock *sk = sock->sk;
729 int len, err = 0;
730
731 BT_DBG("sk %p", sk);
732
733 if (level == SOL_SCO)
734 return sco_sock_getsockopt_old(sock, optname, optval, optlen);
735
736 if (get_user(len, optlen))
737 return -EFAULT;
738
739 lock_sock(sk);
740
741 switch (optname) {
742 default:
743 err = -ENOPROTOOPT;
744 break;
745 }
746
747 release_sock(sk);
748 return err;
749}
750
726static int sco_sock_release(struct socket *sock) 751static int sco_sock_release(struct socket *sock)
727{ 752{
728 struct sock *sk = sock->sk; 753 struct sock *sk = sock->sk;
@@ -832,10 +857,30 @@ done:
832/* ----- SCO interface with lower layer (HCI) ----- */ 857/* ----- SCO interface with lower layer (HCI) ----- */
833static int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type) 858static int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type)
834{ 859{
860 register struct sock *sk;
861 struct hlist_node *node;
862 int lm = 0;
863
864 if (type != SCO_LINK && type != ESCO_LINK)
865 return 0;
866
835 BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); 867 BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr));
836 868
837 /* Always accept connection */ 869 /* Find listening sockets */
838 return HCI_LM_ACCEPT; 870 read_lock(&sco_sk_list.lock);
871 sk_for_each(sk, node, &sco_sk_list.head) {
872 if (sk->sk_state != BT_LISTEN)
873 continue;
874
875 if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr) ||
876 !bacmp(&bt_sk(sk)->src, BDADDR_ANY)) {
877 lm |= HCI_LM_ACCEPT;
878 break;
879 }
880 }
881 read_unlock(&sco_sk_list.lock);
882
883 return lm;
839} 884}
840 885
841static int sco_connect_cfm(struct hci_conn *hcon, __u8 status) 886static int sco_connect_cfm(struct hci_conn *hcon, __u8 status)
@@ -857,7 +902,7 @@ static int sco_connect_cfm(struct hci_conn *hcon, __u8 status)
857 return 0; 902 return 0;
858} 903}
859 904
860static int sco_disconn_ind(struct hci_conn *hcon, __u8 reason) 905static int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason)
861{ 906{
862 BT_DBG("hcon %p reason %d", hcon, reason); 907 BT_DBG("hcon %p reason %d", hcon, reason);
863 908
@@ -940,7 +985,7 @@ static struct hci_proto sco_hci_proto = {
940 .id = HCI_PROTO_SCO, 985 .id = HCI_PROTO_SCO,
941 .connect_ind = sco_connect_ind, 986 .connect_ind = sco_connect_ind,
942 .connect_cfm = sco_connect_cfm, 987 .connect_cfm = sco_connect_cfm,
943 .disconn_ind = sco_disconn_ind, 988 .disconn_cfm = sco_disconn_cfm,
944 .recv_scodata = sco_recv_scodata 989 .recv_scodata = sco_recv_scodata
945}; 990};
946 991
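
With this change an incoming SCO/eSCO connection is only accepted when a matching listening socket exists, so applications that relied on the previous unconditional HCI_LM_ACCEPT now need to keep a listener open. A rough sketch of such a listener (illustrative only, not part of this commit; assumes struct sockaddr_sco from libbluetooth's <bluetooth/sco.h>):

/* Illustrative sketch only -- not part of this commit. */
#include <string.h>
#include <sys/socket.h>
#include <bluetooth/bluetooth.h>
#include <bluetooth/sco.h>

static int sco_listener(void)
{
	struct sockaddr_sco addr;
	int sk = socket(PF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_SCO);

	if (sk < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.sco_family = AF_BLUETOOTH;
	bacpy(&addr.sco_bdaddr, BDADDR_ANY);	/* match any local adapter */

	if (bind(sk, (struct sockaddr *) &addr, sizeof(addr)) < 0 ||
	    listen(sk, 1) < 0)
		return -1;

	/* While this socket is listening, sco_connect_ind() above
	 * returns HCI_LM_ACCEPT for incoming SCO/eSCO links. */
	return sk;
}
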
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 727c5c510a60..8a96672e2c5c 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -426,7 +426,6 @@ err2:
426err1: 426err1:
427 kobject_del(&p->kobj); 427 kobject_del(&p->kobj);
428err0: 428err0:
429 kobject_put(&p->kobj);
430 dev_set_promiscuity(dev, -1); 429 dev_set_promiscuity(dev, -1);
431put_back: 430put_back:
432 dev_put(dev); 431 dev_put(dev);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index cf754ace0b75..3953ac4214c8 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -107,7 +107,7 @@ static void fake_update_pmtu(struct dst_entry *dst, u32 mtu)
107 107
108static struct dst_ops fake_dst_ops = { 108static struct dst_ops fake_dst_ops = {
109 .family = AF_INET, 109 .family = AF_INET,
110 .protocol = __constant_htons(ETH_P_IP), 110 .protocol = cpu_to_be16(ETH_P_IP),
111 .update_pmtu = fake_update_pmtu, 111 .update_pmtu = fake_update_pmtu,
112 .entries = ATOMIC_INIT(0), 112 .entries = ATOMIC_INIT(0),
113}; 113};
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index ba7be195803c..fcffb3fb1177 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -98,7 +98,8 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
98 kfree_skb(skb); 98 kfree_skb(skb);
99 goto errout; 99 goto errout;
100 } 100 }
101 err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); 101 rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
102 return;
102errout: 103errout:
103 if (err < 0) 104 if (err < 0)
104 rtnl_set_sk_err(net, RTNLGRP_LINK, err); 105 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index d44cbf8c374a..a94f3cc377c0 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -214,7 +214,7 @@ static struct xt_target ebt_log_tg_reg __read_mostly = {
214 .me = THIS_MODULE, 214 .me = THIS_MODULE,
215}; 215};
216 216
217static const struct nf_logger ebt_log_logger = { 217static struct nf_logger ebt_log_logger __read_mostly = {
218 .name = "ebt_log", 218 .name = "ebt_log",
219 .logfn = &ebt_log_packet, 219 .logfn = &ebt_log_packet,
220 .me = THIS_MODULE, 220 .me = THIS_MODULE,
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 2c6d6823e703..133eeae45a4f 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -279,21 +279,21 @@ static struct xt_target ebt_ulog_tg_reg __read_mostly = {
279 .me = THIS_MODULE, 279 .me = THIS_MODULE,
280}; 280};
281 281
282static const struct nf_logger ebt_ulog_logger = { 282static struct nf_logger ebt_ulog_logger __read_mostly = {
283 .name = "ulog", 283 .name = "ebt_ulog",
284 .logfn = &ebt_log_packet, 284 .logfn = &ebt_log_packet,
285 .me = THIS_MODULE, 285 .me = THIS_MODULE,
286}; 286};
287 287
288static int __init ebt_ulog_init(void) 288static int __init ebt_ulog_init(void)
289{ 289{
290 bool ret = true; 290 int ret;
291 int i; 291 int i;
292 292
293 if (nlbufsiz >= 128*1024) { 293 if (nlbufsiz >= 128*1024) {
294 printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB," 294 printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB,"
295 " please try a smaller nlbufsiz parameter.\n"); 295 " please try a smaller nlbufsiz parameter.\n");
296 return false; 296 return -EINVAL;
297 } 297 }
298 298
299 /* initialize ulog_buffers */ 299 /* initialize ulog_buffers */
@@ -308,12 +308,12 @@ static int __init ebt_ulog_init(void)
308 if (!ebtulognl) { 308 if (!ebtulognl) {
309 printk(KERN_WARNING KBUILD_MODNAME ": out of memory trying to " 309 printk(KERN_WARNING KBUILD_MODNAME ": out of memory trying to "
310 "call netlink_kernel_create\n"); 310 "call netlink_kernel_create\n");
311 ret = false; 311 ret = -ENOMEM;
312 } else if (xt_register_target(&ebt_ulog_tg_reg) != 0) { 312 } else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) {
313 netlink_kernel_release(ebtulognl); 313 netlink_kernel_release(ebtulognl);
314 } 314 }
315 315
316 if (ret) 316 if (ret == 0)
317 nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); 317 nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
318 318
319 return ret; 319 return ret;
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 8604dfc1fc3b..c751111440f8 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -46,7 +46,6 @@ static struct ebt_table broute_table =
46 .name = "broute", 46 .name = "broute",
47 .table = &initial_table, 47 .table = &initial_table,
48 .valid_hooks = 1 << NF_BR_BROUTING, 48 .valid_hooks = 1 << NF_BR_BROUTING,
49 .lock = __RW_LOCK_UNLOCKED(broute_table.lock),
50 .check = check, 49 .check = check,
51 .me = THIS_MODULE, 50 .me = THIS_MODULE,
52}; 51};
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 2b2e8040a9c6..a5eea72938a6 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -55,7 +55,6 @@ static struct ebt_table frame_filter =
55 .name = "filter", 55 .name = "filter",
56 .table = &initial_table, 56 .table = &initial_table,
57 .valid_hooks = FILTER_VALID_HOOKS, 57 .valid_hooks = FILTER_VALID_HOOKS,
58 .lock = __RW_LOCK_UNLOCKED(frame_filter.lock),
59 .check = check, 58 .check = check,
60 .me = THIS_MODULE, 59 .me = THIS_MODULE,
61}; 60};
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 3fe1ae87e35f..6024c551f9a9 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -55,7 +55,6 @@ static struct ebt_table frame_nat =
55 .name = "nat", 55 .name = "nat",
56 .table = &initial_table, 56 .table = &initial_table,
57 .valid_hooks = NAT_VALID_HOOKS, 57 .valid_hooks = NAT_VALID_HOOKS,
58 .lock = __RW_LOCK_UNLOCKED(frame_nat.lock),
59 .check = check, 58 .check = check,
60 .me = THIS_MODULE, 59 .me = THIS_MODULE,
61}; 60};
diff --git a/net/can/af_can.c b/net/can/af_can.c
index fa417ca6cbe6..547bafc79e28 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -273,8 +273,7 @@ int can_send(struct sk_buff *skb, int loop)
273 err = net_xmit_errno(err); 273 err = net_xmit_errno(err);
274 274
275 if (err) { 275 if (err) {
276 if (newskb) 276 kfree_skb(newskb);
277 kfree_skb(newskb);
278 return err; 277 return err;
279 } 278 }
280 279
@@ -828,7 +827,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg,
828 */ 827 */
829 828
830static struct packet_type can_packet __read_mostly = { 829static struct packet_type can_packet __read_mostly = {
831 .type = __constant_htons(ETH_P_CAN), 830 .type = cpu_to_be16(ETH_P_CAN),
832 .dev = NULL, 831 .dev = NULL,
833 .func = can_rcv, 832 .func = can_rcv,
834}; 833};
diff --git a/net/can/raw.c b/net/can/raw.c
index 0703cba4bf9f..6aa154e806ae 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -648,6 +648,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
648 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 648 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
649 if (err < 0) 649 if (err < 0)
650 goto free_skb; 650 goto free_skb;
651 err = sock_tx_timestamp(msg, sk, skb_tx(skb));
652 if (err < 0)
653 goto free_skb;
651 skb->dev = dev; 654 skb->dev = dev;
652 skb->sk = sk; 655 skb->sk = sk;
653 656
diff --git a/net/compat.c b/net/compat.c
index a3a2ba0fac08..8d739053afe4 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -216,7 +216,7 @@ Efault:
216int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data) 216int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data)
217{ 217{
218 struct compat_timeval ctv; 218 struct compat_timeval ctv;
219 struct compat_timespec cts; 219 struct compat_timespec cts[3];
220 struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control; 220 struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control;
221 struct compat_cmsghdr cmhdr; 221 struct compat_cmsghdr cmhdr;
222 int cmlen; 222 int cmlen;
@@ -233,12 +233,17 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat
233 data = &ctv; 233 data = &ctv;
234 len = sizeof(ctv); 234 len = sizeof(ctv);
235 } 235 }
236 if (level == SOL_SOCKET && type == SCM_TIMESTAMPNS) { 236 if (level == SOL_SOCKET &&
237 (type == SCM_TIMESTAMPNS || type == SCM_TIMESTAMPING)) {
238 int count = type == SCM_TIMESTAMPNS ? 1 : 3;
239 int i;
237 struct timespec *ts = (struct timespec *)data; 240 struct timespec *ts = (struct timespec *)data;
238 cts.tv_sec = ts->tv_sec; 241 for (i = 0; i < count; i++) {
239 cts.tv_nsec = ts->tv_nsec; 242 cts[i].tv_sec = ts[i].tv_sec;
243 cts[i].tv_nsec = ts[i].tv_nsec;
244 }
240 data = &cts; 245 data = &cts;
241 len = sizeof(cts); 246 len = sizeof(cts[0]) * count;
242 } 247 }
243 248
244 cmlen = CMSG_COMPAT_LEN(len); 249 cmlen = CMSG_COMPAT_LEN(len);
@@ -455,7 +460,7 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
455 struct timeval tv; 460 struct timeval tv;
456 461
457 if (!sock_flag(sk, SOCK_TIMESTAMP)) 462 if (!sock_flag(sk, SOCK_TIMESTAMP))
458 sock_enable_timestamp(sk); 463 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
459 tv = ktime_to_timeval(sk->sk_stamp); 464 tv = ktime_to_timeval(sk->sk_stamp);
460 if (tv.tv_sec == -1) 465 if (tv.tv_sec == -1)
461 return err; 466 return err;
@@ -479,7 +484,7 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta
479 struct timespec ts; 484 struct timespec ts;
480 485
481 if (!sock_flag(sk, SOCK_TIMESTAMP)) 486 if (!sock_flag(sk, SOCK_TIMESTAMP))
482 sock_enable_timestamp(sk); 487 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
483 ts = ktime_to_timespec(sk->sk_stamp); 488 ts = ktime_to_timespec(sk->sk_stamp);
484 if (ts.tv_sec == -1) 489 if (ts.tv_sec == -1)
485 return err; 490 return err;
diff --git a/net/core/Makefile b/net/core/Makefile
index 26a37cb31923..796f46eece5f 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -17,3 +17,6 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o
17obj-$(CONFIG_NETPOLL) += netpoll.o 17obj-$(CONFIG_NETPOLL) += netpoll.o
18obj-$(CONFIG_NET_DMA) += user_dma.o 18obj-$(CONFIG_NET_DMA) += user_dma.o
19obj-$(CONFIG_FIB_RULES) += fib_rules.o 19obj-$(CONFIG_FIB_RULES) += fib_rules.o
20obj-$(CONFIG_TRACEPOINTS) += net-traces.o
21obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
22
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 5e2ac0c4b07c..d0de644b378d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -208,7 +208,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
208 208
209void skb_free_datagram(struct sock *sk, struct sk_buff *skb) 209void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
210{ 210{
211 kfree_skb(skb); 211 consume_skb(skb);
212 sk_mem_reclaim_partial(sk); 212 sk_mem_reclaim_partial(sk);
213} 213}
214 214
diff --git a/net/core/dev.c b/net/core/dev.c
index f1129706ce7b..52fea5b28ca6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1457,7 +1457,9 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1457 ((features & NETIF_F_IP_CSUM) && 1457 ((features & NETIF_F_IP_CSUM) &&
1458 protocol == htons(ETH_P_IP)) || 1458 protocol == htons(ETH_P_IP)) ||
1459 ((features & NETIF_F_IPV6_CSUM) && 1459 ((features & NETIF_F_IPV6_CSUM) &&
1460 protocol == htons(ETH_P_IPV6))); 1460 protocol == htons(ETH_P_IPV6)) ||
1461 ((features & NETIF_F_FCOE_CRC) &&
1462 protocol == htons(ETH_P_FCOE)));
1461} 1463}
1462 1464
1463static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) 1465static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
@@ -1668,8 +1670,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1668 struct netdev_queue *txq) 1670 struct netdev_queue *txq)
1669{ 1671{
1670 const struct net_device_ops *ops = dev->netdev_ops; 1672 const struct net_device_ops *ops = dev->netdev_ops;
1673 int rc;
1671 1674
1672 prefetch(&dev->netdev_ops->ndo_start_xmit);
1673 if (likely(!skb->next)) { 1675 if (likely(!skb->next)) {
1674 if (!list_empty(&ptype_all)) 1676 if (!list_empty(&ptype_all))
1675 dev_queue_xmit_nit(skb, dev); 1677 dev_queue_xmit_nit(skb, dev);
@@ -1681,13 +1683,27 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1681 goto gso; 1683 goto gso;
1682 } 1684 }
1683 1685
1684 return ops->ndo_start_xmit(skb, dev); 1686 rc = ops->ndo_start_xmit(skb, dev);
1687 /*
1688 * TODO: if skb_orphan() was called by
1689 * dev->hard_start_xmit() (for example, the unmodified
1690 * igb driver does that; bnx2 doesn't), then
1691 * skb_tx_software_timestamp() will be unable to send
1692 * back the time stamp.
1693 *
1694 * How can this be prevented? Always create another
1695 * reference to the socket before calling
1696 * dev->hard_start_xmit()? Prevent that skb_orphan()
1697 * does anything in dev->hard_start_xmit() by clearing
1698 * the skb destructor before the call and restoring it
1699 * afterwards, then doing the skb_orphan() ourselves?
1700 */
1701 return rc;
1685 } 1702 }
1686 1703
1687gso: 1704gso:
1688 do { 1705 do {
1689 struct sk_buff *nskb = skb->next; 1706 struct sk_buff *nskb = skb->next;
1690 int rc;
1691 1707
1692 skb->next = nskb->next; 1708 skb->next = nskb->next;
1693 nskb->next = NULL; 1709 nskb->next = NULL;
@@ -1708,59 +1724,24 @@ out_kfree_skb:
1708 return 0; 1724 return 0;
1709} 1725}
1710 1726
1711static u32 simple_tx_hashrnd; 1727static u32 skb_tx_hashrnd;
1712static int simple_tx_hashrnd_initialized = 0;
1713 1728
1714static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) 1729u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
1715{ 1730{
1716 u32 addr1, addr2, ports; 1731 u32 hash;
1717 u32 hash, ihl;
1718 u8 ip_proto = 0;
1719
1720 if (unlikely(!simple_tx_hashrnd_initialized)) {
1721 get_random_bytes(&simple_tx_hashrnd, 4);
1722 simple_tx_hashrnd_initialized = 1;
1723 }
1724
1725 switch (skb->protocol) {
1726 case htons(ETH_P_IP):
1727 if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1728 ip_proto = ip_hdr(skb)->protocol;
1729 addr1 = ip_hdr(skb)->saddr;
1730 addr2 = ip_hdr(skb)->daddr;
1731 ihl = ip_hdr(skb)->ihl;
1732 break;
1733 case htons(ETH_P_IPV6):
1734 ip_proto = ipv6_hdr(skb)->nexthdr;
1735 addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1736 addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
1737 ihl = (40 >> 2);
1738 break;
1739 default:
1740 return 0;
1741 }
1742
1743 1732
1744 switch (ip_proto) { 1733 if (skb_rx_queue_recorded(skb)) {
1745 case IPPROTO_TCP: 1734 hash = skb_get_rx_queue(skb);
1746 case IPPROTO_UDP: 1735 } else if (skb->sk && skb->sk->sk_hash) {
1747 case IPPROTO_DCCP: 1736 hash = skb->sk->sk_hash;
1748 case IPPROTO_ESP: 1737 } else
1749 case IPPROTO_AH: 1738 hash = skb->protocol;
1750 case IPPROTO_SCTP:
1751 case IPPROTO_UDPLITE:
1752 ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
1753 break;
1754
1755 default:
1756 ports = 0;
1757 break;
1758 }
1759 1739
1760 hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); 1740 hash = jhash_1word(hash, skb_tx_hashrnd);
1761 1741
1762 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); 1742 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
1763} 1743}
1744EXPORT_SYMBOL(skb_tx_hash);
1764 1745
1765static struct netdev_queue *dev_pick_tx(struct net_device *dev, 1746static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1766 struct sk_buff *skb) 1747 struct sk_buff *skb)
@@ -1771,7 +1752,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1771 if (ops->ndo_select_queue) 1752 if (ops->ndo_select_queue)
1772 queue_index = ops->ndo_select_queue(dev, skb); 1753 queue_index = ops->ndo_select_queue(dev, skb);
1773 else if (dev->real_num_tx_queues > 1) 1754 else if (dev->real_num_tx_queues > 1)
1774 queue_index = simple_tx_hash(dev, skb); 1755 queue_index = skb_tx_hash(dev, skb);
1775 1756
1776 skb_set_queue_mapping(skb, queue_index); 1757 skb_set_queue_mapping(skb, queue_index);
1777 return netdev_get_tx_queue(dev, queue_index); 1758 return netdev_get_tx_queue(dev, queue_index);
@@ -2297,6 +2278,8 @@ ncls:
2297 if (!skb) 2278 if (!skb)
2298 goto out; 2279 goto out;
2299 2280
2281 skb_orphan(skb);
2282
2300 type = skb->protocol; 2283 type = skb->protocol;
2301 list_for_each_entry_rcu(ptype, 2284 list_for_each_entry_rcu(ptype,
2302 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { 2285 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
@@ -2366,7 +2349,6 @@ static int napi_gro_complete(struct sk_buff *skb)
2366 2349
2367out: 2350out:
2368 skb_shinfo(skb)->gso_size = 0; 2351 skb_shinfo(skb)->gso_size = 0;
2369 __skb_push(skb, -skb_network_offset(skb));
2370 return netif_receive_skb(skb); 2352 return netif_receive_skb(skb);
2371} 2353}
2372 2354
@@ -2380,20 +2362,40 @@ void napi_gro_flush(struct napi_struct *napi)
2380 napi_gro_complete(skb); 2362 napi_gro_complete(skb);
2381 } 2363 }
2382 2364
2365 napi->gro_count = 0;
2383 napi->gro_list = NULL; 2366 napi->gro_list = NULL;
2384} 2367}
2385EXPORT_SYMBOL(napi_gro_flush); 2368EXPORT_SYMBOL(napi_gro_flush);
2386 2369
2370void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
2371{
2372 unsigned int offset = skb_gro_offset(skb);
2373
2374 hlen += offset;
2375 if (hlen <= skb_headlen(skb))
2376 return skb->data + offset;
2377
2378 if (unlikely(!skb_shinfo(skb)->nr_frags ||
2379 skb_shinfo(skb)->frags[0].size <=
2380 hlen - skb_headlen(skb) ||
2381 PageHighMem(skb_shinfo(skb)->frags[0].page)))
2382 return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
2383
2384 return page_address(skb_shinfo(skb)->frags[0].page) +
2385 skb_shinfo(skb)->frags[0].page_offset +
2386 offset - skb_headlen(skb);
2387}
2388EXPORT_SYMBOL(skb_gro_header);
2389
2387int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 2390int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2388{ 2391{
2389 struct sk_buff **pp = NULL; 2392 struct sk_buff **pp = NULL;
2390 struct packet_type *ptype; 2393 struct packet_type *ptype;
2391 __be16 type = skb->protocol; 2394 __be16 type = skb->protocol;
2392 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; 2395 struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
2393 int count = 0;
2394 int same_flow; 2396 int same_flow;
2395 int mac_len; 2397 int mac_len;
2396 int free; 2398 int ret;
2397 2399
2398 if (!(skb->dev->features & NETIF_F_GRO)) 2400 if (!(skb->dev->features & NETIF_F_GRO))
2399 goto normal; 2401 goto normal;
@@ -2403,30 +2405,16 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2403 2405
2404 rcu_read_lock(); 2406 rcu_read_lock();
2405 list_for_each_entry_rcu(ptype, head, list) { 2407 list_for_each_entry_rcu(ptype, head, list) {
2406 struct sk_buff *p;
2407
2408 if (ptype->type != type || ptype->dev || !ptype->gro_receive) 2408 if (ptype->type != type || ptype->dev || !ptype->gro_receive)
2409 continue; 2409 continue;
2410 2410
2411 skb_reset_network_header(skb); 2411 skb_set_network_header(skb, skb_gro_offset(skb));
2412 mac_len = skb->network_header - skb->mac_header; 2412 mac_len = skb->network_header - skb->mac_header;
2413 skb->mac_len = mac_len; 2413 skb->mac_len = mac_len;
2414 NAPI_GRO_CB(skb)->same_flow = 0; 2414 NAPI_GRO_CB(skb)->same_flow = 0;
2415 NAPI_GRO_CB(skb)->flush = 0; 2415 NAPI_GRO_CB(skb)->flush = 0;
2416 NAPI_GRO_CB(skb)->free = 0; 2416 NAPI_GRO_CB(skb)->free = 0;
2417 2417
2418 for (p = napi->gro_list; p; p = p->next) {
2419 count++;
2420
2421 if (!NAPI_GRO_CB(p)->same_flow)
2422 continue;
2423
2424 if (p->mac_len != mac_len ||
2425 memcmp(skb_mac_header(p), skb_mac_header(skb),
2426 mac_len))
2427 NAPI_GRO_CB(p)->same_flow = 0;
2428 }
2429
2430 pp = ptype->gro_receive(&napi->gro_list, skb); 2418 pp = ptype->gro_receive(&napi->gro_list, skb);
2431 break; 2419 break;
2432 } 2420 }
@@ -2436,7 +2424,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2436 goto normal; 2424 goto normal;
2437 2425
2438 same_flow = NAPI_GRO_CB(skb)->same_flow; 2426 same_flow = NAPI_GRO_CB(skb)->same_flow;
2439 free = NAPI_GRO_CB(skb)->free; 2427 ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
2440 2428
2441 if (pp) { 2429 if (pp) {
2442 struct sk_buff *nskb = *pp; 2430 struct sk_buff *nskb = *pp;
@@ -2444,27 +2432,35 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2444 *pp = nskb->next; 2432 *pp = nskb->next;
2445 nskb->next = NULL; 2433 nskb->next = NULL;
2446 napi_gro_complete(nskb); 2434 napi_gro_complete(nskb);
2447 count--; 2435 napi->gro_count--;
2448 } 2436 }
2449 2437
2450 if (same_flow) 2438 if (same_flow)
2451 goto ok; 2439 goto ok;
2452 2440
2453 if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { 2441 if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
2454 __skb_push(skb, -skb_network_offset(skb));
2455 goto normal; 2442 goto normal;
2456 }
2457 2443
2444 napi->gro_count++;
2458 NAPI_GRO_CB(skb)->count = 1; 2445 NAPI_GRO_CB(skb)->count = 1;
2459 skb_shinfo(skb)->gso_size = skb->len; 2446 skb_shinfo(skb)->gso_size = skb_gro_len(skb);
2460 skb->next = napi->gro_list; 2447 skb->next = napi->gro_list;
2461 napi->gro_list = skb; 2448 napi->gro_list = skb;
2449 ret = GRO_HELD;
2450
2451pull:
2452 if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
2453 if (napi->gro_list == skb)
2454 napi->gro_list = skb->next;
2455 ret = GRO_DROP;
2456 }
2462 2457
2463ok: 2458ok:
2464 return free; 2459 return ret;
2465 2460
2466normal: 2461normal:
2467 return -1; 2462 ret = GRO_NORMAL;
2463 goto pull;
2468} 2464}
2469EXPORT_SYMBOL(dev_gro_receive); 2465EXPORT_SYMBOL(dev_gro_receive);
2470 2466
@@ -2472,29 +2468,44 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2472{ 2468{
2473 struct sk_buff *p; 2469 struct sk_buff *p;
2474 2470
2471 if (netpoll_rx_on(skb))
2472 return GRO_NORMAL;
2473
2475 for (p = napi->gro_list; p; p = p->next) { 2474 for (p = napi->gro_list; p; p = p->next) {
2476 NAPI_GRO_CB(p)->same_flow = 1; 2475 NAPI_GRO_CB(p)->same_flow = !compare_ether_header(
2476 skb_mac_header(p), skb_gro_mac_header(skb));
2477 NAPI_GRO_CB(p)->flush = 0; 2477 NAPI_GRO_CB(p)->flush = 0;
2478 } 2478 }
2479 2479
2480 return dev_gro_receive(napi, skb); 2480 return dev_gro_receive(napi, skb);
2481} 2481}
2482 2482
2483int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) 2483int napi_skb_finish(int ret, struct sk_buff *skb)
2484{ 2484{
2485 if (netpoll_receive_skb(skb)) 2485 int err = NET_RX_SUCCESS;
2486 return NET_RX_DROP;
2487 2486
2488 switch (__napi_gro_receive(napi, skb)) { 2487 switch (ret) {
2489 case -1: 2488 case GRO_NORMAL:
2490 return netif_receive_skb(skb); 2489 return netif_receive_skb(skb);
2491 2490
2492 case 1: 2491 case GRO_DROP:
2492 err = NET_RX_DROP;
2493 /* fall through */
2494
2495 case GRO_MERGED_FREE:
2493 kfree_skb(skb); 2496 kfree_skb(skb);
2494 break; 2497 break;
2495 } 2498 }
2496 2499
2497 return NET_RX_SUCCESS; 2500 return err;
2501}
2502EXPORT_SYMBOL(napi_skb_finish);
2503
2504int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
2505{
2506 skb_gro_reset_offset(skb);
2507
2508 return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
2498} 2509}
2499EXPORT_SYMBOL(napi_gro_receive); 2510EXPORT_SYMBOL(napi_gro_receive);
2500 2511
@@ -2512,6 +2523,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
2512{ 2523{
2513 struct net_device *dev = napi->dev; 2524 struct net_device *dev = napi->dev;
2514 struct sk_buff *skb = napi->skb; 2525 struct sk_buff *skb = napi->skb;
2526 struct ethhdr *eth;
2527 skb_frag_t *frag;
2528 int i;
2515 2529
2516 napi->skb = NULL; 2530 napi->skb = NULL;
2517 2531
@@ -2524,20 +2538,36 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
2524 } 2538 }
2525 2539
2526 BUG_ON(info->nr_frags > MAX_SKB_FRAGS); 2540 BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
2541 frag = &info->frags[info->nr_frags - 1];
2542
2543 for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) {
2544 skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
2545 frag->size);
2546 frag++;
2547 }
2527 skb_shinfo(skb)->nr_frags = info->nr_frags; 2548 skb_shinfo(skb)->nr_frags = info->nr_frags;
2528 memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
2529 2549
2530 skb->data_len = info->len; 2550 skb->data_len = info->len;
2531 skb->len += info->len; 2551 skb->len += info->len;
2532 skb->truesize += info->len; 2552 skb->truesize += info->len;
2533 2553
2534 if (!pskb_may_pull(skb, ETH_HLEN)) { 2554 skb_reset_mac_header(skb);
2555 skb_gro_reset_offset(skb);
2556
2557 eth = skb_gro_header(skb, sizeof(*eth));
2558 if (!eth) {
2535 napi_reuse_skb(napi, skb); 2559 napi_reuse_skb(napi, skb);
2536 skb = NULL; 2560 skb = NULL;
2537 goto out; 2561 goto out;
2538 } 2562 }
2539 2563
2540 skb->protocol = eth_type_trans(skb, dev); 2564 skb_gro_pull(skb, sizeof(*eth));
2565
2566 /*
2567 * This works because the only protocols we care about don't require
2568 * special handling. We'll fix it up properly at the end.
2569 */
2570 skb->protocol = eth->h_proto;
2541 2571
2542 skb->ip_summed = info->ip_summed; 2572 skb->ip_summed = info->ip_summed;
2543 skb->csum = info->csum; 2573 skb->csum = info->csum;
@@ -2547,32 +2577,43 @@ out:
2547} 2577}
2548EXPORT_SYMBOL(napi_fraginfo_skb); 2578EXPORT_SYMBOL(napi_fraginfo_skb);
2549 2579
2550int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) 2580int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
2551{ 2581{
2552 struct sk_buff *skb = napi_fraginfo_skb(napi, info); 2582 int err = NET_RX_SUCCESS;
2553 int err = NET_RX_DROP;
2554 2583
2555 if (!skb) 2584 switch (ret) {
2556 goto out; 2585 case GRO_NORMAL:
2586 case GRO_HELD:
2587 skb->protocol = eth_type_trans(skb, napi->dev);
2557 2588
2558 if (netpoll_receive_skb(skb)) 2589 if (ret == GRO_NORMAL)
2559 goto out; 2590 return netif_receive_skb(skb);
2560 2591
2561 err = NET_RX_SUCCESS; 2592 skb_gro_pull(skb, -ETH_HLEN);
2593 break;
2562 2594
2563 switch (__napi_gro_receive(napi, skb)) { 2595 case GRO_DROP:
2564 case -1: 2596 err = NET_RX_DROP;
2565 return netif_receive_skb(skb); 2597 /* fall through */
2566 2598
2567 case 0: 2599 case GRO_MERGED_FREE:
2568 goto out; 2600 napi_reuse_skb(napi, skb);
2601 break;
2569 } 2602 }
2570 2603
2571 napi_reuse_skb(napi, skb);
2572
2573out:
2574 return err; 2604 return err;
2575} 2605}
2606EXPORT_SYMBOL(napi_frags_finish);
2607
2608int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
2609{
2610 struct sk_buff *skb = napi_fraginfo_skb(napi, info);
2611
2612 if (!skb)
2613 return NET_RX_DROP;
2614
2615 return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
2616}
2576EXPORT_SYMBOL(napi_gro_frags); 2617EXPORT_SYMBOL(napi_gro_frags);
2577 2618
2578static int process_backlog(struct napi_struct *napi, int quota) 2619static int process_backlog(struct napi_struct *napi, int quota)
@@ -2594,11 +2635,9 @@ static int process_backlog(struct napi_struct *napi, int quota)
2594 } 2635 }
2595 local_irq_enable(); 2636 local_irq_enable();
2596 2637
2597 napi_gro_receive(napi, skb); 2638 netif_receive_skb(skb);
2598 } while (++work < quota && jiffies == start_time); 2639 } while (++work < quota && jiffies == start_time);
2599 2640
2600 napi_gro_flush(napi);
2601
2602 return work; 2641 return work;
2603} 2642}
2604 2643
@@ -2652,6 +2691,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
2652 int (*poll)(struct napi_struct *, int), int weight) 2691 int (*poll)(struct napi_struct *, int), int weight)
2653{ 2692{
2654 INIT_LIST_HEAD(&napi->poll_list); 2693 INIT_LIST_HEAD(&napi->poll_list);
2694 napi->gro_count = 0;
2655 napi->gro_list = NULL; 2695 napi->gro_list = NULL;
2656 napi->skb = NULL; 2696 napi->skb = NULL;
2657 napi->poll = poll; 2697 napi->poll = poll;
@@ -2671,7 +2711,7 @@ void netif_napi_del(struct napi_struct *napi)
2671 struct sk_buff *skb, *next; 2711 struct sk_buff *skb, *next;
2672 2712
2673 list_del_init(&napi->dev_list); 2713 list_del_init(&napi->dev_list);
2674 kfree(napi->skb); 2714 kfree_skb(napi->skb);
2675 2715
2676 for (skb = napi->gro_list; skb; skb = next) { 2716 for (skb = napi->gro_list; skb; skb = next) {
2677 next = skb->next; 2717 next = skb->next;
@@ -2680,6 +2720,7 @@ void netif_napi_del(struct napi_struct *napi)
2680 } 2720 }
2681 2721
2682 napi->gro_list = NULL; 2722 napi->gro_list = NULL;
2723 napi->gro_count = 0;
2683} 2724}
2684EXPORT_SYMBOL(netif_napi_del); 2725EXPORT_SYMBOL(netif_napi_del);
2685 2726
@@ -3948,6 +3989,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3948 cmd == SIOCSMIIREG || 3989 cmd == SIOCSMIIREG ||
3949 cmd == SIOCBRADDIF || 3990 cmd == SIOCBRADDIF ||
3950 cmd == SIOCBRDELIF || 3991 cmd == SIOCBRDELIF ||
3992 cmd == SIOCSHWTSTAMP ||
3951 cmd == SIOCWANDEV) { 3993 cmd == SIOCWANDEV) {
3952 err = -EOPNOTSUPP; 3994 err = -EOPNOTSUPP;
3953 if (ops->ndo_do_ioctl) { 3995 if (ops->ndo_do_ioctl) {
@@ -4102,6 +4144,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4102 case SIOCBONDCHANGEACTIVE: 4144 case SIOCBONDCHANGEACTIVE:
4103 case SIOCBRADDIF: 4145 case SIOCBRADDIF:
4104 case SIOCBRDELIF: 4146 case SIOCBRDELIF:
4147 case SIOCSHWTSTAMP:
4105 if (!capable(CAP_NET_ADMIN)) 4148 if (!capable(CAP_NET_ADMIN))
4106 return -EPERM; 4149 return -EPERM;
4107 /* fall through */ 4150 /* fall through */
@@ -5198,6 +5241,7 @@ static int __init net_dev_init(void)
5198 queue->backlog.poll = process_backlog; 5241 queue->backlog.poll = process_backlog;
5199 queue->backlog.weight = weight_p; 5242 queue->backlog.weight = weight_p;
5200 queue->backlog.gro_list = NULL; 5243 queue->backlog.gro_list = NULL;
5244 queue->backlog.gro_count = 0;
5201 } 5245 }
5202 5246
5203 dev_boot_phase = 0; 5247 dev_boot_phase = 0;
@@ -5230,6 +5274,14 @@ out:
5230 5274
5231subsys_initcall(net_dev_init); 5275subsys_initcall(net_dev_init);
5232 5276
5277static int __init initialize_hashrnd(void)
5278{
5279 get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
5280 return 0;
5281}
5282
5283late_initcall_sync(initialize_hashrnd);
5284
5233EXPORT_SYMBOL(__dev_get_by_index); 5285EXPORT_SYMBOL(__dev_get_by_index);
5234EXPORT_SYMBOL(__dev_get_by_name); 5286EXPORT_SYMBOL(__dev_get_by_name);
5235EXPORT_SYMBOL(__dev_remove_pack); 5287EXPORT_SYMBOL(__dev_remove_pack);
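
The skb_tx_hash() helper introduced in the net/core/dev.c hunks above replaces the header-parsing simple_tx_hash() with a single jhash of the recorded rx queue, the socket hash or the protocol, and then scales the 32-bit result onto the available tx queues with a multiply-and-shift rather than a modulo. A stand-alone sketch of that scaling step (illustrative only, not part of the kernel sources):

/* Illustrative sketch only -- not part of the kernel sources. */
#include <stdint.h>
#include <stdio.h>

/* Treat the 32-bit hash as a fixed-point fraction in [0, 1) and
 * multiply by the number of queues; no division is needed and the
 * result is spread uniformly over [0, nqueues). */
static uint16_t pick_tx_queue(uint32_t hash, uint16_t nqueues)
{
	return (uint16_t)(((uint64_t)hash * nqueues) >> 32);
}

int main(void)
{
	/* With 4 tx queues, a hash in the top quarter of the 32-bit
	 * range selects the last queue. */
	printf("%u\n", pick_tx_queue(0xf0000000u, 4));	/* prints 3 */
	return 0;
}
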
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
new file mode 100644
index 000000000000..9fd0dc3cca99
--- /dev/null
+++ b/net/core/drop_monitor.c
@@ -0,0 +1,263 @@
1/*
2 * Monitoring code for network dropped packet alerts
3 *
4 * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
5 */
6
7#include <linux/netdevice.h>
8#include <linux/etherdevice.h>
9#include <linux/string.h>
10#include <linux/if_arp.h>
11#include <linux/inetdevice.h>
12#include <linux/inet.h>
13#include <linux/interrupt.h>
14#include <linux/netpoll.h>
15#include <linux/sched.h>
16#include <linux/delay.h>
17#include <linux/types.h>
18#include <linux/workqueue.h>
19#include <linux/netlink.h>
20#include <linux/net_dropmon.h>
21#include <linux/percpu.h>
22#include <linux/timer.h>
23#include <linux/bitops.h>
24#include <net/genetlink.h>
25
26#include <trace/skb.h>
27
28#include <asm/unaligned.h>
29
30#define TRACE_ON 1
31#define TRACE_OFF 0
32
33static void send_dm_alert(struct work_struct *unused);
34
35
36/*
37 * Globals, our netlink socket pointer
38 * and the work handle that will send up
39 * netlink alerts
40 */
41struct sock *dm_sock;
42
43struct per_cpu_dm_data {
44 struct work_struct dm_alert_work;
45 struct sk_buff *skb;
46 atomic_t dm_hit_count;
47 struct timer_list send_timer;
48};
49
50static struct genl_family net_drop_monitor_family = {
51 .id = GENL_ID_GENERATE,
52 .hdrsize = 0,
53 .name = "NET_DM",
54 .version = 1,
55 .maxattr = NET_DM_CMD_MAX,
56};
57
58static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
59
60static int dm_hit_limit = 64;
61static int dm_delay = 1;
62
63
64static void reset_per_cpu_data(struct per_cpu_dm_data *data)
65{
66 size_t al;
67 struct net_dm_alert_msg *msg;
68
69 al = sizeof(struct net_dm_alert_msg);
70 al += dm_hit_limit * sizeof(struct net_dm_drop_point);
71 data->skb = genlmsg_new(al, GFP_KERNEL);
72 genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
73 0, NET_DM_CMD_ALERT);
74 msg = __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_alert_msg));
75 memset(msg, 0, al);
76 atomic_set(&data->dm_hit_count, dm_hit_limit);
77}
78
79static void send_dm_alert(struct work_struct *unused)
80{
81 struct sk_buff *skb;
82 struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
83
84 /*
85 * Grab the skb we're about to send
86 */
87 skb = data->skb;
88
89 /*
90 * Replace it with a new one
91 */
92 reset_per_cpu_data(data);
93
94 /*
95 * Ship it!
96 */
97 genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
98
99}
100
101/*
102 * This is the timer function to delay the sending of an alert
103 * in the event that more drops will arrive during the
104 * hysteresis period. Note that it operates under the timer interrupt
105 * so we don't need to disable preemption here
106 */
107static void sched_send_work(unsigned long unused)
108{
109 struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
110
111 schedule_work(&data->dm_alert_work);
112}
113
114static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
115{
116 struct net_dm_alert_msg *msg;
117 struct nlmsghdr *nlh;
118 int i;
119 struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
120
121
122 if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
123 /*
124 * we're already at zero, discard this hit
125 */
126 goto out;
127 }
128
129 nlh = (struct nlmsghdr *)data->skb->data;
130 msg = genlmsg_data(nlmsg_data(nlh));
131 for (i = 0; i < msg->entries; i++) {
132 if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
133 msg->points[i].count++;
134 goto out;
135 }
136 }
137
138 /*
139 * We need to create a new entry
140 */
141 __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
142 memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
143 msg->points[msg->entries].count = 1;
144 msg->entries++;
145
146 if (!timer_pending(&data->send_timer)) {
147 data->send_timer.expires = jiffies + dm_delay * HZ;
148 add_timer_on(&data->send_timer, smp_processor_id());
149 }
150
151out:
152 return;
153}
154
155static int set_all_monitor_traces(int state)
156{
157 int rc = 0;
158
159 switch (state) {
160 case TRACE_ON:
161 rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
162 break;
163 case TRACE_OFF:
164 rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
165
166 tracepoint_synchronize_unregister();
167 break;
168 default:
169 rc = 1;
170 break;
171 }
172
173 if (rc)
174 return -EINPROGRESS;
175 return rc;
176}
177
178
179static int net_dm_cmd_config(struct sk_buff *skb,
180 struct genl_info *info)
181{
182 return -ENOTSUPP;
183}
184
185static int net_dm_cmd_trace(struct sk_buff *skb,
186 struct genl_info *info)
187{
188 switch (info->genlhdr->cmd) {
189 case NET_DM_CMD_START:
190 return set_all_monitor_traces(TRACE_ON);
191 break;
192 case NET_DM_CMD_STOP:
193 return set_all_monitor_traces(TRACE_OFF);
194 break;
195 }
196
197 return -ENOTSUPP;
198}
199
200
201static struct genl_ops dropmon_ops[] = {
202 {
203 .cmd = NET_DM_CMD_CONFIG,
204 .doit = net_dm_cmd_config,
205 },
206 {
207 .cmd = NET_DM_CMD_START,
208 .doit = net_dm_cmd_trace,
209 },
210 {
211 .cmd = NET_DM_CMD_STOP,
212 .doit = net_dm_cmd_trace,
213 },
214};
215
216static int __init init_net_drop_monitor(void)
217{
218 int cpu;
219 int rc, i, ret;
220 struct per_cpu_dm_data *data;
221 printk(KERN_INFO "Initializing network drop monitor service\n");
222
223 if (sizeof(void *) > 8) {
224 printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
225 return -ENOSPC;
226 }
227
228 if (genl_register_family(&net_drop_monitor_family) < 0) {
229 printk(KERN_ERR "Could not create drop monitor netlink family\n");
230 return -EFAULT;
231 }
232
233 rc = -EFAULT;
234
235 for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) {
236 ret = genl_register_ops(&net_drop_monitor_family,
237 &dropmon_ops[i]);
238 if (ret) {
239 printk(KERN_CRIT "failed to register operation %d\n",
240 dropmon_ops[i].cmd);
241 goto out_unreg;
242 }
243 }
244
245 rc = 0;
246
247 for_each_present_cpu(cpu) {
248 data = &per_cpu(dm_cpu_data, cpu);
249 reset_per_cpu_data(data);
250 INIT_WORK(&data->dm_alert_work, send_dm_alert);
251 init_timer(&data->send_timer);
252 data->send_timer.data = cpu;
253 data->send_timer.function = sched_send_work;
254 }
255 goto out;
256
257out_unreg:
258 genl_unregister_family(&net_drop_monitor_family);
259out:
260 return rc;
261}
262
263late_initcall(init_net_drop_monitor);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 947710a36ced..244ca56dffac 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -209,34 +209,62 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
209 return 0; 209 return 0;
210} 210}
211 211
212static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr) 212static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
213{ 213{
214 struct ethtool_rxnfc cmd; 214 struct ethtool_rxnfc cmd;
215 215
216 if (!dev->ethtool_ops->set_rxhash) 216 if (!dev->ethtool_ops->set_rxnfc)
217 return -EOPNOTSUPP; 217 return -EOPNOTSUPP;
218 218
219 if (copy_from_user(&cmd, useraddr, sizeof(cmd))) 219 if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
220 return -EFAULT; 220 return -EFAULT;
221 221
222 return dev->ethtool_ops->set_rxhash(dev, &cmd); 222 return dev->ethtool_ops->set_rxnfc(dev, &cmd);
223} 223}
224 224
225static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr) 225static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
226{ 226{
227 struct ethtool_rxnfc info; 227 struct ethtool_rxnfc info;
228 const struct ethtool_ops *ops = dev->ethtool_ops;
229 int ret;
230 void *rule_buf = NULL;
228 231
229 if (!dev->ethtool_ops->get_rxhash) 232 if (!ops->get_rxnfc)
230 return -EOPNOTSUPP; 233 return -EOPNOTSUPP;
231 234
232 if (copy_from_user(&info, useraddr, sizeof(info))) 235 if (copy_from_user(&info, useraddr, sizeof(info)))
233 return -EFAULT; 236 return -EFAULT;
234 237
235 dev->ethtool_ops->get_rxhash(dev, &info); 238 if (info.cmd == ETHTOOL_GRXCLSRLALL) {
239 if (info.rule_cnt > 0) {
240 rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
241 GFP_USER);
242 if (!rule_buf)
243 return -ENOMEM;
244 }
245 }
236 246
247 ret = ops->get_rxnfc(dev, &info, rule_buf);
248 if (ret < 0)
249 goto err_out;
250
251 ret = -EFAULT;
237 if (copy_to_user(useraddr, &info, sizeof(info))) 252 if (copy_to_user(useraddr, &info, sizeof(info)))
238 return -EFAULT; 253 goto err_out;
239 return 0; 254
255 if (rule_buf) {
256 useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
257 if (copy_to_user(useraddr, rule_buf,
258 info.rule_cnt * sizeof(u32)))
259 goto err_out;
260 }
261 ret = 0;
262
263err_out:
264 if (rule_buf)
265 kfree(rule_buf);
266
267 return ret;
240} 268}
241 269
242static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) 270static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
@@ -901,6 +929,10 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
901 case ETHTOOL_GFLAGS: 929 case ETHTOOL_GFLAGS:
902 case ETHTOOL_GPFLAGS: 930 case ETHTOOL_GPFLAGS:
903 case ETHTOOL_GRXFH: 931 case ETHTOOL_GRXFH:
932 case ETHTOOL_GRXRINGS:
933 case ETHTOOL_GRXCLSRLCNT:
934 case ETHTOOL_GRXCLSRULE:
935 case ETHTOOL_GRXCLSRLALL:
904 break; 936 break;
905 default: 937 default:
906 if (!capable(CAP_NET_ADMIN)) 938 if (!capable(CAP_NET_ADMIN))
@@ -1052,10 +1084,16 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
1052 dev->ethtool_ops->set_priv_flags); 1084 dev->ethtool_ops->set_priv_flags);
1053 break; 1085 break;
1054 case ETHTOOL_GRXFH: 1086 case ETHTOOL_GRXFH:
1055 rc = ethtool_get_rxhash(dev, useraddr); 1087 case ETHTOOL_GRXRINGS:
1088 case ETHTOOL_GRXCLSRLCNT:
1089 case ETHTOOL_GRXCLSRULE:
1090 case ETHTOOL_GRXCLSRLALL:
1091 rc = ethtool_get_rxnfc(dev, useraddr);
1056 break; 1092 break;
1057 case ETHTOOL_SRXFH: 1093 case ETHTOOL_SRXFH:
1058 rc = ethtool_set_rxhash(dev, useraddr); 1094 case ETHTOOL_SRXCLSRLDEL:
1095 case ETHTOOL_SRXCLSRLINS:
1096 rc = ethtool_set_rxnfc(dev, useraddr);
1059 break; 1097 break;
1060 case ETHTOOL_GGRO: 1098 case ETHTOOL_GGRO:
1061 rc = ethtool_get_gro(dev, useraddr); 1099 rc = ethtool_get_gro(dev, useraddr);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 32b3a0152d7a..98691e1466b8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -588,7 +588,8 @@ static void notify_rule_change(int event, struct fib_rule *rule,
588 goto errout; 588 goto errout;
589 } 589 }
590 590
591 err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); 591 rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
592 return;
592errout: 593errout:
593 if (err < 0) 594 if (err < 0)
594 rtnl_set_sk_err(net, ops->nlgroup, err); 595 rtnl_set_sk_err(net, ops->nlgroup, err);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 278a142d1047..a1cbce7fdae5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -871,8 +871,7 @@ static void neigh_timer_handler(unsigned long arg)
871 write_unlock(&neigh->lock); 871 write_unlock(&neigh->lock);
872 neigh->ops->solicit(neigh, skb); 872 neigh->ops->solicit(neigh, skb);
873 atomic_inc(&neigh->probes); 873 atomic_inc(&neigh->probes);
874 if (skb) 874 kfree_skb(skb);
875 kfree_skb(skb);
876 } else { 875 } else {
877out: 876out:
878 write_unlock(&neigh->lock); 877 write_unlock(&neigh->lock);
@@ -908,8 +907,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
908 neigh->updated = jiffies; 907 neigh->updated = jiffies;
909 write_unlock_bh(&neigh->lock); 908 write_unlock_bh(&neigh->lock);
910 909
911 if (skb) 910 kfree_skb(skb);
912 kfree_skb(skb);
913 return 1; 911 return 1;
914 } 912 }
915 } else if (neigh->nud_state & NUD_STALE) { 913 } else if (neigh->nud_state & NUD_STALE) {
@@ -1656,7 +1654,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1656 flags &= ~NEIGH_UPDATE_F_OVERRIDE; 1654 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1657 } 1655 }
1658 1656
1659 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); 1657 if (ndm->ndm_flags & NTF_USE) {
1658 neigh_event_send(neigh, NULL);
1659 err = 0;
1660 } else
1661 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1660 neigh_release(neigh); 1662 neigh_release(neigh);
1661 goto out_dev_put; 1663 goto out_dev_put;
1662 } 1664 }
@@ -2534,7 +2536,8 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
2534 kfree_skb(skb); 2536 kfree_skb(skb);
2535 goto errout; 2537 goto errout;
2536 } 2538 }
2537 err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); 2539 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2540 return;
2538errout: 2541errout:
2539 if (err < 0) 2542 if (err < 0)
2540 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); 2543 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 484f58750eba..2da59a0ac4ac 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -498,7 +498,7 @@ int netdev_register_kobject(struct net_device *net)
498 dev->groups = groups; 498 dev->groups = groups;
499 499
500 BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); 500 BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
501 dev_set_name(dev, net->name); 501 dev_set_name(dev, "%s", net->name);
502 502
503#ifdef CONFIG_SYSFS 503#ifdef CONFIG_SYSFS
504 *groups++ = &netstat_group; 504 *groups++ = &netstat_group;
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
new file mode 100644
index 000000000000..c8fb45665e4f
--- /dev/null
+++ b/net/core/net-traces.c
@@ -0,0 +1,29 @@
1/*
2 * consolidates trace point definitions
3 *
4 * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
5 */
6
7#include <linux/netdevice.h>
8#include <linux/etherdevice.h>
9#include <linux/string.h>
10#include <linux/if_arp.h>
11#include <linux/inetdevice.h>
12#include <linux/inet.h>
13#include <linux/interrupt.h>
14#include <linux/netpoll.h>
15#include <linux/sched.h>
16#include <linux/delay.h>
17#include <linux/rcupdate.h>
18#include <linux/types.h>
19#include <linux/workqueue.h>
20#include <linux/netlink.h>
21#include <linux/net_dropmon.h>
22#include <trace/skb.h>
23
24#include <asm/unaligned.h>
25#include <asm/bitops.h>
26
27
28DEFINE_TRACE(kfree_skb);
29EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 65498483325a..32d419f5ac98 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3275,8 +3275,7 @@ static void pktgen_stop(struct pktgen_thread *t)
3275 3275
3276 list_for_each_entry(pkt_dev, &t->if_list, list) { 3276 list_for_each_entry(pkt_dev, &t->if_list, list) {
3277 pktgen_stop_device(pkt_dev); 3277 pktgen_stop_device(pkt_dev);
3278 if (pkt_dev->skb) 3278 kfree_skb(pkt_dev->skb);
3279 kfree_skb(pkt_dev->skb);
3280 3279
3281 pkt_dev->skb = NULL; 3280 pkt_dev->skb = NULL;
3282 } 3281 }
@@ -3303,8 +3302,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
3303 if (!cur->removal_mark) 3302 if (!cur->removal_mark)
3304 continue; 3303 continue;
3305 3304
3306 if (cur->skb) 3305 kfree_skb(cur->skb);
3307 kfree_skb(cur->skb);
3308 cur->skb = NULL; 3306 cur->skb = NULL;
3309 3307
3310 pktgen_remove_device(t, cur); 3308 pktgen_remove_device(t, cur);
@@ -3328,8 +3326,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
3328 list_for_each_safe(q, n, &t->if_list) { 3326 list_for_each_safe(q, n, &t->if_list) {
3329 cur = list_entry(q, struct pktgen_dev, list); 3327 cur = list_entry(q, struct pktgen_dev, list);
3330 3328
3331 if (cur->skb) 3329 kfree_skb(cur->skb);
3332 kfree_skb(cur->skb);
3333 cur->skb = NULL; 3330 cur->skb = NULL;
3334 3331
3335 pktgen_remove_device(t, cur); 3332 pktgen_remove_device(t, cur);
@@ -3393,8 +3390,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3393 3390
3394 if (!netif_running(odev)) { 3391 if (!netif_running(odev)) {
3395 pktgen_stop_device(pkt_dev); 3392 pktgen_stop_device(pkt_dev);
3396 if (pkt_dev->skb) 3393 kfree_skb(pkt_dev->skb);
3397 kfree_skb(pkt_dev->skb);
3398 pkt_dev->skb = NULL; 3394 pkt_dev->skb = NULL;
3399 goto out; 3395 goto out;
3400 } 3396 }
@@ -3415,8 +3411,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3415 if ((++pkt_dev->clone_count >= pkt_dev->clone_skb) 3411 if ((++pkt_dev->clone_count >= pkt_dev->clone_skb)
3416 || (!pkt_dev->skb)) { 3412 || (!pkt_dev->skb)) {
3417 /* build a new pkt */ 3413 /* build a new pkt */
3418 if (pkt_dev->skb) 3414 kfree_skb(pkt_dev->skb);
3419 kfree_skb(pkt_dev->skb);
3420 3415
3421 pkt_dev->skb = fill_packet(odev, pkt_dev); 3416 pkt_dev->skb = fill_packet(odev, pkt_dev);
3422 if (pkt_dev->skb == NULL) { 3417 if (pkt_dev->skb == NULL) {
@@ -3498,8 +3493,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
3498 3493
3499 /* Done with this */ 3494 /* Done with this */
3500 pktgen_stop_device(pkt_dev); 3495 pktgen_stop_device(pkt_dev);
3501 if (pkt_dev->skb) 3496 kfree_skb(pkt_dev->skb);
3502 kfree_skb(pkt_dev->skb);
3503 pkt_dev->skb = NULL; 3497 pkt_dev->skb = NULL;
3504 } 3498 }
3505out:; 3499out:;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 790dd205bb5d..d78030f88bd0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -455,8 +455,8 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
455 return nlmsg_unicast(rtnl, skb, pid); 455 return nlmsg_unicast(rtnl, skb, pid);
456} 456}
457 457
458int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, 458void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
459 struct nlmsghdr *nlh, gfp_t flags) 459 struct nlmsghdr *nlh, gfp_t flags)
460{ 460{
461 struct sock *rtnl = net->rtnl; 461 struct sock *rtnl = net->rtnl;
462 int report = 0; 462 int report = 0;
@@ -464,7 +464,7 @@ int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
464 if (nlh) 464 if (nlh)
465 report = nlmsg_report(nlh); 465 report = nlmsg_report(nlh);
466 466
467 return nlmsg_notify(rtnl, skb, pid, group, report, flags); 467 nlmsg_notify(rtnl, skb, pid, group, report, flags);
468} 468}
469 469
470void rtnl_set_sk_err(struct net *net, u32 group, int error) 470void rtnl_set_sk_err(struct net *net, u32 group, int error)
@@ -1246,7 +1246,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
1246 kfree_skb(skb); 1246 kfree_skb(skb);
1247 goto errout; 1247 goto errout;
1248 } 1248 }
1249 err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); 1249 rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
1250 return;
1250errout: 1251errout:
1251 if (err < 0) 1252 if (err < 0)
1252 rtnl_set_sk_err(net, RTNLGRP_LINK, err); 1253 rtnl_set_sk_err(net, RTNLGRP_LINK, err);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c6a6b166f8d6..6acbf9e79eb1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -55,6 +55,7 @@
55#include <linux/rtnetlink.h> 55#include <linux/rtnetlink.h>
56#include <linux/init.h> 56#include <linux/init.h>
57#include <linux/scatterlist.h> 57#include <linux/scatterlist.h>
58#include <linux/errqueue.h>
58 59
59#include <net/protocol.h> 60#include <net/protocol.h>
60#include <net/dst.h> 61#include <net/dst.h>
@@ -64,6 +65,7 @@
64 65
65#include <asm/uaccess.h> 66#include <asm/uaccess.h>
66#include <asm/system.h> 67#include <asm/system.h>
68#include <trace/skb.h>
67 69
68#include "kmap_skb.h" 70#include "kmap_skb.h"
69 71
@@ -123,6 +125,7 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
123 skb->dev ? skb->dev->name : "<NULL>"); 125 skb->dev ? skb->dev->name : "<NULL>");
124 BUG(); 126 BUG();
125} 127}
128EXPORT_SYMBOL(skb_over_panic);
126 129
127/** 130/**
128 * skb_under_panic - private function 131 * skb_under_panic - private function
@@ -142,6 +145,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
142 skb->dev ? skb->dev->name : "<NULL>"); 145 skb->dev ? skb->dev->name : "<NULL>");
143 BUG(); 146 BUG();
144} 147}
148EXPORT_SYMBOL(skb_under_panic);
145 149
146/* Allocate a new skbuff. We do this ourselves so we can fill in a few 150/* Allocate a new skbuff. We do this ourselves so we can fill in a few
147 * 'private' fields and also do memory statistics to find all the 151 * 'private' fields and also do memory statistics to find all the
@@ -205,7 +209,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
205 shinfo->gso_segs = 0; 209 shinfo->gso_segs = 0;
206 shinfo->gso_type = 0; 210 shinfo->gso_type = 0;
207 shinfo->ip6_frag_id = 0; 211 shinfo->ip6_frag_id = 0;
212 shinfo->tx_flags.flags = 0;
208 shinfo->frag_list = NULL; 213 shinfo->frag_list = NULL;
214 memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
209 215
210 if (fclone) { 216 if (fclone) {
211 struct sk_buff *child = skb + 1; 217 struct sk_buff *child = skb + 1;
@@ -223,6 +229,7 @@ nodata:
223 skb = NULL; 229 skb = NULL;
224 goto out; 230 goto out;
225} 231}
232EXPORT_SYMBOL(__alloc_skb);
226 233
227/** 234/**
228 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device 235 * __netdev_alloc_skb - allocate an skbuff for rx on a specific device
@@ -250,6 +257,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
250 } 257 }
251 return skb; 258 return skb;
252} 259}
260EXPORT_SYMBOL(__netdev_alloc_skb);
253 261
254struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask) 262struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
255{ 263{
@@ -418,6 +426,7 @@ void __kfree_skb(struct sk_buff *skb)
418 skb_release_all(skb); 426 skb_release_all(skb);
419 kfree_skbmem(skb); 427 kfree_skbmem(skb);
420} 428}
429EXPORT_SYMBOL(__kfree_skb);
421 430
422/** 431/**
423 * kfree_skb - free an sk_buff 432 * kfree_skb - free an sk_buff
@@ -434,8 +443,30 @@ void kfree_skb(struct sk_buff *skb)
434 smp_rmb(); 443 smp_rmb();
435 else if (likely(!atomic_dec_and_test(&skb->users))) 444 else if (likely(!atomic_dec_and_test(&skb->users)))
436 return; 445 return;
446 trace_kfree_skb(skb, __builtin_return_address(0));
447 __kfree_skb(skb);
448}
449EXPORT_SYMBOL(kfree_skb);
450
451/**
452 * consume_skb - free an skbuff
453 * @skb: buffer to free
454 *
455 * Drop a ref to the buffer and free it if the usage count has hit zero
456 * Functions identically to kfree_skb, but kfree_skb assumes that the frame
457 * is being dropped after a failure and notes that via the kfree_skb tracepoint
458 */
459void consume_skb(struct sk_buff *skb)
460{
461 if (unlikely(!skb))
462 return;
463 if (likely(atomic_read(&skb->users) == 1))
464 smp_rmb();
465 else if (likely(!atomic_dec_and_test(&skb->users)))
466 return;
437 __kfree_skb(skb); 467 __kfree_skb(skb);
438} 468}
469EXPORT_SYMBOL(consume_skb);
439 470
440/** 471/**
441 * skb_recycle_check - check if skb can be reused for receive 472 * skb_recycle_check - check if skb can be reused for receive
@@ -605,6 +636,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
605 636
606 return __skb_clone(n, skb); 637 return __skb_clone(n, skb);
607} 638}
639EXPORT_SYMBOL(skb_clone);
608 640
609static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) 641static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
610{ 642{
@@ -671,7 +703,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
671 copy_skb_header(n, skb); 703 copy_skb_header(n, skb);
672 return n; 704 return n;
673} 705}
674 706EXPORT_SYMBOL(skb_copy);
675 707
676/** 708/**
677 * pskb_copy - create copy of an sk_buff with private head. 709 * pskb_copy - create copy of an sk_buff with private head.
@@ -730,6 +762,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
730out: 762out:
731 return n; 763 return n;
732} 764}
765EXPORT_SYMBOL(pskb_copy);
733 766
734/** 767/**
735 * pskb_expand_head - reallocate header of &sk_buff 768 * pskb_expand_head - reallocate header of &sk_buff
@@ -813,6 +846,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
813nodata: 846nodata:
814 return -ENOMEM; 847 return -ENOMEM;
815} 848}
849EXPORT_SYMBOL(pskb_expand_head);
816 850
817/* Make private copy of skb with writable head and some headroom */ 851/* Make private copy of skb with writable head and some headroom */
818 852
@@ -833,7 +867,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
833 } 867 }
834 return skb2; 868 return skb2;
835} 869}
836 870EXPORT_SYMBOL(skb_realloc_headroom);
837 871
838/** 872/**
839 * skb_copy_expand - copy and expand sk_buff 873 * skb_copy_expand - copy and expand sk_buff
@@ -898,6 +932,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
898 932
899 return n; 933 return n;
900} 934}
935EXPORT_SYMBOL(skb_copy_expand);
901 936
902/** 937/**
903 * skb_pad - zero pad the tail of an skb 938 * skb_pad - zero pad the tail of an skb
@@ -943,6 +978,7 @@ free_skb:
943 kfree_skb(skb); 978 kfree_skb(skb);
944 return err; 979 return err;
945} 980}
981EXPORT_SYMBOL(skb_pad);
946 982
947/** 983/**
948 * skb_put - add data to a buffer 984 * skb_put - add data to a buffer
@@ -1100,6 +1136,7 @@ done:
1100 1136
1101 return 0; 1137 return 0;
1102} 1138}
1139EXPORT_SYMBOL(___pskb_trim);
1103 1140
1104/** 1141/**
1105 * __pskb_pull_tail - advance tail of skb header 1142 * __pskb_pull_tail - advance tail of skb header
@@ -1193,8 +1230,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
1193 insp = list; 1230 insp = list;
1194 } 1231 }
1195 if (!pskb_pull(list, eat)) { 1232 if (!pskb_pull(list, eat)) {
1196 if (clone) 1233 kfree_skb(clone);
1197 kfree_skb(clone);
1198 return NULL; 1234 return NULL;
1199 } 1235 }
1200 break; 1236 break;
@@ -1238,6 +1274,7 @@ pull_pages:
1238 1274
1239 return skb_tail_pointer(skb); 1275 return skb_tail_pointer(skb);
1240} 1276}
1277EXPORT_SYMBOL(__pskb_pull_tail);
1241 1278
1242/* Copy some data bits from skb to kernel buffer. */ 1279/* Copy some data bits from skb to kernel buffer. */
1243 1280
@@ -1315,6 +1352,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
1315fault: 1352fault:
1316 return -EFAULT; 1353 return -EFAULT;
1317} 1354}
1355EXPORT_SYMBOL(skb_copy_bits);
1318 1356
1319/* 1357/*
1320 * Callback from splice_to_pipe(), if we need to release some pages 1358 * Callback from splice_to_pipe(), if we need to release some pages
@@ -1325,14 +1363,39 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
1325 put_page(spd->pages[i]); 1363 put_page(spd->pages[i]);
1326} 1364}
1327 1365
1328static inline struct page *linear_to_page(struct page *page, unsigned int len, 1366static inline struct page *linear_to_page(struct page *page, unsigned int *len,
1329 unsigned int offset) 1367 unsigned int *offset,
1368 struct sk_buff *skb)
1330{ 1369{
1331 struct page *p = alloc_pages(GFP_KERNEL, 0); 1370 struct sock *sk = skb->sk;
1371 struct page *p = sk->sk_sndmsg_page;
1372 unsigned int off;
1332 1373
1333 if (!p) 1374 if (!p) {
1334 return NULL; 1375new_page:
1335 memcpy(page_address(p) + offset, page_address(page) + offset, len); 1376 p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0);
1377 if (!p)
1378 return NULL;
1379
1380 off = sk->sk_sndmsg_off = 0;
1381 /* hold one ref to this page until it's full */
1382 } else {
1383 unsigned int mlen;
1384
1385 off = sk->sk_sndmsg_off;
1386 mlen = PAGE_SIZE - off;
1387 if (mlen < 64 && mlen < *len) {
1388 put_page(p);
1389 goto new_page;
1390 }
1391
1392 *len = min_t(unsigned int, *len, mlen);
1393 }
1394
1395 memcpy(page_address(p) + off, page_address(page) + *offset, *len);
1396 sk->sk_sndmsg_off += *len;
1397 *offset = off;
1398 get_page(p);
1336 1399
1337 return p; 1400 return p;
1338} 1401}
@@ -1341,21 +1404,21 @@ static inline struct page *linear_to_page(struct page *page, unsigned int len,
1341 * Fill page/offset/length into spd, if it can hold more pages. 1404 * Fill page/offset/length into spd, if it can hold more pages.
1342 */ 1405 */
1343static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, 1406static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
1344 unsigned int len, unsigned int offset, 1407 unsigned int *len, unsigned int offset,
1345 struct sk_buff *skb, int linear) 1408 struct sk_buff *skb, int linear)
1346{ 1409{
1347 if (unlikely(spd->nr_pages == PIPE_BUFFERS)) 1410 if (unlikely(spd->nr_pages == PIPE_BUFFERS))
1348 return 1; 1411 return 1;
1349 1412
1350 if (linear) { 1413 if (linear) {
1351 page = linear_to_page(page, len, offset); 1414 page = linear_to_page(page, len, &offset, skb);
1352 if (!page) 1415 if (!page)
1353 return 1; 1416 return 1;
1354 } else 1417 } else
1355 get_page(page); 1418 get_page(page);
1356 1419
1357 spd->pages[spd->nr_pages] = page; 1420 spd->pages[spd->nr_pages] = page;
1358 spd->partial[spd->nr_pages].len = len; 1421 spd->partial[spd->nr_pages].len = *len;
1359 spd->partial[spd->nr_pages].offset = offset; 1422 spd->partial[spd->nr_pages].offset = offset;
1360 spd->nr_pages++; 1423 spd->nr_pages++;
1361 1424
@@ -1365,8 +1428,13 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
1365static inline void __segment_seek(struct page **page, unsigned int *poff, 1428static inline void __segment_seek(struct page **page, unsigned int *poff,
1366 unsigned int *plen, unsigned int off) 1429 unsigned int *plen, unsigned int off)
1367{ 1430{
1431 unsigned long n;
1432
1368 *poff += off; 1433 *poff += off;
1369 *page += *poff / PAGE_SIZE; 1434 n = *poff / PAGE_SIZE;
1435 if (n)
1436 *page = nth_page(*page, n);
1437
1370 *poff = *poff % PAGE_SIZE; 1438 *poff = *poff % PAGE_SIZE;
1371 *plen -= off; 1439 *plen -= off;
1372} 1440}
@@ -1397,7 +1465,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
1397 /* the linear region may spread across several pages */ 1465 /* the linear region may spread across several pages */
1398 flen = min_t(unsigned int, flen, PAGE_SIZE - poff); 1466 flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
1399 1467
1400 if (spd_fill_page(spd, page, flen, poff, skb, linear)) 1468 if (spd_fill_page(spd, page, &flen, poff, skb, linear))
1401 return 1; 1469 return 1;
1402 1470
1403 __segment_seek(&page, &poff, &plen, flen); 1471 __segment_seek(&page, &poff, &plen, flen);
@@ -1590,7 +1658,6 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
1590fault: 1658fault:
1591 return -EFAULT; 1659 return -EFAULT;
1592} 1660}
1593
1594EXPORT_SYMBOL(skb_store_bits); 1661EXPORT_SYMBOL(skb_store_bits);
1595 1662
1596/* Checksum skb data. */ 1663/* Checksum skb data. */
@@ -1667,6 +1734,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
1667 1734
1668 return csum; 1735 return csum;
1669} 1736}
1737EXPORT_SYMBOL(skb_checksum);
1670 1738
1671/* Both of above in one bottle. */ 1739/* Both of above in one bottle. */
1672 1740
@@ -1748,6 +1816,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
1748 BUG_ON(len); 1816 BUG_ON(len);
1749 return csum; 1817 return csum;
1750} 1818}
1819EXPORT_SYMBOL(skb_copy_and_csum_bits);
1751 1820
1752void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) 1821void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
1753{ 1822{
@@ -1774,6 +1843,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
1774 *((__sum16 *)(to + csstuff)) = csum_fold(csum); 1843 *((__sum16 *)(to + csstuff)) = csum_fold(csum);
1775 } 1844 }
1776} 1845}
1846EXPORT_SYMBOL(skb_copy_and_csum_dev);
1777 1847
1778/** 1848/**
1779 * skb_dequeue - remove from the head of the queue 1849 * skb_dequeue - remove from the head of the queue
@@ -1794,6 +1864,7 @@ struct sk_buff *skb_dequeue(struct sk_buff_head *list)
1794 spin_unlock_irqrestore(&list->lock, flags); 1864 spin_unlock_irqrestore(&list->lock, flags);
1795 return result; 1865 return result;
1796} 1866}
1867EXPORT_SYMBOL(skb_dequeue);
1797 1868
1798/** 1869/**
1799 * skb_dequeue_tail - remove from the tail of the queue 1870 * skb_dequeue_tail - remove from the tail of the queue
@@ -1813,6 +1884,7 @@ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
1813 spin_unlock_irqrestore(&list->lock, flags); 1884 spin_unlock_irqrestore(&list->lock, flags);
1814 return result; 1885 return result;
1815} 1886}
1887EXPORT_SYMBOL(skb_dequeue_tail);
1816 1888
1817/** 1889/**
1818 * skb_queue_purge - empty a list 1890 * skb_queue_purge - empty a list
@@ -1828,6 +1900,7 @@ void skb_queue_purge(struct sk_buff_head *list)
1828 while ((skb = skb_dequeue(list)) != NULL) 1900 while ((skb = skb_dequeue(list)) != NULL)
1829 kfree_skb(skb); 1901 kfree_skb(skb);
1830} 1902}
1903EXPORT_SYMBOL(skb_queue_purge);
1831 1904
1832/** 1905/**
1833 * skb_queue_head - queue a buffer at the list head 1906 * skb_queue_head - queue a buffer at the list head
@@ -1848,6 +1921,7 @@ void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
1848 __skb_queue_head(list, newsk); 1921 __skb_queue_head(list, newsk);
1849 spin_unlock_irqrestore(&list->lock, flags); 1922 spin_unlock_irqrestore(&list->lock, flags);
1850} 1923}
1924EXPORT_SYMBOL(skb_queue_head);
1851 1925
1852/** 1926/**
1853 * skb_queue_tail - queue a buffer at the list tail 1927 * skb_queue_tail - queue a buffer at the list tail
@@ -1868,6 +1942,7 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
1868 __skb_queue_tail(list, newsk); 1942 __skb_queue_tail(list, newsk);
1869 spin_unlock_irqrestore(&list->lock, flags); 1943 spin_unlock_irqrestore(&list->lock, flags);
1870} 1944}
1945EXPORT_SYMBOL(skb_queue_tail);
1871 1946
1872/** 1947/**
1873 * skb_unlink - remove a buffer from a list 1948 * skb_unlink - remove a buffer from a list
@@ -1887,6 +1962,7 @@ void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
1887 __skb_unlink(skb, list); 1962 __skb_unlink(skb, list);
1888 spin_unlock_irqrestore(&list->lock, flags); 1963 spin_unlock_irqrestore(&list->lock, flags);
1889} 1964}
1965EXPORT_SYMBOL(skb_unlink);
1890 1966
1891/** 1967/**
1892 * skb_append - append a buffer 1968 * skb_append - append a buffer
@@ -1906,7 +1982,7 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
1906 __skb_queue_after(list, old, newsk); 1982 __skb_queue_after(list, old, newsk);
1907 spin_unlock_irqrestore(&list->lock, flags); 1983 spin_unlock_irqrestore(&list->lock, flags);
1908} 1984}
1909 1985EXPORT_SYMBOL(skb_append);
1910 1986
1911/** 1987/**
1912 * skb_insert - insert a buffer 1988 * skb_insert - insert a buffer
@@ -1928,6 +2004,7 @@ void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
1928 __skb_insert(newsk, old->prev, old, list); 2004 __skb_insert(newsk, old->prev, old, list);
1929 spin_unlock_irqrestore(&list->lock, flags); 2005 spin_unlock_irqrestore(&list->lock, flags);
1930} 2006}
2007EXPORT_SYMBOL(skb_insert);
1931 2008
1932static inline void skb_split_inside_header(struct sk_buff *skb, 2009static inline void skb_split_inside_header(struct sk_buff *skb,
1933 struct sk_buff* skb1, 2010 struct sk_buff* skb1,
@@ -2006,6 +2083,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
2006 else /* Second chunk has no header, nothing to copy. */ 2083 else /* Second chunk has no header, nothing to copy. */
2007 skb_split_no_header(skb, skb1, len, pos); 2084 skb_split_no_header(skb, skb1, len, pos);
2008} 2085}
2086EXPORT_SYMBOL(skb_split);
2009 2087
2010/* Shifting from/to a cloned skb is a no-go. 2088/* Shifting from/to a cloned skb is a no-go.
2011 * 2089 *
@@ -2168,6 +2246,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
2168 st->frag_idx = st->stepped_offset = 0; 2246 st->frag_idx = st->stepped_offset = 0;
2169 st->frag_data = NULL; 2247 st->frag_data = NULL;
2170} 2248}
2249EXPORT_SYMBOL(skb_prepare_seq_read);
2171 2250
2172/** 2251/**
2173 * skb_seq_read - Sequentially read skb data 2252 * skb_seq_read - Sequentially read skb data
@@ -2255,6 +2334,7 @@ next_skb:
2255 2334
2256 return 0; 2335 return 0;
2257} 2336}
2337EXPORT_SYMBOL(skb_seq_read);
2258 2338
2259/** 2339/**
2260 * skb_abort_seq_read - Abort a sequential read of skb data 2340 * skb_abort_seq_read - Abort a sequential read of skb data
@@ -2268,6 +2348,7 @@ void skb_abort_seq_read(struct skb_seq_state *st)
2268 if (st->frag_data) 2348 if (st->frag_data)
2269 kunmap_skb_frag(st->frag_data); 2349 kunmap_skb_frag(st->frag_data);
2270} 2350}
2351EXPORT_SYMBOL(skb_abort_seq_read);
2271 2352
2272#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) 2353#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
2273 2354
@@ -2310,6 +2391,7 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
2310 ret = textsearch_find(config, state); 2391 ret = textsearch_find(config, state);
2311 return (ret <= to - from ? ret : UINT_MAX); 2392 return (ret <= to - from ? ret : UINT_MAX);
2312} 2393}
2394EXPORT_SYMBOL(skb_find_text);
2313 2395
2314/** 2396/**
2315 * skb_append_datato_frags: - append the user data to a skb 2397 * skb_append_datato_frags: - append the user data to a skb
@@ -2382,6 +2464,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
2382 2464
2383 return 0; 2465 return 0;
2384} 2466}
2467EXPORT_SYMBOL(skb_append_datato_frags);
2385 2468
2386/** 2469/**
2387 * skb_pull_rcsum - pull skb and update receive checksum 2470 * skb_pull_rcsum - pull skb and update receive checksum
@@ -2569,7 +2652,6 @@ err:
2569 } 2652 }
2570 return ERR_PTR(err); 2653 return ERR_PTR(err);
2571} 2654}
2572
2573EXPORT_SYMBOL_GPL(skb_segment); 2655EXPORT_SYMBOL_GPL(skb_segment);
2574 2656
2575int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) 2657int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
@@ -2577,17 +2659,23 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2577 struct sk_buff *p = *head; 2659 struct sk_buff *p = *head;
2578 struct sk_buff *nskb; 2660 struct sk_buff *nskb;
2579 unsigned int headroom; 2661 unsigned int headroom;
2580 unsigned int hlen = p->data - skb_mac_header(p); 2662 unsigned int len = skb_gro_len(skb);
2581 unsigned int len = skb->len;
2582 2663
2583 if (hlen + p->len + len >= 65536) 2664 if (p->len + len >= 65536)
2584 return -E2BIG; 2665 return -E2BIG;
2585 2666
2586 if (skb_shinfo(p)->frag_list) 2667 if (skb_shinfo(p)->frag_list)
2587 goto merge; 2668 goto merge;
2588 else if (!skb_headlen(p) && !skb_headlen(skb) && 2669 else if (skb_headlen(skb) <= skb_gro_offset(skb)) {
2589 skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags < 2670 if (skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags >
2590 MAX_SKB_FRAGS) { 2671 MAX_SKB_FRAGS)
2672 return -E2BIG;
2673
2674 skb_shinfo(skb)->frags[0].page_offset +=
2675 skb_gro_offset(skb) - skb_headlen(skb);
2676 skb_shinfo(skb)->frags[0].size -=
2677 skb_gro_offset(skb) - skb_headlen(skb);
2678
2591 memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags, 2679 memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags,
2592 skb_shinfo(skb)->frags, 2680 skb_shinfo(skb)->frags,
2593 skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); 2681 skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
@@ -2604,7 +2692,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2604 } 2692 }
2605 2693
2606 headroom = skb_headroom(p); 2694 headroom = skb_headroom(p);
2607 nskb = netdev_alloc_skb(p->dev, headroom); 2695 nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p));
2608 if (unlikely(!nskb)) 2696 if (unlikely(!nskb))
2609 return -ENOMEM; 2697 return -ENOMEM;
2610 2698
@@ -2612,12 +2700,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2612 nskb->mac_len = p->mac_len; 2700 nskb->mac_len = p->mac_len;
2613 2701
2614 skb_reserve(nskb, headroom); 2702 skb_reserve(nskb, headroom);
2703 __skb_put(nskb, skb_gro_offset(p));
2615 2704
2616 skb_set_mac_header(nskb, -hlen); 2705 skb_set_mac_header(nskb, skb_mac_header(p) - p->data);
2617 skb_set_network_header(nskb, skb_network_offset(p)); 2706 skb_set_network_header(nskb, skb_network_offset(p));
2618 skb_set_transport_header(nskb, skb_transport_offset(p)); 2707 skb_set_transport_header(nskb, skb_transport_offset(p));
2619 2708
2620 memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen); 2709 __skb_pull(p, skb_gro_offset(p));
2710 memcpy(skb_mac_header(nskb), skb_mac_header(p),
2711 p->data - skb_mac_header(p));
2621 2712
2622 *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); 2713 *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
2623 skb_shinfo(nskb)->frag_list = p; 2714 skb_shinfo(nskb)->frag_list = p;
@@ -2636,6 +2727,17 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2636 p = nskb; 2727 p = nskb;
2637 2728
2638merge: 2729merge:
2730 if (skb_gro_offset(skb) > skb_headlen(skb)) {
2731 skb_shinfo(skb)->frags[0].page_offset +=
2732 skb_gro_offset(skb) - skb_headlen(skb);
2733 skb_shinfo(skb)->frags[0].size -=
2734 skb_gro_offset(skb) - skb_headlen(skb);
2735 skb_gro_reset_offset(skb);
2736 skb_gro_pull(skb, skb_headlen(skb));
2737 }
2738
2739 __skb_pull(skb, skb_gro_offset(skb));
2740
2639 p->prev->next = skb; 2741 p->prev->next = skb;
2640 p->prev = skb; 2742 p->prev = skb;
2641 skb_header_release(skb); 2743 skb_header_release(skb);
@@ -2747,6 +2849,7 @@ int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int le
2747 2849
2748 return nsg; 2850 return nsg;
2749} 2851}
2852EXPORT_SYMBOL_GPL(skb_to_sgvec);
2750 2853
2751/** 2854/**
2752 * skb_cow_data - Check that a socket buffer's data buffers are writable 2855 * skb_cow_data - Check that a socket buffer's data buffers are writable
@@ -2856,6 +2959,45 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2856 2959
2857 return elt; 2960 return elt;
2858} 2961}
2962EXPORT_SYMBOL_GPL(skb_cow_data);
2963
2964void skb_tstamp_tx(struct sk_buff *orig_skb,
2965 struct skb_shared_hwtstamps *hwtstamps)
2966{
2967 struct sock *sk = orig_skb->sk;
2968 struct sock_exterr_skb *serr;
2969 struct sk_buff *skb;
2970 int err;
2971
2972 if (!sk)
2973 return;
2974
2975 skb = skb_clone(orig_skb, GFP_ATOMIC);
2976 if (!skb)
2977 return;
2978
2979 if (hwtstamps) {
2980 *skb_hwtstamps(skb) =
2981 *hwtstamps;
2982 } else {
2983 /*
2984 * no hardware time stamps available,
2985 * so keep the skb_shared_tx and only
2986 * store software time stamp
2987 */
2988 skb->tstamp = ktime_get_real();
2989 }
2990
2991 serr = SKB_EXT_ERR(skb);
2992 memset(serr, 0, sizeof(*serr));
2993 serr->ee.ee_errno = ENOMSG;
2994 serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
2995 err = sock_queue_err_skb(sk, skb);
2996 if (err)
2997 kfree_skb(skb);
2998}
2999EXPORT_SYMBOL_GPL(skb_tstamp_tx);
3000
2859 3001
2860/** 3002/**
2861 * skb_partial_csum_set - set up and verify partial csum values for packet 3003 * skb_partial_csum_set - set up and verify partial csum values for packet
@@ -2884,6 +3026,7 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
2884 skb->csum_offset = off; 3026 skb->csum_offset = off;
2885 return true; 3027 return true;
2886} 3028}
3029EXPORT_SYMBOL_GPL(skb_partial_csum_set);
2887 3030
2888void __skb_warn_lro_forwarding(const struct sk_buff *skb) 3031void __skb_warn_lro_forwarding(const struct sk_buff *skb)
2889{ 3032{
@@ -2891,42 +3034,4 @@ void __skb_warn_lro_forwarding(const struct sk_buff *skb)
2891 pr_warning("%s: received packets cannot be forwarded" 3034 pr_warning("%s: received packets cannot be forwarded"
2892 " while LRO is enabled\n", skb->dev->name); 3035 " while LRO is enabled\n", skb->dev->name);
2893} 3036}
2894
2895EXPORT_SYMBOL(___pskb_trim);
2896EXPORT_SYMBOL(__kfree_skb);
2897EXPORT_SYMBOL(kfree_skb);
2898EXPORT_SYMBOL(__pskb_pull_tail);
2899EXPORT_SYMBOL(__alloc_skb);
2900EXPORT_SYMBOL(__netdev_alloc_skb);
2901EXPORT_SYMBOL(pskb_copy);
2902EXPORT_SYMBOL(pskb_expand_head);
2903EXPORT_SYMBOL(skb_checksum);
2904EXPORT_SYMBOL(skb_clone);
2905EXPORT_SYMBOL(skb_copy);
2906EXPORT_SYMBOL(skb_copy_and_csum_bits);
2907EXPORT_SYMBOL(skb_copy_and_csum_dev);
2908EXPORT_SYMBOL(skb_copy_bits);
2909EXPORT_SYMBOL(skb_copy_expand);
2910EXPORT_SYMBOL(skb_over_panic);
2911EXPORT_SYMBOL(skb_pad);
2912EXPORT_SYMBOL(skb_realloc_headroom);
2913EXPORT_SYMBOL(skb_under_panic);
2914EXPORT_SYMBOL(skb_dequeue);
2915EXPORT_SYMBOL(skb_dequeue_tail);
2916EXPORT_SYMBOL(skb_insert);
2917EXPORT_SYMBOL(skb_queue_purge);
2918EXPORT_SYMBOL(skb_queue_head);
2919EXPORT_SYMBOL(skb_queue_tail);
2920EXPORT_SYMBOL(skb_unlink);
2921EXPORT_SYMBOL(skb_append);
2922EXPORT_SYMBOL(skb_split);
2923EXPORT_SYMBOL(skb_prepare_seq_read);
2924EXPORT_SYMBOL(skb_seq_read);
2925EXPORT_SYMBOL(skb_abort_seq_read);
2926EXPORT_SYMBOL(skb_find_text);
2927EXPORT_SYMBOL(skb_append_datato_frags);
2928EXPORT_SYMBOL(__skb_warn_lro_forwarding); 3037EXPORT_SYMBOL(__skb_warn_lro_forwarding);
2929
2930EXPORT_SYMBOL_GPL(skb_to_sgvec);
2931EXPORT_SYMBOL_GPL(skb_cow_data);
2932EXPORT_SYMBOL_GPL(skb_partial_csum_set);
diff --git a/net/core/sock.c b/net/core/sock.c
index 5f97caa158e8..0620046e4eba 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -120,6 +120,7 @@
120#include <net/net_namespace.h> 120#include <net/net_namespace.h>
121#include <net/request_sock.h> 121#include <net/request_sock.h>
122#include <net/sock.h> 122#include <net/sock.h>
123#include <linux/net_tstamp.h>
123#include <net/xfrm.h> 124#include <net/xfrm.h>
124#include <linux/ipsec.h> 125#include <linux/ipsec.h>
125 126
@@ -149,7 +150,7 @@ static const char *af_family_key_strings[AF_MAX+1] = {
149 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , 150 "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
150 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , 151 "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
151 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , 152 "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
152 "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , 153 "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
153 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , 154 "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
154 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , 155 "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
155 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , 156 "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
@@ -164,7 +165,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
164 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , 165 "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
165 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , 166 "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
166 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , 167 "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
167 "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" , 168 "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
168 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , 169 "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
169 "slock-27" , "slock-28" , "slock-AF_CAN" , 170 "slock-27" , "slock-28" , "slock-AF_CAN" ,
170 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , 171 "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
@@ -179,7 +180,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
179 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , 180 "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
180 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , 181 "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
181 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , 182 "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
182 "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" , 183 "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
183 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , 184 "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
184 "clock-27" , "clock-28" , "clock-AF_CAN" , 185 "clock-27" , "clock-28" , "clock-AF_CAN" ,
185 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , 186 "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
@@ -255,11 +256,14 @@ static void sock_warn_obsolete_bsdism(const char *name)
255 } 256 }
256} 257}
257 258
258static void sock_disable_timestamp(struct sock *sk) 259static void sock_disable_timestamp(struct sock *sk, int flag)
259{ 260{
260 if (sock_flag(sk, SOCK_TIMESTAMP)) { 261 if (sock_flag(sk, flag)) {
261 sock_reset_flag(sk, SOCK_TIMESTAMP); 262 sock_reset_flag(sk, flag);
262 net_disable_timestamp(); 263 if (!sock_flag(sk, SOCK_TIMESTAMP) &&
264 !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
265 net_disable_timestamp();
266 }
263 } 267 }
264} 268}
265 269
@@ -614,13 +618,38 @@ set_rcvbuf:
614 else 618 else
615 sock_set_flag(sk, SOCK_RCVTSTAMPNS); 619 sock_set_flag(sk, SOCK_RCVTSTAMPNS);
616 sock_set_flag(sk, SOCK_RCVTSTAMP); 620 sock_set_flag(sk, SOCK_RCVTSTAMP);
617 sock_enable_timestamp(sk); 621 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
618 } else { 622 } else {
619 sock_reset_flag(sk, SOCK_RCVTSTAMP); 623 sock_reset_flag(sk, SOCK_RCVTSTAMP);
620 sock_reset_flag(sk, SOCK_RCVTSTAMPNS); 624 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
621 } 625 }
622 break; 626 break;
623 627
628 case SO_TIMESTAMPING:
629 if (val & ~SOF_TIMESTAMPING_MASK) {
630			ret = -EINVAL;
631 break;
632 }
633 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
634 val & SOF_TIMESTAMPING_TX_HARDWARE);
635 sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
636 val & SOF_TIMESTAMPING_TX_SOFTWARE);
637 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
638 val & SOF_TIMESTAMPING_RX_HARDWARE);
639 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
640 sock_enable_timestamp(sk,
641 SOCK_TIMESTAMPING_RX_SOFTWARE);
642 else
643 sock_disable_timestamp(sk,
644 SOCK_TIMESTAMPING_RX_SOFTWARE);
645 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
646 val & SOF_TIMESTAMPING_SOFTWARE);
647 sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
648 val & SOF_TIMESTAMPING_SYS_HARDWARE);
649 sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
650 val & SOF_TIMESTAMPING_RAW_HARDWARE);
651 break;
652
624 case SO_RCVLOWAT: 653 case SO_RCVLOWAT:
625 if (val < 0) 654 if (val < 0)
626 val = INT_MAX; 655 val = INT_MAX;
@@ -768,6 +797,24 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
768 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS); 797 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
769 break; 798 break;
770 799
800 case SO_TIMESTAMPING:
801 v.val = 0;
802 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
803 v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
804 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
805 v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
806 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
807 v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
808 if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
809 v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
810 if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
811 v.val |= SOF_TIMESTAMPING_SOFTWARE;
812 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
813 v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
814 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
815 v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
816 break;
817
771 case SO_RCVTIMEO: 818 case SO_RCVTIMEO:
772 lv=sizeof(struct timeval); 819 lv=sizeof(struct timeval);
773 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { 820 if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
@@ -969,7 +1016,8 @@ void sk_free(struct sock *sk)
969 rcu_assign_pointer(sk->sk_filter, NULL); 1016 rcu_assign_pointer(sk->sk_filter, NULL);
970 } 1017 }
971 1018
972 sock_disable_timestamp(sk); 1019 sock_disable_timestamp(sk, SOCK_TIMESTAMP);
1020 sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);
973 1021
974 if (atomic_read(&sk->sk_omem_alloc)) 1022 if (atomic_read(&sk->sk_omem_alloc))
975 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", 1023 printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
@@ -1255,10 +1303,9 @@ static long sock_wait_for_wmem(struct sock * sk, long timeo)
1255 * Generic send/receive buffer handlers 1303 * Generic send/receive buffer handlers
1256 */ 1304 */
1257 1305
1258static struct sk_buff *sock_alloc_send_pskb(struct sock *sk, 1306struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
1259 unsigned long header_len, 1307 unsigned long data_len, int noblock,
1260 unsigned long data_len, 1308 int *errcode)
1261 int noblock, int *errcode)
1262{ 1309{
1263 struct sk_buff *skb; 1310 struct sk_buff *skb;
1264 gfp_t gfp_mask; 1311 gfp_t gfp_mask;
@@ -1338,6 +1385,7 @@ failure:
1338 *errcode = err; 1385 *errcode = err;
1339 return NULL; 1386 return NULL;
1340} 1387}
1388EXPORT_SYMBOL(sock_alloc_send_pskb);
1341 1389
1342struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, 1390struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1343 int noblock, int *errcode) 1391 int noblock, int *errcode)
@@ -1786,7 +1834,7 @@ int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
1786{ 1834{
1787 struct timeval tv; 1835 struct timeval tv;
1788 if (!sock_flag(sk, SOCK_TIMESTAMP)) 1836 if (!sock_flag(sk, SOCK_TIMESTAMP))
1789 sock_enable_timestamp(sk); 1837 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
1790 tv = ktime_to_timeval(sk->sk_stamp); 1838 tv = ktime_to_timeval(sk->sk_stamp);
1791 if (tv.tv_sec == -1) 1839 if (tv.tv_sec == -1)
1792 return -ENOENT; 1840 return -ENOENT;
@@ -1802,7 +1850,7 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1802{ 1850{
1803 struct timespec ts; 1851 struct timespec ts;
1804 if (!sock_flag(sk, SOCK_TIMESTAMP)) 1852 if (!sock_flag(sk, SOCK_TIMESTAMP))
1805 sock_enable_timestamp(sk); 1853 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
1806 ts = ktime_to_timespec(sk->sk_stamp); 1854 ts = ktime_to_timespec(sk->sk_stamp);
1807 if (ts.tv_sec == -1) 1855 if (ts.tv_sec == -1)
1808 return -ENOENT; 1856 return -ENOENT;
@@ -1814,11 +1862,20 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
1814} 1862}
1815EXPORT_SYMBOL(sock_get_timestampns); 1863EXPORT_SYMBOL(sock_get_timestampns);
1816 1864
1817void sock_enable_timestamp(struct sock *sk) 1865void sock_enable_timestamp(struct sock *sk, int flag)
1818{ 1866{
1819 if (!sock_flag(sk, SOCK_TIMESTAMP)) { 1867 if (!sock_flag(sk, flag)) {
1820 sock_set_flag(sk, SOCK_TIMESTAMP); 1868 sock_set_flag(sk, flag);
1821 net_enable_timestamp(); 1869 /*
1870 * we just set one of the two flags which require net
1871 * time stamping, but time stamping might have been on
1872 * already because of the other one
1873 */
1874 if (!sock_flag(sk,
1875 flag == SOCK_TIMESTAMP ?
1876 SOCK_TIMESTAMPING_RX_SOFTWARE :
1877 SOCK_TIMESTAMP))
1878 net_enable_timestamp();
1822 } 1879 }
1823} 1880}
1824 1881
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 83d3398559ea..7db1de0497c6 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -11,6 +11,7 @@
11#include <linux/socket.h> 11#include <linux/socket.h>
12#include <linux/netdevice.h> 12#include <linux/netdevice.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <net/ip.h>
14#include <net/sock.h> 15#include <net/sock.h>
15 16
16static struct ctl_table net_core_table[] = { 17static struct ctl_table net_core_table[] = {
diff --git a/net/core/utils.c b/net/core/utils.c
index 72e0ebe964a0..83221aee7084 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -22,7 +22,6 @@
22#include <linux/net.h> 22#include <linux/net.h>
23#include <linux/string.h> 23#include <linux/string.h>
24#include <linux/types.h> 24#include <linux/types.h>
25#include <linux/random.h>
26#include <linux/percpu.h> 25#include <linux/percpu.h>
27#include <linux/init.h> 26#include <linux/init.h>
28#include <net/sock.h> 27#include <net/sock.h>
diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h
index 45f95e55f873..7ea557b7c6b1 100644
--- a/net/dccp/ackvec.h
+++ b/net/dccp/ackvec.h
@@ -20,6 +20,9 @@
20/* We can spread an ack vector across multiple options */ 20/* We can spread an ack vector across multiple options */
21#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2) 21#define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2)
22 22
23/* Estimated minimum average Ack Vector length - used for updating MPS */
24#define DCCPAV_MIN_OPTLEN 16
25
23#define DCCP_ACKVEC_STATE_RECEIVED 0 26#define DCCP_ACKVEC_STATE_RECEIVED 0
24#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) 27#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6)
25#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) 28#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6)
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f2230fc168e1..d6bc47363b1c 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -42,9 +42,11 @@
42extern int dccp_debug; 42extern int dccp_debug;
43#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a) 43#define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a)
44#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) 44#define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a)
45#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a)
45#else 46#else
46#define dccp_pr_debug(format, a...) 47#define dccp_pr_debug(format, a...)
47#define dccp_pr_debug_cat(format, a...) 48#define dccp_pr_debug_cat(format, a...)
49#define dccp_debug(format, a...)
48#endif 50#endif
49 51
50extern struct inet_hashinfo dccp_hashinfo; 52extern struct inet_hashinfo dccp_hashinfo;
@@ -61,11 +63,14 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
61 * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields 63 * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields
62 * Hence a safe upper bound for the maximum option length is 1020-28 = 992 64 * Hence a safe upper bound for the maximum option length is 1020-28 = 992
63 */ 65 */
64#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int)) 66#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(uint32_t))
65#define DCCP_MAX_PACKET_HDR 28 67#define DCCP_MAX_PACKET_HDR 28
66#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR) 68#define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR)
67#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER) 69#define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER)
68 70
71/* Upper bound for initial feature-negotiation overhead (padded to 32 bits) */
72#define DCCP_FEATNEG_OVERHEAD (32 * sizeof(uint32_t))
73
69#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT 74#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
70 * state, about 60 seconds */ 75 * state, about 60 seconds */
71 76
@@ -95,9 +100,6 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo);
95extern int sysctl_dccp_request_retries; 100extern int sysctl_dccp_request_retries;
96extern int sysctl_dccp_retries1; 101extern int sysctl_dccp_retries1;
97extern int sysctl_dccp_retries2; 102extern int sysctl_dccp_retries2;
98extern int sysctl_dccp_feat_sequence_window;
99extern int sysctl_dccp_feat_rx_ccid;
100extern int sysctl_dccp_feat_tx_ccid;
101extern int sysctl_dccp_tx_qlen; 103extern int sysctl_dccp_tx_qlen;
102extern int sysctl_dccp_sync_ratelimit; 104extern int sysctl_dccp_sync_ratelimit;
103 105
@@ -409,23 +411,21 @@ static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack,
409static inline void dccp_update_gsr(struct sock *sk, u64 seq) 411static inline void dccp_update_gsr(struct sock *sk, u64 seq)
410{ 412{
411 struct dccp_sock *dp = dccp_sk(sk); 413 struct dccp_sock *dp = dccp_sk(sk);
412 const struct dccp_minisock *dmsk = dccp_msk(sk);
413 414
414 dp->dccps_gsr = seq; 415 dp->dccps_gsr = seq;
415 dccp_set_seqno(&dp->dccps_swl, 416 /* Sequence validity window depends on remote Sequence Window (7.5.1) */
416 dp->dccps_gsr + 1 - (dmsk->dccpms_sequence_window / 4)); 417 dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
417 dccp_set_seqno(&dp->dccps_swh, 418 dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4);
418 dp->dccps_gsr + (3 * dmsk->dccpms_sequence_window) / 4);
419} 419}
420 420
421static inline void dccp_update_gss(struct sock *sk, u64 seq) 421static inline void dccp_update_gss(struct sock *sk, u64 seq)
422{ 422{
423 struct dccp_sock *dp = dccp_sk(sk); 423 struct dccp_sock *dp = dccp_sk(sk);
424 424
425 dp->dccps_awh = dp->dccps_gss = seq; 425 dp->dccps_gss = seq;
426 dccp_set_seqno(&dp->dccps_awl, 426 /* Ack validity window depends on local Sequence Window value (7.5.1) */
427 (dp->dccps_gss - 427 dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win);
428 dccp_msk(sk)->dccpms_sequence_window + 1)); 428 dp->dccps_awh = dp->dccps_gss;
429} 429}
430 430
431static inline int dccp_ack_pending(const struct sock *sk) 431static inline int dccp_ack_pending(const struct sock *sk)
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 4152308958ab..b04160a2eea5 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -25,6 +25,11 @@
25#include "ccid.h" 25#include "ccid.h"
26#include "feat.h" 26#include "feat.h"
27 27
28/* feature-specific sysctls - initialised to the defaults from RFC 4340, 6.4 */
29unsigned long sysctl_dccp_sequence_window __read_mostly = 100;
30int sysctl_dccp_rx_ccid __read_mostly = 2,
31 sysctl_dccp_tx_ccid __read_mostly = 2;
32
28/* 33/*
29 * Feature activation handlers. 34 * Feature activation handlers.
30 * 35 *
@@ -51,8 +56,17 @@ static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx)
51 56
52static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx) 57static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx)
53{ 58{
54 if (!rx) 59 struct dccp_sock *dp = dccp_sk(sk);
55 dccp_msk(sk)->dccpms_sequence_window = seq_win; 60
61 if (rx) {
62 dp->dccps_r_seq_win = seq_win;
63 /* propagate changes to update SWL/SWH */
64 dccp_update_gsr(sk, dp->dccps_gsr);
65 } else {
66 dp->dccps_l_seq_win = seq_win;
67 /* propagate changes to update AWL */
68 dccp_update_gss(sk, dp->dccps_gss);
69 }
56 return 0; 70 return 0;
57} 71}
58 72
@@ -194,6 +208,100 @@ static int dccp_feat_default_value(u8 feat_num)
194 return idx < 0 ? 0 : dccp_feat_table[idx].default_value; 208 return idx < 0 ? 0 : dccp_feat_table[idx].default_value;
195} 209}
196 210
211/*
212 * Debugging and verbose-printing section
213 */
214static const char *dccp_feat_fname(const u8 feat)
215{
216 static const char *feature_names[] = {
217 [DCCPF_RESERVED] = "Reserved",
218 [DCCPF_CCID] = "CCID",
219 [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos",
220 [DCCPF_SEQUENCE_WINDOW] = "Sequence Window",
221 [DCCPF_ECN_INCAPABLE] = "ECN Incapable",
222 [DCCPF_ACK_RATIO] = "Ack Ratio",
223 [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector",
224 [DCCPF_SEND_NDP_COUNT] = "Send NDP Count",
225 [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage",
226 [DCCPF_DATA_CHECKSUM] = "Send Data Checksum",
227 };
228 if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
229 return feature_names[DCCPF_RESERVED];
230
231 if (feat == DCCPF_SEND_LEV_RATE)
232 return "Send Loss Event Rate";
233 if (feat >= DCCPF_MIN_CCID_SPECIFIC)
234 return "CCID-specific";
235
236 return feature_names[feat];
237}
238
239static const char *dccp_feat_sname[] = { "DEFAULT", "INITIALISING", "CHANGING",
240 "UNSTABLE", "STABLE" };
241
242#ifdef CONFIG_IP_DCCP_DEBUG
243static const char *dccp_feat_oname(const u8 opt)
244{
245 switch (opt) {
246 case DCCPO_CHANGE_L: return "Change_L";
247 case DCCPO_CONFIRM_L: return "Confirm_L";
248 case DCCPO_CHANGE_R: return "Change_R";
249 case DCCPO_CONFIRM_R: return "Confirm_R";
250 }
251 return NULL;
252}
253
254static void dccp_feat_printval(u8 feat_num, dccp_feat_val const *val)
255{
256 u8 i, type = dccp_feat_type(feat_num);
257
258 if (val == NULL || (type == FEAT_SP && val->sp.vec == NULL))
259 dccp_pr_debug_cat("(NULL)");
260 else if (type == FEAT_SP)
261 for (i = 0; i < val->sp.len; i++)
262 dccp_pr_debug_cat("%s%u", i ? " " : "", val->sp.vec[i]);
263 else if (type == FEAT_NN)
264 dccp_pr_debug_cat("%llu", (unsigned long long)val->nn);
265 else
266 dccp_pr_debug_cat("unknown type %u", type);
267}
268
269static void dccp_feat_printvals(u8 feat_num, u8 *list, u8 len)
270{
271 u8 type = dccp_feat_type(feat_num);
272 dccp_feat_val fval = { .sp.vec = list, .sp.len = len };
273
274 if (type == FEAT_NN)
275 fval.nn = dccp_decode_value_var(list, len);
276 dccp_feat_printval(feat_num, &fval);
277}
278
279static void dccp_feat_print_entry(struct dccp_feat_entry const *entry)
280{
281 dccp_debug(" * %s %s = ", entry->is_local ? "local" : "remote",
282 dccp_feat_fname(entry->feat_num));
283 dccp_feat_printval(entry->feat_num, &entry->val);
284 dccp_pr_debug_cat(", state=%s %s\n", dccp_feat_sname[entry->state],
285 entry->needs_confirm ? "(Confirm pending)" : "");
286}
287
288#define dccp_feat_print_opt(opt, feat, val, len, mandatory) do { \
289 dccp_pr_debug("%s(%s, ", dccp_feat_oname(opt), dccp_feat_fname(feat));\
290 dccp_feat_printvals(feat, val, len); \
291 dccp_pr_debug_cat(") %s\n", mandatory ? "!" : ""); } while (0)
292
293#define dccp_feat_print_fnlist(fn_list) { \
294 const struct dccp_feat_entry *___entry; \
295 \
296 dccp_pr_debug("List Dump:\n"); \
297 list_for_each_entry(___entry, fn_list, node) \
298 dccp_feat_print_entry(___entry); \
299}
300#else /* ! CONFIG_IP_DCCP_DEBUG */
301#define dccp_feat_print_opt(opt, feat, val, len, mandatory)
302#define dccp_feat_print_fnlist(fn_list)
303#endif
304
197static int __dccp_feat_activate(struct sock *sk, const int idx, 305static int __dccp_feat_activate(struct sock *sk, const int idx,
198 const bool is_local, dccp_feat_val const *fval) 306 const bool is_local, dccp_feat_val const *fval)
199{ 307{
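With CONFIG_IP_DCCP_DEBUG set, the helpers above print negotiation state in a compact form. The lines below are illustrative output reconstructed from the format strings in this hunk, not captured from a run; the exact prefix depends on dccp_debug()/dccp_pr_debug().

/* Reconstructed example of the debug output format:
 *   Change_L(CCID, 3 2) !
 *    * local CCID = 3 2, state=CHANGING (Confirm pending)
 */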
@@ -226,6 +334,10 @@ static int __dccp_feat_activate(struct sock *sk, const int idx,
226 /* Location is RX if this is a local-RX or remote-TX feature */ 334 /* Location is RX if this is a local-RX or remote-TX feature */
227 rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX)); 335 rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX));
228 336
337 dccp_debug(" -> activating %s %s, %sval=%llu\n", rx ? "RX" : "TX",
338 dccp_feat_fname(dccp_feat_table[idx].feat_num),
339 fval ? "" : "default ", (unsigned long long)val);
340
229 return dccp_feat_table[idx].activation_hdlr(sk, val, rx); 341 return dccp_feat_table[idx].activation_hdlr(sk, val, rx);
230} 342}
231 343
@@ -530,6 +642,7 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq,
530 return -1; 642 return -1;
531 } 643 }
532 } 644 }
645 dccp_feat_print_opt(opt, pos->feat_num, ptr, len, 0);
533 646
534 if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt)) 647 if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt))
535 return -1; 648 return -1;
@@ -783,6 +896,7 @@ int dccp_feat_finalise_settings(struct dccp_sock *dp)
783 while (i--) 896 while (i--)
784 if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i)) 897 if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i))
785 return -1; 898 return -1;
899 dccp_feat_print_fnlist(fn);
786 return 0; 900 return 0;
787} 901}
788 902
@@ -901,6 +1015,8 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
901 if (len == 0 || type == FEAT_UNKNOWN) /* 6.1 and 6.6.8 */ 1015 if (len == 0 || type == FEAT_UNKNOWN) /* 6.1 and 6.6.8 */
902 goto unknown_feature_or_value; 1016 goto unknown_feature_or_value;
903 1017
1018 dccp_feat_print_opt(opt, feat, val, len, is_mandatory);
1019
904 /* 1020 /*
905 * Negotiation of NN features: Change R is invalid, so there is no 1021 * Negotiation of NN features: Change R is invalid, so there is no
906 * simultaneous negotiation; hence we do not look up in the list. 1022 * simultaneous negotiation; hence we do not look up in the list.
@@ -1006,6 +1122,8 @@ static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt,
1006 const bool local = (opt == DCCPO_CONFIRM_R); 1122 const bool local = (opt == DCCPO_CONFIRM_R);
1007 struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local); 1123 struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local);
1008 1124
1125 dccp_feat_print_opt(opt, feat, val, len, is_mandatory);
1126
1009 if (entry == NULL) { /* nothing queued: ignore or handle error */ 1127 if (entry == NULL) { /* nothing queued: ignore or handle error */
1010 if (is_mandatory && type == FEAT_UNKNOWN) 1128 if (is_mandatory && type == FEAT_UNKNOWN)
1011 return DCCP_RESET_CODE_MANDATORY_ERROR; 1129 return DCCP_RESET_CODE_MANDATORY_ERROR;
@@ -1115,23 +1233,70 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
1115 return 0; /* ignore FN options in all other states */ 1233 return 0; /* ignore FN options in all other states */
1116} 1234}
1117 1235
1236/**
1237 * dccp_feat_init - Seed feature negotiation with host-specific defaults
1238 * This initialises global defaults, depending on the value of the sysctls.
1239 * These can later be overridden by registering changes via setsockopt calls.
1240 * The last link in the chain is finalise_settings, to make sure that between
1241 * here and the start of actual feature negotiation no inconsistencies enter.
1242 *
1243 * All features not appearing below use either defaults or are otherwise
1244 * later adjusted through dccp_feat_finalise_settings().
1245 */
1118int dccp_feat_init(struct sock *sk) 1246int dccp_feat_init(struct sock *sk)
1119{ 1247{
1120 struct dccp_sock *dp = dccp_sk(sk); 1248 struct list_head *fn = &dccp_sk(sk)->dccps_featneg;
1121 struct dccp_minisock *dmsk = dccp_msk(sk); 1249 u8 on = 1, off = 0;
1122 int rc; 1250 int rc;
1251 struct {
1252 u8 *val;
1253 u8 len;
1254 } tx, rx;
1255
1256 /* Non-negotiable (NN) features */
1257 rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0,
1258 sysctl_dccp_sequence_window);
1259 if (rc)
1260 return rc;
1261
1262 /* Server-priority (SP) features */
1263
1264 /* Advertise that short seqnos are not supported (7.6.1) */
1265 rc = __feat_register_sp(fn, DCCPF_SHORT_SEQNOS, true, true, &off, 1);
1266 if (rc)
1267 return rc;
1123 1268
1124 INIT_LIST_HEAD(&dmsk->dccpms_pending); /* XXX no longer used */ 1269 /* RFC 4340 12.1: "If a DCCP is not ECN capable, ..." */
1125 INIT_LIST_HEAD(&dmsk->dccpms_conf); /* XXX no longer used */ 1270 rc = __feat_register_sp(fn, DCCPF_ECN_INCAPABLE, true, true, &on, 1);
1271 if (rc)
1272 return rc;
1273
1274 /*
1275 * We advertise the available list of CCIDs and reorder according to
1276 * preferences, to avoid failure resulting from negotiating different
1277 * singleton values (which always leads to failure).
1278 * These settings can still (later) be overridden via sockopts.
1279 */
1280 if (ccid_get_builtin_ccids(&tx.val, &tx.len) ||
1281 ccid_get_builtin_ccids(&rx.val, &rx.len))
1282 return -ENOBUFS;
1283
1284 if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) ||
1285 !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len))
1286 goto free_ccid_lists;
1287
1288 rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len);
1289 if (rc)
1290 goto free_ccid_lists;
1291
1292 rc = __feat_register_sp(fn, DCCPF_CCID, false, false, rx.val, rx.len);
1126 1293
1127 /* Ack ratio */ 1294free_ccid_lists:
1128 rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0, 1295 kfree(tx.val);
1129 dp->dccps_l_ack_ratio); 1296 kfree(rx.val);
1130 return rc; 1297 return rc;
1131} 1298}
1132 1299
1133EXPORT_SYMBOL_GPL(dccp_feat_init);
1134
1135int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list) 1300int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list)
1136{ 1301{
1137 struct dccp_sock *dp = dccp_sk(sk); 1302 struct dccp_sock *dp = dccp_sk(sk);
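The defaults seeded in dccp_feat_init() above are per host; individual sockets can still override them while negotiation is open. A hedged userspace sketch, assuming SOL_DCCP and DCCP_SOCKOPT_CCID are exposed by the installed <linux/dccp.h> headers:

#include <stdint.h>
#include <sys/socket.h>
#include <linux/dccp.h>

/* Illustrative only: prefer CCID-3 over CCID-2 for both directions,
 * overriding the net.dccp.default.{rx,tx}_ccid seeds for this socket.
 * Must run before connect()/listen(), while feature negotiation is open.
 */
static int dccp_prefer_ccid3(int fd)
{
	uint8_t prefs[] = { 3, 2 };

	return setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID,
			  prefs, sizeof(prefs));
}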
@@ -1156,9 +1321,10 @@ int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list)
1156 goto activation_failed; 1321 goto activation_failed;
1157 } 1322 }
1158 if (cur->state != FEAT_STABLE) { 1323 if (cur->state != FEAT_STABLE) {
1159 DCCP_CRIT("Negotiation of %s %u failed in state %u", 1324 DCCP_CRIT("Negotiation of %s %s failed in state %s",
1160 cur->is_local ? "local" : "remote", 1325 cur->is_local ? "local" : "remote",
1161 cur->feat_num, cur->state); 1326 dccp_feat_fname(cur->feat_num),
1327 dccp_feat_sname[cur->state]);
1162 goto activation_failed; 1328 goto activation_failed;
1163 } 1329 }
1164 fvals[idx][cur->is_local] = &cur->val; 1330 fvals[idx][cur->is_local] = &cur->val;
@@ -1199,43 +1365,3 @@ activation_failed:
1199 dp->dccps_hc_rx_ackvec = NULL; 1365 dp->dccps_hc_rx_ackvec = NULL;
1200 return -1; 1366 return -1;
1201} 1367}
1202
1203#ifdef CONFIG_IP_DCCP_DEBUG
1204const char *dccp_feat_typename(const u8 type)
1205{
1206 switch(type) {
1207 case DCCPO_CHANGE_L: return("ChangeL");
1208 case DCCPO_CONFIRM_L: return("ConfirmL");
1209 case DCCPO_CHANGE_R: return("ChangeR");
1210 case DCCPO_CONFIRM_R: return("ConfirmR");
1211 /* the following case must not appear in feature negotation */
1212 default: dccp_pr_debug("unknown type %d [BUG!]\n", type);
1213 }
1214 return NULL;
1215}
1216
1217const char *dccp_feat_name(const u8 feat)
1218{
1219 static const char *feature_names[] = {
1220 [DCCPF_RESERVED] = "Reserved",
1221 [DCCPF_CCID] = "CCID",
1222 [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos",
1223 [DCCPF_SEQUENCE_WINDOW] = "Sequence Window",
1224 [DCCPF_ECN_INCAPABLE] = "ECN Incapable",
1225 [DCCPF_ACK_RATIO] = "Ack Ratio",
1226 [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector",
1227 [DCCPF_SEND_NDP_COUNT] = "Send NDP Count",
1228 [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage",
1229 [DCCPF_DATA_CHECKSUM] = "Send Data Checksum",
1230 };
1231 if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
1232 return feature_names[DCCPF_RESERVED];
1233
1234 if (feat == DCCPF_SEND_LEV_RATE)
1235 return "Send Loss Event Rate";
1236 if (feat >= DCCPF_MIN_CCID_SPECIFIC)
1237 return "CCID-specific";
1238
1239 return feature_names[feat];
1240}
1241#endif /* CONFIG_IP_DCCP_DEBUG */
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index 9b46e2a7866e..f96721619def 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -100,26 +100,21 @@ struct ccid_dependency {
100 u8 val; 100 u8 val;
101}; 101};
102 102
103#ifdef CONFIG_IP_DCCP_DEBUG 103/*
104extern const char *dccp_feat_typename(const u8 type); 104 * Sysctls to seed defaults for feature negotiation
105extern const char *dccp_feat_name(const u8 feat); 105 */
106 106extern unsigned long sysctl_dccp_sequence_window;
107static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) 107extern int sysctl_dccp_rx_ccid;
108{ 108extern int sysctl_dccp_tx_ccid;
109 dccp_pr_debug("%s(%s (%d), %d)\n", dccp_feat_typename(type),
110 dccp_feat_name(feat), feat, val);
111}
112#else
113#define dccp_feat_debug(type, feat, val)
114#endif /* CONFIG_IP_DCCP_DEBUG */
115 109
110extern int dccp_feat_init(struct sock *sk);
111extern void dccp_feat_initialise_sysctls(void);
116extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, 112extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
117 u8 const *list, u8 len); 113 u8 const *list, u8 len);
118extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val); 114extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
119extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, 115extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
120 u8 mand, u8 opt, u8 feat, u8 *val, u8 len); 116 u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
121extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); 117extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
122extern int dccp_feat_init(struct sock *sk);
123 118
124/* 119/*
125 * Encoding variable-length options and their maximum length. 120 * Encoding variable-length options and their maximum length.
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 6821ae33dd37..5ca49cec95f5 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -42,11 +42,6 @@ struct inet_timewait_death_row dccp_death_row = {
42 42
43EXPORT_SYMBOL_GPL(dccp_death_row); 43EXPORT_SYMBOL_GPL(dccp_death_row);
44 44
45void dccp_minisock_init(struct dccp_minisock *dmsk)
46{
47 dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window;
48}
49
50void dccp_time_wait(struct sock *sk, int state, int timeo) 45void dccp_time_wait(struct sock *sk, int state, int timeo)
51{ 46{
52 struct inet_timewait_sock *tw = NULL; 47 struct inet_timewait_sock *tw = NULL;
@@ -110,7 +105,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
110 struct dccp_request_sock *dreq = dccp_rsk(req); 105 struct dccp_request_sock *dreq = dccp_rsk(req);
111 struct inet_connection_sock *newicsk = inet_csk(newsk); 106 struct inet_connection_sock *newicsk = inet_csk(newsk);
112 struct dccp_sock *newdp = dccp_sk(newsk); 107 struct dccp_sock *newdp = dccp_sk(newsk);
113 struct dccp_minisock *newdmsk = dccp_msk(newsk);
114 108
115 newdp->dccps_role = DCCP_ROLE_SERVER; 109 newdp->dccps_role = DCCP_ROLE_SERVER;
116 newdp->dccps_hc_rx_ackvec = NULL; 110 newdp->dccps_hc_rx_ackvec = NULL;
@@ -128,10 +122,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
128 * Initialize S.GAR := S.ISS 122 * Initialize S.GAR := S.ISS
129 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies 123 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
130 */ 124 */
131
132 /* See dccp_v4_conn_request */
133 newdmsk->dccpms_sequence_window = req->rcv_wnd;
134
135 newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss; 125 newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss;
136 dccp_update_gss(newsk, dreq->dreq_iss); 126 dccp_update_gss(newsk, dreq->dreq_iss);
137 127
@@ -290,7 +280,6 @@ int dccp_reqsk_init(struct request_sock *req,
290 inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; 280 inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport;
291 inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport; 281 inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport;
292 inet_rsk(req)->acked = 0; 282 inet_rsk(req)->acked = 0;
293 req->rcv_wnd = sysctl_dccp_feat_sequence_window;
294 dreq->dreq_timestamp_echo = 0; 283 dreq->dreq_timestamp_echo = 0;
295 284
296 /* inherit feature negotiation options from listening socket */ 285 /* inherit feature negotiation options from listening socket */
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 7b1165c21f51..1b08cae9c65b 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -23,10 +23,6 @@
23#include "dccp.h" 23#include "dccp.h"
24#include "feat.h" 24#include "feat.h"
25 25
26int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW;
27int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID;
28int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID;
29
30u64 dccp_decode_value_var(const u8 *bf, const u8 len) 26u64 dccp_decode_value_var(const u8 *bf, const u8 len)
31{ 27{
32 u64 value = 0; 28 u64 value = 0;
@@ -502,10 +498,6 @@ int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat,
502 *to++ = *val; 498 *to++ = *val;
503 if (len) 499 if (len)
504 memcpy(to, val, len); 500 memcpy(to, val, len);
505
506 dccp_pr_debug("%s(%s (%d), ...), length %d\n",
507 dccp_feat_typename(type),
508 dccp_feat_name(feat), feat, len);
509 return 0; 501 return 0;
510} 502}
511 503
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 22a618af4893..36bcc00654d3 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -161,21 +161,27 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
161 struct inet_connection_sock *icsk = inet_csk(sk); 161 struct inet_connection_sock *icsk = inet_csk(sk);
162 struct dccp_sock *dp = dccp_sk(sk); 162 struct dccp_sock *dp = dccp_sk(sk);
163 u32 ccmps = dccp_determine_ccmps(dp); 163 u32 ccmps = dccp_determine_ccmps(dp);
164 int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; 164 u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu;
165 165
166 /* Account for header lengths and IPv4/v6 option overhead */ 166 /* Account for header lengths and IPv4/v6 option overhead */
167 cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len + 167 cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len +
168 sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext)); 168 sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext));
169 169
170 /* 170 /*
171 * FIXME: this should come from the CCID infrastructure, where, say, 171 * Leave enough headroom for common DCCP header options.
172 * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets 172 * This only considers options which may appear on DCCP-Data packets, as
173 * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED 173 * per table 3 in RFC 4340, 5.8. When running out of space for other
174 * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to 174 * options (eg. Ack Vector which can take up to 255 bytes), it is better
175 * make it a multiple of 4 175 * to schedule a separate Ack. Thus we leave headroom for the following:
176 * - 1 byte for Slow Receiver (11.6)
177 * - 6 bytes for Timestamp (13.1)
178 * - 10 bytes for Timestamp Echo (13.3)
179 * - 8 bytes for NDP count (7.7, when activated)
180 * - 6 bytes for Data Checksum (9.3)
181 * - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled)
176 */ 182 */
177 183 cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 +
178 cur_mps -= roundup(5 + 6 + 10 + 6 + 6 + 6, 4); 184 (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4);
179 185
180 /* And store cached results */ 186 /* And store cached results */
181 icsk->icsk_pmtu_cookie = pmtu; 187 icsk->icsk_pmtu_cookie = pmtu;
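A worked instance of the new headroom term, assuming NDP counts are in use and Ack Vectors are not:

/* Worked example (dccps_send_ndp_count = 1, no Ack Vector):
 *   1 (Slow Receiver) + 6 (Timestamp) + 10 (Timestamp Echo)
 *   + 1 * 8 (NDP count) + 6 (Data Checksum)   = 31 bytes
 *   roundup(31, 4)                            = 32 bytes of option headroom
 * versus the previous fixed roundup(5+6+10+6+6+6, 4) = 40 bytes,
 * so the usable MPS grows slightly in the common Data-packet case.
 */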
@@ -270,7 +276,20 @@ void dccp_write_xmit(struct sock *sk, int block)
270 const int len = skb->len; 276 const int len = skb->len;
271 277
272 if (sk->sk_state == DCCP_PARTOPEN) { 278 if (sk->sk_state == DCCP_PARTOPEN) {
273 /* See 8.1.5. Handshake Completion */ 279 const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD;
280 /*
281 * See 8.1.5 - Handshake Completion.
282 *
283 * For robustness we resend Confirm options until the client has
284 * entered OPEN. During the initial feature negotiation, the MPS
285 * is smaller than usual, reduced by the Change/Confirm options.
286 */
287 if (!list_empty(&dp->dccps_featneg) && len > cur_mps) {
288 DCCP_WARN("Payload too large (%d) for featneg.\n", len);
289 dccp_send_ack(sk);
290 dccp_feat_list_purge(&dp->dccps_featneg);
291 }
292
274 inet_csk_schedule_ack(sk); 293 inet_csk_schedule_ack(sk);
275 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 294 inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
276 inet_csk(sk)->icsk_rto, 295 inet_csk(sk)->icsk_rto,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 945b4d5d23b3..314a1b5c033c 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -174,8 +174,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
174 struct dccp_sock *dp = dccp_sk(sk); 174 struct dccp_sock *dp = dccp_sk(sk);
175 struct inet_connection_sock *icsk = inet_csk(sk); 175 struct inet_connection_sock *icsk = inet_csk(sk);
176 176
177 dccp_minisock_init(&dp->dccps_minisock);
178
179 icsk->icsk_rto = DCCP_TIMEOUT_INIT; 177 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
180 icsk->icsk_syn_retries = sysctl_dccp_request_retries; 178 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
181 sk->sk_state = DCCP_CLOSED; 179 sk->sk_state = DCCP_CLOSED;
diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c
index 018e210875e1..a5a1856234e7 100644
--- a/net/dccp/sysctl.c
+++ b/net/dccp/sysctl.c
@@ -18,55 +18,72 @@
18#error This file should not be compiled without CONFIG_SYSCTL defined 18#error This file should not be compiled without CONFIG_SYSCTL defined
19#endif 19#endif
20 20
21/* Boundary values */
22static int zero = 0,
23 u8_max = 0xFF;
24static unsigned long seqw_min = 32;
25
21static struct ctl_table dccp_default_table[] = { 26static struct ctl_table dccp_default_table[] = {
22 { 27 {
23 .procname = "seq_window", 28 .procname = "seq_window",
24 .data = &sysctl_dccp_feat_sequence_window, 29 .data = &sysctl_dccp_sequence_window,
25 .maxlen = sizeof(sysctl_dccp_feat_sequence_window), 30 .maxlen = sizeof(sysctl_dccp_sequence_window),
26 .mode = 0644, 31 .mode = 0644,
27 .proc_handler = proc_dointvec, 32 .proc_handler = proc_doulongvec_minmax,
33 .extra1 = &seqw_min, /* RFC 4340, 7.5.2 */
28 }, 34 },
29 { 35 {
30 .procname = "rx_ccid", 36 .procname = "rx_ccid",
31 .data = &sysctl_dccp_feat_rx_ccid, 37 .data = &sysctl_dccp_rx_ccid,
32 .maxlen = sizeof(sysctl_dccp_feat_rx_ccid), 38 .maxlen = sizeof(sysctl_dccp_rx_ccid),
33 .mode = 0644, 39 .mode = 0644,
34 .proc_handler = proc_dointvec, 40 .proc_handler = proc_dointvec_minmax,
41 .extra1 = &zero,
42 .extra2 = &u8_max, /* RFC 4340, 10. */
35 }, 43 },
36 { 44 {
37 .procname = "tx_ccid", 45 .procname = "tx_ccid",
38 .data = &sysctl_dccp_feat_tx_ccid, 46 .data = &sysctl_dccp_tx_ccid,
39 .maxlen = sizeof(sysctl_dccp_feat_tx_ccid), 47 .maxlen = sizeof(sysctl_dccp_tx_ccid),
40 .mode = 0644, 48 .mode = 0644,
41 .proc_handler = proc_dointvec, 49 .proc_handler = proc_dointvec_minmax,
50 .extra1 = &zero,
51 .extra2 = &u8_max, /* RFC 4340, 10. */
42 }, 52 },
43 { 53 {
44 .procname = "request_retries", 54 .procname = "request_retries",
45 .data = &sysctl_dccp_request_retries, 55 .data = &sysctl_dccp_request_retries,
46 .maxlen = sizeof(sysctl_dccp_request_retries), 56 .maxlen = sizeof(sysctl_dccp_request_retries),
47 .mode = 0644, 57 .mode = 0644,
48 .proc_handler = proc_dointvec, 58 .proc_handler = proc_dointvec_minmax,
59 .extra1 = &zero,
60 .extra2 = &u8_max,
49 }, 61 },
50 { 62 {
51 .procname = "retries1", 63 .procname = "retries1",
52 .data = &sysctl_dccp_retries1, 64 .data = &sysctl_dccp_retries1,
53 .maxlen = sizeof(sysctl_dccp_retries1), 65 .maxlen = sizeof(sysctl_dccp_retries1),
54 .mode = 0644, 66 .mode = 0644,
55 .proc_handler = proc_dointvec, 67 .proc_handler = proc_dointvec_minmax,
68 .extra1 = &zero,
69 .extra2 = &u8_max,
56 }, 70 },
57 { 71 {
58 .procname = "retries2", 72 .procname = "retries2",
59 .data = &sysctl_dccp_retries2, 73 .data = &sysctl_dccp_retries2,
60 .maxlen = sizeof(sysctl_dccp_retries2), 74 .maxlen = sizeof(sysctl_dccp_retries2),
61 .mode = 0644, 75 .mode = 0644,
62 .proc_handler = proc_dointvec, 76 .proc_handler = proc_dointvec_minmax,
77 .extra1 = &zero,
78 .extra2 = &u8_max,
63 }, 79 },
64 { 80 {
65 .procname = "tx_qlen", 81 .procname = "tx_qlen",
66 .data = &sysctl_dccp_tx_qlen, 82 .data = &sysctl_dccp_tx_qlen,
67 .maxlen = sizeof(sysctl_dccp_tx_qlen), 83 .maxlen = sizeof(sysctl_dccp_tx_qlen),
68 .mode = 0644, 84 .mode = 0644,
69 .proc_handler = proc_dointvec, 85 .proc_handler = proc_dointvec_minmax,
86 .extra1 = &zero,
70 }, 87 },
71 { 88 {
72 .procname = "sync_ratelimit", 89 .procname = "sync_ratelimit",
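In summary, the minmax handlers introduced above give the table the following effective ranges (paths assume the usual net.dccp.default sysctl location):

/* /proc/sys/net/dccp/default/ after this hunk:
 *   seq_window                        >= 32       (RFC 4340, 7.5.2)
 *   rx_ccid, tx_ccid                  0 .. 255    (RFC 4340, 10.)
 *   request_retries, retries1/2       0 .. 255
 *   tx_qlen                           >= 0
 * Values outside these ranges are not applied by the *_minmax handlers.
 */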
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index cf0e18499297..9647d911f916 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1246,11 +1246,12 @@ static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1246 1246
1247 case TIOCINQ: 1247 case TIOCINQ:
1248 lock_sock(sk); 1248 lock_sock(sk);
1249 if ((skb = skb_peek(&scp->other_receive_queue)) != NULL) { 1249 skb = skb_peek(&scp->other_receive_queue);
1250 if (skb) {
1250 amount = skb->len; 1251 amount = skb->len;
1251 } else { 1252 } else {
1252 struct sk_buff *skb = sk->sk_receive_queue.next; 1253 skb = sk->sk_receive_queue.next;
1253 for(;;) { 1254 for (;;) {
1254 if (skb == 1255 if (skb ==
1255 (struct sk_buff *)&sk->sk_receive_queue) 1256 (struct sk_buff *)&sk->sk_receive_queue)
1256 break; 1257 break;
@@ -1579,16 +1580,16 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us
1579 default: 1580 default:
1580#ifdef CONFIG_NETFILTER 1581#ifdef CONFIG_NETFILTER
1581 { 1582 {
1582 int val, len; 1583 int ret, len;
1583 1584
1584 if(get_user(len, optlen)) 1585 if(get_user(len, optlen))
1585 return -EFAULT; 1586 return -EFAULT;
1586 1587
1587 val = nf_getsockopt(sk, PF_DECnet, optname, 1588 ret = nf_getsockopt(sk, PF_DECnet, optname,
1588 optval, &len); 1589 optval, &len);
1589 if (val >= 0) 1590 if (ret >= 0)
1590 val = put_user(len, optlen); 1591 ret = put_user(len, optlen);
1591 return val; 1592 return ret;
1592 } 1593 }
1593#endif 1594#endif
1594 case DSO_STREAM: 1595 case DSO_STREAM:
@@ -2071,8 +2072,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
2071 } 2072 }
2072out: 2073out:
2073 2074
2074 if (skb) 2075 kfree_skb(skb);
2075 kfree_skb(skb);
2076 2076
2077 release_sock(sk); 2077 release_sock(sk);
2078 2078
@@ -2112,9 +2112,8 @@ static struct notifier_block dn_dev_notifier = {
2112 2112
2113extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); 2113extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
2114 2114
2115static struct packet_type dn_dix_packet_type = { 2115static struct packet_type dn_dix_packet_type __read_mostly = {
2116 .type = __constant_htons(ETH_P_DNA_RT), 2116 .type = cpu_to_be16(ETH_P_DNA_RT),
2117 .dev = NULL, /* All devices */
2118 .func = dn_route_rcv, 2117 .func = dn_route_rcv,
2119}; 2118};
2120 2119
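The packet_type update above swaps __constant_htons() for cpu_to_be16(); both yield a compile-time big-endian constant in a static initialiser, the latter being the spelling the tree is converging on. A one-line sketch for comparison:

/* Equivalent initialiser, shown for comparison (sketch only): */
static const __be16 dna_rt_proto = cpu_to_be16(ETH_P_DNA_RT);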
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index daf2b98b15fe..1c6a5bb6f0c8 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -684,7 +684,6 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
684 return -ENODEV; 684 return -ENODEV;
685 685
686 if ((dn_db = dev->dn_ptr) == NULL) { 686 if ((dn_db = dev->dn_ptr) == NULL) {
687 int err;
688 dn_db = dn_dev_create(dev, &err); 687 dn_db = dn_dev_create(dev, &err);
689 if (!dn_db) 688 if (!dn_db)
690 return err; 689 return err;
@@ -769,7 +768,8 @@ static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa)
769 kfree_skb(skb); 768 kfree_skb(skb);
770 goto errout; 769 goto errout;
771 } 770 }
772 err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); 771 rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL);
772 return;
773errout: 773errout:
774 if (err < 0) 774 if (err < 0)
775 rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err); 775 rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err);
@@ -1322,6 +1322,7 @@ static inline int is_dn_dev(struct net_device *dev)
1322} 1322}
1323 1323
1324static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) 1324static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos)
1325 __acquires(&dev_base_lock)
1325{ 1326{
1326 int i; 1327 int i;
1327 struct net_device *dev; 1328 struct net_device *dev;
@@ -1364,6 +1365,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1364} 1365}
1365 1366
1366static void dn_dev_seq_stop(struct seq_file *seq, void *v) 1367static void dn_dev_seq_stop(struct seq_file *seq, void *v)
1368 __releases(&dev_base_lock)
1367{ 1369{
1368 read_unlock(&dev_base_lock); 1370 read_unlock(&dev_base_lock);
1369} 1371}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index c754670b7fca..0cc4394117df 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -124,7 +124,7 @@ int decnet_dst_gc_interval = 2;
124 124
125static struct dst_ops dn_dst_ops = { 125static struct dst_ops dn_dst_ops = {
126 .family = PF_DECnet, 126 .family = PF_DECnet,
127 .protocol = __constant_htons(ETH_P_DNA_RT), 127 .protocol = cpu_to_be16(ETH_P_DNA_RT),
128 .gc_thresh = 128, 128 .gc_thresh = 128,
129 .gc = dn_dst_gc, 129 .gc = dn_dst_gc,
130 .check = dn_dst_check, 130 .check = dn_dst_check,
@@ -380,7 +380,6 @@ static int dn_return_short(struct sk_buff *skb)
380 unsigned char *ptr; 380 unsigned char *ptr;
381 __le16 *src; 381 __le16 *src;
382 __le16 *dst; 382 __le16 *dst;
383 __le16 tmp;
384 383
385 /* Add back headers */ 384 /* Add back headers */
386 skb_push(skb, skb->data - skb_network_header(skb)); 385 skb_push(skb, skb->data - skb_network_header(skb));
@@ -399,10 +398,7 @@ static int dn_return_short(struct sk_buff *skb)
399 ptr += 2; 398 ptr += 2;
400 *ptr = 0; /* Zero hop count */ 399 *ptr = 0; /* Zero hop count */
401 400
402 /* Swap source and destination */ 401 swap(*src, *dst);
403 tmp = *src;
404 *src = *dst;
405 *dst = tmp;
406 402
407 skb->pkt_type = PACKET_OUTGOING; 403 skb->pkt_type = PACKET_OUTGOING;
408 dn_rt_finish_output(skb, NULL, NULL); 404 dn_rt_finish_output(skb, NULL, NULL);
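The open-coded three-statement exchange is replaced by the generic swap() helper; roughly what that macro does is sketched below (see <linux/kernel.h>, the in-tree definition may differ in detail).

/* Sketch of the generic helper used above: exchange two lvalues of the
 * same type without an explicit temporary at the call site.
 */
#define swap(a, b) \
	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)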
diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c
index 69ad9280c693..67054b0d550f 100644
--- a/net/decnet/dn_table.c
+++ b/net/decnet/dn_table.c
@@ -375,7 +375,8 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id,
375 kfree_skb(skb); 375 kfree_skb(skb);
376 goto errout; 376 goto errout;
377 } 377 }
378 err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); 378 rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL);
379 return;
379errout: 380errout:
380 if (err < 0) 381 if (err < 0)
381 rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err); 382 rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err);
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 965397af9a80..5bcd592ae6dd 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -179,7 +179,7 @@ static int dn_node_address_handler(ctl_table *table, int write,
179 } 179 }
180 180
181 if (write) { 181 if (write) {
182 int len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1); 182 len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1);
183 183
184 if (copy_from_user(addr, buffer, len)) 184 if (copy_from_user(addr, buffer, len))
185 return -EFAULT; 185 return -EFAULT;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 49211b35725b..c51b55400dc5 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -41,13 +41,13 @@ config NET_DSA_MV88E6XXX_NEED_PPU
41 default n 41 default n
42 42
43config NET_DSA_MV88E6131 43config NET_DSA_MV88E6131
44 bool "Marvell 88E6131 ethernet switch chip support" 44 bool "Marvell 88E6095/6095F/6131 ethernet switch chip support"
45 select NET_DSA_MV88E6XXX 45 select NET_DSA_MV88E6XXX
46 select NET_DSA_MV88E6XXX_NEED_PPU 46 select NET_DSA_MV88E6XXX_NEED_PPU
47 select NET_DSA_TAG_DSA 47 select NET_DSA_TAG_DSA
48 ---help--- 48 ---help---
49 This enables support for the Marvell 88E6131 ethernet switch 49 This enables support for the Marvell 88E6095/6095F/6131
50 chip. 50 ethernet switch chips.
51 51
52config NET_DSA_MV88E6123_61_65 52config NET_DSA_MV88E6123_61_65
53 bool "Marvell 88E6123/6161/6165 ethernet switch chip support" 53 bool "Marvell 88E6123/6161/6165 ethernet switch chip support"
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 33e99462023a..71489f69a42c 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * net/dsa/dsa.c - Hardware switch handling 2 * net/dsa/dsa.c - Hardware switch handling
3 * Copyright (c) 2008 Marvell Semiconductor 3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -67,12 +67,13 @@ dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name)
67 67
68/* basic switch operations **************************************************/ 68/* basic switch operations **************************************************/
69static struct dsa_switch * 69static struct dsa_switch *
70dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, 70dsa_switch_setup(struct dsa_switch_tree *dst, int index,
71 struct mii_bus *bus, struct net_device *dev) 71 struct device *parent, struct mii_bus *bus)
72{ 72{
73 struct dsa_chip_data *pd = dst->pd->chip + index;
74 struct dsa_switch_driver *drv;
73 struct dsa_switch *ds; 75 struct dsa_switch *ds;
74 int ret; 76 int ret;
75 struct dsa_switch_driver *drv;
76 char *name; 77 char *name;
77 int i; 78 int i;
78 79
@@ -81,11 +82,12 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd,
81 */ 82 */
82 drv = dsa_switch_probe(bus, pd->sw_addr, &name); 83 drv = dsa_switch_probe(bus, pd->sw_addr, &name);
83 if (drv == NULL) { 84 if (drv == NULL) {
84 printk(KERN_ERR "%s: could not detect attached switch\n", 85 printk(KERN_ERR "%s[%d]: could not detect attached switch\n",
85 dev->name); 86 dst->master_netdev->name, index);
86 return ERR_PTR(-EINVAL); 87 return ERR_PTR(-EINVAL);
87 } 88 }
88 printk(KERN_INFO "%s: detected a %s switch\n", dev->name, name); 89 printk(KERN_INFO "%s[%d]: detected a %s switch\n",
90 dst->master_netdev->name, index, name);
89 91
90 92
91 /* 93 /*
@@ -95,18 +97,16 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd,
95 if (ds == NULL) 97 if (ds == NULL)
96 return ERR_PTR(-ENOMEM); 98 return ERR_PTR(-ENOMEM);
97 99
98 ds->pd = pd; 100 ds->dst = dst;
99 ds->master_netdev = dev; 101 ds->index = index;
100 ds->master_mii_bus = bus; 102 ds->pd = dst->pd->chip + index;
101
102 ds->drv = drv; 103 ds->drv = drv;
103 ds->tag_protocol = drv->tag_protocol; 104 ds->master_mii_bus = bus;
104 105
105 106
106 /* 107 /*
107 * Validate supplied switch configuration. 108 * Validate supplied switch configuration.
108 */ 109 */
109 ds->cpu_port = -1;
110 for (i = 0; i < DSA_MAX_PORTS; i++) { 110 for (i = 0; i < DSA_MAX_PORTS; i++) {
111 char *name; 111 char *name;
112 112
@@ -115,32 +115,28 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd,
115 continue; 115 continue;
116 116
117 if (!strcmp(name, "cpu")) { 117 if (!strcmp(name, "cpu")) {
118 if (ds->cpu_port != -1) { 118 if (dst->cpu_switch != -1) {
119 printk(KERN_ERR "multiple cpu ports?!\n"); 119 printk(KERN_ERR "multiple cpu ports?!\n");
120 ret = -EINVAL; 120 ret = -EINVAL;
121 goto out; 121 goto out;
122 } 122 }
123 ds->cpu_port = i; 123 dst->cpu_switch = index;
124 dst->cpu_port = i;
125 } else if (!strcmp(name, "dsa")) {
126 ds->dsa_port_mask |= 1 << i;
124 } else { 127 } else {
125 ds->valid_port_mask |= 1 << i; 128 ds->phys_port_mask |= 1 << i;
126 } 129 }
127 } 130 }
128 131
129 if (ds->cpu_port == -1) {
130 printk(KERN_ERR "no cpu port?!\n");
131 ret = -EINVAL;
132 goto out;
133 }
134
135 132
136 /* 133 /*
137 * If we use a tagging format that doesn't have an ethertype 134 * If the CPU connects to this switch, set the switch tree
138 * field, make sure that all packets from this point on get 135 * tagging protocol to the preferred tagging format of this
139 * sent to the tag format's receive function. (Which will 136 * switch.
140 * discard received packets until we set ds->ports[] below.)
141 */ 137 */
142 wmb(); 138 if (ds->dst->cpu_switch == index)
143 dev->dsa_ptr = (void *)ds; 139 ds->dst->tag_protocol = drv->tag_protocol;
144 140
145 141
146 /* 142 /*
@@ -150,7 +146,7 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd,
150 if (ret < 0) 146 if (ret < 0)
151 goto out; 147 goto out;
152 148
153 ret = drv->set_addr(ds, dev->dev_addr); 149 ret = drv->set_addr(ds, dst->master_netdev->dev_addr);
154 if (ret < 0) 150 if (ret < 0)
155 goto out; 151 goto out;
156 152
@@ -169,18 +165,18 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd,
169 /* 165 /*
170 * Create network devices for physical switch ports. 166 * Create network devices for physical switch ports.
171 */ 167 */
172 wmb();
173 for (i = 0; i < DSA_MAX_PORTS; i++) { 168 for (i = 0; i < DSA_MAX_PORTS; i++) {
174 struct net_device *slave_dev; 169 struct net_device *slave_dev;
175 170
176 if (!(ds->valid_port_mask & (1 << i))) 171 if (!(ds->phys_port_mask & (1 << i)))
177 continue; 172 continue;
178 173
179 slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]); 174 slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]);
180 if (slave_dev == NULL) { 175 if (slave_dev == NULL) {
181 printk(KERN_ERR "%s: can't create dsa slave " 176 printk(KERN_ERR "%s[%d]: can't create dsa "
182 "device for port %d(%s)\n", 177 "slave device for port %d(%s)\n",
183 dev->name, i, pd->port_names[i]); 178 dst->master_netdev->name,
179 index, i, pd->port_names[i]);
184 continue; 180 continue;
185 } 181 }
186 182
@@ -192,7 +188,6 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd,
192out_free: 188out_free:
193 mdiobus_free(ds->slave_mii_bus); 189 mdiobus_free(ds->slave_mii_bus);
194out: 190out:
195 dev->dsa_ptr = NULL;
196 kfree(ds); 191 kfree(ds);
197 return ERR_PTR(ret); 192 return ERR_PTR(ret);
198} 193}
@@ -212,35 +207,42 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
212 */ 207 */
213bool dsa_uses_dsa_tags(void *dsa_ptr) 208bool dsa_uses_dsa_tags(void *dsa_ptr)
214{ 209{
215 struct dsa_switch *ds = dsa_ptr; 210 struct dsa_switch_tree *dst = dsa_ptr;
216 211
217 return !!(ds->tag_protocol == htons(ETH_P_DSA)); 212 return !!(dst->tag_protocol == htons(ETH_P_DSA));
218} 213}
219 214
220bool dsa_uses_trailer_tags(void *dsa_ptr) 215bool dsa_uses_trailer_tags(void *dsa_ptr)
221{ 216{
222 struct dsa_switch *ds = dsa_ptr; 217 struct dsa_switch_tree *dst = dsa_ptr;
223 218
224 return !!(ds->tag_protocol == htons(ETH_P_TRAILER)); 219 return !!(dst->tag_protocol == htons(ETH_P_TRAILER));
225} 220}
226 221
227 222
228/* link polling *************************************************************/ 223/* link polling *************************************************************/
229static void dsa_link_poll_work(struct work_struct *ugly) 224static void dsa_link_poll_work(struct work_struct *ugly)
230{ 225{
231 struct dsa_switch *ds; 226 struct dsa_switch_tree *dst;
227 int i;
228
229 dst = container_of(ugly, struct dsa_switch_tree, link_poll_work);
232 230
233 ds = container_of(ugly, struct dsa_switch, link_poll_work); 231 for (i = 0; i < dst->pd->nr_chips; i++) {
232 struct dsa_switch *ds = dst->ds[i];
234 233
235 ds->drv->poll_link(ds); 234 if (ds != NULL && ds->drv->poll_link != NULL)
236 mod_timer(&ds->link_poll_timer, round_jiffies(jiffies + HZ)); 235 ds->drv->poll_link(ds);
236 }
237
238 mod_timer(&dst->link_poll_timer, round_jiffies(jiffies + HZ));
237} 239}
238 240
239static void dsa_link_poll_timer(unsigned long _ds) 241static void dsa_link_poll_timer(unsigned long _dst)
240{ 242{
241 struct dsa_switch *ds = (void *)_ds; 243 struct dsa_switch_tree *dst = (void *)_dst;
242 244
243 schedule_work(&ds->link_poll_work); 245 schedule_work(&dst->link_poll_work);
244} 246}
245 247
246 248
@@ -303,18 +305,14 @@ static int dsa_probe(struct platform_device *pdev)
303 static int dsa_version_printed; 305 static int dsa_version_printed;
304 struct dsa_platform_data *pd = pdev->dev.platform_data; 306 struct dsa_platform_data *pd = pdev->dev.platform_data;
305 struct net_device *dev; 307 struct net_device *dev;
306 struct mii_bus *bus; 308 struct dsa_switch_tree *dst;
307 struct dsa_switch *ds; 309 int i;
308 310
309 if (!dsa_version_printed++) 311 if (!dsa_version_printed++)
310 printk(KERN_NOTICE "Distributed Switch Architecture " 312 printk(KERN_NOTICE "Distributed Switch Architecture "
311 "driver version %s\n", dsa_driver_version); 313 "driver version %s\n", dsa_driver_version);
312 314
313 if (pd == NULL || pd->mii_bus == NULL || pd->netdev == NULL) 315 if (pd == NULL || pd->netdev == NULL)
314 return -EINVAL;
315
316 bus = dev_to_mii_bus(pd->mii_bus);
317 if (bus == NULL)
318 return -EINVAL; 316 return -EINVAL;
319 317
320 dev = dev_to_net_device(pd->netdev); 318 dev = dev_to_net_device(pd->netdev);
@@ -326,36 +324,79 @@ static int dsa_probe(struct platform_device *pdev)
326 return -EEXIST; 324 return -EEXIST;
327 } 325 }
328 326
329 ds = dsa_switch_setup(&pdev->dev, pd, bus, dev); 327 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
330 if (IS_ERR(ds)) { 328 if (dst == NULL) {
331 dev_put(dev); 329 dev_put(dev);
332 return PTR_ERR(ds); 330 return -ENOMEM;
333 } 331 }
334 332
335 if (ds->drv->poll_link != NULL) { 333 platform_set_drvdata(pdev, dst);
336 INIT_WORK(&ds->link_poll_work, dsa_link_poll_work); 334
337 init_timer(&ds->link_poll_timer); 335 dst->pd = pd;
338 ds->link_poll_timer.data = (unsigned long)ds; 336 dst->master_netdev = dev;
339 ds->link_poll_timer.function = dsa_link_poll_timer; 337 dst->cpu_switch = -1;
340 ds->link_poll_timer.expires = round_jiffies(jiffies + HZ); 338 dst->cpu_port = -1;
341 add_timer(&ds->link_poll_timer); 339
340 for (i = 0; i < pd->nr_chips; i++) {
341 struct mii_bus *bus;
342 struct dsa_switch *ds;
343
344 bus = dev_to_mii_bus(pd->chip[i].mii_bus);
345 if (bus == NULL) {
346 printk(KERN_ERR "%s[%d]: no mii bus found for "
347 "dsa switch\n", dev->name, i);
348 continue;
349 }
350
351 ds = dsa_switch_setup(dst, i, &pdev->dev, bus);
352 if (IS_ERR(ds)) {
353 printk(KERN_ERR "%s[%d]: couldn't create dsa switch "
354 "instance (error %ld)\n", dev->name, i,
355 PTR_ERR(ds));
356 continue;
357 }
358
359 dst->ds[i] = ds;
360 if (ds->drv->poll_link != NULL)
361 dst->link_poll_needed = 1;
342 } 362 }
343 363
344 platform_set_drvdata(pdev, ds); 364 /*
365 * If we use a tagging format that doesn't have an ethertype
366 * field, make sure that all packets from this point on get
367 * sent to the tag format's receive function.
368 */
369 wmb();
370 dev->dsa_ptr = (void *)dst;
371
372 if (dst->link_poll_needed) {
373 INIT_WORK(&dst->link_poll_work, dsa_link_poll_work);
374 init_timer(&dst->link_poll_timer);
375 dst->link_poll_timer.data = (unsigned long)dst;
376 dst->link_poll_timer.function = dsa_link_poll_timer;
377 dst->link_poll_timer.expires = round_jiffies(jiffies + HZ);
378 add_timer(&dst->link_poll_timer);
379 }
345 380
346 return 0; 381 return 0;
347} 382}
348 383
349static int dsa_remove(struct platform_device *pdev) 384static int dsa_remove(struct platform_device *pdev)
350{ 385{
351 struct dsa_switch *ds = platform_get_drvdata(pdev); 386 struct dsa_switch_tree *dst = platform_get_drvdata(pdev);
387 int i;
352 388
353 if (ds->drv->poll_link != NULL) 389 if (dst->link_poll_needed)
354 del_timer_sync(&ds->link_poll_timer); 390 del_timer_sync(&dst->link_poll_timer);
355 391
356 flush_scheduled_work(); 392 flush_scheduled_work();
357 393
358 dsa_switch_destroy(ds); 394 for (i = 0; i < dst->pd->nr_chips; i++) {
395 struct dsa_switch *ds = dst->ds[i];
396
397 if (ds != NULL)
398 dsa_switch_destroy(ds);
399 }
359 400
360 return 0; 401 return 0;
361} 402}
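dsa_probe() now walks an array of chips instead of a single switch. A board-code sketch for a hypothetical two-chip tree follows, using only the dsa_platform_data/dsa_chip_data fields this patch references (nr_chips, chip[], sw_addr, port_names[], rtable); the netdev and per-chip mii_bus device pointers are left for the board file to fill in.

/* Hypothetical two-chip tree: chip 0 carries the CPU port, chip 1 is
 * reached through a "dsa" inter-switch link. Assumed layout only.
 */
static s8 sketch_rtable0[2] = { -1, 3 };	/* reach chip 1 via port 3 */
static s8 sketch_rtable1[2] = {  4, -1 };	/* reach chip 0 via port 4 */

static struct dsa_chip_data sketch_chips[2] = {
	{
		.sw_addr	= 0x10,
		.rtable		= sketch_rtable0,
		.port_names	= { "lan1", "lan2", "lan3", "dsa", NULL, "cpu" },
	},
	{
		.sw_addr	= 0x11,
		.rtable		= sketch_rtable1,
		.port_names	= { "lan4", "lan5", "lan6", NULL, "dsa" },
	},
};

static struct dsa_platform_data sketch_pd = {
	.nr_chips	= 2,
	.chip		= sketch_chips,
};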
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 7063378a1ebf..41055f33d28a 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -1,6 +1,6 @@
1/* 1/*
2 * net/dsa/dsa_priv.h - Hardware switch handling 2 * net/dsa/dsa_priv.h - Hardware switch handling
3 * Copyright (c) 2008 Marvell Semiconductor 3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -19,42 +19,107 @@
19 19
20struct dsa_switch { 20struct dsa_switch {
21 /* 21 /*
22 * Configuration data for the platform device that owns 22 * Parent switch tree, and switch index.
23 * this dsa switch instance.
24 */ 23 */
25 struct dsa_platform_data *pd; 24 struct dsa_switch_tree *dst;
25 int index;
26 26
27 /* 27 /*
28 * References to network device and mii bus to use. 28 * Configuration data for this switch.
29 */ 29 */
30 struct net_device *master_netdev; 30 struct dsa_chip_data *pd;
31 struct mii_bus *master_mii_bus;
32 31
33 /* 32 /*
34 * The used switch driver and frame tagging type. 33 * The used switch driver.
35 */ 34 */
36 struct dsa_switch_driver *drv; 35 struct dsa_switch_driver *drv;
37 __be16 tag_protocol; 36
37 /*
38 * Reference to mii bus to use.
39 */
40 struct mii_bus *master_mii_bus;
38 41
39 /* 42 /*
40 * Slave mii_bus and devices for the individual ports. 43 * Slave mii_bus and devices for the individual ports.
41 */ 44 */
42 int cpu_port; 45 u32 dsa_port_mask;
43 u32 valid_port_mask; 46 u32 phys_port_mask;
44 struct mii_bus *slave_mii_bus; 47 struct mii_bus *slave_mii_bus;
45 struct net_device *ports[DSA_MAX_PORTS]; 48 struct net_device *ports[DSA_MAX_PORTS];
49};
50
51struct dsa_switch_tree {
52 /*
53 * Configuration data for the platform device that owns
54 * this dsa switch tree instance.
55 */
56 struct dsa_platform_data *pd;
57
58 /*
59 * Reference to network device to use, and which tagging
60 * protocol to use.
61 */
62 struct net_device *master_netdev;
63 __be16 tag_protocol;
64
65 /*
66 * The switch and port to which the CPU is attached.
67 */
68 s8 cpu_switch;
69 s8 cpu_port;
46 70
47 /* 71 /*
48 * Link state polling. 72 * Link state polling.
49 */ 73 */
50 struct work_struct link_poll_work; 74 int link_poll_needed;
51 struct timer_list link_poll_timer; 75 struct work_struct link_poll_work;
76 struct timer_list link_poll_timer;
77
78 /*
79 * Data for the individual switch chips.
80 */
81 struct dsa_switch *ds[DSA_MAX_SWITCHES];
52}; 82};
53 83
84static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
85{
86 return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port);
87}
88
89static inline u8 dsa_upstream_port(struct dsa_switch *ds)
90{
91 struct dsa_switch_tree *dst = ds->dst;
92
93 /*
94 * If this is the root switch (i.e. the switch that connects
95 * to the CPU), return the cpu port number on this switch.
96 * Else return the (DSA) port number that connects to the
97 * switch that is one hop closer to the cpu.
98 */
99 if (dst->cpu_switch == ds->index)
100 return dst->cpu_port;
101 else
102 return ds->pd->rtable[dst->cpu_switch];
103}
104
54struct dsa_slave_priv { 105struct dsa_slave_priv {
106 /*
107 * The linux network interface corresponding to this
108 * switch port.
109 */
55 struct net_device *dev; 110 struct net_device *dev;
111
112 /*
113 * Which switch this port is a part of, and the port index
114 * for this port.
115 */
56 struct dsa_switch *parent; 116 struct dsa_switch *parent;
57 int port; 117 u8 port;
118
119 /*
120 * The phylib phy_device pointer for the PHY connected
121 * to this port.
122 */
58 struct phy_device *phy; 123 struct phy_device *phy;
59}; 124};
60 125
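For the same hypothetical two-chip layout (CPU behind switch 0, port 5; switch 1 reaching switch 0 through its port 4), the topology helpers above resolve as follows:

/* Worked example (hypothetical tree): cpu_switch = 0, cpu_port = 5,
 * ds1->pd->rtable[0] = 4.
 *
 *   dsa_is_cpu_port(ds0, 5)  -> true    (the only CPU-facing port)
 *   dsa_is_cpu_port(ds1, p)  -> false   for every p
 *   dsa_upstream_port(ds0)   -> 5       (root switch: its own CPU port)
 *   dsa_upstream_port(ds1)   -> 4       (DSA port one hop closer to the CPU)
 */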
diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c
index 85081ae9fe89..83277f463af7 100644
--- a/net/dsa/mv88e6060.c
+++ b/net/dsa/mv88e6060.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips 2 * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips
3 * Copyright (c) 2008 Marvell Semiconductor 3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -81,7 +81,7 @@ static int mv88e6060_switch_reset(struct dsa_switch *ds)
81 /* 81 /*
82 * Reset the switch. 82 * Reset the switch.
83 */ 83 */
84 REG_WRITE(REG_GLOBAL, 0x0A, 0xa130); 84 REG_WRITE(REG_GLOBAL, 0x0a, 0xa130);
85 85
86 /* 86 /*
87 * Wait up to one second for reset to complete. 87 * Wait up to one second for reset to complete.
@@ -128,7 +128,7 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p)
128 * state to Forwarding. Additionally, if this is the CPU 128 * state to Forwarding. Additionally, if this is the CPU
129 * port, enable Ingress and Egress Trailer tagging mode. 129 * port, enable Ingress and Egress Trailer tagging mode.
130 */ 130 */
131 REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x4103 : 0x0003); 131 REG_WRITE(addr, 0x04, dsa_is_cpu_port(ds, p) ? 0x4103 : 0x0003);
132 132
133 /* 133 /*
134 * Port based VLAN map: give each port its own address 134 * Port based VLAN map: give each port its own address
@@ -138,9 +138,9 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p)
138 */ 138 */
139 REG_WRITE(addr, 0x06, 139 REG_WRITE(addr, 0x06,
140 ((p & 0xf) << 12) | 140 ((p & 0xf) << 12) |
141 ((p == ds->cpu_port) ? 141 (dsa_is_cpu_port(ds, p) ?
142 ds->valid_port_mask : 142 ds->phys_port_mask :
143 (1 << ds->cpu_port))); 143 (1 << ds->dst->cpu_port)));
144 144
145 /* 145 /*
146 * Port Association Vector: when learning source addresses 146 * Port Association Vector: when learning source addresses
diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c
index ec8c6a0482d3..52faaa21a4d9 100644
--- a/net/dsa/mv88e6123_61_65.c
+++ b/net/dsa/mv88e6123_61_65.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support 2 * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support
3 * Copyright (c) 2008 Marvell Semiconductor 3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -98,17 +98,17 @@ static int mv88e6123_61_65_setup_global(struct dsa_switch *ds)
98 return ret; 98 return ret;
99 99
100 /* 100 /*
101 * Configure the cpu port, and configure the cpu port as the 101 * Configure the upstream port, and configure the upstream
102 * port to which ingress and egress monitor frames are to be 102 * port as the port to which ingress and egress monitor frames
103 * sent. 103 * are to be sent.
104 */ 104 */
105 REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1110)); 105 REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110));
106 106
107 /* 107 /*
108 * Disable remote management for now, and set the switch's 108 * Disable remote management for now, and set the switch's
109 * DSA device number to zero. 109 * DSA device number.
110 */ 110 */
111 REG_WRITE(REG_GLOBAL, 0x1c, 0x0000); 111 REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f);
112 112
113 /* 113 /*
114 * Send all frames with destination addresses matching 114 * Send all frames with destination addresses matching
@@ -133,10 +133,17 @@ static int mv88e6123_61_65_setup_global(struct dsa_switch *ds)
133 REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); 133 REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff);
134 134
135 /* 135 /*
136 * Map all DSA device IDs to the CPU port. 136 * Program the DSA routing table.
137 */ 137 */
138 for (i = 0; i < 32; i++) 138 for (i = 0; i < 32; i++) {
139 REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port); 139 int nexthop;
140
141 nexthop = 0x1f;
142 if (i != ds->index && i < ds->dst->pd->nr_chips)
143 nexthop = ds->pd->rtable[i] & 0x1f;
144
145 REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop);
146 }
140 147
141 /* 148 /*
142 * Clear all trunk masks. 149 * Clear all trunk masks.
@@ -176,12 +183,18 @@ static int mv88e6123_61_65_setup_global(struct dsa_switch *ds)
176static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) 183static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p)
177{ 184{
178 int addr = REG_PORT(p); 185 int addr = REG_PORT(p);
186 u16 val;
179 187
180 /* 188 /*
181 * MAC Forcing register: don't force link, speed, duplex 189 * MAC Forcing register: don't force link, speed, duplex
182 * or flow control state to any particular values. 190 * or flow control state to any particular values on physical
191 * ports, but force the CPU port and all DSA ports to 1000 Mb/s
192 * full duplex.
183 */ 193 */
184 REG_WRITE(addr, 0x01, 0x0003); 194 if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p))
195 REG_WRITE(addr, 0x01, 0x003e);
196 else
197 REG_WRITE(addr, 0x01, 0x0003);
185 198
186 /* 199 /*
187 * Do not limit the period of time that this port can be 200 * Do not limit the period of time that this port can be
@@ -192,37 +205,50 @@ static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p)
192 205
193 /* 206 /*
194 * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, 207 * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock,
195 * configure the requested (DSA/EDSA) tagging mode if this is 208 * disable Header mode, enable IGMP/MLD snooping, disable VLAN
196 * the CPU port, disable Header mode, enable IGMP/MLD snooping, 209 * tunneling, determine priority by looking at 802.1p and IP
197 * disable VLAN tunneling, determine priority by looking at 210 * priority fields (IP prio has precedence), and set STP state
198 * 802.1p and IP priority fields (IP prio has precedence), and 211 * to Forwarding.
199 * set STP state to Forwarding. Finally, if this is the CPU 212 *
200 * port, additionally enable forwarding of unknown unicast and 213 * If this is the CPU link, use DSA or EDSA tagging depending
201 * multicast addresses. 214 * on which tagging mode was configured.
202 */ 215 *
203 REG_WRITE(addr, 0x04, 216 * If this is a link to another switch, use DSA tagging mode.
204 (p == ds->cpu_port) ? 217 *
205 (ds->tag_protocol == htons(ETH_P_DSA)) ? 218 * If this is the upstream port for this switch, enable
206 0x053f : 0x373f : 219 * forwarding of unknown unicasts and multicasts.
207 0x0433); 220 */
221 val = 0x0433;
222 if (dsa_is_cpu_port(ds, p)) {
223 if (ds->dst->tag_protocol == htons(ETH_P_EDSA))
224 val |= 0x3300;
225 else
226 val |= 0x0100;
227 }
228 if (ds->dsa_port_mask & (1 << p))
229 val |= 0x0100;
230 if (p == dsa_upstream_port(ds))
231 val |= 0x000c;
232 REG_WRITE(addr, 0x04, val);
208 233
209 /* 234 /*
210 * Port Control 1: disable trunking. Also, if this is the 235 * Port Control 1: disable trunking. Also, if this is the
211 * CPU port, enable learn messages to be sent to this port. 236 * CPU port, enable learn messages to be sent to this port.
212 */ 237 */
213 REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000); 238 REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
214 239
215 /* 240 /*
216 * Port based VLAN map: give each port its own address 241 * Port based VLAN map: give each port its own address
217 * database, allow the CPU port to talk to each of the 'real' 242 * database, allow the CPU port to talk to each of the 'real'
218 * ports, and allow each of the 'real' ports to only talk to 243 * ports, and allow each of the 'real' ports to only talk to
219 * the CPU port. 244 * the upstream port.
220 */ 245 */
221 REG_WRITE(addr, 0x06, 246 val = (p & 0xf) << 12;
222 ((p & 0xf) << 12) | 247 if (dsa_is_cpu_port(ds, p))
223 ((p == ds->cpu_port) ? 248 val |= ds->phys_port_mask;
224 ds->valid_port_mask : 249 else
225 (1 << ds->cpu_port))); 250 val |= 1 << dsa_upstream_port(ds);
251 REG_WRITE(addr, 0x06, val);
226 252
227 /* 253 /*
228 * Default VLAN ID and priority: don't set a default VLAN 254 * Default VLAN ID and priority: don't set a default VLAN
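A quick check of the Port Control composition above: on a single-chip tree the CPU port is also the upstream port, so the bit-wise build-up reproduces the constants the old code wrote verbatim.

/* Port Control (register 0x04) values produced by the new composition:
 *   EDSA-tagged CPU/upstream port: 0x0433 | 0x3300 | 0x000c = 0x373f
 *   DSA-tagged  CPU/upstream port: 0x0433 | 0x0100 | 0x000c = 0x053f
 *   downstream inter-switch port:  0x0433 | 0x0100          = 0x0533
 *   ordinary user port:            0x0433
 * The first two match the old hard-coded 0x373f/0x053f exactly.
 */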
@@ -394,7 +420,7 @@ static int mv88e6123_61_65_get_sset_count(struct dsa_switch *ds)
394} 420}
395 421
396static struct dsa_switch_driver mv88e6123_61_65_switch_driver = { 422static struct dsa_switch_driver mv88e6123_61_65_switch_driver = {
397 .tag_protocol = __constant_htons(ETH_P_EDSA), 423 .tag_protocol = cpu_to_be16(ETH_P_EDSA),
398 .priv_size = sizeof(struct mv88e6xxx_priv_state), 424 .priv_size = sizeof(struct mv88e6xxx_priv_state),
399 .probe = mv88e6123_61_65_probe, 425 .probe = mv88e6123_61_65_probe,
400 .setup = mv88e6123_61_65_setup, 426 .setup = mv88e6123_61_65_setup,
diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c
index 374d46a01265..bb2b41bc854e 100644
--- a/net/dsa/mv88e6131.c
+++ b/net/dsa/mv88e6131.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * net/dsa/mv88e6131.c - Marvell 88e6131 switch chip support 2 * net/dsa/mv88e6131.c - Marvell 88e6095/6095f/6131 switch chip support
3 * Copyright (c) 2008 Marvell Semiconductor 3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -21,6 +21,8 @@ static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr)
21 ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); 21 ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03);
22 if (ret >= 0) { 22 if (ret >= 0) {
23 ret &= 0xfff0; 23 ret &= 0xfff0;
24 if (ret == 0x0950)
25 return "Marvell 88E6095/88E6095F";
24 if (ret == 0x1060) 26 if (ret == 0x1060)
25 return "Marvell 88E6131"; 27 return "Marvell 88E6131";
26 } 28 }
@@ -36,7 +38,7 @@ static int mv88e6131_switch_reset(struct dsa_switch *ds)
36 /* 38 /*
37 * Set all ports to the disabled state. 39 * Set all ports to the disabled state.
38 */ 40 */
39 for (i = 0; i < 8; i++) { 41 for (i = 0; i < 11; i++) {
40 ret = REG_READ(REG_PORT(i), 0x04); 42 ret = REG_READ(REG_PORT(i), 0x04);
41 REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); 43 REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc);
42 } 44 }
@@ -100,17 +102,17 @@ static int mv88e6131_setup_global(struct dsa_switch *ds)
100 REG_WRITE(REG_GLOBAL, 0x19, 0x8100); 102 REG_WRITE(REG_GLOBAL, 0x19, 0x8100);
101 103
102 /* 104 /*
103 * Disable ARP mirroring, and configure the cpu port as the 105 * Disable ARP mirroring, and configure the upstream port as
104 * port to which ingress and egress monitor frames are to be 106 * the port to which ingress and egress monitor frames are to
105 * sent. 107 * be sent.
106 */ 108 */
107 REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1100) | 0x00f0); 109 REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1100) | 0x00f0);
108 110
109 /* 111 /*
110 * Disable cascade port functionality, and set the switch's 112 * Disable cascade port functionality, and set the switch's
111 * DSA device number to zero. 113 * DSA device number.
112 */ 114 */
113 REG_WRITE(REG_GLOBAL, 0x1c, 0xe000); 115 REG_WRITE(REG_GLOBAL, 0x1c, 0xe000 | (ds->index & 0x1f));
114 116
115 /* 117 /*
116 * Send all frames with destination addresses matching 118 * Send all frames with destination addresses matching
@@ -127,16 +129,23 @@ static int mv88e6131_setup_global(struct dsa_switch *ds)
127 REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); 129 REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff);
128 130
129 /* 131 /*
130 * Map all DSA device IDs to the CPU port. 132 * Program the DSA routing table.
131 */ 133 */
132 for (i = 0; i < 32; i++) 134 for (i = 0; i < 32; i++) {
133 REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port); 135 int nexthop;
136
137 nexthop = 0x1f;
138 if (i != ds->index && i < ds->dst->pd->nr_chips)
139 nexthop = ds->pd->rtable[i] & 0x1f;
140
141 REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop);
142 }
134 143
135 /* 144 /*
136 * Clear all trunk masks. 145 * Clear all trunk masks.
137 */ 146 */
138 for (i = 0; i < 8; i++) 147 for (i = 0; i < 8; i++)
139 REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); 148 REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0x7ff);
140 149
141 /* 150 /*
142 * Clear all trunk mappings. 151 * Clear all trunk mappings.
@@ -156,12 +165,18 @@ static int mv88e6131_setup_global(struct dsa_switch *ds)
156static int mv88e6131_setup_port(struct dsa_switch *ds, int p) 165static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
157{ 166{
158 int addr = REG_PORT(p); 167 int addr = REG_PORT(p);
168 u16 val;
159 169
160 /* 170 /*
161 * MAC Forcing register: don't force link, speed, duplex 171 * MAC Forcing register: don't force link, speed, duplex
162 * or flow control state to any particular values. 172 * or flow control state to any particular values on physical
173 * ports, but force the CPU port and all DSA ports to 1000 Mb/s
174 * full duplex.
163 */ 175 */
164 REG_WRITE(addr, 0x01, 0x0003); 176 if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p))
177 REG_WRITE(addr, 0x01, 0x003e);
178 else
179 REG_WRITE(addr, 0x01, 0x0003);
165 180
166 /* 181 /*
167 * Port Control: disable Core Tag, disable Drop-on-Lock, 182 * Port Control: disable Core Tag, disable Drop-on-Lock,
@@ -169,29 +184,40 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
169 * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN 184 * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN
170 * tunneling, determine priority by looking at 802.1p and 185 * tunneling, determine priority by looking at 802.1p and
171 * IP priority fields (IP prio has precedence), and set STP 186 * IP priority fields (IP prio has precedence), and set STP
172 * state to Forwarding. Finally, if this is the CPU port, 187 * state to Forwarding.
173 * additionally enable DSA tagging and forwarding of unknown 188 *
174 * unicast addresses. 189 * If this is the upstream port for this switch, enable
190 * forwarding of unknown unicasts, and enable DSA tagging
191 * mode.
192 *
193 * If this is the link to another switch, use DSA tagging
194 * mode, but do not enable forwarding of unknown unicasts.
175 */ 195 */
176 REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x0537 : 0x0433); 196 val = 0x0433;
197 if (p == dsa_upstream_port(ds))
198 val |= 0x0104;
199 if (ds->dsa_port_mask & (1 << p))
200 val |= 0x0100;
201 REG_WRITE(addr, 0x04, val);
177 202
178 /* 203 /*
179 * Port Control 1: disable trunking. Also, if this is the 204 * Port Control 1: disable trunking. Also, if this is the
180 * CPU port, enable learn messages to be sent to this port. 205 * CPU port, enable learn messages to be sent to this port.
181 */ 206 */
182 REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000); 207 REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000);
183 208
184 /* 209 /*
185 * Port based VLAN map: give each port its own address 210 * Port based VLAN map: give each port its own address
186 * database, allow the CPU port to talk to each of the 'real' 211 * database, allow the CPU port to talk to each of the 'real'
187 * ports, and allow each of the 'real' ports to only talk to 212 * ports, and allow each of the 'real' ports to only talk to
188 * the CPU port. 213 * the upstream port.
189 */ 214 */
190 REG_WRITE(addr, 0x06, 215 val = (p & 0xf) << 12;
191 ((p & 0xf) << 12) | 216 if (dsa_is_cpu_port(ds, p))
192 ((p == ds->cpu_port) ? 217 val |= ds->phys_port_mask;
193 ds->valid_port_mask : 218 else
194 (1 << ds->cpu_port))); 219 val |= 1 << dsa_upstream_port(ds);
220 REG_WRITE(addr, 0x06, val);
195 221
196 /* 222 /*
197 * Default VLAN ID and priority: don't set a default VLAN 223 * Default VLAN ID and priority: don't set a default VLAN
@@ -207,13 +233,15 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p)
207 * untagged frames on this port, do a destination address 233 * untagged frames on this port, do a destination address
208 * lookup on received packets as usual, don't send a copy 234 * lookup on received packets as usual, don't send a copy
209 * of all transmitted/received frames on this port to the 235 * of all transmitted/received frames on this port to the
210 * CPU, and configure the CPU port number. Also, if this 236 * CPU, and configure the upstream port number.
211 * is the CPU port, enable forwarding of unknown multicast 237 *
212 * addresses. 238 * If this is the upstream port for this switch, enable
239 * forwarding of unknown multicast addresses.
213 */ 240 */
214 REG_WRITE(addr, 0x08, 241 val = 0x0080 | dsa_upstream_port(ds);
215 ((p == ds->cpu_port) ? 0x00c0 : 0x0080) | 242 if (p == dsa_upstream_port(ds))
216 ds->cpu_port); 243 val |= 0x0040;
244 REG_WRITE(addr, 0x08, val);
217 245
218 /* 246 /*
219 * Rate Control: disable ingress rate limiting. 247 * Rate Control: disable ingress rate limiting.
@@ -268,7 +296,7 @@ static int mv88e6131_setup(struct dsa_switch *ds)
268 if (ret < 0) 296 if (ret < 0)
269 return ret; 297 return ret;
270 298
271 for (i = 0; i < 6; i++) { 299 for (i = 0; i < 11; i++) {
272 ret = mv88e6131_setup_port(ds, i); 300 ret = mv88e6131_setup_port(ds, i);
273 if (ret < 0) 301 if (ret < 0)
274 return ret; 302 return ret;
@@ -279,7 +307,7 @@ static int mv88e6131_setup(struct dsa_switch *ds)
279 307
280static int mv88e6131_port_to_phy_addr(int port) 308static int mv88e6131_port_to_phy_addr(int port)
281{ 309{
282 if (port >= 0 && port != 3 && port <= 7) 310 if (port >= 0 && port <= 11)
283 return port; 311 return port;
284 return -1; 312 return -1;
285} 313}
@@ -353,7 +381,7 @@ static int mv88e6131_get_sset_count(struct dsa_switch *ds)
353} 381}
354 382
355static struct dsa_switch_driver mv88e6131_switch_driver = { 383static struct dsa_switch_driver mv88e6131_switch_driver = {
356 .tag_protocol = __constant_htons(ETH_P_DSA), 384 .tag_protocol = cpu_to_be16(ETH_P_DSA),
357 .priv_size = sizeof(struct mv88e6xxx_priv_state), 385 .priv_size = sizeof(struct mv88e6xxx_priv_state),
358 .probe = mv88e6131_probe, 386 .probe = mv88e6131_probe,
359 .setup = mv88e6131_setup, 387 .setup = mv88e6131_setup,
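
Besides extending probe/reset/setup to cover the 11-port 88E6095 family, the 6131 global setup now programs a real DSA routing table: every possible source-device ID maps to the port that leads toward that chip, with 0x1f meaning "unreachable". A small sketch of the register values that loop emits, assuming rtable[] is the per-chip routing array carried in the platform data as in the hunk:

#include <stdint.h>

#define NO_ROUTE        0x1f    /* 'no route to this device ID' marker */

/*
 * Values written to GLOBAL2 register 0x06 by the routing-table loop above:
 * one entry per possible device ID (0..31), next-hop port in the low 5 bits.
 */
static void build_rtable_writes(int my_index, int nr_chips,
                                const uint8_t *rtable, uint16_t out[32])
{
        int i;

        for (i = 0; i < 32; i++) {
                int nexthop = NO_ROUTE;

                if (i != my_index && i < nr_chips)
                        nexthop = rtable[i] & 0x1f;

                out[i] = 0x8000 | (i << 8) | nexthop;
        }
}
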
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index a68fd79e9eca..ed131181215d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * net/dsa/slave.c - Slave device handling 2 * net/dsa/slave.c - Slave device handling
3 * Copyright (c) 2008 Marvell Semiconductor 3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -19,7 +19,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
19{ 19{
20 struct dsa_switch *ds = bus->priv; 20 struct dsa_switch *ds = bus->priv;
21 21
22 if (ds->valid_port_mask & (1 << addr)) 22 if (ds->phys_port_mask & (1 << addr))
23 return ds->drv->phy_read(ds, addr, reg); 23 return ds->drv->phy_read(ds, addr, reg);
24 24
25 return 0xffff; 25 return 0xffff;
@@ -29,7 +29,7 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val)
29{ 29{
30 struct dsa_switch *ds = bus->priv; 30 struct dsa_switch *ds = bus->priv;
31 31
32 if (ds->valid_port_mask & (1 << addr)) 32 if (ds->phys_port_mask & (1 << addr))
33 return ds->drv->phy_write(ds, addr, reg, val); 33 return ds->drv->phy_write(ds, addr, reg, val);
34 34
35 return 0; 35 return 0;
@@ -43,15 +43,24 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds)
43 ds->slave_mii_bus->write = dsa_slave_phy_write; 43 ds->slave_mii_bus->write = dsa_slave_phy_write;
44 snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x", 44 snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x",
45 ds->master_mii_bus->id, ds->pd->sw_addr); 45 ds->master_mii_bus->id, ds->pd->sw_addr);
46 ds->slave_mii_bus->parent = &(ds->master_mii_bus->dev); 46 ds->slave_mii_bus->parent = &ds->master_mii_bus->dev;
47} 47}
48 48
49 49
50/* slave device handling ****************************************************/ 50/* slave device handling ****************************************************/
51static int dsa_slave_init(struct net_device *dev)
52{
53 struct dsa_slave_priv *p = netdev_priv(dev);
54
55 dev->iflink = p->parent->dst->master_netdev->ifindex;
56
57 return 0;
58}
59
51static int dsa_slave_open(struct net_device *dev) 60static int dsa_slave_open(struct net_device *dev)
52{ 61{
53 struct dsa_slave_priv *p = netdev_priv(dev); 62 struct dsa_slave_priv *p = netdev_priv(dev);
54 struct net_device *master = p->parent->master_netdev; 63 struct net_device *master = p->parent->dst->master_netdev;
55 int err; 64 int err;
56 65
57 if (!(master->flags & IFF_UP)) 66 if (!(master->flags & IFF_UP))
@@ -89,7 +98,7 @@ out:
89static int dsa_slave_close(struct net_device *dev) 98static int dsa_slave_close(struct net_device *dev)
90{ 99{
91 struct dsa_slave_priv *p = netdev_priv(dev); 100 struct dsa_slave_priv *p = netdev_priv(dev);
92 struct net_device *master = p->parent->master_netdev; 101 struct net_device *master = p->parent->dst->master_netdev;
93 102
94 dev_mc_unsync(master, dev); 103 dev_mc_unsync(master, dev);
95 dev_unicast_unsync(master, dev); 104 dev_unicast_unsync(master, dev);
@@ -107,7 +116,7 @@ static int dsa_slave_close(struct net_device *dev)
107static void dsa_slave_change_rx_flags(struct net_device *dev, int change) 116static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
108{ 117{
109 struct dsa_slave_priv *p = netdev_priv(dev); 118 struct dsa_slave_priv *p = netdev_priv(dev);
110 struct net_device *master = p->parent->master_netdev; 119 struct net_device *master = p->parent->dst->master_netdev;
111 120
112 if (change & IFF_ALLMULTI) 121 if (change & IFF_ALLMULTI)
113 dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); 122 dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1);
@@ -118,7 +127,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change)
118static void dsa_slave_set_rx_mode(struct net_device *dev) 127static void dsa_slave_set_rx_mode(struct net_device *dev)
119{ 128{
120 struct dsa_slave_priv *p = netdev_priv(dev); 129 struct dsa_slave_priv *p = netdev_priv(dev);
121 struct net_device *master = p->parent->master_netdev; 130 struct net_device *master = p->parent->dst->master_netdev;
122 131
123 dev_mc_sync(master, dev); 132 dev_mc_sync(master, dev);
124 dev_unicast_sync(master, dev); 133 dev_unicast_sync(master, dev);
@@ -127,7 +136,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev)
127static int dsa_slave_set_mac_address(struct net_device *dev, void *a) 136static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
128{ 137{
129 struct dsa_slave_priv *p = netdev_priv(dev); 138 struct dsa_slave_priv *p = netdev_priv(dev);
130 struct net_device *master = p->parent->master_netdev; 139 struct net_device *master = p->parent->dst->master_netdev;
131 struct sockaddr *addr = a; 140 struct sockaddr *addr = a;
132 int err; 141 int err;
133 142
@@ -288,6 +297,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
288 297
289#ifdef CONFIG_NET_DSA_TAG_DSA 298#ifdef CONFIG_NET_DSA_TAG_DSA
290static const struct net_device_ops dsa_netdev_ops = { 299static const struct net_device_ops dsa_netdev_ops = {
300 .ndo_init = dsa_slave_init,
291 .ndo_open = dsa_slave_open, 301 .ndo_open = dsa_slave_open,
292 .ndo_stop = dsa_slave_close, 302 .ndo_stop = dsa_slave_close,
293 .ndo_start_xmit = dsa_xmit, 303 .ndo_start_xmit = dsa_xmit,
@@ -300,6 +310,7 @@ static const struct net_device_ops dsa_netdev_ops = {
300#endif 310#endif
301#ifdef CONFIG_NET_DSA_TAG_EDSA 311#ifdef CONFIG_NET_DSA_TAG_EDSA
302static const struct net_device_ops edsa_netdev_ops = { 312static const struct net_device_ops edsa_netdev_ops = {
313 .ndo_init = dsa_slave_init,
303 .ndo_open = dsa_slave_open, 314 .ndo_open = dsa_slave_open,
304 .ndo_stop = dsa_slave_close, 315 .ndo_stop = dsa_slave_close,
305 .ndo_start_xmit = edsa_xmit, 316 .ndo_start_xmit = edsa_xmit,
@@ -312,6 +323,7 @@ static const struct net_device_ops edsa_netdev_ops = {
312#endif 323#endif
313#ifdef CONFIG_NET_DSA_TAG_TRAILER 324#ifdef CONFIG_NET_DSA_TAG_TRAILER
314static const struct net_device_ops trailer_netdev_ops = { 325static const struct net_device_ops trailer_netdev_ops = {
326 .ndo_init = dsa_slave_init,
315 .ndo_open = dsa_slave_open, 327 .ndo_open = dsa_slave_open,
316 .ndo_stop = dsa_slave_close, 328 .ndo_stop = dsa_slave_close,
317 .ndo_start_xmit = trailer_xmit, 329 .ndo_start_xmit = trailer_xmit,
@@ -328,7 +340,7 @@ struct net_device *
328dsa_slave_create(struct dsa_switch *ds, struct device *parent, 340dsa_slave_create(struct dsa_switch *ds, struct device *parent,
329 int port, char *name) 341 int port, char *name)
330{ 342{
331 struct net_device *master = ds->master_netdev; 343 struct net_device *master = ds->dst->master_netdev;
332 struct net_device *slave_dev; 344 struct net_device *slave_dev;
333 struct dsa_slave_priv *p; 345 struct dsa_slave_priv *p;
334 int ret; 346 int ret;
@@ -343,7 +355,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent,
343 memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN); 355 memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN);
344 slave_dev->tx_queue_len = 0; 356 slave_dev->tx_queue_len = 0;
345 357
346 switch (ds->tag_protocol) { 358 switch (ds->dst->tag_protocol) {
347#ifdef CONFIG_NET_DSA_TAG_DSA 359#ifdef CONFIG_NET_DSA_TAG_DSA
348 case htons(ETH_P_DSA): 360 case htons(ETH_P_DSA):
349 slave_dev->netdev_ops = &dsa_netdev_ops; 361 slave_dev->netdev_ops = &dsa_netdev_ops;
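
The slave code now reaches the master interface and the tag protocol through the switch's tree (p->parent->dst) instead of the switch itself, and the new .ndo_init hook records that relationship as the slave's iflink. A rough sketch of the layout those accessors imply, with field names taken from the hunks and everything else elided (the _sketch suffix marks these as illustrative stand-ins, not the kernel definitions):

#include <stdint.h>

struct net_device;                      /* opaque here */

/* Per-tree state shared by every switch hanging off one master port. */
struct dsa_switch_tree_sketch {
        struct net_device       *master_netdev; /* conduit interface towards the host */
        uint16_t                tag_protocol;   /* DSA/EDSA/trailer ethertype (be16)   */
        /* ... platform data with nr_chips, per-chip rtable[], ds[] array ... */
};

/* Per-chip state; several of these can point at the same tree. */
struct dsa_switch_sketch {
        struct dsa_switch_tree_sketch   *dst;   /* tree this chip belongs to  */
        int                             index;  /* device number in the tree  */
        uint32_t                        phys_port_mask; /* user-facing ports  */
        uint32_t                        dsa_port_mask;  /* cascaded ports     */
};

dsa_slave_init() then simply records dst->master_netdev->ifindex as the slave's iflink, so userspace can see which conduit interface a given port rides on.
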
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index f99a019b939e..8fa25bafe6ca 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging 2 * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging
3 * Copyright (c) 2008 Marvell Semiconductor 3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -36,7 +36,7 @@ int dsa_xmit(struct sk_buff *skb, struct net_device *dev)
36 * Construct tagged FROM_CPU DSA tag from 802.1q tag. 36 * Construct tagged FROM_CPU DSA tag from 802.1q tag.
37 */ 37 */
38 dsa_header = skb->data + 2 * ETH_ALEN; 38 dsa_header = skb->data + 2 * ETH_ALEN;
39 dsa_header[0] = 0x60; 39 dsa_header[0] = 0x60 | p->parent->index;
40 dsa_header[1] = p->port << 3; 40 dsa_header[1] = p->port << 3;
41 41
42 /* 42 /*
@@ -57,7 +57,7 @@ int dsa_xmit(struct sk_buff *skb, struct net_device *dev)
57 * Construct untagged FROM_CPU DSA tag. 57 * Construct untagged FROM_CPU DSA tag.
58 */ 58 */
59 dsa_header = skb->data + 2 * ETH_ALEN; 59 dsa_header = skb->data + 2 * ETH_ALEN;
60 dsa_header[0] = 0x40; 60 dsa_header[0] = 0x40 | p->parent->index;
61 dsa_header[1] = p->port << 3; 61 dsa_header[1] = p->port << 3;
62 dsa_header[2] = 0x00; 62 dsa_header[2] = 0x00;
63 dsa_header[3] = 0x00; 63 dsa_header[3] = 0x00;
@@ -65,7 +65,7 @@ int dsa_xmit(struct sk_buff *skb, struct net_device *dev)
65 65
66 skb->protocol = htons(ETH_P_DSA); 66 skb->protocol = htons(ETH_P_DSA);
67 67
68 skb->dev = p->parent->master_netdev; 68 skb->dev = p->parent->dst->master_netdev;
69 dev_queue_xmit(skb); 69 dev_queue_xmit(skb);
70 70
71 return NETDEV_TX_OK; 71 return NETDEV_TX_OK;
@@ -78,11 +78,13 @@ out_free:
78static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, 78static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
79 struct packet_type *pt, struct net_device *orig_dev) 79 struct packet_type *pt, struct net_device *orig_dev)
80{ 80{
81 struct dsa_switch *ds = dev->dsa_ptr; 81 struct dsa_switch_tree *dst = dev->dsa_ptr;
82 struct dsa_switch *ds;
82 u8 *dsa_header; 83 u8 *dsa_header;
84 int source_device;
83 int source_port; 85 int source_port;
84 86
85 if (unlikely(ds == NULL)) 87 if (unlikely(dst == NULL))
86 goto out_drop; 88 goto out_drop;
87 89
88 skb = skb_unshare(skb, GFP_ATOMIC); 90 skb = skb_unshare(skb, GFP_ATOMIC);
@@ -98,16 +100,24 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
98 dsa_header = skb->data - 2; 100 dsa_header = skb->data - 2;
99 101
100 /* 102 /*
101 * Check that frame type is either TO_CPU or FORWARD, and 103 * Check that frame type is either TO_CPU or FORWARD.
102 * that the source device is zero.
103 */ 104 */
104 if ((dsa_header[0] & 0xdf) != 0x00 && (dsa_header[0] & 0xdf) != 0xc0) 105 if ((dsa_header[0] & 0xc0) != 0x00 && (dsa_header[0] & 0xc0) != 0xc0)
105 goto out_drop; 106 goto out_drop;
106 107
107 /* 108 /*
108 * Check that the source port is a registered DSA port. 109 * Determine source device and port.
109 */ 110 */
111 source_device = dsa_header[0] & 0x1f;
110 source_port = (dsa_header[1] >> 3) & 0x1f; 112 source_port = (dsa_header[1] >> 3) & 0x1f;
113
114 /*
115 * Check that the source device exists and that the source
116 * port is a registered DSA port.
117 */
118 if (source_device >= dst->pd->nr_chips)
119 goto out_drop;
120 ds = dst->ds[source_device];
111 if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) 121 if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
112 goto out_drop; 122 goto out_drop;
113 123
@@ -175,8 +185,8 @@ out:
175 return 0; 185 return 0;
176} 186}
177 187
178static struct packet_type dsa_packet_type = { 188static struct packet_type dsa_packet_type __read_mostly = {
179 .type = __constant_htons(ETH_P_DSA), 189 .type = cpu_to_be16(ETH_P_DSA),
180 .func = dsa_rcv, 190 .func = dsa_rcv,
181}; 191};
182 192
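
With multiple chips per tree, the receive path can no longer require a source device of zero; it now accepts any TO_CPU or FORWARD frame, extracts both the source switch and the source port, and validates them against the tree before delivering. A self-contained sketch of that header parse, using the masks from the hunk (frame-type values 0x00/0xc0 are TO_CPU/FORWARD in this tag format):

#include <stdint.h>

/*
 * Receive-side parse matching the checks in dsa_rcv() above: accept only
 * TO_CPU (0x00) and FORWARD (0xc0) frame types, then pull out the 5-bit
 * source switch number and 5-bit source port.
 */
static int parse_dsa_tag(const uint8_t hdr[2], int *source_device,
                         int *source_port)
{
        uint8_t frame_type = hdr[0] & 0xc0;

        if (frame_type != 0x00 && frame_type != 0xc0)
                return -1;                      /* neither TO_CPU nor FORWARD */

        *source_device = hdr[0] & 0x1f;
        *source_port = (hdr[1] >> 3) & 0x1f;
        return 0;
}
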
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 328ec957f786..815607bd286f 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * net/dsa/tag_edsa.c - Ethertype DSA tagging 2 * net/dsa/tag_edsa.c - Ethertype DSA tagging
3 * Copyright (c) 2008 Marvell Semiconductor 3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -45,7 +45,7 @@ int edsa_xmit(struct sk_buff *skb, struct net_device *dev)
45 edsa_header[1] = ETH_P_EDSA & 0xff; 45 edsa_header[1] = ETH_P_EDSA & 0xff;
46 edsa_header[2] = 0x00; 46 edsa_header[2] = 0x00;
47 edsa_header[3] = 0x00; 47 edsa_header[3] = 0x00;
48 edsa_header[4] = 0x60; 48 edsa_header[4] = 0x60 | p->parent->index;
49 edsa_header[5] = p->port << 3; 49 edsa_header[5] = p->port << 3;
50 50
51 /* 51 /*
@@ -70,7 +70,7 @@ int edsa_xmit(struct sk_buff *skb, struct net_device *dev)
70 edsa_header[1] = ETH_P_EDSA & 0xff; 70 edsa_header[1] = ETH_P_EDSA & 0xff;
71 edsa_header[2] = 0x00; 71 edsa_header[2] = 0x00;
72 edsa_header[3] = 0x00; 72 edsa_header[3] = 0x00;
73 edsa_header[4] = 0x40; 73 edsa_header[4] = 0x40 | p->parent->index;
74 edsa_header[5] = p->port << 3; 74 edsa_header[5] = p->port << 3;
75 edsa_header[6] = 0x00; 75 edsa_header[6] = 0x00;
76 edsa_header[7] = 0x00; 76 edsa_header[7] = 0x00;
@@ -78,7 +78,7 @@ int edsa_xmit(struct sk_buff *skb, struct net_device *dev)
78 78
79 skb->protocol = htons(ETH_P_EDSA); 79 skb->protocol = htons(ETH_P_EDSA);
80 80
81 skb->dev = p->parent->master_netdev; 81 skb->dev = p->parent->dst->master_netdev;
82 dev_queue_xmit(skb); 82 dev_queue_xmit(skb);
83 83
84 return NETDEV_TX_OK; 84 return NETDEV_TX_OK;
@@ -91,11 +91,13 @@ out_free:
91static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, 91static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
92 struct packet_type *pt, struct net_device *orig_dev) 92 struct packet_type *pt, struct net_device *orig_dev)
93{ 93{
94 struct dsa_switch *ds = dev->dsa_ptr; 94 struct dsa_switch_tree *dst = dev->dsa_ptr;
95 struct dsa_switch *ds;
95 u8 *edsa_header; 96 u8 *edsa_header;
97 int source_device;
96 int source_port; 98 int source_port;
97 99
98 if (unlikely(ds == NULL)) 100 if (unlikely(dst == NULL))
99 goto out_drop; 101 goto out_drop;
100 102
101 skb = skb_unshare(skb, GFP_ATOMIC); 103 skb = skb_unshare(skb, GFP_ATOMIC);
@@ -111,16 +113,24 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
111 edsa_header = skb->data + 2; 113 edsa_header = skb->data + 2;
112 114
113 /* 115 /*
114 * Check that frame type is either TO_CPU or FORWARD, and 116 * Check that frame type is either TO_CPU or FORWARD.
115 * that the source device is zero.
116 */ 117 */
117 if ((edsa_header[0] & 0xdf) != 0x00 && (edsa_header[0] & 0xdf) != 0xc0) 118 if ((edsa_header[0] & 0xc0) != 0x00 && (edsa_header[0] & 0xc0) != 0xc0)
118 goto out_drop; 119 goto out_drop;
119 120
120 /* 121 /*
121 * Check that the source port is a registered DSA port. 122 * Determine source device and port.
122 */ 123 */
124 source_device = edsa_header[0] & 0x1f;
123 source_port = (edsa_header[1] >> 3) & 0x1f; 125 source_port = (edsa_header[1] >> 3) & 0x1f;
126
127 /*
128 * Check that the source device exists and that the source
129 * port is a registered DSA port.
130 */
131 if (source_device >= dst->pd->nr_chips)
132 goto out_drop;
133 ds = dst->ds[source_device];
124 if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) 134 if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL)
125 goto out_drop; 135 goto out_drop;
126 136
@@ -194,8 +204,8 @@ out:
194 return 0; 204 return 0;
195} 205}
196 206
197static struct packet_type edsa_packet_type = { 207static struct packet_type edsa_packet_type __read_mostly = {
198 .type = __constant_htons(ETH_P_EDSA), 208 .type = cpu_to_be16(ETH_P_EDSA),
199 .func = edsa_rcv, 209 .func = edsa_rcv,
200}; 210};
201 211
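
The EDSA transmit path gains the same source-switch index in byte 4 of its 8-byte header. A sketch of the untagged FROM_CPU header layout as assembled above; byte 0 is not visible in the quoted context and is assumed here to be the high ethertype byte, and the ETH_P_EDSA value is likewise an assumption for illustration:

#include <stdint.h>

#define ETH_P_EDSA_GUESS 0xDADA         /* assumed ethertype value, for illustration */

/*
 * Untagged FROM_CPU EDSA header as assembled in edsa_xmit() above: two
 * ethertype bytes, two reserved bytes, then the 4-byte DSA tag carrying
 * the source switch index and port.
 */
static void build_edsa_header(uint8_t hdr[8], int sw_index, int port)
{
        hdr[0] = (ETH_P_EDSA_GUESS >> 8) & 0xff;        /* not shown in the quoted hunk */
        hdr[1] = ETH_P_EDSA_GUESS & 0xff;
        hdr[2] = 0x00;
        hdr[3] = 0x00;
        hdr[4] = 0x40 | (sw_index & 0x1f);              /* FROM_CPU, source switch */
        hdr[5] = (port & 0x1f) << 3;                    /* source port             */
        hdr[6] = 0x00;
        hdr[7] = 0x00;
}
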
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index b59132878ad1..1c3e30c38b86 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * net/dsa/tag_trailer.c - Trailer tag format handling 2 * net/dsa/tag_trailer.c - Trailer tag format handling
3 * Copyright (c) 2008 Marvell Semiconductor 3 * Copyright (c) 2008-2009 Marvell Semiconductor
4 * 4 *
5 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 6 * it under the terms of the GNU General Public License as published by
@@ -59,7 +59,7 @@ int trailer_xmit(struct sk_buff *skb, struct net_device *dev)
59 59
60 nskb->protocol = htons(ETH_P_TRAILER); 60 nskb->protocol = htons(ETH_P_TRAILER);
61 61
62 nskb->dev = p->parent->master_netdev; 62 nskb->dev = p->parent->dst->master_netdev;
63 dev_queue_xmit(nskb); 63 dev_queue_xmit(nskb);
64 64
65 return NETDEV_TX_OK; 65 return NETDEV_TX_OK;
@@ -68,12 +68,14 @@ int trailer_xmit(struct sk_buff *skb, struct net_device *dev)
68static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, 68static int trailer_rcv(struct sk_buff *skb, struct net_device *dev,
69 struct packet_type *pt, struct net_device *orig_dev) 69 struct packet_type *pt, struct net_device *orig_dev)
70{ 70{
71 struct dsa_switch *ds = dev->dsa_ptr; 71 struct dsa_switch_tree *dst = dev->dsa_ptr;
72 struct dsa_switch *ds;
72 u8 *trailer; 73 u8 *trailer;
73 int source_port; 74 int source_port;
74 75
75 if (unlikely(ds == NULL)) 76 if (unlikely(dst == NULL))
76 goto out_drop; 77 goto out_drop;
78 ds = dst->ds[0];
77 79
78 skb = skb_unshare(skb, GFP_ATOMIC); 80 skb = skb_unshare(skb, GFP_ATOMIC);
79 if (skb == NULL) 81 if (skb == NULL)
@@ -111,8 +113,8 @@ out:
111 return 0; 113 return 0;
112} 114}
113 115
114static struct packet_type trailer_packet_type = { 116static struct packet_type trailer_packet_type __read_mostly = {
115 .type = __constant_htons(ETH_P_TRAILER), 117 .type = cpu_to_be16(ETH_P_TRAILER),
116 .func = trailer_rcv, 118 .func = trailer_rcv,
117}; 119};
118 120
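
Trailer tags carry no source-device field, so unlike the DSA/EDSA receive paths the trailer receive path simply binds to the first switch in the tree. A stand-in for that single-chip assumption (types here are illustrative, not the kernel's):

/*
 * Trailer tags cannot name a source switch, so frames always resolve to
 * the first (and in practice only) switch in the tree.
 */
struct trailer_switch;

struct trailer_switch_tree {
        struct trailer_switch   *ds[4];         /* member switches */
};

static struct trailer_switch *trailer_source_switch(struct trailer_switch_tree *dst)
{
        return dst->ds[0];                      /* single-chip assumption */
}
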
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 8789d2bb1b06..6f479fa522c3 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -1102,8 +1102,8 @@ drop:
1102 return NET_RX_DROP; 1102 return NET_RX_DROP;
1103} 1103}
1104 1104
1105static struct packet_type econet_packet_type = { 1105static struct packet_type econet_packet_type __read_mostly = {
1106 .type = __constant_htons(ETH_P_ECONET), 1106 .type = cpu_to_be16(ETH_P_ECONET),
1107 .func = econet_rcv, 1107 .func = econet_rcv,
1108}; 1108};
1109 1109
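
This packet_type conversion (__read_mostly placement plus cpu_to_be16() instead of __constant_htons()) is the same mechanical change applied to the DSA, ARP and IPv4 handlers in this series: the table is read-mostly data, and a static initializer needs a compile-time-constant byte-order conversion, which cpu_to_be16() already provides for constant arguments without the deprecated __constant_ spelling. A userspace illustration of why the initializer must be constant-foldable; the ethertype value and handler are placeholders:

#include <stdint.h>

#define ETH_P_EXAMPLE 0x88b5            /* placeholder ethertype for illustration */

#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define CONST_HTONS(x) ((uint16_t)((((x) & 0x00ffU) << 8) | (((x) & 0xff00U) >> 8)))
#else
#define CONST_HTONS(x) ((uint16_t)(x))
#endif

struct packet_handler {
        uint16_t        type;           /* ethertype, network byte order */
        int             (*func)(const void *frame, int len);
};

static int example_rcv(const void *frame, int len)
{
        (void)frame;
        return len;
}

/* Static storage needs a constant-expression initializer, hence the macro. */
static struct packet_handler example_handler = {
        .type = CONST_HTONS(ETH_P_EXAMPLE),
        .func = example_rcv,
};
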
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 691268f3a359..b2cf91e4ccaa 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -35,7 +35,7 @@ config IP_ADVANCED_ROUTER
35 35
36 at boot time after the /proc file system has been mounted. 36 at boot time after the /proc file system has been mounted.
37 37
38 If you turn on IP forwarding, you will also get the rp_filter, which 38 If you turn on IP forwarding, you should consider the rp_filter, which
39 automatically rejects incoming packets if the routing table entry 39 automatically rejects incoming packets if the routing table entry
40 for their source address doesn't match the network interface they're 40 for their source address doesn't match the network interface they're
41 arriving on. This has security advantages because it prevents the 41 arriving on. This has security advantages because it prevents the
@@ -46,12 +46,16 @@ config IP_ADVANCED_ROUTER
46 rp_filter on use: 46 rp_filter on use:
47 47
48 echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter 48 echo 1 > /proc/sys/net/ipv4/conf/<device>/rp_filter
49 or 49 and
50 echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter 50 echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter
51 51
52 Note that some distributions enable it in startup scripts.
53 For details about rp_filter strict and loose mode read
54 <file:Documentation/networking/ip-sysctl.txt>.
55
52 If unsure, say N here. 56 If unsure, say N here.
53 57
54choice 58choice
55 prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)" 59 prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)"
56 depends on IP_ADVANCED_ROUTER 60 depends on IP_ADVANCED_ROUTER
57 default ASK_IP_FIB_HASH 61 default ASK_IP_FIB_HASH
@@ -59,27 +63,29 @@ choice
59config ASK_IP_FIB_HASH 63config ASK_IP_FIB_HASH
60 bool "FIB_HASH" 64 bool "FIB_HASH"
61 ---help--- 65 ---help---
62 Current FIB is very proven and good enough for most users. 66 Current FIB is very proven and good enough for most users.
63 67
64config IP_FIB_TRIE 68config IP_FIB_TRIE
65 bool "FIB_TRIE" 69 bool "FIB_TRIE"
66 ---help--- 70 ---help---
67 Use new experimental LC-trie as FIB lookup algorithm. 71 Use new experimental LC-trie as FIB lookup algorithm.
68 This improves lookup performance if you have a large 72 This improves lookup performance if you have a large
69 number of routes. 73 number of routes.
70 74
71 LC-trie is a longest matching prefix lookup algorithm which 75 LC-trie is a longest matching prefix lookup algorithm which
72 performs better than FIB_HASH for large routing tables. 76 performs better than FIB_HASH for large routing tables.
73 But, it consumes more memory and is more complex. 77 But, it consumes more memory and is more complex.
74 78
75 LC-trie is described in: 79 LC-trie is described in:
76 80
77 IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson 81 IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
78 IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 82 IEEE Journal on Selected Areas in Communications, 17(6):1083-1092,
79 An experimental study of compression methods for dynamic tries 83 June 1999
80 Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. 84
81 http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ 85 An experimental study of compression methods for dynamic tries
82 86 Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
87 http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
88
83endchoice 89endchoice
84 90
85config IP_FIB_HASH 91config IP_FIB_HASH
@@ -191,7 +197,7 @@ config IP_PNP_RARP
191 <file:Documentation/filesystems/nfsroot.txt> for details. 197 <file:Documentation/filesystems/nfsroot.txt> for details.
192 198
193# not yet ready.. 199# not yet ready..
194# bool ' IP: ARP support' CONFIG_IP_PNP_ARP 200# bool ' IP: ARP support' CONFIG_IP_PNP_ARP
195config NET_IPIP 201config NET_IPIP
196 tristate "IP: tunneling" 202 tristate "IP: tunneling"
197 select INET_TUNNEL 203 select INET_TUNNEL
@@ -361,7 +367,7 @@ config INET_IPCOMP
361 ---help--- 367 ---help---
362 Support for IP Payload Compression Protocol (IPComp) (RFC3173), 368 Support for IP Payload Compression Protocol (IPComp) (RFC3173),
363 typically needed for IPsec. 369 typically needed for IPsec.
364 370
365 If unsure, say Y. 371 If unsure, say Y.
366 372
367config INET_XFRM_TUNNEL 373config INET_XFRM_TUNNEL
@@ -415,7 +421,7 @@ config INET_DIAG
415 Support for INET (TCP, DCCP, etc) socket monitoring interface used by 421 Support for INET (TCP, DCCP, etc) socket monitoring interface used by
416 native Linux tools such as ss. ss is included in iproute2, currently 422 native Linux tools such as ss. ss is included in iproute2, currently
417 downloadable at <http://linux-net.osdl.org/index.php/Iproute2>. 423 downloadable at <http://linux-net.osdl.org/index.php/Iproute2>.
418 424
419 If unsure, say Y. 425 If unsure, say Y.
420 426
421config INET_TCP_DIAG 427config INET_TCP_DIAG
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 3a3dad801354..7f03373b8c07 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -369,7 +369,6 @@ lookup_protocol:
369 sock_init_data(sock, sk); 369 sock_init_data(sock, sk);
370 370
371 sk->sk_destruct = inet_sock_destruct; 371 sk->sk_destruct = inet_sock_destruct;
372 sk->sk_family = PF_INET;
373 sk->sk_protocol = protocol; 372 sk->sk_protocol = protocol;
374 sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; 373 sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
375 374
@@ -1253,10 +1252,10 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1253 int proto; 1252 int proto;
1254 int id; 1253 int id;
1255 1254
1256 if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) 1255 iph = skb_gro_header(skb, sizeof(*iph));
1256 if (unlikely(!iph))
1257 goto out; 1257 goto out;
1258 1258
1259 iph = ip_hdr(skb);
1260 proto = iph->protocol & (MAX_INET_PROTOS - 1); 1259 proto = iph->protocol & (MAX_INET_PROTOS - 1);
1261 1260
1262 rcu_read_lock(); 1261 rcu_read_lock();
@@ -1264,13 +1263,13 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1264 if (!ops || !ops->gro_receive) 1263 if (!ops || !ops->gro_receive)
1265 goto out_unlock; 1264 goto out_unlock;
1266 1265
1267 if (iph->version != 4 || iph->ihl != 5) 1266 if (*(u8 *)iph != 0x45)
1268 goto out_unlock; 1267 goto out_unlock;
1269 1268
1270 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) 1269 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
1271 goto out_unlock; 1270 goto out_unlock;
1272 1271
1273 flush = ntohs(iph->tot_len) != skb->len || 1272 flush = ntohs(iph->tot_len) != skb_gro_len(skb) ||
1274 iph->frag_off != htons(IP_DF); 1273 iph->frag_off != htons(IP_DF);
1275 id = ntohs(iph->id); 1274 id = ntohs(iph->id);
1276 1275
@@ -1282,24 +1281,25 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
1282 1281
1283 iph2 = ip_hdr(p); 1282 iph2 = ip_hdr(p);
1284 1283
1285 if (iph->protocol != iph2->protocol || 1284 if ((iph->protocol ^ iph2->protocol) |
1286 iph->tos != iph2->tos || 1285 (iph->tos ^ iph2->tos) |
1287 memcmp(&iph->saddr, &iph2->saddr, 8)) { 1286 (iph->saddr ^ iph2->saddr) |
1287 (iph->daddr ^ iph2->daddr)) {
1288 NAPI_GRO_CB(p)->same_flow = 0; 1288 NAPI_GRO_CB(p)->same_flow = 0;
1289 continue; 1289 continue;
1290 } 1290 }
1291 1291
1292 /* All fields must match except length and checksum. */ 1292 /* All fields must match except length and checksum. */
1293 NAPI_GRO_CB(p)->flush |= 1293 NAPI_GRO_CB(p)->flush |=
1294 memcmp(&iph->frag_off, &iph2->frag_off, 4) || 1294 (iph->ttl ^ iph2->ttl) |
1295 (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id; 1295 ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id);
1296 1296
1297 NAPI_GRO_CB(p)->flush |= flush; 1297 NAPI_GRO_CB(p)->flush |= flush;
1298 } 1298 }
1299 1299
1300 NAPI_GRO_CB(skb)->flush |= flush; 1300 NAPI_GRO_CB(skb)->flush |= flush;
1301 __skb_pull(skb, sizeof(*iph)); 1301 skb_gro_pull(skb, sizeof(*iph));
1302 skb_reset_transport_header(skb); 1302 skb_set_transport_header(skb, skb_gro_offset(skb));
1303 1303
1304 pp = ops->gro_receive(head, skb); 1304 pp = ops->gro_receive(head, skb);
1305 1305
@@ -1500,8 +1500,8 @@ static int ipv4_proc_init(void);
1500 * IP protocol layer initialiser 1500 * IP protocol layer initialiser
1501 */ 1501 */
1502 1502
1503static struct packet_type ip_packet_type = { 1503static struct packet_type ip_packet_type __read_mostly = {
1504 .type = __constant_htons(ETH_P_IP), 1504 .type = cpu_to_be16(ETH_P_IP),
1505 .func = ip_rcv, 1505 .func = ip_rcv,
1506 .gso_send_check = inet_gso_send_check, 1506 .gso_send_check = inet_gso_send_check,
1507 .gso_segment = inet_gso_segment, 1507 .gso_segment = inet_gso_segment,
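
The GRO receive path above now pulls the IPv4 header via skb_gro_header(), checks version and header length in a single byte (0x45 is version 4 with a 5-word header, i.e. no options), and replaces the per-field comparisons with branch-free XOR/OR tests. A standalone sketch of those two tests, with a trimmed header struct standing in for struct iphdr and the id field kept in host byte order for simplicity:

#include <stdint.h>

/* Trimmed-down IPv4 header with just the fields the GRO tests use. */
struct iph_min {
        uint8_t         tos;
        uint8_t         ttl;
        uint8_t         protocol;
        uint16_t        id;             /* host byte order here */
        uint32_t        saddr, daddr;   /* treated as opaque 32-bit values */
};

/* Same-flow test: OR the XORs of all fields that must match, branch once. */
static int same_flow(const struct iph_min *a, const struct iph_min *b)
{
        return ((a->protocol ^ b->protocol) |
                (a->tos ^ b->tos) |
                (a->saddr ^ b->saddr) |
                (a->daddr ^ b->daddr)) == 0;
}

/*
 * Flush test for a held packet: TTL must match, and the new ID must equal
 * the held packet's ID advanced by the number of segments already merged.
 */
static int must_flush(const struct iph_min *held, const struct iph_min *cur,
                      int count)
{
        return (held->ttl ^ cur->ttl) |
               ((uint16_t)(held->id + count) ^ cur->id);
}
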
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 29a74c01d8de..f11931c18381 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -801,8 +801,11 @@ static int arp_process(struct sk_buff *skb)
801 * cache. 801 * cache.
802 */ 802 */
803 803
804 /* Special case: IPv4 duplicate address detection packet (RFC2131) */ 804 /*
805 if (sip == 0) { 805 * Special case: IPv4 duplicate address detection packet (RFC2131)
806 * and Gratuitous ARP/ARP Announce. (RFC3927, Section 2.4)
807 */
808 if (sip == 0 || tip == sip) {
806 if (arp->ar_op == htons(ARPOP_REQUEST) && 809 if (arp->ar_op == htons(ARPOP_REQUEST) &&
807 inet_addr_type(net, tip) == RTN_LOCAL && 810 inet_addr_type(net, tip) == RTN_LOCAL &&
808 !arp_ignore(in_dev, sip, tip)) 811 !arp_ignore(in_dev, sip, tip))
@@ -892,7 +895,7 @@ static int arp_process(struct sk_buff *skb)
892out: 895out:
893 if (in_dev) 896 if (in_dev)
894 in_dev_put(in_dev); 897 in_dev_put(in_dev);
895 kfree_skb(skb); 898 consume_skb(skb);
896 return 0; 899 return 0;
897} 900}
898 901
@@ -1225,8 +1228,8 @@ void arp_ifdown(struct net_device *dev)
1225 * Called once on startup. 1228 * Called once on startup.
1226 */ 1229 */
1227 1230
1228static struct packet_type arp_packet_type = { 1231static struct packet_type arp_packet_type __read_mostly = {
1229 .type = __constant_htons(ETH_P_ARP), 1232 .type = cpu_to_be16(ETH_P_ARP),
1230 .func = arp_rcv, 1233 .func = arp_rcv,
1231}; 1234};
1232 1235
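
arp_process() now answers not just the all-zero-sender duplicate-address probes from RFC 2131 but also gratuitous ARP / ARP announcements, where sender and target IP are equal (RFC 3927, section 2.4); it also finishes with consume_skb() so a normally handled ARP frame is not counted as a drop by drop-accounting tracepoints. A trivial sketch of the classification:

#include <stdint.h>
#include <stdbool.h>

/*
 * The two special cases short-circuited early in arp_process(): a duplicate
 * address probe has an all-zero sender IP (RFC 2131), and a gratuitous
 * ARP / ARP announce uses the same sender and target IP (RFC 3927, 2.4).
 */
static bool arp_is_probe_or_announce(uint32_t sip, uint32_t tip)
{
        return sip == 0 || sip == tip;
}
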
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 7bc992976d29..039cc1ffe977 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1942,23 +1942,85 @@ socket_setattr_failure:
1942} 1942}
1943 1943
1944/** 1944/**
1945 * cipso_v4_sock_delattr - Delete the CIPSO option from a socket 1945 * cipso_v4_req_setattr - Add a CIPSO option to a connection request socket
1946 * @sk: the socket 1946 * @req: the connection request socket
1947 * @doi_def: the CIPSO DOI to use
1948 * @secattr: the specific security attributes of the socket
1947 * 1949 *
1948 * Description: 1950 * Description:
1949 * Removes the CIPSO option from a socket, if present. 1951 * Set the CIPSO option on the given socket using the DOI definition and
1952 * security attributes passed to the function. Returns zero on success and
1953 * negative values on failure.
1950 * 1954 *
1951 */ 1955 */
1952void cipso_v4_sock_delattr(struct sock *sk) 1956int cipso_v4_req_setattr(struct request_sock *req,
1957 const struct cipso_v4_doi *doi_def,
1958 const struct netlbl_lsm_secattr *secattr)
1953{ 1959{
1954 u8 hdr_delta; 1960 int ret_val = -EPERM;
1955 struct ip_options *opt; 1961 unsigned char *buf = NULL;
1956 struct inet_sock *sk_inet; 1962 u32 buf_len;
1963 u32 opt_len;
1964 struct ip_options *opt = NULL;
1965 struct inet_request_sock *req_inet;
1957 1966
1958 sk_inet = inet_sk(sk); 1967 /* We allocate the maximum CIPSO option size here so we are probably
1959 opt = sk_inet->opt; 1968 * being a little wasteful, but it makes our life _much_ easier later
1960 if (opt == NULL || opt->cipso == 0) 1969 * on and after all we are only talking about 40 bytes. */
1961 return; 1970 buf_len = CIPSO_V4_OPT_LEN_MAX;
1971 buf = kmalloc(buf_len, GFP_ATOMIC);
1972 if (buf == NULL) {
1973 ret_val = -ENOMEM;
1974 goto req_setattr_failure;
1975 }
1976
1977 ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr);
1978 if (ret_val < 0)
1979 goto req_setattr_failure;
1980 buf_len = ret_val;
1981
1982 /* We can't use ip_options_get() directly because it makes a call to
1983 * ip_options_get_alloc() which allocates memory with GFP_KERNEL and
1984 * we won't always have CAP_NET_RAW even though we _always_ want to
1985 * set the IPOPT_CIPSO option. */
1986 opt_len = (buf_len + 3) & ~3;
1987 opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC);
1988 if (opt == NULL) {
1989 ret_val = -ENOMEM;
1990 goto req_setattr_failure;
1991 }
1992 memcpy(opt->__data, buf, buf_len);
1993 opt->optlen = opt_len;
1994 opt->cipso = sizeof(struct iphdr);
1995 kfree(buf);
1996 buf = NULL;
1997
1998 req_inet = inet_rsk(req);
1999 opt = xchg(&req_inet->opt, opt);
2000 kfree(opt);
2001
2002 return 0;
2003
2004req_setattr_failure:
2005 kfree(buf);
2006 kfree(opt);
2007 return ret_val;
2008}
2009
2010/**
2011 * cipso_v4_delopt - Delete the CIPSO option from a set of IP options
2012 * @opt_ptr: IP option pointer
2013 *
2014 * Description:
2015 * Deletes the CIPSO IP option from a set of IP options and makes the necessary
2016 * adjustments to the IP option structure. Returns zero on success, negative
2017 * values on failure.
2018 *
2019 */
2020int cipso_v4_delopt(struct ip_options **opt_ptr)
2021{
2022 int hdr_delta = 0;
2023 struct ip_options *opt = *opt_ptr;
1962 2024
1963 if (opt->srr || opt->rr || opt->ts || opt->router_alert) { 2025 if (opt->srr || opt->rr || opt->ts || opt->router_alert) {
1964 u8 cipso_len; 2026 u8 cipso_len;
@@ -2003,11 +2065,34 @@ void cipso_v4_sock_delattr(struct sock *sk)
2003 } else { 2065 } else {
2004 /* only the cipso option was present on the socket so we can 2066 /* only the cipso option was present on the socket so we can
2005 * remove the entire option struct */ 2067 * remove the entire option struct */
2006 sk_inet->opt = NULL; 2068 *opt_ptr = NULL;
2007 hdr_delta = opt->optlen; 2069 hdr_delta = opt->optlen;
2008 kfree(opt); 2070 kfree(opt);
2009 } 2071 }
2010 2072
2073 return hdr_delta;
2074}
2075
2076/**
2077 * cipso_v4_sock_delattr - Delete the CIPSO option from a socket
2078 * @sk: the socket
2079 *
2080 * Description:
2081 * Removes the CIPSO option from a socket, if present.
2082 *
2083 */
2084void cipso_v4_sock_delattr(struct sock *sk)
2085{
2086 int hdr_delta;
2087 struct ip_options *opt;
2088 struct inet_sock *sk_inet;
2089
2090 sk_inet = inet_sk(sk);
2091 opt = sk_inet->opt;
2092 if (opt == NULL || opt->cipso == 0)
2093 return;
2094
2095 hdr_delta = cipso_v4_delopt(&sk_inet->opt);
2011 if (sk_inet->is_icsk && hdr_delta > 0) { 2096 if (sk_inet->is_icsk && hdr_delta > 0) {
2012 struct inet_connection_sock *sk_conn = inet_csk(sk); 2097 struct inet_connection_sock *sk_conn = inet_csk(sk);
2013 sk_conn->icsk_ext_hdr_len -= hdr_delta; 2098 sk_conn->icsk_ext_hdr_len -= hdr_delta;
@@ -2016,6 +2101,27 @@ void cipso_v4_sock_delattr(struct sock *sk)
2016} 2101}
2017 2102
2018/** 2103/**
2104 * cipso_v4_req_delattr - Delete the CIPSO option from a request socket
 2105 * @req: the request socket
2106 *
2107 * Description:
2108 * Removes the CIPSO option from a request socket, if present.
2109 *
2110 */
2111void cipso_v4_req_delattr(struct request_sock *req)
2112{
2113 struct ip_options *opt;
2114 struct inet_request_sock *req_inet;
2115
2116 req_inet = inet_rsk(req);
2117 opt = req_inet->opt;
2118 if (opt == NULL || opt->cipso == 0)
2119 return;
2120
2121 cipso_v4_delopt(&req_inet->opt);
2122}
2123
2124/**
2019 * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions 2125 * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions
2020 * @cipso: the CIPSO v4 option 2126 * @cipso: the CIPSO v4 option
2021 * @secattr: the security attributes 2127 * @secattr: the security attributes
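
cipso_v4_req_setattr() generates the CIPSO option into a worst-case 40-byte buffer, builds a private struct ip_options with GFP_ATOMIC rather than going through ip_options_get() (which, as the hunk notes, would want CAP_NET_RAW and GFP_KERNEL), pads the option to a 32-bit boundary, and swaps it into the request socket with xchg() so the old option can be freed safely; cipso_v4_delopt() factors the removal logic so full sockets and request sockets share it. A one-line sketch of the padding rule, assuming the 40-byte IPv4 option limit referenced above:

#include <stdint.h>

/*
 * IPv4 options occupy at most 40 bytes and must be a multiple of 4 bytes,
 * so the generated option is padded up before being copied into the
 * private struct ip_options.
 */
static uint32_t cipso_padded_len(uint32_t generated_len)
{
        return (generated_len + 3) & ~3u;
}
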
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 309997edc8a5..126bb911880f 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1075,6 +1075,14 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1075 } 1075 }
1076 } 1076 }
1077 ip_mc_up(in_dev); 1077 ip_mc_up(in_dev);
1078 /* fall through */
1079 case NETDEV_CHANGEADDR:
1080 if (IN_DEV_ARP_NOTIFY(in_dev))
1081 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1082 in_dev->ifa_list->ifa_address,
1083 dev,
1084 in_dev->ifa_list->ifa_address,
1085 NULL, dev->dev_addr, NULL);
1078 break; 1086 break;
1079 case NETDEV_DOWN: 1087 case NETDEV_DOWN:
1080 ip_mc_down(in_dev); 1088 ip_mc_down(in_dev);
@@ -1208,7 +1216,8 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1208 kfree_skb(skb); 1216 kfree_skb(skb);
1209 goto errout; 1217 goto errout;
1210 } 1218 }
1211 err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); 1219 rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1220 return;
1212errout: 1221errout:
1213 if (err < 0) 1222 if (err < 0)
1214 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); 1223 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
@@ -1439,6 +1448,7 @@ static struct devinet_sysctl_table {
1439 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), 1448 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1440 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), 1449 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1441 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), 1450 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1451 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1442 1452
1443 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), 1453 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1444 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), 1454 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
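
The new arp_notify knob (a per-device sysctl, e.g. writing 1 to /proc/sys/net/ipv4/conf/<device>/arp_notify) makes the kernel emit a gratuitous ARP when an interface comes up or changes its hardware address; the arp_send() call above uses the local address as both sender and target IP so that neighbours refresh their caches, which is exactly the case the arp.c hunk earlier now accepts. A sketch of the announce body that call produces; the struct is an illustration of the ARP fields, not a kernel type:

#include <stdint.h>
#include <string.h>

/*
 * ARP announce: a request whose sender and target protocol addresses are
 * both the local address, so peers update their caches without replying.
 */
struct arp_announce {
        uint16_t        op;             /* 1 == ARPOP_REQUEST                        */
        uint8_t         sha[6];         /* sender MAC: the interface address         */
        uint32_t        spa;            /* sender IP: the (possibly new) local addr  */
        uint8_t         tha[6];         /* target MAC: unknown, left as zeroes       */
        uint32_t        tpa;            /* target IP: the same local address         */
};

static void build_arp_announce(struct arp_announce *a,
                               const uint8_t mac[6], uint32_t local_ip)
{
        a->op = 1;
        memcpy(a->sha, mac, 6);
        a->spa = local_ip;
        memset(a->tha, 0, 6);
        a->tpa = local_ip;
}
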
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 741e4fa3e474..cafcc49d0993 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -275,7 +275,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
275 fib_res_put(&res); 275 fib_res_put(&res);
276 if (no_addr) 276 if (no_addr)
277 goto last_resort; 277 goto last_resort;
278 if (rpf) 278 if (rpf == 1)
279 goto e_inval; 279 goto e_inval;
280 fl.oif = dev->ifindex; 280 fl.oif = dev->ifindex;
281 281
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 4817dea3bc73..f831df500907 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -322,8 +322,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa,
322 kfree_skb(skb); 322 kfree_skb(skb);
323 goto errout; 323 goto errout;
324 } 324 }
325 err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, 325 rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE,
326 info->nlh, GFP_KERNEL); 326 info->nlh, GFP_KERNEL);
327 return;
327errout: 328errout:
328 if (err < 0) 329 if (err < 0)
329 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); 330 rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fc562d29cc46..3f50807237e0 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -375,6 +375,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
375 inet->tos = ip_hdr(skb)->tos; 375 inet->tos = ip_hdr(skb)->tos;
376 daddr = ipc.addr = rt->rt_src; 376 daddr = ipc.addr = rt->rt_src;
377 ipc.opt = NULL; 377 ipc.opt = NULL;
378 ipc.shtx.flags = 0;
378 if (icmp_param->replyopts.optlen) { 379 if (icmp_param->replyopts.optlen) {
379 ipc.opt = &icmp_param->replyopts; 380 ipc.opt = &icmp_param->replyopts;
380 if (ipc.opt->srr) 381 if (ipc.opt->srr)
@@ -532,6 +533,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
532 inet_sk(sk)->tos = tos; 533 inet_sk(sk)->tos = tos;
533 ipc.addr = iph->saddr; 534 ipc.addr = iph->saddr;
534 ipc.opt = &icmp_param.replyopts; 535 ipc.opt = &icmp_param.replyopts;
536 ipc.shtx.flags = 0;
535 537
536 { 538 {
537 struct flowi fl = { 539 struct flowi fl = {
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index f26ab38680de..22cd19ee44e5 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -93,24 +93,40 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
93 struct inet_bind_hashbucket *head; 93 struct inet_bind_hashbucket *head;
94 struct hlist_node *node; 94 struct hlist_node *node;
95 struct inet_bind_bucket *tb; 95 struct inet_bind_bucket *tb;
96 int ret; 96 int ret, attempts = 5;
97 struct net *net = sock_net(sk); 97 struct net *net = sock_net(sk);
98 int smallest_size = -1, smallest_rover;
98 99
99 local_bh_disable(); 100 local_bh_disable();
100 if (!snum) { 101 if (!snum) {
101 int remaining, rover, low, high; 102 int remaining, rover, low, high;
102 103
104again:
103 inet_get_local_port_range(&low, &high); 105 inet_get_local_port_range(&low, &high);
104 remaining = (high - low) + 1; 106 remaining = (high - low) + 1;
105 rover = net_random() % remaining + low; 107 smallest_rover = rover = net_random() % remaining + low;
106 108
109 smallest_size = -1;
107 do { 110 do {
108 head = &hashinfo->bhash[inet_bhashfn(net, rover, 111 head = &hashinfo->bhash[inet_bhashfn(net, rover,
109 hashinfo->bhash_size)]; 112 hashinfo->bhash_size)];
110 spin_lock(&head->lock); 113 spin_lock(&head->lock);
111 inet_bind_bucket_for_each(tb, node, &head->chain) 114 inet_bind_bucket_for_each(tb, node, &head->chain)
112 if (ib_net(tb) == net && tb->port == rover) 115 if (ib_net(tb) == net && tb->port == rover) {
116 if (tb->fastreuse > 0 &&
117 sk->sk_reuse &&
118 sk->sk_state != TCP_LISTEN &&
119 (tb->num_owners < smallest_size || smallest_size == -1)) {
120 smallest_size = tb->num_owners;
121 smallest_rover = rover;
122 if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) {
123 spin_unlock(&head->lock);
124 snum = smallest_rover;
125 goto have_snum;
126 }
127 }
113 goto next; 128 goto next;
129 }
114 break; 130 break;
115 next: 131 next:
116 spin_unlock(&head->lock); 132 spin_unlock(&head->lock);
@@ -125,14 +141,19 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
125 * the top level, not from the 'break;' statement. 141 * the top level, not from the 'break;' statement.
126 */ 142 */
127 ret = 1; 143 ret = 1;
128 if (remaining <= 0) 144 if (remaining <= 0) {
145 if (smallest_size != -1) {
146 snum = smallest_rover;
147 goto have_snum;
148 }
129 goto fail; 149 goto fail;
130 150 }
131 /* OK, here is the one we will use. HEAD is 151 /* OK, here is the one we will use. HEAD is
 132 * non-NULL and we hold its mutex. 152 * non-NULL and we hold its mutex.
133 */ 153 */
134 snum = rover; 154 snum = rover;
135 } else { 155 } else {
156have_snum:
136 head = &hashinfo->bhash[inet_bhashfn(net, snum, 157 head = &hashinfo->bhash[inet_bhashfn(net, snum,
137 hashinfo->bhash_size)]; 158 hashinfo->bhash_size)];
138 spin_lock(&head->lock); 159 spin_lock(&head->lock);
@@ -145,12 +166,19 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
145tb_found: 166tb_found:
146 if (!hlist_empty(&tb->owners)) { 167 if (!hlist_empty(&tb->owners)) {
147 if (tb->fastreuse > 0 && 168 if (tb->fastreuse > 0 &&
148 sk->sk_reuse && sk->sk_state != TCP_LISTEN) { 169 sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
170 smallest_size == -1) {
149 goto success; 171 goto success;
150 } else { 172 } else {
151 ret = 1; 173 ret = 1;
152 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) 174 if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) {
175 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN &&
176 smallest_size != -1 && --attempts >= 0) {
177 spin_unlock(&head->lock);
178 goto again;
179 }
153 goto fail_unlock; 180 goto fail_unlock;
181 }
154 } 182 }
155 } 183 }
156tb_not_found: 184tb_not_found:
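
Automatic port selection in inet_csk_get_port() can now fall back to sharing an already-bound port when SO_REUSEADDR applies: while scanning the ephemeral range it remembers the reusable bucket with the fewest owners, cuts the scan short once the table already holds more bound sockets than the range has ports (the bsockets/num_owners counters added in the inet_hashtables.c hunk below), and retries a few times if the shared choice turns out to conflict. A much-simplified model of the selection heuristic, ignoring the hashing, locking and bind-conflict retry of the real code:

#include <stdbool.h>

struct bound_port {
        int     port;
        int     num_owners;     /* sockets currently bound to this port */
        bool    fastreuse;      /* every current owner set SO_REUSEADDR */
};

/* Scan [low, high]; prefer a free port, else the least-shared reusable one. */
static int pick_port(const struct bound_port *bound, int n_bound,
                     int low, int high, bool sk_reuse)
{
        int smallest_size = -1, smallest_port = -1;
        int port, i;

        for (port = low; port <= high; port++) {
                const struct bound_port *tb = NULL;

                for (i = 0; i < n_bound; i++)
                        if (bound[i].port == port)
                                tb = &bound[i];

                if (tb == NULL)
                        return port;    /* free port: take it outright */

                if (sk_reuse && tb->fastreuse &&
                    (smallest_size == -1 || tb->num_owners < smallest_size)) {
                        smallest_size = tb->num_owners;
                        smallest_port = port;
                }
        }

        return smallest_port;           /* -1 if nothing can be shared */
}
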
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 6c52e08f786e..eaf3e2c8646a 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -267,6 +267,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
267 267
268struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, 268struct inet_frag_queue *inet_frag_find(struct netns_frags *nf,
269 struct inet_frags *f, void *key, unsigned int hash) 269 struct inet_frags *f, void *key, unsigned int hash)
270 __releases(&f->lock)
270{ 271{
271 struct inet_frag_queue *q; 272 struct inet_frag_queue *q;
272 struct hlist_node *n; 273 struct hlist_node *n;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 6a1045da48d2..625cc5f64c94 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -38,6 +38,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep,
38 write_pnet(&tb->ib_net, hold_net(net)); 38 write_pnet(&tb->ib_net, hold_net(net));
39 tb->port = snum; 39 tb->port = snum;
40 tb->fastreuse = 0; 40 tb->fastreuse = 0;
41 tb->num_owners = 0;
41 INIT_HLIST_HEAD(&tb->owners); 42 INIT_HLIST_HEAD(&tb->owners);
42 hlist_add_head(&tb->node, &head->chain); 43 hlist_add_head(&tb->node, &head->chain);
43 } 44 }
@@ -59,8 +60,13 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket
59void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, 60void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
60 const unsigned short snum) 61 const unsigned short snum)
61{ 62{
63 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
64
65 atomic_inc(&hashinfo->bsockets);
66
62 inet_sk(sk)->num = snum; 67 inet_sk(sk)->num = snum;
63 sk_add_bind_node(sk, &tb->owners); 68 sk_add_bind_node(sk, &tb->owners);
69 tb->num_owners++;
64 inet_csk(sk)->icsk_bind_hash = tb; 70 inet_csk(sk)->icsk_bind_hash = tb;
65} 71}
66 72
@@ -75,9 +81,12 @@ static void __inet_put_port(struct sock *sk)
75 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; 81 struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
76 struct inet_bind_bucket *tb; 82 struct inet_bind_bucket *tb;
77 83
84 atomic_dec(&hashinfo->bsockets);
85
78 spin_lock(&head->lock); 86 spin_lock(&head->lock);
79 tb = inet_csk(sk)->icsk_bind_hash; 87 tb = inet_csk(sk)->icsk_bind_hash;
80 __sk_del_bind_node(sk); 88 __sk_del_bind_node(sk);
89 tb->num_owners--;
81 inet_csk(sk)->icsk_bind_hash = NULL; 90 inet_csk(sk)->icsk_bind_hash = NULL;
82 inet_sk(sk)->num = 0; 91 inet_sk(sk)->num = 0;
83 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); 92 inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
@@ -444,9 +453,9 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
444 */ 453 */
445 inet_bind_bucket_for_each(tb, node, &head->chain) { 454 inet_bind_bucket_for_each(tb, node, &head->chain) {
446 if (ib_net(tb) == net && tb->port == port) { 455 if (ib_net(tb) == net && tb->port == port) {
447 WARN_ON(hlist_empty(&tb->owners));
448 if (tb->fastreuse >= 0) 456 if (tb->fastreuse >= 0)
449 goto next_port; 457 goto next_port;
458 WARN_ON(hlist_empty(&tb->owners));
450 if (!check_established(death_row, sk, 459 if (!check_established(death_row, sk,
451 port, &tw)) 460 port, &tw))
452 goto ok; 461 goto ok;
@@ -523,6 +532,7 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
523{ 532{
524 int i; 533 int i;
525 534
535 atomic_set(&h->bsockets, 0);
526 for (i = 0; i < INET_LHTABLE_SIZE; i++) { 536 for (i = 0; i < INET_LHTABLE_SIZE; i++) {
527 spin_lock_init(&h->listening_hash[i].lock); 537 spin_lock_init(&h->listening_hash[i].lock);
528 INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, 538 INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head,
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 6659ac000eeb..7985346653bd 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -463,6 +463,7 @@ err:
463static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, 463static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
464 struct net_device *dev) 464 struct net_device *dev)
465{ 465{
466 struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
466 struct iphdr *iph; 467 struct iphdr *iph;
467 struct sk_buff *fp, *head = qp->q.fragments; 468 struct sk_buff *fp, *head = qp->q.fragments;
468 int len; 469 int len;
@@ -548,7 +549,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
548 iph = ip_hdr(head); 549 iph = ip_hdr(head);
549 iph->frag_off = 0; 550 iph->frag_off = 0;
550 iph->tot_len = htons(len); 551 iph->tot_len = htons(len);
551 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMOKS); 552 IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
552 qp->q.fragments = NULL; 553 qp->q.fragments = NULL;
553 return 0; 554 return 0;
554 555
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0101521f366b..e62510d5ea5a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -164,67 +164,124 @@ static DEFINE_RWLOCK(ipgre_lock);
164 164
165/* Given src, dst and key, find appropriate for input tunnel. */ 165/* Given src, dst and key, find appropriate for input tunnel. */
166 166
167static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net, 167static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
168 __be32 remote, __be32 local, 168 __be32 remote, __be32 local,
169 __be32 key, __be16 gre_proto) 169 __be32 key, __be16 gre_proto)
170{ 170{
171 struct net *net = dev_net(dev);
172 int link = dev->ifindex;
171 unsigned h0 = HASH(remote); 173 unsigned h0 = HASH(remote);
172 unsigned h1 = HASH(key); 174 unsigned h1 = HASH(key);
173 struct ip_tunnel *t; 175 struct ip_tunnel *t, *cand = NULL;
174 struct ip_tunnel *t2 = NULL;
175 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 176 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
176 int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 177 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
177 ARPHRD_ETHER : ARPHRD_IPGRE; 178 ARPHRD_ETHER : ARPHRD_IPGRE;
179 int score, cand_score = 4;
178 180
179 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { 181 for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
180 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { 182 if (local != t->parms.iph.saddr ||
181 if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 183 remote != t->parms.iph.daddr ||
182 if (t->dev->type == dev_type) 184 key != t->parms.i_key ||
183 return t; 185 !(t->dev->flags & IFF_UP))
184 if (t->dev->type == ARPHRD_IPGRE && !t2) 186 continue;
185 t2 = t; 187
186 } 188 if (t->dev->type != ARPHRD_IPGRE &&
189 t->dev->type != dev_type)
190 continue;
191
192 score = 0;
193 if (t->parms.link != link)
194 score |= 1;
195 if (t->dev->type != dev_type)
196 score |= 2;
197 if (score == 0)
198 return t;
199
200 if (score < cand_score) {
201 cand = t;
202 cand_score = score;
187 } 203 }
188 } 204 }
189 205
190 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { 206 for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
191 if (remote == t->parms.iph.daddr) { 207 if (remote != t->parms.iph.daddr ||
192 if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 208 key != t->parms.i_key ||
193 if (t->dev->type == dev_type) 209 !(t->dev->flags & IFF_UP))
194 return t; 210 continue;
195 if (t->dev->type == ARPHRD_IPGRE && !t2) 211
196 t2 = t; 212 if (t->dev->type != ARPHRD_IPGRE &&
197 } 213 t->dev->type != dev_type)
214 continue;
215
216 score = 0;
217 if (t->parms.link != link)
218 score |= 1;
219 if (t->dev->type != dev_type)
220 score |= 2;
221 if (score == 0)
222 return t;
223
224 if (score < cand_score) {
225 cand = t;
226 cand_score = score;
198 } 227 }
199 } 228 }
200 229
201 for (t = ign->tunnels_l[h1]; t; t = t->next) { 230 for (t = ign->tunnels_l[h1]; t; t = t->next) {
202 if (local == t->parms.iph.saddr || 231 if ((local != t->parms.iph.saddr &&
203 (local == t->parms.iph.daddr && 232 (local != t->parms.iph.daddr ||
204 ipv4_is_multicast(local))) { 233 !ipv4_is_multicast(local))) ||
205 if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 234 key != t->parms.i_key ||
206 if (t->dev->type == dev_type) 235 !(t->dev->flags & IFF_UP))
207 return t; 236 continue;
208 if (t->dev->type == ARPHRD_IPGRE && !t2) 237
209 t2 = t; 238 if (t->dev->type != ARPHRD_IPGRE &&
210 } 239 t->dev->type != dev_type)
240 continue;
241
242 score = 0;
243 if (t->parms.link != link)
244 score |= 1;
245 if (t->dev->type != dev_type)
246 score |= 2;
247 if (score == 0)
248 return t;
249
250 if (score < cand_score) {
251 cand = t;
252 cand_score = score;
211 } 253 }
212 } 254 }
213 255
214 for (t = ign->tunnels_wc[h1]; t; t = t->next) { 256 for (t = ign->tunnels_wc[h1]; t; t = t->next) {
215 if (t->parms.i_key == key && t->dev->flags & IFF_UP) { 257 if (t->parms.i_key != key ||
216 if (t->dev->type == dev_type) 258 !(t->dev->flags & IFF_UP))
217 return t; 259 continue;
218 if (t->dev->type == ARPHRD_IPGRE && !t2) 260
219 t2 = t; 261 if (t->dev->type != ARPHRD_IPGRE &&
262 t->dev->type != dev_type)
263 continue;
264
265 score = 0;
266 if (t->parms.link != link)
267 score |= 1;
268 if (t->dev->type != dev_type)
269 score |= 2;
270 if (score == 0)
271 return t;
272
273 if (score < cand_score) {
274 cand = t;
275 cand_score = score;
220 } 276 }
221 } 277 }
222 278
223 if (t2) 279 if (cand != NULL)
224 return t2; 280 return cand;
225 281
226 if (ign->fb_tunnel_dev->flags&IFF_UP) 282 if (ign->fb_tunnel_dev->flags & IFF_UP)
227 return netdev_priv(ign->fb_tunnel_dev); 283 return netdev_priv(ign->fb_tunnel_dev);
284
228 return NULL; 285 return NULL;
229} 286}
230 287
@@ -284,6 +341,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
284 __be32 remote = parms->iph.daddr; 341 __be32 remote = parms->iph.daddr;
285 __be32 local = parms->iph.saddr; 342 __be32 local = parms->iph.saddr;
286 __be32 key = parms->i_key; 343 __be32 key = parms->i_key;
344 int link = parms->link;
287 struct ip_tunnel *t, **tp; 345 struct ip_tunnel *t, **tp;
288 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 346 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
289 347
@@ -291,6 +349,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
291 if (local == t->parms.iph.saddr && 349 if (local == t->parms.iph.saddr &&
292 remote == t->parms.iph.daddr && 350 remote == t->parms.iph.daddr &&
293 key == t->parms.i_key && 351 key == t->parms.i_key &&
352 link == t->parms.link &&
294 type == t->dev->type) 353 type == t->dev->type)
295 break; 354 break;
296 355
@@ -421,7 +480,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
421 } 480 }
422 481
423 read_lock(&ipgre_lock); 482 read_lock(&ipgre_lock);
424 t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr, 483 t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
425 flags & GRE_KEY ? 484 flags & GRE_KEY ?
426 *(((__be32 *)p) + (grehlen / 4) - 1) : 0, 485 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
427 p[1]); 486 p[1]);
@@ -432,7 +491,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
432 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 491 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
433 goto out; 492 goto out;
434 493
435 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) 494 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
436 t->err_count++; 495 t->err_count++;
437 else 496 else
438 t->err_count = 1; 497 t->err_count = 1;
@@ -518,7 +577,7 @@ static int ipgre_rcv(struct sk_buff *skb)
518 gre_proto = *(__be16 *)(h + 2); 577 gre_proto = *(__be16 *)(h + 2);
519 578
520 read_lock(&ipgre_lock); 579 read_lock(&ipgre_lock);
521 if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), 580 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
522 iph->saddr, iph->daddr, key, 581 iph->saddr, iph->daddr, key,
523 gre_proto))) { 582 gre_proto))) {
524 struct net_device_stats *stats = &tunnel->dev->stats; 583 struct net_device_stats *stats = &tunnel->dev->stats;
@@ -744,7 +803,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
744#endif 803#endif
745 804
746 if (tunnel->err_count > 0) { 805 if (tunnel->err_count > 0) {
747 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { 806 if (time_before(jiffies,
807 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
748 tunnel->err_count--; 808 tunnel->err_count--;
749 809
750 dst_link_failure(skb); 810 dst_link_failure(skb);
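
The rewritten ipgre_tunnel_lookup() takes the receiving net_device (deriving the namespace and ifindex from it) and no longer returns the first address/key match: each candidate tunnel is scored, bit 0 set when its configured link differs from the incoming ifindex and bit 1 when its device type differs, an exact score of 0 returns immediately, and otherwise the lowest-scoring candidate is remembered; ipgre_tunnel_find() likewise starts matching on parms->link. The sketch below reproduces that best-match scan over a plain array in userspace C, with simplified stand-in fields.

/* Userspace sketch of the "score and keep best candidate" lookup
 * introduced in ipgre_tunnel_lookup(). Fields are simplified stand-ins. */
#include <stdio.h>

struct tunnel {
    const char *name;
    int link;        /* bound ifindex, 0 = any */
    int dev_type;    /* e.g. 1 = IPGRE, 2 = ETHER */
};

static struct tunnel *lookup(struct tunnel *tab, int n, int link, int dev_type)
{
    struct tunnel *cand = NULL;
    int cand_score = 4;          /* worse than any reachable score (0..3) */

    for (int i = 0; i < n; i++) {
        int score = 0;

        if (tab[i].link != link)
            score |= 1;          /* wrong underlying device */
        if (tab[i].dev_type != dev_type)
            score |= 2;          /* wrong device type */

        if (score == 0)
            return &tab[i];      /* exact match wins immediately */
        if (score < cand_score) {
            cand = &tab[i];
            cand_score = score;
        }
    }
    return cand;                 /* best partial match, or NULL */
}

int main(void)
{
    struct tunnel tab[] = {
        { "gre-any",  0, 1 },
        { "gre-eth2", 2, 1 },
    };
    struct tunnel *t = lookup(tab, 2, 2, 1);
    printf("picked %s\n", t ? t->name : "(none)");
    return 0;
}
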
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8ebe86dd72af..3e7e910c7c0f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -935,6 +935,10 @@ alloc_new_skb:
935 sk->sk_allocation); 935 sk->sk_allocation);
936 if (unlikely(skb == NULL)) 936 if (unlikely(skb == NULL))
937 err = -ENOBUFS; 937 err = -ENOBUFS;
938 else
939 /* only the initial fragment is
940 time stamped */
941 ipc->shtx.flags = 0;
938 } 942 }
939 if (skb == NULL) 943 if (skb == NULL)
940 goto error; 944 goto error;
@@ -945,6 +949,7 @@ alloc_new_skb:
945 skb->ip_summed = csummode; 949 skb->ip_summed = csummode;
946 skb->csum = 0; 950 skb->csum = 0;
947 skb_reserve(skb, hh_len); 951 skb_reserve(skb, hh_len);
952 *skb_tx(skb) = ipc->shtx;
948 953
949 /* 954 /*
950 * Find where to start putting bytes. 955 * Find where to start putting bytes.
@@ -1364,6 +1369,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1364 1369
1365 daddr = ipc.addr = rt->rt_src; 1370 daddr = ipc.addr = rt->rt_src;
1366 ipc.opt = NULL; 1371 ipc.opt = NULL;
1372 ipc.shtx.flags = 0;
1367 1373
1368 if (replyopts.opt.optlen) { 1374 if (replyopts.opt.optlen) {
1369 ipc.opt = &replyopts.opt; 1375 ipc.opt = &replyopts.opt;
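
The ip_output.c changes thread the transmit-timestamp flags through fragment building so that only the first fragment carries them: ipc.shtx is copied into each freshly allocated skb via *skb_tx(skb), the flags are cleared once a later fragment has been allocated (the comment notes only the initial fragment is time stamped), and ip_send_reply() starts with shtx.flags = 0. The toy below only illustrates the resulting behaviour, first element stamped and the rest not; it is not the kernel control flow.

/* Sketch of "only the first fragment carries the timestamp request",
 * the outcome the ip_output.c hunks arrange. Types are illustrative. */
#include <stdio.h>

struct frag { unsigned tx_flags; };

int main(void)
{
    unsigned pending_flags = 0x1;           /* e.g. "timestamp wanted" */
    struct frag frags[3];

    for (int i = 0; i < 3; i++) {
        frags[i].tx_flags = pending_flags;  /* copy the current request */
        pending_flags = 0;                  /* later fragments get nothing */
    }
    for (int i = 0; i < 3; i++)
        printf("frag %d: flags=0x%x\n", i, frags[i].tx_flags);
    return 0;
}
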
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index d722013c1cae..90d22ae0a419 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -100,8 +100,8 @@
100#define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers 100#define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers
101 - '3' from resolv.h */ 101 - '3' from resolv.h */
102 102
103#define NONE __constant_htonl(INADDR_NONE) 103#define NONE cpu_to_be32(INADDR_NONE)
104#define ANY __constant_htonl(INADDR_ANY) 104#define ANY cpu_to_be32(INADDR_ANY)
105 105
106/* 106/*
107 * Public IP configuration 107 * Public IP configuration
@@ -406,7 +406,7 @@ static int __init ic_defaults(void)
406static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); 406static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev);
407 407
408static struct packet_type rarp_packet_type __initdata = { 408static struct packet_type rarp_packet_type __initdata = {
409 .type = __constant_htons(ETH_P_RARP), 409 .type = cpu_to_be16(ETH_P_RARP),
410 .func = ic_rarp_recv, 410 .func = ic_rarp_recv,
411}; 411};
412 412
@@ -568,7 +568,7 @@ struct bootp_pkt { /* BOOTP packet format */
568static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); 568static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev);
569 569
570static struct packet_type bootp_packet_type __initdata = { 570static struct packet_type bootp_packet_type __initdata = {
571 .type = __constant_htons(ETH_P_IP), 571 .type = cpu_to_be16(ETH_P_IP),
572 .func = ic_bootp_recv, 572 .func = ic_bootp_recv,
573}; 573};
574 574
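
ipconfig.c swaps the old __constant_htonl()/__constant_htons() wrappers for cpu_to_be32()/cpu_to_be16(), which for constant arguments reduce to compile-time constants and so remain usable in the static packet_type and address initialisers shown. The userspace sketch below expresses the same idea with a hand-rolled constant byte swap; CONST_HTONS is an illustrative macro (it assumes a little-endian build), not a kernel API.

/* Userspace sketch of a compile-time big-endian conversion, the role
 * cpu_to_be16() plays in the static packet_type initialisers. */
#include <stdio.h>
#include <stdint.h>

/* Byte swap written with shifts so it is a constant expression and
 * usable in static initialisers; correct only on little-endian hosts. */
#define CONST_HTONS(x) ((uint16_t)((((x) & 0xff) << 8) | (((x) >> 8) & 0xff)))

struct packet_type_sketch {
    uint16_t type;               /* protocol id in network byte order */
};

/* 0x0800 is the Ethernet protocol number for IPv4 (ETH_P_IP). */
static struct packet_type_sketch ip_pt = { .type = CONST_HTONS(0x0800) };

int main(void)
{
    printf("stored type = 0x%04x\n", ip_pt.type);
    return 0;
}
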
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 5079dfbc6f38..9054139795af 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -327,7 +327,7 @@ static int ipip_err(struct sk_buff *skb, u32 info)
327 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 327 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
328 goto out; 328 goto out;
329 329
330 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) 330 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
331 t->err_count++; 331 t->err_count++;
332 else 332 else
333 t->err_count = 1; 333 t->err_count = 1;
@@ -466,7 +466,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
466 } 466 }
467 467
468 if (tunnel->err_count > 0) { 468 if (tunnel->err_count > 0) {
469 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { 469 if (time_before(jiffies,
470 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
470 tunnel->err_count--; 471 tunnel->err_count--;
471 dst_link_failure(skb); 472 dst_link_failure(skb);
472 } else 473 } else
@@ -750,7 +751,7 @@ static struct xfrm_tunnel ipip_handler = {
750 .priority = 1, 751 .priority = 1,
751}; 752};
752 753
753static char banner[] __initdata = 754static const char banner[] __initconst =
754 KERN_INFO "IPv4 over IPv4 tunneling driver\n"; 755 KERN_INFO "IPv4 over IPv4 tunneling driver\n";
755 756
756static void ipip_destroy_tunnels(struct ipip_net *ipn) 757static void ipip_destroy_tunnels(struct ipip_net *ipn)
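
Both ip_gre.c and ipip.c replace the open-coded jiffies - err_time < IPTUNNEL_ERR_TIMEO tests with time_before(), the standard helper for comparing free-running tick counters: it performs the subtraction in a signed type so the comparison stays meaningful across counter wraparound. (ipip.c also makes its banner string const and __initconst.) The sketch below imitates that comparison in userspace C; TICK_BEFORE mirrors the technique but is written here independently.

/* Userspace sketch of wraparound-safe tick comparison, the property
 * time_before() provides in the ip_gre.c/ipip.c error-throttling paths. */
#include <stdio.h>

/* a is "before" b if the signed difference b - a is positive.
 * This stays correct when the unsigned counter wraps past zero. */
#define TICK_BEFORE(a, b) ((long)((b) - (a)) > 0)

int main(void)
{
    unsigned long err_time = (unsigned long)-10;   /* 10 ticks before wrap */
    unsigned long timeout  = 5;
    unsigned long deadline = err_time + timeout;   /* still below the wrap */
    unsigned long now      = 5;                    /* 15 ticks after err_time */

    /* A direct comparison is fooled once "now" has wrapped past zero. */
    printf("deadline still ahead (direct):      %d\n", now < deadline);
    printf("deadline still ahead (TICK_BEFORE): %d\n",
           TICK_BEFORE(now, deadline));
    return 0;
}
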
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 14666449dc1c..13e9dd3012b3 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -67,9 +67,6 @@
67#define CONFIG_IP_PIMSM 1 67#define CONFIG_IP_PIMSM 1
68#endif 68#endif
69 69
70static struct sock *mroute_socket;
71
72
73/* Big lock, protecting vif table, mrt cache and mroute socket state. 70/* Big lock, protecting vif table, mrt cache and mroute socket state.
74 Note that the changes are semaphored via rtnl_lock. 71 Note that the changes are semaphored via rtnl_lock.
75 */ 72 */
@@ -80,18 +77,9 @@ static DEFINE_RWLOCK(mrt_lock);
80 * Multicast router control variables 77 * Multicast router control variables
81 */ 78 */
82 79
83static struct vif_device vif_table[MAXVIFS]; /* Devices */ 80#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
84static int maxvif;
85
86#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)
87
88static int mroute_do_assert; /* Set in PIM assert */
89static int mroute_do_pim;
90
91static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */
92 81
93static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ 82static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */
94static atomic_t cache_resolve_queue_len; /* Size of unresolved */
95 83
96/* Special spinlock for queue of unresolved entries */ 84/* Special spinlock for queue of unresolved entries */
97static DEFINE_SPINLOCK(mfc_unres_lock); 85static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -107,7 +95,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
107static struct kmem_cache *mrt_cachep __read_mostly; 95static struct kmem_cache *mrt_cachep __read_mostly;
108 96
109static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); 97static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
110static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); 98static int ipmr_cache_report(struct net *net,
99 struct sk_buff *pkt, vifi_t vifi, int assert);
111static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); 100static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
112 101
113#ifdef CONFIG_IP_PIMSM_V2 102#ifdef CONFIG_IP_PIMSM_V2
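
The ipmr.c hunk above removes the file-scope multicast-routing globals (mroute_socket, vif_table/maxvif, mfc_cache_array, mroute_do_assert/mroute_do_pim, cache_resolve_queue_len, and later reg_vif_num), and the rest of the file's changes move that state into net->ipv4 and pass an explicit struct net * into the helpers, typically obtained via dev_net(), sock_net() or mfc_net(); ipmr_new_tunnel() and ipmr_del_tunnel() likewise look up "tunl0" in that namespace rather than init_net. The userspace sketch below shows the general shape of such a conversion, with module globals folded into a context struct that every helper receives; all names are invented.

/* Userspace sketch of the "globals into a per-context struct" conversion
 * that the ipmr.c hunks perform for network namespaces. Names invented. */
#include <stdio.h>

#define MAXVIFS 4

struct mroute_ctx {              /* stands in for the net->ipv4 fields */
    int maxvif;
    const char *vif_dev[MAXVIFS];
};

/* Before: these would have used file-scope globals; now every helper
 * receives the owning context explicitly. */
static int vif_add(struct mroute_ctx *ctx, int vifi, const char *dev)
{
    if (vifi < 0 || vifi >= MAXVIFS || ctx->vif_dev[vifi])
        return -1;
    ctx->vif_dev[vifi] = dev;
    if (vifi + 1 > ctx->maxvif)
        ctx->maxvif = vifi + 1;
    return 0;
}

static void vif_delete(struct mroute_ctx *ctx, int vifi)
{
    if (vifi < 0 || vifi >= ctx->maxvif)
        return;
    ctx->vif_dev[vifi] = NULL;
    while (ctx->maxvif > 0 && !ctx->vif_dev[ctx->maxvif - 1])
        ctx->maxvif--;           /* shrink, mirroring the kernel's loop */
}

int main(void)
{
    struct mroute_ctx ns_a = { 0 }, ns_b = { 0 };

    vif_add(&ns_a, 0, "eth0");   /* two contexts no longer share state */
    vif_add(&ns_b, 2, "eth1");
    vif_delete(&ns_b, 2);
    printf("ns_a.maxvif=%d ns_b.maxvif=%d\n", ns_a.maxvif, ns_b.maxvif);
    return 0;
}
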
@@ -120,9 +109,11 @@ static struct timer_list ipmr_expire_timer;
120 109
121static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) 110static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
122{ 111{
112 struct net *net = dev_net(dev);
113
123 dev_close(dev); 114 dev_close(dev);
124 115
125 dev = __dev_get_by_name(&init_net, "tunl0"); 116 dev = __dev_get_by_name(net, "tunl0");
126 if (dev) { 117 if (dev) {
127 const struct net_device_ops *ops = dev->netdev_ops; 118 const struct net_device_ops *ops = dev->netdev_ops;
128 struct ifreq ifr; 119 struct ifreq ifr;
@@ -148,11 +139,11 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
148} 139}
149 140
150static 141static
151struct net_device *ipmr_new_tunnel(struct vifctl *v) 142struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
152{ 143{
153 struct net_device *dev; 144 struct net_device *dev;
154 145
155 dev = __dev_get_by_name(&init_net, "tunl0"); 146 dev = __dev_get_by_name(net, "tunl0");
156 147
157 if (dev) { 148 if (dev) {
158 const struct net_device_ops *ops = dev->netdev_ops; 149 const struct net_device_ops *ops = dev->netdev_ops;
@@ -181,7 +172,8 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v)
181 172
182 dev = NULL; 173 dev = NULL;
183 174
184 if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) { 175 if (err == 0 &&
176 (dev = __dev_get_by_name(net, p.name)) != NULL) {
185 dev->flags |= IFF_MULTICAST; 177 dev->flags |= IFF_MULTICAST;
186 178
187 in_dev = __in_dev_get_rtnl(dev); 179 in_dev = __in_dev_get_rtnl(dev);
@@ -209,14 +201,15 @@ failure:
209 201
210#ifdef CONFIG_IP_PIMSM 202#ifdef CONFIG_IP_PIMSM
211 203
212static int reg_vif_num = -1;
213
214static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) 204static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
215{ 205{
206 struct net *net = dev_net(dev);
207
216 read_lock(&mrt_lock); 208 read_lock(&mrt_lock);
217 dev->stats.tx_bytes += skb->len; 209 dev->stats.tx_bytes += skb->len;
218 dev->stats.tx_packets++; 210 dev->stats.tx_packets++;
219 ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); 211 ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
212 IGMPMSG_WHOLEPKT);
220 read_unlock(&mrt_lock); 213 read_unlock(&mrt_lock);
221 kfree_skb(skb); 214 kfree_skb(skb);
222 return 0; 215 return 0;
@@ -283,16 +276,16 @@ failure:
283 * @notify: Set to 1, if the caller is a notifier_call 276 * @notify: Set to 1, if the caller is a notifier_call
284 */ 277 */
285 278
286static int vif_delete(int vifi, int notify) 279static int vif_delete(struct net *net, int vifi, int notify)
287{ 280{
288 struct vif_device *v; 281 struct vif_device *v;
289 struct net_device *dev; 282 struct net_device *dev;
290 struct in_device *in_dev; 283 struct in_device *in_dev;
291 284
292 if (vifi < 0 || vifi >= maxvif) 285 if (vifi < 0 || vifi >= net->ipv4.maxvif)
293 return -EADDRNOTAVAIL; 286 return -EADDRNOTAVAIL;
294 287
295 v = &vif_table[vifi]; 288 v = &net->ipv4.vif_table[vifi];
296 289
297 write_lock_bh(&mrt_lock); 290 write_lock_bh(&mrt_lock);
298 dev = v->dev; 291 dev = v->dev;
@@ -304,17 +297,17 @@ static int vif_delete(int vifi, int notify)
304 } 297 }
305 298
306#ifdef CONFIG_IP_PIMSM 299#ifdef CONFIG_IP_PIMSM
307 if (vifi == reg_vif_num) 300 if (vifi == net->ipv4.mroute_reg_vif_num)
308 reg_vif_num = -1; 301 net->ipv4.mroute_reg_vif_num = -1;
309#endif 302#endif
310 303
311 if (vifi+1 == maxvif) { 304 if (vifi+1 == net->ipv4.maxvif) {
312 int tmp; 305 int tmp;
313 for (tmp=vifi-1; tmp>=0; tmp--) { 306 for (tmp=vifi-1; tmp>=0; tmp--) {
314 if (VIF_EXISTS(tmp)) 307 if (VIF_EXISTS(net, tmp))
315 break; 308 break;
316 } 309 }
317 maxvif = tmp+1; 310 net->ipv4.maxvif = tmp+1;
318 } 311 }
319 312
320 write_unlock_bh(&mrt_lock); 313 write_unlock_bh(&mrt_lock);
@@ -333,6 +326,12 @@ static int vif_delete(int vifi, int notify)
333 return 0; 326 return 0;
334} 327}
335 328
329static inline void ipmr_cache_free(struct mfc_cache *c)
330{
331 release_net(mfc_net(c));
332 kmem_cache_free(mrt_cachep, c);
333}
334
336/* Destroy an unresolved cache entry, killing queued skbs 335/* Destroy an unresolved cache entry, killing queued skbs
337 and reporting error to netlink readers. 336 and reporting error to netlink readers.
338 */ 337 */
@@ -341,8 +340,9 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
341{ 340{
342 struct sk_buff *skb; 341 struct sk_buff *skb;
343 struct nlmsgerr *e; 342 struct nlmsgerr *e;
343 struct net *net = mfc_net(c);
344 344
345 atomic_dec(&cache_resolve_queue_len); 345 atomic_dec(&net->ipv4.cache_resolve_queue_len);
346 346
347 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { 347 while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
348 if (ip_hdr(skb)->version == 0) { 348 if (ip_hdr(skb)->version == 0) {
@@ -354,12 +354,12 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
354 e->error = -ETIMEDOUT; 354 e->error = -ETIMEDOUT;
355 memset(&e->msg, 0, sizeof(e->msg)); 355 memset(&e->msg, 0, sizeof(e->msg));
356 356
357 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); 357 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
358 } else 358 } else
359 kfree_skb(skb); 359 kfree_skb(skb);
360 } 360 }
361 361
362 kmem_cache_free(mrt_cachep, c); 362 ipmr_cache_free(c);
363} 363}
364 364
365 365
@@ -376,7 +376,7 @@ static void ipmr_expire_process(unsigned long dummy)
376 return; 376 return;
377 } 377 }
378 378
379 if (atomic_read(&cache_resolve_queue_len) == 0) 379 if (mfc_unres_queue == NULL)
380 goto out; 380 goto out;
381 381
382 now = jiffies; 382 now = jiffies;
@@ -397,7 +397,7 @@ static void ipmr_expire_process(unsigned long dummy)
397 ipmr_destroy_unres(c); 397 ipmr_destroy_unres(c);
398 } 398 }
399 399
400 if (atomic_read(&cache_resolve_queue_len)) 400 if (mfc_unres_queue != NULL)
401 mod_timer(&ipmr_expire_timer, jiffies + expires); 401 mod_timer(&ipmr_expire_timer, jiffies + expires);
402 402
403out: 403out:
@@ -409,13 +409,15 @@ out:
409static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) 409static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
410{ 410{
411 int vifi; 411 int vifi;
412 struct net *net = mfc_net(cache);
412 413
413 cache->mfc_un.res.minvif = MAXVIFS; 414 cache->mfc_un.res.minvif = MAXVIFS;
414 cache->mfc_un.res.maxvif = 0; 415 cache->mfc_un.res.maxvif = 0;
415 memset(cache->mfc_un.res.ttls, 255, MAXVIFS); 416 memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
416 417
417 for (vifi=0; vifi<maxvif; vifi++) { 418 for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
418 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) { 419 if (VIF_EXISTS(net, vifi) &&
420 ttls[vifi] && ttls[vifi] < 255) {
419 cache->mfc_un.res.ttls[vifi] = ttls[vifi]; 421 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
420 if (cache->mfc_un.res.minvif > vifi) 422 if (cache->mfc_un.res.minvif > vifi)
421 cache->mfc_un.res.minvif = vifi; 423 cache->mfc_un.res.minvif = vifi;
@@ -425,16 +427,16 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
425 } 427 }
426} 428}
427 429
428static int vif_add(struct vifctl *vifc, int mrtsock) 430static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
429{ 431{
430 int vifi = vifc->vifc_vifi; 432 int vifi = vifc->vifc_vifi;
431 struct vif_device *v = &vif_table[vifi]; 433 struct vif_device *v = &net->ipv4.vif_table[vifi];
432 struct net_device *dev; 434 struct net_device *dev;
433 struct in_device *in_dev; 435 struct in_device *in_dev;
434 int err; 436 int err;
435 437
436 /* Is vif busy ? */ 438 /* Is vif busy ? */
437 if (VIF_EXISTS(vifi)) 439 if (VIF_EXISTS(net, vifi))
438 return -EADDRINUSE; 440 return -EADDRINUSE;
439 441
440 switch (vifc->vifc_flags) { 442 switch (vifc->vifc_flags) {
@@ -444,7 +446,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
444 * Special Purpose VIF in PIM 446 * Special Purpose VIF in PIM
445 * All the packets will be sent to the daemon 447 * All the packets will be sent to the daemon
446 */ 448 */
447 if (reg_vif_num >= 0) 449 if (net->ipv4.mroute_reg_vif_num >= 0)
448 return -EADDRINUSE; 450 return -EADDRINUSE;
449 dev = ipmr_reg_vif(); 451 dev = ipmr_reg_vif();
450 if (!dev) 452 if (!dev)
@@ -458,7 +460,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
458 break; 460 break;
459#endif 461#endif
460 case VIFF_TUNNEL: 462 case VIFF_TUNNEL:
461 dev = ipmr_new_tunnel(vifc); 463 dev = ipmr_new_tunnel(net, vifc);
462 if (!dev) 464 if (!dev)
463 return -ENOBUFS; 465 return -ENOBUFS;
464 err = dev_set_allmulti(dev, 1); 466 err = dev_set_allmulti(dev, 1);
@@ -469,7 +471,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
469 } 471 }
470 break; 472 break;
471 case 0: 473 case 0:
472 dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr); 474 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
473 if (!dev) 475 if (!dev)
474 return -EADDRNOTAVAIL; 476 return -EADDRNOTAVAIL;
475 err = dev_set_allmulti(dev, 1); 477 err = dev_set_allmulti(dev, 1);
@@ -510,20 +512,22 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
510 v->dev = dev; 512 v->dev = dev;
511#ifdef CONFIG_IP_PIMSM 513#ifdef CONFIG_IP_PIMSM
512 if (v->flags&VIFF_REGISTER) 514 if (v->flags&VIFF_REGISTER)
513 reg_vif_num = vifi; 515 net->ipv4.mroute_reg_vif_num = vifi;
514#endif 516#endif
515 if (vifi+1 > maxvif) 517 if (vifi+1 > net->ipv4.maxvif)
516 maxvif = vifi+1; 518 net->ipv4.maxvif = vifi+1;
517 write_unlock_bh(&mrt_lock); 519 write_unlock_bh(&mrt_lock);
518 return 0; 520 return 0;
519} 521}
520 522
521static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) 523static struct mfc_cache *ipmr_cache_find(struct net *net,
524 __be32 origin,
525 __be32 mcastgrp)
522{ 526{
523 int line = MFC_HASH(mcastgrp, origin); 527 int line = MFC_HASH(mcastgrp, origin);
524 struct mfc_cache *c; 528 struct mfc_cache *c;
525 529
526 for (c=mfc_cache_array[line]; c; c = c->next) { 530 for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
527 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) 531 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
528 break; 532 break;
529 } 533 }
@@ -533,22 +537,24 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp)
533/* 537/*
534 * Allocate a multicast cache entry 538 * Allocate a multicast cache entry
535 */ 539 */
536static struct mfc_cache *ipmr_cache_alloc(void) 540static struct mfc_cache *ipmr_cache_alloc(struct net *net)
537{ 541{
538 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 542 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
539 if (c == NULL) 543 if (c == NULL)
540 return NULL; 544 return NULL;
541 c->mfc_un.res.minvif = MAXVIFS; 545 c->mfc_un.res.minvif = MAXVIFS;
546 mfc_net_set(c, net);
542 return c; 547 return c;
543} 548}
544 549
545static struct mfc_cache *ipmr_cache_alloc_unres(void) 550static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
546{ 551{
547 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 552 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
548 if (c == NULL) 553 if (c == NULL)
549 return NULL; 554 return NULL;
550 skb_queue_head_init(&c->mfc_un.unres.unresolved); 555 skb_queue_head_init(&c->mfc_un.unres.unresolved);
551 c->mfc_un.unres.expires = jiffies + 10*HZ; 556 c->mfc_un.unres.expires = jiffies + 10*HZ;
557 mfc_net_set(c, net);
552 return c; 558 return c;
553} 559}
554 560
@@ -581,7 +587,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
581 memset(&e->msg, 0, sizeof(e->msg)); 587 memset(&e->msg, 0, sizeof(e->msg));
582 } 588 }
583 589
584 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); 590 rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
585 } else 591 } else
586 ip_mr_forward(skb, c, 0); 592 ip_mr_forward(skb, c, 0);
587 } 593 }
@@ -594,7 +600,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
594 * Called under mrt_lock. 600 * Called under mrt_lock.
595 */ 601 */
596 602
597static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) 603static int ipmr_cache_report(struct net *net,
604 struct sk_buff *pkt, vifi_t vifi, int assert)
598{ 605{
599 struct sk_buff *skb; 606 struct sk_buff *skb;
600 const int ihl = ip_hdrlen(pkt); 607 const int ihl = ip_hdrlen(pkt);
@@ -626,7 +633,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
626 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); 633 memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
627 msg->im_msgtype = IGMPMSG_WHOLEPKT; 634 msg->im_msgtype = IGMPMSG_WHOLEPKT;
628 msg->im_mbz = 0; 635 msg->im_mbz = 0;
629 msg->im_vif = reg_vif_num; 636 msg->im_vif = net->ipv4.mroute_reg_vif_num;
630 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; 637 ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
631 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + 638 ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
632 sizeof(struct iphdr)); 639 sizeof(struct iphdr));
@@ -658,7 +665,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
658 skb->transport_header = skb->network_header; 665 skb->transport_header = skb->network_header;
659 } 666 }
660 667
661 if (mroute_socket == NULL) { 668 if (net->ipv4.mroute_sk == NULL) {
662 kfree_skb(skb); 669 kfree_skb(skb);
663 return -EINVAL; 670 return -EINVAL;
664 } 671 }
@@ -666,7 +673,8 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
666 /* 673 /*
667 * Deliver to mrouted 674 * Deliver to mrouted
668 */ 675 */
669 if ((ret = sock_queue_rcv_skb(mroute_socket, skb))<0) { 676 ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
677 if (ret < 0) {
670 if (net_ratelimit()) 678 if (net_ratelimit())
671 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); 679 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
672 kfree_skb(skb); 680 kfree_skb(skb);
@@ -680,7 +688,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
680 */ 688 */
681 689
682static int 690static int
683ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) 691ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
684{ 692{
685 int err; 693 int err;
686 struct mfc_cache *c; 694 struct mfc_cache *c;
@@ -688,7 +696,8 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
688 696
689 spin_lock_bh(&mfc_unres_lock); 697 spin_lock_bh(&mfc_unres_lock);
690 for (c=mfc_unres_queue; c; c=c->next) { 698 for (c=mfc_unres_queue; c; c=c->next) {
691 if (c->mfc_mcastgrp == iph->daddr && 699 if (net_eq(mfc_net(c), net) &&
700 c->mfc_mcastgrp == iph->daddr &&
692 c->mfc_origin == iph->saddr) 701 c->mfc_origin == iph->saddr)
693 break; 702 break;
694 } 703 }
@@ -698,8 +707,8 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
698 * Create a new entry if allowable 707 * Create a new entry if allowable
699 */ 708 */
700 709
701 if (atomic_read(&cache_resolve_queue_len) >= 10 || 710 if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
702 (c=ipmr_cache_alloc_unres())==NULL) { 711 (c = ipmr_cache_alloc_unres(net)) == NULL) {
703 spin_unlock_bh(&mfc_unres_lock); 712 spin_unlock_bh(&mfc_unres_lock);
704 713
705 kfree_skb(skb); 714 kfree_skb(skb);
@@ -716,18 +725,19 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
716 /* 725 /*
717 * Reflect first query at mrouted. 726 * Reflect first query at mrouted.
718 */ 727 */
719 if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) { 728 err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
729 if (err < 0) {
720 /* If the report failed throw the cache entry 730 /* If the report failed throw the cache entry
721 out - Brad Parker 731 out - Brad Parker
722 */ 732 */
723 spin_unlock_bh(&mfc_unres_lock); 733 spin_unlock_bh(&mfc_unres_lock);
724 734
725 kmem_cache_free(mrt_cachep, c); 735 ipmr_cache_free(c);
726 kfree_skb(skb); 736 kfree_skb(skb);
727 return err; 737 return err;
728 } 738 }
729 739
730 atomic_inc(&cache_resolve_queue_len); 740 atomic_inc(&net->ipv4.cache_resolve_queue_len);
731 c->next = mfc_unres_queue; 741 c->next = mfc_unres_queue;
732 mfc_unres_queue = c; 742 mfc_unres_queue = c;
733 743
@@ -753,35 +763,37 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
753 * MFC cache manipulation by user space mroute daemon 763 * MFC cache manipulation by user space mroute daemon
754 */ 764 */
755 765
756static int ipmr_mfc_delete(struct mfcctl *mfc) 766static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
757{ 767{
758 int line; 768 int line;
759 struct mfc_cache *c, **cp; 769 struct mfc_cache *c, **cp;
760 770
761 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 771 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
762 772
763 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { 773 for (cp = &net->ipv4.mfc_cache_array[line];
774 (c = *cp) != NULL; cp = &c->next) {
764 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 775 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
765 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 776 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
766 write_lock_bh(&mrt_lock); 777 write_lock_bh(&mrt_lock);
767 *cp = c->next; 778 *cp = c->next;
768 write_unlock_bh(&mrt_lock); 779 write_unlock_bh(&mrt_lock);
769 780
770 kmem_cache_free(mrt_cachep, c); 781 ipmr_cache_free(c);
771 return 0; 782 return 0;
772 } 783 }
773 } 784 }
774 return -ENOENT; 785 return -ENOENT;
775} 786}
776 787
777static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) 788static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
778{ 789{
779 int line; 790 int line;
780 struct mfc_cache *uc, *c, **cp; 791 struct mfc_cache *uc, *c, **cp;
781 792
782 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); 793 line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
783 794
784 for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { 795 for (cp = &net->ipv4.mfc_cache_array[line];
796 (c = *cp) != NULL; cp = &c->next) {
785 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 797 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
786 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) 798 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
787 break; 799 break;
@@ -800,7 +812,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
800 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) 812 if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
801 return -EINVAL; 813 return -EINVAL;
802 814
803 c = ipmr_cache_alloc(); 815 c = ipmr_cache_alloc(net);
804 if (c == NULL) 816 if (c == NULL)
805 return -ENOMEM; 817 return -ENOMEM;
806 818
@@ -812,8 +824,8 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
812 c->mfc_flags |= MFC_STATIC; 824 c->mfc_flags |= MFC_STATIC;
813 825
814 write_lock_bh(&mrt_lock); 826 write_lock_bh(&mrt_lock);
815 c->next = mfc_cache_array[line]; 827 c->next = net->ipv4.mfc_cache_array[line];
816 mfc_cache_array[line] = c; 828 net->ipv4.mfc_cache_array[line] = c;
817 write_unlock_bh(&mrt_lock); 829 write_unlock_bh(&mrt_lock);
818 830
819 /* 831 /*
@@ -823,19 +835,21 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
823 spin_lock_bh(&mfc_unres_lock); 835 spin_lock_bh(&mfc_unres_lock);
824 for (cp = &mfc_unres_queue; (uc=*cp) != NULL; 836 for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
825 cp = &uc->next) { 837 cp = &uc->next) {
826 if (uc->mfc_origin == c->mfc_origin && 838 if (net_eq(mfc_net(uc), net) &&
839 uc->mfc_origin == c->mfc_origin &&
827 uc->mfc_mcastgrp == c->mfc_mcastgrp) { 840 uc->mfc_mcastgrp == c->mfc_mcastgrp) {
828 *cp = uc->next; 841 *cp = uc->next;
829 if (atomic_dec_and_test(&cache_resolve_queue_len)) 842 atomic_dec(&net->ipv4.cache_resolve_queue_len);
830 del_timer(&ipmr_expire_timer);
831 break; 843 break;
832 } 844 }
833 } 845 }
846 if (mfc_unres_queue == NULL)
847 del_timer(&ipmr_expire_timer);
834 spin_unlock_bh(&mfc_unres_lock); 848 spin_unlock_bh(&mfc_unres_lock);
835 849
836 if (uc) { 850 if (uc) {
837 ipmr_cache_resolve(uc, c); 851 ipmr_cache_resolve(uc, c);
838 kmem_cache_free(mrt_cachep, uc); 852 ipmr_cache_free(uc);
839 } 853 }
840 return 0; 854 return 0;
841} 855}
@@ -844,16 +858,16 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
844 * Close the multicast socket, and clear the vif tables etc 858 * Close the multicast socket, and clear the vif tables etc
845 */ 859 */
846 860
847static void mroute_clean_tables(struct sock *sk) 861static void mroute_clean_tables(struct net *net)
848{ 862{
849 int i; 863 int i;
850 864
851 /* 865 /*
852 * Shut down all active vif entries 866 * Shut down all active vif entries
853 */ 867 */
854 for (i=0; i<maxvif; i++) { 868 for (i = 0; i < net->ipv4.maxvif; i++) {
855 if (!(vif_table[i].flags&VIFF_STATIC)) 869 if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
856 vif_delete(i, 0); 870 vif_delete(net, i, 0);
857 } 871 }
858 872
859 /* 873 /*
@@ -862,7 +876,7 @@ static void mroute_clean_tables(struct sock *sk)
862 for (i=0; i<MFC_LINES; i++) { 876 for (i=0; i<MFC_LINES; i++) {
863 struct mfc_cache *c, **cp; 877 struct mfc_cache *c, **cp;
864 878
865 cp = &mfc_cache_array[i]; 879 cp = &net->ipv4.mfc_cache_array[i];
866 while ((c = *cp) != NULL) { 880 while ((c = *cp) != NULL) {
867 if (c->mfc_flags&MFC_STATIC) { 881 if (c->mfc_flags&MFC_STATIC) {
868 cp = &c->next; 882 cp = &c->next;
@@ -872,22 +886,23 @@ static void mroute_clean_tables(struct sock *sk)
872 *cp = c->next; 886 *cp = c->next;
873 write_unlock_bh(&mrt_lock); 887 write_unlock_bh(&mrt_lock);
874 888
875 kmem_cache_free(mrt_cachep, c); 889 ipmr_cache_free(c);
876 } 890 }
877 } 891 }
878 892
879 if (atomic_read(&cache_resolve_queue_len) != 0) { 893 if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
880 struct mfc_cache *c; 894 struct mfc_cache *c, **cp;
881 895
882 spin_lock_bh(&mfc_unres_lock); 896 spin_lock_bh(&mfc_unres_lock);
883 while (mfc_unres_queue != NULL) { 897 cp = &mfc_unres_queue;
884 c = mfc_unres_queue; 898 while ((c = *cp) != NULL) {
885 mfc_unres_queue = c->next; 899 if (!net_eq(mfc_net(c), net)) {
886 spin_unlock_bh(&mfc_unres_lock); 900 cp = &c->next;
901 continue;
902 }
903 *cp = c->next;
887 904
888 ipmr_destroy_unres(c); 905 ipmr_destroy_unres(c);
889
890 spin_lock_bh(&mfc_unres_lock);
891 } 906 }
892 spin_unlock_bh(&mfc_unres_lock); 907 spin_unlock_bh(&mfc_unres_lock);
893 } 908 }
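
mroute_clean_tables() now walks the shared unresolved queue with a pointer-to-pointer cursor and unlinks only the entries whose mfc_net() matches the namespace being cleaned, instead of draining the whole queue under repeated lock drops. The fragment below shows that unlink-while-iterating idiom on a plain singly linked list in userspace C; the node type and owner field are illustrative stand-ins.

/* Userspace sketch of the pointer-to-pointer unlink used when
 * mroute_clean_tables() filters the unresolved queue by namespace. */
#include <stdio.h>
#include <stdlib.h>

struct entry {
    int owner;                   /* stands in for mfc_net(c) */
    struct entry *next;
};

static void remove_owned_by(struct entry **head, int owner)
{
    struct entry **cp = head, *c;

    while ((c = *cp) != NULL) {
        if (c->owner != owner) { /* keep entries of other owners */
            cp = &c->next;
            continue;
        }
        *cp = c->next;           /* unlink without a "prev" pointer */
        free(c);
    }
}

static struct entry *push(struct entry *head, int owner)
{
    struct entry *e = malloc(sizeof(*e));
    e->owner = owner;
    e->next = head;
    return e;
}

int main(void)
{
    struct entry *q = NULL;
    q = push(q, 1); q = push(q, 2); q = push(q, 1);

    remove_owned_by(&q, 1);
    for (struct entry *c = q; c; c = c->next)
        printf("left: owner %d\n", c->owner);
    return 0;
}
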
@@ -895,15 +910,17 @@ static void mroute_clean_tables(struct sock *sk)
895 910
896static void mrtsock_destruct(struct sock *sk) 911static void mrtsock_destruct(struct sock *sk)
897{ 912{
913 struct net *net = sock_net(sk);
914
898 rtnl_lock(); 915 rtnl_lock();
899 if (sk == mroute_socket) { 916 if (sk == net->ipv4.mroute_sk) {
900 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--; 917 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
901 918
902 write_lock_bh(&mrt_lock); 919 write_lock_bh(&mrt_lock);
903 mroute_socket = NULL; 920 net->ipv4.mroute_sk = NULL;
904 write_unlock_bh(&mrt_lock); 921 write_unlock_bh(&mrt_lock);
905 922
906 mroute_clean_tables(sk); 923 mroute_clean_tables(net);
907 } 924 }
908 rtnl_unlock(); 925 rtnl_unlock();
909} 926}
@@ -920,9 +937,10 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
920 int ret; 937 int ret;
921 struct vifctl vif; 938 struct vifctl vif;
922 struct mfcctl mfc; 939 struct mfcctl mfc;
940 struct net *net = sock_net(sk);
923 941
924 if (optname != MRT_INIT) { 942 if (optname != MRT_INIT) {
925 if (sk != mroute_socket && !capable(CAP_NET_ADMIN)) 943 if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
926 return -EACCES; 944 return -EACCES;
927 } 945 }
928 946
@@ -935,7 +953,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
935 return -ENOPROTOOPT; 953 return -ENOPROTOOPT;
936 954
937 rtnl_lock(); 955 rtnl_lock();
938 if (mroute_socket) { 956 if (net->ipv4.mroute_sk) {
939 rtnl_unlock(); 957 rtnl_unlock();
940 return -EADDRINUSE; 958 return -EADDRINUSE;
941 } 959 }
@@ -943,15 +961,15 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
943 ret = ip_ra_control(sk, 1, mrtsock_destruct); 961 ret = ip_ra_control(sk, 1, mrtsock_destruct);
944 if (ret == 0) { 962 if (ret == 0) {
945 write_lock_bh(&mrt_lock); 963 write_lock_bh(&mrt_lock);
946 mroute_socket = sk; 964 net->ipv4.mroute_sk = sk;
947 write_unlock_bh(&mrt_lock); 965 write_unlock_bh(&mrt_lock);
948 966
949 IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++; 967 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
950 } 968 }
951 rtnl_unlock(); 969 rtnl_unlock();
952 return ret; 970 return ret;
953 case MRT_DONE: 971 case MRT_DONE:
954 if (sk != mroute_socket) 972 if (sk != net->ipv4.mroute_sk)
955 return -EACCES; 973 return -EACCES;
956 return ip_ra_control(sk, 0, NULL); 974 return ip_ra_control(sk, 0, NULL);
957 case MRT_ADD_VIF: 975 case MRT_ADD_VIF:
@@ -964,9 +982,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
964 return -ENFILE; 982 return -ENFILE;
965 rtnl_lock(); 983 rtnl_lock();
966 if (optname == MRT_ADD_VIF) { 984 if (optname == MRT_ADD_VIF) {
967 ret = vif_add(&vif, sk==mroute_socket); 985 ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
968 } else { 986 } else {
969 ret = vif_delete(vif.vifc_vifi, 0); 987 ret = vif_delete(net, vif.vifc_vifi, 0);
970 } 988 }
971 rtnl_unlock(); 989 rtnl_unlock();
972 return ret; 990 return ret;
@@ -983,9 +1001,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
983 return -EFAULT; 1001 return -EFAULT;
984 rtnl_lock(); 1002 rtnl_lock();
985 if (optname == MRT_DEL_MFC) 1003 if (optname == MRT_DEL_MFC)
986 ret = ipmr_mfc_delete(&mfc); 1004 ret = ipmr_mfc_delete(net, &mfc);
987 else 1005 else
988 ret = ipmr_mfc_add(&mfc, sk==mroute_socket); 1006 ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
989 rtnl_unlock(); 1007 rtnl_unlock();
990 return ret; 1008 return ret;
991 /* 1009 /*
@@ -996,7 +1014,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
996 int v; 1014 int v;
997 if (get_user(v,(int __user *)optval)) 1015 if (get_user(v,(int __user *)optval))
998 return -EFAULT; 1016 return -EFAULT;
999 mroute_do_assert=(v)?1:0; 1017 net->ipv4.mroute_do_assert = (v) ? 1 : 0;
1000 return 0; 1018 return 0;
1001 } 1019 }
1002#ifdef CONFIG_IP_PIMSM 1020#ifdef CONFIG_IP_PIMSM
@@ -1010,11 +1028,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int
1010 1028
1011 rtnl_lock(); 1029 rtnl_lock();
1012 ret = 0; 1030 ret = 0;
1013 if (v != mroute_do_pim) { 1031 if (v != net->ipv4.mroute_do_pim) {
1014 mroute_do_pim = v; 1032 net->ipv4.mroute_do_pim = v;
1015 mroute_do_assert = v; 1033 net->ipv4.mroute_do_assert = v;
1016#ifdef CONFIG_IP_PIMSM_V2 1034#ifdef CONFIG_IP_PIMSM_V2
1017 if (mroute_do_pim) 1035 if (net->ipv4.mroute_do_pim)
1018 ret = inet_add_protocol(&pim_protocol, 1036 ret = inet_add_protocol(&pim_protocol,
1019 IPPROTO_PIM); 1037 IPPROTO_PIM);
1020 else 1038 else
@@ -1045,6 +1063,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1045{ 1063{
1046 int olr; 1064 int olr;
1047 int val; 1065 int val;
1066 struct net *net = sock_net(sk);
1048 1067
1049 if (optname != MRT_VERSION && 1068 if (optname != MRT_VERSION &&
1050#ifdef CONFIG_IP_PIMSM 1069#ifdef CONFIG_IP_PIMSM
@@ -1066,10 +1085,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1066 val = 0x0305; 1085 val = 0x0305;
1067#ifdef CONFIG_IP_PIMSM 1086#ifdef CONFIG_IP_PIMSM
1068 else if (optname == MRT_PIM) 1087 else if (optname == MRT_PIM)
1069 val = mroute_do_pim; 1088 val = net->ipv4.mroute_do_pim;
1070#endif 1089#endif
1071 else 1090 else
1072 val = mroute_do_assert; 1091 val = net->ipv4.mroute_do_assert;
1073 if (copy_to_user(optval, &val, olr)) 1092 if (copy_to_user(optval, &val, olr))
1074 return -EFAULT; 1093 return -EFAULT;
1075 return 0; 1094 return 0;
@@ -1085,16 +1104,17 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1085 struct sioc_vif_req vr; 1104 struct sioc_vif_req vr;
1086 struct vif_device *vif; 1105 struct vif_device *vif;
1087 struct mfc_cache *c; 1106 struct mfc_cache *c;
1107 struct net *net = sock_net(sk);
1088 1108
1089 switch (cmd) { 1109 switch (cmd) {
1090 case SIOCGETVIFCNT: 1110 case SIOCGETVIFCNT:
1091 if (copy_from_user(&vr, arg, sizeof(vr))) 1111 if (copy_from_user(&vr, arg, sizeof(vr)))
1092 return -EFAULT; 1112 return -EFAULT;
1093 if (vr.vifi >= maxvif) 1113 if (vr.vifi >= net->ipv4.maxvif)
1094 return -EINVAL; 1114 return -EINVAL;
1095 read_lock(&mrt_lock); 1115 read_lock(&mrt_lock);
1096 vif=&vif_table[vr.vifi]; 1116 vif = &net->ipv4.vif_table[vr.vifi];
1097 if (VIF_EXISTS(vr.vifi)) { 1117 if (VIF_EXISTS(net, vr.vifi)) {
1098 vr.icount = vif->pkt_in; 1118 vr.icount = vif->pkt_in;
1099 vr.ocount = vif->pkt_out; 1119 vr.ocount = vif->pkt_out;
1100 vr.ibytes = vif->bytes_in; 1120 vr.ibytes = vif->bytes_in;
@@ -1112,7 +1132,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1112 return -EFAULT; 1132 return -EFAULT;
1113 1133
1114 read_lock(&mrt_lock); 1134 read_lock(&mrt_lock);
1115 c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr); 1135 c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
1116 if (c) { 1136 if (c) {
1117 sr.pktcnt = c->mfc_un.res.pkt; 1137 sr.pktcnt = c->mfc_un.res.pkt;
1118 sr.bytecnt = c->mfc_un.res.bytes; 1138 sr.bytecnt = c->mfc_un.res.bytes;
@@ -1134,18 +1154,19 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1134static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) 1154static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1135{ 1155{
1136 struct net_device *dev = ptr; 1156 struct net_device *dev = ptr;
1157 struct net *net = dev_net(dev);
1137 struct vif_device *v; 1158 struct vif_device *v;
1138 int ct; 1159 int ct;
1139 1160
1140 if (!net_eq(dev_net(dev), &init_net)) 1161 if (!net_eq(dev_net(dev), net))
1141 return NOTIFY_DONE; 1162 return NOTIFY_DONE;
1142 1163
1143 if (event != NETDEV_UNREGISTER) 1164 if (event != NETDEV_UNREGISTER)
1144 return NOTIFY_DONE; 1165 return NOTIFY_DONE;
1145 v=&vif_table[0]; 1166 v = &net->ipv4.vif_table[0];
1146 for (ct=0; ct<maxvif; ct++,v++) { 1167 for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
1147 if (v->dev == dev) 1168 if (v->dev == dev)
1148 vif_delete(ct, 1); 1169 vif_delete(net, ct, 1);
1149 } 1170 }
1150 return NOTIFY_DONE; 1171 return NOTIFY_DONE;
1151} 1172}
@@ -1205,8 +1226,9 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
1205 1226
1206static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) 1227static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1207{ 1228{
1229 struct net *net = mfc_net(c);
1208 const struct iphdr *iph = ip_hdr(skb); 1230 const struct iphdr *iph = ip_hdr(skb);
1209 struct vif_device *vif = &vif_table[vifi]; 1231 struct vif_device *vif = &net->ipv4.vif_table[vifi];
1210 struct net_device *dev; 1232 struct net_device *dev;
1211 struct rtable *rt; 1233 struct rtable *rt;
1212 int encap = 0; 1234 int encap = 0;
@@ -1220,9 +1242,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1220 vif->bytes_out += skb->len; 1242 vif->bytes_out += skb->len;
1221 vif->dev->stats.tx_bytes += skb->len; 1243 vif->dev->stats.tx_bytes += skb->len;
1222 vif->dev->stats.tx_packets++; 1244 vif->dev->stats.tx_packets++;
1223 ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); 1245 ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
1224 kfree_skb(skb); 1246 goto out_free;
1225 return;
1226 } 1247 }
1227#endif 1248#endif
1228 1249
@@ -1233,7 +1254,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1233 .saddr = vif->local, 1254 .saddr = vif->local,
1234 .tos = RT_TOS(iph->tos) } }, 1255 .tos = RT_TOS(iph->tos) } },
1235 .proto = IPPROTO_IPIP }; 1256 .proto = IPPROTO_IPIP };
1236 if (ip_route_output_key(&init_net, &rt, &fl)) 1257 if (ip_route_output_key(net, &rt, &fl))
1237 goto out_free; 1258 goto out_free;
1238 encap = sizeof(struct iphdr); 1259 encap = sizeof(struct iphdr);
1239 } else { 1260 } else {
@@ -1242,7 +1263,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
1242 { .daddr = iph->daddr, 1263 { .daddr = iph->daddr,
1243 .tos = RT_TOS(iph->tos) } }, 1264 .tos = RT_TOS(iph->tos) } },
1244 .proto = IPPROTO_IPIP }; 1265 .proto = IPPROTO_IPIP };
1245 if (ip_route_output_key(&init_net, &rt, &fl)) 1266 if (ip_route_output_key(net, &rt, &fl))
1246 goto out_free; 1267 goto out_free;
1247 } 1268 }
1248 1269
@@ -1306,9 +1327,10 @@ out_free:
1306 1327
1307static int ipmr_find_vif(struct net_device *dev) 1328static int ipmr_find_vif(struct net_device *dev)
1308{ 1329{
1330 struct net *net = dev_net(dev);
1309 int ct; 1331 int ct;
1310 for (ct=maxvif-1; ct>=0; ct--) { 1332 for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
1311 if (vif_table[ct].dev == dev) 1333 if (net->ipv4.vif_table[ct].dev == dev)
1312 break; 1334 break;
1313 } 1335 }
1314 return ct; 1336 return ct;
@@ -1320,6 +1342,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1320{ 1342{
1321 int psend = -1; 1343 int psend = -1;
1322 int vif, ct; 1344 int vif, ct;
1345 struct net *net = mfc_net(cache);
1323 1346
1324 vif = cache->mfc_parent; 1347 vif = cache->mfc_parent;
1325 cache->mfc_un.res.pkt++; 1348 cache->mfc_un.res.pkt++;
@@ -1328,7 +1351,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1328 /* 1351 /*
1329 * Wrong interface: drop packet and (maybe) send PIM assert. 1352 * Wrong interface: drop packet and (maybe) send PIM assert.
1330 */ 1353 */
1331 if (vif_table[vif].dev != skb->dev) { 1354 if (net->ipv4.vif_table[vif].dev != skb->dev) {
1332 int true_vifi; 1355 int true_vifi;
1333 1356
1334 if (skb->rtable->fl.iif == 0) { 1357 if (skb->rtable->fl.iif == 0) {
@@ -1349,23 +1372,24 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
1349 cache->mfc_un.res.wrong_if++; 1372 cache->mfc_un.res.wrong_if++;
1350 true_vifi = ipmr_find_vif(skb->dev); 1373 true_vifi = ipmr_find_vif(skb->dev);
1351 1374
1352 if (true_vifi >= 0 && mroute_do_assert && 1375 if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
1353 /* pimsm uses asserts, when switching from RPT to SPT, 1376 /* pimsm uses asserts, when switching from RPT to SPT,
1354 so that we cannot check that packet arrived on an oif. 1377 so that we cannot check that packet arrived on an oif.
1355 It is bad, but otherwise we would need to move pretty 1378 It is bad, but otherwise we would need to move pretty
1356 large chunk of pimd to kernel. Ough... --ANK 1379 large chunk of pimd to kernel. Ough... --ANK
1357 */ 1380 */
1358 (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && 1381 (net->ipv4.mroute_do_pim ||
1382 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1359 time_after(jiffies, 1383 time_after(jiffies,
1360 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { 1384 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1361 cache->mfc_un.res.last_assert = jiffies; 1385 cache->mfc_un.res.last_assert = jiffies;
1362 ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF); 1386 ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
1363 } 1387 }
1364 goto dont_forward; 1388 goto dont_forward;
1365 } 1389 }
1366 1390
1367 vif_table[vif].pkt_in++; 1391 net->ipv4.vif_table[vif].pkt_in++;
1368 vif_table[vif].bytes_in += skb->len; 1392 net->ipv4.vif_table[vif].bytes_in += skb->len;
1369 1393
1370 /* 1394 /*
1371 * Forward the frame 1395 * Forward the frame
@@ -1405,6 +1429,7 @@ dont_forward:
1405int ip_mr_input(struct sk_buff *skb) 1429int ip_mr_input(struct sk_buff *skb)
1406{ 1430{
1407 struct mfc_cache *cache; 1431 struct mfc_cache *cache;
1432 struct net *net = dev_net(skb->dev);
1408 int local = skb->rtable->rt_flags&RTCF_LOCAL; 1433 int local = skb->rtable->rt_flags&RTCF_LOCAL;
1409 1434
1410 /* Packet is looped back after forward, it should not be 1435 /* Packet is looped back after forward, it should not be
@@ -1425,9 +1450,9 @@ int ip_mr_input(struct sk_buff *skb)
1425 that we can forward NO IGMP messages. 1450 that we can forward NO IGMP messages.
1426 */ 1451 */
1427 read_lock(&mrt_lock); 1452 read_lock(&mrt_lock);
1428 if (mroute_socket) { 1453 if (net->ipv4.mroute_sk) {
1429 nf_reset(skb); 1454 nf_reset(skb);
1430 raw_rcv(mroute_socket, skb); 1455 raw_rcv(net->ipv4.mroute_sk, skb);
1431 read_unlock(&mrt_lock); 1456 read_unlock(&mrt_lock);
1432 return 0; 1457 return 0;
1433 } 1458 }
@@ -1436,7 +1461,7 @@ int ip_mr_input(struct sk_buff *skb)
1436 } 1461 }
1437 1462
1438 read_lock(&mrt_lock); 1463 read_lock(&mrt_lock);
1439 cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 1464 cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1440 1465
1441 /* 1466 /*
1442 * No usable cache entry 1467 * No usable cache entry
@@ -1456,7 +1481,7 @@ int ip_mr_input(struct sk_buff *skb)
1456 1481
1457 vif = ipmr_find_vif(skb->dev); 1482 vif = ipmr_find_vif(skb->dev);
1458 if (vif >= 0) { 1483 if (vif >= 0) {
1459 int err = ipmr_cache_unresolved(vif, skb); 1484 int err = ipmr_cache_unresolved(net, vif, skb);
1460 read_unlock(&mrt_lock); 1485 read_unlock(&mrt_lock);
1461 1486
1462 return err; 1487 return err;
@@ -1487,6 +1512,7 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1487{ 1512{
1488 struct net_device *reg_dev = NULL; 1513 struct net_device *reg_dev = NULL;
1489 struct iphdr *encap; 1514 struct iphdr *encap;
1515 struct net *net = dev_net(skb->dev);
1490 1516
1491 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); 1517 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1492 /* 1518 /*
@@ -1501,8 +1527,8 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1501 return 1; 1527 return 1;
1502 1528
1503 read_lock(&mrt_lock); 1529 read_lock(&mrt_lock);
1504 if (reg_vif_num >= 0) 1530 if (net->ipv4.mroute_reg_vif_num >= 0)
1505 reg_dev = vif_table[reg_vif_num].dev; 1531 reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
1506 if (reg_dev) 1532 if (reg_dev)
1507 dev_hold(reg_dev); 1533 dev_hold(reg_dev);
1508 read_unlock(&mrt_lock); 1534 read_unlock(&mrt_lock);
@@ -1537,13 +1563,14 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
1537int pim_rcv_v1(struct sk_buff * skb) 1563int pim_rcv_v1(struct sk_buff * skb)
1538{ 1564{
1539 struct igmphdr *pim; 1565 struct igmphdr *pim;
1566 struct net *net = dev_net(skb->dev);
1540 1567
1541 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) 1568 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
1542 goto drop; 1569 goto drop;
1543 1570
1544 pim = igmp_hdr(skb); 1571 pim = igmp_hdr(skb);
1545 1572
1546 if (!mroute_do_pim || 1573 if (!net->ipv4.mroute_do_pim ||
1547 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 1574 pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
1548 goto drop; 1575 goto drop;
1549 1576
@@ -1583,7 +1610,8 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1583{ 1610{
1584 int ct; 1611 int ct;
1585 struct rtnexthop *nhp; 1612 struct rtnexthop *nhp;
1586 struct net_device *dev = vif_table[c->mfc_parent].dev; 1613 struct net *net = mfc_net(c);
1614 struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev;
1587 u8 *b = skb_tail_pointer(skb); 1615 u8 *b = skb_tail_pointer(skb);
1588 struct rtattr *mp_head; 1616 struct rtattr *mp_head;
1589 1617
@@ -1599,7 +1627,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
1599 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); 1627 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1600 nhp->rtnh_flags = 0; 1628 nhp->rtnh_flags = 0;
1601 nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; 1629 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
1602 nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; 1630 nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
1603 nhp->rtnh_len = sizeof(*nhp); 1631 nhp->rtnh_len = sizeof(*nhp);
1604 } 1632 }
1605 } 1633 }
@@ -1613,14 +1641,15 @@ rtattr_failure:
1613 return -EMSGSIZE; 1641 return -EMSGSIZE;
1614} 1642}
1615 1643
1616int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) 1644int ipmr_get_route(struct net *net,
1645 struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1617{ 1646{
1618 int err; 1647 int err;
1619 struct mfc_cache *cache; 1648 struct mfc_cache *cache;
1620 struct rtable *rt = skb->rtable; 1649 struct rtable *rt = skb->rtable;
1621 1650
1622 read_lock(&mrt_lock); 1651 read_lock(&mrt_lock);
1623 cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); 1652 cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
1624 1653
1625 if (cache == NULL) { 1654 if (cache == NULL) {
1626 struct sk_buff *skb2; 1655 struct sk_buff *skb2;
@@ -1651,7 +1680,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1651 iph->saddr = rt->rt_src; 1680 iph->saddr = rt->rt_src;
1652 iph->daddr = rt->rt_dst; 1681 iph->daddr = rt->rt_dst;
1653 iph->version = 0; 1682 iph->version = 0;
1654 err = ipmr_cache_unresolved(vif, skb2); 1683 err = ipmr_cache_unresolved(net, vif, skb2);
1655 read_unlock(&mrt_lock); 1684 read_unlock(&mrt_lock);
1656 return err; 1685 return err;
1657 } 1686 }
@@ -1668,17 +1697,19 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1668 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif 1697 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1669 */ 1698 */
1670struct ipmr_vif_iter { 1699struct ipmr_vif_iter {
1700 struct seq_net_private p;
1671 int ct; 1701 int ct;
1672}; 1702};
1673 1703
1674static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter, 1704static struct vif_device *ipmr_vif_seq_idx(struct net *net,
1705 struct ipmr_vif_iter *iter,
1675 loff_t pos) 1706 loff_t pos)
1676{ 1707{
1677 for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { 1708 for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
1678 if (!VIF_EXISTS(iter->ct)) 1709 if (!VIF_EXISTS(net, iter->ct))
1679 continue; 1710 continue;
1680 if (pos-- == 0) 1711 if (pos-- == 0)
1681 return &vif_table[iter->ct]; 1712 return &net->ipv4.vif_table[iter->ct];
1682 } 1713 }
1683 return NULL; 1714 return NULL;
1684} 1715}
@@ -1686,23 +1717,26 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter,
1686static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) 1717static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
1687 __acquires(mrt_lock) 1718 __acquires(mrt_lock)
1688{ 1719{
1720 struct net *net = seq_file_net(seq);
1721
1689 read_lock(&mrt_lock); 1722 read_lock(&mrt_lock);
1690 return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) 1723 return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
1691 : SEQ_START_TOKEN; 1724 : SEQ_START_TOKEN;
1692} 1725}
1693 1726
1694static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1727static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1695{ 1728{
1696 struct ipmr_vif_iter *iter = seq->private; 1729 struct ipmr_vif_iter *iter = seq->private;
1730 struct net *net = seq_file_net(seq);
1697 1731
1698 ++*pos; 1732 ++*pos;
1699 if (v == SEQ_START_TOKEN) 1733 if (v == SEQ_START_TOKEN)
1700 return ipmr_vif_seq_idx(iter, 0); 1734 return ipmr_vif_seq_idx(net, iter, 0);
1701 1735
1702 while (++iter->ct < maxvif) { 1736 while (++iter->ct < net->ipv4.maxvif) {
1703 if (!VIF_EXISTS(iter->ct)) 1737 if (!VIF_EXISTS(net, iter->ct))
1704 continue; 1738 continue;
1705 return &vif_table[iter->ct]; 1739 return &net->ipv4.vif_table[iter->ct];
1706 } 1740 }
1707 return NULL; 1741 return NULL;
1708} 1742}
@@ -1715,6 +1749,8 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
1715 1749
1716static int ipmr_vif_seq_show(struct seq_file *seq, void *v) 1750static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1717{ 1751{
1752 struct net *net = seq_file_net(seq);
1753
1718 if (v == SEQ_START_TOKEN) { 1754 if (v == SEQ_START_TOKEN) {
1719 seq_puts(seq, 1755 seq_puts(seq,
1720 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); 1756 "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
@@ -1724,7 +1760,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
1724 1760
1725 seq_printf(seq, 1761 seq_printf(seq,
1726 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", 1762 "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
1727 vif - vif_table, 1763 vif - net->ipv4.vif_table,
1728 name, vif->bytes_in, vif->pkt_in, 1764 name, vif->bytes_in, vif->pkt_in,
1729 vif->bytes_out, vif->pkt_out, 1765 vif->bytes_out, vif->pkt_out,
1730 vif->flags, vif->local, vif->remote); 1766 vif->flags, vif->local, vif->remote);
@@ -1741,8 +1777,8 @@ static const struct seq_operations ipmr_vif_seq_ops = {
1741 1777
1742static int ipmr_vif_open(struct inode *inode, struct file *file) 1778static int ipmr_vif_open(struct inode *inode, struct file *file)
1743{ 1779{
1744 return seq_open_private(file, &ipmr_vif_seq_ops, 1780 return seq_open_net(inode, file, &ipmr_vif_seq_ops,
1745 sizeof(struct ipmr_vif_iter)); 1781 sizeof(struct ipmr_vif_iter));
1746} 1782}
1747 1783
1748static const struct file_operations ipmr_vif_fops = { 1784static const struct file_operations ipmr_vif_fops = {
@@ -1750,23 +1786,26 @@ static const struct file_operations ipmr_vif_fops = {
1750 .open = ipmr_vif_open, 1786 .open = ipmr_vif_open,
1751 .read = seq_read, 1787 .read = seq_read,
1752 .llseek = seq_lseek, 1788 .llseek = seq_lseek,
1753 .release = seq_release_private, 1789 .release = seq_release_net,
1754}; 1790};
1755 1791
1756struct ipmr_mfc_iter { 1792struct ipmr_mfc_iter {
1793 struct seq_net_private p;
1757 struct mfc_cache **cache; 1794 struct mfc_cache **cache;
1758 int ct; 1795 int ct;
1759}; 1796};
1760 1797
1761 1798
1762static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) 1799static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
1800 struct ipmr_mfc_iter *it, loff_t pos)
1763{ 1801{
1764 struct mfc_cache *mfc; 1802 struct mfc_cache *mfc;
1765 1803
1766 it->cache = mfc_cache_array; 1804 it->cache = net->ipv4.mfc_cache_array;
1767 read_lock(&mrt_lock); 1805 read_lock(&mrt_lock);
1768 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) 1806 for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
1769 for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) 1807 for (mfc = net->ipv4.mfc_cache_array[it->ct];
1808 mfc; mfc = mfc->next)
1770 if (pos-- == 0) 1809 if (pos-- == 0)
1771 return mfc; 1810 return mfc;
1772 read_unlock(&mrt_lock); 1811 read_unlock(&mrt_lock);
@@ -1774,7 +1813,8 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1774 it->cache = &mfc_unres_queue; 1813 it->cache = &mfc_unres_queue;
1775 spin_lock_bh(&mfc_unres_lock); 1814 spin_lock_bh(&mfc_unres_lock);
1776 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) 1815 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
1777 if (pos-- == 0) 1816 if (net_eq(mfc_net(mfc), net) &&
1817 pos-- == 0)
1778 return mfc; 1818 return mfc;
1779 spin_unlock_bh(&mfc_unres_lock); 1819 spin_unlock_bh(&mfc_unres_lock);
1780 1820
@@ -1786,9 +1826,11 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
1786static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) 1826static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
1787{ 1827{
1788 struct ipmr_mfc_iter *it = seq->private; 1828 struct ipmr_mfc_iter *it = seq->private;
1829 struct net *net = seq_file_net(seq);
1830
1789 it->cache = NULL; 1831 it->cache = NULL;
1790 it->ct = 0; 1832 it->ct = 0;
1791 return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) 1833 return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
1792 : SEQ_START_TOKEN; 1834 : SEQ_START_TOKEN;
1793} 1835}
1794 1836
@@ -1796,11 +1838,12 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1796{ 1838{
1797 struct mfc_cache *mfc = v; 1839 struct mfc_cache *mfc = v;
1798 struct ipmr_mfc_iter *it = seq->private; 1840 struct ipmr_mfc_iter *it = seq->private;
1841 struct net *net = seq_file_net(seq);
1799 1842
1800 ++*pos; 1843 ++*pos;
1801 1844
1802 if (v == SEQ_START_TOKEN) 1845 if (v == SEQ_START_TOKEN)
1803 return ipmr_mfc_seq_idx(seq->private, 0); 1846 return ipmr_mfc_seq_idx(net, seq->private, 0);
1804 1847
1805 if (mfc->next) 1848 if (mfc->next)
1806 return mfc->next; 1849 return mfc->next;
@@ -1808,10 +1851,10 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1808 if (it->cache == &mfc_unres_queue) 1851 if (it->cache == &mfc_unres_queue)
1809 goto end_of_list; 1852 goto end_of_list;
1810 1853
1811 BUG_ON(it->cache != mfc_cache_array); 1854 BUG_ON(it->cache != net->ipv4.mfc_cache_array);
1812 1855
1813 while (++it->ct < MFC_LINES) { 1856 while (++it->ct < MFC_LINES) {
1814 mfc = mfc_cache_array[it->ct]; 1857 mfc = net->ipv4.mfc_cache_array[it->ct];
1815 if (mfc) 1858 if (mfc)
1816 return mfc; 1859 return mfc;
1817 } 1860 }
@@ -1823,6 +1866,8 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1823 1866
1824 spin_lock_bh(&mfc_unres_lock); 1867 spin_lock_bh(&mfc_unres_lock);
1825 mfc = mfc_unres_queue; 1868 mfc = mfc_unres_queue;
1869 while (mfc && !net_eq(mfc_net(mfc), net))
1870 mfc = mfc->next;
1826 if (mfc) 1871 if (mfc)
1827 return mfc; 1872 return mfc;
1828 1873
@@ -1836,16 +1881,18 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1836static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) 1881static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
1837{ 1882{
1838 struct ipmr_mfc_iter *it = seq->private; 1883 struct ipmr_mfc_iter *it = seq->private;
1884 struct net *net = seq_file_net(seq);
1839 1885
1840 if (it->cache == &mfc_unres_queue) 1886 if (it->cache == &mfc_unres_queue)
1841 spin_unlock_bh(&mfc_unres_lock); 1887 spin_unlock_bh(&mfc_unres_lock);
1842 else if (it->cache == mfc_cache_array) 1888 else if (it->cache == net->ipv4.mfc_cache_array)
1843 read_unlock(&mrt_lock); 1889 read_unlock(&mrt_lock);
1844} 1890}
1845 1891
1846static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 1892static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1847{ 1893{
1848 int n; 1894 int n;
1895 struct net *net = seq_file_net(seq);
1849 1896
1850 if (v == SEQ_START_TOKEN) { 1897 if (v == SEQ_START_TOKEN) {
1851 seq_puts(seq, 1898 seq_puts(seq,
@@ -1866,9 +1913,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
1866 mfc->mfc_un.res.wrong_if); 1913 mfc->mfc_un.res.wrong_if);
1867 for (n = mfc->mfc_un.res.minvif; 1914 for (n = mfc->mfc_un.res.minvif;
1868 n < mfc->mfc_un.res.maxvif; n++ ) { 1915 n < mfc->mfc_un.res.maxvif; n++ ) {
1869 if (VIF_EXISTS(n) 1916 if (VIF_EXISTS(net, n) &&
1870 && mfc->mfc_un.res.ttls[n] < 255) 1917 mfc->mfc_un.res.ttls[n] < 255)
1871 seq_printf(seq, 1918 seq_printf(seq,
1872 " %2d:%-3d", 1919 " %2d:%-3d",
1873 n, mfc->mfc_un.res.ttls[n]); 1920 n, mfc->mfc_un.res.ttls[n]);
1874 } 1921 }
@@ -1892,8 +1939,8 @@ static const struct seq_operations ipmr_mfc_seq_ops = {
1892 1939
1893static int ipmr_mfc_open(struct inode *inode, struct file *file) 1940static int ipmr_mfc_open(struct inode *inode, struct file *file)
1894{ 1941{
1895 return seq_open_private(file, &ipmr_mfc_seq_ops, 1942 return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
1896 sizeof(struct ipmr_mfc_iter)); 1943 sizeof(struct ipmr_mfc_iter));
1897} 1944}
1898 1945
1899static const struct file_operations ipmr_mfc_fops = { 1946static const struct file_operations ipmr_mfc_fops = {
@@ -1901,7 +1948,7 @@ static const struct file_operations ipmr_mfc_fops = {
1901 .open = ipmr_mfc_open, 1948 .open = ipmr_mfc_open,
1902 .read = seq_read, 1949 .read = seq_read,
1903 .llseek = seq_lseek, 1950 .llseek = seq_lseek,
1904 .release = seq_release_private, 1951 .release = seq_release_net,
1905}; 1952};
1906#endif 1953#endif
1907 1954
@@ -1915,6 +1962,65 @@ static struct net_protocol pim_protocol = {
1915/* 1962/*
1916 * Setup for IP multicast routing 1963 * Setup for IP multicast routing
1917 */ 1964 */
1965static int __net_init ipmr_net_init(struct net *net)
1966{
1967 int err = 0;
1968
1969 net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
1970 GFP_KERNEL);
1971 if (!net->ipv4.vif_table) {
1972 err = -ENOMEM;
1973 goto fail;
1974 }
1975
1976 /* Forwarding cache */
1977 net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
1978 sizeof(struct mfc_cache *),
1979 GFP_KERNEL);
1980 if (!net->ipv4.mfc_cache_array) {
1981 err = -ENOMEM;
1982 goto fail_mfc_cache;
1983 }
1984
1985#ifdef CONFIG_IP_PIMSM
1986 net->ipv4.mroute_reg_vif_num = -1;
1987#endif
1988
1989#ifdef CONFIG_PROC_FS
1990 err = -ENOMEM;
1991 if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
1992 goto proc_vif_fail;
1993 if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1994 goto proc_cache_fail;
1995#endif
1996 return 0;
1997
1998#ifdef CONFIG_PROC_FS
1999proc_cache_fail:
2000 proc_net_remove(net, "ip_mr_vif");
2001proc_vif_fail:
2002 kfree(net->ipv4.mfc_cache_array);
2003#endif
2004fail_mfc_cache:
2005 kfree(net->ipv4.vif_table);
2006fail:
2007 return err;
2008}
2009
2010static void __net_exit ipmr_net_exit(struct net *net)
2011{
2012#ifdef CONFIG_PROC_FS
2013 proc_net_remove(net, "ip_mr_cache");
2014 proc_net_remove(net, "ip_mr_vif");
2015#endif
2016 kfree(net->ipv4.mfc_cache_array);
2017 kfree(net->ipv4.vif_table);
2018}
2019
2020static struct pernet_operations ipmr_net_ops = {
2021 .init = ipmr_net_init,
2022 .exit = ipmr_net_exit,
2023};
1918 2024
1919int __init ip_mr_init(void) 2025int __init ip_mr_init(void)
1920{ 2026{
@@ -1927,26 +2033,20 @@ int __init ip_mr_init(void)
1927 if (!mrt_cachep) 2033 if (!mrt_cachep)
1928 return -ENOMEM; 2034 return -ENOMEM;
1929 2035
2036 err = register_pernet_subsys(&ipmr_net_ops);
2037 if (err)
2038 goto reg_pernet_fail;
2039
1930 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); 2040 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
1931 err = register_netdevice_notifier(&ip_mr_notifier); 2041 err = register_netdevice_notifier(&ip_mr_notifier);
1932 if (err) 2042 if (err)
1933 goto reg_notif_fail; 2043 goto reg_notif_fail;
1934#ifdef CONFIG_PROC_FS
1935 err = -ENOMEM;
1936 if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops))
1937 goto proc_vif_fail;
1938 if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops))
1939 goto proc_cache_fail;
1940#endif
1941 return 0; 2044 return 0;
1942#ifdef CONFIG_PROC_FS 2045
1943proc_cache_fail:
1944 proc_net_remove(&init_net, "ip_mr_vif");
1945proc_vif_fail:
1946 unregister_netdevice_notifier(&ip_mr_notifier);
1947#endif
1948reg_notif_fail: 2046reg_notif_fail:
1949 del_timer(&ipmr_expire_timer); 2047 del_timer(&ipmr_expire_timer);
2048 unregister_pernet_subsys(&ipmr_net_ops);
2049reg_pernet_fail:
1950 kmem_cache_destroy(mrt_cachep); 2050 kmem_cache_destroy(mrt_cachep);
1951 return err; 2051 return err;
1952} 2052}
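
The ipmr hunks above make the multicast-routing state and its /proc files per network namespace: the open/release handlers switch from seq_open_private()/seq_release_private() to seq_open_net()/seq_release_net(), the iterators grow a leading struct seq_net_private, and every seq callback fetches its namespace with seq_file_net() instead of touching globals. Below is a minimal kernel-style sketch of that trio, using hypothetical foo_* names; only the seq_*_net() helpers, the file_operations wiring, and the rule that seq_net_private must be the first iterator member come from the real API.

    #include <linux/module.h>
    #include <linux/fs.h>
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>
    #include <linux/seq_file_net.h>
    #include <net/net_namespace.h>

    struct foo_iter {
            struct seq_net_private p;   /* must be first: seq_file_net() reads it */
            int ct;                     /* cursor, like ipmr_vif_iter above */
    };

    static void *foo_seq_start(struct seq_file *seq, loff_t *pos)
    {
            struct foo_iter *iter = seq->private;

            iter->ct = 0;
            /* real code walks seq_file_net(seq)->ipv4.* state from here */
            return *pos ? NULL : SEQ_START_TOKEN;
    }

    static void *foo_seq_next(struct seq_file *seq, void *v, loff_t *pos)
    {
            ++*pos;
            return NULL;                /* nothing after the header in this sketch */
    }

    static void foo_seq_stop(struct seq_file *seq, void *v)
    {
    }

    static int foo_seq_show(struct seq_file *seq, void *v)
    {
            struct net *net = seq_file_net(seq);    /* namespace of the opener */

            if (v == SEQ_START_TOKEN)
                    seq_printf(seq, "loopback ifindex in this netns: %d\n",
                               net->loopback_dev->ifindex);
            return 0;
    }

    static const struct seq_operations foo_seq_ops = {
            .start = foo_seq_start,
            .next  = foo_seq_next,
            .stop  = foo_seq_stop,
            .show  = foo_seq_show,
    };

    static int foo_open(struct inode *inode, struct file *file)
    {
            return seq_open_net(inode, file, &foo_seq_ops,
                                sizeof(struct foo_iter));
    }

    static const struct file_operations foo_fops = {
            .owner   = THIS_MODULE,
            .open    = foo_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = seq_release_net, /* pairs with seq_open_net() */
    };

In the patch this open/release pair is exactly what ipmr_vif_open() and ipmr_mfc_open() become, and the entries themselves are created from ipmr_net_init() with proc_net_fops_create(net, ...), so each namespace gets its own ip_mr_vif and ip_mr_cache files.
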
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 3816e1dc9295..1833bdbf9805 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -31,7 +31,7 @@ config NF_CONNTRACK_PROC_COMPAT
31 default y 31 default y
32 help 32 help
33 This option enables /proc and sysctl compatibility with the old 33 This option enables /proc and sysctl compatibility with the old
34 layer 3 dependant connection tracking. This is needed to keep 34 layer 3 dependent connection tracking. This is needed to keep
35 old programs that have not been adapted to the new names working. 35 old programs that have not been adapted to the new names working.
36 36
37 If unsure, say Y. 37 If unsure, say Y.
@@ -95,11 +95,11 @@ config IP_NF_MATCH_ECN
95config IP_NF_MATCH_TTL 95config IP_NF_MATCH_TTL
96 tristate '"ttl" match support' 96 tristate '"ttl" match support'
97 depends on NETFILTER_ADVANCED 97 depends on NETFILTER_ADVANCED
98 help 98 select NETFILTER_XT_MATCH_HL
99 This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user 99 ---help---
100 to match packets by their TTL value. 100 This is a backwards-compat option for the user's convenience
101 101 (e.g. when running oldconfig). It selects
102 To compile it as a module, choose M here. If unsure, say N. 102 CONFIG_NETFILTER_XT_MATCH_HL.
103 103
104# `filter', generic and specific targets 104# `filter', generic and specific targets
105config IP_NF_FILTER 105config IP_NF_FILTER
@@ -323,19 +323,13 @@ config IP_NF_TARGET_ECN
323 To compile it as a module, choose M here. If unsure, say N. 323 To compile it as a module, choose M here. If unsure, say N.
324 324
325config IP_NF_TARGET_TTL 325config IP_NF_TARGET_TTL
326 tristate 'TTL target support' 326 tristate '"TTL" target support'
327 depends on IP_NF_MANGLE
328 depends on NETFILTER_ADVANCED 327 depends on NETFILTER_ADVANCED
329 help 328 select NETFILTER_XT_TARGET_HL
330 This option adds a `TTL' target, which enables the user to modify 329 ---help---
331 the TTL value of the IP header. 330 This is a backwards-compat option for the user's convenience
332 331 (e.g. when running oldconfig). It selects
333 While it is safe to decrement/lower the TTL, this target also enables 332 CONFIG_NETFILTER_XT_TARGET_HL.
334 functionality to increment and set the TTL value of the IP header to
335 arbitrary values. This is EXTREMELY DANGEROUS since you can easily
336 create immortal packets that loop forever on the network.
337
338 To compile it as a module, choose M here. If unsure, say N.
339 333
340# raw + specific targets 334# raw + specific targets
341config IP_NF_RAW 335config IP_NF_RAW
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 5f9b650d90fc..48111594ee9b 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -51,7 +51,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
51obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o 51obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
52obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o 52obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
53obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o 53obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
54obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
55 54
56# targets 55# targets
57obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o 56obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
@@ -61,7 +60,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
61obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o 60obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
62obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o 61obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
63obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o 62obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
64obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
65obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o 63obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
66 64
67# generic ARP tables 65# generic ARP tables
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 7ea88b61cb0d..35c5f6a5cb7c 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -73,6 +73,28 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
73 return (ret != 0); 73 return (ret != 0);
74} 74}
75 75
76/*
77 * Unfortunately, _b and _mask are not aligned to an int (or long int).
78 * Some arches don't care; unrolling the loop is a win on them.
79 * For other arches, we only have 16-bit alignment.
80 */
81static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask)
82{
83#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
84 unsigned long ret = ifname_compare_aligned(_a, _b, _mask);
85#else
86 unsigned long ret = 0;
87 const u16 *a = (const u16 *)_a;
88 const u16 *b = (const u16 *)_b;
89 const u16 *mask = (const u16 *)_mask;
90 int i;
91
92 for (i = 0; i < IFNAMSIZ/sizeof(u16); i++)
93 ret |= (a[i] ^ b[i]) & mask[i];
94#endif
95 return ret;
96}
97
76/* Returns whether packet matches rule or not. */ 98/* Returns whether packet matches rule or not. */
77static inline int arp_packet_match(const struct arphdr *arphdr, 99static inline int arp_packet_match(const struct arphdr *arphdr,
78 struct net_device *dev, 100 struct net_device *dev,
@@ -83,7 +105,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
83 const char *arpptr = (char *)(arphdr + 1); 105 const char *arpptr = (char *)(arphdr + 1);
84 const char *src_devaddr, *tgt_devaddr; 106 const char *src_devaddr, *tgt_devaddr;
85 __be32 src_ipaddr, tgt_ipaddr; 107 __be32 src_ipaddr, tgt_ipaddr;
86 int i, ret; 108 long ret;
87 109
88#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) 110#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
89 111
@@ -156,10 +178,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
156 } 178 }
157 179
158 /* Look for ifname matches. */ 180 /* Look for ifname matches. */
159 for (i = 0, ret = 0; i < IFNAMSIZ; i++) { 181 ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask);
160 ret |= (indev[i] ^ arpinfo->iniface[i])
161 & arpinfo->iniface_mask[i];
162 }
163 182
164 if (FWINV(ret != 0, ARPT_INV_VIA_IN)) { 183 if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
165 dprintf("VIA in mismatch (%s vs %s).%s\n", 184 dprintf("VIA in mismatch (%s vs %s).%s\n",
@@ -168,10 +187,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
168 return 0; 187 return 0;
169 } 188 }
170 189
171 for (i = 0, ret = 0; i < IFNAMSIZ; i++) { 190 ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask);
172 ret |= (outdev[i] ^ arpinfo->outiface[i])
173 & arpinfo->outiface_mask[i];
174 }
175 191
176 if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) { 192 if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
177 dprintf("VIA out mismatch (%s vs %s).%s\n", 193 dprintf("VIA out mismatch (%s vs %s).%s\n",
@@ -221,7 +237,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
221 const struct net_device *out, 237 const struct net_device *out,
222 struct xt_table *table) 238 struct xt_table *table)
223{ 239{
224 static const char nulldevname[IFNAMSIZ]; 240 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
225 unsigned int verdict = NF_DROP; 241 unsigned int verdict = NF_DROP;
226 const struct arphdr *arp; 242 const struct arphdr *arp;
227 bool hotdrop = false; 243 bool hotdrop = false;
@@ -237,9 +253,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
237 indev = in ? in->name : nulldevname; 253 indev = in ? in->name : nulldevname;
238 outdev = out ? out->name : nulldevname; 254 outdev = out ? out->name : nulldevname;
239 255
240 read_lock_bh(&table->lock); 256 rcu_read_lock();
241 private = table->private; 257 private = rcu_dereference(table->private);
242 table_base = (void *)private->entries[smp_processor_id()]; 258 table_base = rcu_dereference(private->entries[smp_processor_id()]);
259
243 e = get_entry(table_base, private->hook_entry[hook]); 260 e = get_entry(table_base, private->hook_entry[hook]);
244 back = get_entry(table_base, private->underflow[hook]); 261 back = get_entry(table_base, private->underflow[hook]);
245 262
@@ -311,7 +328,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
311 e = (void *)e + e->next_offset; 328 e = (void *)e + e->next_offset;
312 } 329 }
313 } while (!hotdrop); 330 } while (!hotdrop);
314 read_unlock_bh(&table->lock); 331
332 rcu_read_unlock();
315 333
316 if (hotdrop) 334 if (hotdrop)
317 return NF_DROP; 335 return NF_DROP;
@@ -374,7 +392,9 @@ static int mark_source_chains(struct xt_table_info *newinfo,
374 && unconditional(&e->arp)) || visited) { 392 && unconditional(&e->arp)) || visited) {
375 unsigned int oldpos, size; 393 unsigned int oldpos, size;
376 394
377 if (t->verdict < -NF_MAX_VERDICT - 1) { 395 if ((strcmp(t->target.u.user.name,
396 ARPT_STANDARD_TARGET) == 0) &&
397 t->verdict < -NF_MAX_VERDICT - 1) {
378 duprintf("mark_source_chains: bad " 398 duprintf("mark_source_chains: bad "
379 "negative verdict (%i)\n", 399 "negative verdict (%i)\n",
380 t->verdict); 400 t->verdict);
@@ -714,11 +734,65 @@ static void get_counters(const struct xt_table_info *t,
714 } 734 }
715} 735}
716 736
717static inline struct xt_counters *alloc_counters(struct xt_table *table) 737
738/* We're lazy, and add to the first CPU; overflow works its fey magic
739 * and everything is OK. */
740static int
741add_counter_to_entry(struct arpt_entry *e,
742 const struct xt_counters addme[],
743 unsigned int *i)
744{
745 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
746
747 (*i)++;
748 return 0;
749}
750
751/* Take values from counters and add them back onto the current cpu */
752static void put_counters(struct xt_table_info *t,
753 const struct xt_counters counters[])
754{
755 unsigned int i, cpu;
756
757 local_bh_disable();
758 cpu = smp_processor_id();
759 i = 0;
760 ARPT_ENTRY_ITERATE(t->entries[cpu],
761 t->size,
762 add_counter_to_entry,
763 counters,
764 &i);
765 local_bh_enable();
766}
767
768static inline int
769zero_entry_counter(struct arpt_entry *e, void *arg)
770{
771 e->counters.bcnt = 0;
772 e->counters.pcnt = 0;
773 return 0;
774}
775
776static void
777clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
778{
779 unsigned int cpu;
780 const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
781
782 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
783 for_each_possible_cpu(cpu) {
784 memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
785 ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
786 zero_entry_counter, NULL);
787 }
788}
789
790static struct xt_counters *alloc_counters(struct xt_table *table)
718{ 791{
719 unsigned int countersize; 792 unsigned int countersize;
720 struct xt_counters *counters; 793 struct xt_counters *counters;
721 const struct xt_table_info *private = table->private; 794 struct xt_table_info *private = table->private;
795 struct xt_table_info *info;
722 796
723 /* We need atomic snapshot of counters: rest doesn't change 797 /* We need atomic snapshot of counters: rest doesn't change
724 * (other than comefrom, which userspace doesn't care 798 * (other than comefrom, which userspace doesn't care
@@ -728,14 +802,30 @@ static inline struct xt_counters *alloc_counters(struct xt_table *table)
728 counters = vmalloc_node(countersize, numa_node_id()); 802 counters = vmalloc_node(countersize, numa_node_id());
729 803
730 if (counters == NULL) 804 if (counters == NULL)
731 return ERR_PTR(-ENOMEM); 805 goto nomem;
806
807 info = xt_alloc_table_info(private->size);
808 if (!info)
809 goto free_counters;
810
811 clone_counters(info, private);
812
813 mutex_lock(&table->lock);
814 xt_table_entry_swap_rcu(private, info);
815 synchronize_net(); /* Wait until smoke has cleared */
816
817 get_counters(info, counters);
818 put_counters(private, counters);
819 mutex_unlock(&table->lock);
732 820
733 /* First, sum counters... */ 821 xt_free_table_info(info);
734 write_lock_bh(&table->lock);
735 get_counters(private, counters);
736 write_unlock_bh(&table->lock);
737 822
738 return counters; 823 return counters;
824
825 free_counters:
826 vfree(counters);
827 nomem:
828 return ERR_PTR(-ENOMEM);
739} 829}
740 830
741static int copy_entries_to_user(unsigned int total_size, 831static int copy_entries_to_user(unsigned int total_size,
@@ -1075,20 +1165,6 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
1075 return ret; 1165 return ret;
1076} 1166}
1077 1167
1078/* We're lazy, and add to the first CPU; overflow works its fey magic
1079 * and everything is OK.
1080 */
1081static inline int add_counter_to_entry(struct arpt_entry *e,
1082 const struct xt_counters addme[],
1083 unsigned int *i)
1084{
1085
1086 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1087
1088 (*i)++;
1089 return 0;
1090}
1091
1092static int do_add_counters(struct net *net, void __user *user, unsigned int len, 1168static int do_add_counters(struct net *net, void __user *user, unsigned int len,
1093 int compat) 1169 int compat)
1094{ 1170{
@@ -1148,13 +1224,14 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
1148 goto free; 1224 goto free;
1149 } 1225 }
1150 1226
1151 write_lock_bh(&t->lock); 1227 mutex_lock(&t->lock);
1152 private = t->private; 1228 private = t->private;
1153 if (private->number != num_counters) { 1229 if (private->number != num_counters) {
1154 ret = -EINVAL; 1230 ret = -EINVAL;
1155 goto unlock_up_free; 1231 goto unlock_up_free;
1156 } 1232 }
1157 1233
1234 preempt_disable();
1158 i = 0; 1235 i = 0;
1159 /* Choose the copy that is on our node */ 1236 /* Choose the copy that is on our node */
1160 loc_cpu_entry = private->entries[smp_processor_id()]; 1237 loc_cpu_entry = private->entries[smp_processor_id()];
@@ -1163,8 +1240,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
1163 add_counter_to_entry, 1240 add_counter_to_entry,
1164 paddc, 1241 paddc,
1165 &i); 1242 &i);
1243 preempt_enable();
1166 unlock_up_free: 1244 unlock_up_free:
1167 write_unlock_bh(&t->lock); 1245 mutex_unlock(&t->lock);
1246
1168 xt_table_unlock(t); 1247 xt_table_unlock(t);
1169 module_put(t->me); 1248 module_put(t->me);
1170 free: 1249 free:
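
The arp_tables changes above fold the old per-byte interface-name loop into ifname_compare(), which XORs the names in 16-bit chunks under a mask (or uses ifname_compare_aligned() where unaligned loads are cheap). The following is a small userspace rendering of the 16-bit variant, runnable as-is; IFNAMSIZ and the masking convention mirror the kernel, everything else is illustrative.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define IFNAMSIZ 16     /* as in the kernel */

    /* Zero result means "match": every masked 16-bit chunk compared equal. */
    static unsigned long ifname_compare(const char *_a, const char *_b,
                                        const char *_mask)
    {
            const uint16_t *a = (const uint16_t *)_a;
            const uint16_t *b = (const uint16_t *)_b;
            const uint16_t *mask = (const uint16_t *)_mask;
            unsigned long ret = 0;
            size_t i;

            for (i = 0; i < IFNAMSIZ / sizeof(uint16_t); i++)
                    ret |= (a[i] ^ b[i]) & mask[i];
            return ret;
    }

    int main(void)
    {
            char dev[IFNAMSIZ]  = "eth0";
            char rule[IFNAMSIZ] = "eth0";
            char mask[IFNAMSIZ] = { 0 };

            /* iptables-style mask: match the whole name including its NUL */
            memset(mask, 0xff, strlen(rule) + 1);

            printf("eth0 vs eth0 -> %s\n",
                   ifname_compare(dev, rule, mask) ? "mismatch" : "match");

            strcpy(dev, "eth1");
            printf("eth1 vs eth0 -> %s\n",
                   ifname_compare(dev, rule, mask) ? "mismatch" : "match");
            return 0;
    }

The FWINV() callers in the patch only test whether the returned value is zero, so the accumulated OR across all chunks is the only thing that matters.
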
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index e091187e864f..6ecfdae7c589 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -48,8 +48,6 @@ static struct
48static struct xt_table packet_filter = { 48static struct xt_table packet_filter = {
49 .name = "filter", 49 .name = "filter",
50 .valid_hooks = FILTER_VALID_HOOKS, 50 .valid_hooks = FILTER_VALID_HOOKS,
51 .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
52 .private = NULL,
53 .me = THIS_MODULE, 51 .me = THIS_MODULE,
54 .af = NFPROTO_ARP, 52 .af = NFPROTO_ARP,
55}; 53};
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 432ce9d1c11c..5f22c91c6e15 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -24,6 +24,7 @@
24#include <linux/proc_fs.h> 24#include <linux/proc_fs.h>
25#include <linux/seq_file.h> 25#include <linux/seq_file.h>
26#include <linux/security.h> 26#include <linux/security.h>
27#include <linux/net.h>
27#include <linux/mutex.h> 28#include <linux/mutex.h>
28#include <net/net_namespace.h> 29#include <net/net_namespace.h>
29#include <net/sock.h> 30#include <net/sock.h>
@@ -640,6 +641,7 @@ static void __exit ip_queue_fini(void)
640MODULE_DESCRIPTION("IPv4 packet queue handler"); 641MODULE_DESCRIPTION("IPv4 packet queue handler");
641MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); 642MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
642MODULE_LICENSE("GPL"); 643MODULE_LICENSE("GPL");
644MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL);
643 645
644module_init(ip_queue_init); 646module_init(ip_queue_init);
645module_exit(ip_queue_fini); 647module_exit(ip_queue_fini);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ef8b6ca068b2..82ee7c9049ff 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -83,7 +83,6 @@ ip_packet_match(const struct iphdr *ip,
83 const struct ipt_ip *ipinfo, 83 const struct ipt_ip *ipinfo,
84 int isfrag) 84 int isfrag)
85{ 85{
86 size_t i;
87 unsigned long ret; 86 unsigned long ret;
88 87
89#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) 88#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
@@ -103,12 +102,7 @@ ip_packet_match(const struct iphdr *ip,
103 return false; 102 return false;
104 } 103 }
105 104
106 /* Look for ifname matches; this should unroll nicely. */ 105 ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
107 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
108 ret |= (((const unsigned long *)indev)[i]
109 ^ ((const unsigned long *)ipinfo->iniface)[i])
110 & ((const unsigned long *)ipinfo->iniface_mask)[i];
111 }
112 106
113 if (FWINV(ret != 0, IPT_INV_VIA_IN)) { 107 if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
114 dprintf("VIA in mismatch (%s vs %s).%s\n", 108 dprintf("VIA in mismatch (%s vs %s).%s\n",
@@ -117,11 +111,7 @@ ip_packet_match(const struct iphdr *ip,
117 return false; 111 return false;
118 } 112 }
119 113
120 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { 114 ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
121 ret |= (((const unsigned long *)outdev)[i]
122 ^ ((const unsigned long *)ipinfo->outiface)[i])
123 & ((const unsigned long *)ipinfo->outiface_mask)[i];
124 }
125 115
126 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { 116 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
127 dprintf("VIA out mismatch (%s vs %s).%s\n", 117 dprintf("VIA out mismatch (%s vs %s).%s\n",
@@ -347,10 +337,12 @@ ipt_do_table(struct sk_buff *skb,
347 mtpar.family = tgpar.family = NFPROTO_IPV4; 337 mtpar.family = tgpar.family = NFPROTO_IPV4;
348 tgpar.hooknum = hook; 338 tgpar.hooknum = hook;
349 339
350 read_lock_bh(&table->lock);
351 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 340 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
352 private = table->private; 341
353 table_base = (void *)private->entries[smp_processor_id()]; 342 rcu_read_lock();
343 private = rcu_dereference(table->private);
344 table_base = rcu_dereference(private->entries[smp_processor_id()]);
345
354 e = get_entry(table_base, private->hook_entry[hook]); 346 e = get_entry(table_base, private->hook_entry[hook]);
355 347
356 /* For return from builtin chain */ 348 /* For return from builtin chain */
@@ -445,7 +437,7 @@ ipt_do_table(struct sk_buff *skb,
445 } 437 }
446 } while (!hotdrop); 438 } while (!hotdrop);
447 439
448 read_unlock_bh(&table->lock); 440 rcu_read_unlock();
449 441
450#ifdef DEBUG_ALLOW_ALL 442#ifdef DEBUG_ALLOW_ALL
451 return NF_ACCEPT; 443 return NF_ACCEPT;
@@ -496,7 +488,9 @@ mark_source_chains(struct xt_table_info *newinfo,
496 && unconditional(&e->ip)) || visited) { 488 && unconditional(&e->ip)) || visited) {
497 unsigned int oldpos, size; 489 unsigned int oldpos, size;
498 490
499 if (t->verdict < -NF_MAX_VERDICT - 1) { 491 if ((strcmp(t->target.u.user.name,
492 IPT_STANDARD_TARGET) == 0) &&
493 t->verdict < -NF_MAX_VERDICT - 1) {
500 duprintf("mark_source_chains: bad " 494 duprintf("mark_source_chains: bad "
501 "negative verdict (%i)\n", 495 "negative verdict (%i)\n",
502 t->verdict); 496 t->verdict);
@@ -924,13 +918,68 @@ get_counters(const struct xt_table_info *t,
924 counters, 918 counters,
925 &i); 919 &i);
926 } 920 }
921
922}
923
924/* We're lazy, and add to the first CPU; overflow works its fey magic
925 * and everything is OK. */
926static int
927add_counter_to_entry(struct ipt_entry *e,
928 const struct xt_counters addme[],
929 unsigned int *i)
930{
931 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
932
933 (*i)++;
934 return 0;
935}
936
937/* Take values from counters and add them back onto the current cpu */
938static void put_counters(struct xt_table_info *t,
939 const struct xt_counters counters[])
940{
941 unsigned int i, cpu;
942
943 local_bh_disable();
944 cpu = smp_processor_id();
945 i = 0;
946 IPT_ENTRY_ITERATE(t->entries[cpu],
947 t->size,
948 add_counter_to_entry,
949 counters,
950 &i);
951 local_bh_enable();
952}
953
954
955static inline int
956zero_entry_counter(struct ipt_entry *e, void *arg)
957{
958 e->counters.bcnt = 0;
959 e->counters.pcnt = 0;
960 return 0;
961}
962
963static void
964clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
965{
966 unsigned int cpu;
967 const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
968
969 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
970 for_each_possible_cpu(cpu) {
971 memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
972 IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
973 zero_entry_counter, NULL);
974 }
927} 975}
928 976
929static struct xt_counters * alloc_counters(struct xt_table *table) 977static struct xt_counters * alloc_counters(struct xt_table *table)
930{ 978{
931 unsigned int countersize; 979 unsigned int countersize;
932 struct xt_counters *counters; 980 struct xt_counters *counters;
933 const struct xt_table_info *private = table->private; 981 struct xt_table_info *private = table->private;
982 struct xt_table_info *info;
934 983
935 /* We need atomic snapshot of counters: rest doesn't change 984 /* We need atomic snapshot of counters: rest doesn't change
936 (other than comefrom, which userspace doesn't care 985 (other than comefrom, which userspace doesn't care
@@ -939,14 +988,30 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
939 counters = vmalloc_node(countersize, numa_node_id()); 988 counters = vmalloc_node(countersize, numa_node_id());
940 989
941 if (counters == NULL) 990 if (counters == NULL)
942 return ERR_PTR(-ENOMEM); 991 goto nomem;
992
993 info = xt_alloc_table_info(private->size);
994 if (!info)
995 goto free_counters;
996
997 clone_counters(info, private);
943 998
944 /* First, sum counters... */ 999 mutex_lock(&table->lock);
945 write_lock_bh(&table->lock); 1000 xt_table_entry_swap_rcu(private, info);
946 get_counters(private, counters); 1001 synchronize_net(); /* Wait until smoke has cleared */
947 write_unlock_bh(&table->lock); 1002
1003 get_counters(info, counters);
1004 put_counters(private, counters);
1005 mutex_unlock(&table->lock);
1006
1007 xt_free_table_info(info);
948 1008
949 return counters; 1009 return counters;
1010
1011 free_counters:
1012 vfree(counters);
1013 nomem:
1014 return ERR_PTR(-ENOMEM);
950} 1015}
951 1016
952static int 1017static int
@@ -1312,27 +1377,6 @@ do_replace(struct net *net, void __user *user, unsigned int len)
1312 return ret; 1377 return ret;
1313} 1378}
1314 1379
1315/* We're lazy, and add to the first CPU; overflow works its fey magic
1316 * and everything is OK. */
1317static int
1318add_counter_to_entry(struct ipt_entry *e,
1319 const struct xt_counters addme[],
1320 unsigned int *i)
1321{
1322#if 0
1323 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1324 *i,
1325 (long unsigned int)e->counters.pcnt,
1326 (long unsigned int)e->counters.bcnt,
1327 (long unsigned int)addme[*i].pcnt,
1328 (long unsigned int)addme[*i].bcnt);
1329#endif
1330
1331 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1332
1333 (*i)++;
1334 return 0;
1335}
1336 1380
1337static int 1381static int
1338do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) 1382do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
@@ -1393,13 +1437,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
1393 goto free; 1437 goto free;
1394 } 1438 }
1395 1439
1396 write_lock_bh(&t->lock); 1440 mutex_lock(&t->lock);
1397 private = t->private; 1441 private = t->private;
1398 if (private->number != num_counters) { 1442 if (private->number != num_counters) {
1399 ret = -EINVAL; 1443 ret = -EINVAL;
1400 goto unlock_up_free; 1444 goto unlock_up_free;
1401 } 1445 }
1402 1446
1447 preempt_disable();
1403 i = 0; 1448 i = 0;
1404 /* Choose the copy that is on our node */ 1449 /* Choose the copy that is on our node */
1405 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 1450 loc_cpu_entry = private->entries[raw_smp_processor_id()];
@@ -1408,8 +1453,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
1408 add_counter_to_entry, 1453 add_counter_to_entry,
1409 paddc, 1454 paddc,
1410 &i); 1455 &i);
1456 preempt_enable();
1411 unlock_up_free: 1457 unlock_up_free:
1412 write_unlock_bh(&t->lock); 1458 mutex_unlock(&t->lock);
1413 xt_table_unlock(t); 1459 xt_table_unlock(t);
1414 module_put(t->me); 1460 module_put(t->me);
1415 free: 1461 free:
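
Both the arp_tables and ip_tables hunks above replace the table rwlock with RCU on the packet path and, to read counters, swap in a freshly cloned (zeroed) table, wait for readers with synchronize_net(), sum the old per-cpu copies, and fold the totals back onto one cpu of the live table. The RCU and locking machinery cannot be reproduced outside the kernel, but the counter bookkeeping can; here is a small runnable userspace model of the get_counters()/put_counters() arithmetic (array sizes and values are made up).

    #include <stdio.h>
    #include <string.h>

    #define NCPUS  4
    #define NRULES 2

    struct counter { unsigned long bcnt, pcnt; };

    /* Sum every cpu's copy of each rule counter into a zeroed snapshot. */
    static void get_counters(struct counter t[NCPUS][NRULES],
                             struct counter snap[NRULES])
    {
            for (int r = 0; r < NRULES; r++)
                    for (int c = 0; c < NCPUS; c++) {
                            snap[r].bcnt += t[c][r].bcnt;
                            snap[r].pcnt += t[c][r].pcnt;
                    }
    }

    /* "We're lazy, and add to the first CPU": totals survive the table swap. */
    static void put_counters(struct counter t[NCPUS][NRULES],
                             const struct counter snap[NRULES])
    {
            for (int r = 0; r < NRULES; r++) {
                    t[0][r].bcnt += snap[r].bcnt;
                    t[0][r].pcnt += snap[r].pcnt;
            }
    }

    int main(void)
    {
            struct counter old[NCPUS][NRULES] = {
                    { { 100, 1 }, { 200, 2 } },     /* cpu0 */
                    { { 300, 3 }, { 400, 4 } },     /* cpu1; cpu2/3 stay zero */
            };
            struct counter fresh[NCPUS][NRULES];    /* stands in for the clone */
            struct counter snap[NRULES];

            memset(fresh, 0, sizeof(fresh));
            memset(snap, 0, sizeof(snap));

            get_counters(old, snap);    /* read the table that was swapped out */
            put_counters(fresh, snap);  /* fold totals into the live table     */

            for (int r = 0; r < NRULES; r++)
                    printf("rule %d: %lu bytes / %lu packets\n",
                           r, fresh[0][r].bcnt, fresh[0][r].pcnt);
            return 0;
    }

In the kernel, get_counters() runs on the quiesced copy only after xt_table_entry_swap_rcu() plus synchronize_net(), and put_counters() disables bottom halves around the add; those steps have no userspace analogue here.
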
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 27a78fbbd92b..acc44c69eb68 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -464,7 +464,7 @@ static struct xt_target log_tg_reg __read_mostly = {
464 .me = THIS_MODULE, 464 .me = THIS_MODULE,
465}; 465};
466 466
467static const struct nf_logger ipt_log_logger ={ 467static struct nf_logger ipt_log_logger __read_mostly = {
468 .name = "ipt_LOG", 468 .name = "ipt_LOG",
469 .logfn = &ipt_log_packet, 469 .logfn = &ipt_log_packet,
470 .me = THIS_MODULE, 470 .me = THIS_MODULE,
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
deleted file mode 100644
index 6d76aae90cc0..000000000000
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ /dev/null
@@ -1,97 +0,0 @@
1/* TTL modification target for IP tables
2 * (C) 2000,2005 by Harald Welte <laforge@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 *
8 */
9
10#include <linux/module.h>
11#include <linux/skbuff.h>
12#include <linux/ip.h>
13#include <net/checksum.h>
14
15#include <linux/netfilter/x_tables.h>
16#include <linux/netfilter_ipv4/ipt_TTL.h>
17
18MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
19MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target");
20MODULE_LICENSE("GPL");
21
22static unsigned int
23ttl_tg(struct sk_buff *skb, const struct xt_target_param *par)
24{
25 struct iphdr *iph;
26 const struct ipt_TTL_info *info = par->targinfo;
27 int new_ttl;
28
29 if (!skb_make_writable(skb, skb->len))
30 return NF_DROP;
31
32 iph = ip_hdr(skb);
33
34 switch (info->mode) {
35 case IPT_TTL_SET:
36 new_ttl = info->ttl;
37 break;
38 case IPT_TTL_INC:
39 new_ttl = iph->ttl + info->ttl;
40 if (new_ttl > 255)
41 new_ttl = 255;
42 break;
43 case IPT_TTL_DEC:
44 new_ttl = iph->ttl - info->ttl;
45 if (new_ttl < 0)
46 new_ttl = 0;
47 break;
48 default:
49 new_ttl = iph->ttl;
50 break;
51 }
52
53 if (new_ttl != iph->ttl) {
54 csum_replace2(&iph->check, htons(iph->ttl << 8),
55 htons(new_ttl << 8));
56 iph->ttl = new_ttl;
57 }
58
59 return XT_CONTINUE;
60}
61
62static bool ttl_tg_check(const struct xt_tgchk_param *par)
63{
64 const struct ipt_TTL_info *info = par->targinfo;
65
66 if (info->mode > IPT_TTL_MAXMODE) {
67 printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n",
68 info->mode);
69 return false;
70 }
71 if (info->mode != IPT_TTL_SET && info->ttl == 0)
72 return false;
73 return true;
74}
75
76static struct xt_target ttl_tg_reg __read_mostly = {
77 .name = "TTL",
78 .family = NFPROTO_IPV4,
79 .target = ttl_tg,
80 .targetsize = sizeof(struct ipt_TTL_info),
81 .table = "mangle",
82 .checkentry = ttl_tg_check,
83 .me = THIS_MODULE,
84};
85
86static int __init ttl_tg_init(void)
87{
88 return xt_register_target(&ttl_tg_reg);
89}
90
91static void __exit ttl_tg_exit(void)
92{
93 xt_unregister_target(&ttl_tg_reg);
94}
95
96module_init(ttl_tg_init);
97module_exit(ttl_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 18a2826b57c6..d32cc4bb328a 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -379,7 +379,7 @@ static struct xt_target ulog_tg_reg __read_mostly = {
379 .me = THIS_MODULE, 379 .me = THIS_MODULE,
380}; 380};
381 381
382static struct nf_logger ipt_ulog_logger = { 382static struct nf_logger ipt_ulog_logger __read_mostly = {
383 .name = "ipt_ULOG", 383 .name = "ipt_ULOG",
384 .logfn = ipt_logfn, 384 .logfn = ipt_logfn,
385 .me = THIS_MODULE, 385 .me = THIS_MODULE,
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
deleted file mode 100644
index 297f1cbf4ff5..000000000000
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ /dev/null
@@ -1,63 +0,0 @@
1/* IP tables module for matching the value of the TTL
2 *
3 * (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/ip.h>
11#include <linux/module.h>
12#include <linux/skbuff.h>
13
14#include <linux/netfilter_ipv4/ipt_ttl.h>
15#include <linux/netfilter/x_tables.h>
16
17MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
18MODULE_DESCRIPTION("Xtables: IPv4 TTL field match");
19MODULE_LICENSE("GPL");
20
21static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par)
22{
23 const struct ipt_ttl_info *info = par->matchinfo;
24 const u8 ttl = ip_hdr(skb)->ttl;
25
26 switch (info->mode) {
27 case IPT_TTL_EQ:
28 return ttl == info->ttl;
29 case IPT_TTL_NE:
30 return ttl != info->ttl;
31 case IPT_TTL_LT:
32 return ttl < info->ttl;
33 case IPT_TTL_GT:
34 return ttl > info->ttl;
35 default:
36 printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
37 info->mode);
38 return false;
39 }
40
41 return false;
42}
43
44static struct xt_match ttl_mt_reg __read_mostly = {
45 .name = "ttl",
46 .family = NFPROTO_IPV4,
47 .match = ttl_mt,
48 .matchsize = sizeof(struct ipt_ttl_info),
49 .me = THIS_MODULE,
50};
51
52static int __init ttl_mt_init(void)
53{
54 return xt_register_match(&ttl_mt_reg);
55}
56
57static void __exit ttl_mt_exit(void)
58{
59 xt_unregister_match(&ttl_mt_reg);
60}
61
62module_init(ttl_mt_init);
63module_exit(ttl_mt_exit);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 52cb6939d093..c30a969724f8 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -56,7 +56,6 @@ static struct
56static struct xt_table packet_filter = { 56static struct xt_table packet_filter = {
57 .name = "filter", 57 .name = "filter",
58 .valid_hooks = FILTER_VALID_HOOKS, 58 .valid_hooks = FILTER_VALID_HOOKS,
59 .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
60 .me = THIS_MODULE, 59 .me = THIS_MODULE,
61 .af = AF_INET, 60 .af = AF_INET,
62}; 61};
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 3929d20b9e45..4087614d9519 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -67,7 +67,6 @@ static struct
67static struct xt_table packet_mangler = { 67static struct xt_table packet_mangler = {
68 .name = "mangle", 68 .name = "mangle",
69 .valid_hooks = MANGLE_VALID_HOOKS, 69 .valid_hooks = MANGLE_VALID_HOOKS,
70 .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock),
71 .me = THIS_MODULE, 70 .me = THIS_MODULE,
72 .af = AF_INET, 71 .af = AF_INET,
73}; 72};
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 7f65d18333e3..e5356da1fb54 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -39,7 +39,6 @@ static struct
39static struct xt_table packet_raw = { 39static struct xt_table packet_raw = {
40 .name = "raw", 40 .name = "raw",
41 .valid_hooks = RAW_VALID_HOOKS, 41 .valid_hooks = RAW_VALID_HOOKS,
42 .lock = __RW_LOCK_UNLOCKED(packet_raw.lock),
43 .me = THIS_MODULE, 42 .me = THIS_MODULE,
44 .af = AF_INET, 43 .af = AF_INET,
45}; 44};
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index a52a35f4a584..29ab630f240a 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -60,7 +60,6 @@ static struct
60static struct xt_table security_table = { 60static struct xt_table security_table = {
61 .name = "security", 61 .name = "security",
62 .valid_hooks = SECURITY_VALID_HOOKS, 62 .valid_hooks = SECURITY_VALID_HOOKS,
63 .lock = __RW_LOCK_UNLOCKED(security_table.lock),
64 .me = THIS_MODULE, 63 .me = THIS_MODULE,
65 .af = AF_INET, 64 .af = AF_INET,
66}; 65};
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 4beb04fac588..7d2ead7228ac 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -120,8 +120,10 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
120 typeof(nf_nat_seq_adjust_hook) seq_adjust; 120 typeof(nf_nat_seq_adjust_hook) seq_adjust;
121 121
122 seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); 122 seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
123 if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) 123 if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) {
124 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
124 return NF_DROP; 125 return NF_DROP;
126 }
125 } 127 }
126out: 128out:
127 /* We've seen it coming out the other side: confirm it */ 129 /* We've seen it coming out the other side: confirm it */
@@ -326,6 +328,11 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
326 328
327 return 0; 329 return 0;
328} 330}
331
332static int ipv4_nlattr_tuple_size(void)
333{
334 return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1);
335}
329#endif 336#endif
330 337
331static struct nf_sockopt_ops so_getorigdst = { 338static struct nf_sockopt_ops so_getorigdst = {
@@ -345,6 +352,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
345 .get_l4proto = ipv4_get_l4proto, 352 .get_l4proto = ipv4_get_l4proto,
346#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 353#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
347 .tuple_to_nlattr = ipv4_tuple_to_nlattr, 354 .tuple_to_nlattr = ipv4_tuple_to_nlattr,
355 .nlattr_tuple_size = ipv4_nlattr_tuple_size,
348 .nlattr_to_tuple = ipv4_nlattr_to_tuple, 356 .nlattr_to_tuple = ipv4_nlattr_to_tuple,
349 .nla_policy = ipv4_nla_policy, 357 .nla_policy = ipv4_nla_policy,
350#endif 358#endif
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 6ba5c557690c..8668a3defda6 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -25,40 +25,42 @@ struct ct_iter_state {
25 unsigned int bucket; 25 unsigned int bucket;
26}; 26};
27 27
28static struct hlist_node *ct_get_first(struct seq_file *seq) 28static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
29{ 29{
30 struct net *net = seq_file_net(seq); 30 struct net *net = seq_file_net(seq);
31 struct ct_iter_state *st = seq->private; 31 struct ct_iter_state *st = seq->private;
32 struct hlist_node *n; 32 struct hlist_nulls_node *n;
33 33
34 for (st->bucket = 0; 34 for (st->bucket = 0;
35 st->bucket < nf_conntrack_htable_size; 35 st->bucket < nf_conntrack_htable_size;
36 st->bucket++) { 36 st->bucket++) {
37 n = rcu_dereference(net->ct.hash[st->bucket].first); 37 n = rcu_dereference(net->ct.hash[st->bucket].first);
38 if (n) 38 if (!is_a_nulls(n))
39 return n; 39 return n;
40 } 40 }
41 return NULL; 41 return NULL;
42} 42}
43 43
44static struct hlist_node *ct_get_next(struct seq_file *seq, 44static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
45 struct hlist_node *head) 45 struct hlist_nulls_node *head)
46{ 46{
47 struct net *net = seq_file_net(seq); 47 struct net *net = seq_file_net(seq);
48 struct ct_iter_state *st = seq->private; 48 struct ct_iter_state *st = seq->private;
49 49
50 head = rcu_dereference(head->next); 50 head = rcu_dereference(head->next);
51 while (head == NULL) { 51 while (is_a_nulls(head)) {
52 if (++st->bucket >= nf_conntrack_htable_size) 52 if (likely(get_nulls_value(head) == st->bucket)) {
53 return NULL; 53 if (++st->bucket >= nf_conntrack_htable_size)
54 return NULL;
55 }
54 head = rcu_dereference(net->ct.hash[st->bucket].first); 56 head = rcu_dereference(net->ct.hash[st->bucket].first);
55 } 57 }
56 return head; 58 return head;
57} 59}
58 60
59static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) 61static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
60{ 62{
61 struct hlist_node *head = ct_get_first(seq); 63 struct hlist_nulls_node *head = ct_get_first(seq);
62 64
63 if (head) 65 if (head)
64 while (pos && (head = ct_get_next(seq, head))) 66 while (pos && (head = ct_get_next(seq, head)))
@@ -87,69 +89,76 @@ static void ct_seq_stop(struct seq_file *s, void *v)
87 89
88static int ct_seq_show(struct seq_file *s, void *v) 90static int ct_seq_show(struct seq_file *s, void *v)
89{ 91{
90 const struct nf_conntrack_tuple_hash *hash = v; 92 struct nf_conntrack_tuple_hash *hash = v;
91 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); 93 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
92 const struct nf_conntrack_l3proto *l3proto; 94 const struct nf_conntrack_l3proto *l3proto;
93 const struct nf_conntrack_l4proto *l4proto; 95 const struct nf_conntrack_l4proto *l4proto;
96 int ret = 0;
94 97
95 NF_CT_ASSERT(ct); 98 NF_CT_ASSERT(ct);
99 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
100 return 0;
101
96 102
97 /* we only want to print DIR_ORIGINAL */ 103 /* we only want to print DIR_ORIGINAL */
98 if (NF_CT_DIRECTION(hash)) 104 if (NF_CT_DIRECTION(hash))
99 return 0; 105 goto release;
100 if (nf_ct_l3num(ct) != AF_INET) 106 if (nf_ct_l3num(ct) != AF_INET)
101 return 0; 107 goto release;
102 108
103 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); 109 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
104 NF_CT_ASSERT(l3proto); 110 NF_CT_ASSERT(l3proto);
105 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 111 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
106 NF_CT_ASSERT(l4proto); 112 NF_CT_ASSERT(l4proto);
107 113
114 ret = -ENOSPC;
108 if (seq_printf(s, "%-8s %u %ld ", 115 if (seq_printf(s, "%-8s %u %ld ",
109 l4proto->name, nf_ct_protonum(ct), 116 l4proto->name, nf_ct_protonum(ct),
110 timer_pending(&ct->timeout) 117 timer_pending(&ct->timeout)
111 ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) 118 ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
112 return -ENOSPC; 119 goto release;
113 120
114 if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) 121 if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
115 return -ENOSPC; 122 goto release;
116 123
117 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 124 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
118 l3proto, l4proto)) 125 l3proto, l4proto))
119 return -ENOSPC; 126 goto release;
120 127
121 if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) 128 if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
122 return -ENOSPC; 129 goto release;
123 130
124 if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) 131 if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
125 if (seq_printf(s, "[UNREPLIED] ")) 132 if (seq_printf(s, "[UNREPLIED] "))
126 return -ENOSPC; 133 goto release;
127 134
128 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, 135 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
129 l3proto, l4proto)) 136 l3proto, l4proto))
130 return -ENOSPC; 137 goto release;
131 138
132 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) 139 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
133 return -ENOSPC; 140 goto release;
134 141
135 if (test_bit(IPS_ASSURED_BIT, &ct->status)) 142 if (test_bit(IPS_ASSURED_BIT, &ct->status))
136 if (seq_printf(s, "[ASSURED] ")) 143 if (seq_printf(s, "[ASSURED] "))
137 return -ENOSPC; 144 goto release;
138 145
139#ifdef CONFIG_NF_CONNTRACK_MARK 146#ifdef CONFIG_NF_CONNTRACK_MARK
140 if (seq_printf(s, "mark=%u ", ct->mark)) 147 if (seq_printf(s, "mark=%u ", ct->mark))
141 return -ENOSPC; 148 goto release;
142#endif 149#endif
143 150
144#ifdef CONFIG_NF_CONNTRACK_SECMARK 151#ifdef CONFIG_NF_CONNTRACK_SECMARK
145 if (seq_printf(s, "secmark=%u ", ct->secmark)) 152 if (seq_printf(s, "secmark=%u ", ct->secmark))
146 return -ENOSPC; 153 goto release;
147#endif 154#endif
148 155
149 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) 156 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
150 return -ENOSPC; 157 goto release;
151 158 ret = 0;
152 return 0; 159release:
160 nf_ct_put(ct);
161 return ret;
153} 162}
154 163
155static const struct seq_operations ct_seq_ops = { 164static const struct seq_operations ct_seq_ops = {
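
The ct_seq_show() rewrite above has two halves: the hash walk now runs over an hlist_nulls list, and before printing anything the entry is pinned with atomic_inc_not_zero() and released through the single nf_ct_put() at the release label, so a conntrack that is concurrently dying is simply skipped. Below is a runnable userspace sketch of that "take a reference only if the object is still alive" idiom using C11 atomics; the struct and function names are invented.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct obj {
            atomic_uint use;        /* reference count, 0 == being destroyed */
    };

    /* Counterpart of atomic_inc_not_zero(): only take a reference if the
     * count has not already dropped to zero. */
    static bool get_unless_zero(struct obj *o)
    {
            unsigned int v = atomic_load(&o->use);

            while (v != 0) {
                    if (atomic_compare_exchange_weak(&o->use, &v, v + 1))
                            return true;    /* reference taken */
            }
            return false;                   /* object already dying: skip it */
    }

    /* Counterpart of nf_ct_put(): drop the reference we took. */
    static void put_obj(struct obj *o)
    {
            if (atomic_fetch_sub(&o->use, 1) == 1)
                    printf("last reference gone, object may be freed\n");
    }

    int main(void)
    {
            struct obj live  = { .use = 2 };
            struct obj dying = { .use = 0 };

            printf("live object : %s\n", get_unless_zero(&live)  ? "pinned" : "skipped");
            printf("dying object: %s\n", get_unless_zero(&dying) ? "pinned" : "skipped");
            put_obj(&live);
            return 0;
    }

In the patch the same shape appears as the unlikely(!atomic_inc_not_zero(&ct->ct_general.use)) check at the top and "release: nf_ct_put(ct);" at the bottom, with every early return -ENOSPC turned into goto release.
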
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 2a8bee26f43d..23b2c2ee869a 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -262,6 +262,11 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[],
262 262
263 return 0; 263 return 0;
264} 264}
265
266static int icmp_nlattr_tuple_size(void)
267{
268 return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
269}
265#endif 270#endif
266 271
267#ifdef CONFIG_SYSCTL 272#ifdef CONFIG_SYSCTL
@@ -309,6 +314,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
309 .me = NULL, 314 .me = NULL,
310#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 315#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
311 .tuple_to_nlattr = icmp_tuple_to_nlattr, 316 .tuple_to_nlattr = icmp_tuple_to_nlattr,
317 .nlattr_tuple_size = icmp_nlattr_tuple_size,
312 .nlattr_to_tuple = icmp_nlattr_to_tuple, 318 .nlattr_to_tuple = icmp_nlattr_to_tuple,
313 .nla_policy = icmp_nla_policy, 319 .nla_policy = icmp_nla_policy,
314#endif 320#endif
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index a65cf692359f..fe65187810f0 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -679,7 +679,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
679static int __net_init nf_nat_net_init(struct net *net) 679static int __net_init nf_nat_net_init(struct net *net)
680{ 680{
681 net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 681 net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
682 &net->ipv4.nat_vmalloced); 682 &net->ipv4.nat_vmalloced, 0);
683 if (!net->ipv4.nat_bysource) 683 if (!net->ipv4.nat_bysource)
684 return -ENOMEM; 684 return -ENOMEM;
685 return 0; 685 return 0;
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index a7eb04719044..6348a793936e 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -61,7 +61,6 @@ static struct
61static struct xt_table nat_table = { 61static struct xt_table nat_table = {
62 .name = "nat", 62 .name = "nat",
63 .valid_hooks = NAT_VALID_HOOKS, 63 .valid_hooks = NAT_VALID_HOOKS,
64 .lock = __RW_LOCK_UNLOCKED(nat_table.lock),
65 .me = THIS_MODULE, 64 .me = THIS_MODULE,
66 .af = AF_INET, 65 .af = AF_INET,
67}; 66};
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 182f845de92f..d9521f6f9ed0 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1292,7 +1292,7 @@ static struct nf_conntrack_helper snmp_helper __read_mostly = {
1292 .expect_policy = &snmp_exp_policy, 1292 .expect_policy = &snmp_exp_policy,
1293 .name = "snmp", 1293 .name = "snmp",
1294 .tuple.src.l3num = AF_INET, 1294 .tuple.src.l3num = AF_INET,
1295 .tuple.src.u.udp.port = __constant_htons(SNMP_PORT), 1295 .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
1296 .tuple.dst.protonum = IPPROTO_UDP, 1296 .tuple.dst.protonum = IPPROTO_UDP,
1297}; 1297};
1298 1298
@@ -1302,7 +1302,7 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
1302 .expect_policy = &snmp_exp_policy, 1302 .expect_policy = &snmp_exp_policy,
1303 .name = "snmp_trap", 1303 .name = "snmp_trap",
1304 .tuple.src.l3num = AF_INET, 1304 .tuple.src.l3num = AF_INET,
1305 .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT), 1305 .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT),
1306 .tuple.dst.protonum = IPPROTO_UDP, 1306 .tuple.dst.protonum = IPPROTO_UDP,
1307}; 1307};
1308 1308
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index eb62e58bff79..cf0cdeeb1db0 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -54,8 +54,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
54 int orphans, sockets; 54 int orphans, sockets;
55 55
56 local_bh_disable(); 56 local_bh_disable();
57 orphans = percpu_counter_sum_positive(&tcp_orphan_count), 57 orphans = percpu_counter_sum_positive(&tcp_orphan_count);
58 sockets = percpu_counter_sum_positive(&tcp_sockets_allocated), 58 sockets = percpu_counter_sum_positive(&tcp_sockets_allocated);
59 local_bh_enable(); 59 local_bh_enable();
60 60
61 socket_seq_show(seq); 61 socket_seq_show(seq);
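
The proc.c hunk above is a pure punctuation fix: the two percpu_counter_sum_positive() lines ended in commas, so together with the following statement they formed one comma-operator expression. It happened to behave the same here, but the construct is fragile, as this small user-space example (not kernel code) shows:

#include <stdio.h>

/* Illustration of why a trailing ',' instead of ';' is risky: expressions
 * joined by the comma operator form ONE statement, so surrounding control
 * flow applies to all of them at once. */
static int calls;
static int count(void) { return ++calls; }

int main(void)
{
	int a = 0, b = 0;

	if (0)
		a = count(),	/* trailing ',' instead of ';' ... */
		b = count();	/* ... silently pulls this line under the if (0) */

	printf("a=%d b=%d calls=%d\n", a, b, calls);	/* a=0 b=0 calls=0 */
	return 0;
}
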
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index dff8bc4e0fac..f774651f0a47 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -493,6 +493,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
493 493
494 ipc.addr = inet->saddr; 494 ipc.addr = inet->saddr;
495 ipc.opt = NULL; 495 ipc.opt = NULL;
496 ipc.shtx.flags = 0;
496 ipc.oif = sk->sk_bound_dev_if; 497 ipc.oif = sk->sk_bound_dev_if;
497 498
498 if (msg->msg_controllen) { 499 if (msg->msg_controllen) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index bf895401218f..c40debe51b38 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -151,7 +151,7 @@ static void rt_emergency_hash_rebuild(struct net *net);
151 151
152static struct dst_ops ipv4_dst_ops = { 152static struct dst_ops ipv4_dst_ops = {
153 .family = AF_INET, 153 .family = AF_INET,
154 .protocol = __constant_htons(ETH_P_IP), 154 .protocol = cpu_to_be16(ETH_P_IP),
155 .gc = rt_garbage_collect, 155 .gc = rt_garbage_collect,
156 .check = ipv4_dst_check, 156 .check = ipv4_dst_check,
157 .destroy = ipv4_dst_destroy, 157 .destroy = ipv4_dst_destroy,
@@ -2696,7 +2696,7 @@ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
2696 2696
2697static struct dst_ops ipv4_dst_blackhole_ops = { 2697static struct dst_ops ipv4_dst_blackhole_ops = {
2698 .family = AF_INET, 2698 .family = AF_INET,
2699 .protocol = __constant_htons(ETH_P_IP), 2699 .protocol = cpu_to_be16(ETH_P_IP),
2700 .destroy = ipv4_dst_destroy, 2700 .destroy = ipv4_dst_destroy,
2701 .check = ipv4_dst_check, 2701 .check = ipv4_dst_check,
2702 .update_pmtu = ipv4_rt_blackhole_update_pmtu, 2702 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
@@ -2779,7 +2779,8 @@ int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp)
2779 return ip_route_output_flow(net, rp, flp, NULL, 0); 2779 return ip_route_output_flow(net, rp, flp, NULL, 0);
2780} 2780}
2781 2781
2782static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, 2782static int rt_fill_info(struct net *net,
2783 struct sk_buff *skb, u32 pid, u32 seq, int event,
2783 int nowait, unsigned int flags) 2784 int nowait, unsigned int flags)
2784{ 2785{
2785 struct rtable *rt = skb->rtable; 2786 struct rtable *rt = skb->rtable;
@@ -2844,8 +2845,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
2844 __be32 dst = rt->rt_dst; 2845 __be32 dst = rt->rt_dst;
2845 2846
2846 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && 2847 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2847 IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) { 2848 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2848 int err = ipmr_get_route(skb, r, nowait); 2849 int err = ipmr_get_route(net, skb, r, nowait);
2849 if (err <= 0) { 2850 if (err <= 0) {
2850 if (!nowait) { 2851 if (!nowait) {
2851 if (err == 0) 2852 if (err == 0)
@@ -2950,7 +2951,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
2950 if (rtm->rtm_flags & RTM_F_NOTIFY) 2951 if (rtm->rtm_flags & RTM_F_NOTIFY)
2951 rt->rt_flags |= RTCF_NOTIFY; 2952 rt->rt_flags |= RTCF_NOTIFY;
2952 2953
2953 err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 2954 err = rt_fill_info(net, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
2954 RTM_NEWROUTE, 0, 0); 2955 RTM_NEWROUTE, 0, 0);
2955 if (err <= 0) 2956 if (err <= 0)
2956 goto errout_free; 2957 goto errout_free;
@@ -2988,7 +2989,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
2988 if (rt_is_expired(rt)) 2989 if (rt_is_expired(rt))
2989 continue; 2990 continue;
2990 skb->dst = dst_clone(&rt->u.dst); 2991 skb->dst = dst_clone(&rt->u.dst);
2991 if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, 2992 if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
2992 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 2993 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
2993 1, NLM_F_MULTI) <= 0) { 2994 1, NLM_F_MULTI) <= 0) {
2994 dst_release(xchg(&skb->dst, NULL)); 2995 dst_release(xchg(&skb->dst, NULL));
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index d346c22aa6ae..b35a950d2e06 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -288,10 +288,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
288 if (!req) 288 if (!req)
289 goto out; 289 goto out;
290 290
291 if (security_inet_conn_request(sk, skb, req)) {
292 reqsk_free(req);
293 goto out;
294 }
295 ireq = inet_rsk(req); 291 ireq = inet_rsk(req);
296 treq = tcp_rsk(req); 292 treq = tcp_rsk(req);
297 treq->rcv_isn = ntohl(th->seq) - 1; 293 treq->rcv_isn = ntohl(th->seq) - 1;
@@ -322,6 +318,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
322 } 318 }
323 } 319 }
324 320
321 if (security_inet_conn_request(sk, skb, req)) {
322 reqsk_free(req);
323 goto out;
324 }
325
325 req->expires = 0UL; 326 req->expires = 0UL;
326 req->retrans = 0; 327 req->retrans = 0;
327 328
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 76b148bcb0dc..2451aeb5ac23 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -661,6 +661,47 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp)
661 return NULL; 661 return NULL;
662} 662}
663 663
664static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
665 int large_allowed)
666{
667 struct tcp_sock *tp = tcp_sk(sk);
668 u32 xmit_size_goal, old_size_goal;
669
670 xmit_size_goal = mss_now;
671
672 if (large_allowed && sk_can_gso(sk)) {
673 xmit_size_goal = ((sk->sk_gso_max_size - 1) -
674 inet_csk(sk)->icsk_af_ops->net_header_len -
675 inet_csk(sk)->icsk_ext_hdr_len -
676 tp->tcp_header_len);
677
678 xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
679
680 /* We try hard to avoid divides here */
681 old_size_goal = tp->xmit_size_goal_segs * mss_now;
682
683 if (likely(old_size_goal <= xmit_size_goal &&
684 old_size_goal + mss_now > xmit_size_goal)) {
685 xmit_size_goal = old_size_goal;
686 } else {
687 tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
688 xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
689 }
690 }
691
692 return max(xmit_size_goal, mss_now);
693}
694
695static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
696{
697 int mss_now;
698
699 mss_now = tcp_current_mss(sk);
700 *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB));
701
702 return mss_now;
703}
704
664static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, 705static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset,
665 size_t psize, int flags) 706 size_t psize, int flags)
666{ 707{
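
tcp_xmit_size_goal(), added in the hunk above, caches the send size goal as a whole number of MSS-sized segments (tp->xmit_size_goal_segs) so the common path avoids a divide; the goal is only recomputed when the raw GSO-derived goal drifts outside the currently cached segment-aligned value. A rough user-space sketch of that caching rule (the struct, the names and the fixed raw goal below are illustrative only, not kernel state):

#include <stdio.h>

/* Keep the goal as a count of whole MSS-sized segments and only redo the
 * divide when the raw goal leaves the cached segment-aligned window. */
struct goal_cache { unsigned int segs; };

static unsigned int size_goal(struct goal_cache *c, unsigned int mss,
			      unsigned int raw_goal)
{
	unsigned int old = c->segs * mss;

	if (!(old <= raw_goal && old + mss > raw_goal)) {
		c->segs = raw_goal / mss;	/* the divide we try to avoid */
		old = c->segs * mss;
	}
	return old > mss ? old : mss;		/* never below one MSS */
}

int main(void)
{
	struct goal_cache c = { 0 };

	printf("%u\n", size_goal(&c, 1448, 64000));	/* 63712 = 44 * 1448 */
	printf("%u\n", size_goal(&c, 1448, 64000));	/* cached: same, no divide */
	return 0;
}
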
@@ -677,13 +718,12 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse
677 718
678 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 719 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
679 720
680 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); 721 mss_now = tcp_send_mss(sk, &size_goal, flags);
681 size_goal = tp->xmit_size_goal;
682 copied = 0; 722 copied = 0;
683 723
684 err = -EPIPE; 724 err = -EPIPE;
685 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 725 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
686 goto do_error; 726 goto out_err;
687 727
688 while (psize > 0) { 728 while (psize > 0) {
689 struct sk_buff *skb = tcp_write_queue_tail(sk); 729 struct sk_buff *skb = tcp_write_queue_tail(sk);
@@ -761,8 +801,7 @@ wait_for_memory:
761 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) 801 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
762 goto do_error; 802 goto do_error;
763 803
764 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); 804 mss_now = tcp_send_mss(sk, &size_goal, flags);
765 size_goal = tp->xmit_size_goal;
766 } 805 }
767 806
768out: 807out:
@@ -844,8 +883,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
844 /* This should be in poll */ 883 /* This should be in poll */
845 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); 884 clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
846 885
847 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); 886 mss_now = tcp_send_mss(sk, &size_goal, flags);
848 size_goal = tp->xmit_size_goal;
849 887
850 /* Ok commence sending. */ 888 /* Ok commence sending. */
851 iovlen = msg->msg_iovlen; 889 iovlen = msg->msg_iovlen;
@@ -854,7 +892,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
854 892
855 err = -EPIPE; 893 err = -EPIPE;
856 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) 894 if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
857 goto do_error; 895 goto out_err;
858 896
859 while (--iovlen >= 0) { 897 while (--iovlen >= 0) {
860 int seglen = iov->iov_len; 898 int seglen = iov->iov_len;
@@ -1007,8 +1045,7 @@ wait_for_memory:
1007 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) 1045 if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
1008 goto do_error; 1046 goto do_error;
1009 1047
1010 mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); 1048 mss_now = tcp_send_mss(sk, &size_goal, flags);
1011 size_goal = tp->xmit_size_goal;
1012 } 1049 }
1013 } 1050 }
1014 1051
@@ -1045,8 +1082,7 @@ out_err:
1045 */ 1082 */
1046 1083
1047static int tcp_recv_urg(struct sock *sk, long timeo, 1084static int tcp_recv_urg(struct sock *sk, long timeo,
1048 struct msghdr *msg, int len, int flags, 1085 struct msghdr *msg, int len, int flags)
1049 int *addr_len)
1050{ 1086{
1051 struct tcp_sock *tp = tcp_sk(sk); 1087 struct tcp_sock *tp = tcp_sk(sk);
1052 1088
@@ -1661,7 +1697,7 @@ out:
1661 return err; 1697 return err;
1662 1698
1663recv_urg: 1699recv_urg:
1664 err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); 1700 err = tcp_recv_urg(sk, timeo, msg, len, flags);
1665 goto out; 1701 goto out;
1666} 1702}
1667 1703
@@ -2478,23 +2514,23 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2478 struct tcphdr *th2; 2514 struct tcphdr *th2;
2479 unsigned int thlen; 2515 unsigned int thlen;
2480 unsigned int flags; 2516 unsigned int flags;
2481 unsigned int total;
2482 unsigned int mss = 1; 2517 unsigned int mss = 1;
2483 int flush = 1; 2518 int flush = 1;
2519 int i;
2484 2520
2485 if (!pskb_may_pull(skb, sizeof(*th))) 2521 th = skb_gro_header(skb, sizeof(*th));
2522 if (unlikely(!th))
2486 goto out; 2523 goto out;
2487 2524
2488 th = tcp_hdr(skb);
2489 thlen = th->doff * 4; 2525 thlen = th->doff * 4;
2490 if (thlen < sizeof(*th)) 2526 if (thlen < sizeof(*th))
2491 goto out; 2527 goto out;
2492 2528
2493 if (!pskb_may_pull(skb, thlen)) 2529 th = skb_gro_header(skb, thlen);
2530 if (unlikely(!th))
2494 goto out; 2531 goto out;
2495 2532
2496 th = tcp_hdr(skb); 2533 skb_gro_pull(skb, thlen);
2497 __skb_pull(skb, thlen);
2498 2534
2499 flags = tcp_flag_word(th); 2535 flags = tcp_flag_word(th);
2500 2536
@@ -2504,7 +2540,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2504 2540
2505 th2 = tcp_hdr(p); 2541 th2 = tcp_hdr(p);
2506 2542
2507 if (th->source != th2->source || th->dest != th2->dest) { 2543 if ((th->source ^ th2->source) | (th->dest ^ th2->dest)) {
2508 NAPI_GRO_CB(p)->same_flow = 0; 2544 NAPI_GRO_CB(p)->same_flow = 0;
2509 continue; 2545 continue;
2510 } 2546 }
@@ -2519,14 +2555,15 @@ found:
2519 flush |= flags & TCP_FLAG_CWR; 2555 flush |= flags & TCP_FLAG_CWR;
2520 flush |= (flags ^ tcp_flag_word(th2)) & 2556 flush |= (flags ^ tcp_flag_word(th2)) &
2521 ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); 2557 ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH);
2522 flush |= th->ack_seq != th2->ack_seq || th->window != th2->window; 2558 flush |= (th->ack_seq ^ th2->ack_seq) | (th->window ^ th2->window);
2523 flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); 2559 for (i = sizeof(*th); !flush && i < thlen; i += 4)
2560 flush |= *(u32 *)((u8 *)th + i) ^
2561 *(u32 *)((u8 *)th2 + i);
2524 2562
2525 total = p->len;
2526 mss = skb_shinfo(p)->gso_size; 2563 mss = skb_shinfo(p)->gso_size;
2527 2564
2528 flush |= skb->len > mss || skb->len <= 0; 2565 flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb);
2529 flush |= ntohl(th2->seq) + total != ntohl(th->seq); 2566 flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
2530 2567
2531 if (flush || skb_gro_receive(head, skb)) { 2568 if (flush || skb_gro_receive(head, skb)) {
2532 mss = 1; 2569 mss = 1;
@@ -2538,7 +2575,7 @@ found:
2538 tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); 2575 tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
2539 2576
2540out_check_final: 2577out_check_final:
2541 flush = skb->len < mss; 2578 flush = skb_gro_len(skb) < mss;
2542 flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | 2579 flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST |
2543 TCP_FLAG_SYN | TCP_FLAG_FIN); 2580 TCP_FLAG_SYN | TCP_FLAG_FIN);
2544 2581
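
The tcp_gro_receive() hunks above switch from skb pulls to skb_gro_header()/skb_gro_len() and rewrite the flow and flush checks in a branch-free style: fields are XORed and the results ORed into a single word that is nonzero whenever anything differs. A tiny user-space illustration of that comparison style (values are made up):

#include <stdint.h>
#include <stdio.h>

/* XOR is nonzero iff the two fields differ; OR accumulates any difference
 * into one "flush" word, with no conditional branches. */
int main(void)
{
	uint16_t src1 = 80, dst1 = 54321;
	uint16_t src2 = 80, dst2 = 54322;

	/* Equivalent to (src1 != src2 || dst1 != dst2), but branch-free. */
	uint32_t flush = (uint32_t)(src1 ^ src2) | (uint32_t)(dst1 ^ dst2);

	printf("flush=%u -> %s\n", flush, flush ? "different flow" : "same flow");
	return 0;
}
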
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 7eb7636db0d0..3b53fd1af23f 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -149,16 +149,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
149 tcp_slow_start(tp); 149 tcp_slow_start(tp);
150 else { 150 else {
151 bictcp_update(ca, tp->snd_cwnd); 151 bictcp_update(ca, tp->snd_cwnd);
152 152 tcp_cong_avoid_ai(tp, ca->cnt);
153 /* In dangerous area, increase slowly.
154 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
155 */
156 if (tp->snd_cwnd_cnt >= ca->cnt) {
157 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
158 tp->snd_cwnd++;
159 tp->snd_cwnd_cnt = 0;
160 } else
161 tp->snd_cwnd_cnt++;
162 } 153 }
163 154
164} 155}
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 4ec5b4e97c4e..e92beb9e55e0 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -336,6 +336,19 @@ void tcp_slow_start(struct tcp_sock *tp)
336} 336}
337EXPORT_SYMBOL_GPL(tcp_slow_start); 337EXPORT_SYMBOL_GPL(tcp_slow_start);
338 338
339/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */
340void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w)
341{
342 if (tp->snd_cwnd_cnt >= w) {
343 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
344 tp->snd_cwnd++;
345 tp->snd_cwnd_cnt = 0;
346 } else {
347 tp->snd_cwnd_cnt++;
348 }
349}
350EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
351
339/* 352/*
340 * TCP Reno congestion control 353 * TCP Reno congestion control
341 * This is special case used for fallback as well. 354 * This is special case used for fallback as well.
@@ -365,13 +378,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
365 tp->snd_cwnd++; 378 tp->snd_cwnd++;
366 } 379 }
367 } else { 380 } else {
368 /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */ 381 tcp_cong_avoid_ai(tp, tp->snd_cwnd);
369 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
370 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
371 tp->snd_cwnd++;
372 tp->snd_cwnd_cnt = 0;
373 } else
374 tp->snd_cwnd_cnt++;
375 } 382 }
376} 383}
377EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); 384EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
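
tcp_cong_avoid_ai(), introduced above and reused by BIC, CUBIC, Scalable, Veno and YeAH in the following hunks, counts ACKs in snd_cwnd_cnt and grows cwnd by one segment once w ACKs have accumulated, i.e. roughly cwnd += 1/w per ACK. A small user-space simulation of that behaviour (made-up values, not kernel state):

#include <stdio.h>

/* Same additive-increase rule as the helper above, on a toy struct. */
struct cc { unsigned int cwnd, cwnd_cnt, clamp; };

static void cong_avoid_ai(struct cc *tp, unsigned int w)
{
	if (tp->cwnd_cnt >= w) {
		if (tp->cwnd < tp->clamp)
			tp->cwnd++;
		tp->cwnd_cnt = 0;
	} else {
		tp->cwnd_cnt++;
	}
}

int main(void)
{
	struct cc tp = { .cwnd = 10, .cwnd_cnt = 0, .clamp = 0xffff };
	int ack;

	/* One RTT delivers about cwnd ACKs, so cwnd grows by ~1 per RTT. */
	for (ack = 0; ack < 3 * 10; ack++)
		cong_avoid_ai(&tp, tp.cwnd);	/* Reno: w == cwnd */

	printf("cwnd after ~3 RTTs of ACKs: %u\n", tp.cwnd);	/* 12 */
	return 0;
}
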
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index ee467ec40c4f..71d5f2f29fa6 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -294,16 +294,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
294 tcp_slow_start(tp); 294 tcp_slow_start(tp);
295 } else { 295 } else {
296 bictcp_update(ca, tp->snd_cwnd); 296 bictcp_update(ca, tp->snd_cwnd);
297 297 tcp_cong_avoid_ai(tp, ca->cnt);
298 /* In dangerous area, increase slowly.
299 * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd
300 */
301 if (tp->snd_cwnd_cnt >= ca->cnt) {
302 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
303 tp->snd_cwnd++;
304 tp->snd_cwnd_cnt = 0;
305 } else
306 tp->snd_cwnd_cnt++;
307 } 298 }
308 299
309} 300}
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 937549b8a921..26d5c7fc7de5 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -115,8 +115,7 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt
115 return; 115 return;
116 116
117 /* achieved throughput calculations */ 117 /* achieved throughput calculations */
118 if (icsk->icsk_ca_state != TCP_CA_Open && 118 if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_Disorder))) {
119 icsk->icsk_ca_state != TCP_CA_Disorder) {
120 ca->packetcount = 0; 119 ca->packetcount = 0;
121 ca->lasttime = now; 120 ca->lasttime = now;
122 return; 121 return;
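
The htcp hunk above keeps the same condition but tests it as set membership: the CA state is turned into a one-hot bit and masked against TCPF_CA_Open | TCPF_CA_Disorder instead of chaining two != comparisons. A self-contained sketch of the idiom (the enum here only mirrors the TCP_CA_* ordering and is local to the example):

#include <stdio.h>

/* One-hot state bits let several "state == X" checks collapse into a
 * single mask test. */
enum ca_state { CA_Open, CA_Disorder, CA_CWR, CA_Recovery, CA_Loss };

#define CAF(s)	(1 << (s))

static int in_clean_states(enum ca_state s)
{
	return ((1 << s) & (CAF(CA_Open) | CAF(CA_Disorder))) != 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       in_clean_states(CA_Open),	/* 1 */
	       in_clean_states(CA_Disorder),	/* 1 */
	       in_clean_states(CA_Recovery));	/* 0 */
	return 0;
}
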
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c28976a7e596..2bc8e27a163d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -64,6 +64,7 @@
64#include <linux/mm.h> 64#include <linux/mm.h>
65#include <linux/module.h> 65#include <linux/module.h>
66#include <linux/sysctl.h> 66#include <linux/sysctl.h>
67#include <linux/kernel.h>
67#include <net/dst.h> 68#include <net/dst.h>
68#include <net/tcp.h> 69#include <net/tcp.h>
69#include <net/inet_common.h> 70#include <net/inet_common.h>
@@ -1178,10 +1179,18 @@ static void tcp_mark_lost_retrans(struct sock *sk)
1178 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) 1179 if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS))
1179 continue; 1180 continue;
1180 1181
1181 if (after(received_upto, ack_seq) && 1182 /* TODO: We would like to get rid of tcp_is_fack(tp) only
1182 (tcp_is_fack(tp) || 1183 * constraint here (see above) but figuring out that at
1183 !before(received_upto, 1184 * least tp->reordering SACK blocks reside between ack_seq
1184 ack_seq + tp->reordering * tp->mss_cache))) { 1185 * and received_upto is not easy task to do cheaply with
1186 * the available datastructures.
1187 *
1188 * Whether FACK should check here for tp->reordering segs
1189 * in-between one could argue for either way (it would be
1190 * rather simple to implement as we could count fack_count
1191 * during the walk and do tp->fackets_out - fack_count).
1192 */
1193 if (after(received_upto, ack_seq)) {
1185 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; 1194 TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
1186 tp->retrans_out -= tcp_skb_pcount(skb); 1195 tp->retrans_out -= tcp_skb_pcount(skb);
1187 1196
@@ -1794,11 +1803,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
1794 for (i = used_sacks - 1; i > 0; i--) { 1803 for (i = used_sacks - 1; i > 0; i--) {
1795 for (j = 0; j < i; j++) { 1804 for (j = 0; j < i; j++) {
1796 if (after(sp[j].start_seq, sp[j + 1].start_seq)) { 1805 if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
1797 struct tcp_sack_block tmp; 1806 swap(sp[j], sp[j + 1]);
1798
1799 tmp = sp[j];
1800 sp[j] = sp[j + 1];
1801 sp[j + 1] = tmp;
1802 1807
1803 /* Track where the first SACK block goes to */ 1808 /* Track where the first SACK block goes to */
1804 if (j == first_sack_index) 1809 if (j == first_sack_index)
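
The SACK-sorting hunk above drops the open-coded three-assignment exchange in favour of the generic swap() macro (a later hunk removes tcp_sack_swap() for the same reason). A user-space sketch of the idiom, using the usual typeof-based definition (a GCC/Clang extension, as in the kernel):

#include <stdio.h>

/* Swap any two lvalues of the same type through a temporary. */
#define swap(a, b) \
	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

struct sack_block { unsigned int start_seq, end_seq; };

int main(void)
{
	struct sack_block sp[2] = { { 300, 400 }, { 100, 200 } };

	/* Keep blocks ordered by start_seq (the kernel uses the
	 * wraparound-safe after() helper for this comparison). */
	if (sp[0].start_seq > sp[1].start_seq)
		swap(sp[0], sp[1]);

	printf("%u-%u, %u-%u\n", sp[0].start_seq, sp[0].end_seq,
	       sp[1].start_seq, sp[1].end_seq);	/* 100-200, 300-400 */
	return 0;
}
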
@@ -2453,6 +2458,44 @@ static int tcp_time_to_recover(struct sock *sk)
2453 return 0; 2458 return 0;
2454} 2459}
2455 2460
2461/* New heuristics: it is possible only after we switched to restart timer
2462 * each time when something is ACKed. Hence, we can detect timed out packets
2463 * during fast retransmit without falling to slow start.
2464 *
2465 * Usefulness of this as is very questionable, since we should know which of
2466 * the segments is the next to timeout which is relatively expensive to find
2467 * in general case unless we add some data structure just for that. The
2468 * current approach certainly won't find the right one too often and when it
2469 * finally does find _something_ it usually marks large part of the window
2470 * right away (because a retransmission with a larger timestamp blocks the
2471 * loop from advancing). -ij
2472 */
2473static void tcp_timeout_skbs(struct sock *sk)
2474{
2475 struct tcp_sock *tp = tcp_sk(sk);
2476 struct sk_buff *skb;
2477
2478 if (!tcp_is_fack(tp) || !tcp_head_timedout(sk))
2479 return;
2480
2481 skb = tp->scoreboard_skb_hint;
2482 if (tp->scoreboard_skb_hint == NULL)
2483 skb = tcp_write_queue_head(sk);
2484
2485 tcp_for_write_queue_from(skb, sk) {
2486 if (skb == tcp_send_head(sk))
2487 break;
2488 if (!tcp_skb_timedout(sk, skb))
2489 break;
2490
2491 tcp_skb_mark_lost(tp, skb);
2492 }
2493
2494 tp->scoreboard_skb_hint = skb;
2495
2496 tcp_verify_left_out(tp);
2497}
2498
2456/* Mark head of queue up as lost. With RFC3517 SACK, the packets is 2499/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
2457 * is against sacked "cnt", otherwise it's against facked "cnt" 2500 * is against sacked "cnt", otherwise it's against facked "cnt"
2458 */ 2501 */
@@ -2525,30 +2568,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
2525 tcp_mark_head_lost(sk, sacked_upto); 2568 tcp_mark_head_lost(sk, sacked_upto);
2526 } 2569 }
2527 2570
2528 /* New heuristics: it is possible only after we switched 2571 tcp_timeout_skbs(sk);
2529 * to restart timer each time when something is ACKed.
2530 * Hence, we can detect timed out packets during fast
2531 * retransmit without falling to slow start.
2532 */
2533 if (tcp_is_fack(tp) && tcp_head_timedout(sk)) {
2534 struct sk_buff *skb;
2535
2536 skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
2537 : tcp_write_queue_head(sk);
2538
2539 tcp_for_write_queue_from(skb, sk) {
2540 if (skb == tcp_send_head(sk))
2541 break;
2542 if (!tcp_skb_timedout(sk, skb))
2543 break;
2544
2545 tcp_skb_mark_lost(tp, skb);
2546 }
2547
2548 tp->scoreboard_skb_hint = skb;
2549
2550 tcp_verify_left_out(tp);
2551 }
2552} 2572}
2553 2573
2554/* CWND moderation, preventing bursts due to too big ACKs 2574/* CWND moderation, preventing bursts due to too big ACKs
@@ -2813,7 +2833,7 @@ static void tcp_mtup_probe_failed(struct sock *sk)
2813 icsk->icsk_mtup.probe_size = 0; 2833 icsk->icsk_mtup.probe_size = 0;
2814} 2834}
2815 2835
2816static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) 2836static void tcp_mtup_probe_success(struct sock *sk)
2817{ 2837{
2818 struct tcp_sock *tp = tcp_sk(sk); 2838 struct tcp_sock *tp = tcp_sk(sk);
2819 struct inet_connection_sock *icsk = inet_csk(sk); 2839 struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2841,7 +2861,7 @@ void tcp_simple_retransmit(struct sock *sk)
2841 const struct inet_connection_sock *icsk = inet_csk(sk); 2861 const struct inet_connection_sock *icsk = inet_csk(sk);
2842 struct tcp_sock *tp = tcp_sk(sk); 2862 struct tcp_sock *tp = tcp_sk(sk);
2843 struct sk_buff *skb; 2863 struct sk_buff *skb;
2844 unsigned int mss = tcp_current_mss(sk, 0); 2864 unsigned int mss = tcp_current_mss(sk);
2845 u32 prior_lost = tp->lost_out; 2865 u32 prior_lost = tp->lost_out;
2846 2866
2847 tcp_for_write_queue(skb, sk) { 2867 tcp_for_write_queue(skb, sk) {
@@ -3178,7 +3198,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3178 3198
3179 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { 3199 while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
3180 struct tcp_skb_cb *scb = TCP_SKB_CB(skb); 3200 struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
3181 u32 end_seq;
3182 u32 acked_pcount; 3201 u32 acked_pcount;
3183 u8 sacked = scb->sacked; 3202 u8 sacked = scb->sacked;
3184 3203
@@ -3193,16 +3212,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3193 break; 3212 break;
3194 3213
3195 fully_acked = 0; 3214 fully_acked = 0;
3196 end_seq = tp->snd_una;
3197 } else { 3215 } else {
3198 acked_pcount = tcp_skb_pcount(skb); 3216 acked_pcount = tcp_skb_pcount(skb);
3199 end_seq = scb->end_seq;
3200 }
3201
3202 /* MTU probing checks */
3203 if (fully_acked && icsk->icsk_mtup.probe_size &&
3204 !after(tp->mtu_probe.probe_seq_end, scb->end_seq)) {
3205 tcp_mtup_probe_success(sk, skb);
3206 } 3217 }
3207 3218
3208 if (sacked & TCPCB_RETRANS) { 3219 if (sacked & TCPCB_RETRANS) {
@@ -3267,24 +3278,26 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
3267 const struct tcp_congestion_ops *ca_ops 3278 const struct tcp_congestion_ops *ca_ops
3268 = inet_csk(sk)->icsk_ca_ops; 3279 = inet_csk(sk)->icsk_ca_ops;
3269 3280
3281 if (unlikely(icsk->icsk_mtup.probe_size &&
3282 !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
3283 tcp_mtup_probe_success(sk);
3284 }
3285
3270 tcp_ack_update_rtt(sk, flag, seq_rtt); 3286 tcp_ack_update_rtt(sk, flag, seq_rtt);
3271 tcp_rearm_rto(sk); 3287 tcp_rearm_rto(sk);
3272 3288
3273 if (tcp_is_reno(tp)) { 3289 if (tcp_is_reno(tp)) {
3274 tcp_remove_reno_sacks(sk, pkts_acked); 3290 tcp_remove_reno_sacks(sk, pkts_acked);
3275 } else { 3291 } else {
3292 int delta;
3293
3276 /* Non-retransmitted hole got filled? That's reordering */ 3294 /* Non-retransmitted hole got filled? That's reordering */
3277 if (reord < prior_fackets) 3295 if (reord < prior_fackets)
3278 tcp_update_reordering(sk, tp->fackets_out - reord, 0); 3296 tcp_update_reordering(sk, tp->fackets_out - reord, 0);
3279 3297
3280 /* No need to care for underflows here because 3298 delta = tcp_is_fack(tp) ? pkts_acked :
3281 * the lost_skb_hint gets NULLed if we're past it 3299 prior_sacked - tp->sacked_out;
3282 * (or something non-trivial happened) 3300 tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta);
3283 */
3284 if (tcp_is_fack(tp))
3285 tp->lost_cnt_hint -= pkts_acked;
3286 else
3287 tp->lost_cnt_hint -= prior_sacked - tp->sacked_out;
3288 } 3301 }
3289 3302
3290 tp->fackets_out -= min(pkts_acked, tp->fackets_out); 3303 tp->fackets_out -= min(pkts_acked, tp->fackets_out);
@@ -3396,7 +3409,7 @@ static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack,
3396 3409
3397 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { 3410 if (tcp_may_update_window(tp, ack, ack_seq, nwin)) {
3398 flag |= FLAG_WIN_UPDATE; 3411 flag |= FLAG_WIN_UPDATE;
3399 tcp_update_wl(tp, ack, ack_seq); 3412 tcp_update_wl(tp, ack_seq);
3400 3413
3401 if (tp->snd_wnd != nwin) { 3414 if (tp->snd_wnd != nwin) {
3402 tp->snd_wnd = nwin; 3415 tp->snd_wnd = nwin;
@@ -3572,15 +3585,18 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3572 int prior_packets; 3585 int prior_packets;
3573 int frto_cwnd = 0; 3586 int frto_cwnd = 0;
3574 3587
3575 /* If the ack is newer than sent or older than previous acks 3588 /* If the ack is older than previous acks
3576 * then we can probably ignore it. 3589 * then we can probably ignore it.
3577 */ 3590 */
3578 if (after(ack, tp->snd_nxt))
3579 goto uninteresting_ack;
3580
3581 if (before(ack, prior_snd_una)) 3591 if (before(ack, prior_snd_una))
3582 goto old_ack; 3592 goto old_ack;
3583 3593
3594 /* If the ack includes data we haven't sent yet, discard
3595 * this segment (RFC793 Section 3.9).
3596 */
3597 if (after(ack, tp->snd_nxt))
3598 goto invalid_ack;
3599
3584 if (after(ack, prior_snd_una)) 3600 if (after(ack, prior_snd_una))
3585 flag |= FLAG_SND_UNA_ADVANCED; 3601 flag |= FLAG_SND_UNA_ADVANCED;
3586 3602
@@ -3601,7 +3617,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
3601 * No more checks are required. 3617 * No more checks are required.
3602 * Note, we use the fact that SND.UNA>=SND.WL2. 3618 * Note, we use the fact that SND.UNA>=SND.WL2.
3603 */ 3619 */
3604 tcp_update_wl(tp, ack, ack_seq); 3620 tcp_update_wl(tp, ack_seq);
3605 tp->snd_una = ack; 3621 tp->snd_una = ack;
3606 flag |= FLAG_WIN_UPDATE; 3622 flag |= FLAG_WIN_UPDATE;
3607 3623
@@ -3670,6 +3686,10 @@ no_queue:
3670 tcp_ack_probe(sk); 3686 tcp_ack_probe(sk);
3671 return 1; 3687 return 1;
3672 3688
3689invalid_ack:
3690 SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3691 return -1;
3692
3673old_ack: 3693old_ack:
3674 if (TCP_SKB_CB(skb)->sacked) { 3694 if (TCP_SKB_CB(skb)->sacked) {
3675 tcp_sacktag_write_queue(sk, skb, prior_snd_una); 3695 tcp_sacktag_write_queue(sk, skb, prior_snd_una);
@@ -3677,8 +3697,7 @@ old_ack:
3677 tcp_try_keep_open(sk); 3697 tcp_try_keep_open(sk);
3678 } 3698 }
3679 3699
3680uninteresting_ack: 3700 SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3681 SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt);
3682 return 0; 3701 return 0;
3683} 3702}
3684 3703
@@ -3866,8 +3885,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq)
3866 * Not only, also it occurs for expired timestamps. 3885 * Not only, also it occurs for expired timestamps.
3867 */ 3886 */
3868 3887
3869 if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 || 3888 if (tcp_paws_check(&tp->rx_opt, 0))
3870 get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS)
3871 tcp_store_ts_recent(tp); 3889 tcp_store_ts_recent(tp);
3872 } 3890 }
3873} 3891}
@@ -3919,9 +3937,9 @@ static inline int tcp_paws_discard(const struct sock *sk,
3919 const struct sk_buff *skb) 3937 const struct sk_buff *skb)
3920{ 3938{
3921 const struct tcp_sock *tp = tcp_sk(sk); 3939 const struct tcp_sock *tp = tcp_sk(sk);
3922 return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && 3940
3923 get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && 3941 return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) &&
3924 !tcp_disordered_ack(sk, skb)); 3942 !tcp_disordered_ack(sk, skb);
3925} 3943}
3926 3944
3927/* Check segment sequence number for validity. 3945/* Check segment sequence number for validity.
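
The two hunks above split the old inline test into tcp_paws_check() (the incoming timestamp has not stepped backwards by more than the allowed window, or ts_recent is too old to trust) and its negative counterpart used by the discard/reject paths. A user-space sketch of that split, with illustrative constants and field names rather than the kernel's struct tcp_options_received:

#include <stdint.h>
#include <stdio.h>

#define PAWS_24DAYS	(60 * 60 * 24 * 24)	/* ts_recent considered stale after this */
#define PAWS_WINDOW	1			/* tolerated backward step for the discard test */

struct ts_state {
	uint32_t ts_recent;		/* last timestamp accepted from the peer */
	uint32_t ts_recent_stamp;	/* our clock (seconds) when it was recorded */
};

static int paws_check(const struct ts_state *s, uint32_t rcv_tsval,
		      uint32_t now, int win)
{
	/* Signed 32-bit subtraction handles timestamp wraparound. */
	if ((int32_t)(s->ts_recent - rcv_tsval) <= win)
		return 1;			/* tsval did not go backwards (much) */
	if (now - s->ts_recent_stamp >= PAWS_24DAYS)
		return 1;			/* ts_recent too old to trust anyway */
	return 0;
}

int main(void)
{
	struct ts_state s = { .ts_recent = 1000, .ts_recent_stamp = 50000 };

	printf("%d\n", paws_check(&s, 1005, 50010, 0));		/* 1: newer tsval, accept */
	printf("%d\n", !paws_check(&s, 900, 50010, PAWS_WINDOW));	/* 1: older tsval, reject */
	return 0;
}
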
@@ -4079,7 +4097,6 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
4079 tp->rx_opt.dsack = 1; 4097 tp->rx_opt.dsack = 1;
4080 tp->duplicate_sack[0].start_seq = seq; 4098 tp->duplicate_sack[0].start_seq = seq;
4081 tp->duplicate_sack[0].end_seq = end_seq; 4099 tp->duplicate_sack[0].end_seq = end_seq;
4082 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1;
4083 } 4100 }
4084} 4101}
4085 4102
@@ -4134,8 +4151,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4134 * Decrease num_sacks. 4151 * Decrease num_sacks.
4135 */ 4152 */
4136 tp->rx_opt.num_sacks--; 4153 tp->rx_opt.num_sacks--;
4137 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
4138 tp->rx_opt.dsack;
4139 for (i = this_sack; i < tp->rx_opt.num_sacks; i++) 4154 for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
4140 sp[i] = sp[i + 1]; 4155 sp[i] = sp[i + 1];
4141 continue; 4156 continue;
@@ -4144,20 +4159,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
4144 } 4159 }
4145} 4160}
4146 4161
4147static inline void tcp_sack_swap(struct tcp_sack_block *sack1,
4148 struct tcp_sack_block *sack2)
4149{
4150 __u32 tmp;
4151
4152 tmp = sack1->start_seq;
4153 sack1->start_seq = sack2->start_seq;
4154 sack2->start_seq = tmp;
4155
4156 tmp = sack1->end_seq;
4157 sack1->end_seq = sack2->end_seq;
4158 sack2->end_seq = tmp;
4159}
4160
4161static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) 4162static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4162{ 4163{
4163 struct tcp_sock *tp = tcp_sk(sk); 4164 struct tcp_sock *tp = tcp_sk(sk);
@@ -4172,7 +4173,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
4172 if (tcp_sack_extend(sp, seq, end_seq)) { 4173 if (tcp_sack_extend(sp, seq, end_seq)) {
4173 /* Rotate this_sack to the first one. */ 4174 /* Rotate this_sack to the first one. */
4174 for (; this_sack > 0; this_sack--, sp--) 4175 for (; this_sack > 0; this_sack--, sp--)
4175 tcp_sack_swap(sp, sp - 1); 4176 swap(*sp, *(sp - 1));
4176 if (cur_sacks > 1) 4177 if (cur_sacks > 1)
4177 tcp_sack_maybe_coalesce(tp); 4178 tcp_sack_maybe_coalesce(tp);
4178 return; 4179 return;
@@ -4198,7 +4199,6 @@ new_sack:
4198 sp->start_seq = seq; 4199 sp->start_seq = seq;
4199 sp->end_seq = end_seq; 4200 sp->end_seq = end_seq;
4200 tp->rx_opt.num_sacks++; 4201 tp->rx_opt.num_sacks++;
4201 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
4202} 4202}
4203 4203
4204/* RCV.NXT advances, some SACKs should be eaten. */ 4204/* RCV.NXT advances, some SACKs should be eaten. */
@@ -4212,7 +4212,6 @@ static void tcp_sack_remove(struct tcp_sock *tp)
4212 /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ 4212 /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
4213 if (skb_queue_empty(&tp->out_of_order_queue)) { 4213 if (skb_queue_empty(&tp->out_of_order_queue)) {
4214 tp->rx_opt.num_sacks = 0; 4214 tp->rx_opt.num_sacks = 0;
4215 tp->rx_opt.eff_sacks = tp->rx_opt.dsack;
4216 return; 4215 return;
4217 } 4216 }
4218 4217
@@ -4233,11 +4232,7 @@ static void tcp_sack_remove(struct tcp_sock *tp)
4233 this_sack++; 4232 this_sack++;
4234 sp++; 4233 sp++;
4235 } 4234 }
4236 if (num_sacks != tp->rx_opt.num_sacks) { 4235 tp->rx_opt.num_sacks = num_sacks;
4237 tp->rx_opt.num_sacks = num_sacks;
4238 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks +
4239 tp->rx_opt.dsack;
4240 }
4241} 4236}
4242 4237
4243/* This one checks to see if we can put data from the 4238/* This one checks to see if we can put data from the
@@ -4313,10 +4308,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
4313 4308
4314 TCP_ECN_accept_cwr(tp, skb); 4309 TCP_ECN_accept_cwr(tp, skb);
4315 4310
4316 if (tp->rx_opt.dsack) { 4311 tp->rx_opt.dsack = 0;
4317 tp->rx_opt.dsack = 0;
4318 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
4319 }
4320 4312
4321 /* Queue data for delivery to the user. 4313 /* Queue data for delivery to the user.
4322 * Packets in sequence go to the receive queue. 4314 * Packets in sequence go to the receive queue.
@@ -4435,8 +4427,6 @@ drop:
4435 /* Initial out of order segment, build 1 SACK. */ 4427 /* Initial out of order segment, build 1 SACK. */
4436 if (tcp_is_sack(tp)) { 4428 if (tcp_is_sack(tp)) {
4437 tp->rx_opt.num_sacks = 1; 4429 tp->rx_opt.num_sacks = 1;
4438 tp->rx_opt.dsack = 0;
4439 tp->rx_opt.eff_sacks = 1;
4440 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; 4430 tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
4441 tp->selective_acks[0].end_seq = 4431 tp->selective_acks[0].end_seq =
4442 TCP_SKB_CB(skb)->end_seq; 4432 TCP_SKB_CB(skb)->end_seq;
@@ -5157,7 +5147,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
5157 */ 5147 */
5158 5148
5159 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && 5149 if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
5160 TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { 5150 TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
5151 !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
5161 int tcp_header_len = tp->tcp_header_len; 5152 int tcp_header_len = tp->tcp_header_len;
5162 5153
5163 /* Timestamp header prediction: tcp_header_len 5154 /* Timestamp header prediction: tcp_header_len
@@ -5310,8 +5301,8 @@ slow_path:
5310 return -res; 5301 return -res;
5311 5302
5312step5: 5303step5:
5313 if (th->ack) 5304 if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0)
5314 tcp_ack(sk, skb, FLAG_SLOWPATH); 5305 goto discard;
5315 5306
5316 tcp_rcv_rtt_measure_ts(sk, skb); 5307 tcp_rcv_rtt_measure_ts(sk, skb);
5317 5308
@@ -5409,7 +5400,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
5409 * never scaled. 5400 * never scaled.
5410 */ 5401 */
5411 tp->snd_wnd = ntohs(th->window); 5402 tp->snd_wnd = ntohs(th->window);
5412 tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); 5403 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5413 5404
5414 if (!tp->rx_opt.wscale_ok) { 5405 if (!tp->rx_opt.wscale_ok) {
5415 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; 5406 tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
@@ -5510,7 +5501,7 @@ discard:
5510 5501
5511 /* PAWS check. */ 5502 /* PAWS check. */
5512 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && 5503 if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
5513 tcp_paws_check(&tp->rx_opt, 0)) 5504 tcp_paws_reject(&tp->rx_opt, 0))
5514 goto discard_and_undo; 5505 goto discard_and_undo;
5515 5506
5516 if (th->syn) { 5507 if (th->syn) {
@@ -5648,7 +5639,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5648 5639
5649 /* step 5: check the ACK field */ 5640 /* step 5: check the ACK field */
5650 if (th->ack) { 5641 if (th->ack) {
5651 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH); 5642 int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0;
5652 5643
5653 switch (sk->sk_state) { 5644 switch (sk->sk_state) {
5654 case TCP_SYN_RECV: 5645 case TCP_SYN_RECV:
@@ -5670,8 +5661,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
5670 tp->snd_una = TCP_SKB_CB(skb)->ack_seq; 5661 tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
5671 tp->snd_wnd = ntohs(th->window) << 5662 tp->snd_wnd = ntohs(th->window) <<
5672 tp->rx_opt.snd_wscale; 5663 tp->rx_opt.snd_wscale;
5673 tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, 5664 tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
5674 TCP_SKB_CB(skb)->seq);
5675 5665
5676 /* tcp_ack considers this ACK as duplicate 5666 /* tcp_ack considers this ACK as duplicate
5677 * and does not calculate rtt. 5667 * and does not calculate rtt.
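
Several of the tcp_ack() changes in this file, in particular the new invalid_ack path that discards acknowledgments for data not yet sent (RFC 793, section 3.9), lean on the before()/after() helpers, which stay correct across 32-bit sequence-number wraparound by using signed subtraction. A user-space sketch of that classification (it mirrors the helpers in spirit and is not the kernel source):

#include <stdint.h>
#include <stdio.h>

static int seq_before(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) < 0;	/* wraparound-safe "a comes before b" */
}
#define seq_after(a, b)	seq_before(b, a)

static const char *classify_ack(uint32_t ack, uint32_t snd_una, uint32_t snd_nxt)
{
	if (seq_before(ack, snd_una))
		return "old_ack";	/* acknowledges nothing new */
	if (seq_after(ack, snd_nxt))
		return "invalid_ack";	/* acks data we have not sent (RFC 793, 3.9) */
	return "acceptable";
}

int main(void)
{
	/* Sequence space about to wrap: snd_una just below 2^32, snd_nxt past 0. */
	uint32_t una = 0xfffffff0u, nxt = 0x00000010u;

	printf("%s\n", classify_ack(0x00000008u, una, nxt));	/* acceptable */
	printf("%s\n", classify_ack(0x00000020u, una, nxt));	/* invalid_ack */
	printf("%s\n", classify_ack(0xffffffe0u, una, nxt));	/* old_ack */
	return 0;
}
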
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cf74c416831a..5d427f86b414 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1226,27 +1226,19 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1226 if (want_cookie && !tmp_opt.saw_tstamp) 1226 if (want_cookie && !tmp_opt.saw_tstamp)
1227 tcp_clear_options(&tmp_opt); 1227 tcp_clear_options(&tmp_opt);
1228 1228
1229 if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) {
1230 /* Some OSes (unknown ones, but I see them on web server, which
1231 * contains information interesting only for windows'
1232 * users) do not send their stamp in SYN. It is easy case.
1233 * We simply do not advertise TS support.
1234 */
1235 tmp_opt.saw_tstamp = 0;
1236 tmp_opt.tstamp_ok = 0;
1237 }
1238 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; 1229 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1239 1230
1240 tcp_openreq_init(req, &tmp_opt, skb); 1231 tcp_openreq_init(req, &tmp_opt, skb);
1241 1232
1242 if (security_inet_conn_request(sk, skb, req))
1243 goto drop_and_free;
1244
1245 ireq = inet_rsk(req); 1233 ireq = inet_rsk(req);
1246 ireq->loc_addr = daddr; 1234 ireq->loc_addr = daddr;
1247 ireq->rmt_addr = saddr; 1235 ireq->rmt_addr = saddr;
1248 ireq->no_srccheck = inet_sk(sk)->transparent; 1236 ireq->no_srccheck = inet_sk(sk)->transparent;
1249 ireq->opt = tcp_v4_save_options(sk, skb); 1237 ireq->opt = tcp_v4_save_options(sk, skb);
1238
1239 if (security_inet_conn_request(sk, skb, req))
1240 goto drop_and_free;
1241
1250 if (!want_cookie) 1242 if (!want_cookie)
1251 TCP_ECN_create_request(req, tcp_hdr(skb)); 1243 TCP_ECN_create_request(req, tcp_hdr(skb));
1252 1244
@@ -2355,7 +2347,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)
2355 2347
2356 switch (skb->ip_summed) { 2348 switch (skb->ip_summed) {
2357 case CHECKSUM_COMPLETE: 2349 case CHECKSUM_COMPLETE:
2358 if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr, 2350 if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr,
2359 skb->csum)) { 2351 skb->csum)) {
2360 skb->ip_summed = CHECKSUM_UNNECESSARY; 2352 skb->ip_summed = CHECKSUM_UNNECESSARY;
2361 break; 2353 break;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f67effbb102b..43bbba7926ee 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -107,7 +107,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
107 if (tmp_opt.saw_tstamp) { 107 if (tmp_opt.saw_tstamp) {
108 tmp_opt.ts_recent = tcptw->tw_ts_recent; 108 tmp_opt.ts_recent = tcptw->tw_ts_recent;
109 tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; 109 tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
110 paws_reject = tcp_paws_check(&tmp_opt, th->rst); 110 paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
111 } 111 }
112 } 112 }
113 113
@@ -399,7 +399,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
399 399
400 tcp_prequeue_init(newtp); 400 tcp_prequeue_init(newtp);
401 401
402 tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn); 402 tcp_init_wl(newtp, treq->rcv_isn);
403 403
404 newtp->srtt = 0; 404 newtp->srtt = 0;
405 newtp->mdev = TCP_TIMEOUT_INIT; 405 newtp->mdev = TCP_TIMEOUT_INIT;
@@ -434,9 +434,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
434 newtp->rx_opt.saw_tstamp = 0; 434 newtp->rx_opt.saw_tstamp = 0;
435 435
436 newtp->rx_opt.dsack = 0; 436 newtp->rx_opt.dsack = 0;
437 newtp->rx_opt.eff_sacks = 0;
438
439 newtp->rx_opt.num_sacks = 0; 437 newtp->rx_opt.num_sacks = 0;
438
440 newtp->urg_data = 0; 439 newtp->urg_data = 0;
441 440
442 if (sock_flag(newsk, SOCK_KEEPOPEN)) 441 if (sock_flag(newsk, SOCK_KEEPOPEN))
@@ -512,7 +511,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
512 * from another data. 511 * from another data.
513 */ 512 */
514 tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans); 513 tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<<req->retrans);
515 paws_reject = tcp_paws_check(&tmp_opt, th->rst); 514 paws_reject = tcp_paws_reject(&tmp_opt, th->rst);
516 } 515 }
517 } 516 }
518 517
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index da2c3b8794f2..c1f259d2d33b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -441,10 +441,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
441 *ptr++ = htonl(sp[this_sack].end_seq); 441 *ptr++ = htonl(sp[this_sack].end_seq);
442 } 442 }
443 443
444 if (tp->rx_opt.dsack) { 444 tp->rx_opt.dsack = 0;
445 tp->rx_opt.dsack = 0;
446 tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks;
447 }
448 } 445 }
449} 446}
450 447
@@ -550,6 +547,7 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
550 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; 547 struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL;
551 struct tcp_sock *tp = tcp_sk(sk); 548 struct tcp_sock *tp = tcp_sk(sk);
552 unsigned size = 0; 549 unsigned size = 0;
550 unsigned int eff_sacks;
553 551
554#ifdef CONFIG_TCP_MD5SIG 552#ifdef CONFIG_TCP_MD5SIG
555 *md5 = tp->af_specific->md5_lookup(sk, sk); 553 *md5 = tp->af_specific->md5_lookup(sk, sk);
@@ -568,10 +566,11 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb,
568 size += TCPOLEN_TSTAMP_ALIGNED; 566 size += TCPOLEN_TSTAMP_ALIGNED;
569 } 567 }
570 568
571 if (unlikely(tp->rx_opt.eff_sacks)) { 569 eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
570 if (unlikely(eff_sacks)) {
572 const unsigned remaining = MAX_TCP_OPTION_SPACE - size; 571 const unsigned remaining = MAX_TCP_OPTION_SPACE - size;
573 opts->num_sack_blocks = 572 opts->num_sack_blocks =
574 min_t(unsigned, tp->rx_opt.eff_sacks, 573 min_t(unsigned, eff_sacks,
575 (remaining - TCPOLEN_SACK_BASE_ALIGNED) / 574 (remaining - TCPOLEN_SACK_BASE_ALIGNED) /
576 TCPOLEN_SACK_PERBLOCK); 575 TCPOLEN_SACK_PERBLOCK);
577 size += TCPOLEN_SACK_BASE_ALIGNED + 576 size += TCPOLEN_SACK_BASE_ALIGNED +
@@ -663,10 +662,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
663 th->urg_ptr = 0; 662 th->urg_ptr = 0;
664 663
665 /* The urg_mode check is necessary during a below snd_una win probe */ 664 /* The urg_mode check is necessary during a below snd_una win probe */
666 if (unlikely(tcp_urg_mode(tp) && 665 if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
667 between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { 666 if (before(tp->snd_up, tcb->seq + 0x10000)) {
668 th->urg_ptr = htons(tp->snd_up - tcb->seq); 667 th->urg_ptr = htons(tp->snd_up - tcb->seq);
669 th->urg = 1; 668 th->urg = 1;
669 } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
670 th->urg_ptr = 0xFFFF;
671 th->urg = 1;
672 }
670 } 673 }
671 674
672 tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); 675 tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location);
@@ -763,11 +766,10 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
763 struct sk_buff *buff; 766 struct sk_buff *buff;
764 int nsize, old_factor; 767 int nsize, old_factor;
765 int nlen; 768 int nlen;
766 u16 flags; 769 u8 flags;
767 770
768 BUG_ON(len > skb->len); 771 BUG_ON(len > skb->len);
769 772
770 tcp_clear_retrans_hints_partial(tp);
771 nsize = skb_headlen(skb) - len; 773 nsize = skb_headlen(skb) - len;
772 if (nsize < 0) 774 if (nsize < 0)
773 nsize = 0; 775 nsize = 0;
@@ -850,6 +852,12 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
850 tcp_verify_left_out(tp); 852 tcp_verify_left_out(tp);
851 } 853 }
852 tcp_adjust_fackets_out(sk, skb, diff); 854 tcp_adjust_fackets_out(sk, skb, diff);
855
856 if (tp->lost_skb_hint &&
857 before(TCP_SKB_CB(skb)->seq,
858 TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
859 (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
860 tp->lost_cnt_hint -= diff;
853 } 861 }
854 862
855 /* Link BUFF into the send queue. */ 863 /* Link BUFF into the send queue. */
@@ -913,7 +921,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
913 * factor and mss. 921 * factor and mss.
914 */ 922 */
915 if (tcp_skb_pcount(skb) > 1) 923 if (tcp_skb_pcount(skb) > 1)
916 tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1)); 924 tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk));
917 925
918 return 0; 926 return 0;
919} 927}
@@ -974,15 +982,6 @@ void tcp_mtup_init(struct sock *sk)
974 icsk->icsk_mtup.probe_size = 0; 982 icsk->icsk_mtup.probe_size = 0;
975} 983}
976 984
977/* Bound MSS / TSO packet size with the half of the window */
978static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
979{
980 if (tp->max_window && pktsize > (tp->max_window >> 1))
981 return max(tp->max_window >> 1, 68U - tp->tcp_header_len);
982 else
983 return pktsize;
984}
985
986/* This function synchronize snd mss to current pmtu/exthdr set. 985/* This function synchronize snd mss to current pmtu/exthdr set.
987 986
988 tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts 987 tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts
@@ -1029,22 +1028,17 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
1029/* Compute the current effective MSS, taking SACKs and IP options, 1028/* Compute the current effective MSS, taking SACKs and IP options,
1030 * and even PMTU discovery events into account. 1029 * and even PMTU discovery events into account.
1031 */ 1030 */
1032unsigned int tcp_current_mss(struct sock *sk, int large_allowed) 1031unsigned int tcp_current_mss(struct sock *sk)
1033{ 1032{
1034 struct tcp_sock *tp = tcp_sk(sk); 1033 struct tcp_sock *tp = tcp_sk(sk);
1035 struct dst_entry *dst = __sk_dst_get(sk); 1034 struct dst_entry *dst = __sk_dst_get(sk);
1036 u32 mss_now; 1035 u32 mss_now;
1037 u16 xmit_size_goal;
1038 int doing_tso = 0;
1039 unsigned header_len; 1036 unsigned header_len;
1040 struct tcp_out_options opts; 1037 struct tcp_out_options opts;
1041 struct tcp_md5sig_key *md5; 1038 struct tcp_md5sig_key *md5;
1042 1039
1043 mss_now = tp->mss_cache; 1040 mss_now = tp->mss_cache;
1044 1041
1045 if (large_allowed && sk_can_gso(sk))
1046 doing_tso = 1;
1047
1048 if (dst) { 1042 if (dst) {
1049 u32 mtu = dst_mtu(dst); 1043 u32 mtu = dst_mtu(dst);
1050 if (mtu != inet_csk(sk)->icsk_pmtu_cookie) 1044 if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
@@ -1062,19 +1056,6 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
1062 mss_now -= delta; 1056 mss_now -= delta;
1063 } 1057 }
1064 1058
1065 xmit_size_goal = mss_now;
1066
1067 if (doing_tso) {
1068 xmit_size_goal = ((sk->sk_gso_max_size - 1) -
1069 inet_csk(sk)->icsk_af_ops->net_header_len -
1070 inet_csk(sk)->icsk_ext_hdr_len -
1071 tp->tcp_header_len);
1072
1073 xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
1074 xmit_size_goal -= (xmit_size_goal % mss_now);
1075 }
1076 tp->xmit_size_goal = xmit_size_goal;
1077
1078 return mss_now; 1059 return mss_now;
1079} 1060}
1080 1061
@@ -1256,7 +1237,7 @@ int tcp_may_send_now(struct sock *sk)
1256 struct sk_buff *skb = tcp_send_head(sk); 1237 struct sk_buff *skb = tcp_send_head(sk);
1257 1238
1258 return (skb && 1239 return (skb &&
1259 tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), 1240 tcp_snd_test(sk, skb, tcp_current_mss(sk),
1260 (tcp_skb_is_last(sk, skb) ? 1241 (tcp_skb_is_last(sk, skb) ?
1261 tp->nonagle : TCP_NAGLE_PUSH))); 1242 tp->nonagle : TCP_NAGLE_PUSH)));
1262} 1243}
@@ -1273,7 +1254,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
1273{ 1254{
1274 struct sk_buff *buff; 1255 struct sk_buff *buff;
1275 int nlen = skb->len - len; 1256 int nlen = skb->len - len;
1276 u16 flags; 1257 u8 flags;
1277 1258
1278 /* All of a TSO frame must be composed of paged data. */ 1259 /* All of a TSO frame must be composed of paged data. */
1279 if (skb->len != skb->data_len) 1260 if (skb->len != skb->data_len)
@@ -1352,6 +1333,10 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
1352 if (limit >= sk->sk_gso_max_size) 1333 if (limit >= sk->sk_gso_max_size)
1353 goto send_now; 1334 goto send_now;
1354 1335
1336 /* Middle in queue won't get any more data, full sendable already? */
1337 if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len))
1338 goto send_now;
1339
1355 if (sysctl_tcp_tso_win_divisor) { 1340 if (sysctl_tcp_tso_win_divisor) {
1356 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); 1341 u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
1357 1342
@@ -1405,11 +1390,11 @@ static int tcp_mtu_probe(struct sock *sk)
1405 icsk->icsk_mtup.probe_size || 1390 icsk->icsk_mtup.probe_size ||
1406 inet_csk(sk)->icsk_ca_state != TCP_CA_Open || 1391 inet_csk(sk)->icsk_ca_state != TCP_CA_Open ||
1407 tp->snd_cwnd < 11 || 1392 tp->snd_cwnd < 11 ||
1408 tp->rx_opt.eff_sacks) 1393 tp->rx_opt.num_sacks || tp->rx_opt.dsack)
1409 return -1; 1394 return -1;
1410 1395
1411 /* Very simple search strategy: just double the MSS. */ 1396 /* Very simple search strategy: just double the MSS. */
1412 mss_now = tcp_current_mss(sk, 0); 1397 mss_now = tcp_current_mss(sk);
1413 probe_size = 2 * tp->mss_cache; 1398 probe_size = 2 * tp->mss_cache;
1414 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; 1399 size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache;
1415 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) { 1400 if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) {
@@ -1754,11 +1739,9 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
1754 struct tcp_sock *tp = tcp_sk(sk); 1739 struct tcp_sock *tp = tcp_sk(sk);
1755 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); 1740 struct sk_buff *next_skb = tcp_write_queue_next(sk, skb);
1756 int skb_size, next_skb_size; 1741 int skb_size, next_skb_size;
1757 u16 flags;
1758 1742
1759 skb_size = skb->len; 1743 skb_size = skb->len;
1760 next_skb_size = next_skb->len; 1744 next_skb_size = next_skb->len;
1761 flags = TCP_SKB_CB(skb)->flags;
1762 1745
1763 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); 1746 BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
1764 1747
@@ -1778,9 +1761,8 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
1778 /* Update sequence range on original skb. */ 1761 /* Update sequence range on original skb. */
1779 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; 1762 TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq;
1780 1763
1781 /* Merge over control information. */ 1764 /* Merge over control information. This moves PSH/FIN etc. over */
1782 flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ 1765 TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags;
1783 TCP_SKB_CB(skb)->flags = flags;
1784 1766
1785 /* All done, get rid of second SKB and account for it so 1767 /* All done, get rid of second SKB and account for it so
1786 * packet counting does not break. 1768 * packet counting does not break.
@@ -1894,7 +1876,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1894 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) 1876 if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
1895 return -EHOSTUNREACH; /* Routing failure or similar. */ 1877 return -EHOSTUNREACH; /* Routing failure or similar. */
1896 1878
1897 cur_mss = tcp_current_mss(sk, 0); 1879 cur_mss = tcp_current_mss(sk);
1898 1880
1899 /* If receiver has shrunk his window, and skb is out of 1881 /* If receiver has shrunk his window, and skb is out of
1900 * new window, do not retransmit it. The exception is the 1882 * new window, do not retransmit it. The exception is the
@@ -1908,6 +1890,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
1908 if (skb->len > cur_mss) { 1890 if (skb->len > cur_mss) {
1909 if (tcp_fragment(sk, skb, cur_mss, cur_mss)) 1891 if (tcp_fragment(sk, skb, cur_mss, cur_mss))
1910 return -ENOMEM; /* We'll try again later. */ 1892 return -ENOMEM; /* We'll try again later. */
1893 } else {
1894 tcp_init_tso_segs(sk, skb, cur_mss);
1911 } 1895 }
1912 1896
1913 tcp_retrans_try_collapse(sk, skb, cur_mss); 1897 tcp_retrans_try_collapse(sk, skb, cur_mss);
@@ -2061,7 +2045,7 @@ begin_fwd:
2061 goto begin_fwd; 2045 goto begin_fwd;
2062 2046
2063 } else if (!(sacked & TCPCB_LOST)) { 2047 } else if (!(sacked & TCPCB_LOST)) {
2064 if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS)) 2048 if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED)))
2065 hole = skb; 2049 hole = skb;
2066 continue; 2050 continue;
2067 2051
@@ -2100,7 +2084,7 @@ void tcp_send_fin(struct sock *sk)
2100 * unsent frames. But be careful about outgoing SACKS 2084 * unsent frames. But be careful about outgoing SACKS
2101 * and IP options. 2085 * and IP options.
2102 */ 2086 */
2103 mss_now = tcp_current_mss(sk, 1); 2087 mss_now = tcp_current_mss(sk);
2104 2088
2105 if (tcp_send_head(sk) != NULL) { 2089 if (tcp_send_head(sk) != NULL) {
2106 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; 2090 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
@@ -2325,7 +2309,7 @@ static void tcp_connect_init(struct sock *sk)
2325 sk->sk_err = 0; 2309 sk->sk_err = 0;
2326 sock_reset_flag(sk, SOCK_DONE); 2310 sock_reset_flag(sk, SOCK_DONE);
2327 tp->snd_wnd = 0; 2311 tp->snd_wnd = 0;
2328 tcp_init_wl(tp, tp->write_seq, 0); 2312 tcp_init_wl(tp, 0);
2329 tp->snd_una = tp->write_seq; 2313 tp->snd_una = tp->write_seq;
2330 tp->snd_sml = tp->write_seq; 2314 tp->snd_sml = tp->write_seq;
2331 tp->snd_up = tp->write_seq; 2315 tp->snd_up = tp->write_seq;
@@ -2512,7 +2496,7 @@ int tcp_write_wakeup(struct sock *sk)
2512 if ((skb = tcp_send_head(sk)) != NULL && 2496 if ((skb = tcp_send_head(sk)) != NULL &&
2513 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { 2497 before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) {
2514 int err; 2498 int err;
2515 unsigned int mss = tcp_current_mss(sk, 0); 2499 unsigned int mss = tcp_current_mss(sk);
2516 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; 2500 unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
2517 2501
2518 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) 2502 if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
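
The tcp_transmit_skb() hunk above reworks urgent-pointer encoding: when the urgent point lies within 64 KiB of the segment's sequence number the 16-bit offset is emitted as before, and when it lies further ahead while the 64 KiB horizon still reaches past snd_nxt, the pointer is clamped to 0xFFFF so URG stays advertised. A user-space sketch of that decision, with hypothetical helper and argument names (the kernel additionally byte-swaps the field with htons()):

#include <stdint.h>
#include <stdio.h>

/* Returns 1 and fills *ptr when URG should be set for a segment starting at
 * seg_seq, given the urgent point snd_up and the send frontier snd_nxt. */
static int urg_pointer(uint32_t seg_seq, uint32_t snd_up, uint32_t snd_nxt,
		       uint16_t *ptr)
{
	if ((int32_t)(snd_up - seg_seq) <= 0)
		return 0;				/* urgent point already passed */
	if ((int32_t)(snd_up - (seg_seq + 0x10000)) < 0) {
		*ptr = (uint16_t)(snd_up - seg_seq);	/* offset fits the 16-bit field */
		return 1;
	}
	if ((int32_t)((seg_seq + 0xFFFF) - snd_nxt) > 0) {
		*ptr = 0xFFFF;				/* clamp: point as far ahead as possible */
		return 1;
	}
	return 0;
}

int main(void)
{
	uint16_t p;

	if (urg_pointer(1000, 1500, 2000, &p))
		printf("urg_ptr = %u\n", p);	/* 500 */
	return 0;
}
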
diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c
index 25524d4e372a..59f5b5e7c566 100644
--- a/net/ipv4/tcp_probe.c
+++ b/net/ipv4/tcp_probe.c
@@ -165,9 +165,10 @@ static int tcpprobe_sprint(char *tbuf, int n)
165static ssize_t tcpprobe_read(struct file *file, char __user *buf, 165static ssize_t tcpprobe_read(struct file *file, char __user *buf,
166 size_t len, loff_t *ppos) 166 size_t len, loff_t *ppos)
167{ 167{
168 int error = 0, cnt = 0; 168 int error = 0;
169 size_t cnt = 0;
169 170
170 if (!buf || len < 0) 171 if (!buf)
171 return -EINVAL; 172 return -EINVAL;
172 173
173 while (cnt < len) { 174 while (cnt < len) {
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 4660b088a8ce..a76513779e2b 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -24,14 +24,8 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
24 24
25 if (tp->snd_cwnd <= tp->snd_ssthresh) 25 if (tp->snd_cwnd <= tp->snd_ssthresh)
26 tcp_slow_start(tp); 26 tcp_slow_start(tp);
27 else { 27 else
28 tp->snd_cwnd_cnt++; 28 tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT));
29 if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){
30 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
31 tp->snd_cwnd++;
32 tp->snd_cwnd_cnt = 0;
33 }
34 }
35} 29}
36 30
37static u32 tcp_scalable_ssthresh(struct sock *sk) 31static u32 tcp_scalable_ssthresh(struct sock *sk)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 0170e914f1b0..b144a26359bc 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -328,19 +328,16 @@ static void tcp_retransmit_timer(struct sock *sk)
328 if (icsk->icsk_retransmits == 0) { 328 if (icsk->icsk_retransmits == 0) {
329 int mib_idx; 329 int mib_idx;
330 330
331 if (icsk->icsk_ca_state == TCP_CA_Disorder || 331 if (icsk->icsk_ca_state == TCP_CA_Disorder) {
332 icsk->icsk_ca_state == TCP_CA_Recovery) { 332 if (tcp_is_sack(tp))
333 if (tcp_is_sack(tp)) { 333 mib_idx = LINUX_MIB_TCPSACKFAILURES;
334 if (icsk->icsk_ca_state == TCP_CA_Recovery) 334 else
335 mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; 335 mib_idx = LINUX_MIB_TCPRENOFAILURES;
336 else 336 } else if (icsk->icsk_ca_state == TCP_CA_Recovery) {
337 mib_idx = LINUX_MIB_TCPSACKFAILURES; 337 if (tcp_is_sack(tp))
338 } else { 338 mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
339 if (icsk->icsk_ca_state == TCP_CA_Recovery) 339 else
340 mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; 340 mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
341 else
342 mib_idx = LINUX_MIB_TCPRENOFAILURES;
343 }
344 } else if (icsk->icsk_ca_state == TCP_CA_Loss) { 341 } else if (icsk->icsk_ca_state == TCP_CA_Loss) {
345 mib_idx = LINUX_MIB_TCPLOSSFAILURES; 342 mib_idx = LINUX_MIB_TCPLOSSFAILURES;
346 } else { 343 } else {
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index d08b2e855c22..e9bbff746488 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -159,12 +159,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
159 /* In the "non-congestive state", increase cwnd 159 /* In the "non-congestive state", increase cwnd
160 * every rtt. 160 * every rtt.
161 */ 161 */
162 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { 162 tcp_cong_avoid_ai(tp, tp->snd_cwnd);
163 if (tp->snd_cwnd < tp->snd_cwnd_clamp)
164 tp->snd_cwnd++;
165 tp->snd_cwnd_cnt = 0;
166 } else
167 tp->snd_cwnd_cnt++;
168 } else { 163 } else {
169 /* In the "congestive state", increase cwnd 164 /* In the "congestive state", increase cwnd
170 * every other rtt. 165 * every other rtt.
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 9ec843a9bbb2..66b6821b984e 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -94,14 +94,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
94 94
95 } else { 95 } else {
96 /* Reno */ 96 /* Reno */
97 97 tcp_cong_avoid_ai(tp, tp->snd_cwnd);
98 if (tp->snd_cwnd_cnt < tp->snd_cwnd)
99 tp->snd_cwnd_cnt++;
100
101 if (tp->snd_cwnd_cnt >= tp->snd_cwnd) {
102 tp->snd_cwnd++;
103 tp->snd_cwnd_cnt = 0;
104 }
105 } 98 }
106 99
107 /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. 100 /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt.
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index c47c989cb1fb..bda08a09357d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -222,7 +222,7 @@ fail:
222 return error; 222 return error;
223} 223}
224 224
225static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) 225int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
226{ 226{
227 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); 227 struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
228 228
@@ -596,6 +596,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
596 return -EOPNOTSUPP; 596 return -EOPNOTSUPP;
597 597
598 ipc.opt = NULL; 598 ipc.opt = NULL;
599 ipc.shtx.flags = 0;
599 600
600 if (up->pending) { 601 if (up->pending) {
601 /* 602 /*
@@ -643,6 +644,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
643 ipc.addr = inet->saddr; 644 ipc.addr = inet->saddr;
644 645
645 ipc.oif = sk->sk_bound_dev_if; 646 ipc.oif = sk->sk_bound_dev_if;
647 err = sock_tx_timestamp(msg, sk, &ipc.shtx);
648 if (err)
649 return err;
646 if (msg->msg_controllen) { 650 if (msg->msg_controllen) {
647 err = ip_cmsg_send(sock_net(sk), msg, &ipc); 651 err = ip_cmsg_send(sock_net(sk), msg, &ipc);
648 if (err) 652 if (err)
@@ -1180,7 +1184,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
1180 sk = sknext; 1184 sk = sknext;
1181 } while (sknext); 1185 } while (sknext);
1182 } else 1186 } else
1183 kfree_skb(skb); 1187 consume_skb(skb);
1184 spin_unlock(&hslot->lock); 1188 spin_unlock(&hslot->lock);
1185 return 0; 1189 return 0;
1186} 1190}
@@ -1614,7 +1618,8 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
1614 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); 1618 } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family));
1615 1619
1616 if (!sk) { 1620 if (!sk) {
1617 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); 1621 if (state->bucket < UDP_HTABLE_SIZE)
1622 spin_unlock_bh(&state->udp_table->hash[state->bucket].lock);
1618 return udp_get_first(seq, state->bucket + 1); 1623 return udp_get_first(seq, state->bucket + 1);
1619 } 1624 }
1620 return sk; 1625 return sk;
@@ -1632,6 +1637,9 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos)
1632 1637
1633static void *udp_seq_start(struct seq_file *seq, loff_t *pos) 1638static void *udp_seq_start(struct seq_file *seq, loff_t *pos)
1634{ 1639{
1640 struct udp_iter_state *state = seq->private;
1641 state->bucket = UDP_HTABLE_SIZE;
1642
1635 return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; 1643 return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN;
1636} 1644}
1637 1645
@@ -1815,6 +1823,7 @@ EXPORT_SYMBOL(udp_lib_getsockopt);
1815EXPORT_SYMBOL(udp_lib_setsockopt); 1823EXPORT_SYMBOL(udp_lib_setsockopt);
1816EXPORT_SYMBOL(udp_poll); 1824EXPORT_SYMBOL(udp_poll);
1817EXPORT_SYMBOL(udp_lib_get_port); 1825EXPORT_SYMBOL(udp_lib_get_port);
1826EXPORT_SYMBOL(ipv4_rcv_saddr_equal);
1818 1827
1819#ifdef CONFIG_PROC_FS 1828#ifdef CONFIG_PROC_FS
1820EXPORT_SYMBOL(udp_proc_register); 1829EXPORT_SYMBOL(udp_proc_register);
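
ipv4_rcv_saddr_equal() loses its static qualifier and gains an EXPORT_SYMBOL here so the IPv6 UDP code (see net/ipv6/udp.c below) can reuse it for v4-mapped sockets. The rule it implements is simple: two bound source addresses conflict if either is the wildcard or the two are equal. A rough userspace rendering of just that address test (the kernel version additionally checks the second socket's ipv6only flag):

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

/* A collision exists if either socket is bound to the wildcard address
 * (0.0.0.0) or both are bound to the same address.  Addresses are kept
 * in network byte order, as the kernel keeps rcv_saddr. */
static int rcv_saddr_conflict(uint32_t saddr1, uint32_t saddr2)
{
        return saddr1 == htonl(INADDR_ANY) ||
               saddr2 == htonl(INADDR_ANY) ||
               saddr1 == saddr2;
}

int main(void)
{
        uint32_t any, a, b;

        inet_pton(AF_INET, "0.0.0.0", &any);
        inet_pton(AF_INET, "192.0.2.1", &a);
        inet_pton(AF_INET, "192.0.2.2", &b);

        printf("any vs a: %d\n", rcv_saddr_conflict(any, a)); /* 1 */
        printf("a   vs a: %d\n", rcv_saddr_conflict(a, a));   /* 1 */
        printf("a   vs b: %d\n", rcv_saddr_conflict(a, b));   /* 0 */
        return 0;
}
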
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 2ad24ba31f9d..60d918c96a4f 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -241,7 +241,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
241 241
242static struct dst_ops xfrm4_dst_ops = { 242static struct dst_ops xfrm4_dst_ops = {
243 .family = AF_INET, 243 .family = AF_INET,
244 .protocol = __constant_htons(ETH_P_IP), 244 .protocol = cpu_to_be16(ETH_P_IP),
245 .gc = xfrm4_garbage_collect, 245 .gc = xfrm4_garbage_collect,
246 .update_pmtu = xfrm4_update_pmtu, 246 .update_pmtu = xfrm4_update_pmtu,
247 .destroy = xfrm4_dst_destroy, 247 .destroy = xfrm4_dst_destroy,
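
This hunk, like the later ones in net/ipv6/route.c, xfrm6_policy.c and af_ipx.c, replaces the legacy __constant_htons() with cpu_to_be16(), which carries the __be16 annotation and folds to a constant, so it works equally well in static initializers such as dst_ops.protocol. A userspace sketch of such a compile-time byte-order conversion (macro names are made up, not the kernel's):

#include <stdio.h>
#include <stdint.h>

/* Compile-time conversion of a 16-bit host constant to big-endian byte
 * order, usable in static initializers regardless of host endianness. */
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define CONST_BE16(x) ((uint16_t)(x))
#else
#define CONST_BE16(x) ((uint16_t)((((x) & 0x00ffu) << 8) | (((x) & 0xff00u) >> 8)))
#endif

/* EtherType for IPv4, host order. */
#define ETH_P_IP_HOST 0x0800u

static const uint16_t ip_proto_wire = CONST_BE16(ETH_P_IP_HOST);

int main(void)
{
        const unsigned char *p = (const unsigned char *)&ip_proto_wire;

        printf("wire bytes: %02x %02x\n", p[0], p[1]); /* 08 00 on any host */
        return 0;
}
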
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1220e2c7831e..a8218bc1806a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -40,6 +40,7 @@
40 40
41#include <linux/errno.h> 41#include <linux/errno.h>
42#include <linux/types.h> 42#include <linux/types.h>
43#include <linux/kernel.h>
43#include <linux/socket.h> 44#include <linux/socket.h>
44#include <linux/sockios.h> 45#include <linux/sockios.h>
45#include <linux/net.h> 46#include <linux/net.h>
@@ -590,6 +591,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
590{ 591{
591 struct inet6_ifaddr *ifa = NULL; 592 struct inet6_ifaddr *ifa = NULL;
592 struct rt6_info *rt; 593 struct rt6_info *rt;
594 struct net *net = dev_net(idev->dev);
593 int hash; 595 int hash;
594 int err = 0; 596 int err = 0;
595 int addr_type = ipv6_addr_type(addr); 597 int addr_type = ipv6_addr_type(addr);
@@ -606,6 +608,11 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
606 goto out2; 608 goto out2;
607 } 609 }
608 610
611 if (idev->cnf.disable_ipv6 || net->ipv6.devconf_all->disable_ipv6) {
612 err = -EACCES;
613 goto out2;
614 }
615
609 write_lock(&addrconf_hash_lock); 616 write_lock(&addrconf_hash_lock);
610 617
611 /* Ignore adding duplicate addresses on an interface */ 618 /* Ignore adding duplicate addresses on an interface */
@@ -1209,16 +1216,12 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
1209 } 1216 }
1210 break; 1217 break;
1211 } else if (minihiscore < miniscore) { 1218 } else if (minihiscore < miniscore) {
1212 struct ipv6_saddr_score *tmp;
1213
1214 if (hiscore->ifa) 1219 if (hiscore->ifa)
1215 in6_ifa_put(hiscore->ifa); 1220 in6_ifa_put(hiscore->ifa);
1216 1221
1217 in6_ifa_hold(score->ifa); 1222 in6_ifa_hold(score->ifa);
1218 1223
1219 tmp = hiscore; 1224 swap(hiscore, score);
1220 hiscore = score;
1221 score = tmp;
1222 1225
1223 /* restore our iterator */ 1226 /* restore our iterator */
1224 score->ifa = hiscore->ifa; 1227 score->ifa = hiscore->ifa;
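
The hunk above swaps the two score pointers with the kernel's typeof-based swap() macro instead of a hand-rolled temporary. The same idiom in plain C (the macro is written out here, not copied from the kernel headers):

#include <stdio.h>

/* Exchange two lvalues of the same type via a typeof temporary,
 * the idiom behind the kernel's swap() helper. */
#define swap_vals(a, b) \
        do { __typeof__(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)

struct saddr_score { int rule; int matched; };

int main(void)
{
        struct saddr_score s1 = { .rule = 1, .matched = 0 };
        struct saddr_score s2 = { .rule = 7, .matched = 1 };
        struct saddr_score *hiscore = &s1, *score = &s2;

        /* Keep the better-scoring entry in *hiscore, as the addrconf
         * source-address selection loop does. */
        swap_vals(hiscore, score);
        printf("hiscore rule=%d score rule=%d\n", hiscore->rule, score->rule);
        return 0;
}
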
@@ -1367,40 +1370,6 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
1367 return ifp; 1370 return ifp;
1368} 1371}
1369 1372
1370int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
1371{
1372 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
1373 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
1374 __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
1375 __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
1376 int sk_ipv6only = ipv6_only_sock(sk);
1377 int sk2_ipv6only = inet_v6_ipv6only(sk2);
1378 int addr_type = ipv6_addr_type(sk_rcv_saddr6);
1379 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
1380
1381 if (!sk2_rcv_saddr && !sk_ipv6only)
1382 return 1;
1383
1384 if (addr_type2 == IPV6_ADDR_ANY &&
1385 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
1386 return 1;
1387
1388 if (addr_type == IPV6_ADDR_ANY &&
1389 !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
1390 return 1;
1391
1392 if (sk2_rcv_saddr6 &&
1393 ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
1394 return 1;
1395
1396 if (addr_type == IPV6_ADDR_MAPPED &&
1397 !sk2_ipv6only &&
1398 (!sk2_rcv_saddr || !sk_rcv_saddr || sk_rcv_saddr == sk2_rcv_saddr))
1399 return 1;
1400
1401 return 0;
1402}
1403
1404/* Gets referenced address, destroys ifaddr */ 1373/* Gets referenced address, destroys ifaddr */
1405 1374
1406static void addrconf_dad_stop(struct inet6_ifaddr *ifp) 1375static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
@@ -1433,6 +1402,11 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp)
1433void addrconf_dad_failure(struct inet6_ifaddr *ifp) 1402void addrconf_dad_failure(struct inet6_ifaddr *ifp)
1434{ 1403{
1435 struct inet6_dev *idev = ifp->idev; 1404 struct inet6_dev *idev = ifp->idev;
1405
1406 if (net_ratelimit())
1407 printk(KERN_INFO "%s: IPv6 duplicate address detected!\n",
1408 ifp->idev->dev->name);
1409
1436 if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { 1410 if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) {
1437 struct in6_addr addr; 1411 struct in6_addr addr;
1438 1412
@@ -1443,11 +1417,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
1443 ipv6_addr_equal(&ifp->addr, &addr)) { 1417 ipv6_addr_equal(&ifp->addr, &addr)) {
1444 /* DAD failed for link-local based on MAC address */ 1418 /* DAD failed for link-local based on MAC address */
1445 idev->cnf.disable_ipv6 = 1; 1419 idev->cnf.disable_ipv6 = 1;
1420
1421 printk(KERN_INFO "%s: IPv6 being disabled!\n",
1422 ifp->idev->dev->name);
1446 } 1423 }
1447 } 1424 }
1448 1425
1449 if (net_ratelimit())
1450 printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name);
1451 addrconf_dad_stop(ifp); 1426 addrconf_dad_stop(ifp);
1452} 1427}
1453 1428
@@ -2227,10 +2202,24 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
2227 return err; 2202 return err;
2228} 2203}
2229 2204
2205static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
2206 int plen, int scope)
2207{
2208 struct inet6_ifaddr *ifp;
2209
2210 ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT);
2211 if (!IS_ERR(ifp)) {
2212 spin_lock_bh(&ifp->lock);
2213 ifp->flags &= ~IFA_F_TENTATIVE;
2214 spin_unlock_bh(&ifp->lock);
2215 ipv6_ifa_notify(RTM_NEWADDR, ifp);
2216 in6_ifa_put(ifp);
2217 }
2218}
2219
2230#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) 2220#if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
2231static void sit_add_v4_addrs(struct inet6_dev *idev) 2221static void sit_add_v4_addrs(struct inet6_dev *idev)
2232{ 2222{
2233 struct inet6_ifaddr * ifp;
2234 struct in6_addr addr; 2223 struct in6_addr addr;
2235 struct net_device *dev; 2224 struct net_device *dev;
2236 struct net *net = dev_net(idev->dev); 2225 struct net *net = dev_net(idev->dev);
@@ -2249,14 +2238,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
2249 } 2238 }
2250 2239
2251 if (addr.s6_addr32[3]) { 2240 if (addr.s6_addr32[3]) {
2252 ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT); 2241 add_addr(idev, &addr, 128, scope);
2253 if (!IS_ERR(ifp)) {
2254 spin_lock_bh(&ifp->lock);
2255 ifp->flags &= ~IFA_F_TENTATIVE;
2256 spin_unlock_bh(&ifp->lock);
2257 ipv6_ifa_notify(RTM_NEWADDR, ifp);
2258 in6_ifa_put(ifp);
2259 }
2260 return; 2242 return;
2261 } 2243 }
2262 2244
@@ -2284,15 +2266,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
2284 else 2266 else
2285 plen = 96; 2267 plen = 96;
2286 2268
2287 ifp = ipv6_add_addr(idev, &addr, plen, flag, 2269 add_addr(idev, &addr, plen, flag);
2288 IFA_F_PERMANENT);
2289 if (!IS_ERR(ifp)) {
2290 spin_lock_bh(&ifp->lock);
2291 ifp->flags &= ~IFA_F_TENTATIVE;
2292 spin_unlock_bh(&ifp->lock);
2293 ipv6_ifa_notify(RTM_NEWADDR, ifp);
2294 in6_ifa_put(ifp);
2295 }
2296 } 2270 }
2297 } 2271 }
2298 } 2272 }
@@ -2302,7 +2276,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
2302static void init_loopback(struct net_device *dev) 2276static void init_loopback(struct net_device *dev)
2303{ 2277{
2304 struct inet6_dev *idev; 2278 struct inet6_dev *idev;
2305 struct inet6_ifaddr * ifp;
2306 2279
2307 /* ::1 */ 2280 /* ::1 */
2308 2281
@@ -2313,14 +2286,7 @@ static void init_loopback(struct net_device *dev)
2313 return; 2286 return;
2314 } 2287 }
2315 2288
2316 ifp = ipv6_add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFA_F_PERMANENT); 2289 add_addr(idev, &in6addr_loopback, 128, IFA_HOST);
2317 if (!IS_ERR(ifp)) {
2318 spin_lock_bh(&ifp->lock);
2319 ifp->flags &= ~IFA_F_TENTATIVE;
2320 spin_unlock_bh(&ifp->lock);
2321 ipv6_ifa_notify(RTM_NEWADDR, ifp);
2322 in6_ifa_put(ifp);
2323 }
2324} 2290}
2325 2291
2326static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr) 2292static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr)
@@ -2832,11 +2798,6 @@ static void addrconf_dad_timer(unsigned long data)
2832 read_unlock_bh(&idev->lock); 2798 read_unlock_bh(&idev->lock);
2833 goto out; 2799 goto out;
2834 } 2800 }
2835 if (idev->cnf.accept_dad > 1 && idev->cnf.disable_ipv6) {
2836 read_unlock_bh(&idev->lock);
2837 addrconf_dad_failure(ifp);
2838 return;
2839 }
2840 spin_lock_bh(&ifp->lock); 2801 spin_lock_bh(&ifp->lock);
2841 if (ifp->probes == 0) { 2802 if (ifp->probes == 0) {
2842 /* 2803 /*
@@ -3647,7 +3608,8 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa)
3647 kfree_skb(skb); 3608 kfree_skb(skb);
3648 goto errout; 3609 goto errout;
3649 } 3610 }
3650 err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); 3611 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
3612 return;
3651errout: 3613errout:
3652 if (err < 0) 3614 if (err < 0)
3653 rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); 3615 rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
@@ -3858,7 +3820,8 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev)
3858 kfree_skb(skb); 3820 kfree_skb(skb);
3859 goto errout; 3821 goto errout;
3860 } 3822 }
3861 err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); 3823 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC);
3824 return;
3862errout: 3825errout:
3863 if (err < 0) 3826 if (err < 0)
3864 rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); 3827 rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err);
@@ -3928,7 +3891,8 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev,
3928 kfree_skb(skb); 3891 kfree_skb(skb);
3929 goto errout; 3892 goto errout;
3930 } 3893 }
3931 err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); 3894 rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC);
3895 return;
3932errout: 3896errout:
3933 if (err < 0) 3897 if (err < 0)
3934 rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); 3898 rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index da944eca2ca6..61f55386a236 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -276,11 +276,26 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
276 276
277 /* Check if the address belongs to the host. */ 277 /* Check if the address belongs to the host. */
278 if (addr_type == IPV6_ADDR_MAPPED) { 278 if (addr_type == IPV6_ADDR_MAPPED) {
279 v4addr = addr->sin6_addr.s6_addr32[3]; 279 int chk_addr_ret;
280 if (inet_addr_type(net, v4addr) != RTN_LOCAL) { 280
281 err = -EADDRNOTAVAIL; 281 /* Binding to v4-mapped address on a v6-only socket
282 * makes no sense
283 */
284 if (np->ipv6only) {
285 err = -EINVAL;
282 goto out; 286 goto out;
283 } 287 }
288
 289 /* Reproduce AF_INET checks to make the bindings consistent */
290 v4addr = addr->sin6_addr.s6_addr32[3];
291 chk_addr_ret = inet_addr_type(net, v4addr);
292 if (!sysctl_ip_nonlocal_bind &&
293 !(inet->freebind || inet->transparent) &&
294 v4addr != htonl(INADDR_ANY) &&
295 chk_addr_ret != RTN_LOCAL &&
296 chk_addr_ret != RTN_MULTICAST &&
297 chk_addr_ret != RTN_BROADCAST)
298 goto out;
284 } else { 299 } else {
285 if (addr_type != IPV6_ADDR_ANY) { 300 if (addr_type != IPV6_ADDR_ANY) {
286 struct net_device *dev = NULL; 301 struct net_device *dev = NULL;
@@ -339,8 +354,11 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
339 goto out; 354 goto out;
340 } 355 }
341 356
342 if (addr_type != IPV6_ADDR_ANY) 357 if (addr_type != IPV6_ADDR_ANY) {
343 sk->sk_userlocks |= SOCK_BINDADDR_LOCK; 358 sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
359 if (addr_type != IPV6_ADDR_MAPPED)
360 np->ipv6only = 1;
361 }
344 if (snum) 362 if (snum)
345 sk->sk_userlocks |= SOCK_BINDPORT_LOCK; 363 sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
346 inet->sport = htons(inet->num); 364 inet->sport = htons(inet->num);
@@ -803,24 +821,34 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,
803 int proto; 821 int proto;
804 __wsum csum; 822 __wsum csum;
805 823
806 if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) 824 iph = skb_gro_header(skb, sizeof(*iph));
825 if (unlikely(!iph))
807 goto out; 826 goto out;
808 827
809 iph = ipv6_hdr(skb); 828 skb_gro_pull(skb, sizeof(*iph));
810 __skb_pull(skb, sizeof(*iph)); 829 skb_set_transport_header(skb, skb_gro_offset(skb));
811 830
812 flush += ntohs(iph->payload_len) != skb->len; 831 flush += ntohs(iph->payload_len) != skb_gro_len(skb);
813 832
814 rcu_read_lock(); 833 rcu_read_lock();
815 proto = ipv6_gso_pull_exthdrs(skb, iph->nexthdr); 834 proto = iph->nexthdr;
816 iph = ipv6_hdr(skb);
817 IPV6_GRO_CB(skb)->proto = proto;
818 ops = rcu_dereference(inet6_protos[proto]); 835 ops = rcu_dereference(inet6_protos[proto]);
819 if (!ops || !ops->gro_receive) 836 if (!ops || !ops->gro_receive) {
820 goto out_unlock; 837 __pskb_pull(skb, skb_gro_offset(skb));
838 proto = ipv6_gso_pull_exthdrs(skb, proto);
839 skb_gro_pull(skb, -skb_transport_offset(skb));
840 skb_reset_transport_header(skb);
841 __skb_push(skb, skb_gro_offset(skb));
842
843 if (!ops || !ops->gro_receive)
844 goto out_unlock;
845
846 iph = ipv6_hdr(skb);
847 }
848
849 IPV6_GRO_CB(skb)->proto = proto;
821 850
822 flush--; 851 flush--;
823 skb_reset_transport_header(skb);
824 nlen = skb_network_header_len(skb); 852 nlen = skb_network_header_len(skb);
825 853
826 for (p = *head; p; p = p->next) { 854 for (p = *head; p; p = p->next) {
@@ -883,8 +911,8 @@ out_unlock:
883 return err; 911 return err;
884} 912}
885 913
886static struct packet_type ipv6_packet_type = { 914static struct packet_type ipv6_packet_type __read_mostly = {
887 .type = __constant_htons(ETH_P_IPV6), 915 .type = cpu_to_be16(ETH_P_IPV6),
888 .func = ipv6_rcv, 916 .func = ipv6_rcv,
889 .gso_send_check = ipv6_gso_send_check, 917 .gso_send_check = ipv6_gso_send_check,
890 .gso_segment = ipv6_gso_segment, 918 .gso_segment = ipv6_gso_segment,
@@ -1192,6 +1220,9 @@ module_init(inet6_init);
1192 1220
1193static void __exit inet6_exit(void) 1221static void __exit inet6_exit(void)
1194{ 1222{
1223 if (disable_ipv6)
1224 return;
1225
1195 /* First of all disallow new sockets creation. */ 1226 /* First of all disallow new sockets creation. */
1196 sock_unregister(PF_INET6); 1227 sock_unregister(PF_INET6);
1197 /* Disallow any further netlink messages */ 1228 /* Disallow any further netlink messages */
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index f171e8dbac91..8f04bd9da274 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -75,8 +75,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
75 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || 75 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL ||
76 !idev || unlikely(idev->cnf.disable_ipv6)) { 76 !idev || unlikely(idev->cnf.disable_ipv6)) {
77 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); 77 IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS);
78 rcu_read_unlock(); 78 goto drop;
79 goto out;
80 } 79 }
81 80
82 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); 81 memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
@@ -147,7 +146,6 @@ err:
147drop: 146drop:
148 rcu_read_unlock(); 147 rcu_read_unlock();
149 kfree_skb(skb); 148 kfree_skb(skb);
150out:
151 return 0; 149 return 0;
152} 150}
153 151
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 40f324655e24..d31df0f4bc9a 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -218,8 +218,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
218 if (opt) 218 if (opt)
219 sock_kfree_s(sk, opt, opt->tot_len); 219 sock_kfree_s(sk, opt, opt->tot_len);
220 pktopt = xchg(&np->pktoptions, NULL); 220 pktopt = xchg(&np->pktoptions, NULL);
221 if (pktopt) 221 kfree_skb(pktopt);
222 kfree_skb(pktopt);
223 222
224 sk->sk_destruct = inet_sock_destruct; 223 sk->sk_destruct = inet_sock_destruct;
225 /* 224 /*
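
The guard removed above is redundant because kfree_skb() accepts a NULL pointer, just as free() does in userspace; the same simplification shows up again in tcp_ipv6.c below. In libc terms:

#include <stdlib.h>
#include <stdio.h>

struct pktopts { char data[64]; };

/* Redundant form: the NULL test duplicates what free() already does. */
static void release_verbose(struct pktopts *opt)
{
        if (opt)
                free(opt);
}

/* Preferred form: free(NULL) is defined to be a no-op (C99 7.20.3.2),
 * just as kfree_skb(NULL) is in the kernel. */
static void release(struct pktopts *opt)
{
        free(opt);
}

int main(void)
{
        struct pktopts *opt = malloc(sizeof(*opt));

        release(opt);           /* frees the buffer */
        release(NULL);          /* harmless no-op */
        release_verbose(NULL);
        puts("done");
        return 0;
}
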
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3e2970841bd8..9f061d1adbc2 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1095,11 +1095,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1095 &ipv6_hdr(ra)->saddr); 1095 &ipv6_hdr(ra)->saddr);
1096 nlmsg_end(skb, nlh); 1096 nlmsg_end(skb, nlh);
1097 1097
1098 err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, 1098 rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC);
1099 GFP_ATOMIC);
1100 if (err < 0)
1101 goto errout;
1102
1103 return; 1099 return;
1104 1100
1105nla_put_failure: 1101nla_put_failure:
@@ -1538,13 +1534,10 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1538 if (rt->rt6i_flags & RTF_GATEWAY) { 1534 if (rt->rt6i_flags & RTF_GATEWAY) {
1539 ND_PRINTK2(KERN_WARNING 1535 ND_PRINTK2(KERN_WARNING
1540 "ICMPv6 Redirect: destination is not a neighbour.\n"); 1536 "ICMPv6 Redirect: destination is not a neighbour.\n");
1541 dst_release(dst); 1537 goto release;
1542 return;
1543 }
1544 if (!xrlim_allow(dst, 1*HZ)) {
1545 dst_release(dst);
1546 return;
1547 } 1538 }
1539 if (!xrlim_allow(dst, 1*HZ))
1540 goto release;
1548 1541
1549 if (dev->addr_len) { 1542 if (dev->addr_len) {
1550 read_lock_bh(&neigh->lock); 1543 read_lock_bh(&neigh->lock);
@@ -1570,8 +1563,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1570 ND_PRINTK0(KERN_ERR 1563 ND_PRINTK0(KERN_ERR
1571 "ICMPv6 Redirect: %s() failed to allocate an skb.\n", 1564 "ICMPv6 Redirect: %s() failed to allocate an skb.\n",
1572 __func__); 1565 __func__);
1573 dst_release(dst); 1566 goto release;
1574 return;
1575 } 1567 }
1576 1568
1577 skb_reserve(buff, LL_RESERVED_SPACE(dev)); 1569 skb_reserve(buff, LL_RESERVED_SPACE(dev));
@@ -1631,6 +1623,10 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1631 1623
1632 if (likely(idev != NULL)) 1624 if (likely(idev != NULL))
1633 in6_dev_put(idev); 1625 in6_dev_put(idev);
1626 return;
1627
1628release:
1629 dst_release(dst);
1634} 1630}
1635 1631
1636static void pndisc_redo(struct sk_buff *skb) 1632static void pndisc_redo(struct sk_buff *skb)
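
ndisc_send_redirect() above now funnels its early exits through a single release: label instead of repeating dst_release() before each return. The shape of that pattern, shown with ordinary heap buffers (function and names invented for the example):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Parse a line into a newly allocated upper-cased copy.  Every early exit
 * that must drop the temporary buffer funnels through one label, the same
 * shape ndisc_send_redirect() now has with its "release:" label. */
static char *upcase_token(const char *line)
{
        size_t i, len = strlen(line);
        char *out = NULL;
        char *buf = malloc(len + 1);

        if (!buf)
                return NULL;
        memcpy(buf, line, len + 1);

        if (len == 0)
                goto release;           /* nothing to do */
        if (strchr(buf, ' '))
                goto release;           /* refuse multi-token input */

        out = malloc(len + 1);
        if (!out)
                goto release;

        for (i = 0; i <= len; i++)
                out[i] = (buf[i] >= 'a' && buf[i] <= 'z') ? buf[i] - 32 : buf[i];

release:
        free(buf);
        return out;
}

int main(void)
{
        char *s = upcase_token("redirect");

        printf("%s\n", s ? s : "(rejected)");
        free(s);
        return 0;
}
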
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 53ea512c4608..29d643bcafa4 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -95,13 +95,13 @@ config IP6_NF_MATCH_OPTS
95 To compile it as a module, choose M here. If unsure, say N. 95 To compile it as a module, choose M here. If unsure, say N.
96 96
97config IP6_NF_MATCH_HL 97config IP6_NF_MATCH_HL
98 tristate '"hl" match support' 98 tristate '"hl" hoplimit match support'
99 depends on NETFILTER_ADVANCED 99 depends on NETFILTER_ADVANCED
100 help 100 select NETFILTER_XT_MATCH_HL
101 HL matching allows you to match packets based on the hop 101 ---help---
102 limit of the packet. 102 This is a backwards-compat option for the user's convenience
103 103 (e.g. when running oldconfig). It selects
104 To compile it as a module, choose M here. If unsure, say N. 104 CONFIG_NETFILTER_XT_MATCH_HL.
105 105
106config IP6_NF_MATCH_IPV6HEADER 106config IP6_NF_MATCH_IPV6HEADER
107 tristate '"ipv6header" IPv6 Extension Headers Match' 107 tristate '"ipv6header" IPv6 Extension Headers Match'
@@ -130,6 +130,15 @@ config IP6_NF_MATCH_RT
130 To compile it as a module, choose M here. If unsure, say N. 130 To compile it as a module, choose M here. If unsure, say N.
131 131
132# The targets 132# The targets
133config IP6_NF_TARGET_HL
134 tristate '"HL" hoplimit target support'
135 depends on NETFILTER_ADVANCED
136 select NETFILTER_XT_TARGET_HL
137 ---help---
138 This is a backwards-compat option for the user's convenience
139 (e.g. when running oldconfig). It selects
140 CONFIG_NETFILTER_XT_TARGET_HL.
141
133config IP6_NF_TARGET_LOG 142config IP6_NF_TARGET_LOG
134 tristate "LOG target support" 143 tristate "LOG target support"
135 default m if NETFILTER_ADVANCED=n 144 default m if NETFILTER_ADVANCED=n
@@ -170,23 +179,6 @@ config IP6_NF_MANGLE
170 179
171 To compile it as a module, choose M here. If unsure, say N. 180 To compile it as a module, choose M here. If unsure, say N.
172 181
173config IP6_NF_TARGET_HL
174 tristate 'HL (hoplimit) target support'
175 depends on IP6_NF_MANGLE
176 depends on NETFILTER_ADVANCED
177 help
178 This option adds a `HL' target, which enables the user to decrement
179 the hoplimit value of the IPv6 header or set it to a given (lower)
180 value.
181
182 While it is safe to decrement the hoplimit value, this option also
183 enables functionality to increment and set the hoplimit value of the
184 IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since
185 you can easily create immortal packets that loop forever on the
186 network.
187
188 To compile it as a module, choose M here. If unsure, say N.
189
190config IP6_NF_RAW 182config IP6_NF_RAW
191 tristate 'raw table support (required for TRACE)' 183 tristate 'raw table support (required for TRACE)'
192 depends on NETFILTER_ADVANCED 184 depends on NETFILTER_ADVANCED
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 3f17c948eefb..aafbba30c899 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -20,13 +20,11 @@ obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
20obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o 20obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
21obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o 21obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
22obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o 22obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o
23obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o
24obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o 23obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o
25obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o 24obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
26obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o 25obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
27obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o 26obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
28 27
29# targets 28# targets
30obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o
31obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o 29obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o
32obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o 30obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 5859c046cbc4..b693f841aeb4 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -643,6 +643,7 @@ static void __exit ip6_queue_fini(void)
643 643
644MODULE_DESCRIPTION("IPv6 packet queue handler"); 644MODULE_DESCRIPTION("IPv6 packet queue handler");
645MODULE_LICENSE("GPL"); 645MODULE_LICENSE("GPL");
646MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW);
646 647
647module_init(ip6_queue_init); 648module_init(ip6_queue_init);
648module_exit(ip6_queue_fini); 649module_exit(ip6_queue_fini);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index a33485dc81cb..e89cfa3a8f25 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -99,7 +99,6 @@ ip6_packet_match(const struct sk_buff *skb,
99 unsigned int *protoff, 99 unsigned int *protoff,
100 int *fragoff, bool *hotdrop) 100 int *fragoff, bool *hotdrop)
101{ 101{
102 size_t i;
103 unsigned long ret; 102 unsigned long ret;
104 const struct ipv6hdr *ipv6 = ipv6_hdr(skb); 103 const struct ipv6hdr *ipv6 = ipv6_hdr(skb);
105 104
@@ -120,12 +119,7 @@ ip6_packet_match(const struct sk_buff *skb,
120 return false; 119 return false;
121 } 120 }
122 121
123 /* Look for ifname matches; this should unroll nicely. */ 122 ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask);
124 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
125 ret |= (((const unsigned long *)indev)[i]
126 ^ ((const unsigned long *)ip6info->iniface)[i])
127 & ((const unsigned long *)ip6info->iniface_mask)[i];
128 }
129 123
130 if (FWINV(ret != 0, IP6T_INV_VIA_IN)) { 124 if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
131 dprintf("VIA in mismatch (%s vs %s).%s\n", 125 dprintf("VIA in mismatch (%s vs %s).%s\n",
@@ -134,11 +128,7 @@ ip6_packet_match(const struct sk_buff *skb,
134 return false; 128 return false;
135 } 129 }
136 130
137 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { 131 ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask);
138 ret |= (((const unsigned long *)outdev)[i]
139 ^ ((const unsigned long *)ip6info->outiface)[i])
140 & ((const unsigned long *)ip6info->outiface_mask)[i];
141 }
142 132
143 if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) { 133 if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
144 dprintf("VIA out mismatch (%s vs %s).%s\n", 134 dprintf("VIA out mismatch (%s vs %s).%s\n",
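
Both interface-name loops above collapse into ifname_compare_aligned(), which XORs the rule's name against the device name one unsigned long at a time and masks the result, so only bytes selected by the mask take part in the match. A userspace rendition of the idea (a sketch, not the kernel's implementation; it assumes the buffers are word-aligned, as xtables rule data is):

#include <stdio.h>
#include <string.h>

#define IFNAMSIZ 16     /* kernel interface names are 16 bytes, NUL padded */

/* Compare two IFNAMSIZ buffers word-at-a-time under a byte mask.
 * Returns 0 when every masked byte matches, non-zero otherwise. */
static unsigned long ifname_compare_aligned(const char *a, const char *b,
                                            const char *mask)
{
        const unsigned long *ua = (const unsigned long *)a;
        const unsigned long *ub = (const unsigned long *)b;
        const unsigned long *um = (const unsigned long *)mask;
        unsigned long ret = 0;
        size_t i;

        for (i = 0; i < IFNAMSIZ / sizeof(unsigned long); i++)
                ret |= (ua[i] ^ ub[i]) & um[i];
        return ret;
}

int main(void)
{
        /* Aligned, zero-padded name buffers and an all-ones mask. */
        unsigned long in[IFNAMSIZ / sizeof(unsigned long)] = { 0 };
        unsigned long dev[IFNAMSIZ / sizeof(unsigned long)] = { 0 };
        unsigned long msk[IFNAMSIZ / sizeof(unsigned long)];

        memset(msk, 0xff, sizeof(msk));
        memcpy(in, "eth0", 5);
        memcpy(dev, "eth0", 5);

        printf("match: %s\n",
               ifname_compare_aligned((char *)in, (char *)dev,
                                      (char *)msk) == 0 ? "yes" : "no");
        return 0;
}
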
@@ -373,10 +363,12 @@ ip6t_do_table(struct sk_buff *skb,
373 mtpar.family = tgpar.family = NFPROTO_IPV6; 363 mtpar.family = tgpar.family = NFPROTO_IPV6;
374 tgpar.hooknum = hook; 364 tgpar.hooknum = hook;
375 365
376 read_lock_bh(&table->lock);
377 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 366 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
378 private = table->private; 367
379 table_base = (void *)private->entries[smp_processor_id()]; 368 rcu_read_lock();
369 private = rcu_dereference(table->private);
370 table_base = rcu_dereference(private->entries[smp_processor_id()]);
371
380 e = get_entry(table_base, private->hook_entry[hook]); 372 e = get_entry(table_base, private->hook_entry[hook]);
381 373
382 /* For return from builtin chain */ 374 /* For return from builtin chain */
@@ -474,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb,
474#ifdef CONFIG_NETFILTER_DEBUG 466#ifdef CONFIG_NETFILTER_DEBUG
475 ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; 467 ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
476#endif 468#endif
477 read_unlock_bh(&table->lock); 469 rcu_read_unlock();
478 470
479#ifdef DEBUG_ALLOW_ALL 471#ifdef DEBUG_ALLOW_ALL
480 return NF_ACCEPT; 472 return NF_ACCEPT;
@@ -525,7 +517,9 @@ mark_source_chains(struct xt_table_info *newinfo,
525 && unconditional(&e->ipv6)) || visited) { 517 && unconditional(&e->ipv6)) || visited) {
526 unsigned int oldpos, size; 518 unsigned int oldpos, size;
527 519
528 if (t->verdict < -NF_MAX_VERDICT - 1) { 520 if ((strcmp(t->target.u.user.name,
521 IP6T_STANDARD_TARGET) == 0) &&
522 t->verdict < -NF_MAX_VERDICT - 1) {
529 duprintf("mark_source_chains: bad " 523 duprintf("mark_source_chains: bad "
530 "negative verdict (%i)\n", 524 "negative verdict (%i)\n",
531 t->verdict); 525 t->verdict);
@@ -955,11 +949,64 @@ get_counters(const struct xt_table_info *t,
955 } 949 }
956} 950}
957 951
952/* We're lazy, and add to the first CPU; overflow works its fey magic
953 * and everything is OK. */
954static int
955add_counter_to_entry(struct ip6t_entry *e,
956 const struct xt_counters addme[],
957 unsigned int *i)
958{
959 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
960
961 (*i)++;
962 return 0;
963}
964
965/* Take values from counters and add them back onto the current cpu */
966static void put_counters(struct xt_table_info *t,
967 const struct xt_counters counters[])
968{
969 unsigned int i, cpu;
970
971 local_bh_disable();
972 cpu = smp_processor_id();
973 i = 0;
974 IP6T_ENTRY_ITERATE(t->entries[cpu],
975 t->size,
976 add_counter_to_entry,
977 counters,
978 &i);
979 local_bh_enable();
980}
981
982static inline int
983zero_entry_counter(struct ip6t_entry *e, void *arg)
984{
985 e->counters.bcnt = 0;
986 e->counters.pcnt = 0;
987 return 0;
988}
989
990static void
991clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
992{
993 unsigned int cpu;
994 const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
995
996 memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
997 for_each_possible_cpu(cpu) {
998 memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
999 IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
1000 zero_entry_counter, NULL);
1001 }
1002}
1003
958static struct xt_counters *alloc_counters(struct xt_table *table) 1004static struct xt_counters *alloc_counters(struct xt_table *table)
959{ 1005{
960 unsigned int countersize; 1006 unsigned int countersize;
961 struct xt_counters *counters; 1007 struct xt_counters *counters;
962 const struct xt_table_info *private = table->private; 1008 struct xt_table_info *private = table->private;
1009 struct xt_table_info *info;
963 1010
964 /* We need atomic snapshot of counters: rest doesn't change 1011 /* We need atomic snapshot of counters: rest doesn't change
965 (other than comefrom, which userspace doesn't care 1012 (other than comefrom, which userspace doesn't care
@@ -968,14 +1015,28 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
968 counters = vmalloc_node(countersize, numa_node_id()); 1015 counters = vmalloc_node(countersize, numa_node_id());
969 1016
970 if (counters == NULL) 1017 if (counters == NULL)
971 return ERR_PTR(-ENOMEM); 1018 goto nomem;
972 1019
973 /* First, sum counters... */ 1020 info = xt_alloc_table_info(private->size);
974 write_lock_bh(&table->lock); 1021 if (!info)
975 get_counters(private, counters); 1022 goto free_counters;
976 write_unlock_bh(&table->lock); 1023
1024 clone_counters(info, private);
1025
1026 mutex_lock(&table->lock);
1027 xt_table_entry_swap_rcu(private, info);
1028 synchronize_net(); /* Wait until smoke has cleared */
1029
1030 get_counters(info, counters);
1031 put_counters(private, counters);
1032 mutex_unlock(&table->lock);
977 1033
978 return counters; 1034 xt_free_table_info(info);
1035
1036 free_counters:
1037 vfree(counters);
1038 nomem:
1039 return ERR_PTR(-ENOMEM);
979} 1040}
980 1041
981static int 1042static int
@@ -1342,28 +1403,6 @@ do_replace(struct net *net, void __user *user, unsigned int len)
1342 return ret; 1403 return ret;
1343} 1404}
1344 1405
1345/* We're lazy, and add to the first CPU; overflow works its fey magic
1346 * and everything is OK. */
1347static inline int
1348add_counter_to_entry(struct ip6t_entry *e,
1349 const struct xt_counters addme[],
1350 unsigned int *i)
1351{
1352#if 0
1353 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1354 *i,
1355 (long unsigned int)e->counters.pcnt,
1356 (long unsigned int)e->counters.bcnt,
1357 (long unsigned int)addme[*i].pcnt,
1358 (long unsigned int)addme[*i].bcnt);
1359#endif
1360
1361 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1362
1363 (*i)++;
1364 return 0;
1365}
1366
1367static int 1406static int
1368do_add_counters(struct net *net, void __user *user, unsigned int len, 1407do_add_counters(struct net *net, void __user *user, unsigned int len,
1369 int compat) 1408 int compat)
@@ -1424,13 +1463,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
1424 goto free; 1463 goto free;
1425 } 1464 }
1426 1465
1427 write_lock_bh(&t->lock); 1466 mutex_lock(&t->lock);
1428 private = t->private; 1467 private = t->private;
1429 if (private->number != num_counters) { 1468 if (private->number != num_counters) {
1430 ret = -EINVAL; 1469 ret = -EINVAL;
1431 goto unlock_up_free; 1470 goto unlock_up_free;
1432 } 1471 }
1433 1472
1473 preempt_disable();
1434 i = 0; 1474 i = 0;
1435 /* Choose the copy that is on our node */ 1475 /* Choose the copy that is on our node */
1436 loc_cpu_entry = private->entries[raw_smp_processor_id()]; 1476 loc_cpu_entry = private->entries[raw_smp_processor_id()];
@@ -1439,8 +1479,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
1439 add_counter_to_entry, 1479 add_counter_to_entry,
1440 paddc, 1480 paddc,
1441 &i); 1481 &i);
1482 preempt_enable();
1442 unlock_up_free: 1483 unlock_up_free:
1443 write_unlock_bh(&t->lock); 1484 mutex_unlock(&t->lock);
1444 xt_table_unlock(t); 1485 xt_table_unlock(t);
1445 module_put(t->me); 1486 module_put(t->me);
1446 free: 1487 free:
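
alloc_counters() above no longer takes the table write-lock for a counter snapshot: it swaps in a zeroed clone under RCU, waits for readers with synchronize_net(), sums the retired copy with get_counters(), and feeds the totals back onto the current CPU via put_counters() so nothing is lost. With RCU and the per-CPU machinery stripped away, the bookkeeping looks roughly like this (all names invented; synchronization deliberately omitted):

#include <stdio.h>
#include <string.h>

#define NCPUS   4
#define NRULES  3

struct rule_counter { unsigned long pcnt, bcnt; };

/* Per-"CPU" copies of the rule counters, as x_tables keeps them. */
static struct rule_counter tbl[NCPUS][NRULES];

/* Sum every CPU's copy into one snapshot array. */
static void get_counters(struct rule_counter snap[NRULES])
{
        int cpu, rule;

        memset(snap, 0, NRULES * sizeof(*snap));
        for (cpu = 0; cpu < NCPUS; cpu++)
                for (rule = 0; rule < NRULES; rule++) {
                        snap[rule].pcnt += tbl[cpu][rule].pcnt;
                        snap[rule].bcnt += tbl[cpu][rule].bcnt;
                }
}

/* Add the snapshot back onto one CPU's copy, so the totals survive the
 * moment the zeroed clone replaces the live table. */
static void put_counters(int cpu, const struct rule_counter snap[NRULES])
{
        int rule;

        for (rule = 0; rule < NRULES; rule++) {
                tbl[cpu][rule].pcnt += snap[rule].pcnt;
                tbl[cpu][rule].bcnt += snap[rule].bcnt;
        }
}

int main(void)
{
        struct rule_counter snap[NRULES];

        tbl[0][1].pcnt = 10; tbl[2][1].pcnt = 5;        /* pretend traffic */
        get_counters(snap);
        memset(tbl, 0, sizeof(tbl));                    /* "swap in" the zeroed clone */
        put_counters(0, snap);

        printf("rule 1 packets: %lu\n", tbl[0][1].pcnt);
        return 0;
}
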
diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c
deleted file mode 100644
index 27b5adf670a2..000000000000
--- a/net/ipv6/netfilter/ip6t_HL.c
+++ /dev/null
@@ -1,95 +0,0 @@
1/*
2 * Hop Limit modification target for ip6tables
3 * Maciej Soltysiak <solt@dns.toxicfilms.tv>
4 * Based on HW's TTL module
5 *
6 * This software is distributed under the terms of GNU GPL
7 */
8
9#include <linux/module.h>
10#include <linux/skbuff.h>
11#include <linux/ip.h>
12#include <linux/ipv6.h>
13
14#include <linux/netfilter/x_tables.h>
15#include <linux/netfilter_ipv6/ip6t_HL.h>
16
17MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
18MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field modification target");
19MODULE_LICENSE("GPL");
20
21static unsigned int
22hl_tg6(struct sk_buff *skb, const struct xt_target_param *par)
23{
24 struct ipv6hdr *ip6h;
25 const struct ip6t_HL_info *info = par->targinfo;
26 int new_hl;
27
28 if (!skb_make_writable(skb, skb->len))
29 return NF_DROP;
30
31 ip6h = ipv6_hdr(skb);
32
33 switch (info->mode) {
34 case IP6T_HL_SET:
35 new_hl = info->hop_limit;
36 break;
37 case IP6T_HL_INC:
38 new_hl = ip6h->hop_limit + info->hop_limit;
39 if (new_hl > 255)
40 new_hl = 255;
41 break;
42 case IP6T_HL_DEC:
43 new_hl = ip6h->hop_limit - info->hop_limit;
44 if (new_hl < 0)
45 new_hl = 0;
46 break;
47 default:
48 new_hl = ip6h->hop_limit;
49 break;
50 }
51
52 ip6h->hop_limit = new_hl;
53
54 return XT_CONTINUE;
55}
56
57static bool hl_tg6_check(const struct xt_tgchk_param *par)
58{
59 const struct ip6t_HL_info *info = par->targinfo;
60
61 if (info->mode > IP6T_HL_MAXMODE) {
62 printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n",
63 info->mode);
64 return false;
65 }
66 if (info->mode != IP6T_HL_SET && info->hop_limit == 0) {
67 printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't "
68 "make sense with value 0\n");
69 return false;
70 }
71 return true;
72}
73
74static struct xt_target hl_tg6_reg __read_mostly = {
75 .name = "HL",
76 .family = NFPROTO_IPV6,
77 .target = hl_tg6,
78 .targetsize = sizeof(struct ip6t_HL_info),
79 .table = "mangle",
80 .checkentry = hl_tg6_check,
81 .me = THIS_MODULE
82};
83
84static int __init hl_tg6_init(void)
85{
86 return xt_register_target(&hl_tg6_reg);
87}
88
89static void __exit hl_tg6_exit(void)
90{
91 xt_unregister_target(&hl_tg6_reg);
92}
93
94module_init(hl_tg6_init);
95module_exit(hl_tg6_exit);
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 37adf5abc51e..7018cac4fddc 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -477,7 +477,7 @@ static struct xt_target log_tg6_reg __read_mostly = {
477 .me = THIS_MODULE, 477 .me = THIS_MODULE,
478}; 478};
479 479
480static const struct nf_logger ip6t_logger = { 480static struct nf_logger ip6t_logger __read_mostly = {
481 .name = "ip6t_LOG", 481 .name = "ip6t_LOG",
482 .logfn = &ip6t_log_packet, 482 .logfn = &ip6t_log_packet,
483 .me = THIS_MODULE, 483 .me = THIS_MODULE,
diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c
deleted file mode 100644
index c964dca1132d..000000000000
--- a/net/ipv6/netfilter/ip6t_hl.c
+++ /dev/null
@@ -1,68 +0,0 @@
1/* Hop Limit matching module */
2
3/* (C) 2001-2002 Maciej Soltysiak <solt@dns.toxicfilms.tv>
4 * Based on HW's ttl module
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/ipv6.h>
12#include <linux/module.h>
13#include <linux/skbuff.h>
14
15#include <linux/netfilter_ipv6/ip6t_hl.h>
16#include <linux/netfilter/x_tables.h>
17
18MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
19MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field match");
20MODULE_LICENSE("GPL");
21
22static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
23{
24 const struct ip6t_hl_info *info = par->matchinfo;
25 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
26
27 switch (info->mode) {
28 case IP6T_HL_EQ:
29 return ip6h->hop_limit == info->hop_limit;
30 break;
31 case IP6T_HL_NE:
32 return ip6h->hop_limit != info->hop_limit;
33 break;
34 case IP6T_HL_LT:
35 return ip6h->hop_limit < info->hop_limit;
36 break;
37 case IP6T_HL_GT:
38 return ip6h->hop_limit > info->hop_limit;
39 break;
40 default:
41 printk(KERN_WARNING "ip6t_hl: unknown mode %d\n",
42 info->mode);
43 return false;
44 }
45
46 return false;
47}
48
49static struct xt_match hl_mt6_reg __read_mostly = {
50 .name = "hl",
51 .family = NFPROTO_IPV6,
52 .match = hl_mt6,
53 .matchsize = sizeof(struct ip6t_hl_info),
54 .me = THIS_MODULE,
55};
56
57static int __init hl_mt6_init(void)
58{
59 return xt_register_match(&hl_mt6_reg);
60}
61
62static void __exit hl_mt6_exit(void)
63{
64 xt_unregister_match(&hl_mt6_reg);
65}
66
67module_init(hl_mt6_init);
68module_exit(hl_mt6_exit);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 40d2e36d8fac..ef5a0a32bf8e 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -54,7 +54,6 @@ static struct
54static struct xt_table packet_filter = { 54static struct xt_table packet_filter = {
55 .name = "filter", 55 .name = "filter",
56 .valid_hooks = FILTER_VALID_HOOKS, 56 .valid_hooks = FILTER_VALID_HOOKS,
57 .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
58 .me = THIS_MODULE, 57 .me = THIS_MODULE,
59 .af = AF_INET6, 58 .af = AF_INET6,
60}; 59};
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index d0b31b259d4d..ab0d398a2ba7 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -60,7 +60,6 @@ static struct
60static struct xt_table packet_mangler = { 60static struct xt_table packet_mangler = {
61 .name = "mangle", 61 .name = "mangle",
62 .valid_hooks = MANGLE_VALID_HOOKS, 62 .valid_hooks = MANGLE_VALID_HOOKS,
63 .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock),
64 .me = THIS_MODULE, 63 .me = THIS_MODULE,
65 .af = AF_INET6, 64 .af = AF_INET6,
66}; 65};
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 109fab6f831a..4b792b6ca321 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -38,7 +38,6 @@ static struct
38static struct xt_table packet_raw = { 38static struct xt_table packet_raw = {
39 .name = "raw", 39 .name = "raw",
40 .valid_hooks = RAW_VALID_HOOKS, 40 .valid_hooks = RAW_VALID_HOOKS,
41 .lock = __RW_LOCK_UNLOCKED(packet_raw.lock),
42 .me = THIS_MODULE, 41 .me = THIS_MODULE,
43 .af = AF_INET6, 42 .af = AF_INET6,
44}; 43};
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 20bc52f13e43..0ea37ff15d56 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -59,7 +59,6 @@ static struct
59static struct xt_table security_table = { 59static struct xt_table security_table = {
60 .name = "security", 60 .name = "security",
61 .valid_hooks = SECURITY_VALID_HOOKS, 61 .valid_hooks = SECURITY_VALID_HOOKS,
62 .lock = __RW_LOCK_UNLOCKED(security_table.lock),
63 .me = THIS_MODULE, 62 .me = THIS_MODULE,
64 .af = AF_INET6, 63 .af = AF_INET6,
65}; 64};
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 727b9530448a..2a15c2d66c69 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -26,6 +26,7 @@
26#include <net/netfilter/nf_conntrack_l4proto.h> 26#include <net/netfilter/nf_conntrack_l4proto.h>
27#include <net/netfilter/nf_conntrack_l3proto.h> 27#include <net/netfilter/nf_conntrack_l3proto.h>
28#include <net/netfilter/nf_conntrack_core.h> 28#include <net/netfilter/nf_conntrack_core.h>
29#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
29 30
30static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 31static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
31 struct nf_conntrack_tuple *tuple) 32 struct nf_conntrack_tuple *tuple)
@@ -341,6 +342,11 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
341 342
342 return 0; 343 return 0;
343} 344}
345
346static int ipv6_nlattr_tuple_size(void)
347{
348 return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1);
349}
344#endif 350#endif
345 351
346struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { 352struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
@@ -352,6 +358,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
352 .get_l4proto = ipv6_get_l4proto, 358 .get_l4proto = ipv6_get_l4proto,
353#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 359#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
354 .tuple_to_nlattr = ipv6_tuple_to_nlattr, 360 .tuple_to_nlattr = ipv6_tuple_to_nlattr,
361 .nlattr_tuple_size = ipv6_nlattr_tuple_size,
355 .nlattr_to_tuple = ipv6_nlattr_to_tuple, 362 .nlattr_to_tuple = ipv6_nlattr_to_tuple,
356 .nla_policy = ipv6_nla_policy, 363 .nla_policy = ipv6_nla_policy,
357#endif 364#endif
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 72dbb6d1a6b3..9903227bf37c 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -126,6 +126,10 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb,
126 pr_debug("icmpv6: can't create new conn with type %u\n", 126 pr_debug("icmpv6: can't create new conn with type %u\n",
127 type + 128); 127 type + 128);
128 nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); 128 nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple);
129 if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6))
130 nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL,
131 "nf_ct_icmpv6: invalid new with type %d ",
132 type + 128);
129 return false; 133 return false;
130 } 134 }
131 atomic_set(&ct->proto.icmp.count, 0); 135 atomic_set(&ct->proto.icmp.count, 0);
@@ -265,6 +269,11 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
265 269
266 return 0; 270 return 0;
267} 271}
272
273static int icmpv6_nlattr_tuple_size(void)
274{
275 return nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1);
276}
268#endif 277#endif
269 278
270#ifdef CONFIG_SYSCTL 279#ifdef CONFIG_SYSCTL
@@ -296,6 +305,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
296 .error = icmpv6_error, 305 .error = icmpv6_error,
297#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 306#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
298 .tuple_to_nlattr = icmpv6_tuple_to_nlattr, 307 .tuple_to_nlattr = icmpv6_tuple_to_nlattr,
308 .nlattr_tuple_size = icmpv6_nlattr_tuple_size,
299 .nlattr_to_tuple = icmpv6_nlattr_to_tuple, 309 .nlattr_to_tuple = icmpv6_nlattr_to_tuple,
300 .nla_policy = icmpv6_nla_policy, 310 .nla_policy = icmpv6_nla_policy,
301#endif 311#endif
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index ed4d79a9e4a6..058a5e4a60c3 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -528,14 +528,14 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
528 if (!ipv6_ext_hdr(nexthdr)) { 528 if (!ipv6_ext_hdr(nexthdr)) {
529 return -1; 529 return -1;
530 } 530 }
531 if (len < (int)sizeof(struct ipv6_opt_hdr)) {
532 pr_debug("too short\n");
533 return -1;
534 }
535 if (nexthdr == NEXTHDR_NONE) { 531 if (nexthdr == NEXTHDR_NONE) {
536 pr_debug("next header is none\n"); 532 pr_debug("next header is none\n");
537 return -1; 533 return -1;
538 } 534 }
535 if (len < (int)sizeof(struct ipv6_opt_hdr)) {
536 pr_debug("too short\n");
537 return -1;
538 }
539 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) 539 if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
540 BUG(); 540 BUG();
541 if (nexthdr == NEXTHDR_AUTH) 541 if (nexthdr == NEXTHDR_AUTH)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 3c575118fca5..e9ac7a12f595 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -452,6 +452,7 @@ err:
452static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, 452static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
453 struct net_device *dev) 453 struct net_device *dev)
454{ 454{
455 struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
455 struct sk_buff *fp, *head = fq->q.fragments; 456 struct sk_buff *fp, *head = fq->q.fragments;
456 int payload_len; 457 int payload_len;
457 unsigned int nhoff; 458 unsigned int nhoff;
@@ -551,8 +552,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
551 head->csum); 552 head->csum);
552 553
553 rcu_read_lock(); 554 rcu_read_lock();
554 IP6_INC_STATS_BH(dev_net(dev), 555 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
555 __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
556 rcu_read_unlock(); 556 rcu_read_unlock();
557 fq->q.fragments = NULL; 557 fq->q.fragments = NULL;
558 return 1; 558 return 1;
@@ -566,8 +566,7 @@ out_oom:
566 printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); 566 printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n");
567out_fail: 567out_fail:
568 rcu_read_lock(); 568 rcu_read_lock();
569 IP6_INC_STATS_BH(dev_net(dev), 569 IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
570 __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
571 rcu_read_unlock(); 570 rcu_read_unlock();
572 return -1; 571 return -1;
573} 572}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 9c574235c905..1394ddb6e35c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -98,7 +98,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
98 98
99static struct dst_ops ip6_dst_ops_template = { 99static struct dst_ops ip6_dst_ops_template = {
100 .family = AF_INET6, 100 .family = AF_INET6,
101 .protocol = __constant_htons(ETH_P_IPV6), 101 .protocol = cpu_to_be16(ETH_P_IPV6),
102 .gc = ip6_dst_gc, 102 .gc = ip6_dst_gc,
103 .gc_thresh = 1024, 103 .gc_thresh = 1024,
104 .check = ip6_dst_check, 104 .check = ip6_dst_check,
@@ -117,7 +117,7 @@ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
117 117
118static struct dst_ops ip6_dst_blackhole_ops = { 118static struct dst_ops ip6_dst_blackhole_ops = {
119 .family = AF_INET6, 119 .family = AF_INET6,
120 .protocol = __constant_htons(ETH_P_IPV6), 120 .protocol = cpu_to_be16(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy, 121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check, 122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu, 123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
@@ -2400,8 +2400,9 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2400 kfree_skb(skb); 2400 kfree_skb(skb);
2401 goto errout; 2401 goto errout;
2402 } 2402 }
2403 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, 2403 rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2404 info->nlh, gfp_any()); 2404 info->nlh, gfp_any());
2405 return;
2405errout: 2406errout:
2406 if (err < 0) 2407 if (err < 0)
2407 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); 2408 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index d3467e563f02..664ab82e03b2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -188,9 +188,9 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
188 } 188 }
189 189
190 nt = netdev_priv(dev); 190 nt = netdev_priv(dev);
191 ipip6_tunnel_init(dev);
192 191
193 nt->parms = *parms; 192 nt->parms = *parms;
193 ipip6_tunnel_init(dev);
194 194
195 if (parms->i_flags & SIT_ISATAP) 195 if (parms->i_flags & SIT_ISATAP)
196 dev->priv_flags |= IFF_ISATAP; 196 dev->priv_flags |= IFF_ISATAP;
@@ -454,7 +454,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
454 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) 454 if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
455 goto out; 455 goto out;
456 456
457 if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) 457 if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
458 t->err_count++; 458 t->err_count++;
459 else 459 else
460 t->err_count = 1; 460 t->err_count = 1;
@@ -658,7 +658,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
658 } 658 }
659 659
660 if (tunnel->err_count > 0) { 660 if (tunnel->err_count > 0) {
661 if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { 661 if (time_before(jiffies,
662 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
662 tunnel->err_count--; 663 tunnel->err_count--;
663 dst_link_failure(skb); 664 dst_link_failure(skb);
664 } else 665 } else
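
The two sit.c hunks above switch the error-throttling tests to time_before(), the idiomatic way to compare jiffies values: the comparison goes through a signed difference, so it stays correct even when the tick counter or the computed deadline wraps. A userspace demonstration of why an absolute comparison fails near a wrap (constants chosen only to force the wrap; the macro mirrors, but is not copied from, the kernel's):

#include <stdio.h>
#include <limits.h>

/* Wraparound-safe "is a earlier than b" for free-running tick counters:
 * the signed difference stays small even when the deadline has wrapped. */
#define time_before(a, b) ((long)((a) - (b)) < 0)

int main(void)
{
        unsigned long err_time = ULONG_MAX - 50;        /* last error, 40 ticks ago */
        unsigned long now      = ULONG_MAX - 10;
        unsigned long timeo    = 100;
        unsigned long deadline = err_time + timeo;      /* wraps past zero */

        /* Absolute comparison breaks once the deadline wraps: */
        printf("naive      : %s\n", now < deadline ? "within timeout" : "expired");
        /* Signed-difference comparison keeps working: */
        printf("time_before: %s\n", time_before(now, deadline) ?
               "within timeout" : "expired");
        return 0;
}
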
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e5b85d45bee8..4b5aa1854260 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -533,8 +533,7 @@ static inline void syn_flood_warning(struct sk_buff *skb)
533 533
534static void tcp_v6_reqsk_destructor(struct request_sock *req) 534static void tcp_v6_reqsk_destructor(struct request_sock *req)
535{ 535{
536 if (inet6_rsk(req)->pktopts) 536 kfree_skb(inet6_rsk(req)->pktopts);
537 kfree_skb(inet6_rsk(req)->pktopts);
538} 537}
539 538
540#ifdef CONFIG_TCP_MD5SIG 539#ifdef CONFIG_TCP_MD5SIG
@@ -948,7 +947,7 @@ struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb)
948 947
949 switch (skb->ip_summed) { 948 switch (skb->ip_summed) {
950 case CHECKSUM_COMPLETE: 949 case CHECKSUM_COMPLETE:
951 if (!tcp_v6_check(skb->len, &iph->saddr, &iph->daddr, 950 if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
952 skb->csum)) { 951 skb->csum)) {
953 skb->ip_summed = CHECKSUM_UNNECESSARY; 952 skb->ip_summed = CHECKSUM_UNNECESSARY;
954 break; 953 break;
@@ -1611,8 +1610,7 @@ ipv6_pktoptions:
1611 } 1610 }
1612 } 1611 }
1613 1612
1614 if (opt_skb) 1613 kfree_skb(opt_skb);
1615 kfree_skb(opt_skb);
1616 return 0; 1614 return 0;
1617} 1615}
1618 1616
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 84b1a296eecb..6842dd2edd5b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -49,6 +49,34 @@
49#include <linux/seq_file.h> 49#include <linux/seq_file.h>
50#include "udp_impl.h" 50#include "udp_impl.h"
51 51
52int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
53{
54 const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
55 const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
56 int sk_ipv6only = ipv6_only_sock(sk);
57 int sk2_ipv6only = inet_v6_ipv6only(sk2);
58 int addr_type = ipv6_addr_type(sk_rcv_saddr6);
59 int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;
60
61 /* if both are mapped, treat as IPv4 */
62 if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)
63 return ipv4_rcv_saddr_equal(sk, sk2);
64
65 if (addr_type2 == IPV6_ADDR_ANY &&
66 !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED))
67 return 1;
68
69 if (addr_type == IPV6_ADDR_ANY &&
70 !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED))
71 return 1;
72
73 if (sk2_rcv_saddr6 &&
74 ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6))
75 return 1;
76
77 return 0;
78}
79
52int udp_v6_get_port(struct sock *sk, unsigned short snum) 80int udp_v6_get_port(struct sock *sk, unsigned short snum)
53{ 81{
54 return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); 82 return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 97ab068e8ccc..b4b16a43f277 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -272,7 +272,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
272 272
273static struct dst_ops xfrm6_dst_ops = { 273static struct dst_ops xfrm6_dst_ops = {
274 .family = AF_INET6, 274 .family = AF_INET6,
275 .protocol = __constant_htons(ETH_P_IPV6), 275 .protocol = cpu_to_be16(ETH_P_IPV6),
276 .gc = xfrm6_garbage_collect, 276 .gc = xfrm6_garbage_collect,
277 .update_pmtu = xfrm6_update_pmtu, 277 .update_pmtu = xfrm6_update_pmtu,
278 .destroy = xfrm6_dst_destroy, 278 .destroy = xfrm6_dst_destroy,
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 0e685b05496e..f417b77fa0e1 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -69,7 +69,7 @@ __xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass)
69 69
70 for (i = 0; i < n; i++) { 70 for (i = 0; i < n; i++) {
71 dst[count[class[i] - 1]++] = src[i]; 71 dst[count[class[i] - 1]++] = src[i];
72 src[i] = 0; 72 src[i] = NULL;
73 } 73 }
74 74
75 return 0; 75 return 0;
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index b6e70f92e7fb..1627050e29fd 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1958,13 +1958,13 @@ static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = {
1958 1958
1959SOCKOPS_WRAP(ipx_dgram, PF_IPX); 1959SOCKOPS_WRAP(ipx_dgram, PF_IPX);
1960 1960
1961static struct packet_type ipx_8023_packet_type = { 1961static struct packet_type ipx_8023_packet_type __read_mostly = {
1962 .type = __constant_htons(ETH_P_802_3), 1962 .type = cpu_to_be16(ETH_P_802_3),
1963 .func = ipx_rcv, 1963 .func = ipx_rcv,
1964}; 1964};
1965 1965
1966static struct packet_type ipx_dix_packet_type = { 1966static struct packet_type ipx_dix_packet_type __read_mostly = {
1967 .type = __constant_htons(ETH_P_IPX), 1967 .type = cpu_to_be16(ETH_P_IPX),
1968 .func = ipx_rcv, 1968 .func = ipx_rcv,
1969}; 1969};
1970 1970
@@ -1975,15 +1975,15 @@ static struct notifier_block ipx_dev_notifier = {
1975extern struct datalink_proto *make_EII_client(void); 1975extern struct datalink_proto *make_EII_client(void);
1976extern void destroy_EII_client(struct datalink_proto *); 1976extern void destroy_EII_client(struct datalink_proto *);
1977 1977
1978static unsigned char ipx_8022_type = 0xE0; 1978static const unsigned char ipx_8022_type = 0xE0;
1979static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; 1979static const unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 };
1980static char ipx_EII_err_msg[] __initdata = 1980static const char ipx_EII_err_msg[] __initconst =
1981 KERN_CRIT "IPX: Unable to register with Ethernet II\n"; 1981 KERN_CRIT "IPX: Unable to register with Ethernet II\n";
1982static char ipx_8023_err_msg[] __initdata = 1982static const char ipx_8023_err_msg[] __initconst =
1983 KERN_CRIT "IPX: Unable to register with 802.3\n"; 1983 KERN_CRIT "IPX: Unable to register with 802.3\n";
1984static char ipx_llc_err_msg[] __initdata = 1984static const char ipx_llc_err_msg[] __initconst =
1985 KERN_CRIT "IPX: Unable to register with 802.2\n"; 1985 KERN_CRIT "IPX: Unable to register with 802.2\n";
1986static char ipx_snap_err_msg[] __initdata = 1986static const char ipx_snap_err_msg[] __initconst =
1987 KERN_CRIT "IPX: Unable to register with SNAP\n"; 1987 KERN_CRIT "IPX: Unable to register with SNAP\n";
1988 1988
1989static int __init ipx_init(void) 1989static int __init ipx_init(void)
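
The packet_type and dst_ops hunks in this series swap __constant_htons() for cpu_to_be16(), which works for both compile-time constants and runtime values and keeps the __be16 annotation that sparse checks. A userspace sketch of the underlying byte-order conversion, using htons() as the stand-in for cpu_to_be16(); the ETH_P_IPX value matches the one used by ipx_dix_packet_type.

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

#define ETH_P_IPX 0x8137   /* EtherType registered by ipx_dix_packet_type */

int main(void)
{
	/* htons() plays the role of cpu_to_be16(): the stored bytes come out
	 * as 0x81 0x37 on both little- and big-endian hosts. */
	uint16_t wire = htons(ETH_P_IPX);
	unsigned char *b = (unsigned char *)&wire;

	printf("wire bytes: %02x %02x\n", b[0], b[1]);
	return 0;
}

On any host this prints "wire bytes: 81 37", which is the on-wire representation the annotation guarantees.
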
diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c
index ea319e3ddc18..bf92e1473447 100644
--- a/net/irda/irda_device.c
+++ b/net/irda/irda_device.c
@@ -149,13 +149,14 @@ int irda_device_is_receiving(struct net_device *dev)
149 149
150 IRDA_DEBUG(2, "%s()\n", __func__); 150 IRDA_DEBUG(2, "%s()\n", __func__);
151 151
152 if (!dev->do_ioctl) { 152 if (!dev->netdev_ops->ndo_do_ioctl) {
153 IRDA_ERROR("%s: do_ioctl not impl. by device driver\n", 153 IRDA_ERROR("%s: do_ioctl not impl. by device driver\n",
154 __func__); 154 __func__);
155 return -1; 155 return -1;
156 } 156 }
157 157
158 ret = dev->do_ioctl(dev, (struct ifreq *) &req, SIOCGRECEIVING); 158 ret = (dev->netdev_ops->ndo_do_ioctl)(dev, (struct ifreq *) &req,
159 SIOCGRECEIVING);
159 if (ret < 0) 160 if (ret < 0)
160 return ret; 161 return ret;
161 162
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 05112be99569..724bcf951b80 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -45,6 +45,16 @@ static int irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev);
45static void irlan_eth_set_multicast_list( struct net_device *dev); 45static void irlan_eth_set_multicast_list( struct net_device *dev);
46static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev); 46static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev);
47 47
48static const struct net_device_ops irlan_eth_netdev_ops = {
49 .ndo_open = irlan_eth_open,
50 .ndo_stop = irlan_eth_close,
51 .ndo_start_xmit = irlan_eth_xmit,
52 .ndo_get_stats = irlan_eth_get_stats,
53 .ndo_set_multicast_list = irlan_eth_set_multicast_list,
54 .ndo_change_mtu = eth_change_mtu,
55 .ndo_validate_addr = eth_validate_addr,
56};
57
48/* 58/*
49 * Function irlan_eth_setup (dev) 59 * Function irlan_eth_setup (dev)
50 * 60 *
@@ -53,14 +63,11 @@ static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev);
53 */ 63 */
54static void irlan_eth_setup(struct net_device *dev) 64static void irlan_eth_setup(struct net_device *dev)
55{ 65{
56 dev->open = irlan_eth_open; 66 ether_setup(dev);
57 dev->stop = irlan_eth_close; 67
58 dev->hard_start_xmit = irlan_eth_xmit; 68 dev->netdev_ops = &irlan_eth_netdev_ops;
59 dev->get_stats = irlan_eth_get_stats;
60 dev->set_multicast_list = irlan_eth_set_multicast_list;
61 dev->destructor = free_netdev; 69 dev->destructor = free_netdev;
62 70
63 ether_setup(dev);
64 71
65 /* 72 /*
66 * Lets do all queueing in IrTTP instead of this device driver. 73 * Lets do all queueing in IrTTP instead of this device driver.
diff --git a/net/irda/irmod.c b/net/irda/irmod.c
index 4c487a883725..303a68d92731 100644
--- a/net/irda/irmod.c
+++ b/net/irda/irmod.c
@@ -55,8 +55,8 @@ EXPORT_SYMBOL(irda_debug);
55/* Packet type handler. 55/* Packet type handler.
56 * Tell the kernel how IrDA packets should be handled. 56 * Tell the kernel how IrDA packets should be handled.
57 */ 57 */
58static struct packet_type irda_packet_type = { 58static struct packet_type irda_packet_type __read_mostly = {
59 .type = __constant_htons(ETH_P_IRDA), 59 .type = cpu_to_be16(ETH_P_IRDA),
60 .func = irlap_driver_rcv, /* Packet type handler irlap_frame.c */ 60 .func = irlap_driver_rcv, /* Packet type handler irlap_frame.c */
61}; 61};
62 62
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index eb8a2a0b6eb7..49e786535dc8 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1171,8 +1171,7 @@ static void iucv_callback_txdone(struct iucv_path *path,
1171 1171
1172 spin_unlock_irqrestore(&list->lock, flags); 1172 spin_unlock_irqrestore(&list->lock, flags);
1173 1173
1174 if (this) 1174 kfree_skb(this);
1175 kfree_skb(this);
1176 } 1175 }
1177 BUG_ON(!this); 1176 BUG_ON(!this);
1178 1177
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 7dcbde3ea7d9..643c1be2d02e 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -313,8 +313,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
313 if (one_sk != NULL) 313 if (one_sk != NULL)
314 err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); 314 err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
315 315
316 if (skb2) 316 kfree_skb(skb2);
317 kfree_skb(skb2);
318 kfree_skb(skb); 317 kfree_skb(skb);
319 return err; 318 return err;
320} 319}
@@ -3573,8 +3572,7 @@ static int pfkey_sendmsg(struct kiocb *kiocb,
3573out: 3572out:
3574 if (err && hdr && pfkey_error(hdr, err, sk) == 0) 3573 if (err && hdr && pfkey_error(hdr, err, sk) == 0)
3575 err = 0; 3574 err = 0;
3576 if (skb) 3575 kfree_skb(skb);
3577 kfree_skb(skb);
3578 3576
3579 return err ? : len; 3577 return err ? : len;
3580} 3578}
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 56fd85ab358e..febae702685c 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -1118,11 +1118,11 @@ static const struct proto_ops llc_ui_ops = {
1118 .sendpage = sock_no_sendpage, 1118 .sendpage = sock_no_sendpage,
1119}; 1119};
1120 1120
1121static char llc_proc_err_msg[] __initdata = 1121static const char llc_proc_err_msg[] __initconst =
1122 KERN_CRIT "LLC: Unable to register the proc_fs entries\n"; 1122 KERN_CRIT "LLC: Unable to register the proc_fs entries\n";
1123static char llc_sysctl_err_msg[] __initdata = 1123static const char llc_sysctl_err_msg[] __initconst =
1124 KERN_CRIT "LLC: Unable to register the sysctl entries\n"; 1124 KERN_CRIT "LLC: Unable to register the sysctl entries\n";
1125static char llc_sock_err_msg[] __initdata = 1125static const char llc_sock_err_msg[] __initconst =
1126 KERN_CRIT "LLC: Unable to register the network family\n"; 1126 KERN_CRIT "LLC: Unable to register the network family\n";
1127 1127
1128static int __init llc2_init(void) 1128static int __init llc2_init(void)
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 5c6d89c6d51d..3477624a4906 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -332,8 +332,7 @@ int llc_conn_remove_acked_pdus(struct sock *sk, u8 nr, u16 *how_many_unacked)
332 332
333 for (i = 0; i < pdu_pos && i < q_len; i++) { 333 for (i = 0; i < pdu_pos && i < q_len; i++) {
334 skb = skb_dequeue(&llc->pdu_unack_q); 334 skb = skb_dequeue(&llc->pdu_unack_q);
335 if (skb) 335 kfree_skb(skb);
336 kfree_skb(skb);
337 nbr_acked++; 336 nbr_acked++;
338 } 337 }
339out: 338out:
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 50d5b10e23a2..ff4c0ab96a69 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -147,13 +147,13 @@ void llc_sap_close(struct llc_sap *sap)
147 kfree(sap); 147 kfree(sap);
148} 148}
149 149
150static struct packet_type llc_packet_type = { 150static struct packet_type llc_packet_type __read_mostly = {
151 .type = __constant_htons(ETH_P_802_2), 151 .type = cpu_to_be16(ETH_P_802_2),
152 .func = llc_rcv, 152 .func = llc_rcv,
153}; 153};
154 154
155static struct packet_type llc_tr_packet_type = { 155static struct packet_type llc_tr_packet_type __read_mostly = {
156 .type = __constant_htons(ETH_P_TR_802_2), 156 .type = cpu_to_be16(ETH_P_TR_802_2),
157 .func = llc_rcv, 157 .func = llc_rcv,
158}; 158};
159 159
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 7d4971aa443f..0e3ab88bb706 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -8,13 +8,15 @@ mac80211-y := \
8 wep.o \ 8 wep.o \
9 wpa.o \ 9 wpa.o \
10 scan.o \ 10 scan.o \
11 ht.o \ 11 ht.o agg-tx.o agg-rx.o \
12 ibss.o \
12 mlme.o \ 13 mlme.o \
13 iface.o \ 14 iface.o \
14 rate.o \ 15 rate.o \
15 michael.o \ 16 michael.o \
16 tkip.o \ 17 tkip.o \
17 aes_ccm.o \ 18 aes_ccm.o \
19 aes_cmac.o \
18 cfg.o \ 20 cfg.o \
19 rx.o \ 21 rx.o \
20 spectmgmt.o \ 22 spectmgmt.o \
@@ -37,6 +39,8 @@ mac80211-$(CONFIG_MAC80211_MESH) += \
37 mesh_plink.o \ 39 mesh_plink.o \
38 mesh_hwmp.o 40 mesh_hwmp.o
39 41
42mac80211-$(CONFIG_PM) += pm.o
43
40# objects for PID algorithm 44# objects for PID algorithm
41rc80211_pid-y := rc80211_pid_algo.o 45rc80211_pid-y := rc80211_pid_algo.o
42rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o 46rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
new file mode 100644
index 000000000000..3d097b3d7b62
--- /dev/null
+++ b/net/mac80211/aes_cmac.c
@@ -0,0 +1,135 @@
1/*
2 * AES-128-CMAC with TLen 16 for IEEE 802.11w BIP
3 * Copyright 2008, Jouni Malinen <j@w1.fi>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#include <linux/kernel.h>
11#include <linux/types.h>
12#include <linux/crypto.h>
13#include <linux/err.h>
14
15#include <net/mac80211.h>
16#include "key.h"
17#include "aes_cmac.h"
18
19#define AES_BLOCK_SIZE 16
20#define AES_CMAC_KEY_LEN 16
21#define CMAC_TLEN 8 /* CMAC TLen = 64 bits (8 octets) */
22#define AAD_LEN 20
23
24
25static void gf_mulx(u8 *pad)
26{
27 int i, carry;
28
29 carry = pad[0] & 0x80;
30 for (i = 0; i < AES_BLOCK_SIZE - 1; i++)
31 pad[i] = (pad[i] << 1) | (pad[i + 1] >> 7);
32 pad[AES_BLOCK_SIZE - 1] <<= 1;
33 if (carry)
34 pad[AES_BLOCK_SIZE - 1] ^= 0x87;
35}
36
37
38static void aes_128_cmac_vector(struct crypto_cipher *tfm, u8 *scratch,
39 size_t num_elem,
40 const u8 *addr[], const size_t *len, u8 *mac)
41{
42 u8 *cbc, *pad;
43 const u8 *pos, *end;
44 size_t i, e, left, total_len;
45
46 cbc = scratch;
47 pad = scratch + AES_BLOCK_SIZE;
48
49 memset(cbc, 0, AES_BLOCK_SIZE);
50
51 total_len = 0;
52 for (e = 0; e < num_elem; e++)
53 total_len += len[e];
54 left = total_len;
55
56 e = 0;
57 pos = addr[0];
58 end = pos + len[0];
59
60 while (left >= AES_BLOCK_SIZE) {
61 for (i = 0; i < AES_BLOCK_SIZE; i++) {
62 cbc[i] ^= *pos++;
63 if (pos >= end) {
64 e++;
65 pos = addr[e];
66 end = pos + len[e];
67 }
68 }
69 if (left > AES_BLOCK_SIZE)
70 crypto_cipher_encrypt_one(tfm, cbc, cbc);
71 left -= AES_BLOCK_SIZE;
72 }
73
74 memset(pad, 0, AES_BLOCK_SIZE);
75 crypto_cipher_encrypt_one(tfm, pad, pad);
76 gf_mulx(pad);
77
78 if (left || total_len == 0) {
79 for (i = 0; i < left; i++) {
80 cbc[i] ^= *pos++;
81 if (pos >= end) {
82 e++;
83 pos = addr[e];
84 end = pos + len[e];
85 }
86 }
87 cbc[left] ^= 0x80;
88 gf_mulx(pad);
89 }
90
91 for (i = 0; i < AES_BLOCK_SIZE; i++)
92 pad[i] ^= cbc[i];
93 crypto_cipher_encrypt_one(tfm, pad, pad);
94 memcpy(mac, pad, CMAC_TLEN);
95}
96
97
98void ieee80211_aes_cmac(struct crypto_cipher *tfm, u8 *scratch, const u8 *aad,
99 const u8 *data, size_t data_len, u8 *mic)
100{
101 const u8 *addr[3];
102 size_t len[3];
103 u8 zero[CMAC_TLEN];
104
105 memset(zero, 0, CMAC_TLEN);
106 addr[0] = aad;
107 len[0] = AAD_LEN;
108 addr[1] = data;
109 len[1] = data_len - CMAC_TLEN;
110 addr[2] = zero;
111 len[2] = CMAC_TLEN;
112
113 aes_128_cmac_vector(tfm, scratch, 3, addr, len, mic);
114}
115
116
117struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[])
118{
119 struct crypto_cipher *tfm;
120
121 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
122 if (IS_ERR(tfm))
123 return NULL;
124
125 crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN);
126
127 return tfm;
128}
129
130
131void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm)
132{
133 if (tfm)
134 crypto_free_cipher(tfm);
135}
diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h
new file mode 100644
index 000000000000..0eb9a4831508
--- /dev/null
+++ b/net/mac80211/aes_cmac.h
@@ -0,0 +1,19 @@
1/*
2 * Copyright 2008, Jouni Malinen <j@w1.fi>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#ifndef AES_CMAC_H
10#define AES_CMAC_H
11
12#include <linux/crypto.h>
13
14struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[]);
15void ieee80211_aes_cmac(struct crypto_cipher *tfm, u8 *scratch, const u8 *aad,
16 const u8 *data, size_t data_len, u8 *mic);
17void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm);
18
19#endif /* AES_CMAC_H */
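
A hypothetical caller of the two helpers declared above, roughly the shape in which BIP code would use them: set up the cipher once per key, compute the 8-octet MIC over the AAD plus frame body, then free the transform. Only the three ieee80211_aes_cmac_* calls come from aes_cmac.h; the function name, error code and buffer sizes here are assumptions for illustration, not code from this series.

/* Hypothetical BIP caller (sketch only): helper names and signatures are
 * from aes_cmac.h above, everything else is made up. */
#include <linux/crypto.h>
#include <linux/errno.h>
#include "aes_cmac.h"

static int example_bip_mic(const u8 igtk[16], const u8 *aad,
			   const u8 *body, size_t body_len, u8 *mic)
{
	struct crypto_cipher *tfm;
	u8 scratch[2 * 16];	/* cbc + pad blocks used by aes_128_cmac_vector() */

	tfm = ieee80211_aes_cmac_key_setup(igtk);
	if (!tfm)
		return -ENOMEM;

	/* body_len must cover the zeroed MIC field too, since the
	 * implementation hashes body_len - 8 octets of body and then a
	 * zero block in its place. */
	ieee80211_aes_cmac(tfm, scratch, aad, body, body_len, mic);

	ieee80211_aes_cmac_key_free(tfm);
	return 0;
}
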
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
new file mode 100644
index 000000000000..07656d830bc4
--- /dev/null
+++ b/net/mac80211/agg-rx.c
@@ -0,0 +1,310 @@
1/*
2 * HT handling
3 *
4 * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi>
5 * Copyright 2002-2005, Instant802 Networks, Inc.
6 * Copyright 2005-2006, Devicescape Software, Inc.
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2007-2008, Intel Corporation
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15
16#include <linux/ieee80211.h>
17#include <net/mac80211.h>
18#include "ieee80211_i.h"
19
20void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
21 u16 initiator, u16 reason)
22{
23 struct ieee80211_local *local = sta->local;
24 struct ieee80211_hw *hw = &local->hw;
25 int i;
26
27 /* check if TID is in operational state */
28 spin_lock_bh(&sta->lock);
29 if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) {
30 spin_unlock_bh(&sta->lock);
31 return;
32 }
33
34 sta->ampdu_mlme.tid_state_rx[tid] =
35 HT_AGG_STATE_REQ_STOP_BA_MSK |
36 (initiator << HT_AGG_STATE_INITIATOR_SHIFT);
37 spin_unlock_bh(&sta->lock);
38
39#ifdef CONFIG_MAC80211_HT_DEBUG
40 printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n",
41 sta->sta.addr, tid);
42#endif /* CONFIG_MAC80211_HT_DEBUG */
43
44 if (local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP,
45 &sta->sta, tid, NULL))
46 printk(KERN_DEBUG "HW problem - can not stop rx "
47 "aggregation for tid %d\n", tid);
48
49 /* shutdown timer has not expired */
50 if (initiator != WLAN_BACK_TIMER)
51 del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
52
53 /* check if this is a self generated aggregation halt */
54 if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER)
55 ieee80211_send_delba(sta->sdata, sta->sta.addr,
56 tid, 0, reason);
57
58 /* free the reordering buffer */
59 for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) {
60 if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) {
61 /* release the reordered frames */
62 dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]);
63 sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--;
64 sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL;
65 }
66 }
67
68 spin_lock_bh(&sta->lock);
69 /* free resources */
70 kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf);
71
72 if (!sta->ampdu_mlme.tid_rx[tid]->shutdown) {
73 kfree(sta->ampdu_mlme.tid_rx[tid]);
74 sta->ampdu_mlme.tid_rx[tid] = NULL;
75 }
76
77 sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE;
78 spin_unlock_bh(&sta->lock);
79}
80
81void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid,
82 u16 initiator, u16 reason)
83{
84 struct ieee80211_local *local = sdata->local;
85 struct sta_info *sta;
86
87 /* stop HW Rx aggregation. ampdu_action existence
88 * already verified in session init so we add the BUG_ON */
89 BUG_ON(!local->ops->ampdu_action);
90
91 rcu_read_lock();
92
93 sta = sta_info_get(local, ra);
94 if (!sta) {
95 rcu_read_unlock();
96 return;
97 }
98
99 __ieee80211_stop_rx_ba_session(sta, tid, initiator, reason);
100
101 rcu_read_unlock();
102}
103
104/*
105 * After accepting the AddBA Request we activated a timer,
106 * resetting it after each frame that arrives from the originator.
107 * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed.
108 */
109static void sta_rx_agg_session_timer_expired(unsigned long data)
110{
111 /* not an elegant detour, but there is no choice as the timer passes
112 * only one argument, and various sta_info are needed here, so init
113 * flow in sta_info_create gives the TID as data, while the timer_to_id
114 * array gives the sta through container_of */
115 u8 *ptid = (u8 *)data;
116 u8 *timer_to_id = ptid - *ptid;
117 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
118 timer_to_tid[0]);
119
120#ifdef CONFIG_MAC80211_HT_DEBUG
121 printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
122#endif
123 ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr,
124 (u16)*ptid, WLAN_BACK_TIMER,
125 WLAN_REASON_QSTA_TIMEOUT);
126}
127
128static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
129 u8 dialog_token, u16 status, u16 policy,
130 u16 buf_size, u16 timeout)
131{
132 struct ieee80211_local *local = sdata->local;
133 struct sk_buff *skb;
134 struct ieee80211_mgmt *mgmt;
135 u16 capab;
136
137 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
138
139 if (!skb) {
140 printk(KERN_DEBUG "%s: failed to allocate buffer "
141 "for addba resp frame\n", sdata->dev->name);
142 return;
143 }
144
145 skb_reserve(skb, local->hw.extra_tx_headroom);
146 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
147 memset(mgmt, 0, 24);
148 memcpy(mgmt->da, da, ETH_ALEN);
149 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
150 if (sdata->vif.type == NL80211_IFTYPE_AP ||
151 sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
152 memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
153 else if (sdata->vif.type == NL80211_IFTYPE_STATION)
154 memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
155
156 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
157 IEEE80211_STYPE_ACTION);
158
159 skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
160 mgmt->u.action.category = WLAN_CATEGORY_BACK;
161 mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
162 mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
163
164 capab = (u16)(policy << 1); /* bit 1 aggregation policy */
165 capab |= (u16)(tid << 2); /* bit 5:2 TID number */
166 capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */
167
168 mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab);
169 mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
170 mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
171
172 ieee80211_tx_skb(sdata, skb, 1);
173}
174
175void ieee80211_process_addba_request(struct ieee80211_local *local,
176 struct sta_info *sta,
177 struct ieee80211_mgmt *mgmt,
178 size_t len)
179{
180 struct ieee80211_hw *hw = &local->hw;
181 struct ieee80211_conf *conf = &hw->conf;
182 struct tid_ampdu_rx *tid_agg_rx;
183 u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status;
184 u8 dialog_token;
185 int ret = -EOPNOTSUPP;
186
187 /* extract session parameters from addba request frame */
188 dialog_token = mgmt->u.action.u.addba_req.dialog_token;
189 timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
190 start_seq_num =
191 le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4;
192
193 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
194 ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
195 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
196 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
197
198 status = WLAN_STATUS_REQUEST_DECLINED;
199
200 if (test_sta_flags(sta, WLAN_STA_SUSPEND)) {
201#ifdef CONFIG_MAC80211_HT_DEBUG
202 printk(KERN_DEBUG "Suspend in progress. "
203 "Denying ADDBA request\n");
204#endif
205 goto end_no_lock;
206 }
207
208 /* sanity check for incoming parameters:
209 * check if configuration can support the BA policy
 210 * and if buffer size does not exceed the max value */
211 /* XXX: check own ht delayed BA capability?? */
212 if (((ba_policy != 1)
213 && (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA)))
214 || (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
215 status = WLAN_STATUS_INVALID_QOS_PARAM;
216#ifdef CONFIG_MAC80211_HT_DEBUG
217 if (net_ratelimit())
218 printk(KERN_DEBUG "AddBA Req with bad params from "
219 "%pM on tid %u. policy %d, buffer size %d\n",
220 mgmt->sa, tid, ba_policy,
221 buf_size);
222#endif /* CONFIG_MAC80211_HT_DEBUG */
223 goto end_no_lock;
224 }
225 /* determine default buffer size */
226 if (buf_size == 0) {
227 struct ieee80211_supported_band *sband;
228
229 sband = local->hw.wiphy->bands[conf->channel->band];
230 buf_size = IEEE80211_MIN_AMPDU_BUF;
231 buf_size = buf_size << sband->ht_cap.ampdu_factor;
232 }
233
234
235 /* examine state machine */
236 spin_lock_bh(&sta->lock);
237
238 if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) {
239#ifdef CONFIG_MAC80211_HT_DEBUG
240 if (net_ratelimit())
241 printk(KERN_DEBUG "unexpected AddBA Req from "
242 "%pM on tid %u\n",
243 mgmt->sa, tid);
244#endif /* CONFIG_MAC80211_HT_DEBUG */
245 goto end;
246 }
247
248 /* prepare A-MPDU MLME for Rx aggregation */
249 sta->ampdu_mlme.tid_rx[tid] =
250 kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC);
251 if (!sta->ampdu_mlme.tid_rx[tid]) {
252#ifdef CONFIG_MAC80211_HT_DEBUG
253 if (net_ratelimit())
254 printk(KERN_ERR "allocate rx mlme to tid %d failed\n",
255 tid);
256#endif
257 goto end;
258 }
259 /* rx timer */
260 sta->ampdu_mlme.tid_rx[tid]->session_timer.function =
261 sta_rx_agg_session_timer_expired;
262 sta->ampdu_mlme.tid_rx[tid]->session_timer.data =
263 (unsigned long)&sta->timer_to_tid[tid];
264 init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
265
266 tid_agg_rx = sta->ampdu_mlme.tid_rx[tid];
267
268 /* prepare reordering buffer */
269 tid_agg_rx->reorder_buf =
270 kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC);
271 if (!tid_agg_rx->reorder_buf) {
272#ifdef CONFIG_MAC80211_HT_DEBUG
273 if (net_ratelimit())
274 printk(KERN_ERR "can not allocate reordering buffer "
275 "to tid %d\n", tid);
276#endif
277 kfree(sta->ampdu_mlme.tid_rx[tid]);
278 goto end;
279 }
280
281 if (local->ops->ampdu_action)
282 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START,
283 &sta->sta, tid, &start_seq_num);
284#ifdef CONFIG_MAC80211_HT_DEBUG
285 printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret);
286#endif /* CONFIG_MAC80211_HT_DEBUG */
287
288 if (ret) {
289 kfree(tid_agg_rx->reorder_buf);
290 kfree(tid_agg_rx);
291 sta->ampdu_mlme.tid_rx[tid] = NULL;
292 goto end;
293 }
294
295 /* change state and send addba resp */
296 sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL;
297 tid_agg_rx->dialog_token = dialog_token;
298 tid_agg_rx->ssn = start_seq_num;
299 tid_agg_rx->head_seq_num = start_seq_num;
300 tid_agg_rx->buf_size = buf_size;
301 tid_agg_rx->timeout = timeout;
302 tid_agg_rx->stored_mpdu_num = 0;
303 status = WLAN_STATUS_SUCCESS;
304end:
305 spin_unlock_bh(&sta->lock);
306
307end_no_lock:
308 ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
309 dialog_token, status, 1, buf_size, timeout);
310}
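
Both the Rx path above and the Tx path in agg-tx.c below pack the ADDBA "Block Ack Parameter Set" into one u16: bit 1 carries the policy, bits 5:2 the TID and bits 15:6 the buffer size. A standalone sketch of the packing and unpacking, with mask values written out to mirror the IEEE80211_ADDBA_PARAM_* constants the kernel code uses; the sample policy, TID and buffer values are arbitrary.

#include <stdio.h>
#include <stdint.h>

/* Masks mirror IEEE80211_ADDBA_PARAM_* from include/linux/ieee80211.h. */
#define ADDBA_PARAM_POLICY_MASK   0x0002
#define ADDBA_PARAM_TID_MASK      0x003C
#define ADDBA_PARAM_BUF_SIZE_MASK 0xFFC0

int main(void)
{
	uint16_t policy = 1, tid = 5, buf_size = 64;

	/* Pack, as ieee80211_send_addba_resp()/_request() do. */
	uint16_t capab = (uint16_t)(policy << 1) |
			 (uint16_t)(tid << 2) |
			 (uint16_t)(buf_size << 6);

	/* Unpack, as ieee80211_process_addba_request() does. */
	printf("policy=%u tid=%u buf_size=%u (capab=0x%04x)\n",
	       (capab & ADDBA_PARAM_POLICY_MASK) >> 1,
	       (capab & ADDBA_PARAM_TID_MASK) >> 2,
	       (capab & ADDBA_PARAM_BUF_SIZE_MASK) >> 6,
	       capab);
	return 0;
}

Running it prints policy=1 tid=5 buf_size=64 for capab=0x1016, the same round trip the request/response handlers perform.
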
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
new file mode 100644
index 000000000000..947aaaad35d2
--- /dev/null
+++ b/net/mac80211/agg-tx.c
@@ -0,0 +1,695 @@
1/*
2 * HT handling
3 *
4 * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi>
5 * Copyright 2002-2005, Instant802 Networks, Inc.
6 * Copyright 2005-2006, Devicescape Software, Inc.
7 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
8 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
9 * Copyright 2007-2009, Intel Corporation
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2 as
13 * published by the Free Software Foundation.
14 */
15
16#include <linux/ieee80211.h>
17#include <net/mac80211.h>
18#include "ieee80211_i.h"
19#include "wme.h"
20
21/**
22 * DOC: TX aggregation
23 *
24 * Aggregation on the TX side requires setting the hardware flag
25 * %IEEE80211_HW_AMPDU_AGGREGATION as well as, if present, the @ampdu_queues
26 * hardware parameter to the number of hardware AMPDU queues. If there are no
27 * hardware queues then the driver will (currently) have to do all frame
28 * buffering.
29 *
30 * When TX aggregation is started by some subsystem (usually the rate control
31 * algorithm would be appropriate) by calling the
32 * ieee80211_start_tx_ba_session() function, the driver will be notified via
33 * its @ampdu_action function, with the %IEEE80211_AMPDU_TX_START action.
34 *
35 * In response to that, the driver is later required to call the
36 * ieee80211_start_tx_ba_cb() (or ieee80211_start_tx_ba_cb_irqsafe())
37 * function, which will start the aggregation session.
38 *
39 * Similarly, when the aggregation session is stopped by
40 * ieee80211_stop_tx_ba_session(), the driver's @ampdu_action function will
41 * be called with the action %IEEE80211_AMPDU_TX_STOP. In this case, the
42 * call must not fail, and the driver must later call ieee80211_stop_tx_ba_cb()
43 * (or ieee80211_stop_tx_ba_cb_irqsafe()).
44 */
45
46static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
47 const u8 *da, u16 tid,
48 u8 dialog_token, u16 start_seq_num,
49 u16 agg_size, u16 timeout)
50{
51 struct ieee80211_local *local = sdata->local;
52 struct sk_buff *skb;
53 struct ieee80211_mgmt *mgmt;
54 u16 capab;
55
56 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
57
58 if (!skb) {
59 printk(KERN_ERR "%s: failed to allocate buffer "
60 "for addba request frame\n", sdata->dev->name);
61 return;
62 }
63 skb_reserve(skb, local->hw.extra_tx_headroom);
64 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
65 memset(mgmt, 0, 24);
66 memcpy(mgmt->da, da, ETH_ALEN);
67 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
68 if (sdata->vif.type == NL80211_IFTYPE_AP ||
69 sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
70 memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
71 else if (sdata->vif.type == NL80211_IFTYPE_STATION)
72 memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
73
74 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
75 IEEE80211_STYPE_ACTION);
76
77 skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req));
78
79 mgmt->u.action.category = WLAN_CATEGORY_BACK;
80 mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ;
81
82 mgmt->u.action.u.addba_req.dialog_token = dialog_token;
83 capab = (u16)(1 << 1); /* bit 1 aggregation policy */
84 capab |= (u16)(tid << 2); /* bit 5:2 TID number */
 85 capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggregation */
86
87 mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab);
88
89 mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout);
90 mgmt->u.action.u.addba_req.start_seq_num =
91 cpu_to_le16(start_seq_num << 4);
92
93 ieee80211_tx_skb(sdata, skb, 1);
94}
95
96void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn)
97{
98 struct ieee80211_local *local = sdata->local;
99 struct sk_buff *skb;
100 struct ieee80211_bar *bar;
101 u16 bar_control = 0;
102
103 skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom);
104 if (!skb) {
105 printk(KERN_ERR "%s: failed to allocate buffer for "
106 "bar frame\n", sdata->dev->name);
107 return;
108 }
109 skb_reserve(skb, local->hw.extra_tx_headroom);
110 bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar));
111 memset(bar, 0, sizeof(*bar));
112 bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
113 IEEE80211_STYPE_BACK_REQ);
114 memcpy(bar->ra, ra, ETH_ALEN);
115 memcpy(bar->ta, sdata->dev->dev_addr, ETH_ALEN);
116 bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL;
117 bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA;
118 bar_control |= (u16)(tid << 12);
119 bar->control = cpu_to_le16(bar_control);
120 bar->start_seq_num = cpu_to_le16(ssn);
121
122 ieee80211_tx_skb(sdata, skb, 0);
123}
124
125static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
126 enum ieee80211_back_parties initiator)
127{
128 struct ieee80211_local *local = sta->local;
129 int ret;
130 u8 *state;
131
132 state = &sta->ampdu_mlme.tid_state_tx[tid];
133
134 *state = HT_AGG_STATE_REQ_STOP_BA_MSK |
135 (initiator << HT_AGG_STATE_INITIATOR_SHIFT);
136
137 ret = local->ops->ampdu_action(&local->hw, IEEE80211_AMPDU_TX_STOP,
138 &sta->sta, tid, NULL);
139
140 /* HW shall not deny going back to legacy */
141 if (WARN_ON(ret)) {
142 *state = HT_AGG_STATE_OPERATIONAL;
143 /*
144 * We may have pending packets get stuck in this case...
145 * Not bothering with a workaround for now.
146 */
147 }
148
149 return ret;
150}
151
152/*
153 * After sending add Block Ack request we activated a timer until
154 * add Block Ack response will arrive from the recipient.
155 * If this timer expires sta_addba_resp_timer_expired will be executed.
156 */
157static void sta_addba_resp_timer_expired(unsigned long data)
158{
159 /* not an elegant detour, but there is no choice as the timer passes
160 * only one argument, and both sta_info and TID are needed, so init
161 * flow in sta_info_create gives the TID as data, while the timer_to_id
162 * array gives the sta through container_of */
163 u16 tid = *(u8 *)data;
164 struct sta_info *sta = container_of((void *)data,
165 struct sta_info, timer_to_tid[tid]);
166 u8 *state;
167
168 state = &sta->ampdu_mlme.tid_state_tx[tid];
169
170 /* check if the TID waits for addBA response */
171 spin_lock_bh(&sta->lock);
172 if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
173 spin_unlock_bh(&sta->lock);
174 *state = HT_AGG_STATE_IDLE;
175#ifdef CONFIG_MAC80211_HT_DEBUG
176 printk(KERN_DEBUG "timer expired on tid %d but we are not "
177 "expecting addBA response there", tid);
178#endif
179 return;
180 }
181
182#ifdef CONFIG_MAC80211_HT_DEBUG
183 printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid);
184#endif
185
186 ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR);
187 spin_unlock_bh(&sta->lock);
188}
189
190static inline int ieee80211_ac_from_tid(int tid)
191{
192 return ieee802_1d_to_ac[tid & 7];
193}
194
195int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
196{
197 struct ieee80211_local *local = hw_to_local(hw);
198 struct sta_info *sta;
199 struct ieee80211_sub_if_data *sdata;
200 u8 *state;
201 int ret = 0;
202 u16 start_seq_num;
203
204 if (WARN_ON(!local->ops->ampdu_action))
205 return -EINVAL;
206
207 if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION))
208 return -EINVAL;
209
210#ifdef CONFIG_MAC80211_HT_DEBUG
211 printk(KERN_DEBUG "Open BA session requested for %pM tid %u\n",
212 ra, tid);
213#endif /* CONFIG_MAC80211_HT_DEBUG */
214
215 rcu_read_lock();
216
217 sta = sta_info_get(local, ra);
218 if (!sta) {
219#ifdef CONFIG_MAC80211_HT_DEBUG
220 printk(KERN_DEBUG "Could not find the station\n");
221#endif
222 ret = -ENOENT;
223 goto unlock;
224 }
225
226 /*
227 * The aggregation code is not prepared to handle
228 * anything but STA/AP due to the BSSID handling.
229 * IBSS could work in the code but isn't supported
230 * by drivers or the standard.
231 */
232 if (sta->sdata->vif.type != NL80211_IFTYPE_STATION &&
233 sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
234 sta->sdata->vif.type != NL80211_IFTYPE_AP) {
235 ret = -EINVAL;
236 goto unlock;
237 }
238
239 if (test_sta_flags(sta, WLAN_STA_SUSPEND)) {
240#ifdef CONFIG_MAC80211_HT_DEBUG
241 printk(KERN_DEBUG "Suspend in progress. "
242 "Denying BA session request\n");
243#endif
244 ret = -EINVAL;
245 goto unlock;
246 }
247
248 spin_lock_bh(&sta->lock);
249 spin_lock(&local->ampdu_lock);
250
251 sdata = sta->sdata;
252
253 /* we have tried too many times, receiver does not want A-MPDU */
254 if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) {
255 ret = -EBUSY;
256 goto err_unlock_sta;
257 }
258
259 state = &sta->ampdu_mlme.tid_state_tx[tid];
260 /* check if the TID is not in aggregation flow already */
261 if (*state != HT_AGG_STATE_IDLE) {
262#ifdef CONFIG_MAC80211_HT_DEBUG
263 printk(KERN_DEBUG "BA request denied - session is not "
264 "idle on tid %u\n", tid);
265#endif /* CONFIG_MAC80211_HT_DEBUG */
266 ret = -EAGAIN;
267 goto err_unlock_sta;
268 }
269
270 /*
271 * While we're asking the driver about the aggregation,
272 * stop the AC queue so that we don't have to worry
273 * about frames that came in while we were doing that,
274 * which would require us to put them to the AC pending
275 * afterwards which just makes the code more complex.
276 */
277 ieee80211_stop_queue_by_reason(
278 &local->hw, ieee80211_ac_from_tid(tid),
279 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
280
281 /* prepare A-MPDU MLME for Tx aggregation */
282 sta->ampdu_mlme.tid_tx[tid] =
283 kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC);
284 if (!sta->ampdu_mlme.tid_tx[tid]) {
285#ifdef CONFIG_MAC80211_HT_DEBUG
286 if (net_ratelimit())
287 printk(KERN_ERR "allocate tx mlme to tid %d failed\n",
288 tid);
289#endif
290 ret = -ENOMEM;
291 goto err_wake_queue;
292 }
293
294 skb_queue_head_init(&sta->ampdu_mlme.tid_tx[tid]->pending);
295
296 /* Tx timer */
297 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function =
298 sta_addba_resp_timer_expired;
299 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data =
300 (unsigned long)&sta->timer_to_tid[tid];
301 init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
302
303 /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the
304 * call back right away, it must see that the flow has begun */
305 *state |= HT_ADDBA_REQUESTED_MSK;
306
307 start_seq_num = sta->tid_seq[tid];
308
309 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START,
310 &sta->sta, tid, &start_seq_num);
311
312 if (ret) {
313#ifdef CONFIG_MAC80211_HT_DEBUG
314 printk(KERN_DEBUG "BA request denied - HW unavailable for"
315 " tid %d\n", tid);
316#endif /* CONFIG_MAC80211_HT_DEBUG */
317 *state = HT_AGG_STATE_IDLE;
318 goto err_free;
319 }
320
321 /* Driver vetoed or OKed, but we can take packets again now */
322 ieee80211_wake_queue_by_reason(
323 &local->hw, ieee80211_ac_from_tid(tid),
324 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
325
326 spin_unlock(&local->ampdu_lock);
327 spin_unlock_bh(&sta->lock);
328
329 /* send an addBA request */
330 sta->ampdu_mlme.dialog_token_allocator++;
331 sta->ampdu_mlme.tid_tx[tid]->dialog_token =
332 sta->ampdu_mlme.dialog_token_allocator;
333 sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num;
334
335 ieee80211_send_addba_request(sta->sdata, ra, tid,
336 sta->ampdu_mlme.tid_tx[tid]->dialog_token,
337 sta->ampdu_mlme.tid_tx[tid]->ssn,
338 0x40, 5000);
339 /* activate the timer for the recipient's addBA response */
340 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires =
341 jiffies + ADDBA_RESP_INTERVAL;
342 add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
343#ifdef CONFIG_MAC80211_HT_DEBUG
344 printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid);
345#endif
346 goto unlock;
347
348 err_free:
349 kfree(sta->ampdu_mlme.tid_tx[tid]);
350 sta->ampdu_mlme.tid_tx[tid] = NULL;
351 err_wake_queue:
352 ieee80211_wake_queue_by_reason(
353 &local->hw, ieee80211_ac_from_tid(tid),
354 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
355 err_unlock_sta:
356 spin_unlock(&local->ampdu_lock);
357 spin_unlock_bh(&sta->lock);
358 unlock:
359 rcu_read_unlock();
360 return ret;
361}
362EXPORT_SYMBOL(ieee80211_start_tx_ba_session);
363
364/*
365 * splice packets from the STA's pending to the local pending,
366 * requires a call to ieee80211_agg_splice_finish and holding
367 * local->ampdu_lock across both calls.
368 */
369static void ieee80211_agg_splice_packets(struct ieee80211_local *local,
370 struct sta_info *sta, u16 tid)
371{
372 unsigned long flags;
373 u16 queue = ieee80211_ac_from_tid(tid);
374
375 ieee80211_stop_queue_by_reason(
376 &local->hw, queue,
377 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
378
379 if (!skb_queue_empty(&sta->ampdu_mlme.tid_tx[tid]->pending)) {
380 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
381 /* mark queue as pending, it is stopped already */
382 __set_bit(IEEE80211_QUEUE_STOP_REASON_PENDING,
383 &local->queue_stop_reasons[queue]);
384 /* copy over remaining packets */
385 skb_queue_splice_tail_init(
386 &sta->ampdu_mlme.tid_tx[tid]->pending,
387 &local->pending[queue]);
388 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
389 }
390}
391
392static void ieee80211_agg_splice_finish(struct ieee80211_local *local,
393 struct sta_info *sta, u16 tid)
394{
395 u16 queue = ieee80211_ac_from_tid(tid);
396
397 ieee80211_wake_queue_by_reason(
398 &local->hw, queue,
399 IEEE80211_QUEUE_STOP_REASON_AGGREGATION);
400}
401
402/* caller must hold sta->lock */
403static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
404 struct sta_info *sta, u16 tid)
405{
406#ifdef CONFIG_MAC80211_HT_DEBUG
407 printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid);
408#endif
409
410 spin_lock(&local->ampdu_lock);
411 ieee80211_agg_splice_packets(local, sta, tid);
412 /*
413 * NB: we rely on sta->lock being taken in the TX
414 * processing here when adding to the pending queue,
415 * otherwise we could only change the state of the
416 * session to OPERATIONAL _here_.
417 */
418 ieee80211_agg_splice_finish(local, sta, tid);
419 spin_unlock(&local->ampdu_lock);
420
421 local->ops->ampdu_action(&local->hw, IEEE80211_AMPDU_TX_OPERATIONAL,
422 &sta->sta, tid, NULL);
423}
424
425void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid)
426{
427 struct ieee80211_local *local = hw_to_local(hw);
428 struct sta_info *sta;
429 u8 *state;
430
431 if (tid >= STA_TID_NUM) {
432#ifdef CONFIG_MAC80211_HT_DEBUG
433 printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
434 tid, STA_TID_NUM);
435#endif
436 return;
437 }
438
439 rcu_read_lock();
440 sta = sta_info_get(local, ra);
441 if (!sta) {
442 rcu_read_unlock();
443#ifdef CONFIG_MAC80211_HT_DEBUG
444 printk(KERN_DEBUG "Could not find station: %pM\n", ra);
445#endif
446 return;
447 }
448
449 state = &sta->ampdu_mlme.tid_state_tx[tid];
450 spin_lock_bh(&sta->lock);
451
452 if (WARN_ON(!(*state & HT_ADDBA_REQUESTED_MSK))) {
453#ifdef CONFIG_MAC80211_HT_DEBUG
454 printk(KERN_DEBUG "addBA was not requested yet, state is %d\n",
455 *state);
456#endif
457 spin_unlock_bh(&sta->lock);
458 rcu_read_unlock();
459 return;
460 }
461
462 if (WARN_ON(*state & HT_ADDBA_DRV_READY_MSK))
463 goto out;
464
465 *state |= HT_ADDBA_DRV_READY_MSK;
466
467 if (*state == HT_AGG_STATE_OPERATIONAL)
468 ieee80211_agg_tx_operational(local, sta, tid);
469
470 out:
471 spin_unlock_bh(&sta->lock);
472 rcu_read_unlock();
473}
474EXPORT_SYMBOL(ieee80211_start_tx_ba_cb);
475
476void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
477 const u8 *ra, u16 tid)
478{
479 struct ieee80211_local *local = hw_to_local(hw);
480 struct ieee80211_ra_tid *ra_tid;
481 struct sk_buff *skb = dev_alloc_skb(0);
482
483 if (unlikely(!skb)) {
484#ifdef CONFIG_MAC80211_HT_DEBUG
485 if (net_ratelimit())
486 printk(KERN_WARNING "%s: Not enough memory, "
487 "dropping start BA session", skb->dev->name);
488#endif
489 return;
490 }
491 ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
492 memcpy(&ra_tid->ra, ra, ETH_ALEN);
493 ra_tid->tid = tid;
494
495 skb->pkt_type = IEEE80211_ADDBA_MSG;
496 skb_queue_tail(&local->skb_queue, skb);
497 tasklet_schedule(&local->tasklet);
498}
499EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
500
501int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
502 enum ieee80211_back_parties initiator)
503{
504 u8 *state;
505 int ret;
506
507 /* check if the TID is in aggregation */
508 state = &sta->ampdu_mlme.tid_state_tx[tid];
509 spin_lock_bh(&sta->lock);
510
511 if (*state != HT_AGG_STATE_OPERATIONAL) {
512 ret = -ENOENT;
513 goto unlock;
514 }
515
516#ifdef CONFIG_MAC80211_HT_DEBUG
517 printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n",
518 sta->sta.addr, tid);
519#endif /* CONFIG_MAC80211_HT_DEBUG */
520
521 ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator);
522
523 unlock:
524 spin_unlock_bh(&sta->lock);
525 return ret;
526}
527
528int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw,
529 u8 *ra, u16 tid,
530 enum ieee80211_back_parties initiator)
531{
532 struct ieee80211_local *local = hw_to_local(hw);
533 struct sta_info *sta;
534 int ret = 0;
535
536 if (WARN_ON(!local->ops->ampdu_action))
537 return -EINVAL;
538
539 if (tid >= STA_TID_NUM)
540 return -EINVAL;
541
542 rcu_read_lock();
543 sta = sta_info_get(local, ra);
544 if (!sta) {
545 rcu_read_unlock();
546 return -ENOENT;
547 }
548
549 ret = __ieee80211_stop_tx_ba_session(sta, tid, initiator);
550 rcu_read_unlock();
551 return ret;
552}
553EXPORT_SYMBOL(ieee80211_stop_tx_ba_session);
554
555void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
556{
557 struct ieee80211_local *local = hw_to_local(hw);
558 struct sta_info *sta;
559 u8 *state;
560
561 if (tid >= STA_TID_NUM) {
562#ifdef CONFIG_MAC80211_HT_DEBUG
563 printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
564 tid, STA_TID_NUM);
565#endif
566 return;
567 }
568
569#ifdef CONFIG_MAC80211_HT_DEBUG
570 printk(KERN_DEBUG "Stopping Tx BA session for %pM tid %d\n",
571 ra, tid);
572#endif /* CONFIG_MAC80211_HT_DEBUG */
573
574 rcu_read_lock();
575 sta = sta_info_get(local, ra);
576 if (!sta) {
577#ifdef CONFIG_MAC80211_HT_DEBUG
578 printk(KERN_DEBUG "Could not find station: %pM\n", ra);
579#endif
580 rcu_read_unlock();
581 return;
582 }
583 state = &sta->ampdu_mlme.tid_state_tx[tid];
584
585 /* NOTE: no need to use sta->lock in this state check, as
586 * ieee80211_stop_tx_ba_session will let only one stop call to
587 * pass through per sta/tid
588 */
589 if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) {
590#ifdef CONFIG_MAC80211_HT_DEBUG
591 printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n");
592#endif
593 rcu_read_unlock();
594 return;
595 }
596
597 if (*state & HT_AGG_STATE_INITIATOR_MSK)
598 ieee80211_send_delba(sta->sdata, ra, tid,
599 WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
600
601 spin_lock_bh(&sta->lock);
602 spin_lock(&local->ampdu_lock);
603
604 ieee80211_agg_splice_packets(local, sta, tid);
605
606 *state = HT_AGG_STATE_IDLE;
607 /* from now on packets are no longer put onto sta->pending */
608 sta->ampdu_mlme.addba_req_num[tid] = 0;
609 kfree(sta->ampdu_mlme.tid_tx[tid]);
610 sta->ampdu_mlme.tid_tx[tid] = NULL;
611
612 ieee80211_agg_splice_finish(local, sta, tid);
613
614 spin_unlock(&local->ampdu_lock);
615 spin_unlock_bh(&sta->lock);
616
617 rcu_read_unlock();
618}
619EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb);
620
621void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
622 const u8 *ra, u16 tid)
623{
624 struct ieee80211_local *local = hw_to_local(hw);
625 struct ieee80211_ra_tid *ra_tid;
626 struct sk_buff *skb = dev_alloc_skb(0);
627
628 if (unlikely(!skb)) {
629#ifdef CONFIG_MAC80211_HT_DEBUG
630 if (net_ratelimit())
631 printk(KERN_WARNING "%s: Not enough memory, "
632 "dropping stop BA session", skb->dev->name);
633#endif
634 return;
635 }
636 ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
637 memcpy(&ra_tid->ra, ra, ETH_ALEN);
638 ra_tid->tid = tid;
639
640 skb->pkt_type = IEEE80211_DELBA_MSG;
641 skb_queue_tail(&local->skb_queue, skb);
642 tasklet_schedule(&local->tasklet);
643}
644EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe);
645
646
647void ieee80211_process_addba_resp(struct ieee80211_local *local,
648 struct sta_info *sta,
649 struct ieee80211_mgmt *mgmt,
650 size_t len)
651{
652 u16 capab, tid;
653 u8 *state;
654
655 capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
656 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
657
658 state = &sta->ampdu_mlme.tid_state_tx[tid];
659
660 spin_lock_bh(&sta->lock);
661
662 if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
663 spin_unlock_bh(&sta->lock);
664 return;
665 }
666
667 if (mgmt->u.action.u.addba_resp.dialog_token !=
668 sta->ampdu_mlme.tid_tx[tid]->dialog_token) {
669 spin_unlock_bh(&sta->lock);
670#ifdef CONFIG_MAC80211_HT_DEBUG
671 printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid);
672#endif /* CONFIG_MAC80211_HT_DEBUG */
673 return;
674 }
675
676 del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
677#ifdef CONFIG_MAC80211_HT_DEBUG
678 printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid);
679#endif /* CONFIG_MAC80211_HT_DEBUG */
680 if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
681 == WLAN_STATUS_SUCCESS) {
682 u8 curstate = *state;
683
684 *state |= HT_ADDBA_RECEIVED_MSK;
685
686 if (*state != curstate && *state == HT_AGG_STATE_OPERATIONAL)
687 ieee80211_agg_tx_operational(local, sta, tid);
688
689 sta->ampdu_mlme.addba_req_num[tid] = 0;
690 } else {
691 sta->ampdu_mlme.addba_req_num[tid]++;
692 ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR);
693 }
694 spin_unlock_bh(&sta->lock);
695}
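
The DOC block at the top of agg-tx.c describes the driver side of this handshake: mac80211 issues IEEE80211_AMPDU_TX_START/TX_STOP through ampdu_action(), and the driver answers via ieee80211_start_tx_ba_cb_irqsafe() or ieee80211_stop_tx_ba_cb_irqsafe(). A hypothetical driver handler following that contract; "foo" is a made-up driver and the comments describe what such a driver would typically do, not any real hardware.

/* Hypothetical driver-side ampdu_action handler (sketch only), following
 * the flow documented in agg-tx.c above. */
#include <net/mac80211.h>

static int foo_ampdu_action(struct ieee80211_hw *hw,
			    enum ieee80211_ampdu_mlme_action action,
			    struct ieee80211_sta *sta, u16 tid, u16 *ssn)
{
	switch (action) {
	case IEEE80211_AMPDU_TX_START:
		/* Prepare hardware queues here, then tell mac80211 to
		 * continue the ADDBA exchange. */
		ieee80211_start_tx_ba_cb_irqsafe(hw, sta->addr, tid);
		return 0;
	case IEEE80211_AMPDU_TX_OPERATIONAL:
		/* Session fully negotiated; enable aggregation for this TID. */
		return 0;
	case IEEE80211_AMPDU_TX_STOP:
		/* Must not fail: flush pending frames, then acknowledge. */
		ieee80211_stop_tx_ba_cb_irqsafe(hw, sta->addr, tid);
		return 0;
	case IEEE80211_AMPDU_RX_START:
	case IEEE80211_AMPDU_RX_STOP:
		/* Rx reordering is handled by mac80211 in this sketch. */
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}
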
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 9d4e4d846ec1..e677b751d468 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -133,6 +133,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
133 case WLAN_CIPHER_SUITE_CCMP: 133 case WLAN_CIPHER_SUITE_CCMP:
134 alg = ALG_CCMP; 134 alg = ALG_CCMP;
135 break; 135 break;
136 case WLAN_CIPHER_SUITE_AES_CMAC:
137 alg = ALG_AES_CMAC;
138 break;
136 default: 139 default:
137 return -EINVAL; 140 return -EINVAL;
138 } 141 }
@@ -275,6 +278,17 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
275 else 278 else
276 params.cipher = WLAN_CIPHER_SUITE_WEP104; 279 params.cipher = WLAN_CIPHER_SUITE_WEP104;
277 break; 280 break;
281 case ALG_AES_CMAC:
282 params.cipher = WLAN_CIPHER_SUITE_AES_CMAC;
283 seq[0] = key->u.aes_cmac.tx_pn[5];
284 seq[1] = key->u.aes_cmac.tx_pn[4];
285 seq[2] = key->u.aes_cmac.tx_pn[3];
286 seq[3] = key->u.aes_cmac.tx_pn[2];
287 seq[4] = key->u.aes_cmac.tx_pn[1];
288 seq[5] = key->u.aes_cmac.tx_pn[0];
289 params.seq = seq;
290 params.seq_len = 6;
291 break;
278 } 292 }
279 293
280 params.key = key->conf.key; 294 params.key = key->conf.key;
@@ -304,6 +318,22 @@ static int ieee80211_config_default_key(struct wiphy *wiphy,
304 return 0; 318 return 0;
305} 319}
306 320
321static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy,
322 struct net_device *dev,
323 u8 key_idx)
324{
325 struct ieee80211_sub_if_data *sdata;
326
327 rcu_read_lock();
328
329 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
330 ieee80211_set_default_mgmt_key(sdata, key_idx);
331
332 rcu_read_unlock();
333
334 return 0;
335}
336
307static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) 337static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
308{ 338{
309 struct ieee80211_sub_if_data *sdata = sta->sdata; 339 struct ieee80211_sub_if_data *sdata = sta->sdata;
@@ -311,11 +341,15 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
311 sinfo->filled = STATION_INFO_INACTIVE_TIME | 341 sinfo->filled = STATION_INFO_INACTIVE_TIME |
312 STATION_INFO_RX_BYTES | 342 STATION_INFO_RX_BYTES |
313 STATION_INFO_TX_BYTES | 343 STATION_INFO_TX_BYTES |
344 STATION_INFO_RX_PACKETS |
345 STATION_INFO_TX_PACKETS |
314 STATION_INFO_TX_BITRATE; 346 STATION_INFO_TX_BITRATE;
315 347
316 sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); 348 sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
317 sinfo->rx_bytes = sta->rx_bytes; 349 sinfo->rx_bytes = sta->rx_bytes;
318 sinfo->tx_bytes = sta->tx_bytes; 350 sinfo->tx_bytes = sta->tx_bytes;
351 sinfo->rx_packets = sta->rx_packets;
352 sinfo->tx_packets = sta->tx_packets;
319 353
320 if (sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { 354 if (sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) {
321 sinfo->filled |= STATION_INFO_SIGNAL; 355 sinfo->filled |= STATION_INFO_SIGNAL;
@@ -417,7 +451,8 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
417 * This is a kludge. beacon interval should really be part 451 * This is a kludge. beacon interval should really be part
418 * of the beacon information. 452 * of the beacon information.
419 */ 453 */
420 if (params->interval) { 454 if (params->interval && (sdata->local->hw.conf.beacon_int !=
455 params->interval)) {
421 sdata->local->hw.conf.beacon_int = params->interval; 456 sdata->local->hw.conf.beacon_int = params->interval;
422 err = ieee80211_hw_config(sdata->local, 457 err = ieee80211_hw_config(sdata->local,
423 IEEE80211_CONF_CHANGE_BEACON_INTERVAL); 458 IEEE80211_CONF_CHANGE_BEACON_INTERVAL);
@@ -493,7 +528,8 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata,
493 528
494 kfree(old); 529 kfree(old);
495 530
496 return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); 531 return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON |
532 IEEE80211_IFCC_BEACON_ENABLED);
497} 533}
498 534
499static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, 535static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
@@ -504,9 +540,6 @@ static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev,
504 540
505 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 541 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
506 542
507 if (sdata->vif.type != NL80211_IFTYPE_AP)
508 return -EINVAL;
509
510 old = sdata->u.ap.beacon; 543 old = sdata->u.ap.beacon;
511 544
512 if (old) 545 if (old)
@@ -523,9 +556,6 @@ static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev,
523 556
524 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 557 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
525 558
526 if (sdata->vif.type != NL80211_IFTYPE_AP)
527 return -EINVAL;
528
529 old = sdata->u.ap.beacon; 559 old = sdata->u.ap.beacon;
530 560
531 if (!old) 561 if (!old)
@@ -541,9 +571,6 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
541 571
542 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 572 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
543 573
544 if (sdata->vif.type != NL80211_IFTYPE_AP)
545 return -EINVAL;
546
547 old = sdata->u.ap.beacon; 574 old = sdata->u.ap.beacon;
548 575
549 if (!old) 576 if (!old)
@@ -553,7 +580,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev)
553 synchronize_rcu(); 580 synchronize_rcu();
554 kfree(old); 581 kfree(old);
555 582
556 return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); 583 return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON_ENABLED);
557} 584}
558 585
559/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */ 586/* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */
@@ -630,6 +657,10 @@ static void sta_apply_parameters(struct ieee80211_local *local,
630 sta->flags &= ~WLAN_STA_WME; 657 sta->flags &= ~WLAN_STA_WME;
631 if (params->station_flags & STATION_FLAG_WME) 658 if (params->station_flags & STATION_FLAG_WME)
632 sta->flags |= WLAN_STA_WME; 659 sta->flags |= WLAN_STA_WME;
660
661 sta->flags &= ~WLAN_STA_MFP;
662 if (params->station_flags & STATION_FLAG_MFP)
663 sta->flags |= WLAN_STA_MFP;
633 spin_unlock_bh(&sta->lock); 664 spin_unlock_bh(&sta->lock);
634 } 665 }
635 666
@@ -688,10 +719,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
688 int err; 719 int err;
689 int layer2_update; 720 int layer2_update;
690 721
691 /* Prevent a race with changing the rate control algorithm */
692 if (!netif_running(dev))
693 return -ENETDOWN;
694
695 if (params->vlan) { 722 if (params->vlan) {
696 sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); 723 sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
697 724
@@ -820,14 +847,8 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev,
820 struct sta_info *sta; 847 struct sta_info *sta;
821 int err; 848 int err;
822 849
823 if (!netif_running(dev))
824 return -ENETDOWN;
825
826 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 850 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
827 851
828 if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
829 return -ENOTSUPP;
830
831 rcu_read_lock(); 852 rcu_read_lock();
832 sta = sta_info_get(local, next_hop); 853 sta = sta_info_get(local, next_hop);
833 if (!sta) { 854 if (!sta) {
@@ -873,14 +894,8 @@ static int ieee80211_change_mpath(struct wiphy *wiphy,
873 struct mesh_path *mpath; 894 struct mesh_path *mpath;
874 struct sta_info *sta; 895 struct sta_info *sta;
875 896
876 if (!netif_running(dev))
877 return -ENETDOWN;
878
879 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 897 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
880 898
881 if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
882 return -ENOTSUPP;
883
884 rcu_read_lock(); 899 rcu_read_lock();
885 900
886 sta = sta_info_get(local, next_hop); 901 sta = sta_info_get(local, next_hop);
@@ -949,9 +964,6 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev,
949 964
950 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 965 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
951 966
952 if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
953 return -ENOTSUPP;
954
955 rcu_read_lock(); 967 rcu_read_lock();
956 mpath = mesh_path_lookup(dst, sdata); 968 mpath = mesh_path_lookup(dst, sdata);
957 if (!mpath) { 969 if (!mpath) {
@@ -973,9 +985,6 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev,
973 985
974 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 986 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
975 987
976 if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
977 return -ENOTSUPP;
978
979 rcu_read_lock(); 988 rcu_read_lock();
980 mpath = mesh_path_lookup_by_idx(idx, sdata); 989 mpath = mesh_path_lookup_by_idx(idx, sdata);
981 if (!mpath) { 990 if (!mpath) {
@@ -995,8 +1004,6 @@ static int ieee80211_get_mesh_params(struct wiphy *wiphy,
995 struct ieee80211_sub_if_data *sdata; 1004 struct ieee80211_sub_if_data *sdata;
996 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1005 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
997 1006
998 if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
999 return -ENOTSUPP;
1000 memcpy(conf, &(sdata->u.mesh.mshcfg), sizeof(struct mesh_config)); 1007 memcpy(conf, &(sdata->u.mesh.mshcfg), sizeof(struct mesh_config));
1001 return 0; 1008 return 0;
1002} 1009}
@@ -1014,9 +1021,6 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy,
1014 struct ieee80211_sub_if_data *sdata; 1021 struct ieee80211_sub_if_data *sdata;
1015 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1022 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1016 1023
1017 if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
1018 return -ENOTSUPP;
1019
1020 /* Set the config options which we are interested in setting */ 1024 /* Set the config options which we are interested in setting */
1021 conf = &(sdata->u.mesh.mshcfg); 1025 conf = &(sdata->u.mesh.mshcfg);
1022 if (_chg_mesh_attr(NL80211_MESHCONF_RETRY_TIMEOUT, mask)) 1026 if (_chg_mesh_attr(NL80211_MESHCONF_RETRY_TIMEOUT, mask))
@@ -1064,9 +1068,6 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
1064 1068
1065 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1069 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1066 1070
1067 if (sdata->vif.type != NL80211_IFTYPE_AP)
1068 return -EINVAL;
1069
1070 if (params->use_cts_prot >= 0) { 1071 if (params->use_cts_prot >= 0) {
1071 sdata->vif.bss_conf.use_cts_prot = params->use_cts_prot; 1072 sdata->vif.bss_conf.use_cts_prot = params->use_cts_prot;
1072 changed |= BSS_CHANGED_ERP_CTS_PROT; 1073 changed |= BSS_CHANGED_ERP_CTS_PROT;
@@ -1141,6 +1142,150 @@ static int ieee80211_set_channel(struct wiphy *wiphy,
1141 return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); 1142 return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
1142} 1143}
1143 1144
1145#ifdef CONFIG_PM
1146static int ieee80211_suspend(struct wiphy *wiphy)
1147{
1148 return __ieee80211_suspend(wiphy_priv(wiphy));
1149}
1150
1151static int ieee80211_resume(struct wiphy *wiphy)
1152{
1153 return __ieee80211_resume(wiphy_priv(wiphy));
1154}
1155#else
1156#define ieee80211_suspend NULL
1157#define ieee80211_resume NULL
1158#endif
1159
1160static int ieee80211_scan(struct wiphy *wiphy,
1161 struct net_device *dev,
1162 struct cfg80211_scan_request *req)
1163{
1164 struct ieee80211_sub_if_data *sdata;
1165
1166 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1167
1168 if (sdata->vif.type != NL80211_IFTYPE_STATION &&
1169 sdata->vif.type != NL80211_IFTYPE_ADHOC &&
1170 sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
1171 return -EOPNOTSUPP;
1172
1173 return ieee80211_request_scan(sdata, req);
1174}
1175
1176static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev,
1177 struct cfg80211_auth_request *req)
1178{
1179 struct ieee80211_sub_if_data *sdata;
1180
1181 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1182
1183 switch (req->auth_type) {
1184 case NL80211_AUTHTYPE_OPEN_SYSTEM:
1185 sdata->u.mgd.auth_algs = IEEE80211_AUTH_ALG_OPEN;
1186 break;
1187 case NL80211_AUTHTYPE_SHARED_KEY:
1188 sdata->u.mgd.auth_algs = IEEE80211_AUTH_ALG_SHARED_KEY;
1189 break;
1190 case NL80211_AUTHTYPE_FT:
1191 sdata->u.mgd.auth_algs = IEEE80211_AUTH_ALG_FT;
1192 break;
1193 case NL80211_AUTHTYPE_NETWORK_EAP:
1194 sdata->u.mgd.auth_algs = IEEE80211_AUTH_ALG_LEAP;
1195 break;
1196 default:
1197 return -EOPNOTSUPP;
1198 }
1199
1200 memcpy(sdata->u.mgd.bssid, req->peer_addr, ETH_ALEN);
1201 sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL;
1202 sdata->u.mgd.flags |= IEEE80211_STA_BSSID_SET;
1203
1204 /* TODO: req->chan */
1205 sdata->u.mgd.flags |= IEEE80211_STA_AUTO_CHANNEL_SEL;
1206
1207 if (req->ssid) {
1208 sdata->u.mgd.flags |= IEEE80211_STA_SSID_SET;
1209 memcpy(sdata->u.mgd.ssid, req->ssid, req->ssid_len);
1210 sdata->u.mgd.ssid_len = req->ssid_len;
1211 sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_SSID_SEL;
1212 }
1213
1214 kfree(sdata->u.mgd.sme_auth_ie);
1215 sdata->u.mgd.sme_auth_ie = NULL;
1216 sdata->u.mgd.sme_auth_ie_len = 0;
1217 if (req->ie) {
1218 sdata->u.mgd.sme_auth_ie = kmalloc(req->ie_len, GFP_KERNEL);
1219 if (sdata->u.mgd.sme_auth_ie == NULL)
1220 return -ENOMEM;
1221 memcpy(sdata->u.mgd.sme_auth_ie, req->ie, req->ie_len);
1222 sdata->u.mgd.sme_auth_ie_len = req->ie_len;
1223 }
1224
1225 sdata->u.mgd.flags |= IEEE80211_STA_EXT_SME;
1226 sdata->u.mgd.state = IEEE80211_STA_MLME_DIRECT_PROBE;
1227 ieee80211_sta_req_auth(sdata);
1228 return 0;
1229}
1230
1231static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev,
1232 struct cfg80211_assoc_request *req)
1233{
1234 struct ieee80211_sub_if_data *sdata;
1235 int ret;
1236
1237 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1238
1239 if (memcmp(sdata->u.mgd.bssid, req->peer_addr, ETH_ALEN) != 0 ||
1240 !(sdata->u.mgd.flags & IEEE80211_STA_AUTHENTICATED))
1241 return -ENOLINK; /* not authenticated */
1242
1243 sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL;
1244 sdata->u.mgd.flags |= IEEE80211_STA_BSSID_SET;
1245
1246 /* TODO: req->chan */
1247 sdata->u.mgd.flags |= IEEE80211_STA_AUTO_CHANNEL_SEL;
1248
1249 if (req->ssid) {
1250 sdata->u.mgd.flags |= IEEE80211_STA_SSID_SET;
1251 memcpy(sdata->u.mgd.ssid, req->ssid, req->ssid_len);
1252 sdata->u.mgd.ssid_len = req->ssid_len;
1253 sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_SSID_SEL;
1254 } else
1255 sdata->u.mgd.flags |= IEEE80211_STA_AUTO_SSID_SEL;
1256
1257 ret = ieee80211_sta_set_extra_ie(sdata, req->ie, req->ie_len);
1258 if (ret)
1259 return ret;
1260
1261 sdata->u.mgd.flags |= IEEE80211_STA_EXT_SME;
1262 sdata->u.mgd.state = IEEE80211_STA_MLME_ASSOCIATE;
1263 ieee80211_sta_req_auth(sdata);
1264 return 0;
1265}
1266
1267static int ieee80211_deauth(struct wiphy *wiphy, struct net_device *dev,
1268 struct cfg80211_deauth_request *req)
1269{
1270 struct ieee80211_sub_if_data *sdata;
1271
1272 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1273
1274 /* TODO: req->ie */
1275 return ieee80211_sta_deauthenticate(sdata, req->reason_code);
1276}
1277
1278static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev,
1279 struct cfg80211_disassoc_request *req)
1280{
1281 struct ieee80211_sub_if_data *sdata;
1282
1283 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1284
1285 /* TODO: req->ie */
1286 return ieee80211_sta_disassociate(sdata, req->reason_code);
1287}
1288
1144struct cfg80211_ops mac80211_config_ops = { 1289struct cfg80211_ops mac80211_config_ops = {
1145 .add_virtual_intf = ieee80211_add_iface, 1290 .add_virtual_intf = ieee80211_add_iface,
1146 .del_virtual_intf = ieee80211_del_iface, 1291 .del_virtual_intf = ieee80211_del_iface,
@@ -1149,6 +1294,7 @@ struct cfg80211_ops mac80211_config_ops = {
1149 .del_key = ieee80211_del_key, 1294 .del_key = ieee80211_del_key,
1150 .get_key = ieee80211_get_key, 1295 .get_key = ieee80211_get_key,
1151 .set_default_key = ieee80211_config_default_key, 1296 .set_default_key = ieee80211_config_default_key,
1297 .set_default_mgmt_key = ieee80211_config_default_mgmt_key,
1152 .add_beacon = ieee80211_add_beacon, 1298 .add_beacon = ieee80211_add_beacon,
1153 .set_beacon = ieee80211_set_beacon, 1299 .set_beacon = ieee80211_set_beacon,
1154 .del_beacon = ieee80211_del_beacon, 1300 .del_beacon = ieee80211_del_beacon,
@@ -1169,4 +1315,11 @@ struct cfg80211_ops mac80211_config_ops = {
1169 .change_bss = ieee80211_change_bss, 1315 .change_bss = ieee80211_change_bss,
1170 .set_txq_params = ieee80211_set_txq_params, 1316 .set_txq_params = ieee80211_set_txq_params,
1171 .set_channel = ieee80211_set_channel, 1317 .set_channel = ieee80211_set_channel,
1318 .suspend = ieee80211_suspend,
1319 .resume = ieee80211_resume,
1320 .scan = ieee80211_scan,
1321 .auth = ieee80211_auth,
1322 .assoc = ieee80211_assoc,
1323 .deauth = ieee80211_deauth,
1324 .disassoc = ieee80211_disassoc,
1172}; 1325};
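
The hunk above wires the new suspend/resume and SME entry points (scan, auth, assoc, deauth, disassoc) into mac80211_config_ops, and defines the PM callbacks to NULL when CONFIG_PM is off so the core can treat them as absent. A minimal stand-alone sketch of that pattern, assuming nothing about cfg80211 internals beyond "a NULL callback means unsupported"; the names below are illustrative, not mac80211's:

/* Minimal sketch (not cfg80211 itself) of an ops table where optional
 * callbacks are left NULL and the caller falls back to -EOPNOTSUPP,
 * mirroring the "#define ieee80211_suspend NULL" pattern above. */
#include <stdio.h>
#include <errno.h>

struct demo_ops {
	int (*scan)(void);
	int (*suspend)(void);	/* optional: may be NULL */
	int (*resume)(void);	/* optional: may be NULL */
};

static int demo_scan(void) { return 0; }

static const struct demo_ops ops = {
	.scan = demo_scan,
	/* .suspend/.resume intentionally left NULL, as when CONFIG_PM is off */
};

static int call_suspend(const struct demo_ops *o)
{
	if (!o->suspend)
		return -EOPNOTSUPP;	/* caller copes with a missing op */
	return o->suspend();
}

int main(void)
{
	printf("scan -> %d\n", ops.scan());
	printf("suspend -> %d\n", call_suspend(&ops));
	return 0;
}

Leaving optional callbacks NULL rather than supplying empty bodies lets the caller report "not supported" uniformly instead of silently succeeding.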
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 2697a2fe608f..210b9b6fecd2 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -40,6 +40,10 @@ static const struct file_operations name## _ops = { \
40 local->debugfs.name = debugfs_create_file(#name, 0400, phyd, \ 40 local->debugfs.name = debugfs_create_file(#name, 0400, phyd, \
41 local, &name## _ops); 41 local, &name## _ops);
42 42
43#define DEBUGFS_ADD_MODE(name, mode) \
44 local->debugfs.name = debugfs_create_file(#name, mode, phyd, \
45 local, &name## _ops);
46
43#define DEBUGFS_DEL(name) \ 47#define DEBUGFS_DEL(name) \
44 debugfs_remove(local->debugfs.name); \ 48 debugfs_remove(local->debugfs.name); \
45 local->debugfs.name = NULL; 49 local->debugfs.name = NULL;
@@ -57,11 +61,80 @@ DEBUGFS_READONLY_FILE(long_retry_limit, 20, "%d",
57 local->hw.conf.long_frame_max_tx_count); 61 local->hw.conf.long_frame_max_tx_count);
58DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d", 62DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d",
59 local->total_ps_buffered); 63 local->total_ps_buffered);
60DEBUGFS_READONLY_FILE(wep_iv, 20, "%#06x", 64DEBUGFS_READONLY_FILE(wep_iv, 20, "%#08x",
61 local->wep_iv & 0xffffff); 65 local->wep_iv & 0xffffff);
62DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", 66DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s",
63 local->rate_ctrl ? local->rate_ctrl->ops->name : "<unset>"); 67 local->rate_ctrl ? local->rate_ctrl->ops->name : "<unset>");
64 68
69static ssize_t tsf_read(struct file *file, char __user *user_buf,
70 size_t count, loff_t *ppos)
71{
72 struct ieee80211_local *local = file->private_data;
73 u64 tsf = 0;
74 char buf[100];
75
76 if (local->ops->get_tsf)
77 tsf = local->ops->get_tsf(local_to_hw(local));
78
79 snprintf(buf, sizeof(buf), "0x%016llx\n", (unsigned long long) tsf);
80
81 return simple_read_from_buffer(user_buf, count, ppos, buf, 19);
82}
83
84static ssize_t tsf_write(struct file *file,
85 const char __user *user_buf,
86 size_t count, loff_t *ppos)
87{
88 struct ieee80211_local *local = file->private_data;
89 unsigned long long tsf;
90 char buf[100];
91 size_t len;
92
93 len = min(count, sizeof(buf) - 1);
94 if (copy_from_user(buf, user_buf, len))
95 return -EFAULT;
96 buf[len] = '\0';
97
98 if (strncmp(buf, "reset", 5) == 0) {
99 if (local->ops->reset_tsf) {
100 local->ops->reset_tsf(local_to_hw(local));
101 printk(KERN_INFO "%s: debugfs reset TSF\n", wiphy_name(local->hw.wiphy));
102 }
103 } else {
104 tsf = simple_strtoul(buf, NULL, 0);
105 if (local->ops->set_tsf) {
106 local->ops->set_tsf(local_to_hw(local), tsf);
107 printk(KERN_INFO "%s: debugfs set TSF to %#018llx\n", wiphy_name(local->hw.wiphy), tsf);
108 }
109 }
110
111 return count;
112}
113
114static const struct file_operations tsf_ops = {
115 .read = tsf_read,
116 .write = tsf_write,
117 .open = mac80211_open_file_generic
118};
119
120static ssize_t reset_write(struct file *file, const char __user *user_buf,
121 size_t count, loff_t *ppos)
122{
123 struct ieee80211_local *local = file->private_data;
124
125 rtnl_lock();
126 __ieee80211_suspend(&local->hw);
127 __ieee80211_resume(&local->hw);
128 rtnl_unlock();
129
130 return count;
131}
132
133static const struct file_operations reset_ops = {
134 .write = reset_write,
135 .open = mac80211_open_file_generic,
136};
137
65/* statistics stuff */ 138/* statistics stuff */
66 139
67#define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) \ 140#define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) \
@@ -136,8 +209,6 @@ DEBUGFS_STATS_FILE(multicast_received_frame_count, 20, "%u",
136 local->dot11MulticastReceivedFrameCount); 209 local->dot11MulticastReceivedFrameCount);
137DEBUGFS_STATS_FILE(transmitted_frame_count, 20, "%u", 210DEBUGFS_STATS_FILE(transmitted_frame_count, 20, "%u",
138 local->dot11TransmittedFrameCount); 211 local->dot11TransmittedFrameCount);
139DEBUGFS_STATS_FILE(wep_undecryptable_count, 20, "%u",
140 local->dot11WEPUndecryptableCount);
141#ifdef CONFIG_MAC80211_DEBUG_COUNTERS 212#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
142DEBUGFS_STATS_FILE(tx_handlers_drop, 20, "%u", 213DEBUGFS_STATS_FILE(tx_handlers_drop, 20, "%u",
143 local->tx_handlers_drop); 214 local->tx_handlers_drop);
@@ -204,6 +275,8 @@ void debugfs_hw_add(struct ieee80211_local *local)
204 DEBUGFS_ADD(long_retry_limit); 275 DEBUGFS_ADD(long_retry_limit);
205 DEBUGFS_ADD(total_ps_buffered); 276 DEBUGFS_ADD(total_ps_buffered);
206 DEBUGFS_ADD(wep_iv); 277 DEBUGFS_ADD(wep_iv);
278 DEBUGFS_ADD(tsf);
279 DEBUGFS_ADD_MODE(reset, 0200);
207 280
208 statsd = debugfs_create_dir("statistics", phyd); 281 statsd = debugfs_create_dir("statistics", phyd);
209 local->debugfs.statistics = statsd; 282 local->debugfs.statistics = statsd;
@@ -221,7 +294,6 @@ void debugfs_hw_add(struct ieee80211_local *local)
221 DEBUGFS_STATS_ADD(received_fragment_count); 294 DEBUGFS_STATS_ADD(received_fragment_count);
222 DEBUGFS_STATS_ADD(multicast_received_frame_count); 295 DEBUGFS_STATS_ADD(multicast_received_frame_count);
223 DEBUGFS_STATS_ADD(transmitted_frame_count); 296 DEBUGFS_STATS_ADD(transmitted_frame_count);
224 DEBUGFS_STATS_ADD(wep_undecryptable_count);
225#ifdef CONFIG_MAC80211_DEBUG_COUNTERS 297#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
226 DEBUGFS_STATS_ADD(tx_handlers_drop); 298 DEBUGFS_STATS_ADD(tx_handlers_drop);
227 DEBUGFS_STATS_ADD(tx_handlers_queued); 299 DEBUGFS_STATS_ADD(tx_handlers_queued);
@@ -258,6 +330,8 @@ void debugfs_hw_del(struct ieee80211_local *local)
258 DEBUGFS_DEL(long_retry_limit); 330 DEBUGFS_DEL(long_retry_limit);
259 DEBUGFS_DEL(total_ps_buffered); 331 DEBUGFS_DEL(total_ps_buffered);
260 DEBUGFS_DEL(wep_iv); 332 DEBUGFS_DEL(wep_iv);
333 DEBUGFS_DEL(tsf);
334 DEBUGFS_DEL(reset);
261 335
262 DEBUGFS_STATS_DEL(transmitted_fragment_count); 336 DEBUGFS_STATS_DEL(transmitted_fragment_count);
263 DEBUGFS_STATS_DEL(multicast_transmitted_frame_count); 337 DEBUGFS_STATS_DEL(multicast_transmitted_frame_count);
@@ -268,7 +342,6 @@ void debugfs_hw_del(struct ieee80211_local *local)
268 DEBUGFS_STATS_DEL(received_fragment_count); 342 DEBUGFS_STATS_DEL(received_fragment_count);
269 DEBUGFS_STATS_DEL(multicast_received_frame_count); 343 DEBUGFS_STATS_DEL(multicast_received_frame_count);
270 DEBUGFS_STATS_DEL(transmitted_frame_count); 344 DEBUGFS_STATS_DEL(transmitted_frame_count);
271 DEBUGFS_STATS_DEL(wep_undecryptable_count);
272 DEBUGFS_STATS_DEL(num_scans); 345 DEBUGFS_STATS_DEL(num_scans);
273#ifdef CONFIG_MAC80211_DEBUG_COUNTERS 346#ifdef CONFIG_MAC80211_DEBUG_COUNTERS
274 DEBUGFS_STATS_DEL(tx_handlers_drop); 347 DEBUGFS_STATS_DEL(tx_handlers_drop);
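
The debugfs hunk above adds a read/write tsf file (which accepts either a numeric value or the literal string "reset") and a write-only reset file created with mode 0200. A hedged user-space sketch of how these might be exercised; the phy0 path is an assumption about where debugfs is mounted and how the wiphy is named on a given system:

/* Hedged user-space sketch: poke the new debugfs entries from the hunk
 * above. The path assumes debugfs at /sys/kernel/debug and a wiphy
 * named phy0 -- adjust for your system. */
#include <stdio.h>

int main(void)
{
	const char *tsf_path = "/sys/kernel/debug/ieee80211/phy0/tsf";
	char buf[32];
	FILE *f;

	/* Read the current TSF; the kernel side formats it as "0x%016llx\n". */
	f = fopen(tsf_path, "r");
	if (f) {
		if (fgets(buf, sizeof(buf), f))
			printf("TSF: %s", buf);
		fclose(f);
	}

	/* Writing the literal string "reset" asks the driver to reset the
	 * TSF; it is only honoured if the driver implements ->reset_tsf. */
	f = fopen(tsf_path, "w");
	if (f) {
		fputs("reset", f);
		fclose(f);
	}
	return 0;
}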
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 6424ac565ae0..99c752588b30 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -76,6 +76,9 @@ static ssize_t key_algorithm_read(struct file *file,
76 case ALG_CCMP: 76 case ALG_CCMP:
77 alg = "CCMP\n"; 77 alg = "CCMP\n";
78 break; 78 break;
79 case ALG_AES_CMAC:
80 alg = "AES-128-CMAC\n";
81 break;
79 default: 82 default:
80 return 0; 83 return 0;
81 } 84 }
@@ -105,6 +108,12 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
105 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", 108 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
106 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); 109 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]);
107 break; 110 break;
111 case ALG_AES_CMAC:
112 tpn = key->u.aes_cmac.tx_pn;
113 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
114 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4],
115 tpn[5]);
116 break;
108 default: 117 default:
109 return 0; 118 return 0;
110 } 119 }
@@ -142,6 +151,14 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
142 } 151 }
143 len = p - buf; 152 len = p - buf;
144 break; 153 break;
154 case ALG_AES_CMAC:
155 rpn = key->u.aes_cmac.rx_pn;
156 p += scnprintf(p, sizeof(buf)+buf-p,
157 "%02x%02x%02x%02x%02x%02x\n",
158 rpn[0], rpn[1], rpn[2],
159 rpn[3], rpn[4], rpn[5]);
160 len = p - buf;
161 break;
145 default: 162 default:
146 return 0; 163 return 0;
147 } 164 }
@@ -156,13 +173,40 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf,
156 char buf[20]; 173 char buf[20];
157 int len; 174 int len;
158 175
159 if (key->conf.alg != ALG_CCMP) 176 switch (key->conf.alg) {
177 case ALG_CCMP:
178 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays);
179 break;
180 case ALG_AES_CMAC:
181 len = scnprintf(buf, sizeof(buf), "%u\n",
182 key->u.aes_cmac.replays);
183 break;
184 default:
160 return 0; 185 return 0;
161 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); 186 }
162 return simple_read_from_buffer(userbuf, count, ppos, buf, len); 187 return simple_read_from_buffer(userbuf, count, ppos, buf, len);
163} 188}
164KEY_OPS(replays); 189KEY_OPS(replays);
165 190
191static ssize_t key_icverrors_read(struct file *file, char __user *userbuf,
192 size_t count, loff_t *ppos)
193{
194 struct ieee80211_key *key = file->private_data;
195 char buf[20];
196 int len;
197
198 switch (key->conf.alg) {
199 case ALG_AES_CMAC:
200 len = scnprintf(buf, sizeof(buf), "%u\n",
201 key->u.aes_cmac.icverrors);
202 break;
203 default:
204 return 0;
205 }
206 return simple_read_from_buffer(userbuf, count, ppos, buf, len);
207}
208KEY_OPS(icverrors);
209
166static ssize_t key_key_read(struct file *file, char __user *userbuf, 210static ssize_t key_key_read(struct file *file, char __user *userbuf,
167 size_t count, loff_t *ppos) 211 size_t count, loff_t *ppos)
168{ 212{
@@ -222,6 +266,7 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key)
222 DEBUGFS_ADD(tx_spec); 266 DEBUGFS_ADD(tx_spec);
223 DEBUGFS_ADD(rx_spec); 267 DEBUGFS_ADD(rx_spec);
224 DEBUGFS_ADD(replays); 268 DEBUGFS_ADD(replays);
269 DEBUGFS_ADD(icverrors);
225 DEBUGFS_ADD(key); 270 DEBUGFS_ADD(key);
226 DEBUGFS_ADD(ifindex); 271 DEBUGFS_ADD(ifindex);
227}; 272};
@@ -243,6 +288,7 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key)
243 DEBUGFS_DEL(tx_spec); 288 DEBUGFS_DEL(tx_spec);
244 DEBUGFS_DEL(rx_spec); 289 DEBUGFS_DEL(rx_spec);
245 DEBUGFS_DEL(replays); 290 DEBUGFS_DEL(replays);
291 DEBUGFS_DEL(icverrors);
246 DEBUGFS_DEL(key); 292 DEBUGFS_DEL(key);
247 DEBUGFS_DEL(ifindex); 293 DEBUGFS_DEL(ifindex);
248 294
@@ -280,6 +326,35 @@ void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata)
280 sdata->common_debugfs.default_key = NULL; 326 sdata->common_debugfs.default_key = NULL;
281} 327}
282 328
329void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata)
330{
331 char buf[50];
332 struct ieee80211_key *key;
333
334 if (!sdata->debugfsdir)
335 return;
336
337 /* this is running under the key lock */
338
339 key = sdata->default_mgmt_key;
340 if (key) {
341 sprintf(buf, "../keys/%d", key->debugfs.cnt);
342 sdata->common_debugfs.default_mgmt_key =
343 debugfs_create_symlink("default_mgmt_key",
344 sdata->debugfsdir, buf);
345 } else
346 ieee80211_debugfs_key_remove_mgmt_default(sdata);
347}
348
349void ieee80211_debugfs_key_remove_mgmt_default(struct ieee80211_sub_if_data *sdata)
350{
351 if (!sdata)
352 return;
353
354 debugfs_remove(sdata->common_debugfs.default_mgmt_key);
355 sdata->common_debugfs.default_mgmt_key = NULL;
356}
357
283void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, 358void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key,
284 struct sta_info *sta) 359 struct sta_info *sta)
285{ 360{
diff --git a/net/mac80211/debugfs_key.h b/net/mac80211/debugfs_key.h
index b1a3754ee240..54717b4e1371 100644
--- a/net/mac80211/debugfs_key.h
+++ b/net/mac80211/debugfs_key.h
@@ -6,6 +6,10 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key);
6void ieee80211_debugfs_key_remove(struct ieee80211_key *key); 6void ieee80211_debugfs_key_remove(struct ieee80211_key *key);
7void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata); 7void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata);
8void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata); 8void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata);
9void ieee80211_debugfs_key_add_mgmt_default(
10 struct ieee80211_sub_if_data *sdata);
11void ieee80211_debugfs_key_remove_mgmt_default(
12 struct ieee80211_sub_if_data *sdata);
9void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, 13void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key,
10 struct sta_info *sta); 14 struct sta_info *sta);
11#else 15#else
@@ -19,6 +23,12 @@ static inline void ieee80211_debugfs_key_add_default(
19static inline void ieee80211_debugfs_key_remove_default( 23static inline void ieee80211_debugfs_key_remove_default(
20 struct ieee80211_sub_if_data *sdata) 24 struct ieee80211_sub_if_data *sdata)
21{} 25{}
26static inline void ieee80211_debugfs_key_add_mgmt_default(
27 struct ieee80211_sub_if_data *sdata)
28{}
29static inline void ieee80211_debugfs_key_remove_mgmt_default(
30 struct ieee80211_sub_if_data *sdata)
31{}
22static inline void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, 32static inline void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key,
23 struct sta_info *sta) 33 struct sta_info *sta)
24{} 34{}
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index c54219301724..e3420329f4e6 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -94,31 +94,31 @@ IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC);
94IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC); 94IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC);
95IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC); 95IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC);
96 96
97/* STA/IBSS attributes */ 97/* STA attributes */
98IEEE80211_IF_FILE(state, u.sta.state, DEC); 98IEEE80211_IF_FILE(state, u.mgd.state, DEC);
99IEEE80211_IF_FILE(bssid, u.sta.bssid, MAC); 99IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
100IEEE80211_IF_FILE(prev_bssid, u.sta.prev_bssid, MAC); 100IEEE80211_IF_FILE(prev_bssid, u.mgd.prev_bssid, MAC);
101IEEE80211_IF_FILE(ssid_len, u.sta.ssid_len, SIZE); 101IEEE80211_IF_FILE(ssid_len, u.mgd.ssid_len, SIZE);
102IEEE80211_IF_FILE(aid, u.sta.aid, DEC); 102IEEE80211_IF_FILE(aid, u.mgd.aid, DEC);
103IEEE80211_IF_FILE(ap_capab, u.sta.ap_capab, HEX); 103IEEE80211_IF_FILE(ap_capab, u.mgd.ap_capab, HEX);
104IEEE80211_IF_FILE(capab, u.sta.capab, HEX); 104IEEE80211_IF_FILE(capab, u.mgd.capab, HEX);
105IEEE80211_IF_FILE(extra_ie_len, u.sta.extra_ie_len, SIZE); 105IEEE80211_IF_FILE(extra_ie_len, u.mgd.extra_ie_len, SIZE);
106IEEE80211_IF_FILE(auth_tries, u.sta.auth_tries, DEC); 106IEEE80211_IF_FILE(auth_tries, u.mgd.auth_tries, DEC);
107IEEE80211_IF_FILE(assoc_tries, u.sta.assoc_tries, DEC); 107IEEE80211_IF_FILE(assoc_tries, u.mgd.assoc_tries, DEC);
108IEEE80211_IF_FILE(auth_algs, u.sta.auth_algs, HEX); 108IEEE80211_IF_FILE(auth_algs, u.mgd.auth_algs, HEX);
109IEEE80211_IF_FILE(auth_alg, u.sta.auth_alg, DEC); 109IEEE80211_IF_FILE(auth_alg, u.mgd.auth_alg, DEC);
110IEEE80211_IF_FILE(auth_transaction, u.sta.auth_transaction, DEC); 110IEEE80211_IF_FILE(auth_transaction, u.mgd.auth_transaction, DEC);
111 111
112static ssize_t ieee80211_if_fmt_flags( 112static ssize_t ieee80211_if_fmt_flags(
113 const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) 113 const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
114{ 114{
115 return scnprintf(buf, buflen, "%s%s%s%s%s%s%s\n", 115 return scnprintf(buf, buflen, "%s%s%s%s%s%s%s\n",
116 sdata->u.sta.flags & IEEE80211_STA_SSID_SET ? "SSID\n" : "", 116 sdata->u.mgd.flags & IEEE80211_STA_SSID_SET ? "SSID\n" : "",
117 sdata->u.sta.flags & IEEE80211_STA_BSSID_SET ? "BSSID\n" : "", 117 sdata->u.mgd.flags & IEEE80211_STA_BSSID_SET ? "BSSID\n" : "",
118 sdata->u.sta.flags & IEEE80211_STA_PREV_BSSID_SET ? "prev BSSID\n" : "", 118 sdata->u.mgd.flags & IEEE80211_STA_PREV_BSSID_SET ? "prev BSSID\n" : "",
119 sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "", 119 sdata->u.mgd.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "",
120 sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "", 120 sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "",
121 sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "", 121 sdata->u.mgd.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "",
122 sdata->vif.bss_conf.use_cts_prot ? "CTS prot\n" : ""); 122 sdata->vif.bss_conf.use_cts_prot ? "CTS prot\n" : "");
123} 123}
124__IEEE80211_IF_FILE(flags); 124__IEEE80211_IF_FILE(flags);
@@ -283,9 +283,11 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
283#endif 283#endif
284 break; 284 break;
285 case NL80211_IFTYPE_STATION: 285 case NL80211_IFTYPE_STATION:
286 case NL80211_IFTYPE_ADHOC:
287 add_sta_files(sdata); 286 add_sta_files(sdata);
288 break; 287 break;
288 case NL80211_IFTYPE_ADHOC:
289 /* XXX */
290 break;
289 case NL80211_IFTYPE_AP: 291 case NL80211_IFTYPE_AP:
290 add_ap_files(sdata); 292 add_ap_files(sdata);
291 break; 293 break;
@@ -418,9 +420,11 @@ static void del_files(struct ieee80211_sub_if_data *sdata)
418#endif 420#endif
419 break; 421 break;
420 case NL80211_IFTYPE_STATION: 422 case NL80211_IFTYPE_STATION:
421 case NL80211_IFTYPE_ADHOC:
422 del_sta_files(sdata); 423 del_sta_files(sdata);
423 break; 424 break;
425 case NL80211_IFTYPE_ADHOC:
426 /* XXX */
427 break;
424 case NL80211_IFTYPE_AP: 428 case NL80211_IFTYPE_AP:
425 del_ap_files(sdata); 429 del_ap_files(sdata);
426 break; 430 break;
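
Most of this hunk mechanically repoints the macro-generated per-field debugfs files from u.sta to u.mgd and splits the ad-hoc case out of the station case. A sketch of the macro-per-field idea behind IEEE80211_IF_FILE in plain C; the struct and macro names are illustrative, not the mac80211 ones:

/* One macro expansion generates a small formatter per struct member,
 * the same trick the IEEE80211_IF_FILE() lines above rely on. */
#include <stdio.h>

struct managed_state {
	int state;
	unsigned int aid;
};

#define IF_FILE(name, field, fmt)					\
static int fmt_##name(const struct managed_state *s, char *buf,	\
		      int buflen)					\
{									\
	return snprintf(buf, buflen, fmt "\n", s->field);		\
}

IF_FILE(state, state, "%d")
IF_FILE(aid, aid, "%u")

int main(void)
{
	struct managed_state s = { .state = 3, .aid = 1 };
	char buf[32];

	fmt_state(&s, buf, sizeof(buf));
	fputs(buf, stdout);
	fmt_aid(&s, buf, sizeof(buf));
	fputs(buf, stdout);
	return 0;
}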
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index a2fbe0131312..90230c718b5b 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -67,14 +67,15 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
67 char buf[100]; 67 char buf[100];
68 struct sta_info *sta = file->private_data; 68 struct sta_info *sta = file->private_data;
69 u32 staflags = get_sta_flags(sta); 69 u32 staflags = get_sta_flags(sta);
70 int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s", 70 int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s",
71 staflags & WLAN_STA_AUTH ? "AUTH\n" : "", 71 staflags & WLAN_STA_AUTH ? "AUTH\n" : "",
72 staflags & WLAN_STA_ASSOC ? "ASSOC\n" : "", 72 staflags & WLAN_STA_ASSOC ? "ASSOC\n" : "",
73 staflags & WLAN_STA_PS ? "PS\n" : "", 73 staflags & WLAN_STA_PS ? "PS\n" : "",
74 staflags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "", 74 staflags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "",
75 staflags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "", 75 staflags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "",
76 staflags & WLAN_STA_WME ? "WME\n" : "", 76 staflags & WLAN_STA_WME ? "WME\n" : "",
77 staflags & WLAN_STA_WDS ? "WDS\n" : ""); 77 staflags & WLAN_STA_WDS ? "WDS\n" : "",
78 staflags & WLAN_STA_MFP ? "MFP\n" : "");
78 return simple_read_from_buffer(userbuf, count, ppos, buf, res); 79 return simple_read_from_buffer(userbuf, count, ppos, buf, res);
79} 80}
80STA_OPS(flags); 81STA_OPS(flags);
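
sta_flags_read above now also reports MFP alongside the other station flags, using one scnprintf with a conditional string per bit. A stand-alone sketch of the same flag-dump style, with made-up flag values rather than the kernel's WLAN_STA_* bits:

/* Each set bit contributes one line to the output buffer, as in the
 * hunk above. Flag values are illustrative only. */
#include <stdio.h>

#define F_AUTH	0x1
#define F_ASSOC	0x2
#define F_MFP	0x4

int main(void)
{
	unsigned int flags = F_AUTH | F_ASSOC | F_MFP;
	char buf[100];
	int res;

	res = snprintf(buf, sizeof(buf), "%s%s%s",
		       flags & F_AUTH  ? "AUTH\n"  : "",
		       flags & F_ASSOC ? "ASSOC\n" : "",
		       flags & F_MFP   ? "MFP\n"   : "");
	fwrite(buf, 1, res, stdout);
	return 0;
}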
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index c5c0c5271096..4e3c72f20de7 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -17,8 +17,7 @@
17#include <net/wireless.h> 17#include <net/wireless.h>
18#include <net/mac80211.h> 18#include <net/mac80211.h>
19#include "ieee80211_i.h" 19#include "ieee80211_i.h"
20#include "sta_info.h" 20#include "rate.h"
21#include "wme.h"
22 21
23void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, 22void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
24 struct ieee80211_ht_cap *ht_cap_ie, 23 struct ieee80211_ht_cap *ht_cap_ie,
@@ -95,7 +94,9 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
95{ 94{
96 struct ieee80211_local *local = sdata->local; 95 struct ieee80211_local *local = sdata->local;
97 struct ieee80211_supported_band *sband; 96 struct ieee80211_supported_band *sband;
97 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
98 struct ieee80211_bss_ht_conf ht; 98 struct ieee80211_bss_ht_conf ht;
99 struct sta_info *sta;
99 u32 changed = 0; 100 u32 changed = 0;
100 bool enable_ht = true, ht_changed; 101 bool enable_ht = true, ht_changed;
101 enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; 102 enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
@@ -130,14 +131,25 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
130 } 131 }
131 } 132 }
132 133
133 ht_changed = local->hw.conf.ht.enabled != enable_ht || 134 ht_changed = conf_is_ht(&local->hw.conf) != enable_ht ||
134 channel_type != local->hw.conf.ht.channel_type; 135 channel_type != local->hw.conf.channel_type;
135 136
136 local->oper_channel_type = channel_type; 137 local->oper_channel_type = channel_type;
137 local->hw.conf.ht.enabled = enable_ht;
138 138
139 if (ht_changed) 139 if (ht_changed) {
140 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_HT); 140 /* channel_type change automatically detected */
141 ieee80211_hw_config(local, 0);
142
143 rcu_read_lock();
144
145 sta = sta_info_get(local, ifmgd->bssid);
146 if (sta)
147 rate_control_rate_update(local, sband, sta,
148 IEEE80211_RC_HT_CHANGED);
149
150 rcu_read_unlock();
151
152 }
141 153
142 /* disable HT */ 154 /* disable HT */
143 if (!enable_ht) 155 if (!enable_ht)
@@ -154,108 +166,22 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
154 return changed; 166 return changed;
155} 167}
156 168
157static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, 169void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta)
158 const u8 *da, u16 tid,
159 u8 dialog_token, u16 start_seq_num,
160 u16 agg_size, u16 timeout)
161{ 170{
162 struct ieee80211_local *local = sdata->local; 171 int i;
163 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
164 struct sk_buff *skb;
165 struct ieee80211_mgmt *mgmt;
166 u16 capab;
167
168 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
169
170 if (!skb) {
171 printk(KERN_ERR "%s: failed to allocate buffer "
172 "for addba request frame\n", sdata->dev->name);
173 return;
174 }
175 skb_reserve(skb, local->hw.extra_tx_headroom);
176 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
177 memset(mgmt, 0, 24);
178 memcpy(mgmt->da, da, ETH_ALEN);
179 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
180 if (sdata->vif.type == NL80211_IFTYPE_AP)
181 memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
182 else
183 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
184
185 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
186 IEEE80211_STYPE_ACTION);
187
188 skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req));
189
190 mgmt->u.action.category = WLAN_CATEGORY_BACK;
191 mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ;
192
193 mgmt->u.action.u.addba_req.dialog_token = dialog_token;
194 capab = (u16)(1 << 1); /* bit 1 aggregation policy */
195 capab |= (u16)(tid << 2); /* bit 5:2 TID number */
196 capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggregation */
197
198 mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab);
199
200 mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout);
201 mgmt->u.action.u.addba_req.start_seq_num =
202 cpu_to_le16(start_seq_num << 4);
203
204 ieee80211_tx_skb(sdata, skb, 0);
205}
206
207static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
208 u8 dialog_token, u16 status, u16 policy,
209 u16 buf_size, u16 timeout)
210{
211 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
212 struct ieee80211_local *local = sdata->local;
213 struct sk_buff *skb;
214 struct ieee80211_mgmt *mgmt;
215 u16 capab;
216
217 skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
218 172
219 if (!skb) { 173 for (i = 0; i < STA_TID_NUM; i++) {
220 printk(KERN_DEBUG "%s: failed to allocate buffer " 174 __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR);
221 "for addba resp frame\n", sdata->dev->name); 175 __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
222 return; 176 WLAN_REASON_QSTA_LEAVE_QBSS);
223 } 177 }
224
225 skb_reserve(skb, local->hw.extra_tx_headroom);
226 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
227 memset(mgmt, 0, 24);
228 memcpy(mgmt->da, da, ETH_ALEN);
229 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
230 if (sdata->vif.type == NL80211_IFTYPE_AP)
231 memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
232 else
233 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
234 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
235 IEEE80211_STYPE_ACTION);
236
237 skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp));
238 mgmt->u.action.category = WLAN_CATEGORY_BACK;
239 mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
240 mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
241
242 capab = (u16)(policy << 1); /* bit 1 aggregation policy */
243 capab |= (u16)(tid << 2); /* bit 5:2 TID number */
244 capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */
245
246 mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab);
247 mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout);
248 mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
249
250 ieee80211_tx_skb(sdata, skb, 0);
251} 178}
252 179
253static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, 180void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
254 const u8 *da, u16 tid, 181 const u8 *da, u16 tid,
255 u16 initiator, u16 reason_code) 182 u16 initiator, u16 reason_code)
256{ 183{
257 struct ieee80211_local *local = sdata->local; 184 struct ieee80211_local *local = sdata->local;
258 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
259 struct sk_buff *skb; 185 struct sk_buff *skb;
260 struct ieee80211_mgmt *mgmt; 186 struct ieee80211_mgmt *mgmt;
261 u16 params; 187 u16 params;
@@ -273,10 +199,12 @@ static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
273 memset(mgmt, 0, 24); 199 memset(mgmt, 0, 24);
274 memcpy(mgmt->da, da, ETH_ALEN); 200 memcpy(mgmt->da, da, ETH_ALEN);
275 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); 201 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
276 if (sdata->vif.type == NL80211_IFTYPE_AP) 202 if (sdata->vif.type == NL80211_IFTYPE_AP ||
203 sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
277 memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); 204 memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN);
278 else 205 else if (sdata->vif.type == NL80211_IFTYPE_STATION)
279 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); 206 memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN);
207
280 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | 208 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
281 IEEE80211_STYPE_ACTION); 209 IEEE80211_STYPE_ACTION);
282 210
@@ -290,770 +218,7 @@ static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
290 mgmt->u.action.u.delba.params = cpu_to_le16(params); 218 mgmt->u.action.u.delba.params = cpu_to_le16(params);
291 mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code); 219 mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code);
292 220
293 ieee80211_tx_skb(sdata, skb, 0); 221 ieee80211_tx_skb(sdata, skb, 1);
294}
295
296void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn)
297{
298 struct ieee80211_local *local = sdata->local;
299 struct sk_buff *skb;
300 struct ieee80211_bar *bar;
301 u16 bar_control = 0;
302
303 skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom);
304 if (!skb) {
305 printk(KERN_ERR "%s: failed to allocate buffer for "
306 "bar frame\n", sdata->dev->name);
307 return;
308 }
309 skb_reserve(skb, local->hw.extra_tx_headroom);
310 bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar));
311 memset(bar, 0, sizeof(*bar));
312 bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
313 IEEE80211_STYPE_BACK_REQ);
314 memcpy(bar->ra, ra, ETH_ALEN);
315 memcpy(bar->ta, sdata->dev->dev_addr, ETH_ALEN);
316 bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL;
317 bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA;
318 bar_control |= (u16)(tid << 12);
319 bar->control = cpu_to_le16(bar_control);
320 bar->start_seq_num = cpu_to_le16(ssn);
321
322 ieee80211_tx_skb(sdata, skb, 0);
323}
324
325void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid,
326 u16 initiator, u16 reason)
327{
328 struct ieee80211_local *local = sdata->local;
329 struct ieee80211_hw *hw = &local->hw;
330 struct sta_info *sta;
331 int ret, i;
332
333 rcu_read_lock();
334
335 sta = sta_info_get(local, ra);
336 if (!sta) {
337 rcu_read_unlock();
338 return;
339 }
340
341 /* check if TID is in operational state */
342 spin_lock_bh(&sta->lock);
343 if (sta->ampdu_mlme.tid_state_rx[tid]
344 != HT_AGG_STATE_OPERATIONAL) {
345 spin_unlock_bh(&sta->lock);
346 rcu_read_unlock();
347 return;
348 }
349 sta->ampdu_mlme.tid_state_rx[tid] =
350 HT_AGG_STATE_REQ_STOP_BA_MSK |
351 (initiator << HT_AGG_STATE_INITIATOR_SHIFT);
352 spin_unlock_bh(&sta->lock);
353
354 /* stop HW Rx aggregation. ampdu_action existence
355 * already verified in session init so we add the BUG_ON */
356 BUG_ON(!local->ops->ampdu_action);
357
358#ifdef CONFIG_MAC80211_HT_DEBUG
359 printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n",
360 ra, tid);
361#endif /* CONFIG_MAC80211_HT_DEBUG */
362
363 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP,
364 &sta->sta, tid, NULL);
365 if (ret)
366 printk(KERN_DEBUG "HW problem - can not stop rx "
367 "aggregation for tid %d\n", tid);
368
369 /* shutdown timer has not expired */
370 if (initiator != WLAN_BACK_TIMER)
371 del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
372
373 /* check if this is a self generated aggregation halt */
374 if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER)
375 ieee80211_send_delba(sdata, ra, tid, 0, reason);
376
377 /* free the reordering buffer */
378 for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) {
379 if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) {
380 /* release the reordered frames */
381 dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]);
382 sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--;
383 sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL;
384 }
385 }
386 /* free resources */
387 kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf);
388 kfree(sta->ampdu_mlme.tid_rx[tid]);
389 sta->ampdu_mlme.tid_rx[tid] = NULL;
390 sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE;
391
392 rcu_read_unlock();
393}
394
395
396/*
397 * After sending add Block Ack request we activated a timer until
398 * add Block Ack response will arrive from the recipient.
399 * If this timer expires sta_addba_resp_timer_expired will be executed.
400 */
401static void sta_addba_resp_timer_expired(unsigned long data)
402{
403 /* not an elegant detour, but there is no choice as the timer passes
404 * only one argument, and both sta_info and TID are needed, so init
405 * flow in sta_info_create gives the TID as data, while the timer_to_id
406 * array gives the sta through container_of */
407 u16 tid = *(u8 *)data;
408 struct sta_info *temp_sta = container_of((void *)data,
409 struct sta_info, timer_to_tid[tid]);
410
411 struct ieee80211_local *local = temp_sta->local;
412 struct ieee80211_hw *hw = &local->hw;
413 struct sta_info *sta;
414 u8 *state;
415
416 rcu_read_lock();
417
418 sta = sta_info_get(local, temp_sta->sta.addr);
419 if (!sta) {
420 rcu_read_unlock();
421 return;
422 }
423
424 state = &sta->ampdu_mlme.tid_state_tx[tid];
425 /* check if the TID waits for addBA response */
426 spin_lock_bh(&sta->lock);
427 if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
428 spin_unlock_bh(&sta->lock);
429 *state = HT_AGG_STATE_IDLE;
430#ifdef CONFIG_MAC80211_HT_DEBUG
431 printk(KERN_DEBUG "timer expired on tid %d but we are not "
432 "expecting addBA response there", tid);
433#endif
434 goto timer_expired_exit;
435 }
436
437#ifdef CONFIG_MAC80211_HT_DEBUG
438 printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid);
439#endif
440
441 /* go through the state check in stop_BA_session */
442 *state = HT_AGG_STATE_OPERATIONAL;
443 spin_unlock_bh(&sta->lock);
444 ieee80211_stop_tx_ba_session(hw, temp_sta->sta.addr, tid,
445 WLAN_BACK_INITIATOR);
446
447timer_expired_exit:
448 rcu_read_unlock();
449}
450
451void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr)
452{
453 struct ieee80211_local *local = sdata->local;
454 int i;
455
456 for (i = 0; i < STA_TID_NUM; i++) {
457 ieee80211_stop_tx_ba_session(&local->hw, addr, i,
458 WLAN_BACK_INITIATOR);
459 ieee80211_sta_stop_rx_ba_session(sdata, addr, i,
460 WLAN_BACK_RECIPIENT,
461 WLAN_REASON_QSTA_LEAVE_QBSS);
462 }
463}
464
465int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid)
466{
467 struct ieee80211_local *local = hw_to_local(hw);
468 struct sta_info *sta;
469 struct ieee80211_sub_if_data *sdata;
470 u16 start_seq_num;
471 u8 *state;
472 int ret = 0;
473
474 if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION))
475 return -EINVAL;
476
477#ifdef CONFIG_MAC80211_HT_DEBUG
478 printk(KERN_DEBUG "Open BA session requested for %pM tid %u\n",
479 ra, tid);
480#endif /* CONFIG_MAC80211_HT_DEBUG */
481
482 rcu_read_lock();
483
484 sta = sta_info_get(local, ra);
485 if (!sta) {
486#ifdef CONFIG_MAC80211_HT_DEBUG
487 printk(KERN_DEBUG "Could not find the station\n");
488#endif
489 ret = -ENOENT;
490 goto exit;
491 }
492
493 spin_lock_bh(&sta->lock);
494
495 /* we have tried too many times, receiver does not want A-MPDU */
496 if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) {
497 ret = -EBUSY;
498 goto err_unlock_sta;
499 }
500
501 state = &sta->ampdu_mlme.tid_state_tx[tid];
502 /* check if the TID is not in aggregation flow already */
503 if (*state != HT_AGG_STATE_IDLE) {
504#ifdef CONFIG_MAC80211_HT_DEBUG
505 printk(KERN_DEBUG "BA request denied - session is not "
506 "idle on tid %u\n", tid);
507#endif /* CONFIG_MAC80211_HT_DEBUG */
508 ret = -EAGAIN;
509 goto err_unlock_sta;
510 }
511
512 /* prepare A-MPDU MLME for Tx aggregation */
513 sta->ampdu_mlme.tid_tx[tid] =
514 kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC);
515 if (!sta->ampdu_mlme.tid_tx[tid]) {
516#ifdef CONFIG_MAC80211_HT_DEBUG
517 if (net_ratelimit())
518 printk(KERN_ERR "allocate tx mlme to tid %d failed\n",
519 tid);
520#endif
521 ret = -ENOMEM;
522 goto err_unlock_sta;
523 }
524 /* Tx timer */
525 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function =
526 sta_addba_resp_timer_expired;
527 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data =
528 (unsigned long)&sta->timer_to_tid[tid];
529 init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
530
531 if (hw->ampdu_queues) {
532 /* create a new queue for this aggregation */
533 ret = ieee80211_ht_agg_queue_add(local, sta, tid);
534
535 /* in case no queue is available for aggregation,
536 * don't switch to aggregation */
537 if (ret) {
538#ifdef CONFIG_MAC80211_HT_DEBUG
539 printk(KERN_DEBUG "BA request denied - "
540 "queue unavailable for tid %d\n", tid);
541#endif /* CONFIG_MAC80211_HT_DEBUG */
542 goto err_unlock_queue;
543 }
544 }
545 sdata = sta->sdata;
546
547 /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the
548 * call back right away, it must see that the flow has begun */
549 *state |= HT_ADDBA_REQUESTED_MSK;
550
551 /* This is slightly racy because the queue isn't stopped */
552 start_seq_num = sta->tid_seq[tid];
553
554 if (local->ops->ampdu_action)
555 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START,
556 &sta->sta, tid, &start_seq_num);
557
558 if (ret) {
559 /* No need to requeue the packets in the agg queue, since we
560 * held the tx lock: no packet could be enqueued to the newly
561 * allocated queue */
562 if (hw->ampdu_queues)
563 ieee80211_ht_agg_queue_remove(local, sta, tid, 0);
564#ifdef CONFIG_MAC80211_HT_DEBUG
565 printk(KERN_DEBUG "BA request denied - HW unavailable for"
566 " tid %d\n", tid);
567#endif /* CONFIG_MAC80211_HT_DEBUG */
568 *state = HT_AGG_STATE_IDLE;
569 goto err_unlock_queue;
570 }
571
572 /* Will put all the packets in the new SW queue */
573 if (hw->ampdu_queues)
574 ieee80211_requeue(local, ieee802_1d_to_ac[tid]);
575 spin_unlock_bh(&sta->lock);
576
577 /* send an addBA request */
578 sta->ampdu_mlme.dialog_token_allocator++;
579 sta->ampdu_mlme.tid_tx[tid]->dialog_token =
580 sta->ampdu_mlme.dialog_token_allocator;
581 sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num;
582
583
584 ieee80211_send_addba_request(sta->sdata, ra, tid,
585 sta->ampdu_mlme.tid_tx[tid]->dialog_token,
586 sta->ampdu_mlme.tid_tx[tid]->ssn,
587 0x40, 5000);
588 /* activate the timer for the recipient's addBA response */
589 sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires =
590 jiffies + ADDBA_RESP_INTERVAL;
591 add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
592#ifdef CONFIG_MAC80211_HT_DEBUG
593 printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid);
594#endif
595 goto exit;
596
597err_unlock_queue:
598 kfree(sta->ampdu_mlme.tid_tx[tid]);
599 sta->ampdu_mlme.tid_tx[tid] = NULL;
600 ret = -EBUSY;
601err_unlock_sta:
602 spin_unlock_bh(&sta->lock);
603exit:
604 rcu_read_unlock();
605 return ret;
606}
607EXPORT_SYMBOL(ieee80211_start_tx_ba_session);
608
609int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw,
610 u8 *ra, u16 tid,
611 enum ieee80211_back_parties initiator)
612{
613 struct ieee80211_local *local = hw_to_local(hw);
614 struct sta_info *sta;
615 u8 *state;
616 int ret = 0;
617
618 if (tid >= STA_TID_NUM)
619 return -EINVAL;
620
621 rcu_read_lock();
622 sta = sta_info_get(local, ra);
623 if (!sta) {
624 rcu_read_unlock();
625 return -ENOENT;
626 }
627
628 /* check if the TID is in aggregation */
629 state = &sta->ampdu_mlme.tid_state_tx[tid];
630 spin_lock_bh(&sta->lock);
631
632 if (*state != HT_AGG_STATE_OPERATIONAL) {
633 ret = -ENOENT;
634 goto stop_BA_exit;
635 }
636
637#ifdef CONFIG_MAC80211_HT_DEBUG
638 printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n",
639 ra, tid);
640#endif /* CONFIG_MAC80211_HT_DEBUG */
641
642 if (hw->ampdu_queues)
643 ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]);
644
645 *state = HT_AGG_STATE_REQ_STOP_BA_MSK |
646 (initiator << HT_AGG_STATE_INITIATOR_SHIFT);
647
648 if (local->ops->ampdu_action)
649 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP,
650 &sta->sta, tid, NULL);
651
652 /* case HW denied going back to legacy */
653 if (ret) {
654 WARN_ON(ret != -EBUSY);
655 *state = HT_AGG_STATE_OPERATIONAL;
656 if (hw->ampdu_queues)
657 ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
658 goto stop_BA_exit;
659 }
660
661stop_BA_exit:
662 spin_unlock_bh(&sta->lock);
663 rcu_read_unlock();
664 return ret;
665}
666EXPORT_SYMBOL(ieee80211_stop_tx_ba_session);
667
668void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid)
669{
670 struct ieee80211_local *local = hw_to_local(hw);
671 struct sta_info *sta;
672 u8 *state;
673
674 if (tid >= STA_TID_NUM) {
675#ifdef CONFIG_MAC80211_HT_DEBUG
676 printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
677 tid, STA_TID_NUM);
678#endif
679 return;
680 }
681
682 rcu_read_lock();
683 sta = sta_info_get(local, ra);
684 if (!sta) {
685 rcu_read_unlock();
686#ifdef CONFIG_MAC80211_HT_DEBUG
687 printk(KERN_DEBUG "Could not find station: %pM\n", ra);
688#endif
689 return;
690 }
691
692 state = &sta->ampdu_mlme.tid_state_tx[tid];
693 spin_lock_bh(&sta->lock);
694
695 if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
696#ifdef CONFIG_MAC80211_HT_DEBUG
697 printk(KERN_DEBUG "addBA was not requested yet, state is %d\n",
698 *state);
699#endif
700 spin_unlock_bh(&sta->lock);
701 rcu_read_unlock();
702 return;
703 }
704
705 WARN_ON_ONCE(*state & HT_ADDBA_DRV_READY_MSK);
706
707 *state |= HT_ADDBA_DRV_READY_MSK;
708
709 if (*state == HT_AGG_STATE_OPERATIONAL) {
710#ifdef CONFIG_MAC80211_HT_DEBUG
711 printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid);
712#endif
713 if (hw->ampdu_queues)
714 ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
715 }
716 spin_unlock_bh(&sta->lock);
717 rcu_read_unlock();
718}
719EXPORT_SYMBOL(ieee80211_start_tx_ba_cb);
720
721void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid)
722{
723 struct ieee80211_local *local = hw_to_local(hw);
724 struct sta_info *sta;
725 u8 *state;
726 int agg_queue;
727
728 if (tid >= STA_TID_NUM) {
729#ifdef CONFIG_MAC80211_HT_DEBUG
730 printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
731 tid, STA_TID_NUM);
732#endif
733 return;
734 }
735
736#ifdef CONFIG_MAC80211_HT_DEBUG
737 printk(KERN_DEBUG "Stopping Tx BA session for %pM tid %d\n",
738 ra, tid);
739#endif /* CONFIG_MAC80211_HT_DEBUG */
740
741 rcu_read_lock();
742 sta = sta_info_get(local, ra);
743 if (!sta) {
744#ifdef CONFIG_MAC80211_HT_DEBUG
745 printk(KERN_DEBUG "Could not find station: %pM\n", ra);
746#endif
747 rcu_read_unlock();
748 return;
749 }
750 state = &sta->ampdu_mlme.tid_state_tx[tid];
751
752 /* NOTE: no need to use sta->lock in this state check, as
753 * ieee80211_stop_tx_ba_session will let only one stop call to
754 * pass through per sta/tid
755 */
756 if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) {
757#ifdef CONFIG_MAC80211_HT_DEBUG
758 printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n");
759#endif
760 rcu_read_unlock();
761 return;
762 }
763
764 if (*state & HT_AGG_STATE_INITIATOR_MSK)
765 ieee80211_send_delba(sta->sdata, ra, tid,
766 WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
767
768 if (hw->ampdu_queues) {
769 agg_queue = sta->tid_to_tx_q[tid];
770 ieee80211_ht_agg_queue_remove(local, sta, tid, 1);
771
772 /* We just requeued all the frames that were in the
773 * removed queue, and since we might miss a softirq we do
774 * netif_schedule_queue. ieee80211_wake_queue is not used
775 * here as this queue is not necessarily stopped
776 */
777 netif_schedule_queue(netdev_get_tx_queue(local->mdev,
778 agg_queue));
779 }
780 spin_lock_bh(&sta->lock);
781 *state = HT_AGG_STATE_IDLE;
782 sta->ampdu_mlme.addba_req_num[tid] = 0;
783 kfree(sta->ampdu_mlme.tid_tx[tid]);
784 sta->ampdu_mlme.tid_tx[tid] = NULL;
785 spin_unlock_bh(&sta->lock);
786
787 rcu_read_unlock();
788}
789EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb);
790
791void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
792 const u8 *ra, u16 tid)
793{
794 struct ieee80211_local *local = hw_to_local(hw);
795 struct ieee80211_ra_tid *ra_tid;
796 struct sk_buff *skb = dev_alloc_skb(0);
797
798 if (unlikely(!skb)) {
799#ifdef CONFIG_MAC80211_HT_DEBUG
800 if (net_ratelimit())
801 printk(KERN_WARNING "%s: Not enough memory, "
802 "dropping start BA session", skb->dev->name);
803#endif
804 return;
805 }
806 ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
807 memcpy(&ra_tid->ra, ra, ETH_ALEN);
808 ra_tid->tid = tid;
809
810 skb->pkt_type = IEEE80211_ADDBA_MSG;
811 skb_queue_tail(&local->skb_queue, skb);
812 tasklet_schedule(&local->tasklet);
813}
814EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
815
816void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw,
817 const u8 *ra, u16 tid)
818{
819 struct ieee80211_local *local = hw_to_local(hw);
820 struct ieee80211_ra_tid *ra_tid;
821 struct sk_buff *skb = dev_alloc_skb(0);
822
823 if (unlikely(!skb)) {
824#ifdef CONFIG_MAC80211_HT_DEBUG
825 if (net_ratelimit())
826 printk(KERN_WARNING "%s: Not enough memory, "
827 "dropping stop BA session", skb->dev->name);
828#endif
829 return;
830 }
831 ra_tid = (struct ieee80211_ra_tid *) &skb->cb;
832 memcpy(&ra_tid->ra, ra, ETH_ALEN);
833 ra_tid->tid = tid;
834
835 skb->pkt_type = IEEE80211_DELBA_MSG;
836 skb_queue_tail(&local->skb_queue, skb);
837 tasklet_schedule(&local->tasklet);
838}
839EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe);
840
841/*
842 * After accepting the AddBA Request we activated a timer,
843 * resetting it after each frame that arrives from the originator.
844 * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed.
845 */
846static void sta_rx_agg_session_timer_expired(unsigned long data)
847{
848 /* not an elegant detour, but there is no choice as the timer passes
849 * only one argument, and various sta_info are needed here, so init
850 * flow in sta_info_create gives the TID as data, while the timer_to_id
851 * array gives the sta through container_of */
852 u8 *ptid = (u8 *)data;
853 u8 *timer_to_id = ptid - *ptid;
854 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
855 timer_to_tid[0]);
856
857#ifdef CONFIG_MAC80211_HT_DEBUG
858 printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
859#endif
860 ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr,
861 (u16)*ptid, WLAN_BACK_TIMER,
862 WLAN_REASON_QSTA_TIMEOUT);
863}
864
865void ieee80211_process_addba_request(struct ieee80211_local *local,
866 struct sta_info *sta,
867 struct ieee80211_mgmt *mgmt,
868 size_t len)
869{
870 struct ieee80211_hw *hw = &local->hw;
871 struct ieee80211_conf *conf = &hw->conf;
872 struct tid_ampdu_rx *tid_agg_rx;
873 u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status;
874 u8 dialog_token;
875 int ret = -EOPNOTSUPP;
876
877 /* extract session parameters from addba request frame */
878 dialog_token = mgmt->u.action.u.addba_req.dialog_token;
879 timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout);
880 start_seq_num =
881 le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4;
882
883 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab);
884 ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1;
885 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
886 buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
887
888 status = WLAN_STATUS_REQUEST_DECLINED;
889
890 /* sanity check for incoming parameters:
891 * check if configuration can support the BA policy
892 * and if buffer size does not exceed max value */
893 /* XXX: check own ht delayed BA capability?? */
894 if (((ba_policy != 1)
895 && (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA)))
896 || (buf_size > IEEE80211_MAX_AMPDU_BUF)) {
897 status = WLAN_STATUS_INVALID_QOS_PARAM;
898#ifdef CONFIG_MAC80211_HT_DEBUG
899 if (net_ratelimit())
900 printk(KERN_DEBUG "AddBA Req with bad params from "
901 "%pM on tid %u. policy %d, buffer size %d\n",
902 mgmt->sa, tid, ba_policy,
903 buf_size);
904#endif /* CONFIG_MAC80211_HT_DEBUG */
905 goto end_no_lock;
906 }
907 /* determine default buffer size */
908 if (buf_size == 0) {
909 struct ieee80211_supported_band *sband;
910
911 sband = local->hw.wiphy->bands[conf->channel->band];
912 buf_size = IEEE80211_MIN_AMPDU_BUF;
913 buf_size = buf_size << sband->ht_cap.ampdu_factor;
914 }
915
916
917 /* examine state machine */
918 spin_lock_bh(&sta->lock);
919
920 if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) {
921#ifdef CONFIG_MAC80211_HT_DEBUG
922 if (net_ratelimit())
923 printk(KERN_DEBUG "unexpected AddBA Req from "
924 "%pM on tid %u\n",
925 mgmt->sa, tid);
926#endif /* CONFIG_MAC80211_HT_DEBUG */
927 goto end;
928 }
929
930 /* prepare A-MPDU MLME for Rx aggregation */
931 sta->ampdu_mlme.tid_rx[tid] =
932 kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC);
933 if (!sta->ampdu_mlme.tid_rx[tid]) {
934#ifdef CONFIG_MAC80211_HT_DEBUG
935 if (net_ratelimit())
936 printk(KERN_ERR "allocate rx mlme to tid %d failed\n",
937 tid);
938#endif
939 goto end;
940 }
941 /* rx timer */
942 sta->ampdu_mlme.tid_rx[tid]->session_timer.function =
943 sta_rx_agg_session_timer_expired;
944 sta->ampdu_mlme.tid_rx[tid]->session_timer.data =
945 (unsigned long)&sta->timer_to_tid[tid];
946 init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
947
948 tid_agg_rx = sta->ampdu_mlme.tid_rx[tid];
949
950 /* prepare reordering buffer */
951 tid_agg_rx->reorder_buf =
952 kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC);
953 if (!tid_agg_rx->reorder_buf) {
954#ifdef CONFIG_MAC80211_HT_DEBUG
955 if (net_ratelimit())
956 printk(KERN_ERR "can not allocate reordering buffer "
957 "to tid %d\n", tid);
958#endif
959 kfree(sta->ampdu_mlme.tid_rx[tid]);
960 goto end;
961 }
962 memset(tid_agg_rx->reorder_buf, 0,
963 buf_size * sizeof(struct sk_buff *));
964
965 if (local->ops->ampdu_action)
966 ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START,
967 &sta->sta, tid, &start_seq_num);
968#ifdef CONFIG_MAC80211_HT_DEBUG
969 printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret);
970#endif /* CONFIG_MAC80211_HT_DEBUG */
971
972 if (ret) {
973 kfree(tid_agg_rx->reorder_buf);
974 kfree(tid_agg_rx);
975 sta->ampdu_mlme.tid_rx[tid] = NULL;
976 goto end;
977 }
978
979 /* change state and send addba resp */
980 sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL;
981 tid_agg_rx->dialog_token = dialog_token;
982 tid_agg_rx->ssn = start_seq_num;
983 tid_agg_rx->head_seq_num = start_seq_num;
984 tid_agg_rx->buf_size = buf_size;
985 tid_agg_rx->timeout = timeout;
986 tid_agg_rx->stored_mpdu_num = 0;
987 status = WLAN_STATUS_SUCCESS;
988end:
989 spin_unlock_bh(&sta->lock);
990
991end_no_lock:
992 ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid,
993 dialog_token, status, 1, buf_size, timeout);
994}
995
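For reference while reading the handler above: the 16-bit ADDBA parameter set packs the BA policy, TID and buffer size into one field, which is what the mask-and-shift lines at the top decode. A minimal stand-alone sketch of the same unpacking; the mask values are assumed to match the IEEE80211_ADDBA_PARAM_* definitions in include/linux/ieee80211.h, and demo_parse_addba_capab() is an illustrative name, not kernel code.

#include <stdint.h>
#include <stdio.h>

/* Assumed ADDBA parameter-set layout: bit 1 = BA policy,
 * bits 2-5 = TID, bits 6-15 = buffer size. */
#define ADDBA_PARAM_POLICY_MASK   0x0002
#define ADDBA_PARAM_TID_MASK      0x003C
#define ADDBA_PARAM_BUF_SIZE_MASK 0xFFC0

static void demo_parse_addba_capab(uint16_t capab)
{
	unsigned int ba_policy = (capab & ADDBA_PARAM_POLICY_MASK) >> 1;
	unsigned int tid       = (capab & ADDBA_PARAM_TID_MASK) >> 2;
	unsigned int buf_size  = (capab & ADDBA_PARAM_BUF_SIZE_MASK) >> 6;

	printf("policy=%u tid=%u buf_size=%u\n", ba_policy, tid, buf_size);
}

int main(void)
{
	/* immediate BA, TID 5, 64-frame buffer -> "policy=1 tid=5 buf_size=64" */
	demo_parse_addba_capab(0x0002 | (5 << 2) | (64 << 6));
	return 0;
}

With the field decoded this way, the sanity check above reads naturally: the request is rejected unless the policy is immediate BA (or delayed BA is supported) and the requested buffer does not exceed IEEE80211_MAX_AMPDU_BUF.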
996void ieee80211_process_addba_resp(struct ieee80211_local *local,
997 struct sta_info *sta,
998 struct ieee80211_mgmt *mgmt,
999 size_t len)
1000{
1001 struct ieee80211_hw *hw = &local->hw;
1002 u16 capab;
1003 u16 tid, start_seq_num;
1004 u8 *state;
1005
1006 capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
1007 tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
1008
1009 state = &sta->ampdu_mlme.tid_state_tx[tid];
1010
1011 spin_lock_bh(&sta->lock);
1012
1013 if (!(*state & HT_ADDBA_REQUESTED_MSK)) {
1014 spin_unlock_bh(&sta->lock);
1015 return;
1016 }
1017
1018 if (mgmt->u.action.u.addba_resp.dialog_token !=
1019 sta->ampdu_mlme.tid_tx[tid]->dialog_token) {
1020 spin_unlock_bh(&sta->lock);
1021#ifdef CONFIG_MAC80211_HT_DEBUG
1022 printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid);
1023#endif /* CONFIG_MAC80211_HT_DEBUG */
1024 return;
1025 }
1026
1027 del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
1028#ifdef CONFIG_MAC80211_HT_DEBUG
1029 printk(KERN_DEBUG "switched off addBA timer for tid %d\n", tid);
1030#endif /* CONFIG_MAC80211_HT_DEBUG */
1031 if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
1032 == WLAN_STATUS_SUCCESS) {
1033 *state |= HT_ADDBA_RECEIVED_MSK;
1034 sta->ampdu_mlme.addba_req_num[tid] = 0;
1035
1036 if (*state == HT_AGG_STATE_OPERATIONAL &&
1037 local->hw.ampdu_queues)
1038 ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]);
1039
1040 if (local->ops->ampdu_action) {
1041 (void)local->ops->ampdu_action(hw,
1042 IEEE80211_AMPDU_TX_RESUME,
1043 &sta->sta, tid, &start_seq_num);
1044 }
1045#ifdef CONFIG_MAC80211_HT_DEBUG
1046 printk(KERN_DEBUG "Resuming TX aggregation for tid %d\n", tid);
1047#endif /* CONFIG_MAC80211_HT_DEBUG */
1048 spin_unlock_bh(&sta->lock);
1049 } else {
1050 sta->ampdu_mlme.addba_req_num[tid]++;
1051 /* this will allow the state check in stop_BA_session */
1052 *state = HT_AGG_STATE_OPERATIONAL;
1053 spin_unlock_bh(&sta->lock);
1054 ieee80211_stop_tx_ba_session(hw, sta->sta.addr, tid,
1055 WLAN_BACK_INITIATOR);
1056 }
1057} 222}
1058 223
1059void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, 224void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
new file mode 100644
index 000000000000..3201e1f96365
--- /dev/null
+++ b/net/mac80211/ibss.c
@@ -0,0 +1,908 @@
1/*
2 * IBSS mode implementation
3 * Copyright 2003-2008, Jouni Malinen <j@w1.fi>
4 * Copyright 2004, Instant802 Networks, Inc.
5 * Copyright 2005, Devicescape Software, Inc.
6 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
7 * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
8 * Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 */
14
15#include <linux/delay.h>
16#include <linux/if_ether.h>
17#include <linux/skbuff.h>
18#include <linux/if_arp.h>
19#include <linux/etherdevice.h>
20#include <linux/rtnetlink.h>
21#include <net/mac80211.h>
22#include <asm/unaligned.h>
23
24#include "ieee80211_i.h"
25#include "rate.h"
26
27#define IEEE80211_SCAN_INTERVAL (2 * HZ)
28#define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ)
29#define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ)
30
31#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ)
32#define IEEE80211_IBSS_MERGE_DELAY 0x400000
33#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ)
34
35#define IEEE80211_IBSS_MAX_STA_ENTRIES 128
36
37
38static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata,
39 struct ieee80211_mgmt *mgmt,
40 size_t len)
41{
42 u16 auth_alg, auth_transaction, status_code;
43
44 if (len < 24 + 6)
45 return;
46
47 auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
48 auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
49 status_code = le16_to_cpu(mgmt->u.auth.status_code);
50
51 /*
52 * IEEE 802.11 standard does not require authentication in IBSS
53 * networks and most implementations do not seem to use it.
54 * However, try to reply to authentication attempts if someone
55 * has actually implemented this.
56 */
57 if (auth_alg == WLAN_AUTH_OPEN && auth_transaction == 1)
58 ieee80211_send_auth(sdata, 2, WLAN_AUTH_OPEN, NULL, 0,
59 sdata->u.ibss.bssid, 0);
60}
61
62static int __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
63 const u8 *bssid, const int beacon_int,
64 const int freq,
65 const size_t supp_rates_len,
66 const u8 *supp_rates,
67 const u16 capability, u64 tsf)
68{
69 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
70 struct ieee80211_local *local = sdata->local;
71 int res = 0, rates, i, j;
72 struct sk_buff *skb;
73 struct ieee80211_mgmt *mgmt;
74 u8 *pos;
75 struct ieee80211_supported_band *sband;
76 union iwreq_data wrqu;
77
78 if (local->ops->reset_tsf) {
79 /* Reset own TSF to allow time synchronization work. */
80 local->ops->reset_tsf(local_to_hw(local));
81 }
82
83 if ((ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET) &&
84 memcmp(ifibss->bssid, bssid, ETH_ALEN) == 0)
85 return res;
86
87 skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
88 if (!skb) {
89 printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
90 "response\n", sdata->dev->name);
91 return -ENOMEM;
92 }
93
94 if (!(ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET)) {
95 /* Remove possible STA entries from other IBSS networks. */
96 sta_info_flush_delayed(sdata);
97 }
98
99 memcpy(ifibss->bssid, bssid, ETH_ALEN);
100 res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID);
101 if (res)
102 return res;
103
104 local->hw.conf.beacon_int = beacon_int >= 10 ? beacon_int : 10;
105
106 sdata->drop_unencrypted = capability &
107 WLAN_CAPABILITY_PRIVACY ? 1 : 0;
108
109 res = ieee80211_set_freq(sdata, freq);
110
111 if (res)
112 return res;
113
114 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
115
116 /* Build IBSS probe response */
117
118 skb_reserve(skb, local->hw.extra_tx_headroom);
119
120 mgmt = (struct ieee80211_mgmt *)
121 skb_put(skb, 24 + sizeof(mgmt->u.beacon));
122 memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon));
123 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
124 IEEE80211_STYPE_PROBE_RESP);
125 memset(mgmt->da, 0xff, ETH_ALEN);
126 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
127 memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN);
128 mgmt->u.beacon.beacon_int =
129 cpu_to_le16(local->hw.conf.beacon_int);
130 mgmt->u.beacon.timestamp = cpu_to_le64(tsf);
131 mgmt->u.beacon.capab_info = cpu_to_le16(capability);
132
133 pos = skb_put(skb, 2 + ifibss->ssid_len);
134 *pos++ = WLAN_EID_SSID;
135 *pos++ = ifibss->ssid_len;
136 memcpy(pos, ifibss->ssid, ifibss->ssid_len);
137
138 rates = supp_rates_len;
139 if (rates > 8)
140 rates = 8;
141 pos = skb_put(skb, 2 + rates);
142 *pos++ = WLAN_EID_SUPP_RATES;
143 *pos++ = rates;
144 memcpy(pos, supp_rates, rates);
145
146 if (sband->band == IEEE80211_BAND_2GHZ) {
147 pos = skb_put(skb, 2 + 1);
148 *pos++ = WLAN_EID_DS_PARAMS;
149 *pos++ = 1;
150 *pos++ = ieee80211_frequency_to_channel(freq);
151 }
152
153 pos = skb_put(skb, 2 + 2);
154 *pos++ = WLAN_EID_IBSS_PARAMS;
155 *pos++ = 2;
156 /* FIX: set ATIM window based on scan results */
157 *pos++ = 0;
158 *pos++ = 0;
159
160 if (supp_rates_len > 8) {
161 rates = supp_rates_len - 8;
162 pos = skb_put(skb, 2 + rates);
163 *pos++ = WLAN_EID_EXT_SUPP_RATES;
164 *pos++ = rates;
165 memcpy(pos, &supp_rates[8], rates);
166 }
167
168 ifibss->probe_resp = skb;
169
170 ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON |
171 IEEE80211_IFCC_BEACON_ENABLED);
172
173
174 rates = 0;
175 for (i = 0; i < supp_rates_len; i++) {
176 int bitrate = (supp_rates[i] & 0x7f) * 5;
177 for (j = 0; j < sband->n_bitrates; j++)
178 if (sband->bitrates[j].bitrate == bitrate)
179 rates |= BIT(j);
180 }
181
182 ieee80211_sta_def_wmm_params(sdata, supp_rates_len, supp_rates);
183
184 ifibss->flags |= IEEE80211_IBSS_PREV_BSSID_SET;
185 ifibss->state = IEEE80211_IBSS_MLME_JOINED;
186 mod_timer(&ifibss->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
187
188 memset(&wrqu, 0, sizeof(wrqu));
189 memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN);
190 wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL);
191
192 return res;
193}
194
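The probe-response template assembled above encodes rates the usual way: one byte per rate in 500 kbit/s units (bitrate/5, with bitrate in 100 kbit/s units as elsewhere in mac80211), at most eight of them in the Supported Rates element and any remainder in Extended Supported Rates. A stand-alone sketch under those assumptions; demo_build_rate_ies() is an illustrative name, and the element IDs used are the standard values 1 and 50.

#include <stddef.h>
#include <stdint.h>

#define WLAN_EID_SUPP_RATES      1
#define WLAN_EID_EXT_SUPP_RATES 50

/* bitrates[] in 100 kbit/s units; returns bytes written into buf, which
 * the caller must size for 2 + n and, if n > 8, another 2 + (n - 8). */
static size_t demo_build_rate_ies(uint8_t *buf, const int *bitrates, size_t n)
{
	uint8_t *pos = buf;
	size_t first = n > 8 ? 8 : n, i;

	*pos++ = WLAN_EID_SUPP_RATES;
	*pos++ = (uint8_t)first;
	for (i = 0; i < first; i++)
		*pos++ = (uint8_t)(bitrates[i] / 5);	/* 500 kbit/s units */

	if (n > 8) {
		*pos++ = WLAN_EID_EXT_SUPP_RATES;
		*pos++ = (uint8_t)(n - 8);
		for (i = 8; i < n; i++)
			*pos++ = (uint8_t)(bitrates[i] / 5);
	}
	return (size_t)(pos - buf);
}

int main(void)
{
	/* 802.11b/g rate set: 1, 2, 5.5, 11, 6, 9, 12, 18, 24, 36, 48, 54 Mbit/s */
	const int rates[] = { 10, 20, 55, 110, 60, 90, 120, 180, 240, 360, 480, 540 };
	uint8_t ies[2 + 8 + 2 + 4];

	return demo_build_rate_ies(ies, rates, 12) == sizeof(ies) ? 0 : 1;
}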
195static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
196 struct ieee80211_bss *bss)
197{
198 return __ieee80211_sta_join_ibss(sdata,
199 bss->cbss.bssid,
200 bss->cbss.beacon_interval,
201 bss->cbss.channel->center_freq,
202 bss->supp_rates_len, bss->supp_rates,
203 bss->cbss.capability,
204 bss->cbss.tsf);
205}
206
207static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
208 struct ieee80211_mgmt *mgmt,
209 size_t len,
210 struct ieee80211_rx_status *rx_status,
211 struct ieee802_11_elems *elems,
212 bool beacon)
213{
214 struct ieee80211_local *local = sdata->local;
215 int freq;
216 struct ieee80211_bss *bss;
217 struct sta_info *sta;
218 struct ieee80211_channel *channel;
219 u64 beacon_timestamp, rx_timestamp;
220 u32 supp_rates = 0;
221 enum ieee80211_band band = rx_status->band;
222
223 if (elems->ds_params && elems->ds_params_len == 1)
224 freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
225 else
226 freq = rx_status->freq;
227
228 channel = ieee80211_get_channel(local->hw.wiphy, freq);
229
230 if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
231 return;
232
233 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates &&
234 memcmp(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) {
235 supp_rates = ieee80211_sta_get_rates(local, elems, band);
236
237 rcu_read_lock();
238
239 sta = sta_info_get(local, mgmt->sa);
240 if (sta) {
241 u32 prev_rates;
242
243 prev_rates = sta->sta.supp_rates[band];
244 /* make sure mandatory rates are always added */
245 sta->sta.supp_rates[band] = supp_rates |
246 ieee80211_mandatory_rates(local, band);
247
248#ifdef CONFIG_MAC80211_IBSS_DEBUG
249 if (sta->sta.supp_rates[band] != prev_rates)
250 printk(KERN_DEBUG "%s: updated supp_rates set "
251 "for %pM based on beacon info (0x%llx | "
252 "0x%llx -> 0x%llx)\n",
253 sdata->dev->name,
254 sta->sta.addr,
255 (unsigned long long) prev_rates,
256 (unsigned long long) supp_rates,
257 (unsigned long long) sta->sta.supp_rates[band]);
258#endif
259 } else
260 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates);
261
262 rcu_read_unlock();
263 }
264
265 bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems,
266 channel, beacon);
267 if (!bss)
268 return;
269
270 /* was just updated in ieee80211_bss_info_update */
271 beacon_timestamp = bss->cbss.tsf;
272
273 /* check if we need to merge IBSS */
274
275 /* merge only on beacons (???) */
276 if (!beacon)
277 goto put_bss;
278
279 /* we use a fixed BSSID */
280 if (sdata->u.ibss.flags & IEEE80211_IBSS_BSSID_SET)
281 goto put_bss;
282
283 /* not an IBSS */
284 if (!(bss->cbss.capability & WLAN_CAPABILITY_IBSS))
285 goto put_bss;
286
287 /* different channel */
288 if (bss->cbss.channel != local->oper_channel)
289 goto put_bss;
290
291 /* different SSID */
292 if (elems->ssid_len != sdata->u.ibss.ssid_len ||
293 memcmp(elems->ssid, sdata->u.ibss.ssid,
294 sdata->u.ibss.ssid_len))
295 goto put_bss;
296
297 /* same BSSID */
298 if (memcmp(bss->cbss.bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0)
299 goto put_bss;
300
301 if (rx_status->flag & RX_FLAG_TSFT) {
302 /*
303 * For correct IBSS merging we need mactime; since mactime is
304 * defined as the time the first data symbol of the frame hits
305 * the PHY, and the timestamp of the beacon is defined as "the
306 * time that the data symbol containing the first bit of the
307 * timestamp is transmitted to the PHY plus the transmitting
308 * STA's delays through its local PHY from the MAC-PHY
309 * interface to its interface with the WM" (802.11 11.1.2)
310 * - equals the time this bit arrives at the receiver - we have
311 * to take into account the offset between the two.
312 *
313 * E.g. at 1 MBit that means mactime is 192 usec earlier
314 * (=24 bytes * 8 usecs/byte) than the beacon timestamp.
315 */
316 int rate;
317
318 if (rx_status->flag & RX_FLAG_HT)
319 rate = 65; /* TODO: HT rates */
320 else
321 rate = local->hw.wiphy->bands[band]->
322 bitrates[rx_status->rate_idx].bitrate;
323
324 rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate);
325 } else if (local && local->ops && local->ops->get_tsf)
326 /* second best option: get current TSF */
327 rx_timestamp = local->ops->get_tsf(local_to_hw(local));
328 else
329 /* can't merge without knowing the TSF */
330 rx_timestamp = -1LLU;
331
332#ifdef CONFIG_MAC80211_IBSS_DEBUG
333 printk(KERN_DEBUG "RX beacon SA=%pM BSSID="
334 "%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n",
335 mgmt->sa, mgmt->bssid,
336 (unsigned long long)rx_timestamp,
337 (unsigned long long)beacon_timestamp,
338 (unsigned long long)(rx_timestamp - beacon_timestamp),
339 jiffies);
340#endif
341
342 /* give slow hardware some time to do the TSF sync */
343 if (rx_timestamp < IEEE80211_IBSS_MERGE_DELAY)
344 goto put_bss;
345
346 if (beacon_timestamp > rx_timestamp) {
347#ifdef CONFIG_MAC80211_IBSS_DEBUG
348 printk(KERN_DEBUG "%s: beacon TSF higher than "
349 "local TSF - IBSS merge with BSSID %pM\n",
350 sdata->dev->name, mgmt->bssid);
351#endif
352 ieee80211_sta_join_ibss(sdata, bss);
353 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates);
354 }
355
356 put_bss:
357 ieee80211_rx_bss_put(local, bss);
358}
359
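The mactime adjustment in the merge check above can be verified by hand: 24 header bytes precede the beacon timestamp field, and since mac80211 bitrates are expressed in units of 100 kbit/s the offset in microseconds is 24 * 8 * 10 / rate, which gives the 192 usec at 1 Mbit/s mentioned in the comment (and about 3 usec at 54 Mbit/s). A small stand-alone check; demo_rx_tsf() is an illustrative name only.

#include <stdint.h>
#include <stdio.h>

/* rate in 100 kbit/s units, mactime in microseconds at the MAC-PHY interface */
static uint64_t demo_rx_tsf(uint64_t mactime, int rate)
{
	return mactime + (uint64_t)(24 * 8 * 10 / rate);
}

int main(void)
{
	printf("offset at  1 Mbit/s: %llu usec\n",
	       (unsigned long long)demo_rx_tsf(0, 10));	/* 192 */
	printf("offset at 54 Mbit/s: %llu usec\n",
	       (unsigned long long)demo_rx_tsf(0, 540));	/* 3 */
	return 0;
}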
360/*
361 * Add a new IBSS station, will also be called by the RX code when,
362 * in IBSS mode, receiving a frame from a yet-unknown station, hence
363 * must be callable in atomic context.
364 */
365struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
 366 u8 *bssid, u8 *addr, u32 supp_rates)
367{
368 struct ieee80211_local *local = sdata->local;
369 struct sta_info *sta;
370 int band = local->hw.conf.channel->band;
371
372 /* TODO: Could consider removing the least recently used entry and
 373 * allow a new one to be added. */
374 if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) {
375 if (net_ratelimit()) {
376 printk(KERN_DEBUG "%s: No room for a new IBSS STA "
377 "entry %pM\n", sdata->dev->name, addr);
378 }
379 return NULL;
380 }
381
382 if (compare_ether_addr(bssid, sdata->u.ibss.bssid))
383 return NULL;
384
385#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
386 printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n",
387 wiphy_name(local->hw.wiphy), addr, sdata->dev->name);
388#endif
389
390 sta = sta_info_alloc(sdata, addr, GFP_ATOMIC);
391 if (!sta)
392 return NULL;
393
394 set_sta_flags(sta, WLAN_STA_AUTHORIZED);
395
396 /* make sure mandatory rates are always added */
397 sta->sta.supp_rates[band] = supp_rates |
398 ieee80211_mandatory_rates(local, band);
399
400 rate_control_rate_init(sta);
401
402 if (sta_info_insert(sta))
403 return NULL;
404
405 return sta;
406}
407
408static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
409{
410 struct ieee80211_local *local = sdata->local;
411 int active = 0;
412 struct sta_info *sta;
413
414 rcu_read_lock();
415
416 list_for_each_entry_rcu(sta, &local->sta_list, list) {
417 if (sta->sdata == sdata &&
418 time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL,
419 jiffies)) {
420 active++;
421 break;
422 }
423 }
424
425 rcu_read_unlock();
426
427 return active;
428}
429
430
431static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
432{
433 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
434
435 mod_timer(&ifibss->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
436
437 ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT);
438 if (ieee80211_sta_active_ibss(sdata))
439 return;
440
441 if ((ifibss->flags & IEEE80211_IBSS_BSSID_SET) &&
442 (!(ifibss->flags & IEEE80211_IBSS_AUTO_CHANNEL_SEL)))
443 return;
444
445 printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other "
446 "IBSS networks with same SSID (merge)\n", sdata->dev->name);
447
448 /* XXX maybe racy? */
449 if (sdata->local->scan_req)
450 return;
451
452 memcpy(sdata->local->int_scan_req.ssids[0].ssid,
453 ifibss->ssid, IEEE80211_MAX_SSID_LEN);
454 sdata->local->int_scan_req.ssids[0].ssid_len = ifibss->ssid_len;
455 ieee80211_request_scan(sdata, &sdata->local->int_scan_req);
456}
457
458static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
459{
460 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
461 struct ieee80211_local *local = sdata->local;
462 struct ieee80211_supported_band *sband;
463 u8 *pos;
464 u8 bssid[ETH_ALEN];
465 u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
466 u16 capability;
467 int i;
468
469 if (ifibss->flags & IEEE80211_IBSS_BSSID_SET) {
470 memcpy(bssid, ifibss->bssid, ETH_ALEN);
471 } else {
472 /* Generate random, not broadcast, locally administered BSSID. Mix in
 473 * own MAC address to make sure that devices that do not have a proper
 474 * random number generator get different BSSIDs. */
475 get_random_bytes(bssid, ETH_ALEN);
476 for (i = 0; i < ETH_ALEN; i++)
477 bssid[i] ^= sdata->dev->dev_addr[i];
478 bssid[0] &= ~0x01;
479 bssid[0] |= 0x02;
480 }
481
482 printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n",
483 sdata->dev->name, bssid);
484
485 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
486
487 if (local->hw.conf.beacon_int == 0)
488 local->hw.conf.beacon_int = 100;
489
490 capability = WLAN_CAPABILITY_IBSS;
491
492 if (sdata->default_key)
493 capability |= WLAN_CAPABILITY_PRIVACY;
494 else
495 sdata->drop_unencrypted = 0;
496
497 pos = supp_rates;
498 for (i = 0; i < sband->n_bitrates; i++) {
499 int rate = sband->bitrates[i].bitrate;
500 *pos++ = (u8) (rate / 5);
501 }
502
503 return __ieee80211_sta_join_ibss(sdata,
504 bssid, local->hw.conf.beacon_int,
505 local->hw.conf.channel->center_freq,
506 sband->n_bitrates, supp_rates,
507 capability, 0);
508}
509
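The random-BSSID branch above boils down to three steps: take random bytes, XOR in the interface's own MAC so devices with weak random number generators still end up with different BSSIDs, then clear the group/multicast bit and set the locally-administered bit of the first octet. A stand-alone sketch under those assumptions; demo_make_ibss_bssid() is an illustrative name and the random bytes come from the caller rather than get_random_bytes().

#include <stdint.h>

#define ETH_ALEN 6

static void demo_make_ibss_bssid(uint8_t bssid[ETH_ALEN],
				 const uint8_t rnd[ETH_ALEN],
				 const uint8_t own[ETH_ALEN])
{
	int i;

	for (i = 0; i < ETH_ALEN; i++)
		bssid[i] = rnd[i] ^ own[i];	/* mix in own MAC address */
	bssid[0] &= ~0x01;			/* clear group/multicast bit */
	bssid[0] |= 0x02;			/* set locally administered bit */
}

int main(void)
{
	const uint8_t rnd[ETH_ALEN] = { 0xde, 0xad, 0xbe, 0xef, 0x00, 0x01 };
	const uint8_t own[ETH_ALEN] = { 0x00, 0x1b, 0x2f, 0x11, 0x22, 0x33 };
	uint8_t bssid[ETH_ALEN];

	demo_make_ibss_bssid(bssid, rnd, own);
	/* must be unicast (bit 0 clear) and locally administered (bit 1 set) */
	return (bssid[0] & 0x01) || !(bssid[0] & 0x02);
}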
510static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
511{
512 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
513 struct ieee80211_local *local = sdata->local;
514 struct ieee80211_bss *bss;
515 const u8 *bssid = NULL;
516 int active_ibss;
517
518 if (ifibss->ssid_len == 0)
519 return -EINVAL;
520
521 active_ibss = ieee80211_sta_active_ibss(sdata);
522#ifdef CONFIG_MAC80211_IBSS_DEBUG
523 printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n",
524 sdata->dev->name, active_ibss);
525#endif /* CONFIG_MAC80211_IBSS_DEBUG */
526
527 if (active_ibss)
528 return 0;
529
530 if (ifibss->flags & IEEE80211_IBSS_BSSID_SET)
531 bssid = ifibss->bssid;
532 bss = (void *)cfg80211_get_bss(local->hw.wiphy, NULL, bssid,
533 ifibss->ssid, ifibss->ssid_len,
534 WLAN_CAPABILITY_IBSS,
535 WLAN_CAPABILITY_IBSS);
536
537#ifdef CONFIG_MAC80211_IBSS_DEBUG
538 if (bss)
539 printk(KERN_DEBUG " sta_find_ibss: selected %pM current "
540 "%pM\n", bss->cbss.bssid, ifibss->bssid);
541#endif /* CONFIG_MAC80211_IBSS_DEBUG */
542
543 if (bss &&
544 (!(ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET) ||
545 memcmp(ifibss->bssid, bss->cbss.bssid, ETH_ALEN))) {
546 int ret;
547
548 printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM"
549 " based on configured SSID\n",
550 sdata->dev->name, bss->cbss.bssid);
551
552 ret = ieee80211_sta_join_ibss(sdata, bss);
553 ieee80211_rx_bss_put(local, bss);
554 return ret;
555 } else if (bss)
556 ieee80211_rx_bss_put(local, bss);
557
558#ifdef CONFIG_MAC80211_IBSS_DEBUG
559 printk(KERN_DEBUG " did not try to join ibss\n");
560#endif /* CONFIG_MAC80211_IBSS_DEBUG */
561
562 /* Selected IBSS not found in current scan results - try to scan */
563 if (ifibss->state == IEEE80211_IBSS_MLME_JOINED &&
564 !ieee80211_sta_active_ibss(sdata)) {
565 mod_timer(&ifibss->timer, jiffies +
566 IEEE80211_IBSS_MERGE_INTERVAL);
567 } else if (time_after(jiffies, local->last_scan_completed +
568 IEEE80211_SCAN_INTERVAL)) {
569 printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to "
570 "join\n", sdata->dev->name);
571
572 /* XXX maybe racy? */
573 if (local->scan_req)
574 return -EBUSY;
575
576 memcpy(local->int_scan_req.ssids[0].ssid,
577 ifibss->ssid, IEEE80211_MAX_SSID_LEN);
578 local->int_scan_req.ssids[0].ssid_len = ifibss->ssid_len;
579 return ieee80211_request_scan(sdata, &local->int_scan_req);
580 } else if (ifibss->state != IEEE80211_IBSS_MLME_JOINED) {
581 int interval = IEEE80211_SCAN_INTERVAL;
582
583 if (time_after(jiffies, ifibss->ibss_join_req +
584 IEEE80211_IBSS_JOIN_TIMEOUT)) {
585 if (!(local->oper_channel->flags &
586 IEEE80211_CHAN_NO_IBSS))
587 return ieee80211_sta_create_ibss(sdata);
588 printk(KERN_DEBUG "%s: IBSS not allowed on"
589 " %d MHz\n", sdata->dev->name,
590 local->hw.conf.channel->center_freq);
591
592 /* No IBSS found - decrease scan interval and continue
593 * scanning. */
594 interval = IEEE80211_SCAN_INTERVAL_SLOW;
595 }
596
597 ifibss->state = IEEE80211_IBSS_MLME_SEARCH;
598 mod_timer(&ifibss->timer, jiffies + interval);
599 return 0;
600 }
601
602 return 0;
603}
604
605static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
606 struct ieee80211_mgmt *mgmt,
607 size_t len)
608{
609 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
610 struct ieee80211_local *local = sdata->local;
611 int tx_last_beacon;
612 struct sk_buff *skb;
613 struct ieee80211_mgmt *resp;
614 u8 *pos, *end;
615
616 if (ifibss->state != IEEE80211_IBSS_MLME_JOINED ||
617 len < 24 + 2 || !ifibss->probe_resp)
618 return;
619
620 if (local->ops->tx_last_beacon)
621 tx_last_beacon = local->ops->tx_last_beacon(local_to_hw(local));
622 else
623 tx_last_beacon = 1;
624
625#ifdef CONFIG_MAC80211_IBSS_DEBUG
626 printk(KERN_DEBUG "%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM"
627 " (tx_last_beacon=%d)\n",
628 sdata->dev->name, mgmt->sa, mgmt->da,
629 mgmt->bssid, tx_last_beacon);
630#endif /* CONFIG_MAC80211_IBSS_DEBUG */
631
632 if (!tx_last_beacon)
633 return;
634
635 if (memcmp(mgmt->bssid, ifibss->bssid, ETH_ALEN) != 0 &&
636 memcmp(mgmt->bssid, "\xff\xff\xff\xff\xff\xff", ETH_ALEN) != 0)
637 return;
638
639 end = ((u8 *) mgmt) + len;
640 pos = mgmt->u.probe_req.variable;
641 if (pos[0] != WLAN_EID_SSID ||
642 pos + 2 + pos[1] > end) {
643#ifdef CONFIG_MAC80211_IBSS_DEBUG
644 printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq "
645 "from %pM\n",
646 sdata->dev->name, mgmt->sa);
647#endif
648 return;
649 }
650 if (pos[1] != 0 &&
651 (pos[1] != ifibss->ssid_len ||
652 memcmp(pos + 2, ifibss->ssid, ifibss->ssid_len) != 0)) {
653 /* Ignore ProbeReq for foreign SSID */
654 return;
655 }
656
657 /* Reply with ProbeResp */
658 skb = skb_copy(ifibss->probe_resp, GFP_KERNEL);
659 if (!skb)
660 return;
661
662 resp = (struct ieee80211_mgmt *) skb->data;
663 memcpy(resp->da, mgmt->sa, ETH_ALEN);
664#ifdef CONFIG_MAC80211_IBSS_DEBUG
665 printk(KERN_DEBUG "%s: Sending ProbeResp to %pM\n",
666 sdata->dev->name, resp->da);
667#endif /* CONFIG_MAC80211_IBSS_DEBUG */
668 ieee80211_tx_skb(sdata, skb, 0);
669}
670
671static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
672 struct ieee80211_mgmt *mgmt,
673 size_t len,
674 struct ieee80211_rx_status *rx_status)
675{
676 size_t baselen;
677 struct ieee802_11_elems elems;
678
679 if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN))
680 return; /* ignore ProbeResp to foreign address */
681
682 baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt;
683 if (baselen > len)
684 return;
685
686 ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen,
687 &elems);
688
689 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false);
690}
691
692static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
693 struct ieee80211_mgmt *mgmt,
694 size_t len,
695 struct ieee80211_rx_status *rx_status)
696{
697 size_t baselen;
698 struct ieee802_11_elems elems;
699
700 /* Process beacon from the current BSS */
701 baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt;
702 if (baselen > len)
703 return;
704
705 ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems);
706
707 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true);
708}
709
710static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
711 struct sk_buff *skb)
712{
713 struct ieee80211_rx_status *rx_status;
714 struct ieee80211_mgmt *mgmt;
715 u16 fc;
716
717 rx_status = (struct ieee80211_rx_status *) skb->cb;
718 mgmt = (struct ieee80211_mgmt *) skb->data;
719 fc = le16_to_cpu(mgmt->frame_control);
720
721 switch (fc & IEEE80211_FCTL_STYPE) {
722 case IEEE80211_STYPE_PROBE_REQ:
723 ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len);
724 break;
725 case IEEE80211_STYPE_PROBE_RESP:
726 ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len,
727 rx_status);
728 break;
729 case IEEE80211_STYPE_BEACON:
730 ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len,
731 rx_status);
732 break;
733 case IEEE80211_STYPE_AUTH:
734 ieee80211_rx_mgmt_auth_ibss(sdata, mgmt, skb->len);
735 break;
736 }
737
738 kfree_skb(skb);
739}
740
741static void ieee80211_ibss_work(struct work_struct *work)
742{
743 struct ieee80211_sub_if_data *sdata =
744 container_of(work, struct ieee80211_sub_if_data, u.ibss.work);
745 struct ieee80211_local *local = sdata->local;
746 struct ieee80211_if_ibss *ifibss;
747 struct sk_buff *skb;
748
749 if (!netif_running(sdata->dev))
750 return;
751
752 if (local->sw_scanning || local->hw_scanning)
753 return;
754
755 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_ADHOC))
756 return;
757 ifibss = &sdata->u.ibss;
758
759 while ((skb = skb_dequeue(&ifibss->skb_queue)))
760 ieee80211_ibss_rx_queued_mgmt(sdata, skb);
761
762 if (!test_and_clear_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request))
763 return;
764
765 switch (ifibss->state) {
766 case IEEE80211_IBSS_MLME_SEARCH:
767 ieee80211_sta_find_ibss(sdata);
768 break;
769 case IEEE80211_IBSS_MLME_JOINED:
770 ieee80211_sta_merge_ibss(sdata);
771 break;
772 default:
773 WARN_ON(1);
774 break;
775 }
776}
777
778static void ieee80211_ibss_timer(unsigned long data)
779{
780 struct ieee80211_sub_if_data *sdata =
781 (struct ieee80211_sub_if_data *) data;
782 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
783 struct ieee80211_local *local = sdata->local;
784
785 set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request);
786 queue_work(local->hw.workqueue, &ifibss->work);
787}
788
789void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)
790{
791 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
792
793 INIT_WORK(&ifibss->work, ieee80211_ibss_work);
794 setup_timer(&ifibss->timer, ieee80211_ibss_timer,
795 (unsigned long) sdata);
796 skb_queue_head_init(&ifibss->skb_queue);
797
798 ifibss->flags |= IEEE80211_IBSS_AUTO_BSSID_SEL |
799 IEEE80211_IBSS_AUTO_CHANNEL_SEL;
800}
801
802int ieee80211_ibss_commit(struct ieee80211_sub_if_data *sdata)
803{
804 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
805
806 ifibss->flags &= ~IEEE80211_IBSS_PREV_BSSID_SET;
807
808 if (ifibss->ssid_len)
809 ifibss->flags |= IEEE80211_IBSS_SSID_SET;
810 else
811 ifibss->flags &= ~IEEE80211_IBSS_SSID_SET;
812
813 ifibss->ibss_join_req = jiffies;
814 ifibss->state = IEEE80211_IBSS_MLME_SEARCH;
815 set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request);
816
817 return 0;
818}
819
820int ieee80211_ibss_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len)
821{
822 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
823
824 if (len > IEEE80211_MAX_SSID_LEN)
825 return -EINVAL;
826
827 if (ifibss->ssid_len != len || memcmp(ifibss->ssid, ssid, len) != 0) {
828 memset(ifibss->ssid, 0, sizeof(ifibss->ssid));
829 memcpy(ifibss->ssid, ssid, len);
830 ifibss->ssid_len = len;
831 }
832
833 return ieee80211_ibss_commit(sdata);
834}
835
836int ieee80211_ibss_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len)
837{
838 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
839
840 memcpy(ssid, ifibss->ssid, ifibss->ssid_len);
841 *len = ifibss->ssid_len;
842
843 return 0;
844}
845
846int ieee80211_ibss_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid)
847{
848 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
849
850 if (is_valid_ether_addr(bssid)) {
851 memcpy(ifibss->bssid, bssid, ETH_ALEN);
852 ifibss->flags |= IEEE80211_IBSS_BSSID_SET;
853 } else {
854 memset(ifibss->bssid, 0, ETH_ALEN);
855 ifibss->flags &= ~IEEE80211_IBSS_BSSID_SET;
856 }
857
858 if (netif_running(sdata->dev)) {
859 if (ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID)) {
860 printk(KERN_DEBUG "%s: Failed to config new BSSID to "
861 "the low-level driver\n", sdata->dev->name);
862 }
863 }
864
865 return ieee80211_ibss_commit(sdata);
866}
867
868/* scan finished notification */
869void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
870{
871 struct ieee80211_sub_if_data *sdata = local->scan_sdata;
872 struct ieee80211_if_ibss *ifibss;
873
874 if (sdata && sdata->vif.type == NL80211_IFTYPE_ADHOC) {
875 ifibss = &sdata->u.ibss;
876 if ((!(ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET)) ||
877 !ieee80211_sta_active_ibss(sdata))
878 ieee80211_sta_find_ibss(sdata);
879 }
880}
881
882ieee80211_rx_result
883ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
884 struct ieee80211_rx_status *rx_status)
885{
886 struct ieee80211_local *local = sdata->local;
887 struct ieee80211_mgmt *mgmt;
888 u16 fc;
889
890 if (skb->len < 24)
891 return RX_DROP_MONITOR;
892
893 mgmt = (struct ieee80211_mgmt *) skb->data;
894 fc = le16_to_cpu(mgmt->frame_control);
895
896 switch (fc & IEEE80211_FCTL_STYPE) {
897 case IEEE80211_STYPE_PROBE_RESP:
898 case IEEE80211_STYPE_BEACON:
899 memcpy(skb->cb, rx_status, sizeof(*rx_status));
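		/* fall through - rx_status in the cb is only needed for the
		 * beacon/probe response path; all four subtypes get queued
		 * to the IBSS work below */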
900 case IEEE80211_STYPE_PROBE_REQ:
901 case IEEE80211_STYPE_AUTH:
902 skb_queue_tail(&sdata->u.ibss.skb_queue, skb);
903 queue_work(local->hw.workqueue, &sdata->u.ibss.work);
904 return RX_QUEUED;
905 }
906
907 return RX_DROP_MONITOR;
908}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f3eec989662b..e6ed78cb16b3 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -43,7 +43,7 @@ struct ieee80211_local;
43 43
44/* Required encryption head and tailroom */ 44/* Required encryption head and tailroom */
45#define IEEE80211_ENCRYPT_HEADROOM 8 45#define IEEE80211_ENCRYPT_HEADROOM 8
46#define IEEE80211_ENCRYPT_TAILROOM 12 46#define IEEE80211_ENCRYPT_TAILROOM 18
47 47
48/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent 48/* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent
49 * reception of at least three fragmented frames. This limit can be increased 49 * reception of at least three fragmented frames. This limit can be increased
@@ -57,6 +57,8 @@ struct ieee80211_local;
57 */ 57 */
58#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ) 58#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ)
59 59
60#define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024))
61
60struct ieee80211_fragment_entry { 62struct ieee80211_fragment_entry {
61 unsigned long first_frag_time; 63 unsigned long first_frag_time;
62 unsigned int seq; 64 unsigned int seq;
@@ -70,43 +72,36 @@ struct ieee80211_fragment_entry {
70 72
71 73
72struct ieee80211_bss { 74struct ieee80211_bss {
73 struct list_head list; 75 /* Yes, this is a hack */
74 struct ieee80211_bss *hnext; 76 struct cfg80211_bss cbss;
75 size_t ssid_len;
76 77
77 atomic_t users; 78 /* don't want to look up all the time */
78 79 size_t ssid_len;
79 u8 bssid[ETH_ALEN];
80 u8 ssid[IEEE80211_MAX_SSID_LEN]; 80 u8 ssid[IEEE80211_MAX_SSID_LEN];
81
81 u8 dtim_period; 82 u8 dtim_period;
82 u16 capability; /* host byte order */ 83
83 enum ieee80211_band band;
84 int freq;
85 int signal, noise, qual;
86 u8 *ies; /* all information elements from the last Beacon or Probe
87 * Response frames; note Beacon frame is not allowed to
88 * override values from Probe Response */
89 size_t ies_len;
90 bool wmm_used; 84 bool wmm_used;
85
86 unsigned long last_probe_resp;
87
91#ifdef CONFIG_MAC80211_MESH 88#ifdef CONFIG_MAC80211_MESH
92 u8 *mesh_id; 89 u8 *mesh_id;
93 size_t mesh_id_len; 90 size_t mesh_id_len;
94 u8 *mesh_cfg; 91 u8 *mesh_cfg;
95#endif 92#endif
93
96#define IEEE80211_MAX_SUPP_RATES 32 94#define IEEE80211_MAX_SUPP_RATES 32
97 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; 95 u8 supp_rates[IEEE80211_MAX_SUPP_RATES];
98 size_t supp_rates_len; 96 size_t supp_rates_len;
99 u64 timestamp;
100 int beacon_int;
101 97
102 unsigned long last_probe_resp; 98 /*
 103 unsigned long last_update; 99 * During association, we save an ERP value from a probe response so
104
 105 /* during association, we save an ERP value from a probe response so
106 * that we can feed ERP info to the driver when handling the 100 * that we can feed ERP info to the driver when handling the
107 * association completes. these fields probably won't be up-to-date 101 * association completes. these fields probably won't be up-to-date
108 * otherwise, you probably don't want to use them. */ 102 * otherwise, you probably don't want to use them.
109 int has_erp_value; 103 */
104 bool has_erp_value;
110 u8 erp_value; 105 u8 erp_value;
111}; 106};
112 107
@@ -154,11 +149,6 @@ struct ieee80211_tx_data {
154 149
155 struct ieee80211_channel *channel; 150 struct ieee80211_channel *channel;
156 151
157 /* Extra fragments (in addition to the first fragment
158 * in skb) */
159 struct sk_buff **extra_frag;
160 int num_extra_frag;
161
162 u16 ethertype; 152 u16 ethertype;
163 unsigned int flags; 153 unsigned int flags;
164}; 154};
@@ -194,12 +184,6 @@ struct ieee80211_rx_data {
194 u16 tkip_iv16; 184 u16 tkip_iv16;
195}; 185};
196 186
197struct ieee80211_tx_stored_packet {
198 struct sk_buff *skb;
199 struct sk_buff **extra_frag;
200 int num_extra_frag;
201};
202
203struct beacon_data { 187struct beacon_data {
204 u8 *head, *tail; 188 u8 *head, *tail;
205 int head_len, tail_len; 189 int head_len, tail_len;
@@ -244,7 +228,7 @@ struct mesh_preq_queue {
244 u8 flags; 228 u8 flags;
245}; 229};
246 230
247/* flags used in struct ieee80211_if_sta.flags */ 231/* flags used in struct ieee80211_if_managed.flags */
248#define IEEE80211_STA_SSID_SET BIT(0) 232#define IEEE80211_STA_SSID_SET BIT(0)
249#define IEEE80211_STA_BSSID_SET BIT(1) 233#define IEEE80211_STA_BSSID_SET BIT(1)
250#define IEEE80211_STA_PREV_BSSID_SET BIT(2) 234#define IEEE80211_STA_PREV_BSSID_SET BIT(2)
@@ -252,43 +236,49 @@ struct mesh_preq_queue {
252#define IEEE80211_STA_ASSOCIATED BIT(4) 236#define IEEE80211_STA_ASSOCIATED BIT(4)
253#define IEEE80211_STA_PROBEREQ_POLL BIT(5) 237#define IEEE80211_STA_PROBEREQ_POLL BIT(5)
254#define IEEE80211_STA_CREATE_IBSS BIT(6) 238#define IEEE80211_STA_CREATE_IBSS BIT(6)
255#define IEEE80211_STA_MIXED_CELL BIT(7) 239/* hole at 7, please re-use */
256#define IEEE80211_STA_WMM_ENABLED BIT(8) 240#define IEEE80211_STA_WMM_ENABLED BIT(8)
241/* hole at 9, please re-use */
257#define IEEE80211_STA_AUTO_SSID_SEL BIT(10) 242#define IEEE80211_STA_AUTO_SSID_SEL BIT(10)
258#define IEEE80211_STA_AUTO_BSSID_SEL BIT(11) 243#define IEEE80211_STA_AUTO_BSSID_SEL BIT(11)
259#define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12) 244#define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12)
260#define IEEE80211_STA_PRIVACY_INVOKED BIT(13) 245#define IEEE80211_STA_PRIVACY_INVOKED BIT(13)
246#define IEEE80211_STA_TKIP_WEP_USED BIT(14)
247#define IEEE80211_STA_CSA_RECEIVED BIT(15)
248#define IEEE80211_STA_MFP_ENABLED BIT(16)
249#define IEEE80211_STA_EXT_SME BIT(17)
261/* flags for MLME request */ 250/* flags for MLME request */
262#define IEEE80211_STA_REQ_SCAN 0 251#define IEEE80211_STA_REQ_SCAN 0
263#define IEEE80211_STA_REQ_DIRECT_PROBE 1 252#define IEEE80211_STA_REQ_DIRECT_PROBE 1
264#define IEEE80211_STA_REQ_AUTH 2 253#define IEEE80211_STA_REQ_AUTH 2
265#define IEEE80211_STA_REQ_RUN 3 254#define IEEE80211_STA_REQ_RUN 3
266 255
267/* STA/IBSS MLME states */
268enum ieee80211_sta_mlme_state {
269 IEEE80211_STA_MLME_DISABLED,
270 IEEE80211_STA_MLME_DIRECT_PROBE,
271 IEEE80211_STA_MLME_AUTHENTICATE,
272 IEEE80211_STA_MLME_ASSOCIATE,
273 IEEE80211_STA_MLME_ASSOCIATED,
274 IEEE80211_STA_MLME_IBSS_SEARCH,
275 IEEE80211_STA_MLME_IBSS_JOINED,
276};
277
278/* bitfield of allowed auth algs */ 256/* bitfield of allowed auth algs */
279#define IEEE80211_AUTH_ALG_OPEN BIT(0) 257#define IEEE80211_AUTH_ALG_OPEN BIT(0)
280#define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1) 258#define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1)
281#define IEEE80211_AUTH_ALG_LEAP BIT(2) 259#define IEEE80211_AUTH_ALG_LEAP BIT(2)
260#define IEEE80211_AUTH_ALG_FT BIT(3)
282 261
283struct ieee80211_if_sta { 262struct ieee80211_if_managed {
284 struct timer_list timer; 263 struct timer_list timer;
264 struct timer_list chswitch_timer;
285 struct work_struct work; 265 struct work_struct work;
266 struct work_struct chswitch_work;
267 struct work_struct beacon_loss_work;
268
286 u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; 269 u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN];
270
287 u8 ssid[IEEE80211_MAX_SSID_LEN]; 271 u8 ssid[IEEE80211_MAX_SSID_LEN];
288 enum ieee80211_sta_mlme_state state;
289 size_t ssid_len; 272 size_t ssid_len;
290 u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; 273
291 size_t scan_ssid_len; 274 enum {
275 IEEE80211_STA_MLME_DISABLED,
276 IEEE80211_STA_MLME_DIRECT_PROBE,
277 IEEE80211_STA_MLME_AUTHENTICATE,
278 IEEE80211_STA_MLME_ASSOCIATE,
279 IEEE80211_STA_MLME_ASSOCIATED,
280 } state;
281
292 u16 aid; 282 u16 aid;
293 u16 ap_capab, capab; 283 u16 ap_capab, capab;
294 u8 *extra_ie; /* to be added to the end of AssocReq */ 284 u8 *extra_ie; /* to be added to the end of AssocReq */
@@ -308,6 +298,7 @@ struct ieee80211_if_sta {
308 unsigned long request; 298 unsigned long request;
309 299
310 unsigned long last_probe; 300 unsigned long last_probe;
301 unsigned long last_beacon;
311 302
312 unsigned int flags; 303 unsigned int flags;
313 304
@@ -315,11 +306,53 @@ struct ieee80211_if_sta {
315 int auth_alg; /* currently used IEEE 802.11 authentication algorithm */ 306 int auth_alg; /* currently used IEEE 802.11 authentication algorithm */
316 int auth_transaction; 307 int auth_transaction;
317 308
309 enum {
310 IEEE80211_MFP_DISABLED,
311 IEEE80211_MFP_OPTIONAL,
312 IEEE80211_MFP_REQUIRED
313 } mfp; /* management frame protection */
314
315 int wmm_last_param_set;
316
317 /* Extra IE data for management frames */
318 u8 *sme_auth_ie;
319 size_t sme_auth_ie_len;
320};
321
322enum ieee80211_ibss_flags {
323 IEEE80211_IBSS_AUTO_CHANNEL_SEL = BIT(0),
324 IEEE80211_IBSS_AUTO_BSSID_SEL = BIT(1),
325 IEEE80211_IBSS_BSSID_SET = BIT(2),
326 IEEE80211_IBSS_PREV_BSSID_SET = BIT(3),
327 IEEE80211_IBSS_SSID_SET = BIT(4),
328};
329
330enum ieee80211_ibss_request {
331 IEEE80211_IBSS_REQ_RUN = 0,
332};
333
334struct ieee80211_if_ibss {
335 struct timer_list timer;
336 struct work_struct work;
337
338 struct sk_buff_head skb_queue;
339
340 u8 ssid[IEEE80211_MAX_SSID_LEN];
341 u8 ssid_len;
342
343 u32 flags;
344
345 u8 bssid[ETH_ALEN];
346
347 unsigned long request;
348
318 unsigned long ibss_join_req; 349 unsigned long ibss_join_req;
319 struct sk_buff *probe_resp; /* ProbeResp template for IBSS */ 350 struct sk_buff *probe_resp; /* ProbeResp template for IBSS */
320 u32 supp_rates_bits[IEEE80211_NUM_BANDS];
321 351
322 int wmm_last_param_set; 352 enum {
353 IEEE80211_IBSS_MLME_SEARCH,
354 IEEE80211_IBSS_MLME_JOINED,
355 } state;
323}; 356};
324 357
325struct ieee80211_if_mesh { 358struct ieee80211_if_mesh {
@@ -370,7 +403,6 @@ struct ieee80211_if_mesh {
370 * 403 *
371 * @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets 404 * @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets
372 * @IEEE80211_SDATA_PROMISC: interface is promisc 405 * @IEEE80211_SDATA_PROMISC: interface is promisc
373 * @IEEE80211_SDATA_USERSPACE_MLME: userspace MLME is active
374 * @IEEE80211_SDATA_OPERATING_GMODE: operating in G-only mode 406 * @IEEE80211_SDATA_OPERATING_GMODE: operating in G-only mode
375 * @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between 407 * @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between
376 * associated stations and deliver multicast frames both 408 * associated stations and deliver multicast frames both
@@ -379,9 +411,8 @@ struct ieee80211_if_mesh {
379enum ieee80211_sub_if_data_flags { 411enum ieee80211_sub_if_data_flags {
380 IEEE80211_SDATA_ALLMULTI = BIT(0), 412 IEEE80211_SDATA_ALLMULTI = BIT(0),
381 IEEE80211_SDATA_PROMISC = BIT(1), 413 IEEE80211_SDATA_PROMISC = BIT(1),
382 IEEE80211_SDATA_USERSPACE_MLME = BIT(2), 414 IEEE80211_SDATA_OPERATING_GMODE = BIT(2),
383 IEEE80211_SDATA_OPERATING_GMODE = BIT(3), 415 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3),
384 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(4),
385}; 416};
386 417
387struct ieee80211_sub_if_data { 418struct ieee80211_sub_if_data {
@@ -404,8 +435,10 @@ struct ieee80211_sub_if_data {
404 unsigned int fragment_next; 435 unsigned int fragment_next;
405 436
406#define NUM_DEFAULT_KEYS 4 437#define NUM_DEFAULT_KEYS 4
407 struct ieee80211_key *keys[NUM_DEFAULT_KEYS]; 438#define NUM_DEFAULT_MGMT_KEYS 2
439 struct ieee80211_key *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS];
408 struct ieee80211_key *default_key; 440 struct ieee80211_key *default_key;
441 struct ieee80211_key *default_mgmt_key;
409 442
410 u16 sequence_number; 443 u16 sequence_number;
411 444
@@ -423,7 +456,8 @@ struct ieee80211_sub_if_data {
423 struct ieee80211_if_ap ap; 456 struct ieee80211_if_ap ap;
424 struct ieee80211_if_wds wds; 457 struct ieee80211_if_wds wds;
425 struct ieee80211_if_vlan vlan; 458 struct ieee80211_if_vlan vlan;
426 struct ieee80211_if_sta sta; 459 struct ieee80211_if_managed mgd;
460 struct ieee80211_if_ibss ibss;
427#ifdef CONFIG_MAC80211_MESH 461#ifdef CONFIG_MAC80211_MESH
428 struct ieee80211_if_mesh mesh; 462 struct ieee80211_if_mesh mesh;
429#endif 463#endif
@@ -477,6 +511,7 @@ struct ieee80211_sub_if_data {
477 } debugfs; 511 } debugfs;
478 struct { 512 struct {
479 struct dentry *default_key; 513 struct dentry *default_key;
514 struct dentry *default_mgmt_key;
480 } common_debugfs; 515 } common_debugfs;
481 516
482#ifdef CONFIG_MAC80211_MESH 517#ifdef CONFIG_MAC80211_MESH
@@ -541,11 +576,12 @@ enum {
541enum queue_stop_reason { 576enum queue_stop_reason {
542 IEEE80211_QUEUE_STOP_REASON_DRIVER, 577 IEEE80211_QUEUE_STOP_REASON_DRIVER,
543 IEEE80211_QUEUE_STOP_REASON_PS, 578 IEEE80211_QUEUE_STOP_REASON_PS,
579 IEEE80211_QUEUE_STOP_REASON_CSA,
580 IEEE80211_QUEUE_STOP_REASON_AGGREGATION,
581 IEEE80211_QUEUE_STOP_REASON_SUSPEND,
582 IEEE80211_QUEUE_STOP_REASON_PENDING,
544}; 583};
545 584
546/* maximum number of hardware queues we support. */
547#define QD_MAX_QUEUES (IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_QUEUES)
548
549struct ieee80211_master_priv { 585struct ieee80211_master_priv {
550 struct ieee80211_local *local; 586 struct ieee80211_local *local;
551}; 587};
@@ -558,9 +594,10 @@ struct ieee80211_local {
558 594
559 const struct ieee80211_ops *ops; 595 const struct ieee80211_ops *ops;
560 596
561 unsigned long queue_pool[BITS_TO_LONGS(QD_MAX_QUEUES)];
562 unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES]; 597 unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES];
598 /* also used to protect ampdu_ac_queue and amdpu_ac_stop_refcnt */
563 spinlock_t queue_stop_reason_lock; 599 spinlock_t queue_stop_reason_lock;
600
564 struct net_device *mdev; /* wmaster# - "master" 802.11 device */ 601 struct net_device *mdev; /* wmaster# - "master" 802.11 device */
565 int open_count; 602 int open_count;
566 int monitors, cooked_mntrs; 603 int monitors, cooked_mntrs;
@@ -568,7 +605,6 @@ struct ieee80211_local {
568 int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss; 605 int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss;
569 unsigned int filter_flags; /* FIF_* */ 606 unsigned int filter_flags; /* FIF_* */
570 struct iw_statistics wstats; 607 struct iw_statistics wstats;
571 u8 wstats_flags;
572 bool tim_in_locked_section; /* see ieee80211_beacon_get() */ 608 bool tim_in_locked_section; /* see ieee80211_beacon_get() */
573 int tx_headroom; /* required headroom for hardware/radiotap */ 609 int tx_headroom; /* required headroom for hardware/radiotap */
574 610
@@ -595,11 +631,17 @@ struct ieee80211_local {
595 struct sta_info *sta_hash[STA_HASH_SIZE]; 631 struct sta_info *sta_hash[STA_HASH_SIZE];
596 struct timer_list sta_cleanup; 632 struct timer_list sta_cleanup;
597 633
598 unsigned long queues_pending[BITS_TO_LONGS(IEEE80211_MAX_QUEUES)]; 634 struct sk_buff_head pending[IEEE80211_MAX_QUEUES];
599 unsigned long queues_pending_run[BITS_TO_LONGS(IEEE80211_MAX_QUEUES)];
600 struct ieee80211_tx_stored_packet pending_packet[IEEE80211_MAX_QUEUES];
601 struct tasklet_struct tx_pending_tasklet; 635 struct tasklet_struct tx_pending_tasklet;
602 636
637 /*
638 * This lock is used to prevent concurrent A-MPDU
639 * session start/stop processing, this thus also
640 * synchronises the ->ampdu_action() callback to
641 * drivers and limits it to one at a time.
642 */
643 spinlock_t ampdu_lock;
644
603 /* number of interfaces with corresponding IFF_ flags */ 645 /* number of interfaces with corresponding IFF_ flags */
604 atomic_t iff_allmultis, iff_promiscs; 646 atomic_t iff_allmultis, iff_promiscs;
605 647
@@ -612,7 +654,9 @@ struct ieee80211_local {
612 struct crypto_blkcipher *wep_rx_tfm; 654 struct crypto_blkcipher *wep_rx_tfm;
613 u32 wep_iv; 655 u32 wep_iv;
614 656
657 /* see iface.c */
615 struct list_head interfaces; 658 struct list_head interfaces;
659 struct mutex iflist_mtx;
616 660
617 /* 661 /*
618 * Key lock, protects sdata's key_list and sta_info's 662 * Key lock, protects sdata's key_list and sta_info's
@@ -623,20 +667,18 @@ struct ieee80211_local {
623 667
624 /* Scanning and BSS list */ 668 /* Scanning and BSS list */
625 bool sw_scanning, hw_scanning; 669 bool sw_scanning, hw_scanning;
670 struct cfg80211_ssid scan_ssid;
671 struct cfg80211_scan_request int_scan_req;
672 struct cfg80211_scan_request *scan_req;
673 struct ieee80211_channel *scan_channel;
626 int scan_channel_idx; 674 int scan_channel_idx;
627 enum ieee80211_band scan_band;
628 675
629 enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state; 676 enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state;
630 unsigned long last_scan_completed; 677 unsigned long last_scan_completed;
631 struct delayed_work scan_work; 678 struct delayed_work scan_work;
632 struct ieee80211_sub_if_data *scan_sdata; 679 struct ieee80211_sub_if_data *scan_sdata;
633 struct ieee80211_channel *oper_channel, *scan_channel;
634 enum nl80211_channel_type oper_channel_type; 680 enum nl80211_channel_type oper_channel_type;
635 u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; 681 struct ieee80211_channel *oper_channel, *csa_channel;
636 size_t scan_ssid_len;
637 struct list_head bss_list;
638 struct ieee80211_bss *bss_hash[STA_HASH_SIZE];
639 spinlock_t bss_lock;
640 682
641 /* SNMP counters */ 683 /* SNMP counters */
642 /* dot11CountersTable */ 684 /* dot11CountersTable */
@@ -649,7 +691,6 @@ struct ieee80211_local {
649 u32 dot11ReceivedFragmentCount; 691 u32 dot11ReceivedFragmentCount;
650 u32 dot11MulticastReceivedFrameCount; 692 u32 dot11MulticastReceivedFrameCount;
651 u32 dot11TransmittedFrameCount; 693 u32 dot11TransmittedFrameCount;
652 u32 dot11WEPUndecryptableCount;
653 694
654#ifdef CONFIG_MAC80211_LEDS 695#ifdef CONFIG_MAC80211_LEDS
655 int tx_led_counter, rx_led_counter; 696 int tx_led_counter, rx_led_counter;
@@ -696,11 +737,14 @@ struct ieee80211_local {
696 unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */ 737 unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */
697 738
698 bool powersave; 739 bool powersave;
699 int dynamic_ps_timeout; 740 bool pspolling;
700 struct work_struct dynamic_ps_enable_work; 741 struct work_struct dynamic_ps_enable_work;
701 struct work_struct dynamic_ps_disable_work; 742 struct work_struct dynamic_ps_disable_work;
702 struct timer_list dynamic_ps_timer; 743 struct timer_list dynamic_ps_timer;
703 744
745 int user_power_level; /* in dBm */
746 int power_constr_level; /* in dBm */
747
704#ifdef CONFIG_MAC80211_DEBUGFS 748#ifdef CONFIG_MAC80211_DEBUGFS
705 struct local_debugfsdentries { 749 struct local_debugfsdentries {
706 struct dentry *rcdir; 750 struct dentry *rcdir;
@@ -712,6 +756,8 @@ struct ieee80211_local {
712 struct dentry *long_retry_limit; 756 struct dentry *long_retry_limit;
713 struct dentry *total_ps_buffered; 757 struct dentry *total_ps_buffered;
714 struct dentry *wep_iv; 758 struct dentry *wep_iv;
759 struct dentry *tsf;
760 struct dentry *reset;
715 struct dentry *statistics; 761 struct dentry *statistics;
716 struct local_debugfsdentries_statsdentries { 762 struct local_debugfsdentries_statsdentries {
717 struct dentry *transmitted_fragment_count; 763 struct dentry *transmitted_fragment_count;
@@ -805,6 +851,7 @@ struct ieee802_11_elems {
805 u8 *country_elem; 851 u8 *country_elem;
806 u8 *pwr_constr_elem; 852 u8 *pwr_constr_elem;
 807 u8 *quiet_elem; /* first quiet element */ 853 u8 *quiet_elem; /* first quiet element */
854 u8 *timeout_int;
808 855
809 /* length of them, respectively */ 856 /* length of them, respectively */
810 u8 ssid_len; 857 u8 ssid_len;
@@ -832,6 +879,7 @@ struct ieee802_11_elems {
832 u8 pwr_constr_elem_len; 879 u8 pwr_constr_elem_len;
833 u8 quiet_elem_len; 880 u8 quiet_elem_len;
 834 u8 num_of_quiet_elem; /* can be more than one */ 881 u8 num_of_quiet_elem; /* can be more than one */
882 u8 timeout_int_len;
835}; 883};
836 884
837static inline struct ieee80211_local *hw_to_local( 885static inline struct ieee80211_local *hw_to_local(
@@ -860,34 +908,43 @@ void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx);
860void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, 908void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
861 u32 changed); 909 u32 changed);
862void ieee80211_configure_filter(struct ieee80211_local *local); 910void ieee80211_configure_filter(struct ieee80211_local *local);
911u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
863 912
864/* wireless extensions */ 913/* wireless extensions */
865extern const struct iw_handler_def ieee80211_iw_handler_def; 914extern const struct iw_handler_def ieee80211_iw_handler_def;
866 915
867/* STA/IBSS code */ 916/* STA code */
868void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); 917void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata);
869void ieee80211_scan_work(struct work_struct *work); 918ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata,
870void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, 919 struct sk_buff *skb,
871 struct ieee80211_rx_status *rx_status); 920 struct ieee80211_rx_status *rx_status);
921int ieee80211_sta_commit(struct ieee80211_sub_if_data *sdata);
872int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len); 922int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len);
873int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len); 923int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len);
874int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid); 924int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid);
875void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, 925void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata);
876 struct ieee80211_if_sta *ifsta);
877struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
878 u8 *bssid, u8 *addr, u64 supp_rates);
879int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason); 926int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason);
880int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason); 927int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason);
881u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); 928void ieee80211_send_pspoll(struct ieee80211_local *local,
882u64 ieee80211_sta_get_rates(struct ieee80211_local *local, 929 struct ieee80211_sub_if_data *sdata);
883 struct ieee802_11_elems *elems, 930
884 enum ieee80211_band band); 931/* IBSS code */
885void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, 932int ieee80211_ibss_commit(struct ieee80211_sub_if_data *sdata);
886 u8 *ssid, size_t ssid_len); 933int ieee80211_ibss_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len);
934int ieee80211_ibss_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len);
935int ieee80211_ibss_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid);
936void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local);
937void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata);
938ieee80211_rx_result
939ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
940 struct ieee80211_rx_status *rx_status);
941struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
942 u8 *bssid, u8 *addr, u32 supp_rates);
887 943
888/* scan/BSS handling */ 944/* scan/BSS handling */
945void ieee80211_scan_work(struct work_struct *work);
889int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, 946int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
890 u8 *ssid, size_t ssid_len); 947 struct cfg80211_scan_request *req);
891int ieee80211_scan_results(struct ieee80211_local *local, 948int ieee80211_scan_results(struct ieee80211_local *local,
892 struct iw_request_info *info, 949 struct iw_request_info *info,
893 char *buf, size_t len); 950 char *buf, size_t len);
@@ -895,29 +952,28 @@ ieee80211_rx_result
895ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, 952ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata,
896 struct sk_buff *skb, 953 struct sk_buff *skb,
897 struct ieee80211_rx_status *rx_status); 954 struct ieee80211_rx_status *rx_status);
898void ieee80211_rx_bss_list_init(struct ieee80211_local *local);
899void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local);
900int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, 955int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata,
901 char *ie, size_t len); 956 const char *ie, size_t len);
902 957
903void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local); 958void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local);
959void ieee80211_scan_failed(struct ieee80211_local *local);
904int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, 960int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
905 u8 *ssid, size_t ssid_len); 961 struct cfg80211_scan_request *req);
906struct ieee80211_bss * 962struct ieee80211_bss *
907ieee80211_bss_info_update(struct ieee80211_local *local, 963ieee80211_bss_info_update(struct ieee80211_local *local,
908 struct ieee80211_rx_status *rx_status, 964 struct ieee80211_rx_status *rx_status,
909 struct ieee80211_mgmt *mgmt, 965 struct ieee80211_mgmt *mgmt,
910 size_t len, 966 size_t len,
911 struct ieee802_11_elems *elems, 967 struct ieee802_11_elems *elems,
912 int freq, bool beacon); 968 struct ieee80211_channel *channel,
913struct ieee80211_bss * 969 bool beacon);
914ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq,
915 u8 *ssid, u8 ssid_len);
916struct ieee80211_bss * 970struct ieee80211_bss *
917ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq, 971ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
918 u8 *ssid, u8 ssid_len); 972 u8 *ssid, u8 ssid_len);
919void ieee80211_rx_bss_put(struct ieee80211_local *local, 973void ieee80211_rx_bss_put(struct ieee80211_local *local,
920 struct ieee80211_bss *bss); 974 struct ieee80211_bss *bss);
975void ieee80211_rx_bss_remove(struct ieee80211_sub_if_data *sdata, u8 *bssid,
976 int freq, u8 *ssid, u8 ssid_len);
921 977
922/* interface handling */ 978/* interface handling */
923int ieee80211_if_add(struct ieee80211_local *local, const char *name, 979int ieee80211_if_add(struct ieee80211_local *local, const char *name,
@@ -943,10 +999,15 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
943 struct ieee80211_ht_info *hti, 999 struct ieee80211_ht_info *hti,
944 u16 ap_ht_cap_flags); 1000 u16 ap_ht_cap_flags);
945void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn); 1001void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn);
1002void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
1003 const u8 *da, u16 tid,
1004 u16 initiator, u16 reason_code);
946 1005
947void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, 1006void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da,
948 u16 tid, u16 initiator, u16 reason); 1007 u16 tid, u16 initiator, u16 reason);
949void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr); 1008void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
1009 u16 initiator, u16 reason);
1010void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta);
950void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, 1011void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
951 struct sta_info *sta, 1012 struct sta_info *sta,
952 struct ieee80211_mgmt *mgmt, size_t len); 1013 struct ieee80211_mgmt *mgmt, size_t len);
@@ -959,10 +1020,36 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
959 struct ieee80211_mgmt *mgmt, 1020 struct ieee80211_mgmt *mgmt,
960 size_t len); 1021 size_t len);
961 1022
1023int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
1024 enum ieee80211_back_parties initiator);
1025
962/* Spectrum management */ 1026/* Spectrum management */
963void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, 1027void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
964 struct ieee80211_mgmt *mgmt, 1028 struct ieee80211_mgmt *mgmt,
965 size_t len); 1029 size_t len);
1030void ieee80211_chswitch_timer(unsigned long data);
1031void ieee80211_chswitch_work(struct work_struct *work);
1032void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata,
1033 struct ieee80211_channel_sw_ie *sw_elem,
1034 struct ieee80211_bss *bss);
1035void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
1036 u16 capab_info, u8 *pwr_constr_elem,
1037 u8 pwr_constr_elem_len);
1038
1039/* Suspend/resume */
1040#ifdef CONFIG_PM
1041int __ieee80211_suspend(struct ieee80211_hw *hw);
1042int __ieee80211_resume(struct ieee80211_hw *hw);
1043#else
1044static inline int __ieee80211_suspend(struct ieee80211_hw *hw)
1045{
1046 return 0;
1047}
1048static inline int __ieee80211_resume(struct ieee80211_hw *hw)
1049{
1050 return 0;
1051}
1052#endif
966 1053
967/* utility functions/constants */ 1054/* utility functions/constants */
968extern void *mac80211_wiphy_privid; /* for wiphy privid */ 1055extern void *mac80211_wiphy_privid; /* for wiphy privid */
@@ -980,17 +1067,42 @@ void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
980void ieee802_11_parse_elems(u8 *start, size_t len, 1067void ieee802_11_parse_elems(u8 *start, size_t len,
981 struct ieee802_11_elems *elems); 1068 struct ieee802_11_elems *elems);
982int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq); 1069int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq);
983u64 ieee80211_mandatory_rates(struct ieee80211_local *local, 1070u32 ieee80211_mandatory_rates(struct ieee80211_local *local,
984 enum ieee80211_band band); 1071 enum ieee80211_band band);
985 1072
986void ieee80211_dynamic_ps_enable_work(struct work_struct *work); 1073void ieee80211_dynamic_ps_enable_work(struct work_struct *work);
987void ieee80211_dynamic_ps_disable_work(struct work_struct *work); 1074void ieee80211_dynamic_ps_disable_work(struct work_struct *work);
988void ieee80211_dynamic_ps_timer(unsigned long data); 1075void ieee80211_dynamic_ps_timer(unsigned long data);
1076void ieee80211_send_nullfunc(struct ieee80211_local *local,
1077 struct ieee80211_sub_if_data *sdata,
1078 int powersave);
1079void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
1080 struct ieee80211_hdr *hdr);
1081void ieee80211_beacon_loss_work(struct work_struct *work);
989 1082
990void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, 1083void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
991 enum queue_stop_reason reason); 1084 enum queue_stop_reason reason);
992void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, 1085void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
993 enum queue_stop_reason reason); 1086 enum queue_stop_reason reason);
1087void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
1088 enum queue_stop_reason reason);
1089void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
1090 enum queue_stop_reason reason);
1091
1092void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
1093 u16 transaction, u16 auth_alg,
1094 u8 *extra, size_t extra_len,
1095 const u8 *bssid, int encrypt);
1096void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1097 u8 *ssid, size_t ssid_len,
1098 u8 *ie, size_t ie_len);
1099
1100void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
1101 const size_t supp_rates_len,
1102 const u8 *supp_rates);
1103u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
1104 struct ieee802_11_elems *elems,
1105 enum ieee80211_band band);
994 1106
995#ifdef CONFIG_MAC80211_NOINLINE 1107#ifdef CONFIG_MAC80211_NOINLINE
996#define debug_noinline noinline 1108#define debug_noinline noinline
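The suspend/resume declarations added to ieee80211_i.h above follow the usual kernel idiom of static inline no-op stubs when CONFIG_PM is disabled, so callers can invoke them unconditionally. A minimal, hypothetical caller showing why no #ifdef is needed at the call site (the wrapper names below are illustrative, not part of this patch):

static int example_mac80211_suspend(struct ieee80211_hw *hw)
{
        /* resolves to the real implementation with CONFIG_PM set,
         * and to the stub that simply returns 0 without it */
        return __ieee80211_suspend(hw);
}

static int example_mac80211_resume(struct ieee80211_hw *hw)
{
        return __ieee80211_resume(hw);
}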
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b9074824862a..91e8e1bacaaa 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -21,6 +21,23 @@
21#include "mesh.h" 21#include "mesh.h"
22#include "led.h" 22#include "led.h"
23 23
24/**
25 * DOC: Interface list locking
26 *
27 * The interface list in each struct ieee80211_local is protected
28 * three-fold:
29 *
30 * (1) modifications may only be done under the RTNL
31 * (2) modifications and readers are protected against each other by
32 * the iflist_mtx.
33 * (3) modifications are done in an RCU manner so atomic readers
34 * can traverse the list in RCU-safe blocks.
35 *
36 * As a consequence, reads (traversals) of the list can be protected
37 * by either the RTNL, the iflist_mtx or RCU.
38 */
39
40
24static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) 41static int ieee80211_change_mtu(struct net_device *dev, int new_mtu)
25{ 42{
26 int meshhdrlen; 43 int meshhdrlen;
@@ -219,7 +236,10 @@ static int ieee80211_open(struct net_device *dev)
219 break; 236 break;
220 case NL80211_IFTYPE_STATION: 237 case NL80211_IFTYPE_STATION:
221 case NL80211_IFTYPE_ADHOC: 238 case NL80211_IFTYPE_ADHOC:
222 sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; 239 if (sdata->vif.type == NL80211_IFTYPE_STATION)
240 sdata->u.mgd.flags &= ~IEEE80211_STA_PREV_BSSID_SET;
241 else
242 sdata->u.ibss.flags &= ~IEEE80211_IBSS_PREV_BSSID_SET;
223 /* fall through */ 243 /* fall through */
224 default: 244 default:
225 conf.vif = &sdata->vif; 245 conf.vif = &sdata->vif;
@@ -241,8 +261,7 @@ static int ieee80211_open(struct net_device *dev)
241 ieee80211_bss_info_change_notify(sdata, changed); 261 ieee80211_bss_info_change_notify(sdata, changed);
242 ieee80211_enable_keys(sdata); 262 ieee80211_enable_keys(sdata);
243 263
244 if (sdata->vif.type == NL80211_IFTYPE_STATION && 264 if (sdata->vif.type == NL80211_IFTYPE_STATION)
245 !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME))
246 netif_carrier_off(dev); 265 netif_carrier_off(dev);
247 else 266 else
248 netif_carrier_on(dev); 267 netif_carrier_on(dev);
@@ -304,11 +323,10 @@ static int ieee80211_open(struct net_device *dev)
304 * yet be effective. Trigger execution of ieee80211_sta_work 323 * yet be effective. Trigger execution of ieee80211_sta_work
305 * to fix this. 324 * to fix this.
306 */ 325 */
307 if (sdata->vif.type == NL80211_IFTYPE_STATION || 326 if (sdata->vif.type == NL80211_IFTYPE_STATION)
308 sdata->vif.type == NL80211_IFTYPE_ADHOC) { 327 queue_work(local->hw.workqueue, &sdata->u.mgd.work);
309 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 328 else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
310 queue_work(local->hw.workqueue, &ifsta->work); 329 queue_work(local->hw.workqueue, &sdata->u.ibss.work);
311 }
312 330
313 netif_tx_start_all_queues(dev); 331 netif_tx_start_all_queues(dev);
314 332
@@ -345,13 +363,24 @@ static int ieee80211_stop(struct net_device *dev)
345 363
346 list_for_each_entry_rcu(sta, &local->sta_list, list) { 364 list_for_each_entry_rcu(sta, &local->sta_list, list) {
347 if (sta->sdata == sdata) 365 if (sta->sdata == sdata)
348 ieee80211_sta_tear_down_BA_sessions(sdata, 366 ieee80211_sta_tear_down_BA_sessions(sta);
349 sta->sta.addr);
350 } 367 }
351 368
352 rcu_read_unlock(); 369 rcu_read_unlock();
353 370
354 /* 371 /*
372 * Announce that we are leaving the network, in case we are a
373 * station interface type. This must be done before removing
374 * all stations associated with sta_info_flush, otherwise STA
375 * information will be gone and no announce being done.
376 */
377 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
378 if (sdata->u.mgd.state != IEEE80211_STA_MLME_DISABLED)
379 ieee80211_sta_deauthenticate(sdata,
380 WLAN_REASON_DEAUTH_LEAVING);
381 }
382
383 /*
355 * Remove all stations associated with this interface. 384 * Remove all stations associated with this interface.
356 * 385 *
357 * This must be done before calling ops->remove_interface() 386 * This must be done before calling ops->remove_interface()
@@ -383,6 +412,8 @@ static int ieee80211_stop(struct net_device *dev)
383 atomic_dec(&local->iff_promiscs); 412 atomic_dec(&local->iff_promiscs);
384 413
385 dev_mc_unsync(local->mdev, dev); 414 dev_mc_unsync(local->mdev, dev);
415 del_timer_sync(&local->dynamic_ps_timer);
416 cancel_work_sync(&local->dynamic_ps_enable_work);
386 417
387 /* APs need special treatment */ 418 /* APs need special treatment */
388 if (sdata->vif.type == NL80211_IFTYPE_AP) { 419 if (sdata->vif.type == NL80211_IFTYPE_AP) {
@@ -434,14 +465,9 @@ static int ieee80211_stop(struct net_device *dev)
434 netif_addr_unlock_bh(local->mdev); 465 netif_addr_unlock_bh(local->mdev);
435 break; 466 break;
436 case NL80211_IFTYPE_STATION: 467 case NL80211_IFTYPE_STATION:
437 case NL80211_IFTYPE_ADHOC: 468 memset(sdata->u.mgd.bssid, 0, ETH_ALEN);
438 /* Announce that we are leaving the network. */ 469 del_timer_sync(&sdata->u.mgd.chswitch_timer);
439 if (sdata->u.sta.state != IEEE80211_STA_MLME_DISABLED) 470 del_timer_sync(&sdata->u.mgd.timer);
440 ieee80211_sta_deauthenticate(sdata,
441 WLAN_REASON_DEAUTH_LEAVING);
442
443 memset(sdata->u.sta.bssid, 0, ETH_ALEN);
444 del_timer_sync(&sdata->u.sta.timer);
445 /* 471 /*
446 * If the timer fired while we waited for it, it will have 472 * If the timer fired while we waited for it, it will have
447 * requeued the work. Now the work will be running again 473 * requeued the work. Now the work will be running again
@@ -449,7 +475,11 @@ static int ieee80211_stop(struct net_device *dev)
449 * whether the interface is running, which, at this point, 475 * whether the interface is running, which, at this point,
450 * it no longer is. 476 * it no longer is.
451 */ 477 */
452 cancel_work_sync(&sdata->u.sta.work); 478 cancel_work_sync(&sdata->u.mgd.work);
479 cancel_work_sync(&sdata->u.mgd.chswitch_work);
480
481 cancel_work_sync(&sdata->u.mgd.beacon_loss_work);
482
453 /* 483 /*
454 * When we get here, the interface is marked down. 484 * When we get here, the interface is marked down.
455 * Call synchronize_rcu() to wait for the RX path 485 * Call synchronize_rcu() to wait for the RX path
@@ -457,12 +487,22 @@ static int ieee80211_stop(struct net_device *dev)
457 * frames at this very time on another CPU. 487 * frames at this very time on another CPU.
458 */ 488 */
459 synchronize_rcu(); 489 synchronize_rcu();
460 skb_queue_purge(&sdata->u.sta.skb_queue); 490 skb_queue_purge(&sdata->u.mgd.skb_queue);
461 491
462 sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; 492 sdata->u.mgd.flags &= ~(IEEE80211_STA_PRIVACY_INVOKED |
463 kfree(sdata->u.sta.extra_ie); 493 IEEE80211_STA_TKIP_WEP_USED);
464 sdata->u.sta.extra_ie = NULL; 494 kfree(sdata->u.mgd.extra_ie);
465 sdata->u.sta.extra_ie_len = 0; 495 sdata->u.mgd.extra_ie = NULL;
496 sdata->u.mgd.extra_ie_len = 0;
497 /* fall through */
498 case NL80211_IFTYPE_ADHOC:
499 if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
500 memset(sdata->u.ibss.bssid, 0, ETH_ALEN);
501 del_timer_sync(&sdata->u.ibss.timer);
502 cancel_work_sync(&sdata->u.ibss.work);
503 synchronize_rcu();
504 skb_queue_purge(&sdata->u.ibss.skb_queue);
505 }
466 /* fall through */ 506 /* fall through */
467 case NL80211_IFTYPE_MESH_POINT: 507 case NL80211_IFTYPE_MESH_POINT:
468 if (ieee80211_vif_is_mesh(&sdata->vif)) { 508 if (ieee80211_vif_is_mesh(&sdata->vif)) {
@@ -501,7 +541,7 @@ static int ieee80211_stop(struct net_device *dev)
501 * scan event to userspace -- the scan is incomplete. 541 * scan event to userspace -- the scan is incomplete.
502 */ 542 */
503 if (local->sw_scanning) 543 if (local->sw_scanning)
504 ieee80211_scan_completed(&local->hw); 544 ieee80211_scan_completed(&local->hw, true);
505 } 545 }
506 546
507 conf.vif = &sdata->vif; 547 conf.vif = &sdata->vif;
@@ -569,19 +609,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
569 dev_mc_sync(local->mdev, dev); 609 dev_mc_sync(local->mdev, dev);
570} 610}
571 611
572static void ieee80211_if_setup(struct net_device *dev)
573{
574 ether_setup(dev);
575 dev->hard_start_xmit = ieee80211_subif_start_xmit;
576 dev->wireless_handlers = &ieee80211_iw_handler_def;
577 dev->set_multicast_list = ieee80211_set_multicast_list;
578 dev->change_mtu = ieee80211_change_mtu;
579 dev->open = ieee80211_open;
580 dev->stop = ieee80211_stop;
581 dev->destructor = free_netdev;
582 /* we will validate the address ourselves in ->open */
583 dev->validate_addr = NULL;
584}
585/* 612/*
586 * Called when the netdev is removed or, by the code below, before 613 * Called when the netdev is removed or, by the code below, before
587 * the interface type changes. 614 * the interface type changes.
@@ -621,12 +648,14 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
621 if (ieee80211_vif_is_mesh(&sdata->vif)) 648 if (ieee80211_vif_is_mesh(&sdata->vif))
622 mesh_rmc_free(sdata); 649 mesh_rmc_free(sdata);
623 break; 650 break;
624 case NL80211_IFTYPE_STATION:
625 case NL80211_IFTYPE_ADHOC: 651 case NL80211_IFTYPE_ADHOC:
626 kfree(sdata->u.sta.extra_ie); 652 kfree_skb(sdata->u.ibss.probe_resp);
627 kfree(sdata->u.sta.assocreq_ies); 653 break;
628 kfree(sdata->u.sta.assocresp_ies); 654 case NL80211_IFTYPE_STATION:
629 kfree_skb(sdata->u.sta.probe_resp); 655 kfree(sdata->u.mgd.extra_ie);
656 kfree(sdata->u.mgd.assocreq_ies);
657 kfree(sdata->u.mgd.assocresp_ies);
658 kfree(sdata->u.mgd.sme_auth_ie);
630 break; 659 break;
631 case NL80211_IFTYPE_WDS: 660 case NL80211_IFTYPE_WDS:
632 case NL80211_IFTYPE_AP_VLAN: 661 case NL80211_IFTYPE_AP_VLAN:
@@ -642,6 +671,34 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
642 WARN_ON(flushed); 671 WARN_ON(flushed);
643} 672}
644 673
674static const struct net_device_ops ieee80211_dataif_ops = {
675 .ndo_open = ieee80211_open,
676 .ndo_stop = ieee80211_stop,
677 .ndo_uninit = ieee80211_teardown_sdata,
678 .ndo_start_xmit = ieee80211_subif_start_xmit,
679 .ndo_set_multicast_list = ieee80211_set_multicast_list,
680 .ndo_change_mtu = ieee80211_change_mtu,
681 .ndo_set_mac_address = eth_mac_addr,
682};
683
684static const struct net_device_ops ieee80211_monitorif_ops = {
685 .ndo_open = ieee80211_open,
686 .ndo_stop = ieee80211_stop,
687 .ndo_uninit = ieee80211_teardown_sdata,
688 .ndo_start_xmit = ieee80211_monitor_start_xmit,
689 .ndo_set_multicast_list = ieee80211_set_multicast_list,
690 .ndo_change_mtu = ieee80211_change_mtu,
691 .ndo_set_mac_address = eth_mac_addr,
692};
693
694static void ieee80211_if_setup(struct net_device *dev)
695{
696 ether_setup(dev);
697 dev->netdev_ops = &ieee80211_dataif_ops;
698 dev->wireless_handlers = &ieee80211_iw_handler_def;
699 dev->destructor = free_netdev;
700}
701
645/* 702/*
646 * Helper function to initialise an interface to a specific type. 703 * Helper function to initialise an interface to a specific type.
647 */ 704 */
@@ -653,7 +710,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
653 710
654 /* and set some type-dependent values */ 711 /* and set some type-dependent values */
655 sdata->vif.type = type; 712 sdata->vif.type = type;
656 sdata->dev->hard_start_xmit = ieee80211_subif_start_xmit; 713 sdata->dev->netdev_ops = &ieee80211_dataif_ops;
657 sdata->wdev.iftype = type; 714 sdata->wdev.iftype = type;
658 715
659 /* only monitor differs */ 716 /* only monitor differs */
@@ -665,16 +722,18 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
665 INIT_LIST_HEAD(&sdata->u.ap.vlans); 722 INIT_LIST_HEAD(&sdata->u.ap.vlans);
666 break; 723 break;
667 case NL80211_IFTYPE_STATION: 724 case NL80211_IFTYPE_STATION:
668 case NL80211_IFTYPE_ADHOC:
669 ieee80211_sta_setup_sdata(sdata); 725 ieee80211_sta_setup_sdata(sdata);
670 break; 726 break;
727 case NL80211_IFTYPE_ADHOC:
728 ieee80211_ibss_setup_sdata(sdata);
729 break;
671 case NL80211_IFTYPE_MESH_POINT: 730 case NL80211_IFTYPE_MESH_POINT:
672 if (ieee80211_vif_is_mesh(&sdata->vif)) 731 if (ieee80211_vif_is_mesh(&sdata->vif))
673 ieee80211_mesh_init_sdata(sdata); 732 ieee80211_mesh_init_sdata(sdata);
674 break; 733 break;
675 case NL80211_IFTYPE_MONITOR: 734 case NL80211_IFTYPE_MONITOR:
676 sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP; 735 sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP;
677 sdata->dev->hard_start_xmit = ieee80211_monitor_start_xmit; 736 sdata->dev->netdev_ops = &ieee80211_monitorif_ops;
678 sdata->u.mntr_flags = MONITOR_FLAG_CONTROL | 737 sdata->u.mntr_flags = MONITOR_FLAG_CONTROL |
679 MONITOR_FLAG_OTHER_BSS; 738 MONITOR_FLAG_OTHER_BSS;
680 break; 739 break;
@@ -755,6 +814,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
755 814
756 memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); 815 memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
757 SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); 816 SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy));
817 ndev->features |= NETIF_F_NETNS_LOCAL;
758 818
759 /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ 819 /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */
760 sdata = netdev_priv(ndev); 820 sdata = netdev_priv(ndev);
@@ -780,15 +840,15 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
780 if (ret) 840 if (ret)
781 goto fail; 841 goto fail;
782 842
783 ndev->uninit = ieee80211_teardown_sdata;
784
785 if (ieee80211_vif_is_mesh(&sdata->vif) && 843 if (ieee80211_vif_is_mesh(&sdata->vif) &&
786 params && params->mesh_id_len) 844 params && params->mesh_id_len)
787 ieee80211_sdata_set_mesh_id(sdata, 845 ieee80211_sdata_set_mesh_id(sdata,
788 params->mesh_id_len, 846 params->mesh_id_len,
789 params->mesh_id); 847 params->mesh_id);
790 848
849 mutex_lock(&local->iflist_mtx);
791 list_add_tail_rcu(&sdata->list, &local->interfaces); 850 list_add_tail_rcu(&sdata->list, &local->interfaces);
851 mutex_unlock(&local->iflist_mtx);
792 852
793 if (new_dev) 853 if (new_dev)
794 *new_dev = ndev; 854 *new_dev = ndev;
@@ -804,7 +864,10 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata)
804{ 864{
805 ASSERT_RTNL(); 865 ASSERT_RTNL();
806 866
867 mutex_lock(&sdata->local->iflist_mtx);
807 list_del_rcu(&sdata->list); 868 list_del_rcu(&sdata->list);
869 mutex_unlock(&sdata->local->iflist_mtx);
870
808 synchronize_rcu(); 871 synchronize_rcu();
809 unregister_netdevice(sdata->dev); 872 unregister_netdevice(sdata->dev);
810} 873}
@@ -820,7 +883,16 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
820 ASSERT_RTNL(); 883 ASSERT_RTNL();
821 884
822 list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { 885 list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) {
886 /*
887 * we cannot hold the iflist_mtx across unregister_netdevice,
888 * but we only need to hold it for list modifications to lock
889 * out readers since we're under the RTNL here as all other
890 * writers.
891 */
892 mutex_lock(&local->iflist_mtx);
823 list_del(&sdata->list); 893 list_del(&sdata->list);
894 mutex_unlock(&local->iflist_mtx);
895
824 unregister_netdevice(sdata->dev); 896 unregister_netdevice(sdata->dev);
825 } 897 }
826} 898}
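The interface-list locking DOC comment added at the top of iface.c reduces to a simple rule: writers hold the RTNL plus iflist_mtx around list_add_tail_rcu()/list_del_rcu(), while readers may hold any one of the RTNL, iflist_mtx, or an RCU read-side section. A rough sketch of the two reader styles, assuming only what that comment states (the function names are illustrative):

/* sleepable reader: serialize against writers with iflist_mtx */
static int example_count_interfaces(struct ieee80211_local *local)
{
        struct ieee80211_sub_if_data *sdata;
        int n = 0;

        mutex_lock(&local->iflist_mtx);
        list_for_each_entry(sdata, &local->interfaces, list)
                n++;
        mutex_unlock(&local->iflist_mtx);
        return n;
}

/* atomic reader: traverse under RCU, no sleeping inside the section */
static int example_count_interfaces_rcu(struct ieee80211_local *local)
{
        struct ieee80211_sub_if_data *sdata;
        int n = 0;

        rcu_read_lock();
        list_for_each_entry_rcu(sdata, &local->interfaces, list)
                n++;
        rcu_read_unlock();
        return n;
}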
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 999f7aa42326..687acf23054d 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -18,6 +18,7 @@
18#include "ieee80211_i.h" 18#include "ieee80211_i.h"
19#include "debugfs_key.h" 19#include "debugfs_key.h"
20#include "aes_ccm.h" 20#include "aes_ccm.h"
21#include "aes_cmac.h"
21 22
22 23
23/** 24/**
@@ -47,7 +48,6 @@
47 */ 48 */
48 49
49static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; 50static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
50static const u8 zero_addr[ETH_ALEN];
51 51
52/* key mutex: used to synchronise todo runners */ 52/* key mutex: used to synchronise todo runners */
53static DEFINE_MUTEX(key_mutex); 53static DEFINE_MUTEX(key_mutex);
@@ -108,29 +108,18 @@ static void assert_key_lock(void)
108 WARN_ON(!mutex_is_locked(&key_mutex)); 108 WARN_ON(!mutex_is_locked(&key_mutex));
109} 109}
110 110
111static const u8 *get_mac_for_key(struct ieee80211_key *key) 111static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key)
112{ 112{
113 const u8 *addr = bcast_addr;
114
115 /*
116 * If we're an AP we won't ever receive frames with a non-WEP
117 * group key so we tell the driver that by using the zero MAC
118 * address to indicate a transmit-only key.
119 */
120 if (key->conf.alg != ALG_WEP &&
121 (key->sdata->vif.type == NL80211_IFTYPE_AP ||
122 key->sdata->vif.type == NL80211_IFTYPE_AP_VLAN))
123 addr = zero_addr;
124
125 if (key->sta) 113 if (key->sta)
126 addr = key->sta->sta.addr; 114 return &key->sta->sta;
127 115
128 return addr; 116 return NULL;
129} 117}
130 118
131static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) 119static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
132{ 120{
133 const u8 *addr; 121 struct ieee80211_sub_if_data *sdata;
122 struct ieee80211_sta *sta;
134 int ret; 123 int ret;
135 124
136 assert_key_lock(); 125 assert_key_lock();
@@ -139,11 +128,16 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
139 if (!key->local->ops->set_key) 128 if (!key->local->ops->set_key)
140 return; 129 return;
141 130
142 addr = get_mac_for_key(key); 131 sta = get_sta_for_key(key);
132
133 sdata = key->sdata;
134 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
135 sdata = container_of(sdata->bss,
136 struct ieee80211_sub_if_data,
137 u.ap);
143 138
144 ret = key->local->ops->set_key(local_to_hw(key->local), SET_KEY, 139 ret = key->local->ops->set_key(local_to_hw(key->local), SET_KEY,
145 key->sdata->dev->dev_addr, addr, 140 &sdata->vif, sta, &key->conf);
146 &key->conf);
147 141
148 if (!ret) { 142 if (!ret) {
149 spin_lock(&todo_lock); 143 spin_lock(&todo_lock);
@@ -155,12 +149,13 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
155 printk(KERN_ERR "mac80211-%s: failed to set key " 149 printk(KERN_ERR "mac80211-%s: failed to set key "
156 "(%d, %pM) to hardware (%d)\n", 150 "(%d, %pM) to hardware (%d)\n",
157 wiphy_name(key->local->hw.wiphy), 151 wiphy_name(key->local->hw.wiphy),
158 key->conf.keyidx, addr, ret); 152 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
159} 153}
160 154
161static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) 155static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
162{ 156{
163 const u8 *addr; 157 struct ieee80211_sub_if_data *sdata;
158 struct ieee80211_sta *sta;
164 int ret; 159 int ret;
165 160
166 assert_key_lock(); 161 assert_key_lock();
@@ -176,17 +171,22 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
176 } 171 }
177 spin_unlock(&todo_lock); 172 spin_unlock(&todo_lock);
178 173
179 addr = get_mac_for_key(key); 174 sta = get_sta_for_key(key);
175 sdata = key->sdata;
176
177 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
178 sdata = container_of(sdata->bss,
179 struct ieee80211_sub_if_data,
180 u.ap);
180 181
181 ret = key->local->ops->set_key(local_to_hw(key->local), DISABLE_KEY, 182 ret = key->local->ops->set_key(local_to_hw(key->local), DISABLE_KEY,
182 key->sdata->dev->dev_addr, addr, 183 &sdata->vif, sta, &key->conf);
183 &key->conf);
184 184
185 if (ret) 185 if (ret)
186 printk(KERN_ERR "mac80211-%s: failed to remove key " 186 printk(KERN_ERR "mac80211-%s: failed to remove key "
187 "(%d, %pM) from hardware (%d)\n", 187 "(%d, %pM) from hardware (%d)\n",
188 wiphy_name(key->local->hw.wiphy), 188 wiphy_name(key->local->hw.wiphy),
189 key->conf.keyidx, addr, ret); 189 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
190 190
191 spin_lock(&todo_lock); 191 spin_lock(&todo_lock);
192 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; 192 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
@@ -216,13 +216,38 @@ void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx)
216 spin_unlock_irqrestore(&sdata->local->key_lock, flags); 216 spin_unlock_irqrestore(&sdata->local->key_lock, flags);
217} 217}
218 218
219static void
220__ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx)
221{
222 struct ieee80211_key *key = NULL;
223
224 if (idx >= NUM_DEFAULT_KEYS &&
225 idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
226 key = sdata->keys[idx];
227
228 rcu_assign_pointer(sdata->default_mgmt_key, key);
229
230 if (key)
231 add_todo(key, KEY_FLAG_TODO_DEFMGMTKEY);
232}
233
234void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
235 int idx)
236{
237 unsigned long flags;
238
239 spin_lock_irqsave(&sdata->local->key_lock, flags);
240 __ieee80211_set_default_mgmt_key(sdata, idx);
241 spin_unlock_irqrestore(&sdata->local->key_lock, flags);
242}
243
219 244
220static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, 245static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
221 struct sta_info *sta, 246 struct sta_info *sta,
222 struct ieee80211_key *old, 247 struct ieee80211_key *old,
223 struct ieee80211_key *new) 248 struct ieee80211_key *new)
224{ 249{
225 int idx, defkey; 250 int idx, defkey, defmgmtkey;
226 251
227 if (new) 252 if (new)
228 list_add(&new->list, &sdata->key_list); 253 list_add(&new->list, &sdata->key_list);
@@ -238,13 +263,19 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
238 idx = new->conf.keyidx; 263 idx = new->conf.keyidx;
239 264
240 defkey = old && sdata->default_key == old; 265 defkey = old && sdata->default_key == old;
266 defmgmtkey = old && sdata->default_mgmt_key == old;
241 267
242 if (defkey && !new) 268 if (defkey && !new)
243 __ieee80211_set_default_key(sdata, -1); 269 __ieee80211_set_default_key(sdata, -1);
270 if (defmgmtkey && !new)
271 __ieee80211_set_default_mgmt_key(sdata, -1);
244 272
245 rcu_assign_pointer(sdata->keys[idx], new); 273 rcu_assign_pointer(sdata->keys[idx], new);
246 if (defkey && new) 274 if (defkey && new)
247 __ieee80211_set_default_key(sdata, new->conf.keyidx); 275 __ieee80211_set_default_key(sdata, new->conf.keyidx);
276 if (defmgmtkey && new)
277 __ieee80211_set_default_mgmt_key(sdata,
278 new->conf.keyidx);
248 } 279 }
249 280
250 if (old) { 281 if (old) {
@@ -263,7 +294,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
263{ 294{
264 struct ieee80211_key *key; 295 struct ieee80211_key *key;
265 296
266 BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS); 297 BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS);
267 298
268 key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); 299 key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL);
269 if (!key) 300 if (!key)
@@ -292,6 +323,10 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
292 key->conf.iv_len = CCMP_HDR_LEN; 323 key->conf.iv_len = CCMP_HDR_LEN;
293 key->conf.icv_len = CCMP_MIC_LEN; 324 key->conf.icv_len = CCMP_MIC_LEN;
294 break; 325 break;
326 case ALG_AES_CMAC:
327 key->conf.iv_len = 0;
328 key->conf.icv_len = sizeof(struct ieee80211_mmie);
329 break;
295 } 330 }
296 memcpy(key->conf.key, key_data, key_len); 331 memcpy(key->conf.key, key_data, key_len);
297 INIT_LIST_HEAD(&key->list); 332 INIT_LIST_HEAD(&key->list);
@@ -309,6 +344,19 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
309 } 344 }
310 } 345 }
311 346
347 if (alg == ALG_AES_CMAC) {
348 /*
349 * Initialize AES key state here as an optimization so that
350 * it does not need to be initialized for every packet.
351 */
352 key->u.aes_cmac.tfm =
353 ieee80211_aes_cmac_key_setup(key_data);
354 if (!key->u.aes_cmac.tfm) {
355 kfree(key);
356 return NULL;
357 }
358 }
359
312 return key; 360 return key;
313} 361}
314 362
@@ -352,7 +400,7 @@ void ieee80211_key_link(struct ieee80211_key *key,
352 */ 400 */
353 401
354 /* same here, the AP could be using QoS */ 402 /* same here, the AP could be using QoS */
355 ap = sta_info_get(key->local, key->sdata->u.sta.bssid); 403 ap = sta_info_get(key->local, key->sdata->u.mgd.bssid);
356 if (ap) { 404 if (ap) {
357 if (test_sta_flags(ap, WLAN_STA_WME)) 405 if (test_sta_flags(ap, WLAN_STA_WME))
358 key->conf.flags |= 406 key->conf.flags |=
@@ -462,6 +510,8 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
462 510
463 if (key->conf.alg == ALG_CCMP) 511 if (key->conf.alg == ALG_CCMP)
464 ieee80211_aes_key_free(key->u.ccmp.tfm); 512 ieee80211_aes_key_free(key->u.ccmp.tfm);
513 if (key->conf.alg == ALG_AES_CMAC)
514 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
465 ieee80211_debugfs_key_remove(key); 515 ieee80211_debugfs_key_remove(key);
466 516
467 kfree(key); 517 kfree(key);
@@ -484,6 +534,7 @@ static void __ieee80211_key_todo(void)
484 list_del_init(&key->todo); 534 list_del_init(&key->todo);
485 todoflags = key->flags & (KEY_FLAG_TODO_ADD_DEBUGFS | 535 todoflags = key->flags & (KEY_FLAG_TODO_ADD_DEBUGFS |
486 KEY_FLAG_TODO_DEFKEY | 536 KEY_FLAG_TODO_DEFKEY |
537 KEY_FLAG_TODO_DEFMGMTKEY |
487 KEY_FLAG_TODO_HWACCEL_ADD | 538 KEY_FLAG_TODO_HWACCEL_ADD |
488 KEY_FLAG_TODO_HWACCEL_REMOVE | 539 KEY_FLAG_TODO_HWACCEL_REMOVE |
489 KEY_FLAG_TODO_DELETE); 540 KEY_FLAG_TODO_DELETE);
@@ -501,6 +552,11 @@ static void __ieee80211_key_todo(void)
501 ieee80211_debugfs_key_add_default(key->sdata); 552 ieee80211_debugfs_key_add_default(key->sdata);
502 work_done = true; 553 work_done = true;
503 } 554 }
555 if (todoflags & KEY_FLAG_TODO_DEFMGMTKEY) {
556 ieee80211_debugfs_key_remove_mgmt_default(key->sdata);
557 ieee80211_debugfs_key_add_mgmt_default(key->sdata);
558 work_done = true;
559 }
504 if (todoflags & KEY_FLAG_TODO_HWACCEL_ADD) { 560 if (todoflags & KEY_FLAG_TODO_HWACCEL_ADD) {
505 ieee80211_key_enable_hw_accel(key); 561 ieee80211_key_enable_hw_accel(key);
506 work_done = true; 562 work_done = true;
@@ -536,6 +592,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata)
536 ieee80211_key_lock(); 592 ieee80211_key_lock();
537 593
538 ieee80211_debugfs_key_remove_default(sdata); 594 ieee80211_debugfs_key_remove_default(sdata);
595 ieee80211_debugfs_key_remove_mgmt_default(sdata);
539 596
540 spin_lock_irqsave(&sdata->local->key_lock, flags); 597 spin_lock_irqsave(&sdata->local->key_lock, flags);
541 list_for_each_entry_safe(key, tmp, &sdata->key_list, list) 598 list_for_each_entry_safe(key, tmp, &sdata->key_list, list)
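Key programming now goes through set_key(hw, cmd, vif, sta, conf) rather than a pair of MAC addresses, with sta being NULL for group keys and AP_VLAN keys remapped to the parent AP interface beforehand. A sketch of a driver-side callback under the new calling convention ("exdrv" is a placeholder, not a real driver):

static int exdrv_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
                         struct ieee80211_vif *vif, struct ieee80211_sta *sta,
                         struct ieee80211_key_conf *conf)
{
        /* sta == NULL means a group (broadcast/multicast) key;
         * vif is never an AP_VLAN thanks to the remapping above */
        switch (cmd) {
        case SET_KEY:
                /* program conf->keyidx / conf->alg into the hardware */
                return 0;
        case DISABLE_KEY:
                /* free the hardware key slot again */
                return 0;
        }
        return -EINVAL;
}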
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 425816e0996c..215d3ef42a4f 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -46,6 +46,8 @@ struct sta_info;
46 * acceleration. 46 * acceleration.
47 * @KEY_FLAG_TODO_DEFKEY: Key is default key and debugfs needs to be updated. 47 * @KEY_FLAG_TODO_DEFKEY: Key is default key and debugfs needs to be updated.
48 * @KEY_FLAG_TODO_ADD_DEBUGFS: Key needs to be added to debugfs. 48 * @KEY_FLAG_TODO_ADD_DEBUGFS: Key needs to be added to debugfs.
49 * @KEY_FLAG_TODO_DEFMGMTKEY: Key is default management key and debugfs needs
50 * to be updated.
49 */ 51 */
50enum ieee80211_internal_key_flags { 52enum ieee80211_internal_key_flags {
51 KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0), 53 KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0),
@@ -54,6 +56,7 @@ enum ieee80211_internal_key_flags {
54 KEY_FLAG_TODO_HWACCEL_REMOVE = BIT(3), 56 KEY_FLAG_TODO_HWACCEL_REMOVE = BIT(3),
55 KEY_FLAG_TODO_DEFKEY = BIT(4), 57 KEY_FLAG_TODO_DEFKEY = BIT(4),
56 KEY_FLAG_TODO_ADD_DEBUGFS = BIT(5), 58 KEY_FLAG_TODO_ADD_DEBUGFS = BIT(5),
59 KEY_FLAG_TODO_DEFMGMTKEY = BIT(6),
57}; 60};
58 61
59struct tkip_ctx { 62struct tkip_ctx {
@@ -96,6 +99,16 @@ struct ieee80211_key {
96 u8 tx_crypto_buf[6 * AES_BLOCK_LEN]; 99 u8 tx_crypto_buf[6 * AES_BLOCK_LEN];
97 u8 rx_crypto_buf[6 * AES_BLOCK_LEN]; 100 u8 rx_crypto_buf[6 * AES_BLOCK_LEN];
98 } ccmp; 101 } ccmp;
102 struct {
103 u8 tx_pn[6];
104 u8 rx_pn[6];
105 struct crypto_cipher *tfm;
106 u32 replays; /* dot11RSNAStatsCMACReplays */
107 u32 icverrors; /* dot11RSNAStatsCMACICVErrors */
108 /* scratch buffers for virt_to_page() (crypto API) */
109 u8 tx_crypto_buf[2 * AES_BLOCK_LEN];
110 u8 rx_crypto_buf[2 * AES_BLOCK_LEN];
111 } aes_cmac;
99 } u; 112 } u;
100 113
101 /* number of times this key has been used */ 114 /* number of times this key has been used */
@@ -114,6 +127,7 @@ struct ieee80211_key {
114 struct dentry *tx_spec; 127 struct dentry *tx_spec;
115 struct dentry *rx_spec; 128 struct dentry *rx_spec;
116 struct dentry *replays; 129 struct dentry *replays;
130 struct dentry *icverrors;
117 struct dentry *key; 131 struct dentry *key;
118 struct dentry *ifindex; 132 struct dentry *ifindex;
119 int cnt; 133 int cnt;
@@ -140,6 +154,8 @@ void ieee80211_key_link(struct ieee80211_key *key,
140 struct sta_info *sta); 154 struct sta_info *sta);
141void ieee80211_key_free(struct ieee80211_key *key); 155void ieee80211_key_free(struct ieee80211_key *key);
142void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); 156void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx);
157void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
158 int idx);
143void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); 159void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata);
144void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); 160void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata);
145void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); 161void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata);
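The AES-CMAC (management frame protection) keys introduced here occupy the index slots directly after the ordinary default keys, which is what the range checks in ieee80211_key_alloc() and __ieee80211_set_default_mgmt_key() encode. The same classification as a small helper, reusing the constants from the patch (the helper name is illustrative):

static bool example_key_idx_is_mgmt(int idx)
{
        /* 0 .. NUM_DEFAULT_KEYS-1: ordinary default key slots;
         * the next NUM_DEFAULT_MGMT_KEYS slots hold the 802.11w keys */
        return idx >= NUM_DEFAULT_KEYS &&
               idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS;
}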
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 24b14363d6e7..a6f1d8a869bc 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -161,30 +161,67 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed)
161 if (WARN_ON(!netif_running(sdata->dev))) 161 if (WARN_ON(!netif_running(sdata->dev)))
162 return 0; 162 return 0;
163 163
164 if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN))
165 return -EINVAL;
166
167 if (!local->ops->config_interface)
168 return 0;
169
170 memset(&conf, 0, sizeof(conf)); 164 memset(&conf, 0, sizeof(conf));
171 conf.changed = changed;
172 165
173 if (sdata->vif.type == NL80211_IFTYPE_STATION || 166 if (sdata->vif.type == NL80211_IFTYPE_STATION)
174 sdata->vif.type == NL80211_IFTYPE_ADHOC) 167 conf.bssid = sdata->u.mgd.bssid;
175 conf.bssid = sdata->u.sta.bssid; 168 else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
169 conf.bssid = sdata->u.ibss.bssid;
176 else if (sdata->vif.type == NL80211_IFTYPE_AP) 170 else if (sdata->vif.type == NL80211_IFTYPE_AP)
177 conf.bssid = sdata->dev->dev_addr; 171 conf.bssid = sdata->dev->dev_addr;
178 else if (ieee80211_vif_is_mesh(&sdata->vif)) { 172 else if (ieee80211_vif_is_mesh(&sdata->vif)) {
179 u8 zero[ETH_ALEN] = { 0 }; 173 static const u8 zero[ETH_ALEN] = { 0 };
180 conf.bssid = zero; 174 conf.bssid = zero;
181 } else { 175 } else {
182 WARN_ON(1); 176 WARN_ON(1);
183 return -EINVAL; 177 return -EINVAL;
184 } 178 }
185 179
186 if (WARN_ON(!conf.bssid && (changed & IEEE80211_IFCC_BSSID))) 180 if (!local->ops->config_interface)
187 return -EINVAL; 181 return 0;
182
183 switch (sdata->vif.type) {
184 case NL80211_IFTYPE_AP:
185 case NL80211_IFTYPE_ADHOC:
186 case NL80211_IFTYPE_MESH_POINT:
187 break;
188 default:
189 /* do not warn to simplify caller in scan.c */
190 changed &= ~IEEE80211_IFCC_BEACON_ENABLED;
191 if (WARN_ON(changed & IEEE80211_IFCC_BEACON))
192 return -EINVAL;
193 changed &= ~IEEE80211_IFCC_BEACON;
194 break;
195 }
196
197 if (changed & IEEE80211_IFCC_BEACON_ENABLED) {
198 if (local->sw_scanning) {
199 conf.enable_beacon = false;
200 } else {
201 /*
202 * Beacon should be enabled, but AP mode must
203 * check whether there is a beacon configured.
204 */
205 switch (sdata->vif.type) {
206 case NL80211_IFTYPE_AP:
207 conf.enable_beacon =
208 !!rcu_dereference(sdata->u.ap.beacon);
209 break;
210 case NL80211_IFTYPE_ADHOC:
211 conf.enable_beacon = !!sdata->u.ibss.probe_resp;
212 break;
213 case NL80211_IFTYPE_MESH_POINT:
214 conf.enable_beacon = true;
215 break;
216 default:
217 /* not reached */
218 WARN_ON(1);
219 break;
220 }
221 }
222 }
223
224 conf.changed = changed;
188 225
189 return local->ops->config_interface(local_to_hw(local), 226 return local->ops->config_interface(local_to_hw(local),
190 &sdata->vif, &conf); 227 &sdata->vif, &conf);
@@ -208,26 +245,22 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
208 } 245 }
209 246
210 if (chan != local->hw.conf.channel || 247 if (chan != local->hw.conf.channel ||
211 channel_type != local->hw.conf.ht.channel_type) { 248 channel_type != local->hw.conf.channel_type) {
212 local->hw.conf.channel = chan; 249 local->hw.conf.channel = chan;
213 local->hw.conf.ht.channel_type = channel_type; 250 local->hw.conf.channel_type = channel_type;
214 switch (channel_type) {
215 case NL80211_CHAN_NO_HT:
216 local->hw.conf.ht.enabled = false;
217 break;
218 case NL80211_CHAN_HT20:
219 case NL80211_CHAN_HT40MINUS:
220 case NL80211_CHAN_HT40PLUS:
221 local->hw.conf.ht.enabled = true;
222 break;
223 }
224 changed |= IEEE80211_CONF_CHANGE_CHANNEL; 251 changed |= IEEE80211_CONF_CHANGE_CHANNEL;
225 } 252 }
226 253
227 if (!local->hw.conf.power_level) 254 if (local->sw_scanning)
228 power = chan->max_power; 255 power = chan->max_power;
229 else 256 else
230 power = min(chan->max_power, local->hw.conf.power_level); 257 power = local->power_constr_level ?
258 (chan->max_power - local->power_constr_level) :
259 chan->max_power;
260
261 if (local->user_power_level)
262 power = min(power, local->user_power_level);
263
231 if (local->hw.conf.power_level != power) { 264 if (local->hw.conf.power_level != power) {
232 changed |= IEEE80211_CONF_CHANGE_POWER; 265 changed |= IEEE80211_CONF_CHANGE_POWER;
233 local->hw.conf.power_level = power; 266 local->hw.conf.power_level = power;
@@ -667,7 +700,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
667 const struct ieee80211_ops *ops) 700 const struct ieee80211_ops *ops)
668{ 701{
669 struct ieee80211_local *local; 702 struct ieee80211_local *local;
670 int priv_size; 703 int priv_size, i;
671 struct wiphy *wiphy; 704 struct wiphy *wiphy;
672 705
673 /* Ensure 32-byte alignment of our private data and hw private data. 706 /* Ensure 32-byte alignment of our private data and hw private data.
@@ -695,6 +728,10 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
695 return NULL; 728 return NULL;
696 729
697 wiphy->privid = mac80211_wiphy_privid; 730 wiphy->privid = mac80211_wiphy_privid;
731 wiphy->max_scan_ssids = 4;
732 /* Yes, putting cfg80211_bss into ieee80211_bss is a hack */
733 wiphy->bss_priv_size = sizeof(struct ieee80211_bss) -
734 sizeof(struct cfg80211_bss);
698 735
699 local = wiphy_priv(wiphy); 736 local = wiphy_priv(wiphy);
700 local->hw.wiphy = wiphy; 737 local->hw.wiphy = wiphy;
@@ -722,6 +759,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
722 local->hw.conf.radio_enabled = true; 759 local->hw.conf.radio_enabled = true;
723 760
724 INIT_LIST_HEAD(&local->interfaces); 761 INIT_LIST_HEAD(&local->interfaces);
762 mutex_init(&local->iflist_mtx);
725 763
726 spin_lock_init(&local->key_lock); 764 spin_lock_init(&local->key_lock);
727 765
@@ -738,6 +776,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
738 776
739 sta_info_init(local); 777 sta_info_init(local);
740 778
779 for (i = 0; i < IEEE80211_MAX_QUEUES; i++)
780 skb_queue_head_init(&local->pending[i]);
741 tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, 781 tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending,
742 (unsigned long)local); 782 (unsigned long)local);
743 tasklet_disable(&local->tx_pending_tasklet); 783 tasklet_disable(&local->tx_pending_tasklet);
@@ -750,10 +790,29 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
750 skb_queue_head_init(&local->skb_queue); 790 skb_queue_head_init(&local->skb_queue);
751 skb_queue_head_init(&local->skb_queue_unreliable); 791 skb_queue_head_init(&local->skb_queue_unreliable);
752 792
793 spin_lock_init(&local->ampdu_lock);
794
753 return local_to_hw(local); 795 return local_to_hw(local);
754} 796}
755EXPORT_SYMBOL(ieee80211_alloc_hw); 797EXPORT_SYMBOL(ieee80211_alloc_hw);
756 798
799static const struct net_device_ops ieee80211_master_ops = {
800 .ndo_start_xmit = ieee80211_master_start_xmit,
801 .ndo_open = ieee80211_master_open,
802 .ndo_stop = ieee80211_master_stop,
803 .ndo_set_multicast_list = ieee80211_master_set_multicast_list,
804 .ndo_select_queue = ieee80211_select_queue,
805};
806
807static void ieee80211_master_setup(struct net_device *mdev)
808{
809 mdev->type = ARPHRD_IEEE80211;
810 mdev->netdev_ops = &ieee80211_master_ops;
811 mdev->header_ops = &ieee80211_header_ops;
812 mdev->tx_queue_len = 1000;
813 mdev->addr_len = ETH_ALEN;
814}
815
757int ieee80211_register_hw(struct ieee80211_hw *hw) 816int ieee80211_register_hw(struct ieee80211_hw *hw)
758{ 817{
759 struct ieee80211_local *local = hw_to_local(hw); 818 struct ieee80211_local *local = hw_to_local(hw);
@@ -761,25 +820,33 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
761 enum ieee80211_band band; 820 enum ieee80211_band band;
762 struct net_device *mdev; 821 struct net_device *mdev;
763 struct ieee80211_master_priv *mpriv; 822 struct ieee80211_master_priv *mpriv;
823 int channels, i, j;
764 824
765 /* 825 /*
766 * generic code guarantees at least one band, 826 * generic code guarantees at least one band,
767 * set this very early because much code assumes 827 * set this very early because much code assumes
768 * that hw.conf.channel is assigned 828 * that hw.conf.channel is assigned
769 */ 829 */
830 channels = 0;
770 for (band = 0; band < IEEE80211_NUM_BANDS; band++) { 831 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
771 struct ieee80211_supported_band *sband; 832 struct ieee80211_supported_band *sband;
772 833
773 sband = local->hw.wiphy->bands[band]; 834 sband = local->hw.wiphy->bands[band];
774 if (sband) { 835 if (sband && !local->oper_channel) {
775 /* init channel we're on */ 836 /* init channel we're on */
776 local->hw.conf.channel = 837 local->hw.conf.channel =
777 local->oper_channel = 838 local->oper_channel =
778 local->scan_channel = &sband->channels[0]; 839 local->scan_channel = &sband->channels[0];
779 break;
780 } 840 }
841 if (sband)
842 channels += sband->n_channels;
781 } 843 }
782 844
845 local->int_scan_req.n_channels = channels;
846 local->int_scan_req.channels = kzalloc(sizeof(void *) * channels, GFP_KERNEL);
847 if (!local->int_scan_req.channels)
848 return -ENOMEM;
849
783 /* if low-level driver supports AP, we also support VLAN */ 850 /* if low-level driver supports AP, we also support VLAN */
784 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) 851 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP))
785 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); 852 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN);
@@ -787,9 +854,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
787 /* mac80211 always supports monitor */ 854 /* mac80211 always supports monitor */
788 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); 855 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR);
789 856
857 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
858 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
859 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
860 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC;
861
790 result = wiphy_register(local->hw.wiphy); 862 result = wiphy_register(local->hw.wiphy);
791 if (result < 0) 863 if (result < 0)
792 return result; 864 goto fail_wiphy_register;
793 865
794 /* 866 /*
795 * We use the number of queues for feature tests (QoS, HT) internally 867 * We use the number of queues for feature tests (QoS, HT) internally
@@ -797,14 +869,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
797 */ 869 */
798 if (hw->queues > IEEE80211_MAX_QUEUES) 870 if (hw->queues > IEEE80211_MAX_QUEUES)
799 hw->queues = IEEE80211_MAX_QUEUES; 871 hw->queues = IEEE80211_MAX_QUEUES;
800 if (hw->ampdu_queues > IEEE80211_MAX_AMPDU_QUEUES)
801 hw->ampdu_queues = IEEE80211_MAX_AMPDU_QUEUES;
802 if (hw->queues < 4)
803 hw->ampdu_queues = 0;
804 872
805 mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv), 873 mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv),
806 "wmaster%d", ether_setup, 874 "wmaster%d", ieee80211_master_setup,
807 ieee80211_num_queues(hw)); 875 hw->queues);
808 if (!mdev) 876 if (!mdev)
809 goto fail_mdev_alloc; 877 goto fail_mdev_alloc;
810 878
@@ -812,17 +880,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
812 mpriv->local = local; 880 mpriv->local = local;
813 local->mdev = mdev; 881 local->mdev = mdev;
814 882
815 ieee80211_rx_bss_list_init(local);
816
817 mdev->hard_start_xmit = ieee80211_master_start_xmit;
818 mdev->open = ieee80211_master_open;
819 mdev->stop = ieee80211_master_stop;
820 mdev->type = ARPHRD_IEEE80211;
821 mdev->header_ops = &ieee80211_header_ops;
822 mdev->set_multicast_list = ieee80211_master_set_multicast_list;
823
824 local->hw.workqueue = 883 local->hw.workqueue =
825 create_freezeable_workqueue(wiphy_name(local->hw.wiphy)); 884 create_singlethread_workqueue(wiphy_name(local->hw.wiphy));
826 if (!local->hw.workqueue) { 885 if (!local->hw.workqueue) {
827 result = -ENOMEM; 886 result = -ENOMEM;
828 goto fail_workqueue; 887 goto fail_workqueue;
@@ -846,15 +905,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
846 905
847 local->hw.conf.listen_interval = local->hw.max_listen_interval; 906 local->hw.conf.listen_interval = local->hw.max_listen_interval;
848 907
849 local->wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC |
850 IEEE80211_HW_SIGNAL_DB |
851 IEEE80211_HW_SIGNAL_DBM) ?
852 IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID;
853 local->wstats_flags |= local->hw.flags & IEEE80211_HW_NOISE_DBM ?
854 IW_QUAL_NOISE_UPDATED : IW_QUAL_NOISE_INVALID;
855 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
856 local->wstats_flags |= IW_QUAL_DBM;
857
858 result = sta_info_start(local); 908 result = sta_info_start(local);
859 if (result < 0) 909 if (result < 0)
860 goto fail_sta_info; 910 goto fail_sta_info;
@@ -866,6 +916,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
866 916
867 memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); 917 memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
868 SET_NETDEV_DEV(local->mdev, wiphy_dev(local->hw.wiphy)); 918 SET_NETDEV_DEV(local->mdev, wiphy_dev(local->hw.wiphy));
919 local->mdev->features |= NETIF_F_NETNS_LOCAL;
869 920
870 result = register_netdevice(local->mdev); 921 result = register_netdevice(local->mdev);
871 if (result < 0) 922 if (result < 0)
@@ -887,8 +938,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
887 goto fail_wep; 938 goto fail_wep;
888 } 939 }
889 940
890 local->mdev->select_queue = ieee80211_select_queue;
891
892 /* add one default STA interface if supported */ 941 /* add one default STA interface if supported */
893 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION)) { 942 if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION)) {
894 result = ieee80211_if_add(local, "wlan%d", NULL, 943 result = ieee80211_if_add(local, "wlan%d", NULL,
@@ -902,6 +951,20 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
902 951
903 ieee80211_led_init(local); 952 ieee80211_led_init(local);
904 953
954 /* alloc internal scan request */
955 i = 0;
956 local->int_scan_req.ssids = &local->scan_ssid;
957 local->int_scan_req.n_ssids = 1;
958 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
959 if (!hw->wiphy->bands[band])
960 continue;
961 for (j = 0; j < hw->wiphy->bands[band]->n_channels; j++) {
962 local->int_scan_req.channels[i] =
963 &hw->wiphy->bands[band]->channels[j];
964 i++;
965 }
966 }
967
905 return 0; 968 return 0;
906 969
907fail_wep: 970fail_wep:
@@ -920,6 +983,8 @@ fail_workqueue:
920 free_netdev(local->mdev); 983 free_netdev(local->mdev);
921fail_mdev_alloc: 984fail_mdev_alloc:
922 wiphy_unregister(local->hw.wiphy); 985 wiphy_unregister(local->hw.wiphy);
986fail_wiphy_register:
987 kfree(local->int_scan_req.channels);
923 return result; 988 return result;
924} 989}
925EXPORT_SYMBOL(ieee80211_register_hw); 990EXPORT_SYMBOL(ieee80211_register_hw);
@@ -947,7 +1012,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
947 1012
948 rtnl_unlock(); 1013 rtnl_unlock();
949 1014
950 ieee80211_rx_bss_list_deinit(local);
951 ieee80211_clear_tx_pending(local); 1015 ieee80211_clear_tx_pending(local);
952 sta_info_stop(local); 1016 sta_info_stop(local);
953 rate_control_deinitialize(local); 1017 rate_control_deinitialize(local);
@@ -965,6 +1029,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
965 ieee80211_wep_free(local); 1029 ieee80211_wep_free(local);
966 ieee80211_led_exit(local); 1030 ieee80211_led_exit(local);
967 free_netdev(local->mdev); 1031 free_netdev(local->mdev);
1032 kfree(local->int_scan_req.channels);
968} 1033}
969EXPORT_SYMBOL(ieee80211_unregister_hw); 1034EXPORT_SYMBOL(ieee80211_unregister_hw);
970 1035
@@ -972,6 +1037,8 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
972{ 1037{
973 struct ieee80211_local *local = hw_to_local(hw); 1038 struct ieee80211_local *local = hw_to_local(hw);
974 1039
1040 mutex_destroy(&local->iflist_mtx);
1041
975 wiphy_free(local->hw.wiphy); 1042 wiphy_free(local->hw.wiphy);
976} 1043}
977EXPORT_SYMBOL(ieee80211_free_hw); 1044EXPORT_SYMBOL(ieee80211_free_hw);
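The reworked transmit-power selection in ieee80211_hw_config() above uses the channel maximum while scanning, otherwise subtracts any advertised power constraint (local->power_constr_level), and finally clamps to the user-requested level when one is set. The same decision written out as a standalone sketch (plain ints in dBm, 0 meaning "not set" for the constraint and the user limit):

static int example_pick_power(int max_power, int constr_level,
                              int user_level, int scanning)
{
        int power;

        if (scanning)
                power = max_power;              /* probe at full power */
        else
                power = constr_level ? max_power - constr_level : max_power;

        if (user_level && user_level < power)   /* min(power, user_level) */
                power = user_level;

        return power;
}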
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 82f568e94365..9a3e5de0410a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -275,16 +275,6 @@ u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_t
275 & tbl->hash_mask; 275 & tbl->hash_mask;
276} 276}
277 277
278u8 mesh_id_hash(u8 *mesh_id, int mesh_id_len)
279{
280 if (!mesh_id_len)
281 return 1;
282 else if (mesh_id_len == 1)
283 return (u8) mesh_id[0];
284 else
285 return (u8) (mesh_id[0] + 2 * mesh_id[1]);
286}
287
288struct mesh_table *mesh_table_alloc(int size_order) 278struct mesh_table *mesh_table_alloc(int size_order)
289{ 279{
290 int i; 280 int i;
@@ -442,7 +432,8 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
442 432
443 ifmsh->housekeeping = true; 433 ifmsh->housekeeping = true;
444 queue_work(local->hw.workqueue, &ifmsh->work); 434 queue_work(local->hw.workqueue, &ifmsh->work);
445 ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); 435 ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON |
436 IEEE80211_IFCC_BEACON_ENABLED);
446} 437}
447 438
448void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) 439void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
@@ -476,7 +467,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
476 struct ieee80211_local *local = sdata->local; 467 struct ieee80211_local *local = sdata->local;
477 struct ieee802_11_elems elems; 468 struct ieee802_11_elems elems;
478 struct ieee80211_channel *channel; 469 struct ieee80211_channel *channel;
479 u64 supp_rates = 0; 470 u32 supp_rates = 0;
480 size_t baselen; 471 size_t baselen;
481 int freq; 472 int freq;
482 enum ieee80211_band band = rx_status->band; 473 enum ieee80211_band band = rx_status->band;
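The mesh code here narrows its supported-rates bitmaps from u64 to u32, matching the u32 returned by ieee80211_sta_get_rates() in the header diff above; one bit per entry of the band's bitrate table is sufficient. A sketch of building such a bitmap (illustrative, and it assumes the band's bitrate table has at most 32 entries):

static u32 example_rate_bitmap(struct ieee80211_supported_band *sband)
{
        u32 rates = 0;
        int i;

        /* mark every rate the band advertises as usable */
        for (i = 0; i < sband->n_bitrates && i < 32; i++)
                rates |= BIT(i);

        return rates;
}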
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index c197ab545e54..d891d7ddccd7 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -24,15 +24,15 @@
24 * 24 *
25 * 25 *
26 * 26 *
27 * @MESH_PATH_ACTIVE: the mesh path is can be used for forwarding 27 * @MESH_PATH_ACTIVE: the mesh path can be used for forwarding
28 * @MESH_PATH_RESOLVED: the discovery process is running for this mesh path 28 * @MESH_PATH_RESOLVING: the discovery process is running for this mesh path
29 * @MESH_PATH_DSN_VALID: the mesh path contains a valid destination sequence 29 * @MESH_PATH_DSN_VALID: the mesh path contains a valid destination sequence
30 * number 30 * number
31 * @MESH_PATH_FIXED: the mesh path has been manually set and should not be 31 * @MESH_PATH_FIXED: the mesh path has been manually set and should not be
32 * modified 32 * modified
33 * @MESH_PATH_RESOLVED: the mesh path can has been resolved 33 * @MESH_PATH_RESOLVED: the mesh path can has been resolved
34 * 34 *
35 * MESH_PATH_RESOLVED and MESH_PATH_DELETE are used by the mesh path timer to 35 * MESH_PATH_RESOLVED is used by the mesh path timer to
36 * decide when to stop or cancel the mesh path discovery. 36 * decide when to stop or cancel the mesh path discovery.
37 */ 37 */
38enum mesh_path_flags { 38enum mesh_path_flags {
@@ -196,7 +196,6 @@ struct mesh_rmc {
196 196
197/* Public interfaces */ 197/* Public interfaces */
198/* Various */ 198/* Various */
199u8 mesh_id_hash(u8 *mesh_id, int mesh_id_len);
200int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); 199int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr);
201int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, 200int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr,
202 struct ieee80211_sub_if_data *sdata); 201 struct ieee80211_sub_if_data *sdata);
@@ -236,14 +235,13 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
236 struct ieee80211_mgmt *mgmt, size_t len); 235 struct ieee80211_mgmt *mgmt, size_t len);
237int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata); 236int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata);
238/* Mesh plinks */ 237/* Mesh plinks */
239void mesh_neighbour_update(u8 *hw_addr, u64 rates, 238void mesh_neighbour_update(u8 *hw_addr, u32 rates,
240 struct ieee80211_sub_if_data *sdata, bool add); 239 struct ieee80211_sub_if_data *sdata, bool add);
241bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); 240bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
242void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); 241void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
243void mesh_plink_broken(struct sta_info *sta); 242void mesh_plink_broken(struct sta_info *sta);
244void mesh_plink_deactivate(struct sta_info *sta); 243void mesh_plink_deactivate(struct sta_info *sta);
245int mesh_plink_open(struct sta_info *sta); 244int mesh_plink_open(struct sta_info *sta);
246int mesh_plink_close(struct sta_info *sta);
247void mesh_plink_block(struct sta_info *sta); 245void mesh_plink_block(struct sta_info *sta);
248void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, 246void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
249 struct ieee80211_mgmt *mgmt, size_t len, 247 struct ieee80211_mgmt *mgmt, size_t len,
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 71fe60961230..60b35accda91 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -58,7 +58,6 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae)
58#define PERR_IE_DST_ADDR(x) (x + 2) 58#define PERR_IE_DST_ADDR(x) (x + 2)
59#define PERR_IE_DST_DSN(x) u32_field_get(x, 8, 0); 59#define PERR_IE_DST_DSN(x) u32_field_get(x, 8, 0);
60 60
61#define TU_TO_EXP_TIME(x) (jiffies + msecs_to_jiffies(x * 1024 / 1000))
62#define MSEC_TO_TU(x) (x*1000/1024) 61#define MSEC_TO_TU(x) (x*1000/1024)
63#define DSN_GT(x, y) ((long) (y) - (long) (x) < 0) 62#define DSN_GT(x, y) ((long) (y) - (long) (x) < 0)
64#define DSN_LT(x, y) ((long) (x) - (long) (y) < 0) 63#define DSN_LT(x, y) ((long) (x) - (long) (y) < 0)
@@ -149,7 +148,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
149 pos += ETH_ALEN; 148 pos += ETH_ALEN;
150 memcpy(pos, &dst_dsn, 4); 149 memcpy(pos, &dst_dsn, 4);
151 150
152 ieee80211_tx_skb(sdata, skb, 0); 151 ieee80211_tx_skb(sdata, skb, 1);
153 return 0; 152 return 0;
154} 153}
155 154
@@ -198,7 +197,7 @@ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra,
198 pos += ETH_ALEN; 197 pos += ETH_ALEN;
199 memcpy(pos, &dst_dsn, 4); 198 memcpy(pos, &dst_dsn, 4);
200 199
201 ieee80211_tx_skb(sdata, skb, 0); 200 ieee80211_tx_skb(sdata, skb, 1);
202 return 0; 201 return 0;
203} 202}
204 203
@@ -759,7 +758,7 @@ enddiscovery:
759} 758}
760 759
761/** 760/**
762 * ieee80211s_lookup_nexthop - put the appropriate next hop on a mesh frame 761 * mesh_nexthop_lookup - put the appropriate next hop on a mesh frame
763 * 762 *
764 * @skb: 802.11 frame to be sent 763 * @skb: 802.11 frame to be sent
765 * @sdata: network subif the frame will be sent through 764 * @sdata: network subif the frame will be sent through
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 1159bdb4119c..a8bbdeca013a 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -93,7 +93,7 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta)
93 * on it in the lifecycle management section! 93 * on it in the lifecycle management section!
94 */ 94 */
95static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, 95static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata,
96 u8 *hw_addr, u64 rates) 96 u8 *hw_addr, u32 rates)
97{ 97{
98 struct ieee80211_local *local = sdata->local; 98 struct ieee80211_local *local = sdata->local;
99 struct sta_info *sta; 99 struct sta_info *sta;
@@ -218,11 +218,11 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
218 memcpy(pos, &reason, 2); 218 memcpy(pos, &reason, 2);
219 } 219 }
220 220
221 ieee80211_tx_skb(sdata, skb, 0); 221 ieee80211_tx_skb(sdata, skb, 1);
222 return 0; 222 return 0;
223} 223}
224 224
225void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct ieee80211_sub_if_data *sdata, 225void mesh_neighbour_update(u8 *hw_addr, u32 rates, struct ieee80211_sub_if_data *sdata,
226 bool peer_accepting_plinks) 226 bool peer_accepting_plinks)
227{ 227{
228 struct ieee80211_local *local = sdata->local; 228 struct ieee80211_local *local = sdata->local;
@@ -361,36 +361,6 @@ void mesh_plink_block(struct sta_info *sta)
361 spin_unlock_bh(&sta->lock); 361 spin_unlock_bh(&sta->lock);
362} 362}
363 363
364int mesh_plink_close(struct sta_info *sta)
365{
366 struct ieee80211_sub_if_data *sdata = sta->sdata;
367 __le16 llid, plid, reason;
368
369 mpl_dbg("Mesh plink: closing link with %pM\n", sta->sta.addr);
370 spin_lock_bh(&sta->lock);
371 sta->reason = cpu_to_le16(MESH_LINK_CANCELLED);
372 reason = sta->reason;
373
374 if (sta->plink_state == PLINK_LISTEN ||
375 sta->plink_state == PLINK_BLOCKED) {
376 mesh_plink_fsm_restart(sta);
377 spin_unlock_bh(&sta->lock);
378 return 0;
379 } else if (sta->plink_state == PLINK_ESTAB) {
380 __mesh_plink_deactivate(sta);
381 /* The timer should not be running */
382 mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata));
383 } else if (!mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)))
384 sta->ignore_plink_timer = true;
385
386 sta->plink_state = PLINK_HOLDING;
387 llid = sta->llid;
388 plid = sta->plid;
389 spin_unlock_bh(&sta->lock);
390 mesh_plink_frame_tx(sta->sdata, PLINK_CLOSE, sta->sta.addr, llid,
391 plid, reason);
392 return 0;
393}
394 364
395void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, 365void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt,
396 size_t len, struct ieee80211_rx_status *rx_status) 366 size_t len, struct ieee80211_rx_status *rx_status)
@@ -477,7 +447,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
477 spin_lock_bh(&sta->lock); 447 spin_lock_bh(&sta->lock);
478 } else if (!sta) { 448 } else if (!sta) {
479 /* ftype == PLINK_OPEN */ 449 /* ftype == PLINK_OPEN */
480 u64 rates; 450 u32 rates;
481 if (!mesh_plink_free_count(sdata)) { 451 if (!mesh_plink_free_count(sdata)) {
482 mpl_dbg("Mesh plink error: no more free plinks\n"); 452 mpl_dbg("Mesh plink error: no more free plinks\n");
483 rcu_read_unlock(); 453 rcu_read_unlock();
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 2b890af01ba4..7ecda9d59d8a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1,6 +1,6 @@
1/* 1/*
2 * BSS client mode implementation 2 * BSS client mode implementation
3 * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> 3 * Copyright 2003-2008, Jouni Malinen <j@w1.fi>
4 * Copyright 2004, Instant802 Networks, Inc. 4 * Copyright 2004, Instant802 Networks, Inc.
5 * Copyright 2005, Devicescape Software, Inc. 5 * Copyright 2005, Devicescape Software, Inc.
6 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> 6 * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
@@ -15,11 +15,8 @@
15#include <linux/if_ether.h> 15#include <linux/if_ether.h>
16#include <linux/skbuff.h> 16#include <linux/skbuff.h>
17#include <linux/if_arp.h> 17#include <linux/if_arp.h>
18#include <linux/wireless.h>
19#include <linux/random.h>
20#include <linux/etherdevice.h> 18#include <linux/etherdevice.h>
21#include <linux/rtnetlink.h> 19#include <linux/rtnetlink.h>
22#include <net/iw_handler.h>
23#include <net/mac80211.h> 20#include <net/mac80211.h>
24#include <asm/unaligned.h> 21#include <asm/unaligned.h>
25 22
@@ -33,17 +30,8 @@
33#define IEEE80211_ASSOC_TIMEOUT (HZ / 5) 30#define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
34#define IEEE80211_ASSOC_MAX_TRIES 3 31#define IEEE80211_ASSOC_MAX_TRIES 3
35#define IEEE80211_MONITORING_INTERVAL (2 * HZ) 32#define IEEE80211_MONITORING_INTERVAL (2 * HZ)
36#define IEEE80211_PROBE_INTERVAL (60 * HZ) 33#define IEEE80211_PROBE_IDLE_TIME (60 * HZ)
37#define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ) 34#define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ)
38#define IEEE80211_SCAN_INTERVAL (2 * HZ)
39#define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ)
40#define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ)
41
42#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ)
43#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ)
44
45#define IEEE80211_IBSS_MAX_STA_ENTRIES 128
46
47 35
48/* utils */ 36/* utils */
49static int ecw2cw(int ecw) 37static int ecw2cw(int ecw)
@@ -55,10 +43,10 @@ static u8 *ieee80211_bss_get_ie(struct ieee80211_bss *bss, u8 ie)
55{ 43{
56 u8 *end, *pos; 44 u8 *end, *pos;
57 45
58 pos = bss->ies; 46 pos = bss->cbss.information_elements;
59 if (pos == NULL) 47 if (pos == NULL)
60 return NULL; 48 return NULL;
61 end = pos + bss->ies_len; 49 end = pos + bss->cbss.len_information_elements;
62 50
63 while (pos + 1 < end) { 51 while (pos + 1 < end) {
64 if (pos + 2 + pos[1] > end) 52 if (pos + 2 + pos[1] > end)
@@ -73,7 +61,7 @@ static u8 *ieee80211_bss_get_ie(struct ieee80211_bss *bss, u8 ie)
73 61
74static int ieee80211_compatible_rates(struct ieee80211_bss *bss, 62static int ieee80211_compatible_rates(struct ieee80211_bss *bss,
75 struct ieee80211_supported_band *sband, 63 struct ieee80211_supported_band *sband,
76 u64 *rates) 64 u32 *rates)
77{ 65{
78 int i, j, count; 66 int i, j, count;
79 *rates = 0; 67 *rates = 0;
@@ -92,146 +80,11 @@ static int ieee80211_compatible_rates(struct ieee80211_bss *bss,
92 return count; 80 return count;
93} 81}
94 82
95/* also used by mesh code */
96u64 ieee80211_sta_get_rates(struct ieee80211_local *local,
97 struct ieee802_11_elems *elems,
98 enum ieee80211_band band)
99{
100 struct ieee80211_supported_band *sband;
101 struct ieee80211_rate *bitrates;
102 size_t num_rates;
103 u64 supp_rates;
104 int i, j;
105 sband = local->hw.wiphy->bands[band];
106
107 if (!sband) {
108 WARN_ON(1);
109 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
110 }
111
112 bitrates = sband->bitrates;
113 num_rates = sband->n_bitrates;
114 supp_rates = 0;
115 for (i = 0; i < elems->supp_rates_len +
116 elems->ext_supp_rates_len; i++) {
117 u8 rate = 0;
118 int own_rate;
119 if (i < elems->supp_rates_len)
120 rate = elems->supp_rates[i];
121 else if (elems->ext_supp_rates)
122 rate = elems->ext_supp_rates
123 [i - elems->supp_rates_len];
124 own_rate = 5 * (rate & 0x7f);
125 for (j = 0; j < num_rates; j++)
126 if (bitrates[j].bitrate == own_rate)
127 supp_rates |= BIT(j);
128 }
129 return supp_rates;
130}
131
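
The removed ieee80211_sta_get_rates() above translates the (Extended) Supported Rates elements of a received frame into a bitmap of indices into the local band's bitrate table. Each IE octet carries the rate in 500 kbit/s units in its low seven bits, with bit 7 flagging a basic rate, while mac80211 stores bitrates in 100 kbit/s units; that is what the 5 * (rate & 0x7f) conversion does, and the same encoding explains the (rate & 0x7f) * 5 > 110 test for "above 11 Mbit/s" in the removed ieee80211_sta_def_wmm_params() further down. A small self-contained sketch of the octet decoding (hypothetical helper, not part of this patch):

#include <stdbool.h>
#include <stdint.h>

struct decoded_rate {
	unsigned int units_100kbps;	/* as mac80211's ieee80211_rate.bitrate */
	bool basic;			/* bit 7: rate belongs to the basic rate set */
};

static struct decoded_rate decode_rate_octet(uint8_t octet)
{
	struct decoded_rate r = {
		.units_100kbps = 5 * (octet & 0x7f),	/* 500 kbit/s -> 100 kbit/s units */
		.basic = (octet & 0x80) != 0,
	};
	return r;
}

/* Example: octet 0x96 decodes to a basic rate of 110, i.e. 11 Mbit/s. */
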
132/* frame sending functions */ 83/* frame sending functions */
133 84
134/* also used by scanning code */ 85static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
135void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
136 u8 *ssid, size_t ssid_len)
137{
138 struct ieee80211_local *local = sdata->local;
139 struct ieee80211_supported_band *sband;
140 struct sk_buff *skb;
141 struct ieee80211_mgmt *mgmt;
142 u8 *pos, *supp_rates, *esupp_rates = NULL;
143 int i;
144
145 skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200);
146 if (!skb) {
147 printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
148 "request\n", sdata->dev->name);
149 return;
150 }
151 skb_reserve(skb, local->hw.extra_tx_headroom);
152
153 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
154 memset(mgmt, 0, 24);
155 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
156 IEEE80211_STYPE_PROBE_REQ);
157 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
158 if (dst) {
159 memcpy(mgmt->da, dst, ETH_ALEN);
160 memcpy(mgmt->bssid, dst, ETH_ALEN);
161 } else {
162 memset(mgmt->da, 0xff, ETH_ALEN);
163 memset(mgmt->bssid, 0xff, ETH_ALEN);
164 }
165 pos = skb_put(skb, 2 + ssid_len);
166 *pos++ = WLAN_EID_SSID;
167 *pos++ = ssid_len;
168 memcpy(pos, ssid, ssid_len);
169
170 supp_rates = skb_put(skb, 2);
171 supp_rates[0] = WLAN_EID_SUPP_RATES;
172 supp_rates[1] = 0;
173 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
174
175 for (i = 0; i < sband->n_bitrates; i++) {
176 struct ieee80211_rate *rate = &sband->bitrates[i];
177 if (esupp_rates) {
178 pos = skb_put(skb, 1);
179 esupp_rates[1]++;
180 } else if (supp_rates[1] == 8) {
181 esupp_rates = skb_put(skb, 3);
182 esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES;
183 esupp_rates[1] = 1;
184 pos = &esupp_rates[2];
185 } else {
186 pos = skb_put(skb, 1);
187 supp_rates[1]++;
188 }
189 *pos = rate->bitrate / 5;
190 }
191
192 ieee80211_tx_skb(sdata, skb, 0);
193}
194
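
The removed ieee80211_send_probe_req() above appends an SSID element followed by the station's rates; because the Supported Rates element may carry at most eight octets, the loop starts an Extended Supported Rates element once supp_rates[1] reaches 8. A hedged sketch of that split into a flat buffer (hypothetical helper and buffer layout; the real code appends directly to an skb):

#include <stddef.h>
#include <stdint.h>

#define WLAN_EID_SUPP_RATES	1
#define WLAN_EID_EXT_SUPP_RATES	50

/* rates[] in 100 kbit/s units; buf must be large enough; returns bytes written. */
static size_t build_rate_elements(uint8_t *buf, const uint16_t *rates, size_t n)
{
	size_t i, len = 0;
	size_t first = n < 8 ? n : 8;

	buf[len++] = WLAN_EID_SUPP_RATES;
	buf[len++] = (uint8_t)first;
	for (i = 0; i < first; i++)
		buf[len++] = (uint8_t)(rates[i] / 5);	/* back to 500 kbit/s units */

	if (n > 8) {
		buf[len++] = WLAN_EID_EXT_SUPP_RATES;
		buf[len++] = (uint8_t)(n - 8);
		for (i = 8; i < n; i++)
			buf[len++] = (uint8_t)(rates[i] / 5);
	}
	return len;
}
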
195static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
196 struct ieee80211_if_sta *ifsta,
197 int transaction, u8 *extra, size_t extra_len,
198 int encrypt)
199{
200 struct ieee80211_local *local = sdata->local;
201 struct sk_buff *skb;
202 struct ieee80211_mgmt *mgmt;
203
204 skb = dev_alloc_skb(local->hw.extra_tx_headroom +
205 sizeof(*mgmt) + 6 + extra_len);
206 if (!skb) {
207 printk(KERN_DEBUG "%s: failed to allocate buffer for auth "
208 "frame\n", sdata->dev->name);
209 return;
210 }
211 skb_reserve(skb, local->hw.extra_tx_headroom);
212
213 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
214 memset(mgmt, 0, 24 + 6);
215 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
216 IEEE80211_STYPE_AUTH);
217 if (encrypt)
218 mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
219 memcpy(mgmt->da, ifsta->bssid, ETH_ALEN);
220 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
221 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
222 mgmt->u.auth.auth_alg = cpu_to_le16(ifsta->auth_alg);
223 mgmt->u.auth.auth_transaction = cpu_to_le16(transaction);
224 ifsta->auth_transaction = transaction + 1;
225 mgmt->u.auth.status_code = cpu_to_le16(0);
226 if (extra)
227 memcpy(skb_put(skb, extra_len), extra, extra_len);
228
229 ieee80211_tx_skb(sdata, skb, encrypt);
230}
231
232static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
233 struct ieee80211_if_sta *ifsta)
234{ 86{
87 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
235 struct ieee80211_local *local = sdata->local; 88 struct ieee80211_local *local = sdata->local;
236 struct sk_buff *skb; 89 struct sk_buff *skb;
237 struct ieee80211_mgmt *mgmt; 90 struct ieee80211_mgmt *mgmt;
@@ -241,11 +94,11 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
241 struct ieee80211_bss *bss; 94 struct ieee80211_bss *bss;
242 int wmm = 0; 95 int wmm = 0;
243 struct ieee80211_supported_band *sband; 96 struct ieee80211_supported_band *sband;
244 u64 rates = 0; 97 u32 rates = 0;
245 98
246 skb = dev_alloc_skb(local->hw.extra_tx_headroom + 99 skb = dev_alloc_skb(local->hw.extra_tx_headroom +
247 sizeof(*mgmt) + 200 + ifsta->extra_ie_len + 100 sizeof(*mgmt) + 200 + ifmgd->extra_ie_len +
248 ifsta->ssid_len); 101 ifmgd->ssid_len);
249 if (!skb) { 102 if (!skb) {
250 printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " 103 printk(KERN_DEBUG "%s: failed to allocate buffer for assoc "
251 "frame\n", sdata->dev->name); 104 "frame\n", sdata->dev->name);
@@ -255,7 +108,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
255 108
256 sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; 109 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
257 110
258 capab = ifsta->capab; 111 capab = ifmgd->capab;
259 112
260 if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) { 113 if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) {
261 if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) 114 if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE))
@@ -264,11 +117,11 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
264 capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; 117 capab |= WLAN_CAPABILITY_SHORT_PREAMBLE;
265 } 118 }
266 119
267 bss = ieee80211_rx_bss_get(local, ifsta->bssid, 120 bss = ieee80211_rx_bss_get(local, ifmgd->bssid,
268 local->hw.conf.channel->center_freq, 121 local->hw.conf.channel->center_freq,
269 ifsta->ssid, ifsta->ssid_len); 122 ifmgd->ssid, ifmgd->ssid_len);
270 if (bss) { 123 if (bss) {
271 if (bss->capability & WLAN_CAPABILITY_PRIVACY) 124 if (bss->cbss.capability & WLAN_CAPABILITY_PRIVACY)
272 capab |= WLAN_CAPABILITY_PRIVACY; 125 capab |= WLAN_CAPABILITY_PRIVACY;
273 if (bss->wmm_used) 126 if (bss->wmm_used)
274 wmm = 1; 127 wmm = 1;
@@ -279,7 +132,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
279 * b-only mode) */ 132 * b-only mode) */
280 rates_len = ieee80211_compatible_rates(bss, sband, &rates); 133 rates_len = ieee80211_compatible_rates(bss, sband, &rates);
281 134
282 if ((bss->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) && 135 if ((bss->cbss.capability & WLAN_CAPABILITY_SPECTRUM_MGMT) &&
283 (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)) 136 (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT))
284 capab |= WLAN_CAPABILITY_SPECTRUM_MGMT; 137 capab |= WLAN_CAPABILITY_SPECTRUM_MGMT;
285 138
@@ -291,18 +144,18 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
291 144
292 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); 145 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
293 memset(mgmt, 0, 24); 146 memset(mgmt, 0, 24);
294 memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); 147 memcpy(mgmt->da, ifmgd->bssid, ETH_ALEN);
295 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); 148 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
296 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); 149 memcpy(mgmt->bssid, ifmgd->bssid, ETH_ALEN);
297 150
298 if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { 151 if (ifmgd->flags & IEEE80211_STA_PREV_BSSID_SET) {
299 skb_put(skb, 10); 152 skb_put(skb, 10);
300 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | 153 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
301 IEEE80211_STYPE_REASSOC_REQ); 154 IEEE80211_STYPE_REASSOC_REQ);
302 mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab); 155 mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab);
303 mgmt->u.reassoc_req.listen_interval = 156 mgmt->u.reassoc_req.listen_interval =
304 cpu_to_le16(local->hw.conf.listen_interval); 157 cpu_to_le16(local->hw.conf.listen_interval);
305 memcpy(mgmt->u.reassoc_req.current_ap, ifsta->prev_bssid, 158 memcpy(mgmt->u.reassoc_req.current_ap, ifmgd->prev_bssid,
306 ETH_ALEN); 159 ETH_ALEN);
307 } else { 160 } else {
308 skb_put(skb, 4); 161 skb_put(skb, 4);
@@ -314,10 +167,10 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
314 } 167 }
315 168
316 /* SSID */ 169 /* SSID */
317 ies = pos = skb_put(skb, 2 + ifsta->ssid_len); 170 ies = pos = skb_put(skb, 2 + ifmgd->ssid_len);
318 *pos++ = WLAN_EID_SSID; 171 *pos++ = WLAN_EID_SSID;
319 *pos++ = ifsta->ssid_len; 172 *pos++ = ifmgd->ssid_len;
320 memcpy(pos, ifsta->ssid, ifsta->ssid_len); 173 memcpy(pos, ifmgd->ssid, ifmgd->ssid_len);
321 174
322 /* add all rates which were marked to be used above */ 175 /* add all rates which were marked to be used above */
323 supp_rates_len = rates_len; 176 supp_rates_len = rates_len;
@@ -372,12 +225,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
372 } 225 }
373 } 226 }
374 227
375 if (ifsta->extra_ie) { 228 if (ifmgd->extra_ie) {
376 pos = skb_put(skb, ifsta->extra_ie_len); 229 pos = skb_put(skb, ifmgd->extra_ie_len);
377 memcpy(pos, ifsta->extra_ie, ifsta->extra_ie_len); 230 memcpy(pos, ifmgd->extra_ie, ifmgd->extra_ie_len);
378 } 231 }
379 232
380 if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { 233 if (wmm && (ifmgd->flags & IEEE80211_STA_WMM_ENABLED)) {
381 pos = skb_put(skb, 9); 234 pos = skb_put(skb, 9);
382 *pos++ = WLAN_EID_VENDOR_SPECIFIC; 235 *pos++ = WLAN_EID_VENDOR_SPECIFIC;
383 *pos++ = 7; /* len */ 236 *pos++ = 7; /* len */
@@ -391,10 +244,17 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
391 } 244 }
392 245
393 /* wmm support is a must to HT */ 246 /* wmm support is a must to HT */
394 if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) && 247 /*
248 * IEEE802.11n does not allow TKIP/WEP as pairwise
249 * ciphers in HT mode. We still associate in non-ht
250 * mode (11a/b/g) if any one of these ciphers is
251 * configured as pairwise.
252 */
253 if (wmm && (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) &&
395 sband->ht_cap.ht_supported && 254 sband->ht_cap.ht_supported &&
396 (ht_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_INFORMATION)) && 255 (ht_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_INFORMATION)) &&
397 ht_ie[1] >= sizeof(struct ieee80211_ht_info)) { 256 ht_ie[1] >= sizeof(struct ieee80211_ht_info) &&
257 (!(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED))) {
398 struct ieee80211_ht_info *ht_info = 258 struct ieee80211_ht_info *ht_info =
399 (struct ieee80211_ht_info *)(ht_ie + 2); 259 (struct ieee80211_ht_info *)(ht_ie + 2);
400 u16 cap = sband->ht_cap.cap; 260 u16 cap = sband->ht_cap.cap;
@@ -429,11 +289,11 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
429 memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); 289 memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs));
430 } 290 }
431 291
432 kfree(ifsta->assocreq_ies); 292 kfree(ifmgd->assocreq_ies);
433 ifsta->assocreq_ies_len = (skb->data + skb->len) - ies; 293 ifmgd->assocreq_ies_len = (skb->data + skb->len) - ies;
434 ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_KERNEL); 294 ifmgd->assocreq_ies = kmalloc(ifmgd->assocreq_ies_len, GFP_KERNEL);
435 if (ifsta->assocreq_ies) 295 if (ifmgd->assocreq_ies)
436 memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len); 296 memcpy(ifmgd->assocreq_ies, ies, ifmgd->assocreq_ies_len);
437 297
438 ieee80211_tx_skb(sdata, skb, 0); 298 ieee80211_tx_skb(sdata, skb, 0);
439} 299}
@@ -443,7 +303,7 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
443 u16 stype, u16 reason) 303 u16 stype, u16 reason)
444{ 304{
445 struct ieee80211_local *local = sdata->local; 305 struct ieee80211_local *local = sdata->local;
446 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 306 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
447 struct sk_buff *skb; 307 struct sk_buff *skb;
448 struct ieee80211_mgmt *mgmt; 308 struct ieee80211_mgmt *mgmt;
449 309
@@ -457,40 +317,51 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
457 317
458 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); 318 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
459 memset(mgmt, 0, 24); 319 memset(mgmt, 0, 24);
460 memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); 320 memcpy(mgmt->da, ifmgd->bssid, ETH_ALEN);
461 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); 321 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
462 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); 322 memcpy(mgmt->bssid, ifmgd->bssid, ETH_ALEN);
463 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); 323 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype);
464 skb_put(skb, 2); 324 skb_put(skb, 2);
465 /* u.deauth.reason_code == u.disassoc.reason_code */ 325 /* u.deauth.reason_code == u.disassoc.reason_code */
466 mgmt->u.deauth.reason_code = cpu_to_le16(reason); 326 mgmt->u.deauth.reason_code = cpu_to_le16(reason);
467 327
468 ieee80211_tx_skb(sdata, skb, 0); 328 ieee80211_tx_skb(sdata, skb, ifmgd->flags & IEEE80211_STA_MFP_ENABLED);
469} 329}
470 330
471/* MLME */ 331void ieee80211_send_pspoll(struct ieee80211_local *local,
472static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, 332 struct ieee80211_sub_if_data *sdata)
473 struct ieee80211_bss *bss)
474{ 333{
475 struct ieee80211_local *local = sdata->local; 334 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
476 int i, have_higher_than_11mbit = 0; 335 struct ieee80211_pspoll *pspoll;
336 struct sk_buff *skb;
337 u16 fc;
477 338
478 /* cf. IEEE 802.11 9.2.12 */ 339 skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*pspoll));
479 for (i = 0; i < bss->supp_rates_len; i++) 340 if (!skb) {
480 if ((bss->supp_rates[i] & 0x7f) * 5 > 110) 341 printk(KERN_DEBUG "%s: failed to allocate buffer for "
481 have_higher_than_11mbit = 1; 342 "pspoll frame\n", sdata->dev->name);
343 return;
344 }
345 skb_reserve(skb, local->hw.extra_tx_headroom);
482 346
483 if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && 347 pspoll = (struct ieee80211_pspoll *) skb_put(skb, sizeof(*pspoll));
484 have_higher_than_11mbit) 348 memset(pspoll, 0, sizeof(*pspoll));
485 sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; 349 fc = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL | IEEE80211_FCTL_PM;
486 else 350 pspoll->frame_control = cpu_to_le16(fc);
487 sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; 351 pspoll->aid = cpu_to_le16(ifmgd->aid);
352
353 /* aid in PS-Poll has its two MSBs each set to 1 */
354 pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14);
488 355
489 ieee80211_set_wmm_default(sdata); 356 memcpy(pspoll->bssid, ifmgd->bssid, ETH_ALEN);
357 memcpy(pspoll->ta, sdata->dev->dev_addr, ETH_ALEN);
358
359 ieee80211_tx_skb(sdata, skb, 0);
490} 360}
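
The new ieee80211_send_pspoll() builds a PS-Poll control frame in which the Duration/ID field carries the association ID with its two most significant bits forced to 1, as the comment above notes. The encoding in isolation (illustrative only, not part of the patch):

#include <stdint.h>

/* AID as carried in a PS-Poll Duration/ID field (host byte order). */
static uint16_t pspoll_aid_field(uint16_t aid)
{
	return (uint16_t)(aid | (1 << 15) | (1 << 14));	/* i.e. aid | 0xc000 */
}

/* Example: AID 1 becomes 0xc001 before the cpu_to_le16() conversion. */
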
491 361
362/* MLME */
492static void ieee80211_sta_wmm_params(struct ieee80211_local *local, 363static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
493 struct ieee80211_if_sta *ifsta, 364 struct ieee80211_if_managed *ifmgd,
494 u8 *wmm_param, size_t wmm_param_len) 365 u8 *wmm_param, size_t wmm_param_len)
495{ 366{
496 struct ieee80211_tx_queue_params params; 367 struct ieee80211_tx_queue_params params;
@@ -498,7 +369,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
498 int count; 369 int count;
499 u8 *pos; 370 u8 *pos;
500 371
501 if (!(ifsta->flags & IEEE80211_STA_WMM_ENABLED)) 372 if (!(ifmgd->flags & IEEE80211_STA_WMM_ENABLED))
502 return; 373 return;
503 374
504 if (!wmm_param) 375 if (!wmm_param)
@@ -507,18 +378,15 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
507 if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1) 378 if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1)
508 return; 379 return;
509 count = wmm_param[6] & 0x0f; 380 count = wmm_param[6] & 0x0f;
510 if (count == ifsta->wmm_last_param_set) 381 if (count == ifmgd->wmm_last_param_set)
511 return; 382 return;
512 ifsta->wmm_last_param_set = count; 383 ifmgd->wmm_last_param_set = count;
513 384
514 pos = wmm_param + 8; 385 pos = wmm_param + 8;
515 left = wmm_param_len - 8; 386 left = wmm_param_len - 8;
516 387
517 memset(&params, 0, sizeof(params)); 388 memset(&params, 0, sizeof(params));
518 389
519 if (!local->ops->conf_tx)
520 return;
521
522 local->wmm_acm = 0; 390 local->wmm_acm = 0;
523 for (; left >= 4; left -= 4, pos += 4) { 391 for (; left >= 4; left -= 4, pos += 4) {
524 int aci = (pos[0] >> 5) & 0x03; 392 int aci = (pos[0] >> 5) & 0x03;
@@ -526,26 +394,26 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
526 int queue; 394 int queue;
527 395
528 switch (aci) { 396 switch (aci) {
529 case 1: 397 case 1: /* AC_BK */
530 queue = 3; 398 queue = 3;
531 if (acm) 399 if (acm)
532 local->wmm_acm |= BIT(0) | BIT(3); 400 local->wmm_acm |= BIT(1) | BIT(2); /* BK/- */
533 break; 401 break;
534 case 2: 402 case 2: /* AC_VI */
535 queue = 1; 403 queue = 1;
536 if (acm) 404 if (acm)
537 local->wmm_acm |= BIT(4) | BIT(5); 405 local->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */
538 break; 406 break;
539 case 3: 407 case 3: /* AC_VO */
540 queue = 0; 408 queue = 0;
541 if (acm) 409 if (acm)
542 local->wmm_acm |= BIT(6) | BIT(7); 410 local->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */
543 break; 411 break;
544 case 0: 412 case 0: /* AC_BE */
545 default: 413 default:
546 queue = 2; 414 queue = 2;
547 if (acm) 415 if (acm)
548 local->wmm_acm |= BIT(1) | BIT(2); 416 local->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */
549 break; 417 break;
550 } 418 }
551 419
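
The relabelled switch above maps the ACI field of each WMM AC parameter record to a local queue and, when the ACM bit is set, records which 802.1D priorities need admission control: AC_BK covers priorities 1 and 2, AC_BE covers 0 and 3, AC_VI covers 4 and 5, and AC_VO covers 6 and 7 - which is why the BK and BE bit masks swap places in this hunk. The same grouping as a lookup table (illustrative only, not part of the patch):

/* 802.1D priority -> WMM access category, matching the BK/BE/VI/VO
 * groupings noted in the comments above. */
enum wmm_ac { AC_BK, AC_BE, AC_VI, AC_VO };

static const enum wmm_ac prio_to_ac[8] = {
	[0] = AC_BE, [1] = AC_BK, [2] = AC_BK, [3] = AC_BE,
	[4] = AC_VI, [5] = AC_VI, [6] = AC_VO, [7] = AC_VO,
};
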
@@ -559,21 +427,41 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
559 local->mdev->name, queue, aci, acm, params.aifs, params.cw_min, 427 local->mdev->name, queue, aci, acm, params.aifs, params.cw_min,
560 params.cw_max, params.txop); 428 params.cw_max, params.txop);
561#endif 429#endif
562 /* TODO: handle ACM (block TX, fallback to next lowest allowed 430 if (local->ops->conf_tx &&
563 * AC for now) */ 431 local->ops->conf_tx(local_to_hw(local), queue, &params)) {
564 if (local->ops->conf_tx(local_to_hw(local), queue, &params)) {
565 printk(KERN_DEBUG "%s: failed to set TX queue " 432 printk(KERN_DEBUG "%s: failed to set TX queue "
566 "parameters for queue %d\n", local->mdev->name, queue); 433 "parameters for queue %d\n", local->mdev->name, queue);
567 } 434 }
568 } 435 }
569} 436}
570 437
438static bool ieee80211_check_tim(struct ieee802_11_elems *elems, u16 aid)
439{
440 u8 mask;
441 u8 index, indexn1, indexn2;
442 struct ieee80211_tim_ie *tim = (struct ieee80211_tim_ie *) elems->tim;
443
444 aid &= 0x3fff;
445 index = aid / 8;
446 mask = 1 << (aid & 7);
447
448 indexn1 = tim->bitmap_ctrl & 0xfe;
449 indexn2 = elems->tim_len + indexn1 - 4;
450
451 if (index < indexn1 || index > indexn2)
452 return false;
453
454 index -= indexn1;
455
456 return !!(tim->virtual_map[index] & mask);
457}
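
The new ieee80211_check_tim() above tests whether the AP's TIM element announces buffered frames for our AID: the AID picks a byte (aid / 8) and bit (aid & 7) of the full virtual bitmap, the even offset in bitmap_ctrl (indexn1) says where the transmitted partial bitmap starts, and indexn2 is the last full-bitmap byte the element actually carries (element length minus the three leading octets, starting at indexn1). A worked example with assumed numbers:

/*
 * Assume aid = 57, bitmap_ctrl = 0x06, tim_len = 7 (hypothetical values):
 *   index   = 57 / 8        = 7
 *   mask    = 1 << (57 & 7) = 0x02
 *   indexn1 = 0x06 & 0xfe   = 6    (partial bitmap covers full-bitmap bytes 6..)
 *   indexn2 = 7 + 6 - 4     = 9    (..through byte 9, four octets carried)
 * Byte 7 lies inside [6, 9], so the result is virtual_map[7 - 6] & 0x02,
 * i.e. bit 1 of the second transmitted bitmap octet.
 */
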
458
571static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, 459static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
572 u16 capab, bool erp_valid, u8 erp) 460 u16 capab, bool erp_valid, u8 erp)
573{ 461{
574 struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; 462 struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
575#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 463#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
576 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 464 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
577#endif 465#endif
578 u32 changed = 0; 466 u32 changed = 0;
579 bool use_protection; 467 bool use_protection;
@@ -596,7 +484,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
596 printk(KERN_DEBUG "%s: CTS protection %s (BSSID=%pM)\n", 484 printk(KERN_DEBUG "%s: CTS protection %s (BSSID=%pM)\n",
597 sdata->dev->name, 485 sdata->dev->name,
598 use_protection ? "enabled" : "disabled", 486 use_protection ? "enabled" : "disabled",
599 ifsta->bssid); 487 ifmgd->bssid);
600 } 488 }
601#endif 489#endif
602 bss_conf->use_cts_prot = use_protection; 490 bss_conf->use_cts_prot = use_protection;
@@ -610,7 +498,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
610 " (BSSID=%pM)\n", 498 " (BSSID=%pM)\n",
611 sdata->dev->name, 499 sdata->dev->name,
612 use_short_preamble ? "short" : "long", 500 use_short_preamble ? "short" : "long",
613 ifsta->bssid); 501 ifmgd->bssid);
614 } 502 }
615#endif 503#endif
616 bss_conf->use_short_preamble = use_short_preamble; 504 bss_conf->use_short_preamble = use_short_preamble;
@@ -624,7 +512,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
624 " (BSSID=%pM)\n", 512 " (BSSID=%pM)\n",
625 sdata->dev->name, 513 sdata->dev->name,
626 use_short_slot ? "short" : "long", 514 use_short_slot ? "short" : "long",
627 ifsta->bssid); 515 ifmgd->bssid);
628 } 516 }
629#endif 517#endif
630 bss_conf->use_short_slot = use_short_slot; 518 bss_conf->use_short_slot = use_short_slot;
@@ -634,57 +522,57 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
634 return changed; 522 return changed;
635} 523}
636 524
637static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata, 525static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata)
638 struct ieee80211_if_sta *ifsta)
639{ 526{
640 union iwreq_data wrqu; 527 union iwreq_data wrqu;
528
641 memset(&wrqu, 0, sizeof(wrqu)); 529 memset(&wrqu, 0, sizeof(wrqu));
642 if (ifsta->flags & IEEE80211_STA_ASSOCIATED) 530 if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED)
643 memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); 531 memcpy(wrqu.ap_addr.sa_data, sdata->u.mgd.bssid, ETH_ALEN);
644 wrqu.ap_addr.sa_family = ARPHRD_ETHER; 532 wrqu.ap_addr.sa_family = ARPHRD_ETHER;
645 wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); 533 wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL);
646} 534}
647 535
648static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata, 536static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata)
649 struct ieee80211_if_sta *ifsta)
650{ 537{
538 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
651 char *buf; 539 char *buf;
652 size_t len; 540 size_t len;
653 int i; 541 int i;
654 union iwreq_data wrqu; 542 union iwreq_data wrqu;
655 543
656 if (!ifsta->assocreq_ies && !ifsta->assocresp_ies) 544 if (!ifmgd->assocreq_ies && !ifmgd->assocresp_ies)
657 return; 545 return;
658 546
659 buf = kmalloc(50 + 2 * (ifsta->assocreq_ies_len + 547 buf = kmalloc(50 + 2 * (ifmgd->assocreq_ies_len +
660 ifsta->assocresp_ies_len), GFP_KERNEL); 548 ifmgd->assocresp_ies_len), GFP_KERNEL);
661 if (!buf) 549 if (!buf)
662 return; 550 return;
663 551
664 len = sprintf(buf, "ASSOCINFO("); 552 len = sprintf(buf, "ASSOCINFO(");
665 if (ifsta->assocreq_ies) { 553 if (ifmgd->assocreq_ies) {
666 len += sprintf(buf + len, "ReqIEs="); 554 len += sprintf(buf + len, "ReqIEs=");
667 for (i = 0; i < ifsta->assocreq_ies_len; i++) { 555 for (i = 0; i < ifmgd->assocreq_ies_len; i++) {
668 len += sprintf(buf + len, "%02x", 556 len += sprintf(buf + len, "%02x",
669 ifsta->assocreq_ies[i]); 557 ifmgd->assocreq_ies[i]);
670 } 558 }
671 } 559 }
672 if (ifsta->assocresp_ies) { 560 if (ifmgd->assocresp_ies) {
673 if (ifsta->assocreq_ies) 561 if (ifmgd->assocreq_ies)
674 len += sprintf(buf + len, " "); 562 len += sprintf(buf + len, " ");
675 len += sprintf(buf + len, "RespIEs="); 563 len += sprintf(buf + len, "RespIEs=");
676 for (i = 0; i < ifsta->assocresp_ies_len; i++) { 564 for (i = 0; i < ifmgd->assocresp_ies_len; i++) {
677 len += sprintf(buf + len, "%02x", 565 len += sprintf(buf + len, "%02x",
678 ifsta->assocresp_ies[i]); 566 ifmgd->assocresp_ies[i]);
679 } 567 }
680 } 568 }
681 len += sprintf(buf + len, ")"); 569 len += sprintf(buf + len, ")");
682 570
683 if (len > IW_CUSTOM_MAX) { 571 if (len > IW_CUSTOM_MAX) {
684 len = sprintf(buf, "ASSOCRESPIE="); 572 len = sprintf(buf, "ASSOCRESPIE=");
685 for (i = 0; i < ifsta->assocresp_ies_len; i++) { 573 for (i = 0; i < ifmgd->assocresp_ies_len; i++) {
686 len += sprintf(buf + len, "%02x", 574 len += sprintf(buf + len, "%02x",
687 ifsta->assocresp_ies[i]); 575 ifmgd->assocresp_ies[i]);
688 } 576 }
689 } 577 }
690 578
@@ -699,40 +587,39 @@ static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata,
699 587
700 588
701static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, 589static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
702 struct ieee80211_if_sta *ifsta,
703 u32 bss_info_changed) 590 u32 bss_info_changed)
704{ 591{
592 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
705 struct ieee80211_local *local = sdata->local; 593 struct ieee80211_local *local = sdata->local;
706 struct ieee80211_conf *conf = &local_to_hw(local)->conf; 594 struct ieee80211_conf *conf = &local_to_hw(local)->conf;
707 595
708 struct ieee80211_bss *bss; 596 struct ieee80211_bss *bss;
709 597
710 bss_info_changed |= BSS_CHANGED_ASSOC; 598 bss_info_changed |= BSS_CHANGED_ASSOC;
711 ifsta->flags |= IEEE80211_STA_ASSOCIATED; 599 ifmgd->flags |= IEEE80211_STA_ASSOCIATED;
712
713 if (sdata->vif.type != NL80211_IFTYPE_STATION)
714 return;
715 600
716 bss = ieee80211_rx_bss_get(local, ifsta->bssid, 601 bss = ieee80211_rx_bss_get(local, ifmgd->bssid,
717 conf->channel->center_freq, 602 conf->channel->center_freq,
718 ifsta->ssid, ifsta->ssid_len); 603 ifmgd->ssid, ifmgd->ssid_len);
719 if (bss) { 604 if (bss) {
720 /* set timing information */ 605 /* set timing information */
721 sdata->vif.bss_conf.beacon_int = bss->beacon_int; 606 sdata->vif.bss_conf.beacon_int = bss->cbss.beacon_interval;
722 sdata->vif.bss_conf.timestamp = bss->timestamp; 607 sdata->vif.bss_conf.timestamp = bss->cbss.tsf;
723 sdata->vif.bss_conf.dtim_period = bss->dtim_period; 608 sdata->vif.bss_conf.dtim_period = bss->dtim_period;
724 609
725 bss_info_changed |= ieee80211_handle_bss_capability(sdata, 610 bss_info_changed |= ieee80211_handle_bss_capability(sdata,
726 bss->capability, bss->has_erp_value, bss->erp_value); 611 bss->cbss.capability, bss->has_erp_value, bss->erp_value);
612
613 cfg80211_hold_bss(&bss->cbss);
727 614
728 ieee80211_rx_bss_put(local, bss); 615 ieee80211_rx_bss_put(local, bss);
729 } 616 }
730 617
731 ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; 618 ifmgd->flags |= IEEE80211_STA_PREV_BSSID_SET;
732 memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); 619 memcpy(ifmgd->prev_bssid, sdata->u.mgd.bssid, ETH_ALEN);
733 ieee80211_sta_send_associnfo(sdata, ifsta); 620 ieee80211_sta_send_associnfo(sdata);
734 621
735 ifsta->last_probe = jiffies; 622 ifmgd->last_probe = jiffies;
736 ieee80211_led_assoc(local, 1); 623 ieee80211_led_assoc(local, 1);
737 624
738 sdata->vif.bss_conf.assoc = 1; 625 sdata->vif.bss_conf.assoc = 1;
@@ -745,72 +632,115 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
745 ieee80211_bss_info_change_notify(sdata, bss_info_changed); 632 ieee80211_bss_info_change_notify(sdata, bss_info_changed);
746 633
747 if (local->powersave) { 634 if (local->powersave) {
748 if (local->dynamic_ps_timeout > 0) 635 if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) &&
636 local->hw.conf.dynamic_ps_timeout > 0) {
749 mod_timer(&local->dynamic_ps_timer, jiffies + 637 mod_timer(&local->dynamic_ps_timer, jiffies +
750 msecs_to_jiffies(local->dynamic_ps_timeout)); 638 msecs_to_jiffies(
751 else { 639 local->hw.conf.dynamic_ps_timeout));
640 } else {
641 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
642 ieee80211_send_nullfunc(local, sdata, 1);
752 conf->flags |= IEEE80211_CONF_PS; 643 conf->flags |= IEEE80211_CONF_PS;
753 ieee80211_hw_config(local, 644 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
754 IEEE80211_CONF_CHANGE_PS);
755 } 645 }
756 } 646 }
757 647
758 netif_tx_start_all_queues(sdata->dev); 648 netif_tx_start_all_queues(sdata->dev);
759 netif_carrier_on(sdata->dev); 649 netif_carrier_on(sdata->dev);
760 650
761 ieee80211_sta_send_apinfo(sdata, ifsta); 651 ieee80211_sta_send_apinfo(sdata);
762} 652}
763 653
764static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, 654static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata)
765 struct ieee80211_if_sta *ifsta)
766{ 655{
767 ifsta->direct_probe_tries++; 656 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
768 if (ifsta->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) { 657 struct ieee80211_local *local = sdata->local;
658
659 ifmgd->direct_probe_tries++;
660 if (ifmgd->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) {
769 printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n", 661 printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n",
770 sdata->dev->name, ifsta->bssid); 662 sdata->dev->name, ifmgd->bssid);
771 ifsta->state = IEEE80211_STA_MLME_DISABLED; 663 ifmgd->state = IEEE80211_STA_MLME_DISABLED;
772 ieee80211_sta_send_apinfo(sdata, ifsta); 664 ieee80211_sta_send_apinfo(sdata);
665
666 /*
667 * Most likely AP is not in the range so remove the
668 * bss information associated to the AP
669 */
670 ieee80211_rx_bss_remove(sdata, ifmgd->bssid,
671 sdata->local->hw.conf.channel->center_freq,
672 ifmgd->ssid, ifmgd->ssid_len);
673
674 /*
675 * We might have a pending scan which had no chance to run yet
676 * due to state == IEEE80211_STA_MLME_DIRECT_PROBE.
677 * Hence, queue the STAs work again
678 */
679 queue_work(local->hw.workqueue, &ifmgd->work);
773 return; 680 return;
774 } 681 }
775 682
776 printk(KERN_DEBUG "%s: direct probe to AP %pM try %d\n", 683 printk(KERN_DEBUG "%s: direct probe to AP %pM try %d\n",
777 sdata->dev->name, ifsta->bssid, 684 sdata->dev->name, ifmgd->bssid,
778 ifsta->direct_probe_tries); 685 ifmgd->direct_probe_tries);
779 686
780 ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; 687 ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE;
781 688
782 set_bit(IEEE80211_STA_REQ_DIRECT_PROBE, &ifsta->request); 689 set_bit(IEEE80211_STA_REQ_DIRECT_PROBE, &ifmgd->request);
783 690
784 /* Direct probe is sent to broadcast address as some APs 691 /* Direct probe is sent to broadcast address as some APs
785 * will not answer to direct packet in unassociated state. 692 * will not answer to direct packet in unassociated state.
786 */ 693 */
787 ieee80211_send_probe_req(sdata, NULL, 694 ieee80211_send_probe_req(sdata, NULL,
788 ifsta->ssid, ifsta->ssid_len); 695 ifmgd->ssid, ifmgd->ssid_len, NULL, 0);
789 696
790 mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); 697 mod_timer(&ifmgd->timer, jiffies + IEEE80211_AUTH_TIMEOUT);
791} 698}
792 699
793 700
794static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, 701static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata)
795 struct ieee80211_if_sta *ifsta)
796{ 702{
797 ifsta->auth_tries++; 703 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
798 if (ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) { 704 struct ieee80211_local *local = sdata->local;
705 u8 *ies;
706 size_t ies_len;
707
708 ifmgd->auth_tries++;
709 if (ifmgd->auth_tries > IEEE80211_AUTH_MAX_TRIES) {
799 printk(KERN_DEBUG "%s: authentication with AP %pM" 710 printk(KERN_DEBUG "%s: authentication with AP %pM"
800 " timed out\n", 711 " timed out\n",
801 sdata->dev->name, ifsta->bssid); 712 sdata->dev->name, ifmgd->bssid);
802 ifsta->state = IEEE80211_STA_MLME_DISABLED; 713 ifmgd->state = IEEE80211_STA_MLME_DISABLED;
803 ieee80211_sta_send_apinfo(sdata, ifsta); 714 ieee80211_sta_send_apinfo(sdata);
715 ieee80211_rx_bss_remove(sdata, ifmgd->bssid,
716 sdata->local->hw.conf.channel->center_freq,
717 ifmgd->ssid, ifmgd->ssid_len);
718
719 /*
720 * We might have a pending scan which had no chance to run yet
721 * due to state == IEEE80211_STA_MLME_AUTHENTICATE.
722 * Hence, queue the STAs work again
723 */
724 queue_work(local->hw.workqueue, &ifmgd->work);
804 return; 725 return;
805 } 726 }
806 727
807 ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; 728 ifmgd->state = IEEE80211_STA_MLME_AUTHENTICATE;
808 printk(KERN_DEBUG "%s: authenticate with AP %pM\n", 729 printk(KERN_DEBUG "%s: authenticate with AP %pM\n",
809 sdata->dev->name, ifsta->bssid); 730 sdata->dev->name, ifmgd->bssid);
810 731
811 ieee80211_send_auth(sdata, ifsta, 1, NULL, 0, 0); 732 if (ifmgd->flags & IEEE80211_STA_EXT_SME) {
733 ies = ifmgd->sme_auth_ie;
734 ies_len = ifmgd->sme_auth_ie_len;
735 } else {
736 ies = NULL;
737 ies_len = 0;
738 }
739 ieee80211_send_auth(sdata, 1, ifmgd->auth_alg, ies, ies_len,
740 ifmgd->bssid, 0);
741 ifmgd->auth_transaction = 2;
812 742
813 mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); 743 mod_timer(&ifmgd->timer, jiffies + IEEE80211_AUTH_TIMEOUT);
814} 744}
815 745
816/* 746/*
@@ -818,32 +748,44 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata,
818 * if self disconnected or a reason code from the AP. 748 * if self disconnected or a reason code from the AP.
819 */ 749 */
820static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, 750static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
821 struct ieee80211_if_sta *ifsta, bool deauth, 751 bool deauth, bool self_disconnected,
822 bool self_disconnected, u16 reason) 752 u16 reason)
823{ 753{
754 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
824 struct ieee80211_local *local = sdata->local; 755 struct ieee80211_local *local = sdata->local;
756 struct ieee80211_conf *conf = &local_to_hw(local)->conf;
757 struct ieee80211_bss *bss;
825 struct sta_info *sta; 758 struct sta_info *sta;
826 u32 changed = 0, config_changed = 0; 759 u32 changed = 0, config_changed = 0;
827 760
828 rcu_read_lock(); 761 rcu_read_lock();
829 762
830 sta = sta_info_get(local, ifsta->bssid); 763 sta = sta_info_get(local, ifmgd->bssid);
831 if (!sta) { 764 if (!sta) {
832 rcu_read_unlock(); 765 rcu_read_unlock();
833 return; 766 return;
834 } 767 }
835 768
836 if (deauth) { 769 if (deauth) {
837 ifsta->direct_probe_tries = 0; 770 ifmgd->direct_probe_tries = 0;
838 ifsta->auth_tries = 0; 771 ifmgd->auth_tries = 0;
839 } 772 }
840 ifsta->assoc_scan_tries = 0; 773 ifmgd->assoc_scan_tries = 0;
841 ifsta->assoc_tries = 0; 774 ifmgd->assoc_tries = 0;
842 775
843 netif_tx_stop_all_queues(sdata->dev); 776 netif_tx_stop_all_queues(sdata->dev);
844 netif_carrier_off(sdata->dev); 777 netif_carrier_off(sdata->dev);
845 778
846 ieee80211_sta_tear_down_BA_sessions(sdata, sta->sta.addr); 779 ieee80211_sta_tear_down_BA_sessions(sta);
780
781 bss = ieee80211_rx_bss_get(local, ifmgd->bssid,
782 conf->channel->center_freq,
783 ifmgd->ssid, ifmgd->ssid_len);
784
785 if (bss) {
786 cfg80211_unhold_bss(&bss->cbss);
787 ieee80211_rx_bss_put(local, bss);
788 }
847 789
848 if (self_disconnected) { 790 if (self_disconnected) {
849 if (deauth) 791 if (deauth)
@@ -854,23 +796,28 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
854 IEEE80211_STYPE_DISASSOC, reason); 796 IEEE80211_STYPE_DISASSOC, reason);
855 } 797 }
856 798
857 ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; 799 ifmgd->flags &= ~IEEE80211_STA_ASSOCIATED;
858 changed |= ieee80211_reset_erp_info(sdata); 800 changed |= ieee80211_reset_erp_info(sdata);
859 801
860 ieee80211_led_assoc(local, 0); 802 ieee80211_led_assoc(local, 0);
861 changed |= BSS_CHANGED_ASSOC; 803 changed |= BSS_CHANGED_ASSOC;
862 sdata->vif.bss_conf.assoc = false; 804 sdata->vif.bss_conf.assoc = false;
863 805
864 ieee80211_sta_send_apinfo(sdata, ifsta); 806 ieee80211_sta_send_apinfo(sdata);
865 807
866 if (self_disconnected || reason == WLAN_REASON_DISASSOC_STA_HAS_LEFT) 808 if (self_disconnected || reason == WLAN_REASON_DISASSOC_STA_HAS_LEFT) {
867 ifsta->state = IEEE80211_STA_MLME_DISABLED; 809 ifmgd->state = IEEE80211_STA_MLME_DISABLED;
810 ieee80211_rx_bss_remove(sdata, ifmgd->bssid,
811 sdata->local->hw.conf.channel->center_freq,
812 ifmgd->ssid, ifmgd->ssid_len);
813 }
868 814
869 rcu_read_unlock(); 815 rcu_read_unlock();
870 816
871 local->hw.conf.ht.enabled = false; 817 /* channel(_type) changes are handled by ieee80211_hw_config */
872 local->oper_channel_type = NL80211_CHAN_NO_HT; 818 local->oper_channel_type = NL80211_CHAN_NO_HT;
873 config_changed |= IEEE80211_CONF_CHANGE_HT; 819
820 local->power_constr_level = 0;
874 821
875 del_timer_sync(&local->dynamic_ps_timer); 822 del_timer_sync(&local->dynamic_ps_timer);
876 cancel_work_sync(&local->dynamic_ps_enable_work); 823 cancel_work_sync(&local->dynamic_ps_enable_work);
@@ -885,7 +832,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
885 832
886 rcu_read_lock(); 833 rcu_read_lock();
887 834
888 sta = sta_info_get(local, ifsta->bssid); 835 sta = sta_info_get(local, ifmgd->bssid);
889 if (!sta) { 836 if (!sta) {
890 rcu_read_unlock(); 837 rcu_read_unlock();
891 return; 838 return;
@@ -906,27 +853,27 @@ static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata)
906 return 1; 853 return 1;
907} 854}
908 855
909static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata, 856static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata)
910 struct ieee80211_if_sta *ifsta)
911{ 857{
858 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
912 struct ieee80211_local *local = sdata->local; 859 struct ieee80211_local *local = sdata->local;
913 struct ieee80211_bss *bss; 860 struct ieee80211_bss *bss;
914 int bss_privacy; 861 int bss_privacy;
915 int wep_privacy; 862 int wep_privacy;
916 int privacy_invoked; 863 int privacy_invoked;
917 864
918 if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL)) 865 if (!ifmgd || (ifmgd->flags & IEEE80211_STA_EXT_SME))
919 return 0; 866 return 0;
920 867
921 bss = ieee80211_rx_bss_get(local, ifsta->bssid, 868 bss = ieee80211_rx_bss_get(local, ifmgd->bssid,
922 local->hw.conf.channel->center_freq, 869 local->hw.conf.channel->center_freq,
923 ifsta->ssid, ifsta->ssid_len); 870 ifmgd->ssid, ifmgd->ssid_len);
924 if (!bss) 871 if (!bss)
925 return 0; 872 return 0;
926 873
927 bss_privacy = !!(bss->capability & WLAN_CAPABILITY_PRIVACY); 874 bss_privacy = !!(bss->cbss.capability & WLAN_CAPABILITY_PRIVACY);
928 wep_privacy = !!ieee80211_sta_wep_configured(sdata); 875 wep_privacy = !!ieee80211_sta_wep_configured(sdata);
929 privacy_invoked = !!(ifsta->flags & IEEE80211_STA_PRIVACY_INVOKED); 876 privacy_invoked = !!(ifmgd->flags & IEEE80211_STA_PRIVACY_INVOKED);
930 877
931 ieee80211_rx_bss_put(local, bss); 878 ieee80211_rx_bss_put(local, bss);
932 879
@@ -936,105 +883,173 @@ static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata,
936 return 1; 883 return 1;
937} 884}
938 885
939static void ieee80211_associate(struct ieee80211_sub_if_data *sdata, 886static void ieee80211_associate(struct ieee80211_sub_if_data *sdata)
940 struct ieee80211_if_sta *ifsta)
941{ 887{
942 ifsta->assoc_tries++; 888 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
943 if (ifsta->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) { 889 struct ieee80211_local *local = sdata->local;
890
891 ifmgd->assoc_tries++;
892 if (ifmgd->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) {
944 printk(KERN_DEBUG "%s: association with AP %pM" 893 printk(KERN_DEBUG "%s: association with AP %pM"
945 " timed out\n", 894 " timed out\n",
946 sdata->dev->name, ifsta->bssid); 895 sdata->dev->name, ifmgd->bssid);
947 ifsta->state = IEEE80211_STA_MLME_DISABLED; 896 ifmgd->state = IEEE80211_STA_MLME_DISABLED;
948 ieee80211_sta_send_apinfo(sdata, ifsta); 897 ieee80211_sta_send_apinfo(sdata);
898 ieee80211_rx_bss_remove(sdata, ifmgd->bssid,
899 sdata->local->hw.conf.channel->center_freq,
900 ifmgd->ssid, ifmgd->ssid_len);
901 /*
902 * We might have a pending scan which had no chance to run yet
903 * due to state == IEEE80211_STA_MLME_ASSOCIATE.
904 * Hence, queue the STAs work again
905 */
906 queue_work(local->hw.workqueue, &ifmgd->work);
949 return; 907 return;
950 } 908 }
951 909
952 ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; 910 ifmgd->state = IEEE80211_STA_MLME_ASSOCIATE;
953 printk(KERN_DEBUG "%s: associate with AP %pM\n", 911 printk(KERN_DEBUG "%s: associate with AP %pM\n",
954 sdata->dev->name, ifsta->bssid); 912 sdata->dev->name, ifmgd->bssid);
955 if (ieee80211_privacy_mismatch(sdata, ifsta)) { 913 if (ieee80211_privacy_mismatch(sdata)) {
956 printk(KERN_DEBUG "%s: mismatch in privacy configuration and " 914 printk(KERN_DEBUG "%s: mismatch in privacy configuration and "
957 "mixed-cell disabled - abort association\n", sdata->dev->name); 915 "mixed-cell disabled - abort association\n", sdata->dev->name);
958 ifsta->state = IEEE80211_STA_MLME_DISABLED; 916 ifmgd->state = IEEE80211_STA_MLME_DISABLED;
959 return; 917 return;
960 } 918 }
961 919
962 ieee80211_send_assoc(sdata, ifsta); 920 ieee80211_send_assoc(sdata);
963 921
964 mod_timer(&ifsta->timer, jiffies + IEEE80211_ASSOC_TIMEOUT); 922 mod_timer(&ifmgd->timer, jiffies + IEEE80211_ASSOC_TIMEOUT);
965} 923}
966 924
925void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
926 struct ieee80211_hdr *hdr)
927{
928 /*
929 * We can postpone the mgd.timer whenever receiving unicast frames
930 * from AP because we know that the connection is working both ways
931 * at that time. But multicast frames (and hence also beacons) must
932 * be ignored here, because we need to trigger the timer during
933 * data idle periods for sending the periodical probe request to
934 * the AP.
935 */
936 if (!is_multicast_ether_addr(hdr->addr1))
937 mod_timer(&sdata->u.mgd.timer,
938 jiffies + IEEE80211_MONITORING_INTERVAL);
939}
967 940
968static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, 941void ieee80211_beacon_loss_work(struct work_struct *work)
969 struct ieee80211_if_sta *ifsta)
970{ 942{
943 struct ieee80211_sub_if_data *sdata =
944 container_of(work, struct ieee80211_sub_if_data,
945 u.mgd.beacon_loss_work);
946 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
947
948 printk(KERN_DEBUG "%s: driver reports beacon loss from AP %pM "
949 "- sending probe request\n", sdata->dev->name,
950 sdata->u.mgd.bssid);
951
952 ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL;
953 ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid,
954 ifmgd->ssid_len, NULL, 0);
955
956 mod_timer(&ifmgd->timer, jiffies + IEEE80211_MONITORING_INTERVAL);
957}
958
959void ieee80211_beacon_loss(struct ieee80211_vif *vif)
960{
961 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
962
963 queue_work(sdata->local->hw.workqueue,
964 &sdata->u.mgd.beacon_loss_work);
965}
966EXPORT_SYMBOL(ieee80211_beacon_loss);
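
ieee80211_beacon_loss() is the new exported entry point for drivers whose hardware or firmware tracks beacons itself: it only queues beacon_loss_work, which then sends the probe request and re-arms the monitoring timer shown above. A minimal illustration of the call site a driver would add (hypothetical function name, not from this patch):

#include <net/mac80211.h>

/* Called from the driver's own firmware-event handling when the
 * device reports that beacons from the associated AP stopped arriving. */
static void example_fw_beacon_miss_event(struct ieee80211_vif *vif)
{
	ieee80211_beacon_loss(vif);
}
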
967
968static void ieee80211_associated(struct ieee80211_sub_if_data *sdata)
969{
970 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
971 struct ieee80211_local *local = sdata->local; 971 struct ieee80211_local *local = sdata->local;
972 struct sta_info *sta; 972 struct sta_info *sta;
973 int disassoc; 973 bool disassoc = false;
974 974
975 /* TODO: start monitoring current AP signal quality and number of 975 /* TODO: start monitoring current AP signal quality and number of
976 * missed beacons. Scan other channels every now and then and search 976 * missed beacons. Scan other channels every now and then and search
977 * for better APs. */ 977 * for better APs. */
978 /* TODO: remove expired BSSes */ 978 /* TODO: remove expired BSSes */
979 979
980 ifsta->state = IEEE80211_STA_MLME_ASSOCIATED; 980 ifmgd->state = IEEE80211_STA_MLME_ASSOCIATED;
981 981
982 rcu_read_lock(); 982 rcu_read_lock();
983 983
984 sta = sta_info_get(local, ifsta->bssid); 984 sta = sta_info_get(local, ifmgd->bssid);
985 if (!sta) { 985 if (!sta) {
986 printk(KERN_DEBUG "%s: No STA entry for own AP %pM\n", 986 printk(KERN_DEBUG "%s: No STA entry for own AP %pM\n",
987 sdata->dev->name, ifsta->bssid); 987 sdata->dev->name, ifmgd->bssid);
988 disassoc = 1; 988 disassoc = true;
989 } else { 989 goto unlock;
990 disassoc = 0;
991 if (time_after(jiffies,
992 sta->last_rx + IEEE80211_MONITORING_INTERVAL)) {
993 if (ifsta->flags & IEEE80211_STA_PROBEREQ_POLL) {
994 printk(KERN_DEBUG "%s: No ProbeResp from "
995 "current AP %pM - assume out of "
996 "range\n",
997 sdata->dev->name, ifsta->bssid);
998 disassoc = 1;
999 } else
1000 ieee80211_send_probe_req(sdata, ifsta->bssid,
1001 ifsta->ssid,
1002 ifsta->ssid_len);
1003 ifsta->flags ^= IEEE80211_STA_PROBEREQ_POLL;
1004 } else {
1005 ifsta->flags &= ~IEEE80211_STA_PROBEREQ_POLL;
1006 if (time_after(jiffies, ifsta->last_probe +
1007 IEEE80211_PROBE_INTERVAL)) {
1008 ifsta->last_probe = jiffies;
1009 ieee80211_send_probe_req(sdata, ifsta->bssid,
1010 ifsta->ssid,
1011 ifsta->ssid_len);
1012 }
1013 }
1014 } 990 }
1015 991
992 if ((ifmgd->flags & IEEE80211_STA_PROBEREQ_POLL) &&
993 time_after(jiffies, sta->last_rx + IEEE80211_MONITORING_INTERVAL)) {
994 printk(KERN_DEBUG "%s: no probe response from AP %pM "
995 "- disassociating\n",
996 sdata->dev->name, ifmgd->bssid);
997 disassoc = true;
998 ifmgd->flags &= ~IEEE80211_STA_PROBEREQ_POLL;
999 goto unlock;
1000 }
1001
1002 /*
1003 * Beacon filtering is only enabled with power save and then the
1004 * stack should not check for beacon loss.
1005 */
1006 if (!((local->hw.flags & IEEE80211_HW_BEACON_FILTER) &&
1007 (local->hw.conf.flags & IEEE80211_CONF_PS)) &&
1008 time_after(jiffies,
1009 ifmgd->last_beacon + IEEE80211_MONITORING_INTERVAL)) {
1010 printk(KERN_DEBUG "%s: beacon loss from AP %pM "
1011 "- sending probe request\n",
1012 sdata->dev->name, ifmgd->bssid);
1013 ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL;
1014 ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid,
1015 ifmgd->ssid_len, NULL, 0);
1016 goto unlock;
1017
1018 }
1019
1020 if (time_after(jiffies, sta->last_rx + IEEE80211_PROBE_IDLE_TIME)) {
1021 ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL;
1022 ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid,
1023 ifmgd->ssid_len, NULL, 0);
1024 }
1025
1026 unlock:
1016 rcu_read_unlock(); 1027 rcu_read_unlock();
1017 1028
1018 if (disassoc) 1029 if (disassoc)
1019 ieee80211_set_disassoc(sdata, ifsta, true, true, 1030 ieee80211_set_disassoc(sdata, true, true,
1020 WLAN_REASON_PREV_AUTH_NOT_VALID); 1031 WLAN_REASON_PREV_AUTH_NOT_VALID);
1021 else 1032 else
1022 mod_timer(&ifsta->timer, jiffies + 1033 mod_timer(&ifmgd->timer, jiffies +
1023 IEEE80211_MONITORING_INTERVAL); 1034 IEEE80211_MONITORING_INTERVAL);
1024} 1035}
1025 1036
1026 1037
1027static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata, 1038static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata)
1028 struct ieee80211_if_sta *ifsta)
1029{ 1039{
1040 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1041
1030 printk(KERN_DEBUG "%s: authenticated\n", sdata->dev->name); 1042 printk(KERN_DEBUG "%s: authenticated\n", sdata->dev->name);
1031 ifsta->flags |= IEEE80211_STA_AUTHENTICATED; 1043 ifmgd->flags |= IEEE80211_STA_AUTHENTICATED;
1032 ieee80211_associate(sdata, ifsta); 1044 if (ifmgd->flags & IEEE80211_STA_EXT_SME) {
1045 /* Wait for SME to request association */
1046 ifmgd->state = IEEE80211_STA_MLME_DISABLED;
1047 } else
1048 ieee80211_associate(sdata);
1033} 1049}
1034 1050
1035 1051
1036static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, 1052static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
1037 struct ieee80211_if_sta *ifsta,
1038 struct ieee80211_mgmt *mgmt, 1053 struct ieee80211_mgmt *mgmt,
1039 size_t len) 1054 size_t len)
1040{ 1055{
@@ -1045,50 +1060,37 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata,
1045 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); 1060 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
1046 if (!elems.challenge) 1061 if (!elems.challenge)
1047 return; 1062 return;
1048 ieee80211_send_auth(sdata, ifsta, 3, elems.challenge - 2, 1063 ieee80211_send_auth(sdata, 3, sdata->u.mgd.auth_alg,
1049 elems.challenge_len + 2, 1); 1064 elems.challenge - 2, elems.challenge_len + 2,
1065 sdata->u.mgd.bssid, 1);
1066 sdata->u.mgd.auth_transaction = 4;
1050} 1067}
1051 1068
1052static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, 1069static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
1053 struct ieee80211_if_sta *ifsta,
1054 struct ieee80211_mgmt *mgmt, 1070 struct ieee80211_mgmt *mgmt,
1055 size_t len) 1071 size_t len)
1056{ 1072{
1073 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1057 u16 auth_alg, auth_transaction, status_code; 1074 u16 auth_alg, auth_transaction, status_code;
1058 1075
1059 if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && 1076 if (ifmgd->state != IEEE80211_STA_MLME_AUTHENTICATE)
1060 sdata->vif.type != NL80211_IFTYPE_ADHOC)
1061 return; 1077 return;
1062 1078
1063 if (len < 24 + 6) 1079 if (len < 24 + 6)
1064 return; 1080 return;
1065 1081
1066 if (sdata->vif.type != NL80211_IFTYPE_ADHOC && 1082 if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN) != 0)
1067 memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0)
1068 return; 1083 return;
1069 1084
1070 if (sdata->vif.type != NL80211_IFTYPE_ADHOC && 1085 if (memcmp(ifmgd->bssid, mgmt->bssid, ETH_ALEN) != 0)
1071 memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0)
1072 return; 1086 return;
1073 1087
1074 auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); 1088 auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
1075 auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); 1089 auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
1076 status_code = le16_to_cpu(mgmt->u.auth.status_code); 1090 status_code = le16_to_cpu(mgmt->u.auth.status_code);
1077 1091
1078 if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { 1092 if (auth_alg != ifmgd->auth_alg ||
1079 /* 1093 auth_transaction != ifmgd->auth_transaction)
1080 * IEEE 802.11 standard does not require authentication in IBSS
1081 * networks and most implementations do not seem to use it.
1082 * However, try to reply to authentication attempts if someone
1083 * has actually implemented this.
1084 */
1085 if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1)
1086 return;
1087 ieee80211_send_auth(sdata, ifsta, 2, NULL, 0, 0);
1088 }
1089
1090 if (auth_alg != ifsta->auth_alg ||
1091 auth_transaction != ifsta->auth_transaction)
1092 return; 1094 return;
1093 1095
1094 if (status_code != WLAN_STATUS_SUCCESS) { 1096 if (status_code != WLAN_STATUS_SUCCESS) {
@@ -1097,15 +1099,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
1097 const int num_algs = ARRAY_SIZE(algs); 1099 const int num_algs = ARRAY_SIZE(algs);
1098 int i, pos; 1100 int i, pos;
1099 algs[0] = algs[1] = algs[2] = 0xff; 1101 algs[0] = algs[1] = algs[2] = 0xff;
1100 if (ifsta->auth_algs & IEEE80211_AUTH_ALG_OPEN) 1102 if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_OPEN)
1101 algs[0] = WLAN_AUTH_OPEN; 1103 algs[0] = WLAN_AUTH_OPEN;
1102 if (ifsta->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY) 1104 if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY)
1103 algs[1] = WLAN_AUTH_SHARED_KEY; 1105 algs[1] = WLAN_AUTH_SHARED_KEY;
1104 if (ifsta->auth_algs & IEEE80211_AUTH_ALG_LEAP) 1106 if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_LEAP)
1105 algs[2] = WLAN_AUTH_LEAP; 1107 algs[2] = WLAN_AUTH_LEAP;
1106 if (ifsta->auth_alg == WLAN_AUTH_OPEN) 1108 if (ifmgd->auth_alg == WLAN_AUTH_OPEN)
1107 pos = 0; 1109 pos = 0;
1108 else if (ifsta->auth_alg == WLAN_AUTH_SHARED_KEY) 1110 else if (ifmgd->auth_alg == WLAN_AUTH_SHARED_KEY)
1109 pos = 1; 1111 pos = 1;
1110 else 1112 else
1111 pos = 2; 1113 pos = 2;
@@ -1113,105 +1115,112 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
1113 pos++; 1115 pos++;
1114 if (pos >= num_algs) 1116 if (pos >= num_algs)
1115 pos = 0; 1117 pos = 0;
1116 if (algs[pos] == ifsta->auth_alg || 1118 if (algs[pos] == ifmgd->auth_alg ||
1117 algs[pos] == 0xff) 1119 algs[pos] == 0xff)
1118 continue; 1120 continue;
1119 if (algs[pos] == WLAN_AUTH_SHARED_KEY && 1121 if (algs[pos] == WLAN_AUTH_SHARED_KEY &&
1120 !ieee80211_sta_wep_configured(sdata)) 1122 !ieee80211_sta_wep_configured(sdata))
1121 continue; 1123 continue;
1122 ifsta->auth_alg = algs[pos]; 1124 ifmgd->auth_alg = algs[pos];
1123 break; 1125 break;
1124 } 1126 }
1125 } 1127 }
1126 return; 1128 return;
1127 } 1129 }
1128 1130
1129 switch (ifsta->auth_alg) { 1131 switch (ifmgd->auth_alg) {
1130 case WLAN_AUTH_OPEN: 1132 case WLAN_AUTH_OPEN:
1131 case WLAN_AUTH_LEAP: 1133 case WLAN_AUTH_LEAP:
1132 ieee80211_auth_completed(sdata, ifsta); 1134 case WLAN_AUTH_FT:
1135 ieee80211_auth_completed(sdata);
1136 cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, len);
1133 break; 1137 break;
1134 case WLAN_AUTH_SHARED_KEY: 1138 case WLAN_AUTH_SHARED_KEY:
1135 if (ifsta->auth_transaction == 4) 1139 if (ifmgd->auth_transaction == 4) {
1136 ieee80211_auth_completed(sdata, ifsta); 1140 ieee80211_auth_completed(sdata);
1137 else 1141 cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, len);
1138 ieee80211_auth_challenge(sdata, ifsta, mgmt, len); 1142 } else
1143 ieee80211_auth_challenge(sdata, mgmt, len);
1139 break; 1144 break;
1140 } 1145 }
1141} 1146}
1142 1147
1143 1148
1144static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, 1149static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
1145 struct ieee80211_if_sta *ifsta,
1146 struct ieee80211_mgmt *mgmt, 1150 struct ieee80211_mgmt *mgmt,
1147 size_t len) 1151 size_t len)
1148{ 1152{
1153 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1149 u16 reason_code; 1154 u16 reason_code;
1150 1155
1151 if (len < 24 + 2) 1156 if (len < 24 + 2)
1152 return; 1157 return;
1153 1158
1154 if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) 1159 if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN))
1155 return; 1160 return;
1156 1161
1157 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); 1162 reason_code = le16_to_cpu(mgmt->u.deauth.reason_code);
1158 1163
1159 if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) 1164 if (ifmgd->flags & IEEE80211_STA_AUTHENTICATED)
1160 printk(KERN_DEBUG "%s: deauthenticated (Reason: %u)\n", 1165 printk(KERN_DEBUG "%s: deauthenticated (Reason: %u)\n",
1161 sdata->dev->name, reason_code); 1166 sdata->dev->name, reason_code);
1162 1167
1163 if (ifsta->state == IEEE80211_STA_MLME_AUTHENTICATE || 1168 if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) &&
1164 ifsta->state == IEEE80211_STA_MLME_ASSOCIATE || 1169 (ifmgd->state == IEEE80211_STA_MLME_AUTHENTICATE ||
1165 ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { 1170 ifmgd->state == IEEE80211_STA_MLME_ASSOCIATE ||
1166 ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; 1171 ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED)) {
1167 mod_timer(&ifsta->timer, jiffies + 1172 ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE;
1173 mod_timer(&ifmgd->timer, jiffies +
1168 IEEE80211_RETRY_AUTH_INTERVAL); 1174 IEEE80211_RETRY_AUTH_INTERVAL);
1169 } 1175 }
1170 1176
1171 ieee80211_set_disassoc(sdata, ifsta, true, false, 0); 1177 ieee80211_set_disassoc(sdata, true, false, 0);
1172 ifsta->flags &= ~IEEE80211_STA_AUTHENTICATED; 1178 ifmgd->flags &= ~IEEE80211_STA_AUTHENTICATED;
1179 cfg80211_send_rx_deauth(sdata->dev, (u8 *) mgmt, len);
1173} 1180}
1174 1181
1175 1182
1176static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, 1183static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
1177 struct ieee80211_if_sta *ifsta,
1178 struct ieee80211_mgmt *mgmt, 1184 struct ieee80211_mgmt *mgmt,
1179 size_t len) 1185 size_t len)
1180{ 1186{
1187 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1181 u16 reason_code; 1188 u16 reason_code;
1182 1189
1183 if (len < 24 + 2) 1190 if (len < 24 + 2)
1184 return; 1191 return;
1185 1192
1186 if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) 1193 if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN))
1187 return; 1194 return;
1188 1195
1189 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); 1196 reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code);
1190 1197
1191 if (ifsta->flags & IEEE80211_STA_ASSOCIATED) 1198 if (ifmgd->flags & IEEE80211_STA_ASSOCIATED)
1192 printk(KERN_DEBUG "%s: disassociated (Reason: %u)\n", 1199 printk(KERN_DEBUG "%s: disassociated (Reason: %u)\n",
1193 sdata->dev->name, reason_code); 1200 sdata->dev->name, reason_code);
1194 1201
1195 if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { 1202 if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) &&
1196 ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; 1203 ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) {
1197 mod_timer(&ifsta->timer, jiffies + 1204 ifmgd->state = IEEE80211_STA_MLME_ASSOCIATE;
1205 mod_timer(&ifmgd->timer, jiffies +
1198 IEEE80211_RETRY_AUTH_INTERVAL); 1206 IEEE80211_RETRY_AUTH_INTERVAL);
1199 } 1207 }
1200 1208
1201 ieee80211_set_disassoc(sdata, ifsta, false, false, reason_code); 1209 ieee80211_set_disassoc(sdata, false, false, reason_code);
1210 cfg80211_send_rx_disassoc(sdata->dev, (u8 *) mgmt, len);
1202} 1211}
1203 1212
1204 1213
1205static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, 1214static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1206 struct ieee80211_if_sta *ifsta,
1207 struct ieee80211_mgmt *mgmt, 1215 struct ieee80211_mgmt *mgmt,
1208 size_t len, 1216 size_t len,
1209 int reassoc) 1217 int reassoc)
1210{ 1218{
1219 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1211 struct ieee80211_local *local = sdata->local; 1220 struct ieee80211_local *local = sdata->local;
1212 struct ieee80211_supported_band *sband; 1221 struct ieee80211_supported_band *sband;
1213 struct sta_info *sta; 1222 struct sta_info *sta;
1214 u64 rates, basic_rates; 1223 u32 rates, basic_rates;
1215 u16 capab_info, status_code, aid; 1224 u16 capab_info, status_code, aid;
1216 struct ieee802_11_elems elems; 1225 struct ieee802_11_elems elems;
1217 struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; 1226 struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
@@ -1224,13 +1233,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1224 /* AssocResp and ReassocResp have identical structure, so process both 1233 /* AssocResp and ReassocResp have identical structure, so process both
1225 * of them in this function. */ 1234 * of them in this function. */
1226 1235
1227 if (ifsta->state != IEEE80211_STA_MLME_ASSOCIATE) 1236 if (ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE)
1228 return; 1237 return;
1229 1238
1230 if (len < 24 + 6) 1239 if (len < 24 + 6)
1231 return; 1240 return;
1232 1241
1233 if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) 1242 if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN) != 0)
1234 return; 1243 return;
1235 1244
1236 capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); 1245 capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info);
@@ -1242,13 +1251,31 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1242 sdata->dev->name, reassoc ? "Rea" : "A", mgmt->sa, 1251 sdata->dev->name, reassoc ? "Rea" : "A", mgmt->sa,
1243 capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); 1252 capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14))));
1244 1253
1254 pos = mgmt->u.assoc_resp.variable;
1255 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
1256
1257 if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY &&
1258 elems.timeout_int && elems.timeout_int_len == 5 &&
1259 elems.timeout_int[0] == WLAN_TIMEOUT_ASSOC_COMEBACK) {
1260 u32 tu, ms;
1261 tu = get_unaligned_le32(elems.timeout_int + 1);
1262 ms = tu * 1024 / 1000;
1263 printk(KERN_DEBUG "%s: AP rejected association temporarily; "
1264 "comeback duration %u TU (%u ms)\n",
1265 sdata->dev->name, tu, ms);
1266 if (ms > IEEE80211_ASSOC_TIMEOUT)
1267 mod_timer(&ifmgd->timer,
1268 jiffies + msecs_to_jiffies(ms));
1269 return;
1270 }
1271
1245 if (status_code != WLAN_STATUS_SUCCESS) { 1272 if (status_code != WLAN_STATUS_SUCCESS) {
1246 printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", 1273 printk(KERN_DEBUG "%s: AP denied association (code=%d)\n",
1247 sdata->dev->name, status_code); 1274 sdata->dev->name, status_code);
1248 /* if this was a reassociation, ensure we try a "full" 1275 /* if this was a reassociation, ensure we try a "full"
1249 * association next time. This works around some broken APs 1276 * association next time. This works around some broken APs
1250 * which do not correctly reject reassociation requests. */ 1277 * which do not correctly reject reassociation requests. */
1251 ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; 1278 ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET;
1252 return; 1279 return;
1253 } 1280 }
1254 1281
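
Background on the association-comeback handling added above (illustrative, not part of the patch): the Timeout Interval element carries the comeback duration in IEEE 802.11 Time Units, and one TU is 1024 microseconds, which is why the new code computes ms = tu * 1024 / 1000 before comparing against IEEE80211_ASSOC_TIMEOUT. A minimal sketch of that conversion, with a hypothetical helper name:

static inline unsigned int tu_to_msecs(unsigned int tu)
{
	/* 1 TU = 1024 usec; same integer arithmetic as the hunk above */
	return tu * 1024 / 1000;
}
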
@@ -1257,9 +1284,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1257 "set\n", sdata->dev->name, aid); 1284 "set\n", sdata->dev->name, aid);
1258 aid &= ~(BIT(15) | BIT(14)); 1285 aid &= ~(BIT(15) | BIT(14));
1259 1286
1260 pos = mgmt->u.assoc_resp.variable;
1261 ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems);
1262
1263 if (!elems.supp_rates) { 1287 if (!elems.supp_rates) {
1264 printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n", 1288 printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n",
1265 sdata->dev->name); 1289 sdata->dev->name);
@@ -1267,40 +1291,29 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1267 } 1291 }
1268 1292
1269 printk(KERN_DEBUG "%s: associated\n", sdata->dev->name); 1293 printk(KERN_DEBUG "%s: associated\n", sdata->dev->name);
1270 ifsta->aid = aid; 1294 ifmgd->aid = aid;
1271 ifsta->ap_capab = capab_info; 1295 ifmgd->ap_capab = capab_info;
1272 1296
1273 kfree(ifsta->assocresp_ies); 1297 kfree(ifmgd->assocresp_ies);
1274 ifsta->assocresp_ies_len = len - (pos - (u8 *) mgmt); 1298 ifmgd->assocresp_ies_len = len - (pos - (u8 *) mgmt);
1275 ifsta->assocresp_ies = kmalloc(ifsta->assocresp_ies_len, GFP_KERNEL); 1299 ifmgd->assocresp_ies = kmalloc(ifmgd->assocresp_ies_len, GFP_KERNEL);
1276 if (ifsta->assocresp_ies) 1300 if (ifmgd->assocresp_ies)
1277 memcpy(ifsta->assocresp_ies, pos, ifsta->assocresp_ies_len); 1301 memcpy(ifmgd->assocresp_ies, pos, ifmgd->assocresp_ies_len);
1278 1302
1279 rcu_read_lock(); 1303 rcu_read_lock();
1280 1304
1281 /* Add STA entry for the AP */ 1305 /* Add STA entry for the AP */
1282 sta = sta_info_get(local, ifsta->bssid); 1306 sta = sta_info_get(local, ifmgd->bssid);
1283 if (!sta) { 1307 if (!sta) {
1284 struct ieee80211_bss *bss;
1285
1286 newsta = true; 1308 newsta = true;
1287 1309
1288 sta = sta_info_alloc(sdata, ifsta->bssid, GFP_ATOMIC); 1310 sta = sta_info_alloc(sdata, ifmgd->bssid, GFP_ATOMIC);
1289 if (!sta) { 1311 if (!sta) {
1290 printk(KERN_DEBUG "%s: failed to alloc STA entry for" 1312 printk(KERN_DEBUG "%s: failed to alloc STA entry for"
1291 " the AP\n", sdata->dev->name); 1313 " the AP\n", sdata->dev->name);
1292 rcu_read_unlock(); 1314 rcu_read_unlock();
1293 return; 1315 return;
1294 } 1316 }
1295 bss = ieee80211_rx_bss_get(local, ifsta->bssid,
1296 local->hw.conf.channel->center_freq,
1297 ifsta->ssid, ifsta->ssid_len);
1298 if (bss) {
1299 sta->last_signal = bss->signal;
1300 sta->last_qual = bss->qual;
1301 sta->last_noise = bss->noise;
1302 ieee80211_rx_bss_put(local, bss);
1303 }
1304 1317
1305 /* update new sta with its last rx activity */ 1318 /* update new sta with its last rx activity */
1306 sta->last_rx = jiffies; 1319 sta->last_rx = jiffies;
@@ -1367,7 +1380,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1367 else 1380 else
1368 sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; 1381 sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
1369 1382
1370 if (elems.ht_cap_elem) 1383 /* If TKIP/WEP is used, no need to parse AP's HT capabilities */
1384 if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED))
1371 ieee80211_ht_cap_ie_to_sta_ht_cap(sband, 1385 ieee80211_ht_cap_ie_to_sta_ht_cap(sband,
1372 elems.ht_cap_elem, &sta->sta.ht_cap); 1386 elems.ht_cap_elem, &sta->sta.ht_cap);
1373 1387
@@ -1375,6 +1389,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1375 1389
1376 rate_control_rate_init(sta); 1390 rate_control_rate_init(sta);
1377 1391
1392 if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED)
1393 set_sta_flags(sta, WLAN_STA_MFP);
1394
1378 if (elems.wmm_param) 1395 if (elems.wmm_param)
1379 set_sta_flags(sta, WLAN_STA_WME); 1396 set_sta_flags(sta, WLAN_STA_WME);
1380 1397
@@ -1391,11 +1408,12 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1391 rcu_read_unlock(); 1408 rcu_read_unlock();
1392 1409
1393 if (elems.wmm_param) 1410 if (elems.wmm_param)
1394 ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, 1411 ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param,
1395 elems.wmm_param_len); 1412 elems.wmm_param_len);
1396 1413
1397 if (elems.ht_info_elem && elems.wmm_param && 1414 if (elems.ht_info_elem && elems.wmm_param &&
1398 (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) 1415 (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) &&
1416 !(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED))
1399 changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, 1417 changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem,
1400 ap_ht_cap_flags); 1418 ap_ht_cap_flags);
1401 1419
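
A note on the HT gating above (not part of the patch): 802.11n forbids HT operation when the pairwise cipher is TKIP or WEP, so the patch parses the AP's HT capabilities and calls ieee80211_enable_ht() only when WMM is active and the new IEEE80211_STA_TKIP_WEP_USED flag is clear. A condensed, purely illustrative restatement of that condition:

	bool can_use_ht = elems.ht_info_elem && elems.wmm_param &&
			  (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) &&
			  !(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED);
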
@@ -1403,136 +1421,19 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata,
1403 * ieee80211_set_associated() will tell the driver */ 1421 * ieee80211_set_associated() will tell the driver */
1404 bss_conf->aid = aid; 1422 bss_conf->aid = aid;
1405 bss_conf->assoc_capability = capab_info; 1423 bss_conf->assoc_capability = capab_info;
1406 ieee80211_set_associated(sdata, ifsta, changed); 1424 ieee80211_set_associated(sdata, changed);
1407
1408 ieee80211_associated(sdata, ifsta);
1409}
1410
1411
1412static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
1413 struct ieee80211_if_sta *ifsta,
1414 struct ieee80211_bss *bss)
1415{
1416 struct ieee80211_local *local = sdata->local;
1417 int res, rates, i, j;
1418 struct sk_buff *skb;
1419 struct ieee80211_mgmt *mgmt;
1420 u8 *pos;
1421 struct ieee80211_supported_band *sband;
1422 union iwreq_data wrqu;
1423
1424 skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400);
1425 if (!skb) {
1426 printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
1427 "response\n", sdata->dev->name);
1428 return -ENOMEM;
1429 }
1430
1431 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
1432
1433 /* Remove possible STA entries from other IBSS networks. */
1434 sta_info_flush_delayed(sdata);
1435
1436 if (local->ops->reset_tsf) {
1437 /* Reset own TSF to allow time synchronization work. */
1438 local->ops->reset_tsf(local_to_hw(local));
1439 }
1440 memcpy(ifsta->bssid, bss->bssid, ETH_ALEN);
1441 res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID);
1442 if (res)
1443 return res;
1444
1445 local->hw.conf.beacon_int = bss->beacon_int >= 10 ? bss->beacon_int : 10;
1446
1447 sdata->drop_unencrypted = bss->capability &
1448 WLAN_CAPABILITY_PRIVACY ? 1 : 0;
1449
1450 res = ieee80211_set_freq(sdata, bss->freq);
1451
1452 if (res)
1453 return res;
1454
1455 /* Build IBSS probe response */
1456 1425
1457 skb_reserve(skb, local->hw.extra_tx_headroom); 1426 /*
1458 1427 * initialise the time of last beacon to be the association time,
1459 mgmt = (struct ieee80211_mgmt *) 1428 * otherwise beacon loss check will trigger immediately
1460 skb_put(skb, 24 + sizeof(mgmt->u.beacon)); 1429 */
1461 memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); 1430 ifmgd->last_beacon = jiffies;
1462 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
1463 IEEE80211_STYPE_PROBE_RESP);
1464 memset(mgmt->da, 0xff, ETH_ALEN);
1465 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
1466 memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN);
1467 mgmt->u.beacon.beacon_int =
1468 cpu_to_le16(local->hw.conf.beacon_int);
1469 mgmt->u.beacon.timestamp = cpu_to_le64(bss->timestamp);
1470 mgmt->u.beacon.capab_info = cpu_to_le16(bss->capability);
1471
1472 pos = skb_put(skb, 2 + ifsta->ssid_len);
1473 *pos++ = WLAN_EID_SSID;
1474 *pos++ = ifsta->ssid_len;
1475 memcpy(pos, ifsta->ssid, ifsta->ssid_len);
1476
1477 rates = bss->supp_rates_len;
1478 if (rates > 8)
1479 rates = 8;
1480 pos = skb_put(skb, 2 + rates);
1481 *pos++ = WLAN_EID_SUPP_RATES;
1482 *pos++ = rates;
1483 memcpy(pos, bss->supp_rates, rates);
1484
1485 if (bss->band == IEEE80211_BAND_2GHZ) {
1486 pos = skb_put(skb, 2 + 1);
1487 *pos++ = WLAN_EID_DS_PARAMS;
1488 *pos++ = 1;
1489 *pos++ = ieee80211_frequency_to_channel(bss->freq);
1490 }
1491
1492 pos = skb_put(skb, 2 + 2);
1493 *pos++ = WLAN_EID_IBSS_PARAMS;
1494 *pos++ = 2;
1495 /* FIX: set ATIM window based on scan results */
1496 *pos++ = 0;
1497 *pos++ = 0;
1498
1499 if (bss->supp_rates_len > 8) {
1500 rates = bss->supp_rates_len - 8;
1501 pos = skb_put(skb, 2 + rates);
1502 *pos++ = WLAN_EID_EXT_SUPP_RATES;
1503 *pos++ = rates;
1504 memcpy(pos, &bss->supp_rates[8], rates);
1505 }
1506
1507 ifsta->probe_resp = skb;
1508
1509 ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON);
1510
1511
1512 rates = 0;
1513 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
1514 for (i = 0; i < bss->supp_rates_len; i++) {
1515 int bitrate = (bss->supp_rates[i] & 0x7f) * 5;
1516 for (j = 0; j < sband->n_bitrates; j++)
1517 if (sband->bitrates[j].bitrate == bitrate)
1518 rates |= BIT(j);
1519 }
1520 ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates;
1521
1522 ieee80211_sta_def_wmm_params(sdata, bss);
1523
1524 ifsta->state = IEEE80211_STA_MLME_IBSS_JOINED;
1525 mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
1526
1527 ieee80211_led_assoc(local, true);
1528
1529 memset(&wrqu, 0, sizeof(wrqu));
1530 memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN);
1531 wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL);
1532 1431
1533 return res; 1432 ieee80211_associated(sdata);
1433 cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, len);
1534} 1434}
1535 1435
1436
1536static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, 1437static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
1537 struct ieee80211_mgmt *mgmt, 1438 struct ieee80211_mgmt *mgmt,
1538 size_t len, 1439 size_t len,
@@ -1543,11 +1444,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
1543 struct ieee80211_local *local = sdata->local; 1444 struct ieee80211_local *local = sdata->local;
1544 int freq; 1445 int freq;
1545 struct ieee80211_bss *bss; 1446 struct ieee80211_bss *bss;
1546 struct sta_info *sta;
1547 struct ieee80211_channel *channel; 1447 struct ieee80211_channel *channel;
1548 u64 beacon_timestamp, rx_timestamp;
1549 u64 supp_rates = 0;
1550 enum ieee80211_band band = rx_status->band;
1551 1448
1552 if (elems->ds_params && elems->ds_params_len == 1) 1449 if (elems->ds_params && elems->ds_params_len == 1)
1553 freq = ieee80211_channel_to_frequency(elems->ds_params[0]); 1450 freq = ieee80211_channel_to_frequency(elems->ds_params[0]);
@@ -1559,112 +1456,16 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
1559 if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) 1456 if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
1560 return; 1457 return;
1561 1458
1562 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates &&
1563 memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) {
1564 supp_rates = ieee80211_sta_get_rates(local, elems, band);
1565
1566 rcu_read_lock();
1567
1568 sta = sta_info_get(local, mgmt->sa);
1569 if (sta) {
1570 u64 prev_rates;
1571
1572 prev_rates = sta->sta.supp_rates[band];
1573 /* make sure mandatory rates are always added */
1574 sta->sta.supp_rates[band] = supp_rates |
1575 ieee80211_mandatory_rates(local, band);
1576
1577#ifdef CONFIG_MAC80211_IBSS_DEBUG
1578 if (sta->sta.supp_rates[band] != prev_rates)
1579 printk(KERN_DEBUG "%s: updated supp_rates set "
1580 "for %pM based on beacon info (0x%llx | "
1581 "0x%llx -> 0x%llx)\n",
1582 sdata->dev->name,
1583 sta->sta.addr,
1584 (unsigned long long) prev_rates,
1585 (unsigned long long) supp_rates,
1586 (unsigned long long) sta->sta.supp_rates[band]);
1587#endif
1588 } else {
1589 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates);
1590 }
1591
1592 rcu_read_unlock();
1593 }
1594
1595 bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, 1459 bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems,
1596 freq, beacon); 1460 channel, beacon);
1597 if (!bss) 1461 if (!bss)
1598 return; 1462 return;
1599 1463
1600 /* was just updated in ieee80211_bss_info_update */ 1464 if (elems->ch_switch_elem && (elems->ch_switch_elem_len == 3) &&
1601 beacon_timestamp = bss->timestamp; 1465 (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN) == 0)) {
1602 1466 struct ieee80211_channel_sw_ie *sw_elem =
1603 /* 1467 (struct ieee80211_channel_sw_ie *)elems->ch_switch_elem;
1604 * In STA mode, the remaining parameters should not be overridden 1468 ieee80211_process_chanswitch(sdata, sw_elem, bss);
1605 * by beacons because they're not necessarily accurate there.
1606 */
1607 if (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
1608 bss->last_probe_resp && beacon) {
1609 ieee80211_rx_bss_put(local, bss);
1610 return;
1611 }
1612
1613 /* check if we need to merge IBSS */
1614 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && beacon &&
1615 bss->capability & WLAN_CAPABILITY_IBSS &&
1616 bss->freq == local->oper_channel->center_freq &&
1617 elems->ssid_len == sdata->u.sta.ssid_len &&
1618 memcmp(elems->ssid, sdata->u.sta.ssid,
1619 sdata->u.sta.ssid_len) == 0) {
1620 if (rx_status->flag & RX_FLAG_TSFT) {
1621 /* in order for correct IBSS merging we need mactime
1622 *
1623 * since mactime is defined as the time the first data
1624 * symbol of the frame hits the PHY, and the timestamp
1625 * of the beacon is defined as "the time that the data
1626 * symbol containing the first bit of the timestamp is
1627 * transmitted to the PHY plus the transmitting STA’s
1628 * delays through its local PHY from the MAC-PHY
1629 * interface to its interface with the WM"
1630 * (802.11 11.1.2) - equals the time this bit arrives at
1631 * the receiver - we have to take into account the
1632 * offset between the two.
1633 * e.g: at 1 MBit that means mactime is 192 usec earlier
1634 * (=24 bytes * 8 usecs/byte) than the beacon timestamp.
1635 */
1636 int rate;
1637 if (rx_status->flag & RX_FLAG_HT) {
1638 rate = 65; /* TODO: HT rates */
1639 } else {
1640 rate = local->hw.wiphy->bands[band]->
1641 bitrates[rx_status->rate_idx].bitrate;
1642 }
1643 rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate);
1644 } else if (local && local->ops && local->ops->get_tsf)
1645 /* second best option: get current TSF */
1646 rx_timestamp = local->ops->get_tsf(local_to_hw(local));
1647 else
1648 /* can't merge without knowing the TSF */
1649 rx_timestamp = -1LLU;
1650#ifdef CONFIG_MAC80211_IBSS_DEBUG
1651 printk(KERN_DEBUG "RX beacon SA=%pM BSSID="
1652 "%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n",
1653 mgmt->sa, mgmt->bssid,
1654 (unsigned long long)rx_timestamp,
1655 (unsigned long long)beacon_timestamp,
1656 (unsigned long long)(rx_timestamp - beacon_timestamp),
1657 jiffies);
1658#endif /* CONFIG_MAC80211_IBSS_DEBUG */
1659 if (beacon_timestamp > rx_timestamp) {
1660#ifdef CONFIG_MAC80211_IBSS_DEBUG
1661 printk(KERN_DEBUG "%s: beacon TSF higher than "
1662 "local TSF - IBSS merge with BSSID %pM\n",
1663 sdata->dev->name, mgmt->bssid);
1664#endif
1665 ieee80211_sta_join_ibss(sdata, &sdata->u.sta, bss);
1666 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates);
1667 }
1668 } 1469 }
1669 1470
1670 ieee80211_rx_bss_put(local, bss); 1471 ieee80211_rx_bss_put(local, bss);
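
For reference on the IBSS-merge code removed above (illustrative, not part of the patch): it compensated for the airtime of the 24-byte MAC header that precedes the beacon timestamp field. With the bitrate expressed in 100 kb/s units (10 for 1 Mb/s), the offset is 24 * 8 * 10 / rate microseconds, i.e. 192 usec at 1 Mb/s, exactly as the deleted comment notes. A stand-alone sketch of that arithmetic with a hypothetical helper name:

static unsigned int hdr_airtime_usec(unsigned int rate_100kbps)
{
	/* 24-byte header, 8 bits per byte; at 1 Mb/s (rate == 10)
	 * this is 24 * 8 * 10 / 10 = 192 usec */
	return 24 * 8 * 10 / rate_100kbps;
}
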
@@ -1676,9 +1477,11 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
1676 size_t len, 1477 size_t len,
1677 struct ieee80211_rx_status *rx_status) 1478 struct ieee80211_rx_status *rx_status)
1678{ 1479{
1480 struct ieee80211_if_managed *ifmgd;
1679 size_t baselen; 1481 size_t baselen;
1680 struct ieee802_11_elems elems; 1482 struct ieee802_11_elems elems;
1681 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 1483
1484 ifmgd = &sdata->u.mgd;
1682 1485
1683 if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) 1486 if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN))
1684 return; /* ignore ProbeResp to foreign address */ 1487 return; /* ignore ProbeResp to foreign address */
@@ -1694,25 +1497,27 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
1694 1497
1695 /* direct probe may be part of the association flow */ 1498 /* direct probe may be part of the association flow */
1696 if (test_and_clear_bit(IEEE80211_STA_REQ_DIRECT_PROBE, 1499 if (test_and_clear_bit(IEEE80211_STA_REQ_DIRECT_PROBE,
1697 &ifsta->request)) { 1500 &ifmgd->request)) {
1698 printk(KERN_DEBUG "%s direct probe responded\n", 1501 printk(KERN_DEBUG "%s direct probe responded\n",
1699 sdata->dev->name); 1502 sdata->dev->name);
1700 ieee80211_authenticate(sdata, ifsta); 1503 ieee80211_authenticate(sdata);
1701 } 1504 }
1702}
1703 1505
1506 if (ifmgd->flags & IEEE80211_STA_PROBEREQ_POLL)
1507 ifmgd->flags &= ~IEEE80211_STA_PROBEREQ_POLL;
1508}
1704 1509
1705static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, 1510static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1706 struct ieee80211_mgmt *mgmt, 1511 struct ieee80211_mgmt *mgmt,
1707 size_t len, 1512 size_t len,
1708 struct ieee80211_rx_status *rx_status) 1513 struct ieee80211_rx_status *rx_status)
1709{ 1514{
1710 struct ieee80211_if_sta *ifsta; 1515 struct ieee80211_if_managed *ifmgd;
1711 size_t baselen; 1516 size_t baselen;
1712 struct ieee802_11_elems elems; 1517 struct ieee802_11_elems elems;
1713 struct ieee80211_local *local = sdata->local; 1518 struct ieee80211_local *local = sdata->local;
1714 u32 changed = 0; 1519 u32 changed = 0;
1715 bool erp_valid; 1520 bool erp_valid, directed_tim;
1716 u8 erp_value = 0; 1521 u8 erp_value = 0;
1717 1522
1718 /* Process beacon from the current BSS */ 1523 /* Process beacon from the current BSS */
@@ -1726,15 +1531,43 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1726 1531
1727 if (sdata->vif.type != NL80211_IFTYPE_STATION) 1532 if (sdata->vif.type != NL80211_IFTYPE_STATION)
1728 return; 1533 return;
1729 ifsta = &sdata->u.sta;
1730 1534
1731 if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED) || 1535 ifmgd = &sdata->u.mgd;
1732 memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) 1536
1537 if (!(ifmgd->flags & IEEE80211_STA_ASSOCIATED) ||
1538 memcmp(ifmgd->bssid, mgmt->bssid, ETH_ALEN) != 0)
1539 return;
1540
1541 if (rx_status->freq != local->hw.conf.channel->center_freq)
1733 return; 1542 return;
1734 1543
1735 ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, 1544 ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param,
1736 elems.wmm_param_len); 1545 elems.wmm_param_len);
1737 1546
1547 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) {
1548 directed_tim = ieee80211_check_tim(&elems, ifmgd->aid);
1549
1550 if (directed_tim) {
1551 if (local->hw.conf.dynamic_ps_timeout > 0) {
1552 local->hw.conf.flags &= ~IEEE80211_CONF_PS;
1553 ieee80211_hw_config(local,
1554 IEEE80211_CONF_CHANGE_PS);
1555 ieee80211_send_nullfunc(local, sdata, 0);
1556 } else {
1557 local->pspolling = true;
1558
1559 /*
1560 * Here is assumed that the driver will be
1561 * able to send ps-poll frame and receive a
1562 * response even though power save mode is
1563 * enabled, but some drivers might require
1564 * to disable power save here. This needs
1565 * to be investigated.
1566 */
1567 ieee80211_send_pspoll(local, sdata);
1568 }
1569 }
1570 }
1738 1571
1739 if (elems.erp_info && elems.erp_info_len >= 1) { 1572 if (elems.erp_info && elems.erp_info_len >= 1) {
1740 erp_valid = true; 1573 erp_valid = true;
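
Background on the directed-TIM handling added above (illustrative, not part of the patch): ieee80211_check_tim() reports whether the beacon's TIM element has the bit for this station's AID set, i.e. the AP holds buffered frames for us. The new code then either leaves power save and announces the wakeup with a nullfunc frame (when a dynamic PS timeout is configured) or stays in power save and fetches the data with a PS-Poll. A condensed sketch of that decision, reusing the symbols from the hunk above; the flag updates (clearing IEEE80211_CONF_PS, setting local->pspolling) are omitted here:

	if (ieee80211_check_tim(&elems, ifmgd->aid)) {
		if (local->hw.conf.dynamic_ps_timeout > 0)
			ieee80211_send_nullfunc(local, sdata, 0); /* wake up */
		else
			ieee80211_send_pspoll(local, sdata); /* poll one frame */
	}
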
@@ -1747,14 +1580,15 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1747 erp_valid, erp_value); 1580 erp_valid, erp_value);
1748 1581
1749 1582
1750 if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param) { 1583 if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param &&
1584 !(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED)) {
1751 struct sta_info *sta; 1585 struct sta_info *sta;
1752 struct ieee80211_supported_band *sband; 1586 struct ieee80211_supported_band *sband;
1753 u16 ap_ht_cap_flags; 1587 u16 ap_ht_cap_flags;
1754 1588
1755 rcu_read_lock(); 1589 rcu_read_lock();
1756 1590
1757 sta = sta_info_get(local, ifsta->bssid); 1591 sta = sta_info_get(local, ifmgd->bssid);
1758 if (!sta) { 1592 if (!sta) {
1759 rcu_read_unlock(); 1593 rcu_read_unlock();
1760 return; 1594 return;
@@ -1778,92 +1612,28 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1778 * for the BSSID we are associated to */ 1612 * for the BSSID we are associated to */
1779 regulatory_hint_11d(local->hw.wiphy, 1613 regulatory_hint_11d(local->hw.wiphy,
1780 elems.country_elem, elems.country_elem_len); 1614 elems.country_elem, elems.country_elem_len);
1781 }
1782
1783 ieee80211_bss_info_change_notify(sdata, changed);
1784}
1785
1786
1787static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
1788 struct ieee80211_if_sta *ifsta,
1789 struct ieee80211_mgmt *mgmt,
1790 size_t len,
1791 struct ieee80211_rx_status *rx_status)
1792{
1793 struct ieee80211_local *local = sdata->local;
1794 int tx_last_beacon;
1795 struct sk_buff *skb;
1796 struct ieee80211_mgmt *resp;
1797 u8 *pos, *end;
1798 1615
1799 if (sdata->vif.type != NL80211_IFTYPE_ADHOC || 1616 /* TODO: IBSS also needs this */
1800 ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED || 1617 if (elems.pwr_constr_elem)
1801 len < 24 + 2 || !ifsta->probe_resp) 1618 ieee80211_handle_pwr_constr(sdata,
1802 return; 1619 le16_to_cpu(mgmt->u.probe_resp.capab_info),
1803 1620 elems.pwr_constr_elem,
1804 if (local->ops->tx_last_beacon) 1621 elems.pwr_constr_elem_len);
1805 tx_last_beacon = local->ops->tx_last_beacon(local_to_hw(local));
1806 else
1807 tx_last_beacon = 1;
1808
1809#ifdef CONFIG_MAC80211_IBSS_DEBUG
1810 printk(KERN_DEBUG "%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM"
1811 " (tx_last_beacon=%d)\n",
1812 sdata->dev->name, mgmt->sa, mgmt->da,
1813 mgmt->bssid, tx_last_beacon);
1814#endif /* CONFIG_MAC80211_IBSS_DEBUG */
1815
1816 if (!tx_last_beacon)
1817 return;
1818
1819 if (memcmp(mgmt->bssid, ifsta->bssid, ETH_ALEN) != 0 &&
1820 memcmp(mgmt->bssid, "\xff\xff\xff\xff\xff\xff", ETH_ALEN) != 0)
1821 return;
1822
1823 end = ((u8 *) mgmt) + len;
1824 pos = mgmt->u.probe_req.variable;
1825 if (pos[0] != WLAN_EID_SSID ||
1826 pos + 2 + pos[1] > end) {
1827#ifdef CONFIG_MAC80211_IBSS_DEBUG
1828 printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq "
1829 "from %pM\n",
1830 sdata->dev->name, mgmt->sa);
1831#endif
1832 return;
1833 } 1622 }
1834 if (pos[1] != 0 &&
1835 (pos[1] != ifsta->ssid_len ||
1836 memcmp(pos + 2, ifsta->ssid, ifsta->ssid_len) != 0)) {
1837 /* Ignore ProbeReq for foreign SSID */
1838 return;
1839 }
1840
1841 /* Reply with ProbeResp */
1842 skb = skb_copy(ifsta->probe_resp, GFP_KERNEL);
1843 if (!skb)
1844 return;
1845 1623
1846 resp = (struct ieee80211_mgmt *) skb->data; 1624 ieee80211_bss_info_change_notify(sdata, changed);
1847 memcpy(resp->da, mgmt->sa, ETH_ALEN);
1848#ifdef CONFIG_MAC80211_IBSS_DEBUG
1849 printk(KERN_DEBUG "%s: Sending ProbeResp to %pM\n",
1850 sdata->dev->name, resp->da);
1851#endif /* CONFIG_MAC80211_IBSS_DEBUG */
1852 ieee80211_tx_skb(sdata, skb, 0);
1853} 1625}
1854 1626
1855void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, 1627ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata,
1856 struct ieee80211_rx_status *rx_status) 1628 struct sk_buff *skb,
1629 struct ieee80211_rx_status *rx_status)
1857{ 1630{
1858 struct ieee80211_local *local = sdata->local; 1631 struct ieee80211_local *local = sdata->local;
1859 struct ieee80211_if_sta *ifsta;
1860 struct ieee80211_mgmt *mgmt; 1632 struct ieee80211_mgmt *mgmt;
1861 u16 fc; 1633 u16 fc;
1862 1634
1863 if (skb->len < 24) 1635 if (skb->len < 24)
1864 goto fail; 1636 return RX_DROP_MONITOR;
1865
1866 ifsta = &sdata->u.sta;
1867 1637
1868 mgmt = (struct ieee80211_mgmt *) skb->data; 1638 mgmt = (struct ieee80211_mgmt *) skb->data;
1869 fc = le16_to_cpu(mgmt->frame_control); 1639 fc = le16_to_cpu(mgmt->frame_control);
@@ -1878,113 +1648,68 @@ void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *
1878 case IEEE80211_STYPE_REASSOC_RESP: 1648 case IEEE80211_STYPE_REASSOC_RESP:
1879 case IEEE80211_STYPE_DEAUTH: 1649 case IEEE80211_STYPE_DEAUTH:
1880 case IEEE80211_STYPE_DISASSOC: 1650 case IEEE80211_STYPE_DISASSOC:
1881 skb_queue_tail(&ifsta->skb_queue, skb); 1651 skb_queue_tail(&sdata->u.mgd.skb_queue, skb);
1882 queue_work(local->hw.workqueue, &ifsta->work); 1652 queue_work(local->hw.workqueue, &sdata->u.mgd.work);
1883 return; 1653 return RX_QUEUED;
1884 } 1654 }
1885 1655
1886 fail: 1656 return RX_DROP_MONITOR;
1887 kfree_skb(skb);
1888} 1657}
1889 1658
1890static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, 1659static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1891 struct sk_buff *skb) 1660 struct sk_buff *skb)
1892{ 1661{
1893 struct ieee80211_rx_status *rx_status; 1662 struct ieee80211_rx_status *rx_status;
1894 struct ieee80211_if_sta *ifsta;
1895 struct ieee80211_mgmt *mgmt; 1663 struct ieee80211_mgmt *mgmt;
1896 u16 fc; 1664 u16 fc;
1897 1665
1898 ifsta = &sdata->u.sta;
1899
1900 rx_status = (struct ieee80211_rx_status *) skb->cb; 1666 rx_status = (struct ieee80211_rx_status *) skb->cb;
1901 mgmt = (struct ieee80211_mgmt *) skb->data; 1667 mgmt = (struct ieee80211_mgmt *) skb->data;
1902 fc = le16_to_cpu(mgmt->frame_control); 1668 fc = le16_to_cpu(mgmt->frame_control);
1903 1669
1904 switch (fc & IEEE80211_FCTL_STYPE) { 1670 switch (fc & IEEE80211_FCTL_STYPE) {
1905 case IEEE80211_STYPE_PROBE_REQ:
1906 ieee80211_rx_mgmt_probe_req(sdata, ifsta, mgmt, skb->len,
1907 rx_status);
1908 break;
1909 case IEEE80211_STYPE_PROBE_RESP: 1671 case IEEE80211_STYPE_PROBE_RESP:
1910 ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, rx_status); 1672 ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len,
1673 rx_status);
1911 break; 1674 break;
1912 case IEEE80211_STYPE_BEACON: 1675 case IEEE80211_STYPE_BEACON:
1913 ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status); 1676 ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len,
1677 rx_status);
1914 break; 1678 break;
1915 case IEEE80211_STYPE_AUTH: 1679 case IEEE80211_STYPE_AUTH:
1916 ieee80211_rx_mgmt_auth(sdata, ifsta, mgmt, skb->len); 1680 ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len);
1917 break; 1681 break;
1918 case IEEE80211_STYPE_ASSOC_RESP: 1682 case IEEE80211_STYPE_ASSOC_RESP:
1919 ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 0); 1683 ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, 0);
1920 break; 1684 break;
1921 case IEEE80211_STYPE_REASSOC_RESP: 1685 case IEEE80211_STYPE_REASSOC_RESP:
1922 ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 1); 1686 ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, 1);
1923 break; 1687 break;
1924 case IEEE80211_STYPE_DEAUTH: 1688 case IEEE80211_STYPE_DEAUTH:
1925 ieee80211_rx_mgmt_deauth(sdata, ifsta, mgmt, skb->len); 1689 ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len);
1926 break; 1690 break;
1927 case IEEE80211_STYPE_DISASSOC: 1691 case IEEE80211_STYPE_DISASSOC:
1928 ieee80211_rx_mgmt_disassoc(sdata, ifsta, mgmt, skb->len); 1692 ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len);
1929 break; 1693 break;
1930 } 1694 }
1931 1695
1932 kfree_skb(skb); 1696 kfree_skb(skb);
1933} 1697}
1934 1698
1935
1936static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata)
1937{
1938 struct ieee80211_local *local = sdata->local;
1939 int active = 0;
1940 struct sta_info *sta;
1941
1942 rcu_read_lock();
1943
1944 list_for_each_entry_rcu(sta, &local->sta_list, list) {
1945 if (sta->sdata == sdata &&
1946 time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL,
1947 jiffies)) {
1948 active++;
1949 break;
1950 }
1951 }
1952
1953 rcu_read_unlock();
1954
1955 return active;
1956}
1957
1958
1959static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata,
1960 struct ieee80211_if_sta *ifsta)
1961{
1962 mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL);
1963
1964 ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT);
1965 if (ieee80211_sta_active_ibss(sdata))
1966 return;
1967
1968 printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other "
1969 "IBSS networks with same SSID (merge)\n", sdata->dev->name);
1970 ieee80211_request_scan(sdata, ifsta->ssid, ifsta->ssid_len);
1971}
1972
1973
1974static void ieee80211_sta_timer(unsigned long data) 1699static void ieee80211_sta_timer(unsigned long data)
1975{ 1700{
1976 struct ieee80211_sub_if_data *sdata = 1701 struct ieee80211_sub_if_data *sdata =
1977 (struct ieee80211_sub_if_data *) data; 1702 (struct ieee80211_sub_if_data *) data;
1978 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 1703 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1979 struct ieee80211_local *local = sdata->local; 1704 struct ieee80211_local *local = sdata->local;
1980 1705
1981 set_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); 1706 set_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request);
1982 queue_work(local->hw.workqueue, &ifsta->work); 1707 queue_work(local->hw.workqueue, &ifmgd->work);
1983} 1708}
1984 1709
1985static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata, 1710static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata)
1986 struct ieee80211_if_sta *ifsta)
1987{ 1711{
1712 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1988 struct ieee80211_local *local = sdata->local; 1713 struct ieee80211_local *local = sdata->local;
1989 1714
1990 if (local->ops->reset_tsf) { 1715 if (local->ops->reset_tsf) {
@@ -1992,298 +1717,112 @@ static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata,
1992 local->ops->reset_tsf(local_to_hw(local)); 1717 local->ops->reset_tsf(local_to_hw(local));
1993 } 1718 }
1994 1719
1995 ifsta->wmm_last_param_set = -1; /* allow any WMM update */ 1720 ifmgd->wmm_last_param_set = -1; /* allow any WMM update */
1996 1721
1997 1722
1998 if (ifsta->auth_algs & IEEE80211_AUTH_ALG_OPEN) 1723 if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_OPEN)
1999 ifsta->auth_alg = WLAN_AUTH_OPEN; 1724 ifmgd->auth_alg = WLAN_AUTH_OPEN;
2000 else if (ifsta->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY) 1725 else if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY)
2001 ifsta->auth_alg = WLAN_AUTH_SHARED_KEY; 1726 ifmgd->auth_alg = WLAN_AUTH_SHARED_KEY;
2002 else if (ifsta->auth_algs & IEEE80211_AUTH_ALG_LEAP) 1727 else if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_LEAP)
2003 ifsta->auth_alg = WLAN_AUTH_LEAP; 1728 ifmgd->auth_alg = WLAN_AUTH_LEAP;
1729 else if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_FT)
1730 ifmgd->auth_alg = WLAN_AUTH_FT;
2004 else 1731 else
2005 ifsta->auth_alg = WLAN_AUTH_OPEN; 1732 ifmgd->auth_alg = WLAN_AUTH_OPEN;
2006 ifsta->auth_transaction = -1; 1733 ifmgd->auth_transaction = -1;
2007 ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; 1734 ifmgd->flags &= ~IEEE80211_STA_ASSOCIATED;
2008 ifsta->assoc_scan_tries = 0; 1735 ifmgd->assoc_scan_tries = 0;
2009 ifsta->direct_probe_tries = 0; 1736 ifmgd->direct_probe_tries = 0;
2010 ifsta->auth_tries = 0; 1737 ifmgd->auth_tries = 0;
2011 ifsta->assoc_tries = 0; 1738 ifmgd->assoc_tries = 0;
2012 netif_tx_stop_all_queues(sdata->dev); 1739 netif_tx_stop_all_queues(sdata->dev);
2013 netif_carrier_off(sdata->dev); 1740 netif_carrier_off(sdata->dev);
2014} 1741}
2015 1742
2016 1743static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata)
2017static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta,
2018 const char *ssid, int ssid_len)
2019{
2020 int tmp, hidden_ssid;
2021
2022 if (ssid_len == ifsta->ssid_len &&
2023 !memcmp(ifsta->ssid, ssid, ssid_len))
2024 return 1;
2025
2026 if (ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL)
2027 return 0;
2028
2029 hidden_ssid = 1;
2030 tmp = ssid_len;
2031 while (tmp--) {
2032 if (ssid[tmp] != '\0') {
2033 hidden_ssid = 0;
2034 break;
2035 }
2036 }
2037
2038 if (hidden_ssid && (ifsta->ssid_len == ssid_len || ssid_len == 0))
2039 return 1;
2040
2041 if (ssid_len == 1 && ssid[0] == ' ')
2042 return 1;
2043
2044 return 0;
2045}
2046
2047static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata,
2048 struct ieee80211_if_sta *ifsta)
2049{ 1744{
1745 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2050 struct ieee80211_local *local = sdata->local; 1746 struct ieee80211_local *local = sdata->local;
2051 struct ieee80211_bss *bss; 1747 struct ieee80211_bss *bss;
2052 struct ieee80211_supported_band *sband; 1748 u8 *bssid = ifmgd->bssid, *ssid = ifmgd->ssid;
2053 u8 bssid[ETH_ALEN], *pos; 1749 u8 ssid_len = ifmgd->ssid_len;
2054 int i; 1750 u16 capa_mask = WLAN_CAPABILITY_ESS;
2055 int ret; 1751 u16 capa_val = WLAN_CAPABILITY_ESS;
2056 1752 struct ieee80211_channel *chan = local->oper_channel;
2057#if 0
2058 /* Easier testing, use fixed BSSID. */
2059 memset(bssid, 0xfe, ETH_ALEN);
2060#else
2061 /* Generate random, not broadcast, locally administered BSSID. Mix in
2062 * own MAC address to make sure that devices that do not have proper
2063 * random number generator get different BSSID. */
2064 get_random_bytes(bssid, ETH_ALEN);
2065 for (i = 0; i < ETH_ALEN; i++)
2066 bssid[i] ^= sdata->dev->dev_addr[i];
2067 bssid[0] &= ~0x01;
2068 bssid[0] |= 0x02;
2069#endif
2070
2071 printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n",
2072 sdata->dev->name, bssid);
2073
2074 bss = ieee80211_rx_bss_add(local, bssid,
2075 local->hw.conf.channel->center_freq,
2076 sdata->u.sta.ssid, sdata->u.sta.ssid_len);
2077 if (!bss)
2078 return -ENOMEM;
2079
2080 bss->band = local->hw.conf.channel->band;
2081 sband = local->hw.wiphy->bands[bss->band];
2082
2083 if (local->hw.conf.beacon_int == 0)
2084 local->hw.conf.beacon_int = 100;
2085 bss->beacon_int = local->hw.conf.beacon_int;
2086 bss->last_update = jiffies;
2087 bss->capability = WLAN_CAPABILITY_IBSS;
2088 1753
2089 if (sdata->default_key) 1754 if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) &&
2090 bss->capability |= WLAN_CAPABILITY_PRIVACY; 1755 ifmgd->flags & (IEEE80211_STA_AUTO_SSID_SEL |
2091 else 1756 IEEE80211_STA_AUTO_BSSID_SEL |
2092 sdata->drop_unencrypted = 0; 1757 IEEE80211_STA_AUTO_CHANNEL_SEL)) {
2093 1758 capa_mask |= WLAN_CAPABILITY_PRIVACY;
2094 bss->supp_rates_len = sband->n_bitrates; 1759 if (sdata->default_key)
2095 pos = bss->supp_rates; 1760 capa_val |= WLAN_CAPABILITY_PRIVACY;
2096 for (i = 0; i < sband->n_bitrates; i++) {
2097 int rate = sband->bitrates[i].bitrate;
2098 *pos++ = (u8) (rate / 5);
2099 } 1761 }
2100 1762
2101 ret = ieee80211_sta_join_ibss(sdata, ifsta, bss); 1763 if (ifmgd->flags & IEEE80211_STA_AUTO_CHANNEL_SEL)
2102 ieee80211_rx_bss_put(local, bss); 1764 chan = NULL;
2103 return ret;
2104}
2105 1765
1766 if (ifmgd->flags & IEEE80211_STA_AUTO_BSSID_SEL)
1767 bssid = NULL;
2106 1768
2107static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata, 1769 if (ifmgd->flags & IEEE80211_STA_AUTO_SSID_SEL) {
2108 struct ieee80211_if_sta *ifsta) 1770 ssid = NULL;
2109{ 1771 ssid_len = 0;
2110 struct ieee80211_local *local = sdata->local;
2111 struct ieee80211_bss *bss;
2112 int found = 0;
2113 u8 bssid[ETH_ALEN];
2114 int active_ibss;
2115
2116 if (ifsta->ssid_len == 0)
2117 return -EINVAL;
2118
2119 active_ibss = ieee80211_sta_active_ibss(sdata);
2120#ifdef CONFIG_MAC80211_IBSS_DEBUG
2121 printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n",
2122 sdata->dev->name, active_ibss);
2123#endif /* CONFIG_MAC80211_IBSS_DEBUG */
2124 spin_lock_bh(&local->bss_lock);
2125 list_for_each_entry(bss, &local->bss_list, list) {
2126 if (ifsta->ssid_len != bss->ssid_len ||
2127 memcmp(ifsta->ssid, bss->ssid, bss->ssid_len) != 0
2128 || !(bss->capability & WLAN_CAPABILITY_IBSS))
2129 continue;
2130#ifdef CONFIG_MAC80211_IBSS_DEBUG
2131 printk(KERN_DEBUG " bssid=%pM found\n", bss->bssid);
2132#endif /* CONFIG_MAC80211_IBSS_DEBUG */
2133 memcpy(bssid, bss->bssid, ETH_ALEN);
2134 found = 1;
2135 if (active_ibss || memcmp(bssid, ifsta->bssid, ETH_ALEN) != 0)
2136 break;
2137 } 1772 }
2138 spin_unlock_bh(&local->bss_lock);
2139 1773
2140#ifdef CONFIG_MAC80211_IBSS_DEBUG 1774 bss = (void *)cfg80211_get_bss(local->hw.wiphy, chan,
2141 if (found) 1775 bssid, ssid, ssid_len,
2142 printk(KERN_DEBUG " sta_find_ibss: selected %pM current " 1776 capa_mask, capa_val);
2143 "%pM\n", bssid, ifsta->bssid);
2144#endif /* CONFIG_MAC80211_IBSS_DEBUG */
2145 1777
2146 if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { 1778 if (bss) {
2147 int ret; 1779 ieee80211_set_freq(sdata, bss->cbss.channel->center_freq);
2148 int search_freq; 1780 if (!(ifmgd->flags & IEEE80211_STA_SSID_SET))
2149 1781 ieee80211_sta_set_ssid(sdata, bss->ssid,
2150 if (ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) 1782 bss->ssid_len);
2151 search_freq = bss->freq; 1783 ieee80211_sta_set_bssid(sdata, bss->cbss.bssid);
1784 ieee80211_sta_def_wmm_params(sdata, bss->supp_rates_len,
1785 bss->supp_rates);
1786 if (sdata->u.mgd.mfp == IEEE80211_MFP_REQUIRED)
1787 sdata->u.mgd.flags |= IEEE80211_STA_MFP_ENABLED;
2152 else 1788 else
2153 search_freq = local->hw.conf.channel->center_freq; 1789 sdata->u.mgd.flags &= ~IEEE80211_STA_MFP_ENABLED;
2154
2155 bss = ieee80211_rx_bss_get(local, bssid, search_freq,
2156 ifsta->ssid, ifsta->ssid_len);
2157 if (!bss)
2158 goto dont_join;
2159
2160 printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM"
2161 " based on configured SSID\n",
2162 sdata->dev->name, bssid);
2163 ret = ieee80211_sta_join_ibss(sdata, ifsta, bss);
2164 ieee80211_rx_bss_put(local, bss);
2165 return ret;
2166 }
2167
2168dont_join:
2169#ifdef CONFIG_MAC80211_IBSS_DEBUG
2170 printk(KERN_DEBUG " did not try to join ibss\n");
2171#endif /* CONFIG_MAC80211_IBSS_DEBUG */
2172
2173 /* Selected IBSS not found in current scan results - try to scan */
2174 if (ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED &&
2175 !ieee80211_sta_active_ibss(sdata)) {
2176 mod_timer(&ifsta->timer, jiffies +
2177 IEEE80211_IBSS_MERGE_INTERVAL);
2178 } else if (time_after(jiffies, local->last_scan_completed +
2179 IEEE80211_SCAN_INTERVAL)) {
2180 printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to "
2181 "join\n", sdata->dev->name);
2182 return ieee80211_request_scan(sdata, ifsta->ssid,
2183 ifsta->ssid_len);
2184 } else if (ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED) {
2185 int interval = IEEE80211_SCAN_INTERVAL;
2186
2187 if (time_after(jiffies, ifsta->ibss_join_req +
2188 IEEE80211_IBSS_JOIN_TIMEOUT)) {
2189 if ((ifsta->flags & IEEE80211_STA_CREATE_IBSS) &&
2190 (!(local->oper_channel->flags &
2191 IEEE80211_CHAN_NO_IBSS)))
2192 return ieee80211_sta_create_ibss(sdata, ifsta);
2193 if (ifsta->flags & IEEE80211_STA_CREATE_IBSS) {
2194 printk(KERN_DEBUG "%s: IBSS not allowed on"
2195 " %d MHz\n", sdata->dev->name,
2196 local->hw.conf.channel->center_freq);
2197 }
2198
2199 /* No IBSS found - decrease scan interval and continue
2200 * scanning. */
2201 interval = IEEE80211_SCAN_INTERVAL_SLOW;
2202 }
2203
2204 ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH;
2205 mod_timer(&ifsta->timer, jiffies + interval);
2206 return 0;
2207 }
2208
2209 return 0;
2210}
2211
2212
2213static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
2214 struct ieee80211_if_sta *ifsta)
2215{
2216 struct ieee80211_local *local = sdata->local;
2217 struct ieee80211_bss *bss, *selected = NULL;
2218 int top_rssi = 0, freq;
2219
2220 spin_lock_bh(&local->bss_lock);
2221 freq = local->oper_channel->center_freq;
2222 list_for_each_entry(bss, &local->bss_list, list) {
2223 if (!(bss->capability & WLAN_CAPABILITY_ESS))
2224 continue;
2225
2226 if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL |
2227 IEEE80211_STA_AUTO_BSSID_SEL |
2228 IEEE80211_STA_AUTO_CHANNEL_SEL)) &&
2229 (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^
2230 !!sdata->default_key))
2231 continue;
2232
2233 if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) &&
2234 bss->freq != freq)
2235 continue;
2236
2237 if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) &&
2238 memcmp(bss->bssid, ifsta->bssid, ETH_ALEN))
2239 continue;
2240
2241 if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) &&
2242 !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len))
2243 continue;
2244
2245 if (!selected || top_rssi < bss->signal) {
2246 selected = bss;
2247 top_rssi = bss->signal;
2248 }
2249 }
2250 if (selected)
2251 atomic_inc(&selected->users);
2252 spin_unlock_bh(&local->bss_lock);
2253
2254 if (selected) {
2255 ieee80211_set_freq(sdata, selected->freq);
2256 if (!(ifsta->flags & IEEE80211_STA_SSID_SET))
2257 ieee80211_sta_set_ssid(sdata, selected->ssid,
2258 selected->ssid_len);
2259 ieee80211_sta_set_bssid(sdata, selected->bssid);
2260 ieee80211_sta_def_wmm_params(sdata, selected);
2261 1790
2262 /* Send out direct probe if no probe resp was received or 1791 /* Send out direct probe if no probe resp was received or
2263 * the one we have is outdated 1792 * the one we have is outdated
2264 */ 1793 */
2265 if (!selected->last_probe_resp || 1794 if (!bss->last_probe_resp ||
2266 time_after(jiffies, selected->last_probe_resp 1795 time_after(jiffies, bss->last_probe_resp
2267 + IEEE80211_SCAN_RESULT_EXPIRE)) 1796 + IEEE80211_SCAN_RESULT_EXPIRE))
2268 ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; 1797 ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE;
2269 else 1798 else
2270 ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; 1799 ifmgd->state = IEEE80211_STA_MLME_AUTHENTICATE;
2271 1800
2272 ieee80211_rx_bss_put(local, selected); 1801 ieee80211_rx_bss_put(local, bss);
2273 ieee80211_sta_reset_auth(sdata, ifsta); 1802 ieee80211_sta_reset_auth(sdata);
2274 return 0; 1803 return 0;
2275 } else { 1804 } else {
2276 if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) { 1805 if (ifmgd->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) {
2277 ifsta->assoc_scan_tries++; 1806 ifmgd->assoc_scan_tries++;
2278 if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) 1807 /* XXX maybe racy? */
2279 ieee80211_start_scan(sdata, NULL, 0); 1808 if (local->scan_req)
1809 return -1;
1810 memcpy(local->int_scan_req.ssids[0].ssid,
1811 ifmgd->ssid, IEEE80211_MAX_SSID_LEN);
1812 if (ifmgd->flags & IEEE80211_STA_AUTO_SSID_SEL)
1813 local->int_scan_req.ssids[0].ssid_len = 0;
2280 else 1814 else
2281 ieee80211_start_scan(sdata, ifsta->ssid, 1815 local->int_scan_req.ssids[0].ssid_len = ifmgd->ssid_len;
2282 ifsta->ssid_len); 1816
2283 ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; 1817 if (ieee80211_start_scan(sdata, &local->int_scan_req))
2284 set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); 1818 ieee80211_scan_failed(local);
2285 } else 1819
2286 ifsta->state = IEEE80211_STA_MLME_DISABLED; 1820 ifmgd->state = IEEE80211_STA_MLME_AUTHENTICATE;
1821 set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request);
1822 } else {
1823 ifmgd->assoc_scan_tries = 0;
1824 ifmgd->state = IEEE80211_STA_MLME_DISABLED;
1825 }
2287 } 1826 }
2288 return -1; 1827 return -1;
2289} 1828}
@@ -2292,9 +1831,9 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata,
2292static void ieee80211_sta_work(struct work_struct *work) 1831static void ieee80211_sta_work(struct work_struct *work)
2293{ 1832{
2294 struct ieee80211_sub_if_data *sdata = 1833 struct ieee80211_sub_if_data *sdata =
2295 container_of(work, struct ieee80211_sub_if_data, u.sta.work); 1834 container_of(work, struct ieee80211_sub_if_data, u.mgd.work);
2296 struct ieee80211_local *local = sdata->local; 1835 struct ieee80211_local *local = sdata->local;
2297 struct ieee80211_if_sta *ifsta; 1836 struct ieee80211_if_managed *ifmgd;
2298 struct sk_buff *skb; 1837 struct sk_buff *skb;
2299 1838
2300 if (!netif_running(sdata->dev)) 1839 if (!netif_running(sdata->dev))
@@ -2303,61 +1842,60 @@ static void ieee80211_sta_work(struct work_struct *work)
2303 if (local->sw_scanning || local->hw_scanning) 1842 if (local->sw_scanning || local->hw_scanning)
2304 return; 1843 return;
2305 1844
2306 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION && 1845 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
2307 sdata->vif.type != NL80211_IFTYPE_ADHOC))
2308 return; 1846 return;
2309 ifsta = &sdata->u.sta; 1847 ifmgd = &sdata->u.mgd;
2310 1848
2311 while ((skb = skb_dequeue(&ifsta->skb_queue))) 1849 while ((skb = skb_dequeue(&ifmgd->skb_queue)))
2312 ieee80211_sta_rx_queued_mgmt(sdata, skb); 1850 ieee80211_sta_rx_queued_mgmt(sdata, skb);
2313 1851
2314 if (ifsta->state != IEEE80211_STA_MLME_DIRECT_PROBE && 1852 if (ifmgd->state != IEEE80211_STA_MLME_DIRECT_PROBE &&
2315 ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && 1853 ifmgd->state != IEEE80211_STA_MLME_AUTHENTICATE &&
2316 ifsta->state != IEEE80211_STA_MLME_ASSOCIATE && 1854 ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE &&
2317 test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) { 1855 test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request)) {
2318 ieee80211_start_scan(sdata, ifsta->scan_ssid, 1856 /*
2319 ifsta->scan_ssid_len); 1857 * The call to ieee80211_start_scan can fail but ieee80211_request_scan
1858 * (which queued ieee80211_sta_work) did not return an error. Thus, call
1859 * ieee80211_scan_failed here if ieee80211_start_scan fails in order to
1860 * notify the scan requester.
1861 */
1862 if (ieee80211_start_scan(sdata, local->scan_req))
1863 ieee80211_scan_failed(local);
2320 return; 1864 return;
2321 } 1865 }
2322 1866
2323 if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) { 1867 if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request)) {
2324 if (ieee80211_sta_config_auth(sdata, ifsta)) 1868 if (ieee80211_sta_config_auth(sdata))
2325 return; 1869 return;
2326 clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); 1870 clear_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request);
2327 } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request)) 1871 } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request))
2328 return; 1872 return;
2329 1873
2330 switch (ifsta->state) { 1874 switch (ifmgd->state) {
2331 case IEEE80211_STA_MLME_DISABLED: 1875 case IEEE80211_STA_MLME_DISABLED:
2332 break; 1876 break;
2333 case IEEE80211_STA_MLME_DIRECT_PROBE: 1877 case IEEE80211_STA_MLME_DIRECT_PROBE:
2334 ieee80211_direct_probe(sdata, ifsta); 1878 ieee80211_direct_probe(sdata);
2335 break; 1879 break;
2336 case IEEE80211_STA_MLME_AUTHENTICATE: 1880 case IEEE80211_STA_MLME_AUTHENTICATE:
2337 ieee80211_authenticate(sdata, ifsta); 1881 ieee80211_authenticate(sdata);
2338 break; 1882 break;
2339 case IEEE80211_STA_MLME_ASSOCIATE: 1883 case IEEE80211_STA_MLME_ASSOCIATE:
2340 ieee80211_associate(sdata, ifsta); 1884 ieee80211_associate(sdata);
2341 break; 1885 break;
2342 case IEEE80211_STA_MLME_ASSOCIATED: 1886 case IEEE80211_STA_MLME_ASSOCIATED:
2343 ieee80211_associated(sdata, ifsta); 1887 ieee80211_associated(sdata);
2344 break;
2345 case IEEE80211_STA_MLME_IBSS_SEARCH:
2346 ieee80211_sta_find_ibss(sdata, ifsta);
2347 break;
2348 case IEEE80211_STA_MLME_IBSS_JOINED:
2349 ieee80211_sta_merge_ibss(sdata, ifsta);
2350 break; 1888 break;
2351 default: 1889 default:
2352 WARN_ON(1); 1890 WARN_ON(1);
2353 break; 1891 break;
2354 } 1892 }
2355 1893
2356 if (ieee80211_privacy_mismatch(sdata, ifsta)) { 1894 if (ieee80211_privacy_mismatch(sdata)) {
2357 printk(KERN_DEBUG "%s: privacy configuration mismatch and " 1895 printk(KERN_DEBUG "%s: privacy configuration mismatch and "
2358 "mixed-cell disabled - disassociate\n", sdata->dev->name); 1896 "mixed-cell disabled - disassociate\n", sdata->dev->name);
2359 1897
2360 ieee80211_set_disassoc(sdata, ifsta, false, true, 1898 ieee80211_set_disassoc(sdata, false, true,
2361 WLAN_REASON_UNSPECIFIED); 1899 WLAN_REASON_UNSPECIFIED);
2362 } 1900 }
2363} 1901}
@@ -2366,208 +1904,161 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
2366{ 1904{
2367 if (sdata->vif.type == NL80211_IFTYPE_STATION) 1905 if (sdata->vif.type == NL80211_IFTYPE_STATION)
2368 queue_work(sdata->local->hw.workqueue, 1906 queue_work(sdata->local->hw.workqueue,
2369 &sdata->u.sta.work); 1907 &sdata->u.mgd.work);
2370} 1908}
2371 1909
2372/* interface setup */ 1910/* interface setup */
2373void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) 1911void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
2374{ 1912{
2375 struct ieee80211_if_sta *ifsta; 1913 struct ieee80211_if_managed *ifmgd;
2376 1914
2377 ifsta = &sdata->u.sta; 1915 ifmgd = &sdata->u.mgd;
2378 INIT_WORK(&ifsta->work, ieee80211_sta_work); 1916 INIT_WORK(&ifmgd->work, ieee80211_sta_work);
2379 setup_timer(&ifsta->timer, ieee80211_sta_timer, 1917 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work);
1918 INIT_WORK(&ifmgd->beacon_loss_work, ieee80211_beacon_loss_work);
1919 setup_timer(&ifmgd->timer, ieee80211_sta_timer,
2380 (unsigned long) sdata); 1920 (unsigned long) sdata);
2381 skb_queue_head_init(&ifsta->skb_queue); 1921 setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer,
1922 (unsigned long) sdata);
1923 skb_queue_head_init(&ifmgd->skb_queue);
2382 1924
2383 ifsta->capab = WLAN_CAPABILITY_ESS; 1925 ifmgd->capab = WLAN_CAPABILITY_ESS;
2384 ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN | 1926 ifmgd->auth_algs = IEEE80211_AUTH_ALG_OPEN |
2385 IEEE80211_AUTH_ALG_SHARED_KEY; 1927 IEEE80211_AUTH_ALG_SHARED_KEY;
2386 ifsta->flags |= IEEE80211_STA_CREATE_IBSS | 1928 ifmgd->flags |= IEEE80211_STA_CREATE_IBSS |
2387 IEEE80211_STA_AUTO_BSSID_SEL | 1929 IEEE80211_STA_AUTO_BSSID_SEL |
2388 IEEE80211_STA_AUTO_CHANNEL_SEL; 1930 IEEE80211_STA_AUTO_CHANNEL_SEL;
2389 if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4) 1931 if (sdata->local->hw.queues >= 4)
2390 ifsta->flags |= IEEE80211_STA_WMM_ENABLED; 1932 ifmgd->flags |= IEEE80211_STA_WMM_ENABLED;
2391}
2392
2393/*
2394 * Add a new IBSS station, will also be called by the RX code when,
2395 * in IBSS mode, receiving a frame from a yet-unknown station, hence
2396 * must be callable in atomic context.
2397 */
2398struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
2399 u8 *bssid,u8 *addr, u64 supp_rates)
2400{
2401 struct ieee80211_local *local = sdata->local;
2402 struct sta_info *sta;
2403 int band = local->hw.conf.channel->band;
2404
2405 /* TODO: Could consider removing the least recently used entry and
2406 * allow new one to be added. */
2407 if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) {
2408 if (net_ratelimit()) {
2409 printk(KERN_DEBUG "%s: No room for a new IBSS STA "
2410 "entry %pM\n", sdata->dev->name, addr);
2411 }
2412 return NULL;
2413 }
2414
2415 if (compare_ether_addr(bssid, sdata->u.sta.bssid))
2416 return NULL;
2417
2418#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
2419 printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n",
2420 wiphy_name(local->hw.wiphy), addr, sdata->dev->name);
2421#endif
2422
2423 sta = sta_info_alloc(sdata, addr, GFP_ATOMIC);
2424 if (!sta)
2425 return NULL;
2426
2427 set_sta_flags(sta, WLAN_STA_AUTHORIZED);
2428
2429 /* make sure mandatory rates are always added */
2430 sta->sta.supp_rates[band] = supp_rates |
2431 ieee80211_mandatory_rates(local, band);
2432
2433 rate_control_rate_init(sta);
2434
2435 if (sta_info_insert(sta))
2436 return NULL;
2437
2438 return sta;
2439} 1933}
2440 1934
2441/* configuration hooks */ 1935/* configuration hooks */
2442void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, 1936void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata)
2443 struct ieee80211_if_sta *ifsta)
2444{ 1937{
1938 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2445 struct ieee80211_local *local = sdata->local; 1939 struct ieee80211_local *local = sdata->local;
2446 1940
2447 if (sdata->vif.type != NL80211_IFTYPE_STATION) 1941 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
2448 return; 1942 return;
2449 1943
2450 if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | 1944 if ((ifmgd->flags & (IEEE80211_STA_BSSID_SET |
2451 IEEE80211_STA_AUTO_BSSID_SEL)) && 1945 IEEE80211_STA_AUTO_BSSID_SEL)) &&
2452 (ifsta->flags & (IEEE80211_STA_SSID_SET | 1946 (ifmgd->flags & (IEEE80211_STA_SSID_SET |
2453 IEEE80211_STA_AUTO_SSID_SEL))) { 1947 IEEE80211_STA_AUTO_SSID_SEL))) {
2454 1948
2455 if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) 1949 if (ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED)
2456 ieee80211_set_disassoc(sdata, ifsta, true, true, 1950 ieee80211_set_disassoc(sdata, true, true,
2457 WLAN_REASON_DEAUTH_LEAVING); 1951 WLAN_REASON_DEAUTH_LEAVING);
2458 1952
2459 set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); 1953 if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) ||
2460 queue_work(local->hw.workqueue, &ifsta->work); 1954 ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE)
1955 set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request);
1956 else if (ifmgd->flags & IEEE80211_STA_EXT_SME)
1957 set_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request);
1958 queue_work(local->hw.workqueue, &ifmgd->work);
2461 } 1959 }
2462} 1960}
2463 1961
1962int ieee80211_sta_commit(struct ieee80211_sub_if_data *sdata)
1963{
1964 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1965
1966 if (ifmgd->ssid_len)
1967 ifmgd->flags |= IEEE80211_STA_SSID_SET;
1968 else
1969 ifmgd->flags &= ~IEEE80211_STA_SSID_SET;
1970
1971 return 0;
1972}
1973
2464int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) 1974int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len)
2465{ 1975{
2466 struct ieee80211_if_sta *ifsta; 1976 struct ieee80211_if_managed *ifmgd;
2467 1977
2468 if (len > IEEE80211_MAX_SSID_LEN) 1978 if (len > IEEE80211_MAX_SSID_LEN)
2469 return -EINVAL; 1979 return -EINVAL;
2470 1980
2471 ifsta = &sdata->u.sta; 1981 ifmgd = &sdata->u.mgd;
2472
2473 if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) {
2474 memset(ifsta->ssid, 0, sizeof(ifsta->ssid));
2475 memcpy(ifsta->ssid, ssid, len);
2476 ifsta->ssid_len = len;
2477 ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET;
2478 }
2479
2480 if (len)
2481 ifsta->flags |= IEEE80211_STA_SSID_SET;
2482 else
2483 ifsta->flags &= ~IEEE80211_STA_SSID_SET;
2484 1982
2485 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && 1983 if (ifmgd->ssid_len != len || memcmp(ifmgd->ssid, ssid, len) != 0) {
2486 !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { 1984 /*
2487 ifsta->ibss_join_req = jiffies; 1985 * Do not use reassociation if SSID is changed (different ESS).
2488 ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH; 1986 */
2489 return ieee80211_sta_find_ibss(sdata, ifsta); 1987 ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET;
1988 memset(ifmgd->ssid, 0, sizeof(ifmgd->ssid));
1989 memcpy(ifmgd->ssid, ssid, len);
1990 ifmgd->ssid_len = len;
2490 } 1991 }
2491 1992
2492 return 0; 1993 return ieee80211_sta_commit(sdata);
2493} 1994}
2494 1995
2495int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len) 1996int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len)
2496{ 1997{
2497 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 1998 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2498 memcpy(ssid, ifsta->ssid, ifsta->ssid_len); 1999 memcpy(ssid, ifmgd->ssid, ifmgd->ssid_len);
2499 *len = ifsta->ssid_len; 2000 *len = ifmgd->ssid_len;
2500 return 0; 2001 return 0;
2501} 2002}
2502 2003
2503int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) 2004int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid)
2504{ 2005{
2505 struct ieee80211_if_sta *ifsta; 2006 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2506 int res;
2507 2007
2508 ifsta = &sdata->u.sta; 2008 if (is_valid_ether_addr(bssid)) {
2009 memcpy(ifmgd->bssid, bssid, ETH_ALEN);
2010 ifmgd->flags |= IEEE80211_STA_BSSID_SET;
2011 } else {
2012 memset(ifmgd->bssid, 0, ETH_ALEN);
2013 ifmgd->flags &= ~IEEE80211_STA_BSSID_SET;
2014 }
2509 2015
2510 if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { 2016 if (netif_running(sdata->dev)) {
2511 memcpy(ifsta->bssid, bssid, ETH_ALEN); 2017 if (ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID)) {
2512 res = 0;
2513 /*
2514 * Hack! See also ieee80211_sta_set_ssid.
2515 */
2516 if (netif_running(sdata->dev))
2517 res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID);
2518 if (res) {
2519 printk(KERN_DEBUG "%s: Failed to config new BSSID to " 2018 printk(KERN_DEBUG "%s: Failed to config new BSSID to "
2520 "the low-level driver\n", sdata->dev->name); 2019 "the low-level driver\n", sdata->dev->name);
2521 return res;
2522 } 2020 }
2523 } 2021 }
2524 2022
2525 if (is_valid_ether_addr(bssid)) 2023 return ieee80211_sta_commit(sdata);
2526 ifsta->flags |= IEEE80211_STA_BSSID_SET;
2527 else
2528 ifsta->flags &= ~IEEE80211_STA_BSSID_SET;
2529
2530 return 0;
2531} 2024}
2532 2025
2533int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len) 2026int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata,
2027 const char *ie, size_t len)
2534{ 2028{
2535 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 2029 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2536 2030
2537 kfree(ifsta->extra_ie); 2031 kfree(ifmgd->extra_ie);
2538 if (len == 0) { 2032 if (len == 0) {
2539 ifsta->extra_ie = NULL; 2033 ifmgd->extra_ie = NULL;
2540 ifsta->extra_ie_len = 0; 2034 ifmgd->extra_ie_len = 0;
2541 return 0; 2035 return 0;
2542 } 2036 }
2543 ifsta->extra_ie = kmalloc(len, GFP_KERNEL); 2037 ifmgd->extra_ie = kmalloc(len, GFP_KERNEL);
2544 if (!ifsta->extra_ie) { 2038 if (!ifmgd->extra_ie) {
2545 ifsta->extra_ie_len = 0; 2039 ifmgd->extra_ie_len = 0;
2546 return -ENOMEM; 2040 return -ENOMEM;
2547 } 2041 }
2548 memcpy(ifsta->extra_ie, ie, len); 2042 memcpy(ifmgd->extra_ie, ie, len);
2549 ifsta->extra_ie_len = len; 2043 ifmgd->extra_ie_len = len;
2550 return 0; 2044 return 0;
2551} 2045}
2552 2046
2553int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason) 2047int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason)
2554{ 2048{
2555 struct ieee80211_if_sta *ifsta = &sdata->u.sta;
2556
2557 printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n", 2049 printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n",
2558 sdata->dev->name, reason); 2050 sdata->dev->name, reason);
2559 2051
2560 if (sdata->vif.type != NL80211_IFTYPE_STATION && 2052 if (sdata->vif.type != NL80211_IFTYPE_STATION)
2561 sdata->vif.type != NL80211_IFTYPE_ADHOC)
2562 return -EINVAL; 2053 return -EINVAL;
2563 2054
2564 ieee80211_set_disassoc(sdata, ifsta, true, true, reason); 2055 ieee80211_set_disassoc(sdata, true, true, reason);
2565 return 0; 2056 return 0;
2566} 2057}
2567 2058
2568int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason) 2059int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason)
2569{ 2060{
2570 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 2061 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
2571 2062
2572 printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n", 2063 printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n",
2573 sdata->dev->name, reason); 2064 sdata->dev->name, reason);
@@ -2575,10 +2066,10 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason)
2575 if (sdata->vif.type != NL80211_IFTYPE_STATION) 2066 if (sdata->vif.type != NL80211_IFTYPE_STATION)
2576 return -EINVAL; 2067 return -EINVAL;
2577 2068
2578 if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED)) 2069 if (!(ifmgd->flags & IEEE80211_STA_ASSOCIATED))
2579 return -1; 2070 return -ENOLINK;
2580 2071
2581 ieee80211_set_disassoc(sdata, ifsta, false, true, reason); 2072 ieee80211_set_disassoc(sdata, false, true, reason);
2582 return 0; 2073 return 0;
2583} 2074}
2584 2075
@@ -2586,15 +2077,6 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason)
2586void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) 2077void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local)
2587{ 2078{
2588 struct ieee80211_sub_if_data *sdata = local->scan_sdata; 2079 struct ieee80211_sub_if_data *sdata = local->scan_sdata;
2589 struct ieee80211_if_sta *ifsta;
2590
2591 if (sdata && sdata->vif.type == NL80211_IFTYPE_ADHOC) {
2592 ifsta = &sdata->u.sta;
2593 if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) ||
2594 (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) &&
2595 !ieee80211_sta_active_ibss(sdata)))
2596 ieee80211_sta_find_ibss(sdata, ifsta);
2597 }
2598 2080
2599 /* Restart STA timers */ 2081 /* Restart STA timers */
2600 rcu_read_lock(); 2082 rcu_read_lock();
@@ -2623,12 +2105,15 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
2623 struct ieee80211_local *local = 2105 struct ieee80211_local *local =
2624 container_of(work, struct ieee80211_local, 2106 container_of(work, struct ieee80211_local,
2625 dynamic_ps_enable_work); 2107 dynamic_ps_enable_work);
2108 struct ieee80211_sub_if_data *sdata = local->scan_sdata;
2626 2109
2627 if (local->hw.conf.flags & IEEE80211_CONF_PS) 2110 if (local->hw.conf.flags & IEEE80211_CONF_PS)
2628 return; 2111 return;
2629 2112
2630 local->hw.conf.flags |= IEEE80211_CONF_PS; 2113 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
2114 ieee80211_send_nullfunc(local, sdata, 1);
2631 2115
2116 local->hw.conf.flags |= IEEE80211_CONF_PS;
2632 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); 2117 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
2633} 2118}
2634 2119
@@ -2638,3 +2123,36 @@ void ieee80211_dynamic_ps_timer(unsigned long data)
2638 2123
2639 queue_work(local->hw.workqueue, &local->dynamic_ps_enable_work); 2124 queue_work(local->hw.workqueue, &local->dynamic_ps_enable_work);
2640} 2125}
2126
2127void ieee80211_send_nullfunc(struct ieee80211_local *local,
2128 struct ieee80211_sub_if_data *sdata,
2129 int powersave)
2130{
2131 struct sk_buff *skb;
2132 struct ieee80211_hdr *nullfunc;
2133 __le16 fc;
2134
2135 if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION))
2136 return;
2137
2138 skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24);
2139 if (!skb) {
2140 printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc "
2141 "frame\n", sdata->dev->name);
2142 return;
2143 }
2144 skb_reserve(skb, local->hw.extra_tx_headroom);
2145
2146 nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24);
2147 memset(nullfunc, 0, 24);
2148 fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC |
2149 IEEE80211_FCTL_TODS);
2150 if (powersave)
2151 fc |= cpu_to_le16(IEEE80211_FCTL_PM);
2152 nullfunc->frame_control = fc;
2153 memcpy(nullfunc->addr1, sdata->u.mgd.bssid, ETH_ALEN);
2154 memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN);
2155 memcpy(nullfunc->addr3, sdata->u.mgd.bssid, ETH_ALEN);
2156
2157 ieee80211_tx_skb(sdata, skb, 0);
2158}
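For reference, the nullfunc helper added above reduces to setting a handful of frame-control bits (data type, nullfunc subtype, To-DS, and the power-management bit when powersave is requested) before the frame is handed to ieee80211_tx_skb(). The self-contained sketch below shows only that bit composition; the constant values mirror include/linux/ieee80211.h and the program is an illustration, not kernel code.

#include <stdint.h>
#include <stdio.h>

/* Values as in include/linux/ieee80211.h (host byte order). */
#define IEEE80211_FTYPE_DATA      0x0008
#define IEEE80211_STYPE_NULLFUNC  0x0048
#define IEEE80211_FCTL_TODS       0x0100
#define IEEE80211_FCTL_PM         0x1000  /* power-management bit */

int main(void)
{
	/* Same composition as ieee80211_send_nullfunc() with powersave = 1. */
	uint16_t fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC |
		      IEEE80211_FCTL_TODS | IEEE80211_FCTL_PM;

	printf("nullfunc frame_control = 0x%04x\n", fc);  /* prints 0x1148 */
	return 0;
}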
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
new file mode 100644
index 000000000000..027302326498
--- /dev/null
+++ b/net/mac80211/pm.c
@@ -0,0 +1,179 @@
1#include <net/mac80211.h>
2#include <net/rtnetlink.h>
3
4#include "ieee80211_i.h"
5#include "led.h"
6
7int __ieee80211_suspend(struct ieee80211_hw *hw)
8{
9 struct ieee80211_local *local = hw_to_local(hw);
10 struct ieee80211_sub_if_data *sdata;
11 struct ieee80211_if_init_conf conf;
12 struct sta_info *sta;
13 unsigned long flags;
14
15 ieee80211_stop_queues_by_reason(hw,
16 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
17
18 flush_workqueue(local->hw.workqueue);
19
20 /* disable keys */
21 list_for_each_entry(sdata, &local->interfaces, list)
22 ieee80211_disable_keys(sdata);
23
24 /* Tear down aggregation sessions */
25
26 rcu_read_lock();
27
28 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
29 list_for_each_entry_rcu(sta, &local->sta_list, list) {
30 set_sta_flags(sta, WLAN_STA_SUSPEND);
31 ieee80211_sta_tear_down_BA_sessions(sta);
32 }
33 }
34
35 rcu_read_unlock();
36
37 /* remove STAs */
38 if (local->ops->sta_notify) {
39 spin_lock_irqsave(&local->sta_lock, flags);
40 list_for_each_entry(sta, &local->sta_list, list) {
41 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
42 sdata = container_of(sdata->bss,
43 struct ieee80211_sub_if_data,
44 u.ap);
45
46 local->ops->sta_notify(hw, &sdata->vif,
47 STA_NOTIFY_REMOVE, &sta->sta);
48 }
49 spin_unlock_irqrestore(&local->sta_lock, flags);
50 }
51
52 /* remove all interfaces */
53 list_for_each_entry(sdata, &local->interfaces, list) {
54 if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
55 sdata->vif.type != NL80211_IFTYPE_MONITOR &&
56 netif_running(sdata->dev)) {
57 conf.vif = &sdata->vif;
58 conf.type = sdata->vif.type;
59 conf.mac_addr = sdata->dev->dev_addr;
60 local->ops->remove_interface(hw, &conf);
61 }
62 }
63
64 /* flush again, in case driver queued work */
65 flush_workqueue(local->hw.workqueue);
66
67 /* stop hardware */
68 if (local->open_count) {
69 ieee80211_led_radio(local, false);
70 local->ops->stop(hw);
71 }
72 return 0;
73}
74
75int __ieee80211_resume(struct ieee80211_hw *hw)
76{
77 struct ieee80211_local *local = hw_to_local(hw);
78 struct ieee80211_sub_if_data *sdata;
79 struct ieee80211_if_init_conf conf;
80 struct sta_info *sta;
81 unsigned long flags;
82 int res;
83
84 /* restart hardware */
85 if (local->open_count) {
86 res = local->ops->start(hw);
87
88 ieee80211_led_radio(local, hw->conf.radio_enabled);
89 }
90
91 /* add interfaces */
92 list_for_each_entry(sdata, &local->interfaces, list) {
93 if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
94 sdata->vif.type != NL80211_IFTYPE_MONITOR &&
95 netif_running(sdata->dev)) {
96 conf.vif = &sdata->vif;
97 conf.type = sdata->vif.type;
98 conf.mac_addr = sdata->dev->dev_addr;
99 res = local->ops->add_interface(hw, &conf);
100 }
101 }
102
103 /* add STAs back */
104 if (local->ops->sta_notify) {
105 spin_lock_irqsave(&local->sta_lock, flags);
106 list_for_each_entry(sta, &local->sta_list, list) {
107 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
108 sdata = container_of(sdata->bss,
109 struct ieee80211_sub_if_data,
110 u.ap);
111
112 local->ops->sta_notify(hw, &sdata->vif,
113 STA_NOTIFY_ADD, &sta->sta);
114 }
115 spin_unlock_irqrestore(&local->sta_lock, flags);
116 }
117
118 /* Clear Suspend state so that ADDBA requests can be processed */
119
120 rcu_read_lock();
121
122 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
123 list_for_each_entry_rcu(sta, &local->sta_list, list) {
124 clear_sta_flags(sta, WLAN_STA_SUSPEND);
125 }
126 }
127
128 rcu_read_unlock();
129
130 /* add back keys */
131 list_for_each_entry(sdata, &local->interfaces, list)
132 if (netif_running(sdata->dev))
133 ieee80211_enable_keys(sdata);
134
135 /* setup RTS threshold */
136 if (local->ops->set_rts_threshold)
137 local->ops->set_rts_threshold(hw, local->rts_threshold);
138
139 /* reconfigure hardware */
140 ieee80211_hw_config(local, ~0);
141
142 netif_addr_lock_bh(local->mdev);
143 ieee80211_configure_filter(local);
144 netif_addr_unlock_bh(local->mdev);
145
146 /* Finally also reconfigure all the BSS information */
147 list_for_each_entry(sdata, &local->interfaces, list) {
148 u32 changed = ~0;
149 if (!netif_running(sdata->dev))
150 continue;
151 switch (sdata->vif.type) {
152 case NL80211_IFTYPE_STATION:
153 /* disable beacon change bits */
154 changed &= ~IEEE80211_IFCC_BEACON;
155 /* fall through */
156 case NL80211_IFTYPE_ADHOC:
157 case NL80211_IFTYPE_AP:
158 case NL80211_IFTYPE_MESH_POINT:
159 WARN_ON(ieee80211_if_config(sdata, changed));
160 ieee80211_bss_info_change_notify(sdata, ~0);
161 break;
162 case NL80211_IFTYPE_WDS:
163 break;
164 case NL80211_IFTYPE_AP_VLAN:
165 case NL80211_IFTYPE_MONITOR:
166 /* ignore virtual */
167 break;
168 case NL80211_IFTYPE_UNSPECIFIED:
169 case __NL80211_IFTYPE_AFTER_LAST:
170 WARN_ON(1);
171 break;
172 }
173 }
174
175 ieee80211_wake_queues_by_reason(hw,
176 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
177
178 return 0;
179}
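Both suspend and resume above bracket their work with ieee80211_stop_queues_by_reason() and ieee80211_wake_queues_by_reason(). The underlying idea, sketched below in self-contained C that is not taken from the kernel sources, is that each queue keeps a bitmask of stop reasons and only really wakes up once every reason has been cleared, so the suspend stopper can coexist with other stoppers such as aggregation or power save.

#include <stdbool.h>
#include <stdio.h>

enum stop_reason {
	QUEUE_STOP_REASON_DRIVER  = 1 << 0,
	QUEUE_STOP_REASON_PS      = 1 << 1,
	QUEUE_STOP_REASON_SUSPEND = 1 << 2,
};

struct queue {
	unsigned int stop_reasons;  /* bitmask of currently active stop reasons */
};

static void stop_queue(struct queue *q, enum stop_reason r)
{
	q->stop_reasons |= r;
}

static bool wake_queue(struct queue *q, enum stop_reason r)
{
	q->stop_reasons &= ~r;
	return q->stop_reasons == 0;  /* only truly awake with no reasons left */
}

int main(void)
{
	struct queue q = { 0 };

	stop_queue(&q, QUEUE_STOP_REASON_SUSPEND);
	stop_queue(&q, QUEUE_STOP_REASON_PS);

	printf("awake after clearing SUSPEND: %d\n",
	       wake_queue(&q, QUEUE_STOP_REASON_SUSPEND));  /* 0, PS still set */
	printf("awake after clearing PS:      %d\n",
	       wake_queue(&q, QUEUE_STOP_REASON_PS));       /* 1 */
	return 0;
}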
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 3fa7ab285066..4641f00a1e5c 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -219,10 +219,12 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata,
219 info->control.rates[i].count = 1; 219 info->control.rates[i].count = 1;
220 } 220 }
221 221
222 if (sta && sdata->force_unicast_rateidx > -1) 222 if (sta && sdata->force_unicast_rateidx > -1) {
223 info->control.rates[0].idx = sdata->force_unicast_rateidx; 223 info->control.rates[0].idx = sdata->force_unicast_rateidx;
224 else 224 } else {
225 ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); 225 ref->ops->get_rate(ref->priv, ista, priv_sta, txrc);
226 info->flags |= IEEE80211_TX_INTFL_RCALGO;
227 }
226 228
227 /* 229 /*
228 * try to enforce the maximum rate the user wanted 230 * try to enforce the maximum rate the user wanted
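The two hunks here (rate.c above and rate.h below) implement a small tagging pattern: a frame whose rate was actually chosen by the rate-control algorithm is marked with an internal flag, and only such frames are later fed back into the algorithm's tx_status() statistics, so user-forced rates do not pollute its feedback loop. A minimal self-contained model of that pattern, with invented names rather than the mac80211 API:

#include <stdbool.h>
#include <stdio.h>

#define TX_FLAG_RC_CHOSE_RATE (1u << 0)  /* stands in for IEEE80211_TX_INTFL_RCALGO */

struct tx_info {
	unsigned int flags;
	int rate_idx;
};

/* Transmit path: either honour a forced rate or ask the algorithm. */
static void select_rate(struct tx_info *info, int forced_rate_idx)
{
	if (forced_rate_idx >= 0) {
		info->rate_idx = forced_rate_idx;      /* user override, no tag */
	} else {
		info->rate_idx = 11;                   /* pretend the algorithm chose this */
		info->flags |= TX_FLAG_RC_CHOSE_RATE;  /* tag it for feedback */
	}
}

/* Status path: only frames the algorithm handled update its statistics. */
static void tx_status(const struct tx_info *info, bool acked)
{
	if (!(info->flags & TX_FLAG_RC_CHOSE_RATE))
		return;  /* forced rate, skip the feedback */
	printf("feed back rate index %d, acked=%d\n", info->rate_idx, acked);
}

int main(void)
{
	struct tx_info forced = { 0 }, normal = { 0 };

	select_rate(&forced, 0);   /* forced lowest rate */
	select_rate(&normal, -1);  /* algorithm decides */

	tx_status(&forced, true);  /* ignored */
	tx_status(&normal, true);  /* updates the algorithm */
	return 0;
}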
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 928da625e281..2ab5ad9e71ce 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -44,8 +44,10 @@ static inline void rate_control_tx_status(struct ieee80211_local *local,
44 struct rate_control_ref *ref = local->rate_ctrl; 44 struct rate_control_ref *ref = local->rate_ctrl;
45 struct ieee80211_sta *ista = &sta->sta; 45 struct ieee80211_sta *ista = &sta->sta;
46 void *priv_sta = sta->rate_ctrl_priv; 46 void *priv_sta = sta->rate_ctrl_priv;
47 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
47 48
48 ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); 49 if (likely(info->flags & IEEE80211_TX_INTFL_RCALGO))
50 ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb);
49} 51}
50 52
51 53
@@ -62,6 +64,18 @@ static inline void rate_control_rate_init(struct sta_info *sta)
62 ref->ops->rate_init(ref->priv, sband, ista, priv_sta); 64 ref->ops->rate_init(ref->priv, sband, ista, priv_sta);
63} 65}
64 66
67static inline void rate_control_rate_update(struct ieee80211_local *local,
68 struct ieee80211_supported_band *sband,
69 struct sta_info *sta, u32 changed)
70{
71 struct rate_control_ref *ref = local->rate_ctrl;
72 struct ieee80211_sta *ista = &sta->sta;
73 void *priv_sta = sta->rate_ctrl_priv;
74
75 if (ref->ops->rate_update)
76 ref->ops->rate_update(ref->priv, sband, ista,
77 priv_sta, changed);
78}
65 79
66static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, 80static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
67 struct ieee80211_sta *sta, 81 struct ieee80211_sta *sta,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 7175ae80c36a..64ebe664effc 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -86,8 +86,7 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
86 86
87 if (status->flag & RX_FLAG_TSFT) 87 if (status->flag & RX_FLAG_TSFT)
88 len += 8; 88 len += 8;
89 if (local->hw.flags & IEEE80211_HW_SIGNAL_DB || 89 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
90 local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
91 len += 1; 90 len += 1;
92 if (local->hw.flags & IEEE80211_HW_NOISE_DBM) 91 if (local->hw.flags & IEEE80211_HW_NOISE_DBM)
93 len += 1; 92 len += 1;
@@ -102,7 +101,7 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
102 return len; 101 return len;
103} 102}
104 103
105/** 104/*
106 * ieee80211_add_rx_radiotap_header - add radiotap header 105 * ieee80211_add_rx_radiotap_header - add radiotap header
107 * 106 *
108 * add a radiotap header containing all the fields which the hardware provided. 107 * add a radiotap header containing all the fields which the hardware provided.
@@ -143,6 +142,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
143 /* IEEE80211_RADIOTAP_FLAGS */ 142 /* IEEE80211_RADIOTAP_FLAGS */
144 if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) 143 if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
145 *pos |= IEEE80211_RADIOTAP_F_FCS; 144 *pos |= IEEE80211_RADIOTAP_F_FCS;
145 if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC))
146 *pos |= IEEE80211_RADIOTAP_F_BADFCS;
146 if (status->flag & RX_FLAG_SHORTPRE) 147 if (status->flag & RX_FLAG_SHORTPRE)
147 *pos |= IEEE80211_RADIOTAP_F_SHORTPRE; 148 *pos |= IEEE80211_RADIOTAP_F_SHORTPRE;
148 pos++; 149 pos++;
@@ -158,7 +159,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
158 */ 159 */
159 *pos = 0; 160 *pos = 0;
160 } else { 161 } else {
161 rthdr->it_present |= (1 << IEEE80211_RADIOTAP_RATE); 162 rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE);
162 *pos = rate->bitrate / 5; 163 *pos = rate->bitrate / 5;
163 } 164 }
164 pos++; 165 pos++;
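The it_present change in this hunk matters because radiotap headers are defined as little-endian on the wire and the field is a __le32; a plain host-order OR would set the wrong byte on big-endian machines. A host-independent sketch of setting a presence bit (toy helper, not the kernel's byte-order macros):

#include <stdint.h>
#include <stdio.h>

#define RADIOTAP_RATE_BIT 2  /* bit index of the rate field in it_present */

/* Store a 32-bit value in little-endian byte order regardless of host order. */
static void put_le32(uint8_t *p, uint32_t v)
{
	p[0] = v & 0xff;
	p[1] = (v >> 8) & 0xff;
	p[2] = (v >> 16) & 0xff;
	p[3] = (v >> 24) & 0xff;
}

int main(void)
{
	uint8_t it_present[4] = { 0 };

	/* Equivalent effect of it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE):
	 * the bit lands in the first byte of the little-endian bitmap. */
	put_le32(it_present, 1u << RADIOTAP_RATE_BIT);

	printf("it_present bytes: %02x %02x %02x %02x\n",
	       it_present[0], it_present[1], it_present[2], it_present[3]);
	return 0;
}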
@@ -199,23 +200,14 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
199 *pos = status->antenna; 200 *pos = status->antenna;
200 pos++; 201 pos++;
201 202
202 /* IEEE80211_RADIOTAP_DB_ANTSIGNAL */
203 if (local->hw.flags & IEEE80211_HW_SIGNAL_DB) {
204 *pos = status->signal;
205 rthdr->it_present |=
206 cpu_to_le32(1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL);
207 pos++;
208 }
209
210 /* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */ 203 /* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */
211 204
212 /* IEEE80211_RADIOTAP_RX_FLAGS */ 205 /* IEEE80211_RADIOTAP_RX_FLAGS */
213 /* ensure 2 byte alignment for the 2 byte field as required */ 206 /* ensure 2 byte alignment for the 2 byte field as required */
214 if ((pos - (unsigned char *)rthdr) & 1) 207 if ((pos - (unsigned char *)rthdr) & 1)
215 pos++; 208 pos++;
216 /* FIXME: when radiotap gets a 'bad PLCP' flag use it here */ 209 if (status->flag & RX_FLAG_FAILED_PLCP_CRC)
217 if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) 210 *(__le16 *)pos |= cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADPLCP);
218 *(__le16 *)pos |= cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS);
219 pos += 2; 211 pos += 2;
220} 212}
221 213
@@ -371,39 +363,50 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
371 rx->skb->priority = (tid > 7) ? 0 : tid; 363 rx->skb->priority = (tid > 7) ? 0 : tid;
372} 364}
373 365
374static void ieee80211_verify_ip_alignment(struct ieee80211_rx_data *rx) 366/**
367 * DOC: Packet alignment
368 *
369 * Drivers always need to pass packets that are aligned to two-byte boundaries
370 * to the stack.
371 *
372 * Additionally, drivers should, if possible, align the payload data in a way that
373 * guarantees that the contained IP header is aligned to a four-byte
374 * boundary. In the case of regular frames, this simply means aligning the
375 * payload to a four-byte boundary (because either the IP header is directly
376 * contained, or IV/RFC1042 headers that have a length divisible by four are
377 * in front of it).
378 *
379 * With A-MSDU frames, however, the payload data address must yield two modulo
380 * four because there are 14-byte 802.3 headers within the A-MSDU frames that
381 * push the IP header further back to a multiple of four again. Thankfully, the
382 * specs were sane enough this time around to require padding each A-MSDU
383 * subframe to a length that is a multiple of four.
384 *
385 * Padding like Atheros hardware adds, which sits in between the 802.11 header
386 * and the payload, is not supported; the driver is required to move the 802.11
387 * header to be directly in front of the payload in that case.
388 */
389static void ieee80211_verify_alignment(struct ieee80211_rx_data *rx)
375{ 390{
376#ifdef CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT
377 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 391 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
378 int hdrlen; 392 int hdrlen;
379 393
394#ifndef CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT
395 return;
396#endif
397
398 if (WARN_ONCE((unsigned long)rx->skb->data & 1,
399 "unaligned packet at 0x%p\n", rx->skb->data))
400 return;
401
380 if (!ieee80211_is_data_present(hdr->frame_control)) 402 if (!ieee80211_is_data_present(hdr->frame_control))
381 return; 403 return;
382 404
383 /*
384 * Drivers are required to align the payload data in a way that
385 * guarantees that the contained IP header is aligned to a four-
386 * byte boundary. In the case of regular frames, this simply means
387 * aligning the payload to a four-byte boundary (because either
388 * the IP header is directly contained, or IV/RFC1042 headers that
389 * have a length divisible by four are in front of it.
390 *
391 * With A-MSDU frames, however, the payload data address must
392 * yield two modulo four because there are 14-byte 802.3 headers
393 * within the A-MSDU frames that push the IP header further back
394 * to a multiple of four again. Thankfully, the specs were sane
395 * enough this time around to require padding each A-MSDU subframe
396 * to a length that is a multiple of four.
397 *
398 * Padding like atheros hardware adds which is inbetween the 802.11
399 * header and the payload is not supported, the driver is required
400 * to move the 802.11 header further back in that case.
401 */
402 hdrlen = ieee80211_hdrlen(hdr->frame_control); 405 hdrlen = ieee80211_hdrlen(hdr->frame_control);
403 if (rx->flags & IEEE80211_RX_AMSDU) 406 if (rx->flags & IEEE80211_RX_AMSDU)
404 hdrlen += ETH_HLEN; 407 hdrlen += ETH_HLEN;
405 WARN_ON_ONCE(((unsigned long)(rx->skb->data + hdrlen)) & 3); 408 WARN_ONCE(((unsigned long)(rx->skb->data + hdrlen)) & 3,
406#endif 409 "unaligned IP payload at 0x%p\n", rx->skb->data + hdrlen);
407} 410}
408 411
409 412
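The DOC comment above boils down to two modulo checks, which is exactly what the WARN_ONCE() in ieee80211_verify_alignment() tests: for ordinary data frames the address right after the 802.11 header must be 4-byte aligned, while for A-MSDUs it must land on 2 modulo 4 so that the 14-byte 802.3 subframe header brings the IP header back to a 4-byte boundary. A standalone illustration of that arithmetic, using assumed header lengths rather than kernel code:

#include <stdio.h>

/* Assumed lengths: 26 = 24-byte 802.11 header plus 2-byte QoS control,
 * 14 = 802.3 header inside each A-MSDU subframe. */
#define HDR_80211_QOS_LEN 26
#define ETH_HLEN          14

/* Mirror of the alignment condition: 0 means the payload is placed as required. */
static unsigned long misalignment(unsigned long data, int is_amsdu)
{
	unsigned long hdrlen = HDR_80211_QOS_LEN;

	if (is_amsdu)
		hdrlen += ETH_HLEN;

	return (data + hdrlen) & 3;
}

int main(void)
{
	/* Regular frame: start it at 2 mod 4 so data + 26 is 4-byte aligned. */
	printf("regular frame at 0x1002: %lu\n", misalignment(0x1002, 0));  /* 0 */
	/* A-MSDU: start it 4-byte aligned so data + 26 is 2 mod 4. */
	printf("A-MSDU frame at 0x1000:  %lu\n", misalignment(0x1000, 1));  /* 0 */
	return 0;
}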
@@ -435,6 +438,52 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
435 return RX_CONTINUE; 438 return RX_CONTINUE;
436} 439}
437 440
441
442static int ieee80211_is_unicast_robust_mgmt_frame(struct sk_buff *skb)
443{
444 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
445
446 if (skb->len < 24 || is_multicast_ether_addr(hdr->addr1))
447 return 0;
448
449 return ieee80211_is_robust_mgmt_frame(hdr);
450}
451
452
453static int ieee80211_is_multicast_robust_mgmt_frame(struct sk_buff *skb)
454{
455 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
456
457 if (skb->len < 24 || !is_multicast_ether_addr(hdr->addr1))
458 return 0;
459
460 return ieee80211_is_robust_mgmt_frame(hdr);
461}
462
463
464/* Get the BIP key index from MMIE; return -1 if this is not a BIP frame */
465static int ieee80211_get_mmie_keyidx(struct sk_buff *skb)
466{
467 struct ieee80211_mgmt *hdr = (struct ieee80211_mgmt *) skb->data;
468 struct ieee80211_mmie *mmie;
469
470 if (skb->len < 24 + sizeof(*mmie) ||
471 !is_multicast_ether_addr(hdr->da))
472 return -1;
473
474 if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *) hdr))
475 return -1; /* not a robust management frame */
476
477 mmie = (struct ieee80211_mmie *)
478 (skb->data + skb->len - sizeof(*mmie));
479 if (mmie->element_id != WLAN_EID_MMIE ||
480 mmie->length != sizeof(*mmie) - 2)
481 return -1;
482
483 return le16_to_cpu(mmie->key_id);
484}
485
486
438static ieee80211_rx_result 487static ieee80211_rx_result
439ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) 488ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
440{ 489{
@@ -550,21 +599,23 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
550 int hdrlen; 599 int hdrlen;
551 ieee80211_rx_result result = RX_DROP_UNUSABLE; 600 ieee80211_rx_result result = RX_DROP_UNUSABLE;
552 struct ieee80211_key *stakey = NULL; 601 struct ieee80211_key *stakey = NULL;
602 int mmie_keyidx = -1;
553 603
554 /* 604 /*
555 * Key selection 101 605 * Key selection 101
556 * 606 *
557 * There are three types of keys: 607 * There are four types of keys:
558 * - GTK (group keys) 608 * - GTK (group keys)
609 * - IGTK (group keys for management frames)
559 * - PTK (pairwise keys) 610 * - PTK (pairwise keys)
560 * - STK (station-to-station pairwise keys) 611 * - STK (station-to-station pairwise keys)
561 * 612 *
562 * When selecting a key, we have to distinguish between multicast 613 * When selecting a key, we have to distinguish between multicast
563 * (including broadcast) and unicast frames, the latter can only 614 * (including broadcast) and unicast frames, the latter can only
564 * use PTKs and STKs while the former always use GTKs. Unless, of 615 * use PTKs and STKs while the former always use GTKs and IGTKs.
565 * course, actual WEP keys ("pre-RSNA") are used, then unicast 616 * Unless, of course, actual WEP keys ("pre-RSNA") are used, then
566 * frames can also use key indizes like GTKs. Hence, if we don't 617 * unicast frames can also use key indices like GTKs. Hence, if we
567 * have a PTK/STK we check the key index for a WEP key. 618 * don't have a PTK/STK we check the key index for a WEP key.
568 * 619 *
569 * Note that in a regular BSS, multicast frames are sent by the 620 * Note that in a regular BSS, multicast frames are sent by the
570 * AP only, associated stations unicast the frame to the AP first 621 * AP only, associated stations unicast the frame to the AP first
@@ -577,8 +628,14 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
577 * possible. 628 * possible.
578 */ 629 */
579 630
580 if (!ieee80211_has_protected(hdr->frame_control)) 631 if (!ieee80211_has_protected(hdr->frame_control)) {
581 return RX_CONTINUE; 632 if (!ieee80211_is_mgmt(hdr->frame_control) ||
633 rx->sta == NULL || !test_sta_flags(rx->sta, WLAN_STA_MFP))
634 return RX_CONTINUE;
635 mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb);
636 if (mmie_keyidx < 0)
637 return RX_CONTINUE;
638 }
582 639
583 /* 640 /*
584 * No point in finding a key and decrypting if the frame is neither 641 * No point in finding a key and decrypting if the frame is neither
@@ -592,6 +649,16 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
592 649
593 if (!is_multicast_ether_addr(hdr->addr1) && stakey) { 650 if (!is_multicast_ether_addr(hdr->addr1) && stakey) {
594 rx->key = stakey; 651 rx->key = stakey;
652 } else if (mmie_keyidx >= 0) {
653 /* Broadcast/multicast robust management frame / BIP */
654 if ((rx->status->flag & RX_FLAG_DECRYPTED) &&
655 (rx->status->flag & RX_FLAG_IV_STRIPPED))
656 return RX_CONTINUE;
657
658 if (mmie_keyidx < NUM_DEFAULT_KEYS ||
659 mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
660 return RX_DROP_MONITOR; /* unexpected BIP keyidx */
661 rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
595 } else { 662 } else {
596 /* 663 /*
597 * The device doesn't give us the IV so we won't be 664 * The device doesn't give us the IV so we won't be
@@ -654,6 +721,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
654 case ALG_CCMP: 721 case ALG_CCMP:
655 result = ieee80211_crypto_ccmp_decrypt(rx); 722 result = ieee80211_crypto_ccmp_decrypt(rx);
656 break; 723 break;
724 case ALG_AES_CMAC:
725 result = ieee80211_crypto_aes_cmac_decrypt(rx);
726 break;
657 } 727 }
658 728
659 /* either the frame has been decrypted or will be dropped */ 729 /* either the frame has been decrypted or will be dropped */
@@ -662,6 +732,39 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
662 return result; 732 return result;
663} 733}
664 734
735static ieee80211_rx_result debug_noinline
736ieee80211_rx_h_check_more_data(struct ieee80211_rx_data *rx)
737{
738 struct ieee80211_local *local;
739 struct ieee80211_hdr *hdr;
740 struct sk_buff *skb;
741
742 local = rx->local;
743 skb = rx->skb;
744 hdr = (struct ieee80211_hdr *) skb->data;
745
746 if (!local->pspolling)
747 return RX_CONTINUE;
748
749 if (!ieee80211_has_fromds(hdr->frame_control))
750 /* this is not from AP */
751 return RX_CONTINUE;
752
753 if (!ieee80211_is_data(hdr->frame_control))
754 return RX_CONTINUE;
755
756 if (!ieee80211_has_moredata(hdr->frame_control)) {
757 /* AP has no more frames buffered for us */
758 local->pspolling = false;
759 return RX_CONTINUE;
760 }
761
762 /* more data bit is set, let's request a new frame from the AP */
763 ieee80211_send_pspoll(local, rx->sdata);
764
765 return RX_CONTINUE;
766}
767
665static void ap_sta_ps_start(struct sta_info *sta) 768static void ap_sta_ps_start(struct sta_info *sta)
666{ 769{
667 struct ieee80211_sub_if_data *sdata = sta->sdata; 770 struct ieee80211_sub_if_data *sdata = sta->sdata;
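The new ieee80211_rx_h_check_more_data() handler above implements one step of the legacy PS-Poll retrieval loop: while the station is polling and a delivered frame from the AP still carries the More Data bit, another PS-Poll is sent; once More Data is clear, polling stops. A compressed, self-contained model of that loop (invented helpers, not the mac80211 API):

#include <stdbool.h>
#include <stdio.h>

static int ap_buffered = 3;  /* frames the AP has queued for the dozing station */

/* Deliver one buffered frame; More Data stays set while frames remain. */
static bool ap_deliver_one(bool *more_data)
{
	if (ap_buffered == 0)
		return false;
	ap_buffered--;
	*more_data = ap_buffered > 0;
	return true;
}

int main(void)
{
	bool pspolling = true;  /* first PS-Poll already sent after seeing the TIM */
	bool more_data;

	while (pspolling && ap_deliver_one(&more_data)) {
		printf("received frame, more_data=%d\n", more_data);
		if (!more_data)
			pspolling = false;  /* AP queue drained, back to doze */
		/* otherwise: send the next PS-Poll, as ieee80211_send_pspoll() does */
	}
	return 0;
}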
@@ -736,7 +839,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
736 if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) { 839 if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) {
737 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, 840 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len,
738 NL80211_IFTYPE_ADHOC); 841 NL80211_IFTYPE_ADHOC);
739 if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) 842 if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0)
740 sta->last_rx = jiffies; 843 sta->last_rx = jiffies;
741 } else 844 } else
742 if (!is_multicast_ether_addr(hdr->addr1) || 845 if (!is_multicast_ether_addr(hdr->addr1) ||
@@ -747,12 +850,19 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
747 * Mesh beacons will update last_rx when if they are found to 850 * Mesh beacons will update last_rx when if they are found to
748 * match the current local configuration when processed. 851 * match the current local configuration when processed.
749 */ 852 */
750 sta->last_rx = jiffies; 853 if (rx->sdata->vif.type == NL80211_IFTYPE_STATION &&
854 ieee80211_is_beacon(hdr->frame_control)) {
855 rx->sdata->u.mgd.last_beacon = jiffies;
856 } else
857 sta->last_rx = jiffies;
751 } 858 }
752 859
753 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 860 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
754 return RX_CONTINUE; 861 return RX_CONTINUE;
755 862
863 if (rx->sdata->vif.type == NL80211_IFTYPE_STATION)
864 ieee80211_sta_rx_notify(rx->sdata, hdr);
865
756 sta->rx_fragments++; 866 sta->rx_fragments++;
757 sta->rx_bytes += rx->skb->len; 867 sta->rx_bytes += rx->skb->len;
758 sta->last_signal = rx->status->signal; 868 sta->last_signal = rx->status->signal;
@@ -1101,6 +1211,15 @@ ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc)
1101 /* Drop unencrypted frames if key is set. */ 1211 /* Drop unencrypted frames if key is set. */
1102 if (unlikely(!ieee80211_has_protected(fc) && 1212 if (unlikely(!ieee80211_has_protected(fc) &&
1103 !ieee80211_is_nullfunc(fc) && 1213 !ieee80211_is_nullfunc(fc) &&
1214 (!ieee80211_is_mgmt(fc) ||
1215 (ieee80211_is_unicast_robust_mgmt_frame(rx->skb) &&
1216 rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP))) &&
1217 (rx->key || rx->sdata->drop_unencrypted)))
1218 return -EACCES;
1219 /* BIP does not use Protected field, so need to check MMIE */
1220 if (unlikely(rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP) &&
1221 ieee80211_is_multicast_robust_mgmt_frame(rx->skb) &&
1222 ieee80211_get_mmie_keyidx(rx->skb) < 0 &&
1104 (rx->key || rx->sdata->drop_unencrypted))) 1223 (rx->key || rx->sdata->drop_unencrypted)))
1105 return -EACCES; 1224 return -EACCES;
1106 1225
@@ -1138,12 +1257,12 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
1138 1257
1139 switch (hdr->frame_control & 1258 switch (hdr->frame_control &
1140 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { 1259 cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
1141 case __constant_cpu_to_le16(IEEE80211_FCTL_TODS): 1260 case cpu_to_le16(IEEE80211_FCTL_TODS):
1142 if (unlikely(sdata->vif.type != NL80211_IFTYPE_AP && 1261 if (unlikely(sdata->vif.type != NL80211_IFTYPE_AP &&
1143 sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) 1262 sdata->vif.type != NL80211_IFTYPE_AP_VLAN))
1144 return -1; 1263 return -1;
1145 break; 1264 break;
1146 case __constant_cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): 1265 case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
1147 if (unlikely(sdata->vif.type != NL80211_IFTYPE_WDS && 1266 if (unlikely(sdata->vif.type != NL80211_IFTYPE_WDS &&
1148 sdata->vif.type != NL80211_IFTYPE_MESH_POINT)) 1267 sdata->vif.type != NL80211_IFTYPE_MESH_POINT))
1149 return -1; 1268 return -1;
@@ -1157,13 +1276,13 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx)
1157 } 1276 }
1158 } 1277 }
1159 break; 1278 break;
1160 case __constant_cpu_to_le16(IEEE80211_FCTL_FROMDS): 1279 case cpu_to_le16(IEEE80211_FCTL_FROMDS):
1161 if (sdata->vif.type != NL80211_IFTYPE_STATION || 1280 if (sdata->vif.type != NL80211_IFTYPE_STATION ||
1162 (is_multicast_ether_addr(dst) && 1281 (is_multicast_ether_addr(dst) &&
1163 !compare_ether_addr(src, dev->dev_addr))) 1282 !compare_ether_addr(src, dev->dev_addr)))
1164 return -1; 1283 return -1;
1165 break; 1284 break;
1166 case __constant_cpu_to_le16(0): 1285 case cpu_to_le16(0):
1167 if (sdata->vif.type != NL80211_IFTYPE_ADHOC) 1286 if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
1168 return -1; 1287 return -1;
1169 break; 1288 break;
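The __constant_cpu_to_le16() to cpu_to_le16() conversion in this hunk is possible because the kernel's byte-order helpers fold to integer constant expressions when given constants, so the plain form is legal even in switch case labels. A little-endian userspace sketch of the same pattern, using a toy macro rather than the kernel's implementation:

#include <stdint.h>
#include <stdio.h>

/* Toy conversion macro; it expands to a constant expression, so it can be
 * used as a case label (assuming a little-endian host, where the conversion
 * is the identity). */
#define CPU_TO_LE16(x) ((uint16_t)(x))

#define FCTL_TODS   0x0100
#define FCTL_FROMDS 0x0200

static const char *ds_bits_name(uint16_t fc_le)
{
	switch (fc_le & CPU_TO_LE16(FCTL_TODS | FCTL_FROMDS)) {
	case CPU_TO_LE16(FCTL_TODS):
		return "to DS";
	case CPU_TO_LE16(FCTL_FROMDS):
		return "from DS";
	case CPU_TO_LE16(FCTL_TODS | FCTL_FROMDS):
		return "WDS (4-address)";
	default:
		return "IBSS/direct";
	}
}

int main(void)
{
	printf("%s\n", ds_bits_name(CPU_TO_LE16(FCTL_FROMDS)));  /* "from DS" */
	return 0;
}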
@@ -1267,10 +1386,37 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
1267 } 1386 }
1268 1387
1269 if (skb) { 1388 if (skb) {
1270 /* deliver to local stack */ 1389 int align __maybe_unused;
1271 skb->protocol = eth_type_trans(skb, dev); 1390
1272 memset(skb->cb, 0, sizeof(skb->cb)); 1391#if defined(CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT) || !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
1273 netif_rx(skb); 1392 /*
1393 * 'align' will only take the values 0 or 2 here
1394 * since all frames are required to be aligned
1395 * to 2-byte boundaries when being passed to
1396 * mac80211. That also explains the __skb_push()
1397 * below.
1398 */
1399 align = (unsigned long)skb->data & 4;
1400 if (align) {
1401 if (WARN_ON(skb_headroom(skb) < 3)) {
1402 dev_kfree_skb(skb);
1403 skb = NULL;
1404 } else {
1405 u8 *data = skb->data;
1406 size_t len = skb->len;
1407 u8 *new = __skb_push(skb, align);
1408 memmove(new, data, len);
1409 __skb_trim(skb, len);
1410 }
1411 }
1412#endif
1413
1414 if (skb) {
1415 /* deliver to local stack */
1416 skb->protocol = eth_type_trans(skb, dev);
1417 memset(skb->cb, 0, sizeof(skb->cb));
1418 netif_rx(skb);
1419 }
1274 } 1420 }
1275 1421
1276 if (xmit_skb) { 1422 if (xmit_skb) {
@@ -1339,14 +1485,20 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
1339 if (remaining <= subframe_len + padding) 1485 if (remaining <= subframe_len + padding)
1340 frame = skb; 1486 frame = skb;
1341 else { 1487 else {
1342 frame = dev_alloc_skb(local->hw.extra_tx_headroom + 1488 /*
1343 subframe_len); 1489 * Allocate and reserve two bytes more for payload
1490 * alignment since sizeof(struct ethhdr) is 14.
1491 */
1492 frame = dev_alloc_skb(
1493 ALIGN(local->hw.extra_tx_headroom, 4) +
1494 subframe_len + 2);
1344 1495
1345 if (frame == NULL) 1496 if (frame == NULL)
1346 return RX_DROP_UNUSABLE; 1497 return RX_DROP_UNUSABLE;
1347 1498
1348 skb_reserve(frame, local->hw.extra_tx_headroom + 1499 skb_reserve(frame,
1349 sizeof(struct ethhdr)); 1500 ALIGN(local->hw.extra_tx_headroom, 4) +
1501 sizeof(struct ethhdr) + 2);
1350 memcpy(skb_put(frame, ntohs(len)), skb->data, 1502 memcpy(skb_put(frame, ntohs(len)), skb->data,
1351 ntohs(len)); 1503 ntohs(len));
1352 1504
@@ -1529,11 +1681,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx)
1529 start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4; 1681 start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4;
1530 1682
1531 /* reset session timer */ 1683 /* reset session timer */
1532 if (tid_agg_rx->timeout) { 1684 if (tid_agg_rx->timeout)
1533 unsigned long expires = 1685 mod_timer(&tid_agg_rx->session_timer,
1534 jiffies + (tid_agg_rx->timeout / 1000) * HZ; 1686 TU_TO_EXP_TIME(tid_agg_rx->timeout));
1535 mod_timer(&tid_agg_rx->session_timer, expires);
1536 }
1537 1687
1538 /* manage reordering buffer according to requested */ 1688 /* manage reordering buffer according to requested */
1539 /* sequence number */ 1689 /* sequence number */
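The session-timer change in this hunk (and the identical one further down in ieee80211_rx_reorder_ampdu()) replaces the open-coded jiffies + (timeout / 1000) * HZ with TU_TO_EXP_TIME(). Block Ack timeouts are given in time units of 1024 microseconds, so the old expression treated TUs as milliseconds and truncated anything below 1000 TU to an immediate expiry. A standalone comparison of the two conversions; the HZ value here is assumed, and TU_TO_EXP_TIME() is assumed to expand to jiffies plus usecs_to_jiffies(tu * 1024):

#include <stdio.h>

#define HZ 1000  /* assumed tick rate for the example */

/* Old conversion: TUs treated as milliseconds and truncated to whole seconds. */
static unsigned long old_jiffies(unsigned int tu)
{
	return (tu / 1000) * HZ;
}

/* New conversion: 1 TU = 1024 microseconds, rounded up to whole jiffies. */
static unsigned long tu_to_jiffies(unsigned int tu)
{
	unsigned long usecs = (unsigned long)tu * 1024;
	unsigned long usecs_per_jiffy = 1000000 / HZ;

	return (usecs + usecs_per_jiffy - 1) / usecs_per_jiffy;  /* poor man's usecs_to_jiffies() */
}

int main(void)
{
	unsigned int timeouts[] = { 100, 500, 5000 };

	for (int i = 0; i < 3; i++)
		printf("%5u TU: old %4lu jiffies, new %4lu jiffies\n",
		       timeouts[i], old_jiffies(timeouts[i]),
		       tu_to_jiffies(timeouts[i]));
	return 0;
}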
@@ -1547,12 +1697,64 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx)
1547 return RX_CONTINUE; 1697 return RX_CONTINUE;
1548} 1698}
1549 1699
1700static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
1701 struct ieee80211_mgmt *mgmt,
1702 size_t len)
1703{
1704 struct ieee80211_local *local = sdata->local;
1705 struct sk_buff *skb;
1706 struct ieee80211_mgmt *resp;
1707
1708 if (compare_ether_addr(mgmt->da, sdata->dev->dev_addr) != 0) {
1709 /* Not to own unicast address */
1710 return;
1711 }
1712
1713 if (compare_ether_addr(mgmt->sa, sdata->u.mgd.bssid) != 0 ||
1714 compare_ether_addr(mgmt->bssid, sdata->u.mgd.bssid) != 0) {
1715 /* Not from the current AP. */
1716 return;
1717 }
1718
1719 if (sdata->u.mgd.state == IEEE80211_STA_MLME_ASSOCIATE) {
1720 /* Association in progress; ignore SA Query */
1721 return;
1722 }
1723
1724 if (len < 24 + 1 + sizeof(resp->u.action.u.sa_query)) {
1725 /* Too short SA Query request frame */
1726 return;
1727 }
1728
1729 skb = dev_alloc_skb(sizeof(*resp) + local->hw.extra_tx_headroom);
1730 if (skb == NULL)
1731 return;
1732
1733 skb_reserve(skb, local->hw.extra_tx_headroom);
1734 resp = (struct ieee80211_mgmt *) skb_put(skb, 24);
1735 memset(resp, 0, 24);
1736 memcpy(resp->da, mgmt->sa, ETH_ALEN);
1737 memcpy(resp->sa, sdata->dev->dev_addr, ETH_ALEN);
1738 memcpy(resp->bssid, sdata->u.mgd.bssid, ETH_ALEN);
1739 resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
1740 IEEE80211_STYPE_ACTION);
1741 skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query));
1742 resp->u.action.category = WLAN_CATEGORY_SA_QUERY;
1743 resp->u.action.u.sa_query.action = WLAN_ACTION_SA_QUERY_RESPONSE;
1744 memcpy(resp->u.action.u.sa_query.trans_id,
1745 mgmt->u.action.u.sa_query.trans_id,
1746 WLAN_SA_QUERY_TR_ID_LEN);
1747
1748 ieee80211_tx_skb(sdata, skb, 1);
1749}
1750
1550static ieee80211_rx_result debug_noinline 1751static ieee80211_rx_result debug_noinline
1551ieee80211_rx_h_action(struct ieee80211_rx_data *rx) 1752ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1552{ 1753{
1553 struct ieee80211_local *local = rx->local; 1754 struct ieee80211_local *local = rx->local;
1554 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); 1755 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
1555 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; 1756 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1757 struct ieee80211_bss *bss;
1556 int len = rx->skb->len; 1758 int len = rx->skb->len;
1557 1759
1558 if (!ieee80211_is_action(mgmt->frame_control)) 1760 if (!ieee80211_is_action(mgmt->frame_control))
@@ -1564,12 +1766,26 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1564 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 1766 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
1565 return RX_DROP_MONITOR; 1767 return RX_DROP_MONITOR;
1566 1768
1769 if (ieee80211_drop_unencrypted(rx, mgmt->frame_control))
1770 return RX_DROP_MONITOR;
1771
1567 /* all categories we currently handle have action_code */ 1772 /* all categories we currently handle have action_code */
1568 if (len < IEEE80211_MIN_ACTION_SIZE + 1) 1773 if (len < IEEE80211_MIN_ACTION_SIZE + 1)
1569 return RX_DROP_MONITOR; 1774 return RX_DROP_MONITOR;
1570 1775
1571 switch (mgmt->u.action.category) { 1776 switch (mgmt->u.action.category) {
1572 case WLAN_CATEGORY_BACK: 1777 case WLAN_CATEGORY_BACK:
1778 /*
1779 * The aggregation code is not prepared to handle
1780 * anything but STA/AP due to the BSSID handling;
1781 * IBSS could work in the code but isn't supported
1782 * by drivers or the standard.
1783 */
1784 if (sdata->vif.type != NL80211_IFTYPE_STATION &&
1785 sdata->vif.type != NL80211_IFTYPE_AP_VLAN &&
1786 sdata->vif.type != NL80211_IFTYPE_AP)
1787 return RX_DROP_MONITOR;
1788
1573 switch (mgmt->u.action.u.addba_req.action_code) { 1789 switch (mgmt->u.action.u.addba_req.action_code) {
1574 case WLAN_ACTION_ADDBA_REQ: 1790 case WLAN_ACTION_ADDBA_REQ:
1575 if (len < (IEEE80211_MIN_ACTION_SIZE + 1791 if (len < (IEEE80211_MIN_ACTION_SIZE +
@@ -1594,6 +1810,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1594 case WLAN_CATEGORY_SPECTRUM_MGMT: 1810 case WLAN_CATEGORY_SPECTRUM_MGMT:
1595 if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) 1811 if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ)
1596 return RX_DROP_MONITOR; 1812 return RX_DROP_MONITOR;
1813
1814 if (sdata->vif.type != NL80211_IFTYPE_STATION)
1815 return RX_DROP_MONITOR;
1816
1597 switch (mgmt->u.action.u.measurement.action_code) { 1817 switch (mgmt->u.action.u.measurement.action_code) {
1598 case WLAN_ACTION_SPCT_MSR_REQ: 1818 case WLAN_ACTION_SPCT_MSR_REQ:
1599 if (len < (IEEE80211_MIN_ACTION_SIZE + 1819 if (len < (IEEE80211_MIN_ACTION_SIZE +
@@ -1601,6 +1821,43 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1601 return RX_DROP_MONITOR; 1821 return RX_DROP_MONITOR;
1602 ieee80211_process_measurement_req(sdata, mgmt, len); 1822 ieee80211_process_measurement_req(sdata, mgmt, len);
1603 break; 1823 break;
1824 case WLAN_ACTION_SPCT_CHL_SWITCH:
1825 if (len < (IEEE80211_MIN_ACTION_SIZE +
1826 sizeof(mgmt->u.action.u.chan_switch)))
1827 return RX_DROP_MONITOR;
1828
1829 if (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN))
1830 return RX_DROP_MONITOR;
1831
1832 bss = ieee80211_rx_bss_get(local, sdata->u.mgd.bssid,
1833 local->hw.conf.channel->center_freq,
1834 sdata->u.mgd.ssid,
1835 sdata->u.mgd.ssid_len);
1836 if (!bss)
1837 return RX_DROP_MONITOR;
1838
1839 ieee80211_process_chanswitch(sdata,
1840 &mgmt->u.action.u.chan_switch.sw_elem, bss);
1841 ieee80211_rx_bss_put(local, bss);
1842 break;
1843 }
1844 break;
1845 case WLAN_CATEGORY_SA_QUERY:
1846 if (len < (IEEE80211_MIN_ACTION_SIZE +
1847 sizeof(mgmt->u.action.u.sa_query)))
1848 return RX_DROP_MONITOR;
1849 switch (mgmt->u.action.u.sa_query.action) {
1850 case WLAN_ACTION_SA_QUERY_REQUEST:
1851 if (sdata->vif.type != NL80211_IFTYPE_STATION)
1852 return RX_DROP_MONITOR;
1853 ieee80211_process_sa_query_req(sdata, mgmt, len);
1854 break;
1855 case WLAN_ACTION_SA_QUERY_RESPONSE:
1856 /*
1857 * SA Query response is currently only used in AP mode
1858 * and it is processed in user space.
1859 */
1860 return RX_CONTINUE;
1604 } 1861 }
1605 break; 1862 break;
1606 default: 1863 default:
@@ -1616,22 +1873,24 @@ static ieee80211_rx_result debug_noinline
1616ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) 1873ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
1617{ 1874{
1618 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); 1875 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev);
1876 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1619 1877
1620 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 1878 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
1621 return RX_DROP_MONITOR; 1879 return RX_DROP_MONITOR;
1622 1880
1881 if (ieee80211_drop_unencrypted(rx, mgmt->frame_control))
1882 return RX_DROP_MONITOR;
1883
1623 if (ieee80211_vif_is_mesh(&sdata->vif)) 1884 if (ieee80211_vif_is_mesh(&sdata->vif))
1624 return ieee80211_mesh_rx_mgmt(sdata, rx->skb, rx->status); 1885 return ieee80211_mesh_rx_mgmt(sdata, rx->skb, rx->status);
1625 1886
1626 if (sdata->vif.type != NL80211_IFTYPE_STATION && 1887 if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
1627 sdata->vif.type != NL80211_IFTYPE_ADHOC) 1888 return ieee80211_ibss_rx_mgmt(sdata, rx->skb, rx->status);
1628 return RX_DROP_MONITOR;
1629 1889
1630 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) 1890 if (sdata->vif.type == NL80211_IFTYPE_STATION)
1631 return RX_DROP_MONITOR; 1891 return ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status);
1632 1892
1633 ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status); 1893 return RX_DROP_MONITOR;
1634 return RX_QUEUED;
1635} 1894}
1636 1895
1637static void ieee80211_rx_michael_mic_report(struct net_device *dev, 1896static void ieee80211_rx_michael_mic_report(struct net_device *dev,
@@ -1780,6 +2039,7 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
1780 CALL_RXH(ieee80211_rx_h_passive_scan) 2039 CALL_RXH(ieee80211_rx_h_passive_scan)
1781 CALL_RXH(ieee80211_rx_h_check) 2040 CALL_RXH(ieee80211_rx_h_check)
1782 CALL_RXH(ieee80211_rx_h_decrypt) 2041 CALL_RXH(ieee80211_rx_h_decrypt)
2042 CALL_RXH(ieee80211_rx_h_check_more_data)
1783 CALL_RXH(ieee80211_rx_h_sta_process) 2043 CALL_RXH(ieee80211_rx_h_sta_process)
1784 CALL_RXH(ieee80211_rx_h_defragment) 2044 CALL_RXH(ieee80211_rx_h_defragment)
1785 CALL_RXH(ieee80211_rx_h_ps_poll) 2045 CALL_RXH(ieee80211_rx_h_ps_poll)
@@ -1823,16 +2083,17 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
1823/* main receive path */ 2083/* main receive path */
1824 2084
1825static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, 2085static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
1826 u8 *bssid, struct ieee80211_rx_data *rx, 2086 struct ieee80211_rx_data *rx,
1827 struct ieee80211_hdr *hdr) 2087 struct ieee80211_hdr *hdr)
1828{ 2088{
2089 u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, sdata->vif.type);
1829 int multicast = is_multicast_ether_addr(hdr->addr1); 2090 int multicast = is_multicast_ether_addr(hdr->addr1);
1830 2091
1831 switch (sdata->vif.type) { 2092 switch (sdata->vif.type) {
1832 case NL80211_IFTYPE_STATION: 2093 case NL80211_IFTYPE_STATION:
1833 if (!bssid) 2094 if (!bssid)
1834 return 0; 2095 return 0;
1835 if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { 2096 if (!ieee80211_bssid_match(bssid, sdata->u.mgd.bssid)) {
1836 if (!(rx->flags & IEEE80211_RX_IN_SCAN)) 2097 if (!(rx->flags & IEEE80211_RX_IN_SCAN))
1837 return 0; 2098 return 0;
1838 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2099 rx->flags &= ~IEEE80211_RX_RA_MATCH;
@@ -1850,7 +2111,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
1850 if (ieee80211_is_beacon(hdr->frame_control)) { 2111 if (ieee80211_is_beacon(hdr->frame_control)) {
1851 return 1; 2112 return 1;
1852 } 2113 }
1853 else if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { 2114 else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) {
1854 if (!(rx->flags & IEEE80211_RX_IN_SCAN)) 2115 if (!(rx->flags & IEEE80211_RX_IN_SCAN))
1855 return 0; 2116 return 0;
1856 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2117 rx->flags &= ~IEEE80211_RX_RA_MATCH;
@@ -1928,7 +2189,6 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
1928 int prepares; 2189 int prepares;
1929 struct ieee80211_sub_if_data *prev = NULL; 2190 struct ieee80211_sub_if_data *prev = NULL;
1930 struct sk_buff *skb_new; 2191 struct sk_buff *skb_new;
1931 u8 *bssid;
1932 2192
1933 hdr = (struct ieee80211_hdr *)skb->data; 2193 hdr = (struct ieee80211_hdr *)skb->data;
1934 memset(&rx, 0, sizeof(rx)); 2194 memset(&rx, 0, sizeof(rx));
@@ -1956,7 +2216,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
1956 rx.flags |= IEEE80211_RX_IN_SCAN; 2216 rx.flags |= IEEE80211_RX_IN_SCAN;
1957 2217
1958 ieee80211_parse_qos(&rx); 2218 ieee80211_parse_qos(&rx);
1959 ieee80211_verify_ip_alignment(&rx); 2219 ieee80211_verify_alignment(&rx);
1960 2220
1961 skb = rx.skb; 2221 skb = rx.skb;
1962 2222
@@ -1967,9 +2227,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
1967 if (sdata->vif.type == NL80211_IFTYPE_MONITOR) 2227 if (sdata->vif.type == NL80211_IFTYPE_MONITOR)
1968 continue; 2228 continue;
1969 2229
1970 bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type);
1971 rx.flags |= IEEE80211_RX_RA_MATCH; 2230 rx.flags |= IEEE80211_RX_RA_MATCH;
1972 prepares = prepare_for_handlers(sdata, bssid, &rx, hdr); 2231 prepares = prepare_for_handlers(sdata, &rx, hdr);
1973 2232
1974 if (!prepares) 2233 if (!prepares)
1975 continue; 2234 continue;
@@ -2174,11 +2433,9 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local,
2174 /* new un-ordered ampdu frame - process it */ 2433 /* new un-ordered ampdu frame - process it */
2175 2434
2176 /* reset session timer */ 2435 /* reset session timer */
2177 if (tid_agg_rx->timeout) { 2436 if (tid_agg_rx->timeout)
2178 unsigned long expires = 2437 mod_timer(&tid_agg_rx->session_timer,
2179 jiffies + (tid_agg_rx->timeout / 1000) * HZ; 2438 TU_TO_EXP_TIME(tid_agg_rx->timeout));
2180 mod_timer(&tid_agg_rx->session_timer, expires);
2181 }
2182 2439
2183 /* if this mpdu is fragmented - terminate rx aggregation session */ 2440 /* if this mpdu is fragmented - terminate rx aggregation session */
2184 sc = le16_to_cpu(hdr->seq_ctrl); 2441 sc = le16_to_cpu(hdr->seq_ctrl);
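
The reorder-timer hunk above replaces the open-coded millisecond arithmetic with TU_TO_EXP_TIME(); the macro itself is not part of this diff, and the sta_info.h hunk further down now documents @timeout as being in TUs. A minimal sketch of what such a helper is assumed to look like (1 TU = 1024 microseconds):

    #include <linux/jiffies.h>

    /* assumed definitions, not shown in this patch */
    #define TU_TO_JIFFIES(x)   (usecs_to_jiffies((x) * 1024))
    #define TU_TO_EXP_TIME(x)  (jiffies + TU_TO_JIFFIES(x))
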
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index f5c7c3371929..3bf9839f5916 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -12,14 +12,11 @@
12 * published by the Free Software Foundation. 12 * published by the Free Software Foundation.
13 */ 13 */
14 14
15/* TODO: 15/* TODO: figure out how to keep the "current BSS" from expiring */
16 * order BSS list by RSSI(?) ("quality of AP")
17 * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE,
18 * SSID)
19 */
20 16
21#include <linux/wireless.h> 17#include <linux/wireless.h>
22#include <linux/if_arp.h> 18#include <linux/if_arp.h>
19#include <linux/rtnetlink.h>
23#include <net/mac80211.h> 20#include <net/mac80211.h>
24#include <net/iw_handler.h> 21#include <net/iw_handler.h>
25 22
@@ -30,192 +27,29 @@
30#define IEEE80211_CHANNEL_TIME (HZ / 33) 27#define IEEE80211_CHANNEL_TIME (HZ / 33)
31#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5) 28#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5)
32 29
33void ieee80211_rx_bss_list_init(struct ieee80211_local *local)
34{
35 spin_lock_init(&local->bss_lock);
36 INIT_LIST_HEAD(&local->bss_list);
37}
38
39void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local)
40{
41 struct ieee80211_bss *bss, *tmp;
42
43 list_for_each_entry_safe(bss, tmp, &local->bss_list, list)
44 ieee80211_rx_bss_put(local, bss);
45}
46
47struct ieee80211_bss * 30struct ieee80211_bss *
48ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq, 31ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq,
49 u8 *ssid, u8 ssid_len) 32 u8 *ssid, u8 ssid_len)
50{ 33{
51 struct ieee80211_bss *bss; 34 return (void *)cfg80211_get_bss(local->hw.wiphy,
52 35 ieee80211_get_channel(local->hw.wiphy,
53 spin_lock_bh(&local->bss_lock); 36 freq),
54 bss = local->bss_hash[STA_HASH(bssid)]; 37 bssid, ssid, ssid_len,
55 while (bss) { 38 0, 0);
56 if (!bss_mesh_cfg(bss) &&
57 !memcmp(bss->bssid, bssid, ETH_ALEN) &&
58 bss->freq == freq &&
59 bss->ssid_len == ssid_len &&
60 (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) {
61 atomic_inc(&bss->users);
62 break;
63 }
64 bss = bss->hnext;
65 }
66 spin_unlock_bh(&local->bss_lock);
67 return bss;
68}
69
70/* Caller must hold local->bss_lock */
71static void __ieee80211_rx_bss_hash_add(struct ieee80211_local *local,
72 struct ieee80211_bss *bss)
73{
74 u8 hash_idx;
75
76 if (bss_mesh_cfg(bss))
77 hash_idx = mesh_id_hash(bss_mesh_id(bss),
78 bss_mesh_id_len(bss));
79 else
80 hash_idx = STA_HASH(bss->bssid);
81
82 bss->hnext = local->bss_hash[hash_idx];
83 local->bss_hash[hash_idx] = bss;
84}
85
86/* Caller must hold local->bss_lock */
87static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local,
88 struct ieee80211_bss *bss)
89{
90 struct ieee80211_bss *b, *prev = NULL;
91 b = local->bss_hash[STA_HASH(bss->bssid)];
92 while (b) {
93 if (b == bss) {
94 if (!prev)
95 local->bss_hash[STA_HASH(bss->bssid)] =
96 bss->hnext;
97 else
98 prev->hnext = bss->hnext;
99 break;
100 }
101 prev = b;
102 b = b->hnext;
103 }
104}
105
106struct ieee80211_bss *
107ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq,
108 u8 *ssid, u8 ssid_len)
109{
110 struct ieee80211_bss *bss;
111
112 bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
113 if (!bss)
114 return NULL;
115 atomic_set(&bss->users, 2);
116 memcpy(bss->bssid, bssid, ETH_ALEN);
117 bss->freq = freq;
118 if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) {
119 memcpy(bss->ssid, ssid, ssid_len);
120 bss->ssid_len = ssid_len;
121 }
122
123 spin_lock_bh(&local->bss_lock);
124 /* TODO: order by RSSI? */
125 list_add_tail(&bss->list, &local->bss_list);
126 __ieee80211_rx_bss_hash_add(local, bss);
127 spin_unlock_bh(&local->bss_lock);
128 return bss;
129}
130
131#ifdef CONFIG_MAC80211_MESH
132static struct ieee80211_bss *
133ieee80211_rx_mesh_bss_get(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
134 u8 *mesh_cfg, int freq)
135{
136 struct ieee80211_bss *bss;
137
138 spin_lock_bh(&local->bss_lock);
139 bss = local->bss_hash[mesh_id_hash(mesh_id, mesh_id_len)];
140 while (bss) {
141 if (bss_mesh_cfg(bss) &&
142 !memcmp(bss_mesh_cfg(bss), mesh_cfg, MESH_CFG_CMP_LEN) &&
143 bss->freq == freq &&
144 mesh_id_len == bss->mesh_id_len &&
145 (mesh_id_len == 0 || !memcmp(bss->mesh_id, mesh_id,
146 mesh_id_len))) {
147 atomic_inc(&bss->users);
148 break;
149 }
150 bss = bss->hnext;
151 }
152 spin_unlock_bh(&local->bss_lock);
153 return bss;
154} 39}
155 40
156static struct ieee80211_bss * 41static void ieee80211_rx_bss_free(struct cfg80211_bss *cbss)
157ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len,
158 u8 *mesh_cfg, int mesh_config_len, int freq)
159{ 42{
160 struct ieee80211_bss *bss; 43 struct ieee80211_bss *bss = (void *)cbss;
161
162 if (mesh_config_len != IEEE80211_MESH_CONFIG_LEN)
163 return NULL;
164
165 bss = kzalloc(sizeof(*bss), GFP_ATOMIC);
166 if (!bss)
167 return NULL;
168
169 bss->mesh_cfg = kmalloc(MESH_CFG_CMP_LEN, GFP_ATOMIC);
170 if (!bss->mesh_cfg) {
171 kfree(bss);
172 return NULL;
173 }
174
175 if (mesh_id_len && mesh_id_len <= IEEE80211_MAX_MESH_ID_LEN) {
176 bss->mesh_id = kmalloc(mesh_id_len, GFP_ATOMIC);
177 if (!bss->mesh_id) {
178 kfree(bss->mesh_cfg);
179 kfree(bss);
180 return NULL;
181 }
182 memcpy(bss->mesh_id, mesh_id, mesh_id_len);
183 }
184
185 atomic_set(&bss->users, 2);
186 memcpy(bss->mesh_cfg, mesh_cfg, MESH_CFG_CMP_LEN);
187 bss->mesh_id_len = mesh_id_len;
188 bss->freq = freq;
189 spin_lock_bh(&local->bss_lock);
190 /* TODO: order by RSSI? */
191 list_add_tail(&bss->list, &local->bss_list);
192 __ieee80211_rx_bss_hash_add(local, bss);
193 spin_unlock_bh(&local->bss_lock);
194 return bss;
195}
196#endif
197 44
198static void ieee80211_rx_bss_free(struct ieee80211_bss *bss)
199{
200 kfree(bss->ies);
201 kfree(bss_mesh_id(bss)); 45 kfree(bss_mesh_id(bss));
202 kfree(bss_mesh_cfg(bss)); 46 kfree(bss_mesh_cfg(bss));
203 kfree(bss);
204} 47}
205 48
206void ieee80211_rx_bss_put(struct ieee80211_local *local, 49void ieee80211_rx_bss_put(struct ieee80211_local *local,
207 struct ieee80211_bss *bss) 50 struct ieee80211_bss *bss)
208{ 51{
209 local_bh_disable(); 52 cfg80211_put_bss((struct cfg80211_bss *)bss);
210 if (!atomic_dec_and_lock(&bss->users, &local->bss_lock)) {
211 local_bh_enable();
212 return;
213 }
214
215 __ieee80211_rx_bss_hash_del(local, bss);
216 list_del(&bss->list);
217 spin_unlock_bh(&local->bss_lock);
218 ieee80211_rx_bss_free(bss);
219} 53}
220 54
221struct ieee80211_bss * 55struct ieee80211_bss *
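
The (void *) conversions above between struct ieee80211_bss and struct cfg80211_bss only work if the mac80211 structure embeds the cfg80211 one as its first member; that definition lives in ieee80211_i.h and is not part of this hunk, so the following is only an assumed sketch of the layout that the later "bss->cbss.*" accesses rely on:

    struct ieee80211_bss {
        /* must stay first so the casts to/from cfg80211_bss are valid */
        struct cfg80211_bss cbss;

        /* mac80211-private extras, e.g. fields touched later in this diff */
        unsigned long last_probe_resp;
        bool has_erp_value;
        bool wmm_used;
        u8 supp_rates_len;
        /* ... */
    };
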
@@ -224,39 +58,25 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
224 struct ieee80211_mgmt *mgmt, 58 struct ieee80211_mgmt *mgmt,
225 size_t len, 59 size_t len,
226 struct ieee802_11_elems *elems, 60 struct ieee802_11_elems *elems,
227 int freq, bool beacon) 61 struct ieee80211_channel *channel,
62 bool beacon)
228{ 63{
229 struct ieee80211_bss *bss; 64 struct ieee80211_bss *bss;
230 int clen; 65 int clen;
66 s32 signal = 0;
231 67
232#ifdef CONFIG_MAC80211_MESH 68 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
233 if (elems->mesh_config) 69 signal = rx_status->signal * 100;
234 bss = ieee80211_rx_mesh_bss_get(local, elems->mesh_id, 70 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
235 elems->mesh_id_len, elems->mesh_config, freq); 71 signal = (rx_status->signal * 100) / local->hw.max_signal;
236 else 72
237#endif 73 bss = (void *)cfg80211_inform_bss_frame(local->hw.wiphy, channel,
238 bss = ieee80211_rx_bss_get(local, mgmt->bssid, freq, 74 mgmt, len, signal, GFP_ATOMIC);
239 elems->ssid, elems->ssid_len); 75
240 if (!bss) { 76 if (!bss)
241#ifdef CONFIG_MAC80211_MESH 77 return NULL;
242 if (elems->mesh_config) 78
243 bss = ieee80211_rx_mesh_bss_add(local, elems->mesh_id, 79 bss->cbss.free_priv = ieee80211_rx_bss_free;
244 elems->mesh_id_len, elems->mesh_config,
245 elems->mesh_config_len, freq);
246 else
247#endif
248 bss = ieee80211_rx_bss_add(local, mgmt->bssid, freq,
249 elems->ssid, elems->ssid_len);
250 if (!bss)
251 return NULL;
252 } else {
253#if 0
254 /* TODO: order by RSSI? */
255 spin_lock_bh(&local->bss_lock);
256 list_move_tail(&bss->list, &local->bss_list);
257 spin_unlock_bh(&local->bss_lock);
258#endif
259 }
260 80
261 /* save the ERP value so that it is available at association time */ 81 /* save the ERP value so that it is available at association time */
262 if (elems->erp_info && elems->erp_info_len >= 1) { 82 if (elems->erp_info && elems->erp_info_len >= 1) {
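
The two branches that compute "signal" above feed cfg80211_inform_bss_frame(), which (by assumption here) expects either hundredths of a dBm or a 0-100 value, depending on how the wiphy advertises its signal type. A worked example with illustrative numbers:

    /* IEEE80211_HW_SIGNAL_DBM:    rx_status->signal = -42
     *     signal = -42 * 100          = -4200   (i.e. -42.00 dBm)
     * IEEE80211_HW_SIGNAL_UNSPEC: rx_status->signal = 55, max_signal = 110
     *     signal = 55 * 100 / 110     = 50      (on a 0-100 scale)
     */
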
@@ -264,9 +84,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
264 bss->has_erp_value = 1; 84 bss->has_erp_value = 1;
265 } 85 }
266 86
267 bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int);
268 bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info);
269
270 if (elems->tim) { 87 if (elems->tim) {
271 struct ieee80211_tim_ie *tim_ie = 88 struct ieee80211_tim_ie *tim_ie =
272 (struct ieee80211_tim_ie *)elems->tim; 89 (struct ieee80211_tim_ie *)elems->tim;
@@ -295,37 +112,27 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
295 bss->supp_rates_len += clen; 112 bss->supp_rates_len += clen;
296 } 113 }
297 114
298 bss->band = rx_status->band;
299
300 bss->timestamp = le64_to_cpu(mgmt->u.beacon.timestamp);
301 bss->last_update = jiffies;
302 bss->signal = rx_status->signal;
303 bss->noise = rx_status->noise;
304 bss->qual = rx_status->qual;
305 bss->wmm_used = elems->wmm_param || elems->wmm_info; 115 bss->wmm_used = elems->wmm_param || elems->wmm_info;
306 116
307 if (!beacon) 117 if (!beacon)
308 bss->last_probe_resp = jiffies; 118 bss->last_probe_resp = jiffies;
309 119
310 /*
311 * For probe responses, or if we don't have any information yet,
312 * use the IEs from the beacon.
313 */
314 if (!bss->ies || !beacon) {
315 if (bss->ies == NULL || bss->ies_len < elems->total_len) {
316 kfree(bss->ies);
317 bss->ies = kmalloc(elems->total_len, GFP_ATOMIC);
318 }
319 if (bss->ies) {
320 memcpy(bss->ies, elems->ie_start, elems->total_len);
321 bss->ies_len = elems->total_len;
322 } else
323 bss->ies_len = 0;
324 }
325
326 return bss; 120 return bss;
327} 121}
328 122
123void ieee80211_rx_bss_remove(struct ieee80211_sub_if_data *sdata, u8 *bssid,
124 int freq, u8 *ssid, u8 ssid_len)
125{
126 struct ieee80211_bss *bss;
127 struct ieee80211_local *local = sdata->local;
128
129 bss = ieee80211_rx_bss_get(local, bssid, freq, ssid, ssid_len);
130 if (bss) {
131 cfg80211_unlink_bss(local->hw.wiphy, (void *)bss);
132 ieee80211_rx_bss_put(local, bss);
133 }
134}
135
329ieee80211_rx_result 136ieee80211_rx_result
330ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, 137ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
331 struct ieee80211_rx_status *rx_status) 138 struct ieee80211_rx_status *rx_status)
@@ -387,7 +194,7 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
387 194
388 bss = ieee80211_bss_info_update(sdata->local, rx_status, 195 bss = ieee80211_bss_info_update(sdata->local, rx_status,
389 mgmt, skb->len, &elems, 196 mgmt, skb->len, &elems,
390 freq, beacon); 197 channel, beacon);
391 if (bss) 198 if (bss)
392 ieee80211_rx_bss_put(sdata->local, bss); 199 ieee80211_rx_bss_put(sdata->local, bss);
393 200
@@ -395,56 +202,94 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
395 return RX_QUEUED; 202 return RX_QUEUED;
396} 203}
397 204
398static void ieee80211_send_nullfunc(struct ieee80211_local *local, 205void ieee80211_scan_failed(struct ieee80211_local *local)
399 struct ieee80211_sub_if_data *sdata,
400 int powersave)
401{ 206{
402 struct sk_buff *skb; 207 if (WARN_ON(!local->scan_req))
403 struct ieee80211_hdr *nullfunc;
404 __le16 fc;
405
406 skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24);
407 if (!skb) {
408 printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc "
409 "frame\n", sdata->dev->name);
410 return; 208 return;
209
210 /* notify cfg80211 about the failed scan */
211 if (local->scan_req != &local->int_scan_req)
212 cfg80211_scan_done(local->scan_req, true);
213
214 local->scan_req = NULL;
215}
216
217/*
218 * inform AP that we will go to sleep so that it will buffer the frames
219 * while we scan
220 */
221static void ieee80211_scan_ps_enable(struct ieee80211_sub_if_data *sdata)
222{
223 struct ieee80211_local *local = sdata->local;
224 bool ps = false;
225
226 /* FIXME: what to do when local->pspolling is true? */
227
228 del_timer_sync(&local->dynamic_ps_timer);
229 cancel_work_sync(&local->dynamic_ps_enable_work);
230
231 if (local->hw.conf.flags & IEEE80211_CONF_PS) {
232 ps = true;
233 local->hw.conf.flags &= ~IEEE80211_CONF_PS;
234 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
235 }
236
237 if (!ps || !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK))
238 /*
239 * If power save was enabled, no need to send a nullfunc
240 * frame because AP knows that we are sleeping. But if the
241 * hardware is creating the nullfunc frame for power save
 242 * status (i.e. IEEE80211_HW_PS_NULLFUNC_STACK is not
243 * enabled) and power save was enabled, the firmware just
244 * sent a null frame with power save disabled. So we need
245 * to send a new nullfunc frame to inform the AP that we
246 * are again sleeping.
247 */
248 ieee80211_send_nullfunc(local, sdata, 1);
249}
250
251/* inform AP that we are awake again, unless power save is enabled */
252static void ieee80211_scan_ps_disable(struct ieee80211_sub_if_data *sdata)
253{
254 struct ieee80211_local *local = sdata->local;
255
256 if (!local->powersave)
257 ieee80211_send_nullfunc(local, sdata, 0);
258 else {
259 /*
260 * In !IEEE80211_HW_PS_NULLFUNC_STACK case the hardware
261 * will send a nullfunc frame with the powersave bit set
262 * even though the AP already knows that we are sleeping.
263 * This could be avoided by sending a null frame with power
264 * save bit disabled before enabling the power save, but
265 * this doesn't gain anything.
266 *
267 * When IEEE80211_HW_PS_NULLFUNC_STACK is enabled, no need
268 * to send a nullfunc frame because AP already knows that
269 * we are sleeping, let's just enable power save mode in
270 * hardware.
271 */
272 local->hw.conf.flags |= IEEE80211_CONF_PS;
273 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
411 } 274 }
412 skb_reserve(skb, local->hw.extra_tx_headroom);
413
414 nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24);
415 memset(nullfunc, 0, 24);
416 fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC |
417 IEEE80211_FCTL_TODS);
418 if (powersave)
419 fc |= cpu_to_le16(IEEE80211_FCTL_PM);
420 nullfunc->frame_control = fc;
421 memcpy(nullfunc->addr1, sdata->u.sta.bssid, ETH_ALEN);
422 memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN);
423 memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN);
424
425 ieee80211_tx_skb(sdata, skb, 0);
426} 275}
427 276
428void ieee80211_scan_completed(struct ieee80211_hw *hw) 277void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
429{ 278{
430 struct ieee80211_local *local = hw_to_local(hw); 279 struct ieee80211_local *local = hw_to_local(hw);
431 struct ieee80211_sub_if_data *sdata; 280 struct ieee80211_sub_if_data *sdata;
432 union iwreq_data wrqu;
433 281
434 if (WARN_ON(!local->hw_scanning && !local->sw_scanning)) 282 if (WARN_ON(!local->hw_scanning && !local->sw_scanning))
435 return; 283 return;
436 284
437 local->last_scan_completed = jiffies; 285 if (WARN_ON(!local->scan_req))
438 memset(&wrqu, 0, sizeof(wrqu)); 286 return;
439 287
440 /* 288 if (local->scan_req != &local->int_scan_req)
441 * local->scan_sdata could have been NULLed by the interface 289 cfg80211_scan_done(local->scan_req, aborted);
442 * down code in case we were scanning on an interface that is 290 local->scan_req = NULL;
443 * being taken down. 291
444 */ 292 local->last_scan_completed = jiffies;
445 sdata = local->scan_sdata;
446 if (sdata)
447 wireless_send_event(sdata->dev, SIOCGIWSCAN, &wrqu, NULL);
448 293
449 if (local->hw_scanning) { 294 if (local->hw_scanning) {
450 local->hw_scanning = false; 295 local->hw_scanning = false;
@@ -472,34 +317,46 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw)
472 netif_addr_unlock(local->mdev); 317 netif_addr_unlock(local->mdev);
473 netif_tx_unlock_bh(local->mdev); 318 netif_tx_unlock_bh(local->mdev);
474 319
475 rcu_read_lock(); 320 if (local->ops->sw_scan_complete)
476 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 321 local->ops->sw_scan_complete(local_to_hw(local));
322
323 mutex_lock(&local->iflist_mtx);
324 list_for_each_entry(sdata, &local->interfaces, list) {
325 if (!netif_running(sdata->dev))
326 continue;
327
477 /* Tell AP we're back */ 328 /* Tell AP we're back */
478 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 329 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
479 if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { 330 if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) {
480 ieee80211_send_nullfunc(local, sdata, 0); 331 ieee80211_scan_ps_disable(sdata);
481 netif_tx_wake_all_queues(sdata->dev); 332 netif_tx_wake_all_queues(sdata->dev);
482 } 333 }
483 } else 334 } else
484 netif_tx_wake_all_queues(sdata->dev); 335 netif_tx_wake_all_queues(sdata->dev);
336
337 /* re-enable beaconing */
338 if (sdata->vif.type == NL80211_IFTYPE_AP ||
339 sdata->vif.type == NL80211_IFTYPE_ADHOC ||
340 sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
341 ieee80211_if_config(sdata,
342 IEEE80211_IFCC_BEACON_ENABLED);
485 } 343 }
486 rcu_read_unlock(); 344 mutex_unlock(&local->iflist_mtx);
487 345
488 done: 346 done:
489 ieee80211_mlme_notify_scan_completed(local); 347 ieee80211_mlme_notify_scan_completed(local);
348 ieee80211_ibss_notify_scan_completed(local);
490 ieee80211_mesh_notify_scan_completed(local); 349 ieee80211_mesh_notify_scan_completed(local);
491} 350}
492EXPORT_SYMBOL(ieee80211_scan_completed); 351EXPORT_SYMBOL(ieee80211_scan_completed);
493 352
494
495void ieee80211_scan_work(struct work_struct *work) 353void ieee80211_scan_work(struct work_struct *work)
496{ 354{
497 struct ieee80211_local *local = 355 struct ieee80211_local *local =
498 container_of(work, struct ieee80211_local, scan_work.work); 356 container_of(work, struct ieee80211_local, scan_work.work);
499 struct ieee80211_sub_if_data *sdata = local->scan_sdata; 357 struct ieee80211_sub_if_data *sdata = local->scan_sdata;
500 struct ieee80211_supported_band *sband;
501 struct ieee80211_channel *chan; 358 struct ieee80211_channel *chan;
502 int skip; 359 int skip, i;
503 unsigned long next_delay = 0; 360 unsigned long next_delay = 0;
504 361
505 /* 362 /*
@@ -510,33 +367,13 @@ void ieee80211_scan_work(struct work_struct *work)
510 367
511 switch (local->scan_state) { 368 switch (local->scan_state) {
512 case SCAN_SET_CHANNEL: 369 case SCAN_SET_CHANNEL:
513 /*
514 * Get current scan band. scan_band may be IEEE80211_NUM_BANDS
515 * after we successfully scanned the last channel of the last
516 * band (and the last band is supported by the hw)
517 */
518 if (local->scan_band < IEEE80211_NUM_BANDS)
519 sband = local->hw.wiphy->bands[local->scan_band];
520 else
521 sband = NULL;
522
523 /*
524 * If we are at an unsupported band and have more bands
525 * left to scan, advance to the next supported one.
526 */
527 while (!sband && local->scan_band < IEEE80211_NUM_BANDS - 1) {
528 local->scan_band++;
529 sband = local->hw.wiphy->bands[local->scan_band];
530 local->scan_channel_idx = 0;
531 }
532
533 /* if no more bands/channels left, complete scan */ 370 /* if no more bands/channels left, complete scan */
534 if (!sband || local->scan_channel_idx >= sband->n_channels) { 371 if (local->scan_channel_idx >= local->scan_req->n_channels) {
535 ieee80211_scan_completed(local_to_hw(local)); 372 ieee80211_scan_completed(local_to_hw(local), false);
536 return; 373 return;
537 } 374 }
538 skip = 0; 375 skip = 0;
539 chan = &sband->channels[local->scan_channel_idx]; 376 chan = local->scan_req->channels[local->scan_channel_idx];
540 377
541 if (chan->flags & IEEE80211_CHAN_DISABLED || 378 if (chan->flags & IEEE80211_CHAN_DISABLED ||
542 (sdata->vif.type == NL80211_IFTYPE_ADHOC && 379 (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
@@ -552,15 +389,6 @@ void ieee80211_scan_work(struct work_struct *work)
552 389
553 /* advance state machine to next channel/band */ 390 /* advance state machine to next channel/band */
554 local->scan_channel_idx++; 391 local->scan_channel_idx++;
555 if (local->scan_channel_idx >= sband->n_channels) {
556 /*
557 * scan_band may end up == IEEE80211_NUM_BANDS, but
558 * we'll catch that case above and complete the scan
559 * if that is the case.
560 */
561 local->scan_band++;
562 local->scan_channel_idx = 0;
563 }
564 392
565 if (skip) 393 if (skip)
566 break; 394 break;
@@ -573,10 +401,15 @@ void ieee80211_scan_work(struct work_struct *work)
573 next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; 401 next_delay = IEEE80211_PASSIVE_CHANNEL_TIME;
574 local->scan_state = SCAN_SET_CHANNEL; 402 local->scan_state = SCAN_SET_CHANNEL;
575 403
576 if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN) 404 if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN ||
405 !local->scan_req->n_ssids)
577 break; 406 break;
578 ieee80211_send_probe_req(sdata, NULL, local->scan_ssid, 407 for (i = 0; i < local->scan_req->n_ssids; i++)
579 local->scan_ssid_len); 408 ieee80211_send_probe_req(
409 sdata, NULL,
410 local->scan_req->ssids[i].ssid,
411 local->scan_req->ssids[i].ssid_len,
412 local->scan_req->ie, local->scan_req->ie_len);
580 next_delay = IEEE80211_CHANNEL_TIME; 413 next_delay = IEEE80211_CHANNEL_TIME;
581 break; 414 break;
582 } 415 }
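
The probe-request loop above is driven entirely by the cfg80211_scan_request that cfg80211 hands in. Below is a hypothetical caller-side sketch of how such a request could be filled in, using only the fields this file dereferences (->n_channels, ->channels[], ->n_ssids, ->ssids[], ->ie, ->ie_len); the trailing-array allocation and the pointer nature of ->ssids are assumptions, not something this diff shows:

    static struct cfg80211_scan_request *
    example_build_scan_req(struct wiphy *wiphy, struct cfg80211_ssid *ssid)
    {
        struct cfg80211_scan_request *req;

        /* room for two channel pointers after the struct */
        req = kzalloc(sizeof(*req) + 2 * sizeof(req->channels[0]), GFP_KERNEL);
        if (!req)
            return NULL;

        req->n_channels = 2;
        req->channels[0] = ieee80211_get_channel(wiphy, 2412);  /* channel 1 */
        req->channels[1] = ieee80211_get_channel(wiphy, 2437);  /* channel 6 */

        req->ssids = ssid;    /* one SSID -> active scan on these channels */
        req->n_ssids = 1;

        req->ie = NULL;       /* no extra IEs appended to probe requests */
        req->ie_len = 0;

        return req;
    }
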
@@ -587,14 +420,19 @@ void ieee80211_scan_work(struct work_struct *work)
587 420
588 421
589int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, 422int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
590 u8 *ssid, size_t ssid_len) 423 struct cfg80211_scan_request *req)
591{ 424{
592 struct ieee80211_local *local = scan_sdata->local; 425 struct ieee80211_local *local = scan_sdata->local;
593 struct ieee80211_sub_if_data *sdata; 426 struct ieee80211_sub_if_data *sdata;
594 427
595 if (ssid_len > IEEE80211_MAX_SSID_LEN) 428 if (!req)
596 return -EINVAL; 429 return -EINVAL;
597 430
431 if (local->scan_req && local->scan_req != req)
432 return -EBUSY;
433
434 local->scan_req = req;
435
598 /* MLME-SCAN.request (page 118) page 144 (11.1.3.1) 436 /* MLME-SCAN.request (page 118) page 144 (11.1.3.1)
599 * BSSType: INFRASTRUCTURE, INDEPENDENT, ANY_BSS 437 * BSSType: INFRASTRUCTURE, INDEPENDENT, ANY_BSS
600 * BSSID: MACAddress 438 * BSSID: MACAddress
@@ -622,7 +460,7 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
622 int rc; 460 int rc;
623 461
624 local->hw_scanning = true; 462 local->hw_scanning = true;
625 rc = local->ops->hw_scan(local_to_hw(local), ssid, ssid_len); 463 rc = local->ops->hw_scan(local_to_hw(local), req);
626 if (rc) { 464 if (rc) {
627 local->hw_scanning = false; 465 local->hw_scanning = false;
628 return rc; 466 return rc;
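
With this change the hw_scan driver callback takes the whole request rather than a single SSID. A driver-side sketch (hypothetical driver; only the two arguments visible in the call above are assumed):

    static int mydrv_hw_scan(struct ieee80211_hw *hw,
                             struct cfg80211_scan_request *req)
    {
        /* program req->channels[0..n_channels-1], req->ssids[0..n_ssids-1]
         * and req->ie/req->ie_len into the firmware; report the outcome
         * later through ieee80211_scan_completed(hw, aborted). */
        return 0;
    }
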
@@ -631,29 +469,49 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
631 return 0; 469 return 0;
632 } 470 }
633 471
472 /*
473 * Hardware/driver doesn't support hw_scan, so use software
474 * scanning instead. First send a nullfunc frame with power save
475 * bit on so that AP will buffer the frames for us while we are not
476 * listening, then send probe requests to each channel and wait for
477 * the responses. After all channels are scanned, tune back to the
478 * original channel and send a nullfunc frame with power save bit
479 * off to trigger the AP to send us all the buffered frames.
480 *
481 * Note that while local->sw_scanning is true everything else but
482 * nullfunc frames and probe requests will be dropped in
483 * ieee80211_tx_h_check_assoc().
484 */
634 local->sw_scanning = true; 485 local->sw_scanning = true;
486 if (local->ops->sw_scan_start)
487 local->ops->sw_scan_start(local_to_hw(local));
488
489 mutex_lock(&local->iflist_mtx);
490 list_for_each_entry(sdata, &local->interfaces, list) {
491 if (!netif_running(sdata->dev))
492 continue;
493
494 /* disable beaconing */
495 if (sdata->vif.type == NL80211_IFTYPE_AP ||
496 sdata->vif.type == NL80211_IFTYPE_ADHOC ||
497 sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
498 ieee80211_if_config(sdata,
499 IEEE80211_IFCC_BEACON_ENABLED);
635 500
636 rcu_read_lock();
637 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
638 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 501 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
639 if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { 502 if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) {
640 netif_tx_stop_all_queues(sdata->dev); 503 netif_tx_stop_all_queues(sdata->dev);
641 ieee80211_send_nullfunc(local, sdata, 1); 504 ieee80211_scan_ps_enable(sdata);
642 } 505 }
643 } else 506 } else
644 netif_tx_stop_all_queues(sdata->dev); 507 netif_tx_stop_all_queues(sdata->dev);
645 } 508 }
646 rcu_read_unlock(); 509 mutex_unlock(&local->iflist_mtx);
647 510
648 if (ssid) {
649 local->scan_ssid_len = ssid_len;
650 memcpy(local->scan_ssid, ssid, ssid_len);
651 } else
652 local->scan_ssid_len = 0;
653 local->scan_state = SCAN_SET_CHANNEL; 511 local->scan_state = SCAN_SET_CHANNEL;
654 local->scan_channel_idx = 0; 512 local->scan_channel_idx = 0;
655 local->scan_band = IEEE80211_BAND_2GHZ;
656 local->scan_sdata = scan_sdata; 513 local->scan_sdata = scan_sdata;
514 local->scan_req = req;
657 515
658 netif_addr_lock_bh(local->mdev); 516 netif_addr_lock_bh(local->mdev);
659 local->filter_flags |= FIF_BCN_PRBRESP_PROMISC; 517 local->filter_flags |= FIF_BCN_PRBRESP_PROMISC;
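
The software-scan path now also brackets the scan with the optional sw_scan_start()/sw_scan_complete() driver callbacks (invoked above and in ieee80211_scan_completed() with just the ieee80211_hw pointer). A minimal, purely illustrative sketch of what a driver might hook up:

    static void mydrv_sw_scan_start(struct ieee80211_hw *hw)
    {
        /* quiesce filters/offloads that would misbehave off-channel */
    }

    static void mydrv_sw_scan_complete(struct ieee80211_hw *hw)
    {
        /* undo whatever mydrv_sw_scan_start() changed */
    }
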
@@ -673,13 +531,21 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata,
673 531
674 532
675int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, 533int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
676 u8 *ssid, size_t ssid_len) 534 struct cfg80211_scan_request *req)
677{ 535{
678 struct ieee80211_local *local = sdata->local; 536 struct ieee80211_local *local = sdata->local;
679 struct ieee80211_if_sta *ifsta; 537 struct ieee80211_if_managed *ifmgd;
538
539 if (!req)
540 return -EINVAL;
541
542 if (local->scan_req && local->scan_req != req)
543 return -EBUSY;
544
545 local->scan_req = req;
680 546
681 if (sdata->vif.type != NL80211_IFTYPE_STATION) 547 if (sdata->vif.type != NL80211_IFTYPE_STATION)
682 return ieee80211_start_scan(sdata, ssid, ssid_len); 548 return ieee80211_start_scan(sdata, req);
683 549
684 /* 550 /*
685 * STA has a state machine that might need to defer scanning 551 * STA has a state machine that might need to defer scanning
@@ -693,242 +559,9 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
693 return -EBUSY; 559 return -EBUSY;
694 } 560 }
695 561
696 ifsta = &sdata->u.sta; 562 ifmgd = &sdata->u.mgd;
697 563 set_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request);
698 ifsta->scan_ssid_len = ssid_len; 564 queue_work(local->hw.workqueue, &ifmgd->work);
699 if (ssid_len)
700 memcpy(ifsta->scan_ssid, ssid, ssid_len);
701 set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request);
702 queue_work(local->hw.workqueue, &ifsta->work);
703 565
704 return 0; 566 return 0;
705} 567}
706
707
708static void ieee80211_scan_add_ies(struct iw_request_info *info,
709 struct ieee80211_bss *bss,
710 char **current_ev, char *end_buf)
711{
712 u8 *pos, *end, *next;
713 struct iw_event iwe;
714
715 if (bss == NULL || bss->ies == NULL)
716 return;
717
718 /*
719 * If needed, fragment the IEs buffer (at IE boundaries) into short
720 * enough fragments to fit into IW_GENERIC_IE_MAX octet messages.
721 */
722 pos = bss->ies;
723 end = pos + bss->ies_len;
724
725 while (end - pos > IW_GENERIC_IE_MAX) {
726 next = pos + 2 + pos[1];
727 while (next + 2 + next[1] - pos < IW_GENERIC_IE_MAX)
728 next = next + 2 + next[1];
729
730 memset(&iwe, 0, sizeof(iwe));
731 iwe.cmd = IWEVGENIE;
732 iwe.u.data.length = next - pos;
733 *current_ev = iwe_stream_add_point(info, *current_ev,
734 end_buf, &iwe, pos);
735
736 pos = next;
737 }
738
739 if (end > pos) {
740 memset(&iwe, 0, sizeof(iwe));
741 iwe.cmd = IWEVGENIE;
742 iwe.u.data.length = end - pos;
743 *current_ev = iwe_stream_add_point(info, *current_ev,
744 end_buf, &iwe, pos);
745 }
746}
747
748
749static char *
750ieee80211_scan_result(struct ieee80211_local *local,
751 struct iw_request_info *info,
752 struct ieee80211_bss *bss,
753 char *current_ev, char *end_buf)
754{
755 struct iw_event iwe;
756 char *buf;
757
758 if (time_after(jiffies,
759 bss->last_update + IEEE80211_SCAN_RESULT_EXPIRE))
760 return current_ev;
761
762 memset(&iwe, 0, sizeof(iwe));
763 iwe.cmd = SIOCGIWAP;
764 iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
765 memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN);
766 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
767 IW_EV_ADDR_LEN);
768
769 memset(&iwe, 0, sizeof(iwe));
770 iwe.cmd = SIOCGIWESSID;
771 if (bss_mesh_cfg(bss)) {
772 iwe.u.data.length = bss_mesh_id_len(bss);
773 iwe.u.data.flags = 1;
774 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
775 &iwe, bss_mesh_id(bss));
776 } else {
777 iwe.u.data.length = bss->ssid_len;
778 iwe.u.data.flags = 1;
779 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
780 &iwe, bss->ssid);
781 }
782
783 if (bss->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)
784 || bss_mesh_cfg(bss)) {
785 memset(&iwe, 0, sizeof(iwe));
786 iwe.cmd = SIOCGIWMODE;
787 if (bss_mesh_cfg(bss))
788 iwe.u.mode = IW_MODE_MESH;
789 else if (bss->capability & WLAN_CAPABILITY_ESS)
790 iwe.u.mode = IW_MODE_MASTER;
791 else
792 iwe.u.mode = IW_MODE_ADHOC;
793 current_ev = iwe_stream_add_event(info, current_ev, end_buf,
794 &iwe, IW_EV_UINT_LEN);
795 }
796
797 memset(&iwe, 0, sizeof(iwe));
798 iwe.cmd = SIOCGIWFREQ;
799 iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq);
800 iwe.u.freq.e = 0;
801 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
802 IW_EV_FREQ_LEN);
803
804 memset(&iwe, 0, sizeof(iwe));
805 iwe.cmd = SIOCGIWFREQ;
806 iwe.u.freq.m = bss->freq;
807 iwe.u.freq.e = 6;
808 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
809 IW_EV_FREQ_LEN);
810 memset(&iwe, 0, sizeof(iwe));
811 iwe.cmd = IWEVQUAL;
812 iwe.u.qual.qual = bss->qual;
813 iwe.u.qual.level = bss->signal;
814 iwe.u.qual.noise = bss->noise;
815 iwe.u.qual.updated = local->wstats_flags;
816 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
817 IW_EV_QUAL_LEN);
818
819 memset(&iwe, 0, sizeof(iwe));
820 iwe.cmd = SIOCGIWENCODE;
821 if (bss->capability & WLAN_CAPABILITY_PRIVACY)
822 iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
823 else
824 iwe.u.data.flags = IW_ENCODE_DISABLED;
825 iwe.u.data.length = 0;
826 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
827 &iwe, "");
828
829 ieee80211_scan_add_ies(info, bss, &current_ev, end_buf);
830
831 if (bss->supp_rates_len > 0) {
832 /* display all supported rates in readable format */
833 char *p = current_ev + iwe_stream_lcp_len(info);
834 int i;
835
836 memset(&iwe, 0, sizeof(iwe));
837 iwe.cmd = SIOCGIWRATE;
838 /* Those two flags are ignored... */
839 iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
840
841 for (i = 0; i < bss->supp_rates_len; i++) {
842 iwe.u.bitrate.value = ((bss->supp_rates[i] &
843 0x7f) * 500000);
844 p = iwe_stream_add_value(info, current_ev, p,
845 end_buf, &iwe, IW_EV_PARAM_LEN);
846 }
847 current_ev = p;
848 }
849
850 buf = kmalloc(30, GFP_ATOMIC);
851 if (buf) {
852 memset(&iwe, 0, sizeof(iwe));
853 iwe.cmd = IWEVCUSTOM;
854 sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp));
855 iwe.u.data.length = strlen(buf);
856 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
857 &iwe, buf);
858 memset(&iwe, 0, sizeof(iwe));
859 iwe.cmd = IWEVCUSTOM;
860 sprintf(buf, " Last beacon: %dms ago",
861 jiffies_to_msecs(jiffies - bss->last_update));
862 iwe.u.data.length = strlen(buf);
863 current_ev = iwe_stream_add_point(info, current_ev,
864 end_buf, &iwe, buf);
865 kfree(buf);
866 }
867
868 if (bss_mesh_cfg(bss)) {
869 u8 *cfg = bss_mesh_cfg(bss);
870 buf = kmalloc(50, GFP_ATOMIC);
871 if (buf) {
872 memset(&iwe, 0, sizeof(iwe));
873 iwe.cmd = IWEVCUSTOM;
874 sprintf(buf, "Mesh network (version %d)", cfg[0]);
875 iwe.u.data.length = strlen(buf);
876 current_ev = iwe_stream_add_point(info, current_ev,
877 end_buf,
878 &iwe, buf);
879 sprintf(buf, "Path Selection Protocol ID: "
880 "0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3],
881 cfg[4]);
882 iwe.u.data.length = strlen(buf);
883 current_ev = iwe_stream_add_point(info, current_ev,
884 end_buf,
885 &iwe, buf);
886 sprintf(buf, "Path Selection Metric ID: "
887 "0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7],
888 cfg[8]);
889 iwe.u.data.length = strlen(buf);
890 current_ev = iwe_stream_add_point(info, current_ev,
891 end_buf,
892 &iwe, buf);
893 sprintf(buf, "Congestion Control Mode ID: "
894 "0x%02X%02X%02X%02X", cfg[9], cfg[10],
895 cfg[11], cfg[12]);
896 iwe.u.data.length = strlen(buf);
897 current_ev = iwe_stream_add_point(info, current_ev,
898 end_buf,
899 &iwe, buf);
900 sprintf(buf, "Channel Precedence: "
901 "0x%02X%02X%02X%02X", cfg[13], cfg[14],
902 cfg[15], cfg[16]);
903 iwe.u.data.length = strlen(buf);
904 current_ev = iwe_stream_add_point(info, current_ev,
905 end_buf,
906 &iwe, buf);
907 kfree(buf);
908 }
909 }
910
911 return current_ev;
912}
913
914
915int ieee80211_scan_results(struct ieee80211_local *local,
916 struct iw_request_info *info,
917 char *buf, size_t len)
918{
919 char *current_ev = buf;
920 char *end_buf = buf + len;
921 struct ieee80211_bss *bss;
922
923 spin_lock_bh(&local->bss_lock);
924 list_for_each_entry(bss, &local->bss_list, list) {
925 if (buf + len - current_ev <= IW_EV_ADDR_LEN) {
926 spin_unlock_bh(&local->bss_lock);
927 return -E2BIG;
928 }
929 current_ev = ieee80211_scan_result(local, info, bss,
930 current_ev, end_buf);
931 }
932 spin_unlock_bh(&local->bss_lock);
933 return current_ev - buf;
934}
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index f72bad636d8e..5f7a2624ed74 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -65,7 +65,7 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da
65 IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED; 65 IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED;
66 msr_report->u.action.u.measurement.msr_elem.type = request_ie->type; 66 msr_report->u.action.u.measurement.msr_elem.type = request_ie->type;
67 67
68 ieee80211_tx_skb(sdata, skb, 0); 68 ieee80211_tx_skb(sdata, skb, 1);
69} 69}
70 70
71void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, 71void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -84,3 +84,104 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
84 mgmt->sa, mgmt->bssid, 84 mgmt->sa, mgmt->bssid,
85 mgmt->u.action.u.measurement.dialog_token); 85 mgmt->u.action.u.measurement.dialog_token);
86} 86}
87
88void ieee80211_chswitch_work(struct work_struct *work)
89{
90 struct ieee80211_sub_if_data *sdata =
91 container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work);
92 struct ieee80211_bss *bss;
93 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
94
95 if (!netif_running(sdata->dev))
96 return;
97
98 bss = ieee80211_rx_bss_get(sdata->local, ifmgd->bssid,
99 sdata->local->hw.conf.channel->center_freq,
100 ifmgd->ssid, ifmgd->ssid_len);
101 if (!bss)
102 goto exit;
103
104 sdata->local->oper_channel = sdata->local->csa_channel;
105 /* XXX: shouldn't really modify cfg80211-owned data! */
106 if (!ieee80211_hw_config(sdata->local, IEEE80211_CONF_CHANGE_CHANNEL))
107 bss->cbss.channel = sdata->local->oper_channel;
108
109 ieee80211_rx_bss_put(sdata->local, bss);
110exit:
111 ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED;
112 ieee80211_wake_queues_by_reason(&sdata->local->hw,
113 IEEE80211_QUEUE_STOP_REASON_CSA);
114}
115
116void ieee80211_chswitch_timer(unsigned long data)
117{
118 struct ieee80211_sub_if_data *sdata =
119 (struct ieee80211_sub_if_data *) data;
120 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
121
122 queue_work(sdata->local->hw.workqueue, &ifmgd->chswitch_work);
123}
124
125void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata,
126 struct ieee80211_channel_sw_ie *sw_elem,
127 struct ieee80211_bss *bss)
128{
129 struct ieee80211_channel *new_ch;
130 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
131 int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num);
132
133 /* FIXME: Handle ADHOC later */
134 if (sdata->vif.type != NL80211_IFTYPE_STATION)
135 return;
136
137 if (ifmgd->state != IEEE80211_STA_MLME_ASSOCIATED)
138 return;
139
140 if (sdata->local->sw_scanning || sdata->local->hw_scanning)
141 return;
142
143 /* Disregard subsequent beacons if we are already running a timer
144 processing a CSA */
145
146 if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED)
147 return;
148
149 new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq);
150 if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED)
151 return;
152
153 sdata->local->csa_channel = new_ch;
154
155 if (sw_elem->count <= 1) {
156 queue_work(sdata->local->hw.workqueue, &ifmgd->chswitch_work);
157 } else {
158 ieee80211_stop_queues_by_reason(&sdata->local->hw,
159 IEEE80211_QUEUE_STOP_REASON_CSA);
160 ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
161 mod_timer(&ifmgd->chswitch_timer,
162 jiffies +
163 msecs_to_jiffies(sw_elem->count *
164 bss->cbss.beacon_interval));
165 }
166}
167
168void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata,
169 u16 capab_info, u8 *pwr_constr_elem,
170 u8 pwr_constr_elem_len)
171{
172 struct ieee80211_conf *conf = &sdata->local->hw.conf;
173
174 if (!(capab_info & WLAN_CAPABILITY_SPECTRUM_MGMT))
175 return;
176
177 /* Power constraint IE length should be 1 octet */
178 if (pwr_constr_elem_len != 1)
179 return;
180
181 if ((*pwr_constr_elem <= conf->channel->max_power) &&
182 (*pwr_constr_elem != sdata->local->power_constr_level)) {
183 sdata->local->power_constr_level = *pwr_constr_elem;
184 ieee80211_hw_config(sdata->local, 0);
185 }
186}
187
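
For the channel-switch timer armed in ieee80211_process_chanswitch() above, a quick worked example (beacon_interval comes from cfg80211 and is expressed in TUs):

    /* sw_elem->count = 5, bss->cbss.beacon_interval = 100 TU
     *   timer delay = msecs_to_jiffies(5 * 100) = ~500 ms
     * Treating one TU as one millisecond undershoots the exact target
     * (5 * 100 * 1.024 ms = 512 ms) by a few percent, which is close
     * enough to run the switch work just as the switch takes effect. */
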
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 10c5539c20ab..c5f14e6bbde2 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -194,12 +194,47 @@ void sta_info_destroy(struct sta_info *sta)
194 dev_kfree_skb_any(skb); 194 dev_kfree_skb_any(skb);
195 195
196 for (i = 0; i < STA_TID_NUM; i++) { 196 for (i = 0; i < STA_TID_NUM; i++) {
197 struct tid_ampdu_rx *tid_rx;
198 struct tid_ampdu_tx *tid_tx;
199
197 spin_lock_bh(&sta->lock); 200 spin_lock_bh(&sta->lock);
198 if (sta->ampdu_mlme.tid_rx[i]) 201 tid_rx = sta->ampdu_mlme.tid_rx[i];
199 del_timer_sync(&sta->ampdu_mlme.tid_rx[i]->session_timer); 202 /* Make sure timer won't free the tid_rx struct, see below */
200 if (sta->ampdu_mlme.tid_tx[i]) 203 if (tid_rx)
201 del_timer_sync(&sta->ampdu_mlme.tid_tx[i]->addba_resp_timer); 204 tid_rx->shutdown = true;
205
202 spin_unlock_bh(&sta->lock); 206 spin_unlock_bh(&sta->lock);
207
208 /*
209 * Outside spinlock - shutdown is true now so that the timer
210 * won't free tid_rx, we have to do that now. Can't let the
211 * timer do it because we have to sync the timer outside the
212 * lock that it takes itself.
213 */
214 if (tid_rx) {
215 del_timer_sync(&tid_rx->session_timer);
216 kfree(tid_rx);
217 }
218
219 /*
220 * No need to do such complications for TX agg sessions, the
221 * path leading to freeing the tid_tx struct goes via a call
222 * from the driver, and thus needs to look up the sta struct
223 * again, which cannot be found when we get here. Hence, we
224 * just need to delete the timer and free the aggregation
225 * info; we won't be telling the peer about it then but that
226 * doesn't matter if we're not talking to it again anyway.
227 */
228 tid_tx = sta->ampdu_mlme.tid_tx[i];
229 if (tid_tx) {
230 del_timer_sync(&tid_tx->addba_resp_timer);
231 /*
232 * STA removed while aggregation session being
233 * started? Bit odd, but purge frames anyway.
234 */
235 skb_queue_purge(&tid_tx->pending);
236 kfree(tid_tx);
237 }
203 } 238 }
204 239
205 __sta_info_free(local, sta); 240 __sta_info_free(local, sta);
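
The ->shutdown handshake above covers only the sta_info_destroy() side; the code that runs from the session timer is not in this hunk. The sketch below is an assumption about what that side has to look like for the comments above to hold (the function name and cleanup details are hypothetical):

    static void example_stop_rx_ba_session(struct sta_info *sta, u16 tid)
    {
        struct tid_ampdu_rx *tid_rx;

        spin_lock_bh(&sta->lock);
        tid_rx = sta->ampdu_mlme.tid_rx[tid];
        if (!tid_rx || tid_rx->shutdown) {
            /* sta_info_destroy() owns and frees the struct now */
            spin_unlock_bh(&sta->lock);
            return;
        }
        sta->ampdu_mlme.tid_rx[tid] = NULL;
        spin_unlock_bh(&sta->lock);

        kfree(tid_rx->reorder_buf);
        kfree(tid_rx);
    }
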
@@ -246,8 +281,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 246 * enable session_timer's data differentiation. Refer to 281 * enable session_timer's data differentiation. Refer to
 247 * sta_rx_agg_session_timer_expired for usage */ 282 * sta_rx_agg_session_timer_expired for usage */
248 sta->timer_to_tid[i] = i; 283 sta->timer_to_tid[i] = i;
249 /* tid to tx queue: initialize according to HW (0 is valid) */
250 sta->tid_to_tx_q[i] = ieee80211_num_queues(&local->hw);
251 /* rx */ 284 /* rx */
252 sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE; 285 sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE;
253 sta->ampdu_mlme.tid_rx[i] = NULL; 286 sta->ampdu_mlme.tid_rx[i] = NULL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index e49a5b99cf10..5534d489f506 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -34,6 +34,9 @@
34 * @WLAN_STA_CLEAR_PS_FILT: Clear PS filter in hardware (using the 34 * @WLAN_STA_CLEAR_PS_FILT: Clear PS filter in hardware (using the
35 * IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next 35 * IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next
36 * frame to this station is transmitted. 36 * frame to this station is transmitted.
37 * @WLAN_STA_MFP: Management frame protection is used with this STA.
38 * @WLAN_STA_SUSPEND: Set/cleared during a suspend/resume cycle.
39 * Used to deny ADDBA requests (both TX and RX).
37 */ 40 */
38enum ieee80211_sta_info_flags { 41enum ieee80211_sta_info_flags {
39 WLAN_STA_AUTH = 1<<0, 42 WLAN_STA_AUTH = 1<<0,
@@ -46,6 +49,8 @@ enum ieee80211_sta_info_flags {
46 WLAN_STA_WDS = 1<<7, 49 WLAN_STA_WDS = 1<<7,
47 WLAN_STA_PSPOLL = 1<<8, 50 WLAN_STA_PSPOLL = 1<<8,
48 WLAN_STA_CLEAR_PS_FILT = 1<<9, 51 WLAN_STA_CLEAR_PS_FILT = 1<<9,
52 WLAN_STA_MFP = 1<<10,
53 WLAN_STA_SUSPEND = 1<<11
49}; 54};
50 55
51#define STA_TID_NUM 16 56#define STA_TID_NUM 16
@@ -63,17 +68,18 @@ enum ieee80211_sta_info_flags {
63#define HT_AGG_STATE_OPERATIONAL (HT_ADDBA_REQUESTED_MSK | \ 68#define HT_AGG_STATE_OPERATIONAL (HT_ADDBA_REQUESTED_MSK | \
64 HT_ADDBA_DRV_READY_MSK | \ 69 HT_ADDBA_DRV_READY_MSK | \
65 HT_ADDBA_RECEIVED_MSK) 70 HT_ADDBA_RECEIVED_MSK)
66#define HT_AGG_STATE_DEBUGFS_CTL BIT(7)
67 71
68/** 72/**
69 * struct tid_ampdu_tx - TID aggregation information (Tx). 73 * struct tid_ampdu_tx - TID aggregation information (Tx).
70 * 74 *
71 * @addba_resp_timer: timer for peer's response to addba request 75 * @addba_resp_timer: timer for peer's response to addba request
76 * @pending: pending frames queue -- use sta's spinlock to protect
72 * @ssn: Starting Sequence Number expected to be aggregated. 77 * @ssn: Starting Sequence Number expected to be aggregated.
73 * @dialog_token: dialog token for aggregation session 78 * @dialog_token: dialog token for aggregation session
74 */ 79 */
75struct tid_ampdu_tx { 80struct tid_ampdu_tx {
76 struct timer_list addba_resp_timer; 81 struct timer_list addba_resp_timer;
82 struct sk_buff_head pending;
77 u16 ssn; 83 u16 ssn;
78 u8 dialog_token; 84 u8 dialog_token;
79}; 85};
@@ -87,8 +93,9 @@ struct tid_ampdu_tx {
87 * @stored_mpdu_num: number of MPDUs in reordering buffer 93 * @stored_mpdu_num: number of MPDUs in reordering buffer
88 * @ssn: Starting Sequence Number expected to be aggregated. 94 * @ssn: Starting Sequence Number expected to be aggregated.
89 * @buf_size: buffer size for incoming A-MPDUs 95 * @buf_size: buffer size for incoming A-MPDUs
90 * @timeout: reset timer value. 96 * @timeout: reset timer value (in TUs).
91 * @dialog_token: dialog token for aggregation session 97 * @dialog_token: dialog token for aggregation session
98 * @shutdown: this session is being shut down due to STA removal
92 */ 99 */
93struct tid_ampdu_rx { 100struct tid_ampdu_rx {
94 struct sk_buff **reorder_buf; 101 struct sk_buff **reorder_buf;
@@ -99,6 +106,7 @@ struct tid_ampdu_rx {
99 u16 buf_size; 106 u16 buf_size;
100 u16 timeout; 107 u16 timeout;
101 u8 dialog_token; 108 u8 dialog_token;
109 bool shutdown;
102}; 110};
103 111
104/** 112/**
@@ -198,7 +206,6 @@ struct sta_ampdu_mlme {
198 * @tid_seq: per-TID sequence numbers for sending to this STA 206 * @tid_seq: per-TID sequence numbers for sending to this STA
199 * @ampdu_mlme: A-MPDU state machine state 207 * @ampdu_mlme: A-MPDU state machine state
200 * @timer_to_tid: identity mapping to ID timers 208 * @timer_to_tid: identity mapping to ID timers
201 * @tid_to_tx_q: map tid to tx queue
202 * @llid: Local link ID 209 * @llid: Local link ID
203 * @plid: Peer link ID 210 * @plid: Peer link ID
204 * @reason: Cancel reason on PLINK_HOLDING state 211 * @reason: Cancel reason on PLINK_HOLDING state
@@ -273,7 +280,6 @@ struct sta_info {
273 */ 280 */
274 struct sta_ampdu_mlme ampdu_mlme; 281 struct sta_ampdu_mlme ampdu_mlme;
275 u8 timer_to_tid[STA_TID_NUM]; 282 u8 timer_to_tid[STA_TID_NUM];
276 u8 tid_to_tx_q[STA_TID_NUM];
277 283
278#ifdef CONFIG_MAC80211_MESH 284#ifdef CONFIG_MAC80211_MESH
279 /* 285 /*
@@ -382,8 +388,6 @@ static inline u32 get_sta_flags(struct sta_info *sta)
382} 388}
383 389
384 390
385/* Maximum number of concurrently registered stations */
386#define MAX_STA_COUNT 2007
387 391
388#define STA_HASH_SIZE 256 392#define STA_HASH_SIZE 256
389#define STA_HASH(sta) (sta[5]) 393#define STA_HASH(sta) (sta[5])
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 94de5033f0b6..3fb04a86444d 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -34,7 +34,7 @@
34 34
35#define IEEE80211_TX_OK 0 35#define IEEE80211_TX_OK 0
36#define IEEE80211_TX_AGAIN 1 36#define IEEE80211_TX_AGAIN 1
37#define IEEE80211_TX_FRAG_AGAIN 2 37#define IEEE80211_TX_PENDING 2
38 38
39/* misc utils */ 39/* misc utils */
40 40
@@ -192,7 +192,19 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
192 return TX_CONTINUE; 192 return TX_CONTINUE;
193 193
194 if (unlikely(tx->local->sw_scanning) && 194 if (unlikely(tx->local->sw_scanning) &&
195 !ieee80211_is_probe_req(hdr->frame_control)) 195 !ieee80211_is_probe_req(hdr->frame_control) &&
196 !ieee80211_is_nullfunc(hdr->frame_control))
197 /*
198 * When software scanning only nullfunc frames (to notify
199 * the sleep state to the AP) and probe requests (for the
200 * active scan) are allowed, all other frames should not be
201 * sent and we should not get here, but if we do
202 * nonetheless, drop them to avoid sending them
203 * off-channel. See the link below and
204 * ieee80211_start_scan() for more.
205 *
206 * http://article.gmane.org/gmane.linux.kernel.wireless.general/30089
207 */
196 return TX_DROP; 208 return TX_DROP;
197 209
198 if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT) 210 if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
@@ -330,6 +342,22 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx)
330 return TX_CONTINUE; 342 return TX_CONTINUE;
331} 343}
332 344
345static int ieee80211_use_mfp(__le16 fc, struct sta_info *sta,
346 struct sk_buff *skb)
347{
348 if (!ieee80211_is_mgmt(fc))
349 return 0;
350
351 if (sta == NULL || !test_sta_flags(sta, WLAN_STA_MFP))
352 return 0;
353
354 if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *)
355 skb->data))
356 return 0;
357
358 return 1;
359}
360
333static ieee80211_tx_result 361static ieee80211_tx_result
334ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) 362ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
335{ 363{
@@ -409,11 +437,17 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
409 tx->key = NULL; 437 tx->key = NULL;
410 else if (tx->sta && (key = rcu_dereference(tx->sta->key))) 438 else if (tx->sta && (key = rcu_dereference(tx->sta->key)))
411 tx->key = key; 439 tx->key = key;
440 else if (ieee80211_is_mgmt(hdr->frame_control) &&
441 (key = rcu_dereference(tx->sdata->default_mgmt_key)))
442 tx->key = key;
412 else if ((key = rcu_dereference(tx->sdata->default_key))) 443 else if ((key = rcu_dereference(tx->sdata->default_key)))
413 tx->key = key; 444 tx->key = key;
414 else if (tx->sdata->drop_unencrypted && 445 else if (tx->sdata->drop_unencrypted &&
415 (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) && 446 (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) &&
416 !(info->flags & IEEE80211_TX_CTL_INJECTED)) { 447 !(info->flags & IEEE80211_TX_CTL_INJECTED) &&
448 (!ieee80211_is_robust_mgmt_frame(hdr) ||
449 (ieee80211_is_action(hdr->frame_control) &&
450 tx->sta && test_sta_flags(tx->sta, WLAN_STA_MFP)))) {
417 I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); 451 I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted);
418 return TX_DROP; 452 return TX_DROP;
419 } else 453 } else
@@ -428,10 +462,19 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
428 if (ieee80211_is_auth(hdr->frame_control)) 462 if (ieee80211_is_auth(hdr->frame_control))
429 break; 463 break;
430 case ALG_TKIP: 464 case ALG_TKIP:
431 case ALG_CCMP:
432 if (!ieee80211_is_data_present(hdr->frame_control)) 465 if (!ieee80211_is_data_present(hdr->frame_control))
433 tx->key = NULL; 466 tx->key = NULL;
434 break; 467 break;
468 case ALG_CCMP:
469 if (!ieee80211_is_data_present(hdr->frame_control) &&
470 !ieee80211_use_mfp(hdr->frame_control, tx->sta,
471 tx->skb))
472 tx->key = NULL;
473 break;
474 case ALG_AES_CMAC:
475 if (!ieee80211_is_mgmt(hdr->frame_control))
476 tx->key = NULL;
477 break;
435 } 478 }
436 } 479 }
437 480
@@ -658,17 +701,62 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
658 return TX_CONTINUE; 701 return TX_CONTINUE;
659} 702}
660 703
704static int ieee80211_fragment(struct ieee80211_local *local,
705 struct sk_buff *skb, int hdrlen,
706 int frag_threshold)
707{
708 struct sk_buff *tail = skb, *tmp;
709 int per_fragm = frag_threshold - hdrlen - FCS_LEN;
710 int pos = hdrlen + per_fragm;
711 int rem = skb->len - hdrlen - per_fragm;
712
713 if (WARN_ON(rem < 0))
714 return -EINVAL;
715
716 while (rem) {
717 int fraglen = per_fragm;
718
719 if (fraglen > rem)
720 fraglen = rem;
721 rem -= fraglen;
722 tmp = dev_alloc_skb(local->tx_headroom +
723 frag_threshold +
724 IEEE80211_ENCRYPT_HEADROOM +
725 IEEE80211_ENCRYPT_TAILROOM);
726 if (!tmp)
727 return -ENOMEM;
728 tail->next = tmp;
729 tail = tmp;
730 skb_reserve(tmp, local->tx_headroom +
731 IEEE80211_ENCRYPT_HEADROOM);
732 /* copy control information */
733 memcpy(tmp->cb, skb->cb, sizeof(tmp->cb));
734 skb_copy_queue_mapping(tmp, skb);
735 tmp->priority = skb->priority;
736 tmp->do_not_encrypt = skb->do_not_encrypt;
737 tmp->dev = skb->dev;
738 tmp->iif = skb->iif;
739
740 /* copy header and data */
741 memcpy(skb_put(tmp, hdrlen), skb->data, hdrlen);
742 memcpy(skb_put(tmp, fraglen), skb->data + pos, fraglen);
743
744 pos += fraglen;
745 }
746
747 skb->len = hdrlen + per_fragm;
748 return 0;
749}
750
661static ieee80211_tx_result debug_noinline 751static ieee80211_tx_result debug_noinline
662ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) 752ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
663{ 753{
664 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); 754 struct sk_buff *skb = tx->skb;
665 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; 755 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
666 size_t hdrlen, per_fragm, num_fragm, payload_len, left; 756 struct ieee80211_hdr *hdr = (void *)skb->data;
667 struct sk_buff **frags, *first, *frag;
668 int i;
669 u16 seq;
670 u8 *pos;
671 int frag_threshold = tx->local->fragmentation_threshold; 757 int frag_threshold = tx->local->fragmentation_threshold;
758 int hdrlen;
759 int fragnum;
672 760
673 if (!(tx->flags & IEEE80211_TX_FRAGMENTED)) 761 if (!(tx->flags & IEEE80211_TX_FRAGMENTED))
674 return TX_CONTINUE; 762 return TX_CONTINUE;
@@ -681,58 +769,35 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
681 if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) 769 if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU))
682 return TX_DROP; 770 return TX_DROP;
683 771
684 first = tx->skb;
685
686 hdrlen = ieee80211_hdrlen(hdr->frame_control); 772 hdrlen = ieee80211_hdrlen(hdr->frame_control);
687 payload_len = first->len - hdrlen;
688 per_fragm = frag_threshold - hdrlen - FCS_LEN;
689 num_fragm = DIV_ROUND_UP(payload_len, per_fragm);
690
691 frags = kzalloc(num_fragm * sizeof(struct sk_buff *), GFP_ATOMIC);
692 if (!frags)
693 goto fail;
694
695 hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREFRAGS);
696 seq = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ;
697 pos = first->data + hdrlen + per_fragm;
698 left = payload_len - per_fragm;
699 for (i = 0; i < num_fragm - 1; i++) {
700 struct ieee80211_hdr *fhdr;
701 size_t copylen;
702
703 if (left <= 0)
704 goto fail;
705 773
706 /* reserve enough extra head and tail room for possible 774 /* internal error, why is TX_FRAGMENTED set? */
707 * encryption */ 775 if (WARN_ON(skb->len <= frag_threshold))
708 frag = frags[i] = 776 return TX_DROP;
709 dev_alloc_skb(tx->local->tx_headroom +
710 frag_threshold +
711 IEEE80211_ENCRYPT_HEADROOM +
712 IEEE80211_ENCRYPT_TAILROOM);
713 if (!frag)
714 goto fail;
715
716 /* Make sure that all fragments use the same priority so
717 * that they end up using the same TX queue */
718 frag->priority = first->priority;
719 777
720 skb_reserve(frag, tx->local->tx_headroom + 778 /*
721 IEEE80211_ENCRYPT_HEADROOM); 779 * Now fragment the frame. This will allocate all the fragments and
780 * chain them (using skb as the first fragment) to skb->next.
781 * During transmission, we will remove the successfully transmitted
782 * fragments from this list. When the low-level driver rejects one
783 * of the fragments then we will simply pretend to accept the skb
784 * but store it away as pending.
785 */
786 if (ieee80211_fragment(tx->local, skb, hdrlen, frag_threshold))
787 return TX_DROP;
722 788
723 /* copy TX information */ 789 /* update duration/seq/flags of fragments */
724 info = IEEE80211_SKB_CB(frag); 790 fragnum = 0;
725 memcpy(info, first->cb, sizeof(frag->cb)); 791 do {
792 int next_len;
793 const __le16 morefrags = cpu_to_le16(IEEE80211_FCTL_MOREFRAGS);
726 794
727 /* copy/fill in 802.11 header */ 795 hdr = (void *)skb->data;
728 fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen); 796 info = IEEE80211_SKB_CB(skb);
729 memcpy(fhdr, first->data, hdrlen);
730 fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG));
731 797
732 if (i == num_fragm - 2) { 798 if (skb->next) {
733 /* clear MOREFRAGS bit for the last fragment */ 799 hdr->frame_control |= morefrags;
734 fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS); 800 next_len = skb->next->len;
735 } else {
736 /* 801 /*
737 * No multi-rate retries for fragmented frames, that 802 * No multi-rate retries for fragmented frames, that
738 * would completely throw off the NAV at other STAs. 803 * would completely throw off the NAV at other STAs.
@@ -743,35 +808,16 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx)
743 info->control.rates[4].idx = -1; 808 info->control.rates[4].idx = -1;
744 BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5); 809 BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5);
745 info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; 810 info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE;
811 } else {
812 hdr->frame_control &= ~morefrags;
813 next_len = 0;
746 } 814 }
747 815 hdr->duration_id = ieee80211_duration(tx, 0, next_len);
748 /* copy data */ 816 hdr->seq_ctrl |= cpu_to_le16(fragnum & IEEE80211_SCTL_FRAG);
749 copylen = left > per_fragm ? per_fragm : left; 817 fragnum++;
750 memcpy(skb_put(frag, copylen), pos, copylen); 818 } while ((skb = skb->next));
751
752 skb_copy_queue_mapping(frag, first);
753
754 frag->do_not_encrypt = first->do_not_encrypt;
755
756 pos += copylen;
757 left -= copylen;
758 }
759 skb_trim(first, hdrlen + per_fragm);
760
761 tx->num_extra_frag = num_fragm - 1;
762 tx->extra_frag = frags;
763 819
764 return TX_CONTINUE; 820 return TX_CONTINUE;
765
766 fail:
767 if (frags) {
768 for (i = 0; i < num_fragm - 1; i++)
769 if (frags[i])
770 dev_kfree_skb(frags[i]);
771 kfree(frags);
772 }
773 I802_DEBUG_INC(tx->local->tx_handlers_drop_fragment);
774 return TX_DROP;
775} 821}
776 822
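
A quick worked example of the arithmetic in the new ieee80211_fragment() above, taking FCS_LEN as the usual 4 bytes (numbers purely illustrative):

    /* frag_threshold = 256, hdrlen = 24
     *   per_fragm = 256 - 24 - 4 = 228 payload bytes per MPDU
     * For a frame carrying 1000 payload bytes:
     *   the original skb keeps the first 228, rem = 772
     *   three more fragments carry 228 each, the last carries 88
     * -> five MPDUs in total, chained through skb->next, each later given
     *    its own duration, fragment number and MOREFRAGS bit. */
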
777static ieee80211_tx_result debug_noinline 823static ieee80211_tx_result debug_noinline
@@ -787,6 +833,8 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
787 return ieee80211_crypto_tkip_encrypt(tx); 833 return ieee80211_crypto_tkip_encrypt(tx);
788 case ALG_CCMP: 834 case ALG_CCMP:
789 return ieee80211_crypto_ccmp_encrypt(tx); 835 return ieee80211_crypto_ccmp_encrypt(tx);
836 case ALG_AES_CMAC:
837 return ieee80211_crypto_aes_cmac_encrypt(tx);
790 } 838 }
791 839
792 /* not reached */ 840 /* not reached */
@@ -797,27 +845,19 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
 static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx)
 {
-    struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
-    int next_len, i;
-    int group_addr = is_multicast_ether_addr(hdr->addr1);
-
-    if (!(tx->flags & IEEE80211_TX_FRAGMENTED)) {
-        hdr->duration_id = ieee80211_duration(tx, group_addr, 0);
-        return TX_CONTINUE;
-    }
-
-    hdr->duration_id = ieee80211_duration(tx, group_addr,
-                          tx->extra_frag[0]->len);
+    struct sk_buff *skb = tx->skb;
+    struct ieee80211_hdr *hdr;
+    int next_len;
+    bool group_addr;
 
-    for (i = 0; i < tx->num_extra_frag; i++) {
-        if (i + 1 < tx->num_extra_frag)
-            next_len = tx->extra_frag[i + 1]->len;
-        else
-            next_len = 0;
-
-        hdr = (struct ieee80211_hdr *)tx->extra_frag[i]->data;
-        hdr->duration_id = ieee80211_duration(tx, 0, next_len);
-    }
+    do {
+        hdr = (void *) skb->data;
+        next_len = skb->next ? skb->next->len : 0;
+        group_addr = is_multicast_ether_addr(hdr->addr1);
+
+        hdr->duration_id =
+            ieee80211_duration(tx, group_addr, next_len);
+    } while ((skb = skb->next));
 
     return TX_CONTINUE;
 }
@@ -825,24 +865,20 @@ ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx)
 static ieee80211_tx_result debug_noinline
 ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
 {
-    int i;
+    struct sk_buff *skb = tx->skb;
 
     if (!tx->sta)
         return TX_CONTINUE;
 
     tx->sta->tx_packets++;
-    tx->sta->tx_fragments++;
-    tx->sta->tx_bytes += tx->skb->len;
-    if (tx->extra_frag) {
-        tx->sta->tx_fragments += tx->num_extra_frag;
-        for (i = 0; i < tx->num_extra_frag; i++)
-            tx->sta->tx_bytes += tx->extra_frag[i]->len;
-    }
+    do {
+        tx->sta->tx_fragments++;
+        tx->sta->tx_bytes += skb->len;
+    } while ((skb = skb->next));
 
     return TX_CONTINUE;
 }
 
-
 /* actual transmit path */
 
 /*
@@ -948,9 +984,9 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
948 struct ieee80211_hdr *hdr; 984 struct ieee80211_hdr *hdr;
949 struct ieee80211_sub_if_data *sdata; 985 struct ieee80211_sub_if_data *sdata;
950 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 986 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
951
952 int hdrlen, tid; 987 int hdrlen, tid;
953 u8 *qc, *state; 988 u8 *qc, *state;
989 bool queued = false;
954 990
955 memset(tx, 0, sizeof(*tx)); 991 memset(tx, 0, sizeof(*tx));
956 tx->skb = skb; 992 tx->skb = skb;
@@ -977,17 +1013,53 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx,
977 */ 1013 */
978 } 1014 }
979 1015
1016 /*
1017 * If this flag is set to true anywhere, and we get here,
1018 * we are doing the needed processing, so remove the flag
1019 * now.
1020 */
1021 info->flags &= ~IEEE80211_TX_INTFL_NEED_TXPROCESSING;
1022
980 hdr = (struct ieee80211_hdr *) skb->data; 1023 hdr = (struct ieee80211_hdr *) skb->data;
981 1024
982 tx->sta = sta_info_get(local, hdr->addr1); 1025 tx->sta = sta_info_get(local, hdr->addr1);
983 1026
984 if (tx->sta && ieee80211_is_data_qos(hdr->frame_control)) { 1027 if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) &&
1028 (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) {
1029 unsigned long flags;
1030 struct tid_ampdu_tx *tid_tx;
1031
985 qc = ieee80211_get_qos_ctl(hdr); 1032 qc = ieee80211_get_qos_ctl(hdr);
986 tid = *qc & IEEE80211_QOS_CTL_TID_MASK; 1033 tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
987 1034
1035 spin_lock_irqsave(&tx->sta->lock, flags);
1036 /*
1037 * XXX: This spinlock could be fairly expensive, but see the
1038 * comment in agg-tx.c:ieee80211_agg_tx_operational().
1039 * One way to solve this would be to do something RCU-like
1040 * for managing the tid_tx struct and using atomic bitops
1041 * for the actual state -- by introducing an actual
1042 * 'operational' bit that would be possible. It would
1043 * require changing ieee80211_agg_tx_operational() to
1044 * set that bit, and changing the way tid_tx is managed
1045 * everywhere, including races between that bit and
1046 * tid_tx going away (tid_tx being added can be easily
1047 * committed to memory before the 'operational' bit).
1048 */
1049 tid_tx = tx->sta->ampdu_mlme.tid_tx[tid];
988 state = &tx->sta->ampdu_mlme.tid_state_tx[tid]; 1050 state = &tx->sta->ampdu_mlme.tid_state_tx[tid];
989 if (*state == HT_AGG_STATE_OPERATIONAL) 1051 if (*state == HT_AGG_STATE_OPERATIONAL) {
990 info->flags |= IEEE80211_TX_CTL_AMPDU; 1052 info->flags |= IEEE80211_TX_CTL_AMPDU;
1053 } else if (*state != HT_AGG_STATE_IDLE) {
1054 /* in progress */
1055 queued = true;
1056 info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
1057 __skb_queue_tail(&tid_tx->pending, skb);
1058 }
1059 spin_unlock_irqrestore(&tx->sta->lock, flags);
1060
1061 if (unlikely(queued))
1062 return TX_QUEUED;
991 } 1063 }
992 1064
993 if (is_multicast_ether_addr(hdr->addr1)) { 1065 if (is_multicast_ether_addr(hdr->addr1)) {
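The XXX comment above sketches a lock-free alternative: publish tid_tx with RCU-like semantics and test an explicit 'operational' flag instead of taking sta->lock on every frame. A rough user-space analogue of that idea, using C11 atomics purely for illustration (none of these names exist in mac80211):

#include <stdatomic.h>
#include <stdbool.h>

/* hypothetical per-TID aggregation state */
struct tid_tx {
    atomic_bool operational;    /* set once the session is running */
    /* ... pending queue, dialog token, etc. ... */
};

/* one slot per TID, published by the setup path */
static _Atomic(struct tid_tx *) tid_tx_slot[16];

/* fast path: no lock, one dependent load plus one flag test */
bool tid_is_operational(int tid)
{
    struct tid_tx *t = atomic_load_explicit(&tid_tx_slot[tid],
                                            memory_order_acquire);
    return t && atomic_load_explicit(&t->operational,
                                     memory_order_acquire);
}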
@@ -1038,51 +1110,55 @@ static int ieee80211_tx_prepare(struct ieee80211_local *local,
1038 } 1110 }
1039 if (unlikely(!dev)) 1111 if (unlikely(!dev))
1040 return -ENODEV; 1112 return -ENODEV;
1041 /* initialises tx with control */ 1113 /*
1114 * initialises tx with control
1115 *
1116 * return value is safe to ignore here because this function
1117 * can only be invoked for multicast frames
1118 *
1119 * XXX: clean up
1120 */
1042 __ieee80211_tx_prepare(tx, skb, dev); 1121 __ieee80211_tx_prepare(tx, skb, dev);
1043 dev_put(dev); 1122 dev_put(dev);
1044 return 0; 1123 return 0;
1045} 1124}
1046 1125
1047static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, 1126static int __ieee80211_tx(struct ieee80211_local *local,
1048 struct ieee80211_tx_data *tx) 1127 struct sk_buff **skbp,
1128 struct sta_info *sta)
1049{ 1129{
1130 struct sk_buff *skb = *skbp, *next;
1050 struct ieee80211_tx_info *info; 1131 struct ieee80211_tx_info *info;
1051 int ret, i; 1132 int ret, len;
1133 bool fragm = false;
1052 1134
1053 if (skb) { 1135 local->mdev->trans_start = jiffies;
1054 if (netif_subqueue_stopped(local->mdev, skb))
1055 return IEEE80211_TX_AGAIN;
1056 info = IEEE80211_SKB_CB(skb);
1057 1136
1058 ret = local->ops->tx(local_to_hw(local), skb); 1137 while (skb) {
1059 if (ret) 1138 if (ieee80211_queue_stopped(&local->hw,
1060 return IEEE80211_TX_AGAIN; 1139 skb_get_queue_mapping(skb)))
1061 local->mdev->trans_start = jiffies; 1140 return IEEE80211_TX_PENDING;
1062 ieee80211_led_tx(local, 1); 1141
1063 } 1142 info = IEEE80211_SKB_CB(skb);
1064 if (tx->extra_frag) { 1143
1065 for (i = 0; i < tx->num_extra_frag; i++) { 1144 if (fragm)
1066 if (!tx->extra_frag[i])
1067 continue;
1068 info = IEEE80211_SKB_CB(tx->extra_frag[i]);
1069 info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT | 1145 info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT |
1070 IEEE80211_TX_CTL_FIRST_FRAGMENT); 1146 IEEE80211_TX_CTL_FIRST_FRAGMENT);
1071 if (netif_subqueue_stopped(local->mdev, 1147
1072 tx->extra_frag[i])) 1148 next = skb->next;
1073 return IEEE80211_TX_FRAG_AGAIN; 1149 len = skb->len;
1074 1150 ret = local->ops->tx(local_to_hw(local), skb);
1075 ret = local->ops->tx(local_to_hw(local), 1151 if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) {
1076 tx->extra_frag[i]); 1152 dev_kfree_skb(skb);
1077 if (ret) 1153 ret = NETDEV_TX_OK;
1078 return IEEE80211_TX_FRAG_AGAIN;
1079 local->mdev->trans_start = jiffies;
1080 ieee80211_led_tx(local, 1);
1081 tx->extra_frag[i] = NULL;
1082 } 1154 }
1083 kfree(tx->extra_frag); 1155 if (ret != NETDEV_TX_OK)
1084 tx->extra_frag = NULL; 1156 return IEEE80211_TX_AGAIN;
1157 *skbp = skb = next;
1158 ieee80211_led_tx(local, 1);
1159 fragm = true;
1085 } 1160 }
1161
1086 return IEEE80211_TX_OK; 1162 return IEEE80211_TX_OK;
1087} 1163}
1088 1164
@@ -1094,7 +1170,6 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
 {
     struct sk_buff *skb = tx->skb;
     ieee80211_tx_result res = TX_DROP;
-    int i;
 
 #define CALL_TXH(txh) \
     res = txh(tx);    \
@@ -1118,11 +1193,13 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
  txh_done:
     if (unlikely(res == TX_DROP)) {
         I802_DEBUG_INC(tx->local->tx_handlers_drop);
-        dev_kfree_skb(skb);
-        for (i = 0; i < tx->num_extra_frag; i++)
-            if (tx->extra_frag[i])
-                dev_kfree_skb(tx->extra_frag[i]);
-        kfree(tx->extra_frag);
+        while (skb) {
+            struct sk_buff *next;
+
+            next = skb->next;
+            dev_kfree_skb(skb);
+            skb = next;
+        }
         return -1;
     } else if (unlikely(res == TX_QUEUED)) {
         I802_DEBUG_INC(tx->local->tx_handlers_queued);
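The drop path above now frees a whole fragment chain; the important detail is that skb->next must be read before the buffer is freed. The same idiom on a generic list, as a small illustrative sketch:

#include <stdlib.h>

struct node { struct node *next; };

/* free a singly linked chain: grab ->next before freeing the node */
void free_chain(struct node *n)
{
    while (n) {
        struct node *next = n->next;

        free(n);
        n = next;   /* never touch n after free() */
    }
}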
@@ -1132,23 +1209,26 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
1132 return 0; 1209 return 0;
1133} 1210}
1134 1211
1135static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) 1212static void ieee80211_tx(struct net_device *dev, struct sk_buff *skb,
1213 bool txpending)
1136{ 1214{
1137 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 1215 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1138 struct sta_info *sta; 1216 struct sta_info *sta;
1139 struct ieee80211_tx_data tx; 1217 struct ieee80211_tx_data tx;
1140 ieee80211_tx_result res_prepare; 1218 ieee80211_tx_result res_prepare;
1141 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); 1219 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
1142 int ret, i; 1220 struct sk_buff *next;
1221 unsigned long flags;
1222 int ret, retries;
1143 u16 queue; 1223 u16 queue;
1144 1224
1145 queue = skb_get_queue_mapping(skb); 1225 queue = skb_get_queue_mapping(skb);
1146 1226
1147 WARN_ON(test_bit(queue, local->queues_pending)); 1227 WARN_ON(!txpending && !skb_queue_empty(&local->pending[queue]));
1148 1228
1149 if (unlikely(skb->len < 10)) { 1229 if (unlikely(skb->len < 10)) {
1150 dev_kfree_skb(skb); 1230 dev_kfree_skb(skb);
1151 return 0; 1231 return;
1152 } 1232 }
1153 1233
1154 rcu_read_lock(); 1234 rcu_read_lock();
@@ -1156,10 +1236,13 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb)
1156 /* initialises tx */ 1236 /* initialises tx */
1157 res_prepare = __ieee80211_tx_prepare(&tx, skb, dev); 1237 res_prepare = __ieee80211_tx_prepare(&tx, skb, dev);
1158 1238
1159 if (res_prepare == TX_DROP) { 1239 if (unlikely(res_prepare == TX_DROP)) {
1160 dev_kfree_skb(skb); 1240 dev_kfree_skb(skb);
1161 rcu_read_unlock(); 1241 rcu_read_unlock();
1162 return 0; 1242 return;
1243 } else if (unlikely(res_prepare == TX_QUEUED)) {
1244 rcu_read_unlock();
1245 return;
1163 } 1246 }
1164 1247
1165 sta = tx.sta; 1248 sta = tx.sta;
@@ -1169,11 +1252,13 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb)
1169 if (invoke_tx_handlers(&tx)) 1252 if (invoke_tx_handlers(&tx))
1170 goto out; 1253 goto out;
1171 1254
1172retry: 1255 retries = 0;
1173 ret = __ieee80211_tx(local, skb, &tx); 1256 retry:
1174 if (ret) { 1257 ret = __ieee80211_tx(local, &tx.skb, tx.sta);
1175 struct ieee80211_tx_stored_packet *store; 1258 switch (ret) {
1176 1259 case IEEE80211_TX_OK:
1260 break;
1261 case IEEE80211_TX_AGAIN:
1177 /* 1262 /*
1178 * Since there are no fragmented frames on A-MPDU 1263 * Since there are no fragmented frames on A-MPDU
1179 * queues, there's no reason for a driver to reject 1264 * queues, there's no reason for a driver to reject
@@ -1181,46 +1266,57 @@ retry:
1181 */ 1266 */
1182 if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) 1267 if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU))
1183 goto drop; 1268 goto drop;
1269 /* fall through */
1270 case IEEE80211_TX_PENDING:
1271 skb = tx.skb;
1272
1273 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
1274
1275 if (__netif_subqueue_stopped(local->mdev, queue)) {
1276 do {
1277 next = skb->next;
1278 skb->next = NULL;
1279 if (unlikely(txpending))
1280 skb_queue_head(&local->pending[queue],
1281 skb);
1282 else
1283 skb_queue_tail(&local->pending[queue],
1284 skb);
1285 } while ((skb = next));
1184 1286
1185 store = &local->pending_packet[queue]; 1287 /*
1288 * Make sure nobody will enable the queue on us
1289 * (without going through the tasklet) nor disable the
1290 * netdev queue underneath the pending handling code.
1291 */
1292 __set_bit(IEEE80211_QUEUE_STOP_REASON_PENDING,
1293 &local->queue_stop_reasons[queue]);
1186 1294
1187 if (ret == IEEE80211_TX_FRAG_AGAIN) 1295 spin_unlock_irqrestore(&local->queue_stop_reason_lock,
1188 skb = NULL; 1296 flags);
1297 } else {
1298 spin_unlock_irqrestore(&local->queue_stop_reason_lock,
1299 flags);
1189 1300
1190 set_bit(queue, local->queues_pending); 1301 retries++;
1191 smp_mb(); 1302 if (WARN(retries > 10, "tx refused but queue active"))
1192 /* 1303 goto drop;
1193 * When the driver gets out of buffers during sending of
1194 * fragments and calls ieee80211_stop_queue, the netif
1195 * subqueue is stopped. There is, however, a small window
1196 * in which the PENDING bit is not yet set. If a buffer
1197 * gets available in that window (i.e. driver calls
1198 * ieee80211_wake_queue), we would end up with ieee80211_tx
1199 * called with the PENDING bit still set. Prevent this by
1200 * continuing transmitting here when that situation is
1201 * possible to have happened.
1202 */
1203 if (!__netif_subqueue_stopped(local->mdev, queue)) {
1204 clear_bit(queue, local->queues_pending);
1205 goto retry; 1304 goto retry;
1206 } 1305 }
1207 store->skb = skb;
1208 store->extra_frag = tx.extra_frag;
1209 store->num_extra_frag = tx.num_extra_frag;
1210 } 1306 }
1211 out: 1307 out:
1212 rcu_read_unlock(); 1308 rcu_read_unlock();
1213 return 0; 1309 return;
1214 1310
1215 drop: 1311 drop:
1216 if (skb)
1217 dev_kfree_skb(skb);
1218 for (i = 0; i < tx.num_extra_frag; i++)
1219 if (tx.extra_frag[i])
1220 dev_kfree_skb(tx.extra_frag[i]);
1221 kfree(tx.extra_frag);
1222 rcu_read_unlock(); 1312 rcu_read_unlock();
1223 return 0; 1313
1314 skb = tx.skb;
1315 while (skb) {
1316 next = skb->next;
1317 dev_kfree_skb(skb);
1318 skb = next;
1319 }
1224} 1320}
1225 1321
1226/* device xmit handlers */ 1322/* device xmit handlers */
@@ -1279,7 +1375,6 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev)
1279 FOUND_SDATA, 1375 FOUND_SDATA,
1280 UNKNOWN_ADDRESS, 1376 UNKNOWN_ADDRESS,
1281 } monitor_iface = NOT_MONITOR; 1377 } monitor_iface = NOT_MONITOR;
1282 int ret;
1283 1378
1284 if (skb->iif) 1379 if (skb->iif)
1285 odev = dev_get_by_index(&init_net, skb->iif); 1380 odev = dev_get_by_index(&init_net, skb->iif);
@@ -1293,7 +1388,20 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev)
1293 "originating device\n", dev->name); 1388 "originating device\n", dev->name);
1294#endif 1389#endif
1295 dev_kfree_skb(skb); 1390 dev_kfree_skb(skb);
1296 return 0; 1391 return NETDEV_TX_OK;
1392 }
1393
1394 if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) &&
1395 local->hw.conf.dynamic_ps_timeout > 0) {
1396 if (local->hw.conf.flags & IEEE80211_CONF_PS) {
1397 ieee80211_stop_queues_by_reason(&local->hw,
1398 IEEE80211_QUEUE_STOP_REASON_PS);
1399 queue_work(local->hw.workqueue,
1400 &local->dynamic_ps_disable_work);
1401 }
1402
1403 mod_timer(&local->dynamic_ps_timer, jiffies +
1404 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
1297 } 1405 }
1298 1406
1299 memset(info, 0, sizeof(*info)); 1407 memset(info, 0, sizeof(*info));
@@ -1309,7 +1417,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev)
1309 else 1417 else
1310 if (mesh_nexthop_lookup(skb, osdata)) { 1418 if (mesh_nexthop_lookup(skb, osdata)) {
1311 dev_put(odev); 1419 dev_put(odev);
1312 return 0; 1420 return NETDEV_TX_OK;
1313 } 1421 }
1314 if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0) 1422 if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0)
1315 IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh, 1423 IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh,
@@ -1371,7 +1479,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev)
1371 if (ieee80211_skb_resize(osdata->local, skb, headroom, may_encrypt)) { 1479 if (ieee80211_skb_resize(osdata->local, skb, headroom, may_encrypt)) {
1372 dev_kfree_skb(skb); 1480 dev_kfree_skb(skb);
1373 dev_put(odev); 1481 dev_put(odev);
1374 return 0; 1482 return NETDEV_TX_OK;
1375 } 1483 }
1376 1484
1377 if (osdata->vif.type == NL80211_IFTYPE_AP_VLAN) 1485 if (osdata->vif.type == NL80211_IFTYPE_AP_VLAN)
@@ -1380,20 +1488,42 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev)
1380 u.ap); 1488 u.ap);
1381 if (likely(monitor_iface != UNKNOWN_ADDRESS)) 1489 if (likely(monitor_iface != UNKNOWN_ADDRESS))
1382 info->control.vif = &osdata->vif; 1490 info->control.vif = &osdata->vif;
1383 ret = ieee80211_tx(odev, skb); 1491
1492 ieee80211_tx(odev, skb, false);
1384 dev_put(odev); 1493 dev_put(odev);
1385 1494
1386 return ret; 1495 return NETDEV_TX_OK;
1387} 1496}
1388 1497
1389int ieee80211_monitor_start_xmit(struct sk_buff *skb, 1498int ieee80211_monitor_start_xmit(struct sk_buff *skb,
1390 struct net_device *dev) 1499 struct net_device *dev)
1391{ 1500{
1392 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); 1501 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
1502 struct ieee80211_channel *chan = local->hw.conf.channel;
1393 struct ieee80211_radiotap_header *prthdr = 1503 struct ieee80211_radiotap_header *prthdr =
1394 (struct ieee80211_radiotap_header *)skb->data; 1504 (struct ieee80211_radiotap_header *)skb->data;
1395 u16 len_rthdr; 1505 u16 len_rthdr;
1396 1506
1507 /*
1508 * Frame injection is not allowed if beaconing is not allowed
1509 * or if we need radar detection. Beaconing is usually not allowed when
1510 * the mode or operation (Adhoc, AP, Mesh) does not support DFS.
1511 * Passive scan is also used in world regulatory domains where
1512 * your country is not known and as such it should be treated as
1513 * NO TX unless the channel is explicitly allowed in which case
1514 * your current regulatory domain would not have the passive scan
1515 * flag.
1516 *
1517 * Since AP mode uses monitor interfaces to inject/TX management
1518 * frames we can make AP mode the exception to this rule once it
1519 * supports radar detection as its implementation can deal with
1520 * radar detection by itself. We can do that later by adding a
1521 * monitor flag to interfaces used for AP support.
1522 */
1523 if ((chan->flags & (IEEE80211_CHAN_NO_IBSS | IEEE80211_CHAN_RADAR |
1524 IEEE80211_CHAN_PASSIVE_SCAN)))
1525 goto fail;
1526
1397 /* check for not even having the fixed radiotap header part */ 1527 /* check for not even having the fixed radiotap header part */
1398 if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) 1528 if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header)))
1399 goto fail; /* too short to be possibly valid */ 1529 goto fail; /* too short to be possibly valid */
@@ -1477,19 +1607,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1477 goto fail; 1607 goto fail;
1478 } 1608 }
1479 1609
1480 if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) &&
1481 local->dynamic_ps_timeout > 0) {
1482 if (local->hw.conf.flags & IEEE80211_CONF_PS) {
1483 ieee80211_stop_queues_by_reason(&local->hw,
1484 IEEE80211_QUEUE_STOP_REASON_PS);
1485 queue_work(local->hw.workqueue,
1486 &local->dynamic_ps_disable_work);
1487 }
1488
1489 mod_timer(&local->dynamic_ps_timer, jiffies +
1490 msecs_to_jiffies(local->dynamic_ps_timeout));
1491 }
1492
1493 nh_pos = skb_network_header(skb) - skb->data; 1610 nh_pos = skb_network_header(skb) - skb->data;
1494 h_pos = skb_transport_header(skb) - skb->data; 1611 h_pos = skb_transport_header(skb) - skb->data;
1495 1612
@@ -1570,7 +1687,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1570 case NL80211_IFTYPE_STATION: 1687 case NL80211_IFTYPE_STATION:
1571 fc |= cpu_to_le16(IEEE80211_FCTL_TODS); 1688 fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
1572 /* BSSID SA DA */ 1689 /* BSSID SA DA */
1573 memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); 1690 memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN);
1574 memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); 1691 memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
1575 memcpy(hdr.addr3, skb->data, ETH_ALEN); 1692 memcpy(hdr.addr3, skb->data, ETH_ALEN);
1576 hdrlen = 24; 1693 hdrlen = 24;
@@ -1579,7 +1696,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1579 /* DA SA BSSID */ 1696 /* DA SA BSSID */
1580 memcpy(hdr.addr1, skb->data, ETH_ALEN); 1697 memcpy(hdr.addr1, skb->data, ETH_ALEN);
1581 memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); 1698 memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN);
1582 memcpy(hdr.addr3, sdata->u.sta.bssid, ETH_ALEN); 1699 memcpy(hdr.addr3, sdata->u.ibss.bssid, ETH_ALEN);
1583 hdrlen = 24; 1700 hdrlen = 24;
1584 break; 1701 break;
1585 default: 1702 default:
@@ -1601,8 +1718,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1601 } 1718 }
1602 1719
1603 /* receiver and we are QoS enabled, use a QoS type frame */ 1720 /* receiver and we are QoS enabled, use a QoS type frame */
1604 if (sta_flags & WLAN_STA_WME && 1721 if ((sta_flags & WLAN_STA_WME) && local->hw.queues >= 4) {
1605 ieee80211_num_regular_queues(&local->hw) >= 4) {
1606 fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); 1722 fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA);
1607 hdrlen += 2; 1723 hdrlen += 2;
1608 } 1724 }
@@ -1734,19 +1850,58 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb,
1734 */ 1850 */
1735void ieee80211_clear_tx_pending(struct ieee80211_local *local) 1851void ieee80211_clear_tx_pending(struct ieee80211_local *local)
1736{ 1852{
1737 int i, j; 1853 int i;
1738 struct ieee80211_tx_stored_packet *store;
1739 1854
1740 for (i = 0; i < ieee80211_num_regular_queues(&local->hw); i++) { 1855 for (i = 0; i < local->hw.queues; i++)
1741 if (!test_bit(i, local->queues_pending)) 1856 skb_queue_purge(&local->pending[i]);
1742 continue; 1857}
1743 store = &local->pending_packet[i]; 1858
1744 kfree_skb(store->skb); 1859static bool ieee80211_tx_pending_skb(struct ieee80211_local *local,
1745 for (j = 0; j < store->num_extra_frag; j++) 1860 struct sk_buff *skb)
1746 kfree_skb(store->extra_frag[j]); 1861{
1747 kfree(store->extra_frag); 1862 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
1748 clear_bit(i, local->queues_pending); 1863 struct ieee80211_sub_if_data *sdata;
1864 struct sta_info *sta;
1865 struct ieee80211_hdr *hdr;
1866 struct net_device *dev;
1867 int ret;
1868 bool result = true;
1869
1870 /* does interface still exist? */
1871 dev = dev_get_by_index(&init_net, skb->iif);
1872 if (!dev) {
1873 dev_kfree_skb(skb);
1874 return true;
1875 }
1876
1877 /* validate info->control.vif against skb->iif */
1878 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1879 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
1880 sdata = container_of(sdata->bss,
1881 struct ieee80211_sub_if_data,
1882 u.ap);
1883
1884 if (unlikely(info->control.vif && info->control.vif != &sdata->vif)) {
1885 dev_kfree_skb(skb);
1886 result = true;
1887 goto out;
1749 } 1888 }
1889
1890 if (info->flags & IEEE80211_TX_INTFL_NEED_TXPROCESSING) {
1891 ieee80211_tx(dev, skb, true);
1892 } else {
1893 hdr = (struct ieee80211_hdr *)skb->data;
1894 sta = sta_info_get(local, hdr->addr1);
1895
1896 ret = __ieee80211_tx(local, &skb, sta);
1897 if (ret != IEEE80211_TX_OK)
1898 result = false;
1899 }
1900
1901 out:
1902 dev_put(dev);
1903
1904 return result;
1750} 1905}
1751 1906
1752/* 1907/*
@@ -1757,40 +1912,53 @@ void ieee80211_tx_pending(unsigned long data)
1757{ 1912{
1758 struct ieee80211_local *local = (struct ieee80211_local *)data; 1913 struct ieee80211_local *local = (struct ieee80211_local *)data;
1759 struct net_device *dev = local->mdev; 1914 struct net_device *dev = local->mdev;
1760 struct ieee80211_tx_stored_packet *store; 1915 unsigned long flags;
1761 struct ieee80211_tx_data tx; 1916 int i;
1762 int i, ret; 1917 bool next;
1763 1918
1919 rcu_read_lock();
1764 netif_tx_lock_bh(dev); 1920 netif_tx_lock_bh(dev);
1765 for (i = 0; i < ieee80211_num_regular_queues(&local->hw); i++) {
1766 /* Check that this queue is ok */
1767 if (__netif_subqueue_stopped(local->mdev, i) &&
1768 !test_bit(i, local->queues_pending_run))
1769 continue;
1770 1921
1771 if (!test_bit(i, local->queues_pending)) { 1922 for (i = 0; i < local->hw.queues; i++) {
1772 clear_bit(i, local->queues_pending_run); 1923 /*
1773 ieee80211_wake_queue(&local->hw, i); 1924 * If queue is stopped by something other than due to pending
1925 * frames, or we have no pending frames, proceed to next queue.
1926 */
1927 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
1928 next = false;
1929 if (local->queue_stop_reasons[i] !=
1930 BIT(IEEE80211_QUEUE_STOP_REASON_PENDING) ||
1931 skb_queue_empty(&local->pending[i]))
1932 next = true;
1933 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
1934
1935 if (next)
1774 continue; 1936 continue;
1775 }
1776 1937
1777 clear_bit(i, local->queues_pending_run); 1938 /*
1939 * start the queue now to allow processing our packets,
1940 * we're under the tx lock here anyway so nothing will
1941 * happen as a result of this
1942 */
1778 netif_start_subqueue(local->mdev, i); 1943 netif_start_subqueue(local->mdev, i);
1779 1944
1780 store = &local->pending_packet[i]; 1945 while (!skb_queue_empty(&local->pending[i])) {
1781 tx.extra_frag = store->extra_frag; 1946 struct sk_buff *skb = skb_dequeue(&local->pending[i]);
1782 tx.num_extra_frag = store->num_extra_frag; 1947
1783 tx.flags = 0; 1948 if (!ieee80211_tx_pending_skb(local, skb)) {
1784 ret = __ieee80211_tx(local, store->skb, &tx); 1949 skb_queue_head(&local->pending[i], skb);
1785 if (ret) { 1950 break;
1786 if (ret == IEEE80211_TX_FRAG_AGAIN) 1951 }
1787 store->skb = NULL;
1788 } else {
1789 clear_bit(i, local->queues_pending);
1790 ieee80211_wake_queue(&local->hw, i);
1791 } 1952 }
1953
1954 /* Start regular packet processing again. */
1955 if (skb_queue_empty(&local->pending[i]))
1956 ieee80211_wake_queue_by_reason(&local->hw, i,
1957 IEEE80211_QUEUE_STOP_REASON_PENDING);
1792 } 1958 }
1959
1793 netif_tx_unlock_bh(dev); 1960 netif_tx_unlock_bh(dev);
1961 rcu_read_unlock();
1794} 1962}
1795 1963
1796/* functions for drivers to get certain frames */ 1964/* functions for drivers to get certain frames */
@@ -1865,7 +2033,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
1865 struct ieee80211_tx_info *info; 2033 struct ieee80211_tx_info *info;
1866 struct ieee80211_sub_if_data *sdata = NULL; 2034 struct ieee80211_sub_if_data *sdata = NULL;
1867 struct ieee80211_if_ap *ap = NULL; 2035 struct ieee80211_if_ap *ap = NULL;
1868 struct ieee80211_if_sta *ifsta = NULL;
1869 struct beacon_data *beacon; 2036 struct beacon_data *beacon;
1870 struct ieee80211_supported_band *sband; 2037 struct ieee80211_supported_band *sband;
1871 enum ieee80211_band band = local->hw.conf.channel->band; 2038 enum ieee80211_band band = local->hw.conf.channel->band;
@@ -1917,13 +2084,13 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw,
1917 } else 2084 } else
1918 goto out; 2085 goto out;
1919 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { 2086 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
2087 struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
1920 struct ieee80211_hdr *hdr; 2088 struct ieee80211_hdr *hdr;
1921 ifsta = &sdata->u.sta;
1922 2089
1923 if (!ifsta->probe_resp) 2090 if (!ifibss->probe_resp)
1924 goto out; 2091 goto out;
1925 2092
1926 skb = skb_copy(ifsta->probe_resp, GFP_ATOMIC); 2093 skb = skb_copy(ifibss->probe_resp, GFP_ATOMIC);
1927 if (!skb) 2094 if (!skb)
1928 goto out; 2095 goto out;
1929 2096
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index fb89e1d0aa03..fdf432f14554 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -41,6 +41,15 @@ const unsigned char rfc1042_header[] __aligned(2) =
41const unsigned char bridge_tunnel_header[] __aligned(2) = 41const unsigned char bridge_tunnel_header[] __aligned(2) =
42 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; 42 { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
43 43
44struct ieee80211_hw *wiphy_to_ieee80211_hw(struct wiphy *wiphy)
45{
46 struct ieee80211_local *local;
47 BUG_ON(!wiphy);
48
49 local = wiphy_priv(wiphy);
50 return &local->hw;
51}
52EXPORT_SYMBOL(wiphy_to_ieee80211_hw);
44 53
45u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, 54u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
46 enum nl80211_iftype type) 55 enum nl80211_iftype type)
@@ -157,18 +166,13 @@ int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
 
 void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx)
 {
-    struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data;
+    struct sk_buff *skb = tx->skb;
+    struct ieee80211_hdr *hdr;
 
-    hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
-    if (tx->extra_frag) {
-        struct ieee80211_hdr *fhdr;
-        int i;
-        for (i = 0; i < tx->num_extra_frag; i++) {
-            fhdr = (struct ieee80211_hdr *)
-                tx->extra_frag[i]->data;
-            fhdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
-        }
-    }
+    do {
+        hdr = (struct ieee80211_hdr *) skb->data;
+        hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
+    } while ((skb = skb->next));
 }
 
174int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, 178int ieee80211_frame_duration(struct ieee80211_local *local, size_t len,
@@ -335,21 +339,21 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
 {
     struct ieee80211_local *local = hw_to_local(hw);
 
-    /* we don't need to track ampdu queues */
-    if (queue < ieee80211_num_regular_queues(hw)) {
-        __clear_bit(reason, &local->queue_stop_reasons[queue]);
-
-        if (local->queue_stop_reasons[queue] != 0)
-            /* someone still has this queue stopped */
-            return;
-    }
-
-    if (test_bit(queue, local->queues_pending)) {
-        set_bit(queue, local->queues_pending_run);
-        tasklet_schedule(&local->tx_pending_tasklet);
-    } else {
-        netif_wake_subqueue(local->mdev, queue);
-    }
+    if (WARN_ON(queue >= hw->queues))
+        return;
+
+    __clear_bit(reason, &local->queue_stop_reasons[queue]);
+
+    if (!skb_queue_empty(&local->pending[queue]) &&
+        local->queue_stop_reasons[queue] ==
+            BIT(IEEE80211_QUEUE_STOP_REASON_PENDING))
+        tasklet_schedule(&local->tx_pending_tasklet);
+
+    if (local->queue_stop_reasons[queue] != 0)
+        /* someone still has this queue stopped */
+        return;
+
+    netif_wake_subqueue(local->mdev, queue);
 }
 
355void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, 359void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue,
@@ -375,11 +379,18 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
 {
     struct ieee80211_local *local = hw_to_local(hw);
 
-    /* we don't need to track ampdu queues */
-    if (queue < ieee80211_num_regular_queues(hw))
-        __set_bit(reason, &local->queue_stop_reasons[queue]);
+    if (WARN_ON(queue >= hw->queues))
+        return;
 
-    netif_stop_subqueue(local->mdev, queue);
+    /*
+     * Only stop if it was previously running, this is necessary
+     * for correct pending packets handling because there we may
+     * start (but not wake) the queue and rely on that.
+     */
+    if (!local->queue_stop_reasons[queue])
+        netif_stop_subqueue(local->mdev, queue);
+
+    __set_bit(reason, &local->queue_stop_reasons[queue]);
 }
 
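The stop/wake pair above behaves like a per-queue set of stop reasons: the underlying netdev queue is only stopped when the first reason appears and only woken when the last reason is cleared. A stand-alone model of that transition logic:

#include <stdio.h>

/* one stop-reason bitmask per queue; the driver queue itself is only
 * touched on the 0 -> nonzero (stop) and nonzero -> 0 (wake) edges */
enum { STOP_REASON_DRIVER, STOP_REASON_PS, STOP_REASON_PENDING };

struct queue {
    unsigned long stop_reasons;
    int hw_stopped;
};

static void stop_queue(struct queue *q, int reason)
{
    if (!q->stop_reasons)
        q->hw_stopped = 1;          /* was running: really stop it */
    q->stop_reasons |= 1UL << reason;
}

static void wake_queue(struct queue *q, int reason)
{
    q->stop_reasons &= ~(1UL << reason);
    if (!q->stop_reasons)
        q->hw_stopped = 0;          /* last reason gone: really wake */
}

int main(void)
{
    struct queue q = { 0, 0 };

    stop_queue(&q, STOP_REASON_PS);
    stop_queue(&q, STOP_REASON_PENDING);
    wake_queue(&q, STOP_REASON_PS);
    printf("%d\n", q.hw_stopped);   /* still 1: PENDING remains */
    wake_queue(&q, STOP_REASON_PENDING);
    printf("%d\n", q.hw_stopped);   /* now 0 */
    return 0;
}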
385void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, 396void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
@@ -409,7 +420,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw,
409 420
410 spin_lock_irqsave(&local->queue_stop_reason_lock, flags); 421 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
411 422
412 for (i = 0; i < ieee80211_num_queues(hw); i++) 423 for (i = 0; i < hw->queues; i++)
413 __ieee80211_stop_queue(hw, i, reason); 424 __ieee80211_stop_queue(hw, i, reason);
414 425
415 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 426 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
@@ -425,6 +436,10 @@ EXPORT_SYMBOL(ieee80211_stop_queues);
425int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) 436int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue)
426{ 437{
427 struct ieee80211_local *local = hw_to_local(hw); 438 struct ieee80211_local *local = hw_to_local(hw);
439
440 if (WARN_ON(queue >= hw->queues))
441 return true;
442
428 return __netif_subqueue_stopped(local->mdev, queue); 443 return __netif_subqueue_stopped(local->mdev, queue);
429} 444}
430EXPORT_SYMBOL(ieee80211_queue_stopped); 445EXPORT_SYMBOL(ieee80211_queue_stopped);
@@ -438,7 +453,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
438 453
439 spin_lock_irqsave(&local->queue_stop_reason_lock, flags); 454 spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
440 455
441 for (i = 0; i < hw->queues + hw->ampdu_queues; i++) 456 for (i = 0; i < hw->queues; i++)
442 __ieee80211_wake_queue(hw, i, reason); 457 __ieee80211_wake_queue(hw, i, reason);
443 458
444 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 459 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
@@ -459,7 +474,7 @@ void ieee80211_iterate_active_interfaces(
459 struct ieee80211_local *local = hw_to_local(hw); 474 struct ieee80211_local *local = hw_to_local(hw);
460 struct ieee80211_sub_if_data *sdata; 475 struct ieee80211_sub_if_data *sdata;
461 476
462 rtnl_lock(); 477 mutex_lock(&local->iflist_mtx);
463 478
464 list_for_each_entry(sdata, &local->interfaces, list) { 479 list_for_each_entry(sdata, &local->interfaces, list) {
465 switch (sdata->vif.type) { 480 switch (sdata->vif.type) {
@@ -480,7 +495,7 @@ void ieee80211_iterate_active_interfaces(
480 &sdata->vif); 495 &sdata->vif);
481 } 496 }
482 497
483 rtnl_unlock(); 498 mutex_unlock(&local->iflist_mtx);
484} 499}
485EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces); 500EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
486 501
@@ -653,6 +668,10 @@ void ieee802_11_parse_elems(u8 *start, size_t len,
653 elems->pwr_constr_elem = pos; 668 elems->pwr_constr_elem = pos;
654 elems->pwr_constr_elem_len = elen; 669 elems->pwr_constr_elem_len = elen;
655 break; 670 break;
671 case WLAN_EID_TIMEOUT_INTERVAL:
672 elems->timeout_int = pos;
673 elems->timeout_int_len = elen;
674 break;
656 default: 675 default:
657 break; 676 break;
658 } 677 }
@@ -688,6 +707,27 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata)
688 local->ops->conf_tx(local_to_hw(local), i, &qparam); 707 local->ops->conf_tx(local_to_hw(local), i, &qparam);
689} 708}
690 709
710void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
711 const size_t supp_rates_len,
712 const u8 *supp_rates)
713{
714 struct ieee80211_local *local = sdata->local;
715 int i, have_higher_than_11mbit = 0;
716
717 /* cf. IEEE 802.11 9.2.12 */
718 for (i = 0; i < supp_rates_len; i++)
719 if ((supp_rates[i] & 0x7f) * 5 > 110)
720 have_higher_than_11mbit = 1;
721
722 if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ &&
723 have_higher_than_11mbit)
724 sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
725 else
726 sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE;
727
728 ieee80211_set_wmm_default(sdata);
729}
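For reference on the arithmetic above: Supported Rates bytes are in units of 500 kb/s with bit 7 marking a basic rate, so (rate & 0x7f) * 5 is the rate in 100 kb/s units and the > 110 test selects anything faster than 11 Mb/s, i.e. an ERP/OFDM rate on the 2.4 GHz band. A small worked check (illustrative values only):

#include <stdio.h>

/* Supported Rates IE: one byte per rate, units of 500 kb/s,
 * bit 7 flags a "basic" rate and must be masked off. */
static int has_rate_above_11mbit(const unsigned char *rates, int n)
{
    int i;

    for (i = 0; i < n; i++)
        if ((rates[i] & 0x7f) * 5 > 110)    /* 110 == 11.0 Mb/s in 100 kb/s units */
            return 1;
    return 0;
}

int main(void)
{
    /* 1, 2, 5.5, 11 Mb/s (11b only) vs. the same set plus 54 Mb/s */
    unsigned char b_only[] = { 0x82, 0x84, 0x8b, 0x96 };
    unsigned char with_g[] = { 0x82, 0x84, 0x8b, 0x96, 0x6c };

    printf("%d %d\n", has_rate_above_11mbit(b_only, 4),
                      has_rate_above_11mbit(with_g, 5));
    return 0;
}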
730
691void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, 731void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb,
692 int encrypt) 732 int encrypt)
693{ 733{
@@ -727,12 +767,12 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz)
727 return ret; 767 return ret;
728} 768}
729 769
730u64 ieee80211_mandatory_rates(struct ieee80211_local *local, 770u32 ieee80211_mandatory_rates(struct ieee80211_local *local,
731 enum ieee80211_band band) 771 enum ieee80211_band band)
732{ 772{
733 struct ieee80211_supported_band *sband; 773 struct ieee80211_supported_band *sband;
734 struct ieee80211_rate *bitrates; 774 struct ieee80211_rate *bitrates;
735 u64 mandatory_rates; 775 u32 mandatory_rates;
736 enum ieee80211_rate_flags mandatory_flag; 776 enum ieee80211_rate_flags mandatory_flag;
737 int i; 777 int i;
738 778
@@ -754,3 +794,140 @@ u64 ieee80211_mandatory_rates(struct ieee80211_local *local,
754 mandatory_rates |= BIT(i); 794 mandatory_rates |= BIT(i);
755 return mandatory_rates; 795 return mandatory_rates;
756} 796}
797
798void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
799 u16 transaction, u16 auth_alg,
800 u8 *extra, size_t extra_len,
801 const u8 *bssid, int encrypt)
802{
803 struct ieee80211_local *local = sdata->local;
804 struct sk_buff *skb;
805 struct ieee80211_mgmt *mgmt;
806
807 skb = dev_alloc_skb(local->hw.extra_tx_headroom +
808 sizeof(*mgmt) + 6 + extra_len);
809 if (!skb) {
810 printk(KERN_DEBUG "%s: failed to allocate buffer for auth "
811 "frame\n", sdata->dev->name);
812 return;
813 }
814 skb_reserve(skb, local->hw.extra_tx_headroom);
815
816 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6);
817 memset(mgmt, 0, 24 + 6);
818 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
819 IEEE80211_STYPE_AUTH);
820 if (encrypt)
821 mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
822 memcpy(mgmt->da, bssid, ETH_ALEN);
823 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
824 memcpy(mgmt->bssid, bssid, ETH_ALEN);
825 mgmt->u.auth.auth_alg = cpu_to_le16(auth_alg);
826 mgmt->u.auth.auth_transaction = cpu_to_le16(transaction);
827 mgmt->u.auth.status_code = cpu_to_le16(0);
828 if (extra)
829 memcpy(skb_put(skb, extra_len), extra, extra_len);
830
831 ieee80211_tx_skb(sdata, skb, encrypt);
832}
833
834void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
835 u8 *ssid, size_t ssid_len,
836 u8 *ie, size_t ie_len)
837{
838 struct ieee80211_local *local = sdata->local;
839 struct ieee80211_supported_band *sband;
840 struct sk_buff *skb;
841 struct ieee80211_mgmt *mgmt;
842 u8 *pos, *supp_rates, *esupp_rates = NULL;
843 int i;
844
845 skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 +
846 ie_len);
847 if (!skb) {
848 printk(KERN_DEBUG "%s: failed to allocate buffer for probe "
849 "request\n", sdata->dev->name);
850 return;
851 }
852 skb_reserve(skb, local->hw.extra_tx_headroom);
853
854 mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24);
855 memset(mgmt, 0, 24);
856 mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT |
857 IEEE80211_STYPE_PROBE_REQ);
858 memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN);
859 if (dst) {
860 memcpy(mgmt->da, dst, ETH_ALEN);
861 memcpy(mgmt->bssid, dst, ETH_ALEN);
862 } else {
863 memset(mgmt->da, 0xff, ETH_ALEN);
864 memset(mgmt->bssid, 0xff, ETH_ALEN);
865 }
866 pos = skb_put(skb, 2 + ssid_len);
867 *pos++ = WLAN_EID_SSID;
868 *pos++ = ssid_len;
869 memcpy(pos, ssid, ssid_len);
870
871 supp_rates = skb_put(skb, 2);
872 supp_rates[0] = WLAN_EID_SUPP_RATES;
873 supp_rates[1] = 0;
874 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
875
876 for (i = 0; i < sband->n_bitrates; i++) {
877 struct ieee80211_rate *rate = &sband->bitrates[i];
878 if (esupp_rates) {
879 pos = skb_put(skb, 1);
880 esupp_rates[1]++;
881 } else if (supp_rates[1] == 8) {
882 esupp_rates = skb_put(skb, 3);
883 esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES;
884 esupp_rates[1] = 1;
885 pos = &esupp_rates[2];
886 } else {
887 pos = skb_put(skb, 1);
888 supp_rates[1]++;
889 }
890 *pos = rate->bitrate / 5;
891 }
892
893 if (ie)
894 memcpy(skb_put(skb, ie_len), ie, ie_len);
895
896 ieee80211_tx_skb(sdata, skb, 0);
897}
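The probe-request builder above fills the Supported Rates element with the first eight rates and spills the remainder into an Extended Supported Rates element. A compact sketch of that split over a plain byte buffer (element IDs 1 and 50 are assumed here; this is not the skb-based code above):

#include <string.h>

/* assumed 802.11 element IDs: Supported Rates = 1, Extended = 50 */
#define EID_SUPP_RATES     1
#define EID_EXT_SUPP_RATES 50

/*
 * Write the rates (already in 500 kb/s units) into buf as one
 * Supported Rates element, plus an Extended Supported Rates element
 * for anything beyond the first eight rates. Returns bytes written.
 */
size_t build_rate_elems(unsigned char *buf,
                        const unsigned char *rates, size_t n)
{
    size_t first = n > 8 ? 8 : n;
    unsigned char *p = buf;

    *p++ = EID_SUPP_RATES;
    *p++ = (unsigned char)first;
    memcpy(p, rates, first);
    p += first;

    if (n > 8) {
        *p++ = EID_EXT_SUPP_RATES;
        *p++ = (unsigned char)(n - 8);
        memcpy(p, rates + 8, n - 8);
        p += n - 8;
    }
    return (size_t)(p - buf);
}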
898
899u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
900 struct ieee802_11_elems *elems,
901 enum ieee80211_band band)
902{
903 struct ieee80211_supported_band *sband;
904 struct ieee80211_rate *bitrates;
905 size_t num_rates;
906 u32 supp_rates;
907 int i, j;
908 sband = local->hw.wiphy->bands[band];
909
910 if (!sband) {
911 WARN_ON(1);
912 sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
913 }
914
915 bitrates = sband->bitrates;
916 num_rates = sband->n_bitrates;
917 supp_rates = 0;
918 for (i = 0; i < elems->supp_rates_len +
919 elems->ext_supp_rates_len; i++) {
920 u8 rate = 0;
921 int own_rate;
922 if (i < elems->supp_rates_len)
923 rate = elems->supp_rates[i];
924 else if (elems->ext_supp_rates)
925 rate = elems->ext_supp_rates
926 [i - elems->supp_rates_len];
927 own_rate = 5 * (rate & 0x7f);
928 for (j = 0; j < num_rates; j++)
929 if (bitrates[j].bitrate == own_rate)
930 supp_rates |= BIT(j);
931 }
932 return supp_rates;
933}
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 7043ddc75498..ef73105b3061 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -329,24 +329,17 @@ static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
 ieee80211_tx_result
 ieee80211_crypto_wep_encrypt(struct ieee80211_tx_data *tx)
 {
-    int i;
+    struct sk_buff *skb;
 
     ieee80211_tx_set_protected(tx);
 
-    if (wep_encrypt_skb(tx, tx->skb) < 0) {
-        I802_DEBUG_INC(tx->local->tx_handlers_drop_wep);
-        return TX_DROP;
-    }
-
-    if (tx->extra_frag) {
-        for (i = 0; i < tx->num_extra_frag; i++) {
-            if (wep_encrypt_skb(tx, tx->extra_frag[i])) {
-                I802_DEBUG_INC(tx->local->
-                           tx_handlers_drop_wep);
-                return TX_DROP;
-            }
-        }
-    }
+    skb = tx->skb;
+    do {
+        if (wep_encrypt_skb(tx, skb) < 0) {
+            I802_DEBUG_INC(tx->local->tx_handlers_drop_wep);
+            return TX_DROP;
+        }
+    } while ((skb = skb->next));
 
     return TX_CONTINUE;
 }
diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c
index 7162d5816f39..deb4ecec122a 100644
--- a/net/mac80211/wext.c
+++ b/net/mac80211/wext.c
@@ -37,7 +37,14 @@ static int ieee80211_set_encryption(struct ieee80211_sub_if_data *sdata, u8 *sta
37 struct ieee80211_key *key; 37 struct ieee80211_key *key;
38 int err; 38 int err;
39 39
40 if (idx < 0 || idx >= NUM_DEFAULT_KEYS) { 40 if (alg == ALG_AES_CMAC) {
41 if (idx < NUM_DEFAULT_KEYS ||
42 idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) {
43 printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d "
44 "(BIP)\n", sdata->dev->name, idx);
45 return -EINVAL;
46 }
47 } else if (idx < 0 || idx >= NUM_DEFAULT_KEYS) {
41 printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n", 48 printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n",
42 sdata->dev->name, idx); 49 sdata->dev->name, idx);
43 return -EINVAL; 50 return -EINVAL;
@@ -103,6 +110,9 @@ static int ieee80211_set_encryption(struct ieee80211_sub_if_data *sdata, u8 *sta
103 110
104 if (set_tx_key || (!sta && !sdata->default_key && key)) 111 if (set_tx_key || (!sta && !sdata->default_key && key))
105 ieee80211_set_default_key(sdata, idx); 112 ieee80211_set_default_key(sdata, idx);
113 if (alg == ALG_AES_CMAC &&
114 (set_tx_key || (!sta && !sdata->default_mgmt_key && key)))
115 ieee80211_set_default_mgmt_key(sdata, idx);
106 } 116 }
107 117
108 out_unlock: 118 out_unlock:
@@ -119,125 +129,38 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev,
119 129
120 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 130 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
121 131
122 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) 132 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
123 return -EOPNOTSUPP;
124
125 if (sdata->vif.type == NL80211_IFTYPE_STATION ||
126 sdata->vif.type == NL80211_IFTYPE_ADHOC) {
127 int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length); 133 int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length);
128 if (ret) 134 if (ret)
129 return ret; 135 return ret;
130 sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; 136 sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL;
131 ieee80211_sta_req_auth(sdata, &sdata->u.sta); 137 sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME;
138 ieee80211_sta_req_auth(sdata);
132 return 0; 139 return 0;
133 } 140 }
134 141
135 return -EOPNOTSUPP; 142 return -EOPNOTSUPP;
136} 143}
137 144
138static int ieee80211_ioctl_giwrange(struct net_device *dev,
139 struct iw_request_info *info,
140 struct iw_point *data, char *extra)
141{
142 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
143 struct iw_range *range = (struct iw_range *) extra;
144 enum ieee80211_band band;
145 int c = 0;
146
147 data->length = sizeof(struct iw_range);
148 memset(range, 0, sizeof(struct iw_range));
149
150 range->we_version_compiled = WIRELESS_EXT;
151 range->we_version_source = 21;
152 range->retry_capa = IW_RETRY_LIMIT;
153 range->retry_flags = IW_RETRY_LIMIT;
154 range->min_retry = 0;
155 range->max_retry = 255;
156 range->min_rts = 0;
157 range->max_rts = 2347;
158 range->min_frag = 256;
159 range->max_frag = 2346;
160
161 range->encoding_size[0] = 5;
162 range->encoding_size[1] = 13;
163 range->num_encoding_sizes = 2;
164 range->max_encoding_tokens = NUM_DEFAULT_KEYS;
165
166 if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC ||
167 local->hw.flags & IEEE80211_HW_SIGNAL_DB)
168 range->max_qual.level = local->hw.max_signal;
169 else if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
170 range->max_qual.level = -110;
171 else
172 range->max_qual.level = 0;
173
174 if (local->hw.flags & IEEE80211_HW_NOISE_DBM)
175 range->max_qual.noise = -110;
176 else
177 range->max_qual.noise = 0;
178
179 range->max_qual.qual = 100;
180 range->max_qual.updated = local->wstats_flags;
181
182 range->avg_qual.qual = 50;
183 /* not always true but better than nothing */
184 range->avg_qual.level = range->max_qual.level / 2;
185 range->avg_qual.noise = range->max_qual.noise / 2;
186 range->avg_qual.updated = local->wstats_flags;
187
188 range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 |
189 IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP;
190
191
192 for (band = 0; band < IEEE80211_NUM_BANDS; band ++) {
193 int i;
194 struct ieee80211_supported_band *sband;
195
196 sband = local->hw.wiphy->bands[band];
197
198 if (!sband)
199 continue;
200
201 for (i = 0; i < sband->n_channels && c < IW_MAX_FREQUENCIES; i++) {
202 struct ieee80211_channel *chan = &sband->channels[i];
203
204 if (!(chan->flags & IEEE80211_CHAN_DISABLED)) {
205 range->freq[c].i =
206 ieee80211_frequency_to_channel(
207 chan->center_freq);
208 range->freq[c].m = chan->center_freq;
209 range->freq[c].e = 6;
210 c++;
211 }
212 }
213 }
214 range->num_channels = c;
215 range->num_frequency = c;
216
217 IW_EVENT_CAPA_SET_KERNEL(range->event_capa);
218 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP);
219 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN);
220
221 range->scan_capa |= IW_SCAN_CAPA_ESSID;
222
223 return 0;
224}
225
226
227static int ieee80211_ioctl_siwfreq(struct net_device *dev, 145static int ieee80211_ioctl_siwfreq(struct net_device *dev,
228 struct iw_request_info *info, 146 struct iw_request_info *info,
229 struct iw_freq *freq, char *extra) 147 struct iw_freq *freq, char *extra)
230{ 148{
231 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 149 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
232 150
233 if (sdata->vif.type == NL80211_IFTYPE_STATION) 151 if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
234 sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; 152 sdata->u.ibss.flags &= ~IEEE80211_IBSS_AUTO_CHANNEL_SEL;
153 else if (sdata->vif.type == NL80211_IFTYPE_STATION)
154 sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL;
235 155
236 /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ 156 /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */
237 if (freq->e == 0) { 157 if (freq->e == 0) {
238 if (freq->m < 0) { 158 if (freq->m < 0) {
239 if (sdata->vif.type == NL80211_IFTYPE_STATION) 159 if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
240 sdata->u.sta.flags |= 160 sdata->u.ibss.flags |=
161 IEEE80211_IBSS_AUTO_CHANNEL_SEL;
162 else if (sdata->vif.type == NL80211_IFTYPE_STATION)
163 sdata->u.mgd.flags |=
241 IEEE80211_STA_AUTO_CHANNEL_SEL; 164 IEEE80211_STA_AUTO_CHANNEL_SEL;
242 return 0; 165 return 0;
243 } else 166 } else
@@ -274,32 +197,28 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev,
274{ 197{
275 struct ieee80211_sub_if_data *sdata; 198 struct ieee80211_sub_if_data *sdata;
276 size_t len = data->length; 199 size_t len = data->length;
200 int ret;
277 201
278 /* iwconfig uses nul termination in SSID.. */ 202 /* iwconfig uses nul termination in SSID.. */
279 if (len > 0 && ssid[len - 1] == '\0') 203 if (len > 0 && ssid[len - 1] == '\0')
280 len--; 204 len--;
281 205
282 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 206 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
283 if (sdata->vif.type == NL80211_IFTYPE_STATION || 207 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
284 sdata->vif.type == NL80211_IFTYPE_ADHOC) {
285 int ret;
286 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) {
287 if (len > IEEE80211_MAX_SSID_LEN)
288 return -EINVAL;
289 memcpy(sdata->u.sta.ssid, ssid, len);
290 sdata->u.sta.ssid_len = len;
291 return 0;
292 }
293 if (data->flags) 208 if (data->flags)
294 sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; 209 sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_SSID_SEL;
295 else 210 else
296 sdata->u.sta.flags |= IEEE80211_STA_AUTO_SSID_SEL; 211 sdata->u.mgd.flags |= IEEE80211_STA_AUTO_SSID_SEL;
212
297 ret = ieee80211_sta_set_ssid(sdata, ssid, len); 213 ret = ieee80211_sta_set_ssid(sdata, ssid, len);
298 if (ret) 214 if (ret)
299 return ret; 215 return ret;
300 ieee80211_sta_req_auth(sdata, &sdata->u.sta); 216
217 sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME;
218 ieee80211_sta_req_auth(sdata);
301 return 0; 219 return 0;
302 } 220 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
221 return ieee80211_ibss_set_ssid(sdata, ssid, len);
303 222
304 return -EOPNOTSUPP; 223 return -EOPNOTSUPP;
305} 224}
@@ -313,8 +232,7 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
313 232
314 struct ieee80211_sub_if_data *sdata; 233 struct ieee80211_sub_if_data *sdata;
315 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 234 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
316 if (sdata->vif.type == NL80211_IFTYPE_STATION || 235 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
317 sdata->vif.type == NL80211_IFTYPE_ADHOC) {
318 int res = ieee80211_sta_get_ssid(sdata, ssid, &len); 236 int res = ieee80211_sta_get_ssid(sdata, ssid, &len);
319 if (res == 0) { 237 if (res == 0) {
320 data->length = len; 238 data->length = len;
@@ -322,6 +240,14 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev,
322 } else 240 } else
323 data->flags = 0; 241 data->flags = 0;
324 return res; 242 return res;
243 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
244 int res = ieee80211_ibss_get_ssid(sdata, ssid, &len);
245 if (res == 0) {
246 data->length = len;
247 data->flags = 1;
248 } else
249 data->flags = 0;
250 return res;
325 } 251 }
326 252
327 return -EOPNOTSUPP; 253 return -EOPNOTSUPP;
@@ -335,26 +261,32 @@ static int ieee80211_ioctl_siwap(struct net_device *dev,
335 struct ieee80211_sub_if_data *sdata; 261 struct ieee80211_sub_if_data *sdata;
336 262
337 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 263 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
338 if (sdata->vif.type == NL80211_IFTYPE_STATION || 264 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
339 sdata->vif.type == NL80211_IFTYPE_ADHOC) {
340 int ret; 265 int ret;
341 if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { 266
342 memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data,
343 ETH_ALEN);
344 return 0;
345 }
346 if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) 267 if (is_zero_ether_addr((u8 *) &ap_addr->sa_data))
347 sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL | 268 sdata->u.mgd.flags |= IEEE80211_STA_AUTO_BSSID_SEL |
348 IEEE80211_STA_AUTO_CHANNEL_SEL; 269 IEEE80211_STA_AUTO_CHANNEL_SEL;
349 else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) 270 else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data))
350 sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL; 271 sdata->u.mgd.flags |= IEEE80211_STA_AUTO_BSSID_SEL;
351 else 272 else
352 sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; 273 sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL;
353 ret = ieee80211_sta_set_bssid(sdata, (u8 *) &ap_addr->sa_data); 274 ret = ieee80211_sta_set_bssid(sdata, (u8 *) &ap_addr->sa_data);
354 if (ret) 275 if (ret)
355 return ret; 276 return ret;
356 ieee80211_sta_req_auth(sdata, &sdata->u.sta); 277 sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME;
278 ieee80211_sta_req_auth(sdata);
357 return 0; 279 return 0;
280 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
281 if (is_zero_ether_addr((u8 *) &ap_addr->sa_data))
282 sdata->u.ibss.flags |= IEEE80211_IBSS_AUTO_BSSID_SEL |
283 IEEE80211_IBSS_AUTO_CHANNEL_SEL;
284 else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data))
285 sdata->u.ibss.flags |= IEEE80211_IBSS_AUTO_BSSID_SEL;
286 else
287 sdata->u.ibss.flags &= ~IEEE80211_IBSS_AUTO_BSSID_SEL;
288
289 return ieee80211_ibss_set_bssid(sdata, (u8 *) &ap_addr->sa_data);
358 } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { 290 } else if (sdata->vif.type == NL80211_IFTYPE_WDS) {
359 /* 291 /*
360 * If it is necessary to update the WDS peer address 292 * If it is necessary to update the WDS peer address
@@ -383,17 +315,20 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
383 struct ieee80211_sub_if_data *sdata; 315 struct ieee80211_sub_if_data *sdata;
384 316
385 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 317 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
386 if (sdata->vif.type == NL80211_IFTYPE_STATION || 318 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
387 sdata->vif.type == NL80211_IFTYPE_ADHOC) { 319 if (sdata->u.mgd.state == IEEE80211_STA_MLME_ASSOCIATED) {
388 if (sdata->u.sta.state == IEEE80211_STA_MLME_ASSOCIATED ||
389 sdata->u.sta.state == IEEE80211_STA_MLME_IBSS_JOINED) {
390 ap_addr->sa_family = ARPHRD_ETHER; 320 ap_addr->sa_family = ARPHRD_ETHER;
391 memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); 321 memcpy(&ap_addr->sa_data, sdata->u.mgd.bssid, ETH_ALEN);
392 return 0; 322 } else
393 } else {
394 memset(&ap_addr->sa_data, 0, ETH_ALEN); 323 memset(&ap_addr->sa_data, 0, ETH_ALEN);
395 return 0; 324 return 0;
396 } 325 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
326 if (sdata->u.ibss.state == IEEE80211_IBSS_MLME_JOINED) {
327 ap_addr->sa_family = ARPHRD_ETHER;
328 memcpy(&ap_addr->sa_data, sdata->u.ibss.bssid, ETH_ALEN);
329 } else
330 memset(&ap_addr->sa_data, 0, ETH_ALEN);
331 return 0;
397 } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { 332 } else if (sdata->vif.type == NL80211_IFTYPE_WDS) {
398 ap_addr->sa_family = ARPHRD_ETHER; 333 ap_addr->sa_family = ARPHRD_ETHER;
399 memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); 334 memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN);
@@ -404,58 +339,6 @@ static int ieee80211_ioctl_giwap(struct net_device *dev,
404} 339}
405 340
406 341
407static int ieee80211_ioctl_siwscan(struct net_device *dev,
408 struct iw_request_info *info,
409 union iwreq_data *wrqu, char *extra)
410{
411 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
412 struct iw_scan_req *req = NULL;
413 u8 *ssid = NULL;
414 size_t ssid_len = 0;
415
416 if (!netif_running(dev))
417 return -ENETDOWN;
418
419 if (sdata->vif.type != NL80211_IFTYPE_STATION &&
420 sdata->vif.type != NL80211_IFTYPE_ADHOC &&
421 sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
422 return -EOPNOTSUPP;
423
424 /* if SSID was specified explicitly then use that */
425 if (wrqu->data.length == sizeof(struct iw_scan_req) &&
426 wrqu->data.flags & IW_SCAN_THIS_ESSID) {
427 req = (struct iw_scan_req *)extra;
428 ssid = req->essid;
429 ssid_len = req->essid_len;
430 }
431
432 return ieee80211_request_scan(sdata, ssid, ssid_len);
433}
434
435
436static int ieee80211_ioctl_giwscan(struct net_device *dev,
437 struct iw_request_info *info,
438 struct iw_point *data, char *extra)
439{
440 int res;
441 struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
442 struct ieee80211_sub_if_data *sdata;
443
444 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
445
446 if (local->sw_scanning || local->hw_scanning)
447 return -EAGAIN;
448
449 res = ieee80211_scan_results(local, info, extra, data->length);
450 if (res >= 0) {
451 data->length = res;
452 return 0;
453 }
454 data->length = 0;
455 return res;
456}
457
458
459static int ieee80211_ioctl_siwrate(struct net_device *dev, 342static int ieee80211_ioctl_siwrate(struct net_device *dev,
460 struct iw_request_info *info, 343 struct iw_request_info *info,
461 struct iw_param *rate, char *extra) 344 struct iw_param *rate, char *extra)
@@ -511,7 +394,7 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev,
511 394
512 rcu_read_lock(); 395 rcu_read_lock();
513 396
514 sta = sta_info_get(local, sdata->u.sta.bssid); 397 sta = sta_info_get(local, sdata->u.mgd.bssid);
515 398
516 if (sta && !(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) 399 if (sta && !(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS))
517 rate->value = sband->bitrates[sta->last_tx_rate.idx].bitrate; 400 rate->value = sband->bitrates[sta->last_tx_rate.idx].bitrate;
@@ -549,10 +432,9 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev,
549 else /* Automatic power level setting */ 432 else /* Automatic power level setting */
550 new_power_level = chan->max_power; 433 new_power_level = chan->max_power;
551 434
552 if (local->hw.conf.power_level != new_power_level) { 435 local->user_power_level = new_power_level;
553 local->hw.conf.power_level = new_power_level; 436 if (local->hw.conf.power_level != new_power_level)
554 reconf_flags |= IEEE80211_CONF_CHANGE_POWER; 437 reconf_flags |= IEEE80211_CONF_CHANGE_POWER;
555 }
556 438
557 if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { 439 if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) {
558 local->hw.conf.radio_enabled = !(data->txpower.disabled); 440 local->hw.conf.radio_enabled = !(data->txpower.disabled);
@@ -713,8 +595,7 @@ static int ieee80211_ioctl_siwmlme(struct net_device *dev,
713 struct iw_mlme *mlme = (struct iw_mlme *) extra; 595 struct iw_mlme *mlme = (struct iw_mlme *) extra;
714 596
715 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 597 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
716 if (sdata->vif.type != NL80211_IFTYPE_STATION && 598 if (!(sdata->vif.type == NL80211_IFTYPE_STATION))
717 sdata->vif.type != NL80211_IFTYPE_ADHOC)
718 return -EINVAL; 599 return -EINVAL;
719 600
720 switch (mlme->cmd) { 601 switch (mlme->cmd) {
@@ -737,7 +618,7 @@ static int ieee80211_ioctl_siwencode(struct net_device *dev,
737 struct ieee80211_sub_if_data *sdata; 618 struct ieee80211_sub_if_data *sdata;
738 int idx, i, alg = ALG_WEP; 619 int idx, i, alg = ALG_WEP;
739 u8 bcaddr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; 620 u8 bcaddr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
740 int remove = 0; 621 int remove = 0, ret;
741 622
742 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 623 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
743 624
@@ -763,11 +644,20 @@ static int ieee80211_ioctl_siwencode(struct net_device *dev,
763 return 0; 644 return 0;
764 } 645 }
765 646
766 return ieee80211_set_encryption( 647 ret = ieee80211_set_encryption(
767 sdata, bcaddr, 648 sdata, bcaddr,
768 idx, alg, remove, 649 idx, alg, remove,
769 !sdata->default_key, 650 !sdata->default_key,
770 keybuf, erq->length); 651 keybuf, erq->length);
652
653 if (!ret) {
654 if (remove)
655 sdata->u.mgd.flags &= ~IEEE80211_STA_TKIP_WEP_USED;
656 else
657 sdata->u.mgd.flags |= IEEE80211_STA_TKIP_WEP_USED;
658 }
659
660 return ret;
771} 661}
772 662
773 663
@@ -810,8 +700,7 @@ static int ieee80211_ioctl_giwencode(struct net_device *dev,
810 erq->flags |= IW_ENCODE_ENABLED; 700 erq->flags |= IW_ENCODE_ENABLED;
811 701
812 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 702 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
813 struct ieee80211_if_sta *ifsta = &sdata->u.sta; 703 switch (sdata->u.mgd.auth_alg) {
814 switch (ifsta->auth_alg) {
815 case WLAN_AUTH_OPEN: 704 case WLAN_AUTH_OPEN:
816 case WLAN_AUTH_LEAP: 705 case WLAN_AUTH_LEAP:
817 erq->flags |= IW_ENCODE_OPEN; 706 erq->flags |= IW_ENCODE_OPEN;
@@ -836,6 +725,9 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev,
836 int ret = 0, timeout = 0; 725 int ret = 0, timeout = 0;
837 bool ps; 726 bool ps;
838 727
728 if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS))
729 return -EOPNOTSUPP;
730
839 if (sdata->vif.type != NL80211_IFTYPE_STATION) 731 if (sdata->vif.type != NL80211_IFTYPE_STATION)
840 return -EINVAL; 732 return -EINVAL;
841 733
@@ -852,31 +744,49 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev,
852 ps = true; 744 ps = true;
853 break; 745 break;
854 default: /* Otherwise we ignore */ 746 default: /* Otherwise we ignore */
855 break; 747 return -EINVAL;
856 } 748 }
857 749
750 if (wrq->flags & ~(IW_POWER_MODE | IW_POWER_TIMEOUT))
751 return -EINVAL;
752
858 if (wrq->flags & IW_POWER_TIMEOUT) 753 if (wrq->flags & IW_POWER_TIMEOUT)
859 timeout = wrq->value / 1000; 754 timeout = wrq->value / 1000;
860 755
861set: 756 set:
862 if (ps == local->powersave && timeout == local->dynamic_ps_timeout) 757 if (ps == local->powersave && timeout == conf->dynamic_ps_timeout)
863 return ret; 758 return ret;
864 759
865 local->powersave = ps; 760 local->powersave = ps;
866 local->dynamic_ps_timeout = timeout; 761 conf->dynamic_ps_timeout = timeout;
867 762
868 if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { 763 if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)
869 if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) && 764 ret = ieee80211_hw_config(local,
870 local->dynamic_ps_timeout > 0) 765 IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT);
871 mod_timer(&local->dynamic_ps_timer, jiffies + 766
872 msecs_to_jiffies(local->dynamic_ps_timeout)); 767 if (!(sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED))
873 else { 768 return ret;
874 if (local->powersave) 769
875 conf->flags |= IEEE80211_CONF_PS; 770 if (conf->dynamic_ps_timeout > 0 &&
876 else 771 !(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)) {
877 conf->flags &= ~IEEE80211_CONF_PS; 772 mod_timer(&local->dynamic_ps_timer, jiffies +
773 msecs_to_jiffies(conf->dynamic_ps_timeout));
774 } else {
775 if (local->powersave) {
776 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
777 ieee80211_send_nullfunc(local, sdata, 1);
778 conf->flags |= IEEE80211_CONF_PS;
779 ret = ieee80211_hw_config(local,
780 IEEE80211_CONF_CHANGE_PS);
781 } else {
782 conf->flags &= ~IEEE80211_CONF_PS;
783 ret = ieee80211_hw_config(local,
784 IEEE80211_CONF_CHANGE_PS);
785 if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)
786 ieee80211_send_nullfunc(local, sdata, 0);
787 del_timer_sync(&local->dynamic_ps_timer);
788 cancel_work_sync(&local->dynamic_ps_enable_work);
878 } 789 }
879 ret = ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS);
880 } 790 }
881 791
882 return ret; 792 return ret;
@@ -903,11 +813,22 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
903 813
904 switch (data->flags & IW_AUTH_INDEX) { 814 switch (data->flags & IW_AUTH_INDEX) {
905 case IW_AUTH_WPA_VERSION: 815 case IW_AUTH_WPA_VERSION:
906 case IW_AUTH_CIPHER_PAIRWISE:
907 case IW_AUTH_CIPHER_GROUP: 816 case IW_AUTH_CIPHER_GROUP:
908 case IW_AUTH_WPA_ENABLED: 817 case IW_AUTH_WPA_ENABLED:
909 case IW_AUTH_RX_UNENCRYPTED_EAPOL: 818 case IW_AUTH_RX_UNENCRYPTED_EAPOL:
910 case IW_AUTH_KEY_MGMT: 819 case IW_AUTH_KEY_MGMT:
820 case IW_AUTH_CIPHER_GROUP_MGMT:
821 break;
822 case IW_AUTH_CIPHER_PAIRWISE:
823 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
824 if (data->value & (IW_AUTH_CIPHER_WEP40 |
825 IW_AUTH_CIPHER_WEP104 | IW_AUTH_CIPHER_TKIP))
826 sdata->u.mgd.flags |=
827 IEEE80211_STA_TKIP_WEP_USED;
828 else
829 sdata->u.mgd.flags &=
830 ~IEEE80211_STA_TKIP_WEP_USED;
831 }
911 break; 832 break;
912 case IW_AUTH_DROP_UNENCRYPTED: 833 case IW_AUTH_DROP_UNENCRYPTED:
913 sdata->drop_unencrypted = !!data->value; 834 sdata->drop_unencrypted = !!data->value;
@@ -916,24 +837,45 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev,
916 if (sdata->vif.type != NL80211_IFTYPE_STATION) 837 if (sdata->vif.type != NL80211_IFTYPE_STATION)
917 ret = -EINVAL; 838 ret = -EINVAL;
918 else { 839 else {
919 sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; 840 sdata->u.mgd.flags &= ~IEEE80211_STA_PRIVACY_INVOKED;
920 /* 841 /*
921 * Privacy invoked by wpa_supplicant, store the 842 * Privacy invoked by wpa_supplicant, store the
922 * value and allow associating to a protected 843 * value and allow associating to a protected
923 * network without having a key up front. 844 * network without having a key up front.
924 */ 845 */
925 if (data->value) 846 if (data->value)
926 sdata->u.sta.flags |= 847 sdata->u.mgd.flags |=
927 IEEE80211_STA_PRIVACY_INVOKED; 848 IEEE80211_STA_PRIVACY_INVOKED;
928 } 849 }
929 break; 850 break;
930 case IW_AUTH_80211_AUTH_ALG: 851 case IW_AUTH_80211_AUTH_ALG:
931 if (sdata->vif.type == NL80211_IFTYPE_STATION || 852 if (sdata->vif.type == NL80211_IFTYPE_STATION)
932 sdata->vif.type == NL80211_IFTYPE_ADHOC) 853 sdata->u.mgd.auth_algs = data->value;
933 sdata->u.sta.auth_algs = data->value;
934 else 854 else
935 ret = -EOPNOTSUPP; 855 ret = -EOPNOTSUPP;
936 break; 856 break;
857 case IW_AUTH_MFP:
858 if (!(sdata->local->hw.flags & IEEE80211_HW_MFP_CAPABLE)) {
859 ret = -EOPNOTSUPP;
860 break;
861 }
862 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
863 switch (data->value) {
864 case IW_AUTH_MFP_DISABLED:
865 sdata->u.mgd.mfp = IEEE80211_MFP_DISABLED;
866 break;
867 case IW_AUTH_MFP_OPTIONAL:
868 sdata->u.mgd.mfp = IEEE80211_MFP_OPTIONAL;
869 break;
870 case IW_AUTH_MFP_REQUIRED:
871 sdata->u.mgd.mfp = IEEE80211_MFP_REQUIRED;
872 break;
873 default:
874 ret = -EINVAL;
875 }
876 } else
877 ret = -EOPNOTSUPP;
878 break;
937 default: 879 default:
938 ret = -EOPNOTSUPP; 880 ret = -EOPNOTSUPP;
939 break; 881 break;
@@ -951,9 +893,9 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev
951 893
952 rcu_read_lock(); 894 rcu_read_lock();
953 895
954 if (sdata->vif.type == NL80211_IFTYPE_STATION || 896 if (sdata->vif.type == NL80211_IFTYPE_STATION)
955 sdata->vif.type == NL80211_IFTYPE_ADHOC) 897 sta = sta_info_get(local, sdata->u.mgd.bssid);
956 sta = sta_info_get(local, sdata->u.sta.bssid); 898
957 if (!sta) { 899 if (!sta) {
958 wstats->discard.fragment = 0; 900 wstats->discard.fragment = 0;
959 wstats->discard.misc = 0; 901 wstats->discard.misc = 0;
@@ -962,10 +904,45 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev
962 wstats->qual.noise = 0; 904 wstats->qual.noise = 0;
963 wstats->qual.updated = IW_QUAL_ALL_INVALID; 905 wstats->qual.updated = IW_QUAL_ALL_INVALID;
964 } else { 906 } else {
965 wstats->qual.level = sta->last_signal; 907 wstats->qual.updated = 0;
966 wstats->qual.qual = sta->last_qual; 908 /*
967 wstats->qual.noise = sta->last_noise; 909 * mirror what cfg80211 does for iwrange/scan results,
968 wstats->qual.updated = local->wstats_flags; 910 * otherwise userspace gets confused.
911 */
912 if (local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC |
913 IEEE80211_HW_SIGNAL_DBM)) {
914 wstats->qual.updated |= IW_QUAL_LEVEL_UPDATED;
915 wstats->qual.updated |= IW_QUAL_QUAL_UPDATED;
916 } else {
917 wstats->qual.updated |= IW_QUAL_LEVEL_INVALID;
918 wstats->qual.updated |= IW_QUAL_QUAL_INVALID;
919 }
920
921 if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) {
922 wstats->qual.level = sta->last_signal;
923 wstats->qual.qual = sta->last_signal;
924 } else if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) {
925 int sig = sta->last_signal;
926
927 wstats->qual.updated |= IW_QUAL_DBM;
928 wstats->qual.level = sig;
929 if (sig < -110)
930 sig = -110;
931 else if (sig > -40)
932 sig = -40;
933 wstats->qual.qual = sig + 110;
934 }
935
936 if (local->hw.flags & IEEE80211_HW_NOISE_DBM) {
937 /*
938 * This assumes that if driver reports noise, it also
939 * reports signal in dBm.
940 */
941 wstats->qual.noise = sta->last_noise;
942 wstats->qual.updated |= IW_QUAL_NOISE_UPDATED;
943 } else {
944 wstats->qual.updated |= IW_QUAL_NOISE_INVALID;
945 }
969 } 946 }
970 947
971 rcu_read_unlock(); 948 rcu_read_unlock();
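
For reference, a minimal standalone sketch (plain C, not kernel code) of the dBm-to-quality mapping the hunk above applies for IEEE80211_HW_SIGNAL_DBM hardware: the signal is clamped to the [-110, -40] dBm window used in that code and shifted so the reported quality runs from 0 to 70.

/* Hedged sketch: clamp a dBm signal into the same 0..70 quality scale
 * as the wireless-stats hunk above; the -110/-40 window is taken from
 * that code. */
#include <stdio.h>

static int dbm_to_qual(int sig_dbm)
{
	if (sig_dbm < -110)
		sig_dbm = -110;
	else if (sig_dbm > -40)
		sig_dbm = -40;
	return sig_dbm + 110;	/* 0 (worst) .. 70 (best) */
}

int main(void)
{
	printf("-95 dBm -> qual %d\n", dbm_to_qual(-95));
	printf("-30 dBm -> qual %d\n", dbm_to_qual(-30));
	return 0;
}
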
@@ -982,9 +959,8 @@ static int ieee80211_ioctl_giwauth(struct net_device *dev,
982 959
983 switch (data->flags & IW_AUTH_INDEX) { 960 switch (data->flags & IW_AUTH_INDEX) {
984 case IW_AUTH_80211_AUTH_ALG: 961 case IW_AUTH_80211_AUTH_ALG:
985 if (sdata->vif.type == NL80211_IFTYPE_STATION || 962 if (sdata->vif.type == NL80211_IFTYPE_STATION)
986 sdata->vif.type == NL80211_IFTYPE_ADHOC) 963 data->value = sdata->u.mgd.auth_algs;
987 data->value = sdata->u.sta.auth_algs;
988 else 964 else
989 ret = -EOPNOTSUPP; 965 ret = -EOPNOTSUPP;
990 break; 966 break;
@@ -1017,6 +993,9 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev,
1017 case IW_ENCODE_ALG_CCMP: 993 case IW_ENCODE_ALG_CCMP:
1018 alg = ALG_CCMP; 994 alg = ALG_CCMP;
1019 break; 995 break;
996 case IW_ENCODE_ALG_AES_CMAC:
997 alg = ALG_AES_CMAC;
998 break;
1020 default: 999 default:
1021 return -EOPNOTSUPP; 1000 return -EOPNOTSUPP;
1022 } 1001 }
@@ -1025,20 +1004,41 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev,
1025 remove = 1; 1004 remove = 1;
1026 1005
1027 idx = erq->flags & IW_ENCODE_INDEX; 1006 idx = erq->flags & IW_ENCODE_INDEX;
1028 if (idx < 1 || idx > 4) { 1007 if (alg == ALG_AES_CMAC) {
1029 idx = -1; 1008 if (idx < NUM_DEFAULT_KEYS + 1 ||
1030 if (!sdata->default_key) 1009 idx > NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) {
1031 idx = 0; 1010 idx = -1;
1032 else for (i = 0; i < NUM_DEFAULT_KEYS; i++) { 1011 if (!sdata->default_mgmt_key)
1033 if (sdata->default_key == sdata->keys[i]) { 1012 idx = 0;
1034 idx = i; 1013 else for (i = NUM_DEFAULT_KEYS;
1035 break; 1014 i < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS;
1015 i++) {
1016 if (sdata->default_mgmt_key == sdata->keys[i])
1017 {
1018 idx = i;
1019 break;
1020 }
1036 } 1021 }
1037 } 1022 if (idx < 0)
1038 if (idx < 0) 1023 return -EINVAL;
1039 return -EINVAL; 1024 } else
1040 } else 1025 idx--;
1041 idx--; 1026 } else {
1027 if (idx < 1 || idx > 4) {
1028 idx = -1;
1029 if (!sdata->default_key)
1030 idx = 0;
1031 else for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
1032 if (sdata->default_key == sdata->keys[i]) {
1033 idx = i;
1034 break;
1035 }
1036 }
1037 if (idx < 0)
1038 return -EINVAL;
1039 } else
1040 idx--;
1041 }
1042 1042
1043 return ieee80211_set_encryption(sdata, ext->addr.sa_data, idx, alg, 1043 return ieee80211_set_encryption(sdata, ext->addr.sa_data, idx, alg,
1044 remove, 1044 remove,
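
A simplified sketch of the key-index range handling in the siwencodeext hunk above, outside the kernel. It only shows the range checks, not the fall-back search for the current default key; NUM_DEFAULT_KEYS = 4 and NUM_DEFAULT_MGMT_KEYS = 2 are assumptions, since the hunk does not show their values.

/* Hedged sketch: userspace passes a 1-based key index; pairwise/WEP keys
 * map to slots 0..NUM_DEFAULT_KEYS-1, IGTK (AES-CMAC) keys to the slots
 * after them. */
#include <stdio.h>

#define NUM_DEFAULT_KEYS	4	/* assumption */
#define NUM_DEFAULT_MGMT_KEYS	2	/* assumption */

/* returns the 0-based key slot, or -1 for an invalid index */
static int pairwise_slot(int idx)
{
	if (idx < 1 || idx > NUM_DEFAULT_KEYS)
		return -1;
	return idx - 1;			/* slots 0..3 */
}

static int mgmt_slot(int idx)
{
	if (idx < NUM_DEFAULT_KEYS + 1 ||
	    idx > NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
		return -1;
	return idx - 1;			/* slots 4..5 */
}

int main(void)
{
	printf("pairwise idx 1 -> slot %d\n", pairwise_slot(1));
	printf("mgmt idx 5     -> slot %d\n", mgmt_slot(5));
	return 0;
}
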
@@ -1063,7 +1063,7 @@ static const iw_handler ieee80211_handler[] =
1063 (iw_handler) NULL, /* SIOCSIWSENS */ 1063 (iw_handler) NULL, /* SIOCSIWSENS */
1064 (iw_handler) NULL, /* SIOCGIWSENS */ 1064 (iw_handler) NULL, /* SIOCGIWSENS */
1065 (iw_handler) NULL /* not used */, /* SIOCSIWRANGE */ 1065 (iw_handler) NULL /* not used */, /* SIOCSIWRANGE */
1066 (iw_handler) ieee80211_ioctl_giwrange, /* SIOCGIWRANGE */ 1066 (iw_handler) cfg80211_wext_giwrange, /* SIOCGIWRANGE */
1067 (iw_handler) NULL /* not used */, /* SIOCSIWPRIV */ 1067 (iw_handler) NULL /* not used */, /* SIOCSIWPRIV */
1068 (iw_handler) NULL /* kernel code */, /* SIOCGIWPRIV */ 1068 (iw_handler) NULL /* kernel code */, /* SIOCGIWPRIV */
1069 (iw_handler) NULL /* not used */, /* SIOCSIWSTATS */ 1069 (iw_handler) NULL /* not used */, /* SIOCSIWSTATS */
@@ -1076,8 +1076,8 @@ static const iw_handler ieee80211_handler[] =
1076 (iw_handler) ieee80211_ioctl_giwap, /* SIOCGIWAP */ 1076 (iw_handler) ieee80211_ioctl_giwap, /* SIOCGIWAP */
1077 (iw_handler) ieee80211_ioctl_siwmlme, /* SIOCSIWMLME */ 1077 (iw_handler) ieee80211_ioctl_siwmlme, /* SIOCSIWMLME */
1078 (iw_handler) NULL, /* SIOCGIWAPLIST */ 1078 (iw_handler) NULL, /* SIOCGIWAPLIST */
1079 (iw_handler) ieee80211_ioctl_siwscan, /* SIOCSIWSCAN */ 1079 (iw_handler) cfg80211_wext_siwscan, /* SIOCSIWSCAN */
1080 (iw_handler) ieee80211_ioctl_giwscan, /* SIOCGIWSCAN */ 1080 (iw_handler) cfg80211_wext_giwscan, /* SIOCGIWSCAN */
1081 (iw_handler) ieee80211_ioctl_siwessid, /* SIOCSIWESSID */ 1081 (iw_handler) ieee80211_ioctl_siwessid, /* SIOCSIWESSID */
1082 (iw_handler) ieee80211_ioctl_giwessid, /* SIOCGIWESSID */ 1082 (iw_handler) ieee80211_ioctl_giwessid, /* SIOCGIWESSID */
1083 (iw_handler) NULL, /* SIOCSIWNICKN */ 1083 (iw_handler) NULL, /* SIOCSIWNICKN */
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index ac71b38f7cb5..0b8ad1f4ecdd 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -99,10 +99,13 @@ static u16 classify80211(struct ieee80211_local *local, struct sk_buff *skb)
99 /* in case we are a client verify acm is not set for this ac */ 99 /* in case we are a client verify acm is not set for this ac */
100 while (unlikely(local->wmm_acm & BIT(skb->priority))) { 100 while (unlikely(local->wmm_acm & BIT(skb->priority))) {
101 if (wme_downgrade_ac(skb)) { 101 if (wme_downgrade_ac(skb)) {
102 /* The old code would drop the packet in this 102 /*
103 * case. 103 * This should not really happen. The AP has marked all
104 * lower ACs to require admission control which is not
105 * a reasonable configuration. Allow the frame to be
106 * transmitted using AC_BK as a workaround.
104 */ 107 */
105 return 0; 108 break;
106 } 109 }
107 } 110 }
108 111
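
A hedged, self-contained illustration of the downgrade behaviour described in the new comment above: walk from the requested access category down towards AC_BK while the AP's ACM bitmap demands admission control, and transmit on AC_BK rather than dropping. The 802.1d-to-AC table below is the usual 802.11e mapping and is an assumption, not taken from this hunk.

/* Hedged sketch of the ACM downgrade logic; skb/priority details are
 * simplified and the priority table is assumed. */
#include <stdio.h>

enum ac { AC_VO, AC_VI, AC_BE, AC_BK };

static const enum ac d1_to_ac[8] = {
	AC_BE, AC_BK, AC_BK, AC_BE, AC_VI, AC_VI, AC_VO, AC_VO
};

/* acm_bitmap: bit N set means 802.1d priority N needs admission control */
static enum ac classify(unsigned int prio, unsigned int acm_bitmap)
{
	/* walk down VO -> VI -> BE -> BK while ACM blocks the current AC */
	while (acm_bitmap & (1u << prio)) {
		if (d1_to_ac[prio] == AC_BK)
			break;		/* nothing lower: send as AC_BK anyway */
		switch (d1_to_ac[prio]) {
		case AC_VO: prio = 5; break;	/* -> AC_VI */
		case AC_VI: prio = 3; break;	/* -> AC_BE */
		case AC_BE: prio = 2; break;	/* -> AC_BK */
		default:    break;
		}
	}
	return d1_to_ac[prio];
}

int main(void)
{
	/* AP (unreasonably) requires admission control for everything */
	printf("ac = %d (AC_BK is %d)\n", classify(6, 0xff), AC_BK);
	return 0;
}
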
@@ -114,9 +117,7 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb)
114{ 117{
115 struct ieee80211_master_priv *mpriv = netdev_priv(dev); 118 struct ieee80211_master_priv *mpriv = netdev_priv(dev);
116 struct ieee80211_local *local = mpriv->local; 119 struct ieee80211_local *local = mpriv->local;
117 struct ieee80211_hw *hw = &local->hw;
118 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; 120 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
119 struct sta_info *sta;
120 u16 queue; 121 u16 queue;
121 u8 tid; 122 u8 tid;
122 123
@@ -124,29 +125,11 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb)
124 if (unlikely(queue >= local->hw.queues)) 125 if (unlikely(queue >= local->hw.queues))
125 queue = local->hw.queues - 1; 126 queue = local->hw.queues - 1;
126 127
127 if (skb->requeue) { 128 /*
128 if (!hw->ampdu_queues) 129 * Now we know the 1d priority, fill in the QoS header if
129 return queue; 130 * there is one (and we haven't done this before).
130
131 rcu_read_lock();
132 sta = sta_info_get(local, hdr->addr1);
133 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
134 if (sta) {
135 int ampdu_queue = sta->tid_to_tx_q[tid];
136
137 if ((ampdu_queue < ieee80211_num_queues(hw)) &&
138 test_bit(ampdu_queue, local->queue_pool))
139 queue = ampdu_queue;
140 }
141 rcu_read_unlock();
142
143 return queue;
144 }
145
146 /* Now we know the 1d priority, fill in the QoS header if
147 * there is one.
148 */ 131 */
149 if (ieee80211_is_data_qos(hdr->frame_control)) { 132 if (!skb->requeue && ieee80211_is_data_qos(hdr->frame_control)) {
150 u8 *p = ieee80211_get_qos_ctl(hdr); 133 u8 *p = ieee80211_get_qos_ctl(hdr);
151 u8 ack_policy = 0; 134 u8 ack_policy = 0;
152 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; 135 tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK;
@@ -156,140 +139,7 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb)
156 /* qos header is 2 bytes, second reserved */ 139 /* qos header is 2 bytes, second reserved */
157 *p++ = ack_policy | tid; 140 *p++ = ack_policy | tid;
158 *p = 0; 141 *p = 0;
159
160 if (!hw->ampdu_queues)
161 return queue;
162
163 rcu_read_lock();
164
165 sta = sta_info_get(local, hdr->addr1);
166 if (sta) {
167 int ampdu_queue = sta->tid_to_tx_q[tid];
168
169 if ((ampdu_queue < ieee80211_num_queues(hw)) &&
170 test_bit(ampdu_queue, local->queue_pool))
171 queue = ampdu_queue;
172 }
173
174 rcu_read_unlock();
175 } 142 }
176 143
177 return queue; 144 return queue;
178} 145}
179
180int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
181 struct sta_info *sta, u16 tid)
182{
183 int i;
184
185 /* XXX: currently broken due to cb/requeue use */
186 return -EPERM;
187
188 /* prepare the filter and save it for the SW queue
189 * matching the received HW queue */
190
191 if (!local->hw.ampdu_queues)
192 return -EPERM;
193
194 /* try to get a Qdisc from the pool */
195 for (i = local->hw.queues; i < ieee80211_num_queues(&local->hw); i++)
196 if (!test_and_set_bit(i, local->queue_pool)) {
197 ieee80211_stop_queue(local_to_hw(local), i);
198 sta->tid_to_tx_q[tid] = i;
199
200 /* IF there are already pending packets
201 * on this tid first we need to drain them
202 * on the previous queue
203 * since HT is strict in order */
204#ifdef CONFIG_MAC80211_HT_DEBUG
205 if (net_ratelimit())
206 printk(KERN_DEBUG "allocated aggregation queue"
207 " %d tid %d addr %pM pool=0x%lX\n",
208 i, tid, sta->sta.addr,
209 local->queue_pool[0]);
210#endif /* CONFIG_MAC80211_HT_DEBUG */
211 return 0;
212 }
213
214 return -EAGAIN;
215}
216
217/**
218 * the caller needs to hold netdev_get_tx_queue(local->mdev, X)->lock
219 */
220void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
221 struct sta_info *sta, u16 tid,
222 u8 requeue)
223{
224 int agg_queue = sta->tid_to_tx_q[tid];
225 struct ieee80211_hw *hw = &local->hw;
226
227 /* return the qdisc to the pool */
228 clear_bit(agg_queue, local->queue_pool);
229 sta->tid_to_tx_q[tid] = ieee80211_num_queues(hw);
230
231 if (requeue) {
232 ieee80211_requeue(local, agg_queue);
233 } else {
234 struct netdev_queue *txq;
235 spinlock_t *root_lock;
236 struct Qdisc *q;
237
238 txq = netdev_get_tx_queue(local->mdev, agg_queue);
239 q = rcu_dereference(txq->qdisc);
240 root_lock = qdisc_lock(q);
241
242 spin_lock_bh(root_lock);
243 qdisc_reset(q);
244 spin_unlock_bh(root_lock);
245 }
246}
247
248void ieee80211_requeue(struct ieee80211_local *local, int queue)
249{
250 struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, queue);
251 struct sk_buff_head list;
252 spinlock_t *root_lock;
253 struct Qdisc *qdisc;
254 u32 len;
255
256 rcu_read_lock_bh();
257
258 qdisc = rcu_dereference(txq->qdisc);
259 if (!qdisc || !qdisc->dequeue)
260 goto out_unlock;
261
262 skb_queue_head_init(&list);
263
264 root_lock = qdisc_root_lock(qdisc);
265 spin_lock(root_lock);
266 for (len = qdisc->q.qlen; len > 0; len--) {
267 struct sk_buff *skb = qdisc->dequeue(qdisc);
268
269 if (skb)
270 __skb_queue_tail(&list, skb);
271 }
272 spin_unlock(root_lock);
273
274 for (len = list.qlen; len > 0; len--) {
275 struct sk_buff *skb = __skb_dequeue(&list);
276 u16 new_queue;
277
278 BUG_ON(!skb);
279 new_queue = ieee80211_select_queue(local->mdev, skb);
280 skb_set_queue_mapping(skb, new_queue);
281
282 txq = netdev_get_tx_queue(local->mdev, new_queue);
283
284
285 qdisc = rcu_dereference(txq->qdisc);
286 root_lock = qdisc_root_lock(qdisc);
287
288 spin_lock(root_lock);
289 qdisc_enqueue_root(skb, qdisc);
290 spin_unlock(root_lock);
291 }
292
293out_unlock:
294 rcu_read_unlock_bh();
295}
diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h
index bc62f28a4d3d..7520d2e014dc 100644
--- a/net/mac80211/wme.h
+++ b/net/mac80211/wme.h
@@ -21,11 +21,5 @@
21extern const int ieee802_1d_to_ac[8]; 21extern const int ieee802_1d_to_ac[8];
22 22
23u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb); 23u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb);
24int ieee80211_ht_agg_queue_add(struct ieee80211_local *local,
25 struct sta_info *sta, u16 tid);
26void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local,
27 struct sta_info *sta, u16 tid,
28 u8 requeue);
29void ieee80211_requeue(struct ieee80211_local *local, int queue);
30 24
31#endif /* _WME_H */ 25#endif /* _WME_H */
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 7aa63caf8d50..4f8bfea278f2 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright 2002-2004, Instant802 Networks, Inc. 2 * Copyright 2002-2004, Instant802 Networks, Inc.
3 * Copyright 2008, Jouni Malinen <j@w1.fi>
3 * 4 *
4 * This program is free software; you can redistribute it and/or modify 5 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as 6 * it under the terms of the GNU General Public License version 2 as
@@ -19,6 +20,7 @@
19#include "michael.h" 20#include "michael.h"
20#include "tkip.h" 21#include "tkip.h"
21#include "aes_ccm.h" 22#include "aes_ccm.h"
23#include "aes_cmac.h"
22#include "wpa.h" 24#include "wpa.h"
23 25
24ieee80211_tx_result 26ieee80211_tx_result
@@ -194,19 +196,13 @@ ieee80211_tx_result
194ieee80211_crypto_tkip_encrypt(struct ieee80211_tx_data *tx) 196ieee80211_crypto_tkip_encrypt(struct ieee80211_tx_data *tx)
195{ 197{
196 struct sk_buff *skb = tx->skb; 198 struct sk_buff *skb = tx->skb;
197 int i;
198 199
199 ieee80211_tx_set_protected(tx); 200 ieee80211_tx_set_protected(tx);
200 201
201 if (tkip_encrypt_skb(tx, skb) < 0) 202 do {
202 return TX_DROP; 203 if (tkip_encrypt_skb(tx, skb) < 0)
203 204 return TX_DROP;
204 if (tx->extra_frag) { 205 } while ((skb = skb->next));
205 for (i = 0; i < tx->num_extra_frag; i++) {
206 if (tkip_encrypt_skb(tx, tx->extra_frag[i]))
207 return TX_DROP;
208 }
209 }
210 206
211 return TX_CONTINUE; 207 return TX_CONTINUE;
212} 208}
@@ -266,7 +262,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
266 int encrypted) 262 int encrypted)
267{ 263{
268 __le16 mask_fc; 264 __le16 mask_fc;
269 int a4_included; 265 int a4_included, mgmt;
270 u8 qos_tid; 266 u8 qos_tid;
271 u8 *b_0, *aad; 267 u8 *b_0, *aad;
272 u16 data_len, len_a; 268 u16 data_len, len_a;
@@ -277,12 +273,15 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
277 aad = scratch + 4 * AES_BLOCK_LEN; 273 aad = scratch + 4 * AES_BLOCK_LEN;
278 274
279 /* 275 /*
280 * Mask FC: zero subtype b4 b5 b6 276 * Mask FC: zero subtype b4 b5 b6 (if not mgmt)
281 * Retry, PwrMgt, MoreData; set Protected 277 * Retry, PwrMgt, MoreData; set Protected
282 */ 278 */
279 mgmt = ieee80211_is_mgmt(hdr->frame_control);
283 mask_fc = hdr->frame_control; 280 mask_fc = hdr->frame_control;
284 mask_fc &= ~cpu_to_le16(0x0070 | IEEE80211_FCTL_RETRY | 281 mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_RETRY |
285 IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA); 282 IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA);
283 if (!mgmt)
284 mask_fc &= ~cpu_to_le16(0x0070);
286 mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); 285 mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED);
287 286
288 hdrlen = ieee80211_hdrlen(hdr->frame_control); 287 hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -300,8 +299,10 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch,
300 299
301 /* First block, b_0 */ 300 /* First block, b_0 */
302 b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */ 301 b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */
303 /* Nonce: QoS Priority | A2 | PN */ 302 /* Nonce: Nonce Flags | A2 | PN
304 b_0[1] = qos_tid; 303 * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7)
304 */
305 b_0[1] = qos_tid | (mgmt << 4);
305 memcpy(&b_0[2], hdr->addr2, ETH_ALEN); 306 memcpy(&b_0[2], hdr->addr2, ETH_ALEN);
306 memcpy(&b_0[8], pn, CCMP_PN_LEN); 307 memcpy(&b_0[8], pn, CCMP_PN_LEN);
307 /* l(m) */ 308 /* l(m) */
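
A small hedged sketch of the nonce-flags byte construction described in the comment above: the QoS priority goes in bits 0..3, the new "management" flag in bit 4, and bits 5..7 stay reserved; for a robust management frame qos_tid is zero, so the byte reduces to the management bit.

/* Hedged sketch of b_0[1] = qos_tid | (mgmt << 4) from the hunk above. */
#include <stdio.h>

static unsigned char ccmp_nonce_flags(unsigned char qos_tid, int is_mgmt)
{
	return (unsigned char)((qos_tid & 0x0f) | ((is_mgmt ? 1 : 0) << 4));
}

int main(void)
{
	printf("data, tid 5 -> 0x%02x\n", ccmp_nonce_flags(5, 0));
	printf("mgmt, tid 0 -> 0x%02x\n", ccmp_nonce_flags(0, 1));
	return 0;
}
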
@@ -360,9 +361,14 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
360 int hdrlen, len, tail; 361 int hdrlen, len, tail;
361 u8 *pos, *pn; 362 u8 *pos, *pn;
362 int i; 363 int i;
364 bool skip_hw;
365
366 skip_hw = (tx->key->conf.flags & IEEE80211_KEY_FLAG_SW_MGMT) &&
367 ieee80211_is_mgmt(hdr->frame_control);
363 368
364 if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && 369 if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) &&
365 !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { 370 !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) &&
371 !skip_hw) {
366 /* hwaccel - with no need for preallocated room for CCMP 372 /* hwaccel - with no need for preallocated room for CCMP
367 * header or MIC fields */ 373 * header or MIC fields */
368 info->control.hw_key = &tx->key->conf; 374 info->control.hw_key = &tx->key->conf;
@@ -397,7 +403,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb)
397 403
398 ccmp_pn2hdr(pos, pn, key->conf.keyidx); 404 ccmp_pn2hdr(pos, pn, key->conf.keyidx);
399 405
400 if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { 406 if ((key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !skip_hw) {
401 /* hwaccel - with preallocated room for CCMP header */ 407 /* hwaccel - with preallocated room for CCMP header */
402 info->control.hw_key = &tx->key->conf; 408 info->control.hw_key = &tx->key->conf;
403 return 0; 409 return 0;
@@ -416,19 +422,13 @@ ieee80211_tx_result
416ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) 422ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx)
417{ 423{
418 struct sk_buff *skb = tx->skb; 424 struct sk_buff *skb = tx->skb;
419 int i;
420 425
421 ieee80211_tx_set_protected(tx); 426 ieee80211_tx_set_protected(tx);
422 427
423 if (ccmp_encrypt_skb(tx, skb) < 0) 428 do {
424 return TX_DROP; 429 if (ccmp_encrypt_skb(tx, skb) < 0)
425 430 return TX_DROP;
426 if (tx->extra_frag) { 431 } while ((skb = skb->next));
427 for (i = 0; i < tx->num_extra_frag; i++) {
428 if (ccmp_encrypt_skb(tx, tx->extra_frag[i]))
429 return TX_DROP;
430 }
431 }
432 432
433 return TX_CONTINUE; 433 return TX_CONTINUE;
434} 434}
@@ -446,7 +446,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
446 446
447 hdrlen = ieee80211_hdrlen(hdr->frame_control); 447 hdrlen = ieee80211_hdrlen(hdr->frame_control);
448 448
449 if (!ieee80211_is_data(hdr->frame_control)) 449 if (!ieee80211_is_data(hdr->frame_control) &&
450 !ieee80211_is_robust_mgmt_frame(hdr))
450 return RX_CONTINUE; 451 return RX_CONTINUE;
451 452
452 data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN; 453 data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN;
@@ -485,3 +486,126 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
485 486
486 return RX_CONTINUE; 487 return RX_CONTINUE;
487} 488}
489
490
491static void bip_aad(struct sk_buff *skb, u8 *aad)
492{
493 /* BIP AAD: FC(masked) || A1 || A2 || A3 */
494
495 /* FC type/subtype */
496 aad[0] = skb->data[0];
497 /* Mask FC Retry, PwrMgt, MoreData flags to zero */
498 aad[1] = skb->data[1] & ~(BIT(4) | BIT(5) | BIT(6));
499 /* A1 || A2 || A3 */
500 memcpy(aad + 2, skb->data + 4, 3 * ETH_ALEN);
501}
502
503
504static inline void bip_ipn_swap(u8 *d, const u8 *s)
505{
506 *d++ = s[5];
507 *d++ = s[4];
508 *d++ = s[3];
509 *d++ = s[2];
510 *d++ = s[1];
511 *d = s[0];
512}
513
514
515ieee80211_tx_result
516ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx)
517{
518 struct sk_buff *skb = tx->skb;
519 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
520 struct ieee80211_key *key = tx->key;
521 struct ieee80211_mmie *mmie;
522 u8 *pn, aad[20];
523 int i;
524
525 if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) {
526 /* hwaccel */
527 info->control.hw_key = &tx->key->conf;
528 return 0;
529 }
530
531 if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie)))
532 return TX_DROP;
533
534 mmie = (struct ieee80211_mmie *) skb_put(skb, sizeof(*mmie));
535 mmie->element_id = WLAN_EID_MMIE;
536 mmie->length = sizeof(*mmie) - 2;
537 mmie->key_id = cpu_to_le16(key->conf.keyidx);
538
539 /* PN = PN + 1 */
540 pn = key->u.aes_cmac.tx_pn;
541
542 for (i = sizeof(key->u.aes_cmac.tx_pn) - 1; i >= 0; i--) {
543 pn[i]++;
544 if (pn[i])
545 break;
546 }
547 bip_ipn_swap(mmie->sequence_number, pn);
548
549 bip_aad(skb, aad);
550
551 /*
552 * MIC = AES-128-CMAC(IGTK, AAD || Management Frame Body || MMIE, 64)
553 */
554 ieee80211_aes_cmac(key->u.aes_cmac.tfm, key->u.aes_cmac.tx_crypto_buf,
555 aad, skb->data + 24, skb->len - 24, mmie->mic);
556
557 return TX_CONTINUE;
558}
559
560
561ieee80211_rx_result
562ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
563{
564 struct sk_buff *skb = rx->skb;
565 struct ieee80211_key *key = rx->key;
566 struct ieee80211_mmie *mmie;
567 u8 aad[20], mic[8], ipn[6];
568 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
569
570 if (!ieee80211_is_mgmt(hdr->frame_control))
571 return RX_CONTINUE;
572
573 if ((rx->status->flag & RX_FLAG_DECRYPTED) &&
574 (rx->status->flag & RX_FLAG_IV_STRIPPED))
575 return RX_CONTINUE;
576
577 if (skb->len < 24 + sizeof(*mmie))
578 return RX_DROP_UNUSABLE;
579
580 mmie = (struct ieee80211_mmie *)
581 (skb->data + skb->len - sizeof(*mmie));
582 if (mmie->element_id != WLAN_EID_MMIE ||
583 mmie->length != sizeof(*mmie) - 2)
584 return RX_DROP_UNUSABLE; /* Invalid MMIE */
585
586 bip_ipn_swap(ipn, mmie->sequence_number);
587
588 if (memcmp(ipn, key->u.aes_cmac.rx_pn, 6) <= 0) {
589 key->u.aes_cmac.replays++;
590 return RX_DROP_UNUSABLE;
591 }
592
593 if (!(rx->status->flag & RX_FLAG_DECRYPTED)) {
594 /* hardware didn't decrypt/verify MIC */
595 bip_aad(skb, aad);
596 ieee80211_aes_cmac(key->u.aes_cmac.tfm,
597 key->u.aes_cmac.rx_crypto_buf, aad,
598 skb->data + 24, skb->len - 24, mic);
599 if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) {
600 key->u.aes_cmac.icverrors++;
601 return RX_DROP_UNUSABLE;
602 }
603 }
604
605 memcpy(key->u.aes_cmac.rx_pn, ipn, 6);
606
607 /* Remove MMIE */
608 skb_trim(skb, skb->len - sizeof(*mmie));
609
610 return RX_CONTINUE;
611}
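
A standalone sketch of the BIP replay handling added above: the 6-byte IPN arrives in the MMIE in reversed byte order, bip_ipn_swap() puts it into big-endian order so a plain memcmp() compares packet numbers numerically, and anything not strictly greater than the last accepted IPN is counted as a replay.

/* Hedged sketch, plain C: mirrors bip_ipn_swap() and the memcmp()-based
 * replay check from ieee80211_crypto_aes_cmac_decrypt() above. */
#include <stdio.h>
#include <string.h>

static void ipn_swap(unsigned char *d, const unsigned char *s)
{
	for (int i = 0; i < 6; i++)
		d[i] = s[5 - i];
}

static int is_replay(const unsigned char *rx_ipn_wire,
		     const unsigned char *last_ipn_be)
{
	unsigned char ipn_be[6];

	ipn_swap(ipn_be, rx_ipn_wire);
	return memcmp(ipn_be, last_ipn_be, 6) <= 0;
}

int main(void)
{
	unsigned char last[6]     = { 0, 0, 0, 0, 0, 5 };	/* big-endian 5 */
	unsigned char wire_ok[6]  = { 6, 0, 0, 0, 0, 0 };	/* IPN 6 on the wire */
	unsigned char wire_old[6] = { 5, 0, 0, 0, 0, 0 };	/* IPN 5 replayed */

	printf("IPN 6: %s\n", is_replay(wire_ok, last)  ? "replay" : "ok");
	printf("IPN 5: %s\n", is_replay(wire_old, last) ? "replay" : "ok");
	return 0;
}
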
diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h
index d42d221d8a1d..baba0608313e 100644
--- a/net/mac80211/wpa.h
+++ b/net/mac80211/wpa.h
@@ -28,4 +28,9 @@ ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx);
28ieee80211_rx_result 28ieee80211_rx_result
29ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx); 29ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx);
30 30
31ieee80211_tx_result
32ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx);
33ieee80211_rx_result
34ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx);
35
31#endif /* WPA_H */ 36#endif /* WPA_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c2bac9cd0caf..2c967e4f706c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -357,6 +357,45 @@ config NETFILTER_XT_TARGET_DSCP
357 357
358 To compile it as a module, choose M here. If unsure, say N. 358 To compile it as a module, choose M here. If unsure, say N.
359 359
360config NETFILTER_XT_TARGET_HL
361 tristate '"HL" hoplimit target support'
362 depends on IP_NF_MANGLE || IP6_NF_MANGLE
363 depends on NETFILTER_ADVANCED
364 ---help---
365 This option adds the "HL" (for IPv6) and "TTL" (for IPv4)
366 targets, which enable the user to change the
367 hoplimit/time-to-live value of the IP header.
368
369 While it is safe to decrement the hoplimit/TTL value, the
 370	  modules also allow you to increment and set the hoplimit value of
371 the header to arbitrary values. This is EXTREMELY DANGEROUS
372 since you can easily create immortal packets that loop
373 forever on the network.
374
375config NETFILTER_XT_TARGET_LED
376 tristate '"LED" target support'
377 depends on LEDS_CLASS && LED_TRIGGERS
378 depends on NETFILTER_ADVANCED
379 help
380 This option adds a `LED' target, which allows you to blink LEDs in
381 response to particular packets passing through your machine.
382
383 This can be used to turn a spare LED into a network activity LED,
384 which only flashes in response to FTP transfers, for example. Or
385 you could have an LED which lights up for a minute or two every time
386 somebody connects to your machine via SSH.
387
388 You will need support for the "led" class to make this work.
389
390 To create an LED trigger for incoming SSH traffic:
391 iptables -A INPUT -p tcp --dport 22 -j LED --led-trigger-id ssh --led-delay 1000
392
393 Then attach the new trigger to an LED on your system:
394 echo netfilter-ssh > /sys/class/leds/<ledname>/trigger
395
396 For more information on the LEDs available on your system, see
397 Documentation/leds-class.txt
398
360config NETFILTER_XT_TARGET_MARK 399config NETFILTER_XT_TARGET_MARK
361 tristate '"MARK" target support' 400 tristate '"MARK" target support'
362 default m if NETFILTER_ADVANCED=n 401 default m if NETFILTER_ADVANCED=n
@@ -488,6 +527,22 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
488 This option adds a "TCPOPTSTRIP" target, which allows you to strip 527 This option adds a "TCPOPTSTRIP" target, which allows you to strip
489 TCP options from TCP packets. 528 TCP options from TCP packets.
490 529
530config NETFILTER_XT_MATCH_CLUSTER
531 tristate '"cluster" match support'
532 depends on NF_CONNTRACK
533 depends on NETFILTER_ADVANCED
534 ---help---
535 This option allows you to build work-load-sharing clusters of
536 network servers/stateful firewalls without having a dedicated
537 load-balancing router/server/switch. Basically, this match returns
538 true when the packet must be handled by this cluster node. Thus,
539 all nodes see all packets and this match decides which node handles
540 what packets. The work-load sharing algorithm is based on source
541 address hashing.
542
543 If you say Y or M here, try `iptables -m cluster --help` for
544 more information.
545
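
Purely as an illustration of the "source address hashing" idea described in the cluster help text above (not the xt_cluster implementation itself): each node accepts a packet only when the hash of the source address maps to that node's id, so all nodes see all packets but exactly one handles each flow.

/* Hedged, illustrative sketch only; the toy hash is an assumption, the
 * real module uses a proper hash such as jhash on the connection's
 * original source address. */
#include <stdio.h>
#include <stdint.h>

static int handles_packet(uint32_t saddr, unsigned int total_nodes,
			  unsigned int node_id)
{
	uint32_t h = saddr * 2654435761u;	/* toy multiplicative hash */

	return (h % total_nodes) == node_id;
}

int main(void)
{
	uint32_t saddr = 0x0a000001;	/* 10.0.0.1 */

	for (unsigned int node = 0; node < 2; node++)
		printf("node %u handles 10.0.0.1: %d\n",
		       node, handles_packet(saddr, 2, node));
	return 0;
}
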
491config NETFILTER_XT_MATCH_COMMENT 546config NETFILTER_XT_MATCH_COMMENT
492 tristate '"comment" match support' 547 tristate '"comment" match support'
493 depends on NETFILTER_ADVANCED 548 depends on NETFILTER_ADVANCED
@@ -605,6 +660,14 @@ config NETFILTER_XT_MATCH_HELPER
605 660
606 To compile it as a module, choose M here. If unsure, say Y. 661 To compile it as a module, choose M here. If unsure, say Y.
607 662
663config NETFILTER_XT_MATCH_HL
664 tristate '"hl" hoplimit/TTL match support'
665 depends on NETFILTER_ADVANCED
666 ---help---
667 HL matching allows you to match packets based on the hoplimit
668 in the IPv6 header, or the time-to-live field in the IPv4
669 header of the packet.
670
608config NETFILTER_XT_MATCH_IPRANGE 671config NETFILTER_XT_MATCH_IPRANGE
609 tristate '"iprange" address range match support' 672 tristate '"iprange" address range match support'
610 depends on NETFILTER_ADVANCED 673 depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index da3d909e053f..6282060fbda9 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -45,6 +45,8 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
45obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o 45obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
46obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o 46obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
47obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o 47obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
48obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
49obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
48obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o 50obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
49obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o 51obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
50obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o 52obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
@@ -57,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
57obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o 59obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
58 60
59# matches 61# matches
62obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
60obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o 63obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
61obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o 64obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
62obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o 65obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
@@ -67,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
67obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o 70obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
68obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o 71obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
69obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o 72obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
73obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o
70obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o 74obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
71obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o 75obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
72obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o 76obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a90ac83c5918..5bb34737501f 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -174,7 +174,6 @@ next_hook:
174 outdev, &elem, okfn, hook_thresh); 174 outdev, &elem, okfn, hook_thresh);
175 if (verdict == NF_ACCEPT || verdict == NF_STOP) { 175 if (verdict == NF_ACCEPT || verdict == NF_STOP) {
176 ret = 1; 176 ret = 1;
177 goto unlock;
178 } else if (verdict == NF_DROP) { 177 } else if (verdict == NF_DROP) {
179 kfree_skb(skb); 178 kfree_skb(skb);
180 ret = -EPERM; 179 ret = -EPERM;
@@ -183,7 +182,6 @@ next_hook:
183 verdict >> NF_VERDICT_BITS)) 182 verdict >> NF_VERDICT_BITS))
184 goto next_hook; 183 goto next_hook;
185 } 184 }
186unlock:
187 rcu_read_unlock(); 185 rcu_read_unlock();
188 return ret; 186 return ret;
189} 187}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 6be5d4efa51b..5c48378a852f 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -149,8 +149,8 @@ static struct task_struct *sync_backup_thread;
149/* multicast addr */ 149/* multicast addr */
150static struct sockaddr_in mcast_addr = { 150static struct sockaddr_in mcast_addr = {
151 .sin_family = AF_INET, 151 .sin_family = AF_INET,
152 .sin_port = __constant_htons(IP_VS_SYNC_PORT), 152 .sin_port = cpu_to_be16(IP_VS_SYNC_PORT),
153 .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP), 153 .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
154}; 154};
155 155
156 156
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 4f8fcf498545..07d9d8857e5d 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -177,7 +177,7 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = {
177 .me = THIS_MODULE, 177 .me = THIS_MODULE,
178 .help = amanda_help, 178 .help = amanda_help,
179 .tuple.src.l3num = AF_INET, 179 .tuple.src.l3num = AF_INET,
180 .tuple.src.u.udp.port = __constant_htons(10080), 180 .tuple.src.u.udp.port = cpu_to_be16(10080),
181 .tuple.dst.protonum = IPPROTO_UDP, 181 .tuple.dst.protonum = IPPROTO_UDP,
182 .expect_policy = &amanda_exp_policy, 182 .expect_policy = &amanda_exp_policy,
183 }, 183 },
@@ -186,7 +186,7 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = {
186 .me = THIS_MODULE, 186 .me = THIS_MODULE,
187 .help = amanda_help, 187 .help = amanda_help,
188 .tuple.src.l3num = AF_INET6, 188 .tuple.src.l3num = AF_INET6,
189 .tuple.src.u.udp.port = __constant_htons(10080), 189 .tuple.src.u.udp.port = cpu_to_be16(10080),
190 .tuple.dst.protonum = IPPROTO_UDP, 190 .tuple.dst.protonum = IPPROTO_UDP,
191 .expect_policy = &amanda_exp_policy, 191 .expect_policy = &amanda_exp_policy,
192 }, 192 },
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 90ce9ddb9451..8020db6274b8 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -29,6 +29,7 @@
29#include <linux/netdevice.h> 29#include <linux/netdevice.h>
30#include <linux/socket.h> 30#include <linux/socket.h>
31#include <linux/mm.h> 31#include <linux/mm.h>
32#include <linux/rculist_nulls.h>
32 33
33#include <net/netfilter/nf_conntrack.h> 34#include <net/netfilter/nf_conntrack.h>
34#include <net/netfilter/nf_conntrack_l3proto.h> 35#include <net/netfilter/nf_conntrack_l3proto.h>
@@ -54,7 +55,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_lock);
54unsigned int nf_conntrack_htable_size __read_mostly; 55unsigned int nf_conntrack_htable_size __read_mostly;
55EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); 56EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
56 57
57int nf_conntrack_max __read_mostly; 58unsigned int nf_conntrack_max __read_mostly;
58EXPORT_SYMBOL_GPL(nf_conntrack_max); 59EXPORT_SYMBOL_GPL(nf_conntrack_max);
59 60
60struct nf_conn nf_conntrack_untracked __read_mostly; 61struct nf_conn nf_conntrack_untracked __read_mostly;
@@ -163,8 +164,8 @@ static void
163clean_from_lists(struct nf_conn *ct) 164clean_from_lists(struct nf_conn *ct)
164{ 165{
165 pr_debug("clean_from_lists(%p)\n", ct); 166 pr_debug("clean_from_lists(%p)\n", ct);
166 hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); 167 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
167 hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode); 168 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
168 169
169 /* Destroy all pending expectations */ 170 /* Destroy all pending expectations */
170 nf_ct_remove_expectations(ct); 171 nf_ct_remove_expectations(ct);
@@ -204,8 +205,8 @@ destroy_conntrack(struct nf_conntrack *nfct)
204 205
205 /* We overload first tuple to link into unconfirmed list. */ 206 /* We overload first tuple to link into unconfirmed list. */
206 if (!nf_ct_is_confirmed(ct)) { 207 if (!nf_ct_is_confirmed(ct)) {
207 BUG_ON(hlist_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode)); 208 BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode));
208 hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); 209 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
209 } 210 }
210 211
211 NF_CT_STAT_INC(net, delete); 212 NF_CT_STAT_INC(net, delete);
@@ -242,18 +243,26 @@ static void death_by_timeout(unsigned long ul_conntrack)
242 nf_ct_put(ct); 243 nf_ct_put(ct);
243} 244}
244 245
246/*
247 * Warning :
248 * - Caller must take a reference on returned object
249 * and recheck nf_ct_tuple_equal(tuple, &h->tuple)
250 * OR
251 * - Caller must lock nf_conntrack_lock before calling this function
252 */
245struct nf_conntrack_tuple_hash * 253struct nf_conntrack_tuple_hash *
246__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) 254__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
247{ 255{
248 struct nf_conntrack_tuple_hash *h; 256 struct nf_conntrack_tuple_hash *h;
249 struct hlist_node *n; 257 struct hlist_nulls_node *n;
250 unsigned int hash = hash_conntrack(tuple); 258 unsigned int hash = hash_conntrack(tuple);
251 259
252 /* Disable BHs the entire time since we normally need to disable them 260 /* Disable BHs the entire time since we normally need to disable them
253 * at least once for the stats anyway. 261 * at least once for the stats anyway.
254 */ 262 */
255 local_bh_disable(); 263 local_bh_disable();
256 hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { 264begin:
265 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
257 if (nf_ct_tuple_equal(tuple, &h->tuple)) { 266 if (nf_ct_tuple_equal(tuple, &h->tuple)) {
258 NF_CT_STAT_INC(net, found); 267 NF_CT_STAT_INC(net, found);
259 local_bh_enable(); 268 local_bh_enable();
@@ -261,6 +270,13 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
261 } 270 }
262 NF_CT_STAT_INC(net, searched); 271 NF_CT_STAT_INC(net, searched);
263 } 272 }
273 /*
274 * if the nulls value we got at the end of this lookup is
275 * not the expected one, we must restart lookup.
276 * We probably met an item that was moved to another chain.
277 */
278 if (get_nulls_value(n) != hash)
279 goto begin;
264 local_bh_enable(); 280 local_bh_enable();
265 281
266 return NULL; 282 return NULL;
@@ -275,11 +291,18 @@ nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
275 struct nf_conn *ct; 291 struct nf_conn *ct;
276 292
277 rcu_read_lock(); 293 rcu_read_lock();
294begin:
278 h = __nf_conntrack_find(net, tuple); 295 h = __nf_conntrack_find(net, tuple);
279 if (h) { 296 if (h) {
280 ct = nf_ct_tuplehash_to_ctrack(h); 297 ct = nf_ct_tuplehash_to_ctrack(h);
281 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) 298 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
282 h = NULL; 299 h = NULL;
300 else {
301 if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) {
302 nf_ct_put(ct);
303 goto begin;
304 }
305 }
283 } 306 }
284 rcu_read_unlock(); 307 rcu_read_unlock();
285 308
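
A hedged, single-threaded illustration of the lookup discipline the new "Warning" comment documents: find a candidate, take a reference (atomic_inc_not_zero in the kernel), then re-check the tuple, because with SLAB_DESTROY_BY_RCU-style reuse the object may have been recycled for a different tuple between the match and the refcount increment; if so, drop the reference and retry. Plain C, no kernel APIs.

/* Hedged sketch of the "take reference, then recheck the key" pattern
 * from nf_conntrack_find_get() above. */
#include <stdio.h>
#include <string.h>

struct entry {
	char tuple[32];
	int use;		/* stand-in for ct->ct_general.use */
};

static int get_ref(struct entry *e)
{
	/* stand-in for atomic_inc_not_zero() */
	if (e->use == 0)
		return 0;
	e->use++;
	return 1;
}

static struct entry *find_get(struct entry *tbl, int n, const char *tuple)
{
	for (int i = 0; i < n; i++) {
		struct entry *e = &tbl[i];

		if (strcmp(e->tuple, tuple) != 0)
			continue;
		if (!get_ref(e))
			continue;		/* entry is being freed */
		if (strcmp(e->tuple, tuple) != 0) {
			e->use--;		/* recycled: drop ref, retry */
			return find_get(tbl, n, tuple);
		}
		return e;
	}
	return NULL;
}

int main(void)
{
	struct entry tbl[2] = {
		{ "10.0.0.1:1234->10.0.0.2:80", 1 },
		{ "",                           0 },
	};

	printf("%s\n", find_get(tbl, 2, "10.0.0.1:1234->10.0.0.2:80")
	       ? "found" : "not found");
	return 0;
}
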
@@ -293,9 +316,9 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
293{ 316{
294 struct net *net = nf_ct_net(ct); 317 struct net *net = nf_ct_net(ct);
295 318
296 hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, 319 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
297 &net->ct.hash[hash]); 320 &net->ct.hash[hash]);
298 hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, 321 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode,
299 &net->ct.hash[repl_hash]); 322 &net->ct.hash[repl_hash]);
300} 323}
301 324
@@ -318,7 +341,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
318 struct nf_conntrack_tuple_hash *h; 341 struct nf_conntrack_tuple_hash *h;
319 struct nf_conn *ct; 342 struct nf_conn *ct;
320 struct nf_conn_help *help; 343 struct nf_conn_help *help;
321 struct hlist_node *n; 344 struct hlist_nulls_node *n;
322 enum ip_conntrack_info ctinfo; 345 enum ip_conntrack_info ctinfo;
323 struct net *net; 346 struct net *net;
324 347
@@ -350,17 +373,17 @@ __nf_conntrack_confirm(struct sk_buff *skb)
350 /* See if there's one in the list already, including reverse: 373 /* See if there's one in the list already, including reverse:
351 NAT could have grabbed it without realizing, since we're 374 NAT could have grabbed it without realizing, since we're
352 not in the hash. If there is, we lost race. */ 375 not in the hash. If there is, we lost race. */
353 hlist_for_each_entry(h, n, &net->ct.hash[hash], hnode) 376 hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
354 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 377 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
355 &h->tuple)) 378 &h->tuple))
356 goto out; 379 goto out;
357 hlist_for_each_entry(h, n, &net->ct.hash[repl_hash], hnode) 380 hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode)
358 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, 381 if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
359 &h->tuple)) 382 &h->tuple))
360 goto out; 383 goto out;
361 384
362 /* Remove from unconfirmed list */ 385 /* Remove from unconfirmed list */
363 hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); 386 hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
364 387
365 __nf_conntrack_hash_insert(ct, hash, repl_hash); 388 __nf_conntrack_hash_insert(ct, hash, repl_hash);
366 /* Timer relative to confirmation time, not original 389 /* Timer relative to confirmation time, not original
@@ -399,14 +422,14 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
399{ 422{
400 struct net *net = nf_ct_net(ignored_conntrack); 423 struct net *net = nf_ct_net(ignored_conntrack);
401 struct nf_conntrack_tuple_hash *h; 424 struct nf_conntrack_tuple_hash *h;
402 struct hlist_node *n; 425 struct hlist_nulls_node *n;
403 unsigned int hash = hash_conntrack(tuple); 426 unsigned int hash = hash_conntrack(tuple);
404 427
405 /* Disable BHs the entire time since we need to disable them at 428 /* Disable BHs the entire time since we need to disable them at
406 * least once for the stats anyway. 429 * least once for the stats anyway.
407 */ 430 */
408 rcu_read_lock_bh(); 431 rcu_read_lock_bh();
409 hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { 432 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) {
410 if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && 433 if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack &&
411 nf_ct_tuple_equal(tuple, &h->tuple)) { 434 nf_ct_tuple_equal(tuple, &h->tuple)) {
412 NF_CT_STAT_INC(net, found); 435 NF_CT_STAT_INC(net, found);
@@ -430,14 +453,14 @@ static noinline int early_drop(struct net *net, unsigned int hash)
430 /* Use oldest entry, which is roughly LRU */ 453 /* Use oldest entry, which is roughly LRU */
431 struct nf_conntrack_tuple_hash *h; 454 struct nf_conntrack_tuple_hash *h;
432 struct nf_conn *ct = NULL, *tmp; 455 struct nf_conn *ct = NULL, *tmp;
433 struct hlist_node *n; 456 struct hlist_nulls_node *n;
434 unsigned int i, cnt = 0; 457 unsigned int i, cnt = 0;
435 int dropped = 0; 458 int dropped = 0;
436 459
437 rcu_read_lock(); 460 rcu_read_lock();
438 for (i = 0; i < nf_conntrack_htable_size; i++) { 461 for (i = 0; i < nf_conntrack_htable_size; i++) {
439 hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], 462 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
440 hnode) { 463 hnnode) {
441 tmp = nf_ct_tuplehash_to_ctrack(h); 464 tmp = nf_ct_tuplehash_to_ctrack(h);
442 if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) 465 if (!test_bit(IPS_ASSURED_BIT, &tmp->status))
443 ct = tmp; 466 ct = tmp;
@@ -472,7 +495,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
472 struct nf_conn *ct; 495 struct nf_conn *ct;
473 496
474 if (unlikely(!nf_conntrack_hash_rnd_initted)) { 497 if (unlikely(!nf_conntrack_hash_rnd_initted)) {
475 get_random_bytes(&nf_conntrack_hash_rnd, 4); 498 get_random_bytes(&nf_conntrack_hash_rnd,
499 sizeof(nf_conntrack_hash_rnd));
476 nf_conntrack_hash_rnd_initted = 1; 500 nf_conntrack_hash_rnd_initted = 1;
477 } 501 }
478 502
@@ -507,26 +531,19 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
507#ifdef CONFIG_NET_NS 531#ifdef CONFIG_NET_NS
508 ct->ct_net = net; 532 ct->ct_net = net;
509#endif 533#endif
510 INIT_RCU_HEAD(&ct->rcu);
511 534
512 return ct; 535 return ct;
513} 536}
514EXPORT_SYMBOL_GPL(nf_conntrack_alloc); 537EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
515 538
516static void nf_conntrack_free_rcu(struct rcu_head *head) 539void nf_conntrack_free(struct nf_conn *ct)
517{ 540{
518 struct nf_conn *ct = container_of(head, struct nf_conn, rcu);
519 struct net *net = nf_ct_net(ct); 541 struct net *net = nf_ct_net(ct);
520 542
543 nf_ct_ext_destroy(ct);
544 atomic_dec(&net->ct.count);
521 nf_ct_ext_free(ct); 545 nf_ct_ext_free(ct);
522 kmem_cache_free(nf_conntrack_cachep, ct); 546 kmem_cache_free(nf_conntrack_cachep, ct);
523 atomic_dec(&net->ct.count);
524}
525
526void nf_conntrack_free(struct nf_conn *ct)
527{
528 nf_ct_ext_destroy(ct);
529 call_rcu(&ct->rcu, nf_conntrack_free_rcu);
530} 547}
531EXPORT_SYMBOL_GPL(nf_conntrack_free); 548EXPORT_SYMBOL_GPL(nf_conntrack_free);
532 549
@@ -592,7 +609,7 @@ init_conntrack(struct net *net,
592 } 609 }
593 610
594 /* Overload tuple linked list to put us in unconfirmed list. */ 611 /* Overload tuple linked list to put us in unconfirmed list. */
595 hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, 612 hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
596 &net->ct.unconfirmed); 613 &net->ct.unconfirmed);
597 614
598 spin_unlock_bh(&nf_conntrack_lock); 615 spin_unlock_bh(&nf_conntrack_lock);
@@ -726,13 +743,15 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
726 NF_CT_ASSERT(skb->nfct); 743 NF_CT_ASSERT(skb->nfct);
727 744
728 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum); 745 ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum);
729 if (ret < 0) { 746 if (ret <= 0) {
730 /* Invalid: inverse of the return code tells 747 /* Invalid: inverse of the return code tells
731 * the netfilter core what to do */ 748 * the netfilter core what to do */
732 pr_debug("nf_conntrack_in: Can't track with proto module\n"); 749 pr_debug("nf_conntrack_in: Can't track with proto module\n");
733 nf_conntrack_put(skb->nfct); 750 nf_conntrack_put(skb->nfct);
734 skb->nfct = NULL; 751 skb->nfct = NULL;
735 NF_CT_STAT_INC_ATOMIC(net, invalid); 752 NF_CT_STAT_INC_ATOMIC(net, invalid);
753 if (ret == -NF_DROP)
754 NF_CT_STAT_INC_ATOMIC(net, drop);
736 return -ret; 755 return -ret;
737 } 756 }
738 757
@@ -902,6 +921,12 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
902 return 0; 921 return 0;
903} 922}
904EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); 923EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple);
924
925int nf_ct_port_nlattr_tuple_size(void)
926{
927 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
928}
929EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size);
905#endif 930#endif
906 931
907/* Used by ipt_REJECT and ip6t_REJECT. */ 932/* Used by ipt_REJECT and ip6t_REJECT. */
@@ -930,17 +955,17 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
930{ 955{
931 struct nf_conntrack_tuple_hash *h; 956 struct nf_conntrack_tuple_hash *h;
932 struct nf_conn *ct; 957 struct nf_conn *ct;
933 struct hlist_node *n; 958 struct hlist_nulls_node *n;
934 959
935 spin_lock_bh(&nf_conntrack_lock); 960 spin_lock_bh(&nf_conntrack_lock);
936 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { 961 for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
937 hlist_for_each_entry(h, n, &net->ct.hash[*bucket], hnode) { 962 hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
938 ct = nf_ct_tuplehash_to_ctrack(h); 963 ct = nf_ct_tuplehash_to_ctrack(h);
939 if (iter(ct, data)) 964 if (iter(ct, data))
940 goto found; 965 goto found;
941 } 966 }
942 } 967 }
943 hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) { 968 hlist_nulls_for_each_entry(h, n, &net->ct.unconfirmed, hnnode) {
944 ct = nf_ct_tuplehash_to_ctrack(h); 969 ct = nf_ct_tuplehash_to_ctrack(h);
945 if (iter(ct, data)) 970 if (iter(ct, data))
946 set_bit(IPS_DYING_BIT, &ct->status); 971 set_bit(IPS_DYING_BIT, &ct->status);
@@ -988,7 +1013,7 @@ static int kill_all(struct nf_conn *i, void *data)
988 return 1; 1013 return 1;
989} 1014}
990 1015
991void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size) 1016void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
992{ 1017{
993 if (vmalloced) 1018 if (vmalloced)
994 vfree(hash); 1019 vfree(hash);
@@ -1056,26 +1081,28 @@ void nf_conntrack_cleanup(struct net *net)
1056 } 1081 }
1057} 1082}
1058 1083
1059struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced) 1084void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
1060{ 1085{
1061 struct hlist_head *hash; 1086 struct hlist_nulls_head *hash;
1062 unsigned int size, i; 1087 unsigned int nr_slots, i;
1088 size_t sz;
1063 1089
1064 *vmalloced = 0; 1090 *vmalloced = 0;
1065 1091
1066 size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_head)); 1092 BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
1067 hash = (void*)__get_free_pages(GFP_KERNEL|__GFP_NOWARN, 1093 nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
1068 get_order(sizeof(struct hlist_head) 1094 sz = nr_slots * sizeof(struct hlist_nulls_head);
1069 * size)); 1095 hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
1096 get_order(sz));
1070 if (!hash) { 1097 if (!hash) {
1071 *vmalloced = 1; 1098 *vmalloced = 1;
1072 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); 1099 printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
1073 hash = vmalloc(sizeof(struct hlist_head) * size); 1100 hash = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
1074 } 1101 }
1075 1102
1076 if (hash) 1103 if (hash && nulls)
1077 for (i = 0; i < size; i++) 1104 for (i = 0; i < nr_slots; i++)
1078 INIT_HLIST_HEAD(&hash[i]); 1105 INIT_HLIST_NULLS_HEAD(&hash[i], i);
1079 1106
1080 return hash; 1107 return hash;
1081} 1108}
@@ -1086,7 +1113,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1086 int i, bucket, vmalloced, old_vmalloced; 1113 int i, bucket, vmalloced, old_vmalloced;
1087 unsigned int hashsize, old_size; 1114 unsigned int hashsize, old_size;
1088 int rnd; 1115 int rnd;
1089 struct hlist_head *hash, *old_hash; 1116 struct hlist_nulls_head *hash, *old_hash;
1090 struct nf_conntrack_tuple_hash *h; 1117 struct nf_conntrack_tuple_hash *h;
1091 1118
1092 /* On boot, we can set this without any fancy locking. */ 1119 /* On boot, we can set this without any fancy locking. */
@@ -1097,13 +1124,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1097 if (!hashsize) 1124 if (!hashsize)
1098 return -EINVAL; 1125 return -EINVAL;
1099 1126
1100 hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced); 1127 hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1);
1101 if (!hash) 1128 if (!hash)
1102 return -ENOMEM; 1129 return -ENOMEM;
1103 1130
1104	/* We have to rehash for the new table anyway, so we also can 1131	/* We have to rehash for the new table anyway, so we also can
1105	 * use a new random seed */ 1132	 * use a new random seed */
1106 get_random_bytes(&rnd, 4); 1133 get_random_bytes(&rnd, sizeof(rnd));
1107 1134
1108 /* Lookups in the old hash might happen in parallel, which means we 1135 /* Lookups in the old hash might happen in parallel, which means we
1109 * might get false negatives during connection lookup. New connections 1136 * might get false negatives during connection lookup. New connections
@@ -1112,12 +1139,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1112 */ 1139 */
1113 spin_lock_bh(&nf_conntrack_lock); 1140 spin_lock_bh(&nf_conntrack_lock);
1114 for (i = 0; i < nf_conntrack_htable_size; i++) { 1141 for (i = 0; i < nf_conntrack_htable_size; i++) {
1115 while (!hlist_empty(&init_net.ct.hash[i])) { 1142 while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
1116 h = hlist_entry(init_net.ct.hash[i].first, 1143 h = hlist_nulls_entry(init_net.ct.hash[i].first,
1117 struct nf_conntrack_tuple_hash, hnode); 1144 struct nf_conntrack_tuple_hash, hnnode);
1118 hlist_del_rcu(&h->hnode); 1145 hlist_nulls_del_rcu(&h->hnnode);
1119 bucket = __hash_conntrack(&h->tuple, hashsize, rnd); 1146 bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
1120 hlist_add_head(&h->hnode, &hash[bucket]); 1147 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
1121 } 1148 }
1122 } 1149 }
1123 old_size = nf_conntrack_htable_size; 1150 old_size = nf_conntrack_htable_size;
@@ -1168,7 +1195,7 @@ static int nf_conntrack_init_init_net(void)
1168 1195
1169 nf_conntrack_cachep = kmem_cache_create("nf_conntrack", 1196 nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
1170 sizeof(struct nf_conn), 1197 sizeof(struct nf_conn),
1171 0, 0, NULL); 1198 0, SLAB_DESTROY_BY_RCU, NULL);
1172 if (!nf_conntrack_cachep) { 1199 if (!nf_conntrack_cachep) {
1173 printk(KERN_ERR "Unable to create nf_conn slab cache\n"); 1200 printk(KERN_ERR "Unable to create nf_conn slab cache\n");
1174 ret = -ENOMEM; 1201 ret = -ENOMEM;
@@ -1198,7 +1225,7 @@ static int nf_conntrack_init_net(struct net *net)
1198 int ret; 1225 int ret;
1199 1226
1200 atomic_set(&net->ct.count, 0); 1227 atomic_set(&net->ct.count, 0);
1201 INIT_HLIST_HEAD(&net->ct.unconfirmed); 1228 INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0);
1202 net->ct.stat = alloc_percpu(struct ip_conntrack_stat); 1229 net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
1203 if (!net->ct.stat) { 1230 if (!net->ct.stat) {
1204 ret = -ENOMEM; 1231 ret = -ENOMEM;
@@ -1208,7 +1235,7 @@ static int nf_conntrack_init_net(struct net *net)
1208 if (ret < 0) 1235 if (ret < 0)
1209 goto err_ecache; 1236 goto err_ecache;
1210 net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1237 net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
1211 &net->ct.hash_vmalloc); 1238 &net->ct.hash_vmalloc, 1);
1212 if (!net->ct.hash) { 1239 if (!net->ct.hash) {
1213 ret = -ENOMEM; 1240 ret = -ENOMEM;
1214 printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); 1241 printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
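
Editor's note, not part of the patch: the conversion above (SLAB_DESTROY_BY_RCU plus hlist_nulls chains) changes the rules for lockless readers. A conntrack object may now be freed and reused while a chain is being walked, so a lookup must take a reference with atomic_inc_not_zero(), re-check the tuple afterwards, and restart if the nulls marker shows the walk drifted into another bucket. The sketch below illustrates that pattern under the 2.6.29-era netfilter headers; sketch_find_get() and its bucket parameter are hypothetical names.

/* Illustrative lookup over an hlist_nulls conntrack bucket once the
 * nf_conn cache is SLAB_DESTROY_BY_RCU.  Assumes <linux/rculist_nulls.h>,
 * <net/netfilter/nf_conntrack.h> and <net/netfilter/nf_conntrack_core.h>. */
static struct nf_conn *
sketch_find_get(struct net *net, const struct nf_conntrack_tuple *tuple,
                unsigned int bucket)
{
        struct nf_conntrack_tuple_hash *h;
        struct hlist_nulls_node *n;
        struct nf_conn *ct;

        rcu_read_lock();
begin:
        hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
                if (!nf_ct_tuple_equal(tuple, &h->tuple))
                        continue;
                ct = nf_ct_tuplehash_to_ctrack(h);
                /* The slab may recycle ct under us: only take a reference
                 * if the entry is still live ... */
                if (!atomic_inc_not_zero(&ct->ct_general.use))
                        continue;
                /* ... and re-check the tuple, since the object may have
                 * been reused for a different connection meanwhile. */
                if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) {
                        nf_ct_put(ct);
                        continue;
                }
                rcu_read_unlock();
                return ct;
        }
        /* The nulls value at the end of the chain encodes the bucket it
         * belongs to; a mismatch means the entry we followed was moved to
         * another chain and the walk must be restarted. */
        if (get_nulls_value(n) != bucket)
                goto begin;
        rcu_read_unlock();
        return NULL;
}
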
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 3a8a34a6d37c..3940f996a2e4 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -72,7 +72,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
72 unsigned int hash; 72 unsigned int hash;
73 73
74 if (unlikely(!nf_ct_expect_hash_rnd_initted)) { 74 if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
75 get_random_bytes(&nf_ct_expect_hash_rnd, 4); 75 get_random_bytes(&nf_ct_expect_hash_rnd,
76 sizeof(nf_ct_expect_hash_rnd));
76 nf_ct_expect_hash_rnd_initted = 1; 77 nf_ct_expect_hash_rnd_initted = 1;
77 } 78 }
78 79
@@ -603,7 +604,7 @@ int nf_conntrack_expect_init(struct net *net)
603 604
604 net->ct.expect_count = 0; 605 net->ct.expect_count = 0;
605 net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 606 net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
606 &net->ct.expect_vmalloc); 607 &net->ct.expect_vmalloc, 0);
607 if (net->ct.expect_hash == NULL) 608 if (net->ct.expect_hash == NULL)
608 goto err1; 609 goto err1;
609 610
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 687bd633c3d7..66369490230e 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1167,7 +1167,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
1167 .name = "Q.931", 1167 .name = "Q.931",
1168 .me = THIS_MODULE, 1168 .me = THIS_MODULE,
1169 .tuple.src.l3num = AF_INET, 1169 .tuple.src.l3num = AF_INET,
1170 .tuple.src.u.tcp.port = __constant_htons(Q931_PORT), 1170 .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT),
1171 .tuple.dst.protonum = IPPROTO_TCP, 1171 .tuple.dst.protonum = IPPROTO_TCP,
1172 .help = q931_help, 1172 .help = q931_help,
1173 .expect_policy = &q931_exp_policy, 1173 .expect_policy = &q931_exp_policy,
@@ -1176,7 +1176,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = {
1176 .name = "Q.931", 1176 .name = "Q.931",
1177 .me = THIS_MODULE, 1177 .me = THIS_MODULE,
1178 .tuple.src.l3num = AF_INET6, 1178 .tuple.src.l3num = AF_INET6,
1179 .tuple.src.u.tcp.port = __constant_htons(Q931_PORT), 1179 .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT),
1180 .tuple.dst.protonum = IPPROTO_TCP, 1180 .tuple.dst.protonum = IPPROTO_TCP,
1181 .help = q931_help, 1181 .help = q931_help,
1182 .expect_policy = &q931_exp_policy, 1182 .expect_policy = &q931_exp_policy,
@@ -1741,7 +1741,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
1741 .name = "RAS", 1741 .name = "RAS",
1742 .me = THIS_MODULE, 1742 .me = THIS_MODULE,
1743 .tuple.src.l3num = AF_INET, 1743 .tuple.src.l3num = AF_INET,
1744 .tuple.src.u.udp.port = __constant_htons(RAS_PORT), 1744 .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT),
1745 .tuple.dst.protonum = IPPROTO_UDP, 1745 .tuple.dst.protonum = IPPROTO_UDP,
1746 .help = ras_help, 1746 .help = ras_help,
1747 .expect_policy = &ras_exp_policy, 1747 .expect_policy = &ras_exp_policy,
@@ -1750,7 +1750,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = {
1750 .name = "RAS", 1750 .name = "RAS",
1751 .me = THIS_MODULE, 1751 .me = THIS_MODULE,
1752 .tuple.src.l3num = AF_INET6, 1752 .tuple.src.l3num = AF_INET6,
1753 .tuple.src.u.udp.port = __constant_htons(RAS_PORT), 1753 .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT),
1754 .tuple.dst.protonum = IPPROTO_UDP, 1754 .tuple.dst.protonum = IPPROTO_UDP,
1755 .help = ras_help, 1755 .help = ras_help,
1756 .expect_policy = &ras_exp_policy, 1756 .expect_policy = &ras_exp_policy,
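
Editor's note, not part of the patch: the __constant_htons() to cpu_to_be16() changes in this and the following helper files rely on cpu_to_be16() folding to a compile-time constant when its argument is constant, so it remains usable in static initializers. A minimal illustration with hypothetical names (sketch_tuple, SKETCH_PORT); kernel context with <linux/types.h>/<asm/byteorder.h> is assumed.

#define SKETCH_PORT 1720        /* hypothetical, stands in for Q931_PORT */

struct sketch_tuple {
        __be16 src_port;
};

/* cpu_to_be16() on a constant is evaluated at compile time, so it can
 * initialize static data exactly as __constant_htons() did. */
static struct sketch_tuple sketch = {
        .src_port = cpu_to_be16(SKETCH_PORT),
};
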
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index a51bdac9f3a0..30b8e9009f99 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -142,6 +142,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
142 142
143 BUG_ON(me->expect_policy == NULL); 143 BUG_ON(me->expect_policy == NULL);
144 BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); 144 BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
145 BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1);
145 146
146 mutex_lock(&nf_ct_helper_mutex); 147 mutex_lock(&nf_ct_helper_mutex);
147 hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); 148 hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]);
@@ -158,6 +159,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
158 struct nf_conntrack_tuple_hash *h; 159 struct nf_conntrack_tuple_hash *h;
159 struct nf_conntrack_expect *exp; 160 struct nf_conntrack_expect *exp;
160 const struct hlist_node *n, *next; 161 const struct hlist_node *n, *next;
162 const struct hlist_nulls_node *nn;
161 unsigned int i; 163 unsigned int i;
162 164
163 /* Get rid of expectations */ 165 /* Get rid of expectations */
@@ -174,10 +176,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
174 } 176 }
175 177
176 /* Get rid of expecteds, set helpers to NULL. */ 178 /* Get rid of expecteds, set helpers to NULL. */
177 hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) 179 hlist_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode)
178 unhelp(h, me); 180 unhelp(h, me);
179 for (i = 0; i < nf_conntrack_htable_size; i++) { 181 for (i = 0; i < nf_conntrack_htable_size; i++) {
180 hlist_for_each_entry(h, n, &net->ct.hash[i], hnode) 182 hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
181 unhelp(h, me); 183 unhelp(h, me);
182 } 184 }
183} 185}
@@ -217,7 +219,7 @@ int nf_conntrack_helper_init(void)
217 219
218 nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ 220 nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
219 nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 221 nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,
220 &nf_ct_helper_vmalloc); 222 &nf_ct_helper_vmalloc, 0);
221 if (!nf_ct_helper_hash) 223 if (!nf_ct_helper_hash)
222 return -ENOMEM; 224 return -ENOMEM;
223 225
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 5af4273b4668..8a3875e36ec2 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -105,7 +105,7 @@ static struct nf_conntrack_expect_policy exp_policy = {
105static struct nf_conntrack_helper helper __read_mostly = { 105static struct nf_conntrack_helper helper __read_mostly = {
106 .name = "netbios-ns", 106 .name = "netbios-ns",
107 .tuple.src.l3num = AF_INET, 107 .tuple.src.l3num = AF_INET,
108 .tuple.src.u.udp.port = __constant_htons(NMBD_PORT), 108 .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT),
109 .tuple.dst.protonum = IPPROTO_UDP, 109 .tuple.dst.protonum = IPPROTO_UDP,
110 .me = THIS_MODULE, 110 .me = THIS_MODULE,
111 .help = help, 111 .help = help,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index cb78aa00399e..c6439c77953c 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -19,6 +19,7 @@
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/kernel.h> 20#include <linux/kernel.h>
21#include <linux/rculist.h> 21#include <linux/rculist.h>
22#include <linux/rculist_nulls.h>
22#include <linux/types.h> 23#include <linux/types.h>
23#include <linux/timer.h> 24#include <linux/timer.h>
24#include <linux/skbuff.h> 25#include <linux/skbuff.h>
@@ -404,6 +405,78 @@ nla_put_failure:
404} 405}
405 406
406#ifdef CONFIG_NF_CONNTRACK_EVENTS 407#ifdef CONFIG_NF_CONNTRACK_EVENTS
408/*
409 * The general structure of a ctnetlink event is
410 *
411 * CTA_TUPLE_ORIG
412 * <l3/l4-proto-attributes>
413 * CTA_TUPLE_REPLY
414 * <l3/l4-proto-attributes>
415 * CTA_ID
416 * ...
417 * CTA_PROTOINFO
418 * <l4-proto-attributes>
419 * CTA_TUPLE_MASTER
420 * <l3/l4-proto-attributes>
421 *
 422 * Therefore the formula is
423 *
424 * size = sizeof(headers) + sizeof(generic_nlas) + 3 * sizeof(tuple_nlas)
425 * + sizeof(protoinfo_nlas)
426 */
427static struct sk_buff *
428ctnetlink_alloc_skb(const struct nf_conntrack_tuple *tuple, gfp_t gfp)
429{
430 struct nf_conntrack_l3proto *l3proto;
431 struct nf_conntrack_l4proto *l4proto;
432 int len;
433
434#define NLA_TYPE_SIZE(type) nla_total_size(sizeof(type))
435
 436 /* proto independent part */
437 len = NLMSG_SPACE(sizeof(struct nfgenmsg))
438 + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
439 + 3 * nla_total_size(0) /* CTA_TUPLE_IP */
440 + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */
441 + 3 * NLA_TYPE_SIZE(u_int8_t) /* CTA_PROTO_NUM */
442 + NLA_TYPE_SIZE(u_int32_t) /* CTA_ID */
443 + NLA_TYPE_SIZE(u_int32_t) /* CTA_STATUS */
444#ifdef CONFIG_NF_CT_ACCT
445 + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
446 + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_PACKETS */
447 + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_BYTES */
448#endif
449 + NLA_TYPE_SIZE(u_int32_t) /* CTA_TIMEOUT */
450 + nla_total_size(0) /* CTA_PROTOINFO */
451 + nla_total_size(0) /* CTA_HELP */
452 + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
453#ifdef CONFIG_NF_CONNTRACK_SECMARK
454 + NLA_TYPE_SIZE(u_int32_t) /* CTA_SECMARK */
455#endif
456#ifdef CONFIG_NF_NAT_NEEDED
457 + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
458 + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_POS */
459 + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_BEFORE */
460 + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_AFTER */
461#endif
462#ifdef CONFIG_NF_CONNTRACK_MARK
463 + NLA_TYPE_SIZE(u_int32_t) /* CTA_MARK */
464#endif
465 ;
466
467#undef NLA_TYPE_SIZE
468
469 rcu_read_lock();
470 l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
471 len += l3proto->nla_size;
472
473 l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum);
474 len += l4proto->nla_size;
475 rcu_read_unlock();
476
477 return alloc_skb(len, gfp);
478}
479
407static int ctnetlink_conntrack_event(struct notifier_block *this, 480static int ctnetlink_conntrack_event(struct notifier_block *this,
408 unsigned long events, void *ptr) 481 unsigned long events, void *ptr)
409{ 482{
@@ -437,7 +510,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
437 if (!item->report && !nfnetlink_has_listeners(group)) 510 if (!item->report && !nfnetlink_has_listeners(group))
438 return NOTIFY_DONE; 511 return NOTIFY_DONE;
439 512
440 skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); 513 skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC);
441 if (!skb) 514 if (!skb)
442 return NOTIFY_DONE; 515 return NOTIFY_DONE;
443 516
@@ -518,6 +591,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
518nla_put_failure: 591nla_put_failure:
519 rcu_read_unlock(); 592 rcu_read_unlock();
520nlmsg_failure: 593nlmsg_failure:
594 nfnetlink_set_err(0, group, -ENOBUFS);
521 kfree_skb(skb); 595 kfree_skb(skb);
522 return NOTIFY_DONE; 596 return NOTIFY_DONE;
523} 597}
@@ -535,7 +609,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
535{ 609{
536 struct nf_conn *ct, *last; 610 struct nf_conn *ct, *last;
537 struct nf_conntrack_tuple_hash *h; 611 struct nf_conntrack_tuple_hash *h;
538 struct hlist_node *n; 612 struct hlist_nulls_node *n;
539 struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); 613 struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
540 u_int8_t l3proto = nfmsg->nfgen_family; 614 u_int8_t l3proto = nfmsg->nfgen_family;
541 615
@@ -543,27 +617,27 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
543 last = (struct nf_conn *)cb->args[1]; 617 last = (struct nf_conn *)cb->args[1];
544 for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { 618 for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
545restart: 619restart:
546 hlist_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], 620 hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]],
547 hnode) { 621 hnnode) {
548 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) 622 if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL)
549 continue; 623 continue;
550 ct = nf_ct_tuplehash_to_ctrack(h); 624 ct = nf_ct_tuplehash_to_ctrack(h);
625 if (!atomic_inc_not_zero(&ct->ct_general.use))
626 continue;
551 /* Dump entries of a given L3 protocol number. 627 /* Dump entries of a given L3 protocol number.
552 * If it is not specified, ie. l3proto == 0, 628 * If it is not specified, ie. l3proto == 0,
553 * then dump everything. */ 629 * then dump everything. */
554 if (l3proto && nf_ct_l3num(ct) != l3proto) 630 if (l3proto && nf_ct_l3num(ct) != l3proto)
555 continue; 631 goto releasect;
556 if (cb->args[1]) { 632 if (cb->args[1]) {
557 if (ct != last) 633 if (ct != last)
558 continue; 634 goto releasect;
559 cb->args[1] = 0; 635 cb->args[1] = 0;
560 } 636 }
561 if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, 637 if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
562 cb->nlh->nlmsg_seq, 638 cb->nlh->nlmsg_seq,
563 IPCTNL_MSG_CT_NEW, 639 IPCTNL_MSG_CT_NEW,
564 1, ct) < 0) { 640 1, ct) < 0) {
565 if (!atomic_inc_not_zero(&ct->ct_general.use))
566 continue;
567 cb->args[1] = (unsigned long)ct; 641 cb->args[1] = (unsigned long)ct;
568 goto out; 642 goto out;
569 } 643 }
@@ -576,6 +650,8 @@ restart:
576 if (acct) 650 if (acct)
577 memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX])); 651 memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX]));
578 } 652 }
653releasect:
654 nf_ct_put(ct);
579 } 655 }
580 if (cb->args[1]) { 656 if (cb->args[1]) {
581 cb->args[1] = 0; 657 cb->args[1] = 0;
@@ -599,7 +675,8 @@ ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple)
599 675
600 nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); 676 nla_parse_nested(tb, CTA_IP_MAX, attr, NULL);
601 677
602 l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); 678 rcu_read_lock();
679 l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
603 680
604 if (likely(l3proto->nlattr_to_tuple)) { 681 if (likely(l3proto->nlattr_to_tuple)) {
605 ret = nla_validate_nested(attr, CTA_IP_MAX, 682 ret = nla_validate_nested(attr, CTA_IP_MAX,
@@ -608,7 +685,7 @@ ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple)
608 ret = l3proto->nlattr_to_tuple(tb, tuple); 685 ret = l3proto->nlattr_to_tuple(tb, tuple);
609 } 686 }
610 687
611 nf_ct_l3proto_put(l3proto); 688 rcu_read_unlock();
612 689
613 return ret; 690 return ret;
614} 691}
@@ -633,7 +710,8 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr,
633 return -EINVAL; 710 return -EINVAL;
634 tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]); 711 tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
635 712
636 l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); 713 rcu_read_lock();
714 l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum);
637 715
638 if (likely(l4proto->nlattr_to_tuple)) { 716 if (likely(l4proto->nlattr_to_tuple)) {
639 ret = nla_validate_nested(attr, CTA_PROTO_MAX, 717 ret = nla_validate_nested(attr, CTA_PROTO_MAX,
@@ -642,7 +720,7 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr,
642 ret = l4proto->nlattr_to_tuple(tb, tuple); 720 ret = l4proto->nlattr_to_tuple(tb, tuple);
643 } 721 }
644 722
645 nf_ct_l4proto_put(l4proto); 723 rcu_read_unlock();
646 724
647 return ret; 725 return ret;
648} 726}
@@ -989,10 +1067,11 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[])
989 1067
990 nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, NULL); 1068 nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, NULL);
991 1069
992 l4proto = nf_ct_l4proto_find_get(nf_ct_l3num(ct), nf_ct_protonum(ct)); 1070 rcu_read_lock();
1071 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
993 if (l4proto->from_nlattr) 1072 if (l4proto->from_nlattr)
994 err = l4proto->from_nlattr(tb, ct); 1073 err = l4proto->from_nlattr(tb, ct);
995 nf_ct_l4proto_put(l4proto); 1074 rcu_read_unlock();
996 1075
997 return err; 1076 return err;
998} 1077}
@@ -1062,6 +1141,10 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[])
1062{ 1141{
1063 int err; 1142 int err;
1064 1143
 1144 /* only allow NAT changes and master assignment for new conntracks */
1145 if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST] || cda[CTA_TUPLE_MASTER])
1146 return -EOPNOTSUPP;
1147
1065 if (cda[CTA_HELP]) { 1148 if (cda[CTA_HELP]) {
1066 err = ctnetlink_change_helper(ct, cda); 1149 err = ctnetlink_change_helper(ct, cda);
1067 if (err < 0) 1150 if (err < 0)
@@ -1124,13 +1207,11 @@ ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report)
1124 report); 1207 report);
1125} 1208}
1126 1209
1127static int 1210static struct nf_conn *
1128ctnetlink_create_conntrack(struct nlattr *cda[], 1211ctnetlink_create_conntrack(struct nlattr *cda[],
1129 struct nf_conntrack_tuple *otuple, 1212 struct nf_conntrack_tuple *otuple,
1130 struct nf_conntrack_tuple *rtuple, 1213 struct nf_conntrack_tuple *rtuple,
1131 struct nf_conn *master_ct, 1214 u8 u3)
1132 u32 pid,
1133 int report)
1134{ 1215{
1135 struct nf_conn *ct; 1216 struct nf_conn *ct;
1136 int err = -EINVAL; 1217 int err = -EINVAL;
@@ -1138,10 +1219,10 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
1138 1219
1139 ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); 1220 ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC);
1140 if (IS_ERR(ct)) 1221 if (IS_ERR(ct))
1141 return -ENOMEM; 1222 return ERR_PTR(-ENOMEM);
1142 1223
1143 if (!cda[CTA_TIMEOUT]) 1224 if (!cda[CTA_TIMEOUT])
1144 goto err; 1225 goto err1;
1145 ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); 1226 ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT]));
1146 1227
1147 ct->timeout.expires = jiffies + ct->timeout.expires * HZ; 1228 ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
@@ -1152,10 +1233,8 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
1152 char *helpname; 1233 char *helpname;
1153 1234
1154 err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); 1235 err = ctnetlink_parse_help(cda[CTA_HELP], &helpname);
1155 if (err < 0) { 1236 if (err < 0)
1156 rcu_read_unlock(); 1237 goto err2;
1157 goto err;
1158 }
1159 1238
1160 helper = __nf_conntrack_helper_find_byname(helpname); 1239 helper = __nf_conntrack_helper_find_byname(helpname);
1161 if (helper == NULL) { 1240 if (helper == NULL) {
@@ -1163,28 +1242,26 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
1163#ifdef CONFIG_MODULES 1242#ifdef CONFIG_MODULES
1164 if (request_module("nfct-helper-%s", helpname) < 0) { 1243 if (request_module("nfct-helper-%s", helpname) < 0) {
1165 err = -EOPNOTSUPP; 1244 err = -EOPNOTSUPP;
1166 goto err; 1245 goto err1;
1167 } 1246 }
1168 1247
1169 rcu_read_lock(); 1248 rcu_read_lock();
1170 helper = __nf_conntrack_helper_find_byname(helpname); 1249 helper = __nf_conntrack_helper_find_byname(helpname);
1171 if (helper) { 1250 if (helper) {
1172 rcu_read_unlock();
1173 err = -EAGAIN; 1251 err = -EAGAIN;
1174 goto err; 1252 goto err2;
1175 } 1253 }
1176 rcu_read_unlock(); 1254 rcu_read_unlock();
1177#endif 1255#endif
1178 err = -EOPNOTSUPP; 1256 err = -EOPNOTSUPP;
1179 goto err; 1257 goto err1;
1180 } else { 1258 } else {
1181 struct nf_conn_help *help; 1259 struct nf_conn_help *help;
1182 1260
1183 help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); 1261 help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
1184 if (help == NULL) { 1262 if (help == NULL) {
1185 rcu_read_unlock();
1186 err = -ENOMEM; 1263 err = -ENOMEM;
1187 goto err; 1264 goto err2;
1188 } 1265 }
1189 1266
1190 /* not in hash table yet so not strictly necessary */ 1267 /* not in hash table yet so not strictly necessary */
@@ -1193,44 +1270,34 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
1193 } else { 1270 } else {
 1194 /* try an implicit helper assignment */ 1271 /* try an implicit helper assignment */
1195 err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC); 1272 err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC);
1196 if (err < 0) { 1273 if (err < 0)
1197 rcu_read_unlock(); 1274 goto err2;
1198 goto err;
1199 }
1200 } 1275 }
1201 1276
1202 if (cda[CTA_STATUS]) { 1277 if (cda[CTA_STATUS]) {
1203 err = ctnetlink_change_status(ct, cda); 1278 err = ctnetlink_change_status(ct, cda);
1204 if (err < 0) { 1279 if (err < 0)
1205 rcu_read_unlock(); 1280 goto err2;
1206 goto err;
1207 }
1208 } 1281 }
1209 1282
1210 if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { 1283 if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
1211 err = ctnetlink_change_nat(ct, cda); 1284 err = ctnetlink_change_nat(ct, cda);
1212 if (err < 0) { 1285 if (err < 0)
1213 rcu_read_unlock(); 1286 goto err2;
1214 goto err;
1215 }
1216 } 1287 }
1217 1288
1218#ifdef CONFIG_NF_NAT_NEEDED 1289#ifdef CONFIG_NF_NAT_NEEDED
1219 if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) { 1290 if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) {
1220 err = ctnetlink_change_nat_seq_adj(ct, cda); 1291 err = ctnetlink_change_nat_seq_adj(ct, cda);
1221 if (err < 0) { 1292 if (err < 0)
1222 rcu_read_unlock(); 1293 goto err2;
1223 goto err;
1224 }
1225 } 1294 }
1226#endif 1295#endif
1227 1296
1228 if (cda[CTA_PROTOINFO]) { 1297 if (cda[CTA_PROTOINFO]) {
1229 err = ctnetlink_change_protoinfo(ct, cda); 1298 err = ctnetlink_change_protoinfo(ct, cda);
1230 if (err < 0) { 1299 if (err < 0)
1231 rcu_read_unlock(); 1300 goto err2;
1232 goto err;
1233 }
1234 } 1301 }
1235 1302
1236 nf_ct_acct_ext_add(ct, GFP_ATOMIC); 1303 nf_ct_acct_ext_add(ct, GFP_ATOMIC);
@@ -1241,23 +1308,36 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
1241#endif 1308#endif
1242 1309
1243 /* setup master conntrack: this is a confirmed expectation */ 1310 /* setup master conntrack: this is a confirmed expectation */
1244 if (master_ct) { 1311 if (cda[CTA_TUPLE_MASTER]) {
1312 struct nf_conntrack_tuple master;
1313 struct nf_conntrack_tuple_hash *master_h;
1314 struct nf_conn *master_ct;
1315
1316 err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, u3);
1317 if (err < 0)
1318 goto err2;
1319
1320 master_h = nf_conntrack_find_get(&init_net, &master);
1321 if (master_h == NULL) {
1322 err = -ENOENT;
1323 goto err2;
1324 }
1325 master_ct = nf_ct_tuplehash_to_ctrack(master_h);
1245 __set_bit(IPS_EXPECTED_BIT, &ct->status); 1326 __set_bit(IPS_EXPECTED_BIT, &ct->status);
1246 ct->master = master_ct; 1327 ct->master = master_ct;
1247 } 1328 }
1248 1329
1249 nf_conntrack_get(&ct->ct_general);
1250 add_timer(&ct->timeout); 1330 add_timer(&ct->timeout);
1251 nf_conntrack_hash_insert(ct); 1331 nf_conntrack_hash_insert(ct);
1252 rcu_read_unlock(); 1332 rcu_read_unlock();
1253 ctnetlink_event_report(ct, pid, report);
1254 nf_ct_put(ct);
1255 1333
1256 return 0; 1334 return ct;
1257 1335
1258err: 1336err2:
1337 rcu_read_unlock();
1338err1:
1259 nf_conntrack_free(ct); 1339 nf_conntrack_free(ct);
1260 return err; 1340 return ERR_PTR(err);
1261} 1341}
1262 1342
1263static int 1343static int
@@ -1289,38 +1369,25 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1289 h = __nf_conntrack_find(&init_net, &rtuple); 1369 h = __nf_conntrack_find(&init_net, &rtuple);
1290 1370
1291 if (h == NULL) { 1371 if (h == NULL) {
1292 struct nf_conntrack_tuple master; 1372 err = -ENOENT;
1293 struct nf_conntrack_tuple_hash *master_h = NULL; 1373 if (nlh->nlmsg_flags & NLM_F_CREATE) {
1294 struct nf_conn *master_ct = NULL; 1374 struct nf_conn *ct;
1295
1296 if (cda[CTA_TUPLE_MASTER]) {
1297 err = ctnetlink_parse_tuple(cda,
1298 &master,
1299 CTA_TUPLE_MASTER,
1300 u3);
1301 if (err < 0)
1302 goto out_unlock;
1303 1375
1304 master_h = __nf_conntrack_find(&init_net, &master); 1376 ct = ctnetlink_create_conntrack(cda, &otuple,
1305 if (master_h == NULL) { 1377 &rtuple, u3);
1306 err = -ENOENT; 1378 if (IS_ERR(ct)) {
1379 err = PTR_ERR(ct);
1307 goto out_unlock; 1380 goto out_unlock;
1308 } 1381 }
1309 master_ct = nf_ct_tuplehash_to_ctrack(master_h); 1382 err = 0;
1310 nf_conntrack_get(&master_ct->ct_general); 1383 nf_conntrack_get(&ct->ct_general);
1311 } 1384 spin_unlock_bh(&nf_conntrack_lock);
1312 1385 ctnetlink_event_report(ct,
1313 err = -ENOENT; 1386 NETLINK_CB(skb).pid,
1314 if (nlh->nlmsg_flags & NLM_F_CREATE) 1387 nlmsg_report(nlh));
1315 err = ctnetlink_create_conntrack(cda, 1388 nf_ct_put(ct);
1316 &otuple, 1389 } else
1317 &rtuple, 1390 spin_unlock_bh(&nf_conntrack_lock);
1318 master_ct,
1319 NETLINK_CB(skb).pid,
1320 nlmsg_report(nlh));
1321 spin_unlock_bh(&nf_conntrack_lock);
1322 if (err < 0 && master_ct)
1323 nf_ct_put(master_ct);
1324 1391
1325 return err; 1392 return err;
1326 } 1393 }
@@ -1332,17 +1399,6 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
1332 if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { 1399 if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
1333 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 1400 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
1334 1401
1335 /* we only allow nat config for new conntracks */
1336 if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
1337 err = -EOPNOTSUPP;
1338 goto out_unlock;
1339 }
1340 /* can't link an existing conntrack to a master */
1341 if (cda[CTA_TUPLE_MASTER]) {
1342 err = -EOPNOTSUPP;
1343 goto out_unlock;
1344 }
1345
1346 err = ctnetlink_change_conntrack(ct, cda); 1402 err = ctnetlink_change_conntrack(ct, cda);
1347 if (err == 0) { 1403 if (err == 0) {
1348 nf_conntrack_get(&ct->ct_general); 1404 nf_conntrack_get(&ct->ct_general);
@@ -1533,6 +1589,7 @@ static int ctnetlink_expect_event(struct notifier_block *this,
1533nla_put_failure: 1589nla_put_failure:
1534 rcu_read_unlock(); 1590 rcu_read_unlock();
1535nlmsg_failure: 1591nlmsg_failure:
1592 nfnetlink_set_err(0, 0, -ENOBUFS);
1536 kfree_skb(skb); 1593 kfree_skb(skb);
1537 return NOTIFY_DONE; 1594 return NOTIFY_DONE;
1538} 1595}
@@ -1780,6 +1837,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3, u32 pid, int report)
1780 goto out; 1837 goto out;
1781 } 1838 }
1782 1839
1840 exp->class = 0;
1783 exp->expectfn = NULL; 1841 exp->expectfn = NULL;
1784 exp->flags = 0; 1842 exp->flags = 0;
1785 exp->master = ct; 1843 exp->master = ct;
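
Editor's note, not part of the patch: ctnetlink_alloc_skb() above sizes the event skb from the attributes it will carry instead of using a fixed NLMSG_GOODSIZE. The helpers it builds on are nla_total_size(), which rounds one attribute (header plus payload) up to NLA_ALIGN, and nla_policy_len(), which returns the worst-case length of every attribute described by a policy. A small sketch of how such a size adds up; sketch_event_size() and its parameters are hypothetical, and the 2.6.29-era <net/netlink.h> and <linux/netfilter/nfnetlink.h> are assumed.

static int sketch_event_size(const struct nla_policy *proto_policy,
                             int proto_max)
{
        int len = NLMSG_SPACE(sizeof(struct nfgenmsg)); /* netlink + nfnetlink headers */

        len += nla_total_size(0);                       /* a nested container, e.g. CTA_PROTOINFO */
        len += 2 * nla_total_size(sizeof(u_int32_t));   /* two fixed u32 attributes */
        /* worst case for whatever the protocol may attach, from its policy */
        len += nla_policy_len(proto_policy, proto_max + 1);

        return len;
}
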
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 9e169ef2e854..3807ac7faf4c 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -66,7 +66,7 @@ void
66 struct nf_conntrack_expect *exp) __read_mostly; 66 struct nf_conntrack_expect *exp) __read_mostly;
67EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); 67EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn);
68 68
69#if defined(DEBUG) || defined(CONFIG_DYNAMIC_PRINTK_DEBUG) 69#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG)
70/* PptpControlMessageType names */ 70/* PptpControlMessageType names */
71const char *const pptp_msg_name[] = { 71const char *const pptp_msg_name[] = {
72 "UNKNOWN_MESSAGE", 72 "UNKNOWN_MESSAGE",
@@ -591,7 +591,7 @@ static struct nf_conntrack_helper pptp __read_mostly = {
591 .name = "pptp", 591 .name = "pptp",
592 .me = THIS_MODULE, 592 .me = THIS_MODULE,
593 .tuple.src.l3num = AF_INET, 593 .tuple.src.l3num = AF_INET,
594 .tuple.src.u.tcp.port = __constant_htons(PPTP_CONTROL_PORT), 594 .tuple.src.u.tcp.port = cpu_to_be16(PPTP_CONTROL_PORT),
595 .tuple.dst.protonum = IPPROTO_TCP, 595 .tuple.dst.protonum = IPPROTO_TCP,
596 .help = conntrack_pptp_help, 596 .help = conntrack_pptp_help,
597 .destroy = pptp_destroy_siblings, 597 .destroy = pptp_destroy_siblings,
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 592d73344d46..1a4568bf7ea5 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -74,27 +74,6 @@ EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
74 74
75/* this is guaranteed to always return a valid protocol helper, since 75/* this is guaranteed to always return a valid protocol helper, since
76 * it falls back to generic_protocol */ 76 * it falls back to generic_protocol */
77struct nf_conntrack_l4proto *
78nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto)
79{
80 struct nf_conntrack_l4proto *p;
81
82 rcu_read_lock();
83 p = __nf_ct_l4proto_find(l3proto, l4proto);
84 if (!try_module_get(p->me))
85 p = &nf_conntrack_l4proto_generic;
86 rcu_read_unlock();
87
88 return p;
89}
90EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
91
92void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p)
93{
94 module_put(p->me);
95}
96EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
97
98struct nf_conntrack_l3proto * 77struct nf_conntrack_l3proto *
99nf_ct_l3proto_find_get(u_int16_t l3proto) 78nf_ct_l3proto_find_get(u_int16_t l3proto)
100{ 79{
@@ -188,6 +167,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
188 if (proto->l3proto >= AF_MAX) 167 if (proto->l3proto >= AF_MAX)
189 return -EBUSY; 168 return -EBUSY;
190 169
170 if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size)
171 return -EINVAL;
172
191 mutex_lock(&nf_ct_proto_mutex); 173 mutex_lock(&nf_ct_proto_mutex);
192 if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) { 174 if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) {
193 ret = -EBUSY; 175 ret = -EBUSY;
@@ -198,6 +180,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
198 if (ret < 0) 180 if (ret < 0)
199 goto out_unlock; 181 goto out_unlock;
200 182
183 if (proto->nlattr_tuple_size)
184 proto->nla_size = 3 * proto->nlattr_tuple_size();
185
201 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto); 186 rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto);
202 187
203out_unlock: 188out_unlock:
@@ -284,6 +269,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
284 if (l4proto->l3proto >= PF_MAX) 269 if (l4proto->l3proto >= PF_MAX)
285 return -EBUSY; 270 return -EBUSY;
286 271
272 if ((l4proto->to_nlattr && !l4proto->nlattr_size)
273 || (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
274 return -EINVAL;
275
287 mutex_lock(&nf_ct_proto_mutex); 276 mutex_lock(&nf_ct_proto_mutex);
288 if (!nf_ct_protos[l4proto->l3proto]) { 277 if (!nf_ct_protos[l4proto->l3proto]) {
 289 /* l3proto may be loaded later. */ 278 /* l3proto may be loaded later. */
@@ -311,6 +300,12 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto)
311 if (ret < 0) 300 if (ret < 0)
312 goto out_unlock; 301 goto out_unlock;
313 302
303 l4proto->nla_size = 0;
304 if (l4proto->nlattr_size)
305 l4proto->nla_size += l4proto->nlattr_size();
306 if (l4proto->nlattr_tuple_size)
307 l4proto->nla_size += 3 * l4proto->nlattr_tuple_size();
308
314 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], 309 rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto],
315 l4proto); 310 l4proto);
316 311
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 8fcf1762fabf..50dac8dbe7d8 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -16,6 +16,9 @@
16#include <linux/skbuff.h> 16#include <linux/skbuff.h>
17#include <linux/dccp.h> 17#include <linux/dccp.h>
18 18
19#include <net/net_namespace.h>
20#include <net/netns/generic.h>
21
19#include <linux/netfilter/nfnetlink_conntrack.h> 22#include <linux/netfilter/nfnetlink_conntrack.h>
20#include <net/netfilter/nf_conntrack.h> 23#include <net/netfilter/nf_conntrack.h>
21#include <net/netfilter/nf_conntrack_l4proto.h> 24#include <net/netfilter/nf_conntrack_l4proto.h>
@@ -23,8 +26,6 @@
23 26
24static DEFINE_RWLOCK(dccp_lock); 27static DEFINE_RWLOCK(dccp_lock);
25 28
26static int nf_ct_dccp_loose __read_mostly = 1;
27
28/* Timeouts are based on values from RFC4340: 29/* Timeouts are based on values from RFC4340:
29 * 30 *
30 * - REQUEST: 31 * - REQUEST:
@@ -72,16 +73,6 @@ static int nf_ct_dccp_loose __read_mostly = 1;
72 73
73#define DCCP_MSL (2 * 60 * HZ) 74#define DCCP_MSL (2 * 60 * HZ)
74 75
75static unsigned int dccp_timeout[CT_DCCP_MAX + 1] __read_mostly = {
76 [CT_DCCP_REQUEST] = 2 * DCCP_MSL,
77 [CT_DCCP_RESPOND] = 4 * DCCP_MSL,
78 [CT_DCCP_PARTOPEN] = 4 * DCCP_MSL,
79 [CT_DCCP_OPEN] = 12 * 3600 * HZ,
80 [CT_DCCP_CLOSEREQ] = 64 * HZ,
81 [CT_DCCP_CLOSING] = 64 * HZ,
82 [CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL,
83};
84
85static const char * const dccp_state_names[] = { 76static const char * const dccp_state_names[] = {
86 [CT_DCCP_NONE] = "NONE", 77 [CT_DCCP_NONE] = "NONE",
87 [CT_DCCP_REQUEST] = "REQUEST", 78 [CT_DCCP_REQUEST] = "REQUEST",
@@ -393,6 +384,22 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
393 }, 384 },
394}; 385};
395 386
 387/* this module's per-net specifics */
388static int dccp_net_id;
389struct dccp_net {
390 int dccp_loose;
391 unsigned int dccp_timeout[CT_DCCP_MAX + 1];
392#ifdef CONFIG_SYSCTL
393 struct ctl_table_header *sysctl_header;
394 struct ctl_table *sysctl_table;
395#endif
396};
397
398static inline struct dccp_net *dccp_pernet(struct net *net)
399{
400 return net_generic(net, dccp_net_id);
401}
402
396static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, 403static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
397 struct nf_conntrack_tuple *tuple) 404 struct nf_conntrack_tuple *tuple)
398{ 405{
@@ -419,6 +426,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
419 unsigned int dataoff) 426 unsigned int dataoff)
420{ 427{
421 struct net *net = nf_ct_net(ct); 428 struct net *net = nf_ct_net(ct);
429 struct dccp_net *dn;
422 struct dccp_hdr _dh, *dh; 430 struct dccp_hdr _dh, *dh;
423 const char *msg; 431 const char *msg;
424 u_int8_t state; 432 u_int8_t state;
@@ -429,7 +437,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
429 state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; 437 state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
430 switch (state) { 438 switch (state) {
431 default: 439 default:
432 if (nf_ct_dccp_loose == 0) { 440 dn = dccp_pernet(net);
441 if (dn->dccp_loose == 0) {
433 msg = "nf_ct_dccp: not picking up existing connection "; 442 msg = "nf_ct_dccp: not picking up existing connection ";
434 goto out_invalid; 443 goto out_invalid;
435 } 444 }
@@ -465,6 +474,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
465 u_int8_t pf, unsigned int hooknum) 474 u_int8_t pf, unsigned int hooknum)
466{ 475{
467 struct net *net = nf_ct_net(ct); 476 struct net *net = nf_ct_net(ct);
477 struct dccp_net *dn;
468 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); 478 enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
469 struct dccp_hdr _dh, *dh; 479 struct dccp_hdr _dh, *dh;
470 u_int8_t type, old_state, new_state; 480 u_int8_t type, old_state, new_state;
@@ -542,7 +552,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
542 ct->proto.dccp.last_pkt = type; 552 ct->proto.dccp.last_pkt = type;
543 ct->proto.dccp.state = new_state; 553 ct->proto.dccp.state = new_state;
544 write_unlock_bh(&dccp_lock); 554 write_unlock_bh(&dccp_lock);
545 nf_ct_refresh_acct(ct, ctinfo, skb, dccp_timeout[new_state]); 555
556 dn = dccp_pernet(net);
557 nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]);
546 558
547 return NF_ACCEPT; 559 return NF_ACCEPT;
548} 560}
@@ -657,16 +669,20 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
657 write_unlock_bh(&dccp_lock); 669 write_unlock_bh(&dccp_lock);
658 return 0; 670 return 0;
659} 671}
672
673static int dccp_nlattr_size(void)
674{
675 return nla_total_size(0) /* CTA_PROTOINFO_DCCP */
676 + nla_policy_len(dccp_nla_policy, CTA_PROTOINFO_DCCP_MAX + 1);
677}
660#endif 678#endif
661 679
662#ifdef CONFIG_SYSCTL 680#ifdef CONFIG_SYSCTL
663static unsigned int dccp_sysctl_table_users; 681/* template, data assigned later */
664static struct ctl_table_header *dccp_sysctl_header; 682static struct ctl_table dccp_sysctl_table[] = {
665static ctl_table dccp_sysctl_table[] = {
666 { 683 {
667 .ctl_name = CTL_UNNUMBERED, 684 .ctl_name = CTL_UNNUMBERED,
668 .procname = "nf_conntrack_dccp_timeout_request", 685 .procname = "nf_conntrack_dccp_timeout_request",
669 .data = &dccp_timeout[CT_DCCP_REQUEST],
670 .maxlen = sizeof(unsigned int), 686 .maxlen = sizeof(unsigned int),
671 .mode = 0644, 687 .mode = 0644,
672 .proc_handler = proc_dointvec_jiffies, 688 .proc_handler = proc_dointvec_jiffies,
@@ -674,7 +690,6 @@ static ctl_table dccp_sysctl_table[] = {
674 { 690 {
675 .ctl_name = CTL_UNNUMBERED, 691 .ctl_name = CTL_UNNUMBERED,
676 .procname = "nf_conntrack_dccp_timeout_respond", 692 .procname = "nf_conntrack_dccp_timeout_respond",
677 .data = &dccp_timeout[CT_DCCP_RESPOND],
678 .maxlen = sizeof(unsigned int), 693 .maxlen = sizeof(unsigned int),
679 .mode = 0644, 694 .mode = 0644,
680 .proc_handler = proc_dointvec_jiffies, 695 .proc_handler = proc_dointvec_jiffies,
@@ -682,7 +697,6 @@ static ctl_table dccp_sysctl_table[] = {
682 { 697 {
683 .ctl_name = CTL_UNNUMBERED, 698 .ctl_name = CTL_UNNUMBERED,
684 .procname = "nf_conntrack_dccp_timeout_partopen", 699 .procname = "nf_conntrack_dccp_timeout_partopen",
685 .data = &dccp_timeout[CT_DCCP_PARTOPEN],
686 .maxlen = sizeof(unsigned int), 700 .maxlen = sizeof(unsigned int),
687 .mode = 0644, 701 .mode = 0644,
688 .proc_handler = proc_dointvec_jiffies, 702 .proc_handler = proc_dointvec_jiffies,
@@ -690,7 +704,6 @@ static ctl_table dccp_sysctl_table[] = {
690 { 704 {
691 .ctl_name = CTL_UNNUMBERED, 705 .ctl_name = CTL_UNNUMBERED,
692 .procname = "nf_conntrack_dccp_timeout_open", 706 .procname = "nf_conntrack_dccp_timeout_open",
693 .data = &dccp_timeout[CT_DCCP_OPEN],
694 .maxlen = sizeof(unsigned int), 707 .maxlen = sizeof(unsigned int),
695 .mode = 0644, 708 .mode = 0644,
696 .proc_handler = proc_dointvec_jiffies, 709 .proc_handler = proc_dointvec_jiffies,
@@ -698,7 +711,6 @@ static ctl_table dccp_sysctl_table[] = {
698 { 711 {
699 .ctl_name = CTL_UNNUMBERED, 712 .ctl_name = CTL_UNNUMBERED,
700 .procname = "nf_conntrack_dccp_timeout_closereq", 713 .procname = "nf_conntrack_dccp_timeout_closereq",
701 .data = &dccp_timeout[CT_DCCP_CLOSEREQ],
702 .maxlen = sizeof(unsigned int), 714 .maxlen = sizeof(unsigned int),
703 .mode = 0644, 715 .mode = 0644,
704 .proc_handler = proc_dointvec_jiffies, 716 .proc_handler = proc_dointvec_jiffies,
@@ -706,7 +718,6 @@ static ctl_table dccp_sysctl_table[] = {
706 { 718 {
707 .ctl_name = CTL_UNNUMBERED, 719 .ctl_name = CTL_UNNUMBERED,
708 .procname = "nf_conntrack_dccp_timeout_closing", 720 .procname = "nf_conntrack_dccp_timeout_closing",
709 .data = &dccp_timeout[CT_DCCP_CLOSING],
710 .maxlen = sizeof(unsigned int), 721 .maxlen = sizeof(unsigned int),
711 .mode = 0644, 722 .mode = 0644,
712 .proc_handler = proc_dointvec_jiffies, 723 .proc_handler = proc_dointvec_jiffies,
@@ -714,7 +725,6 @@ static ctl_table dccp_sysctl_table[] = {
714 { 725 {
715 .ctl_name = CTL_UNNUMBERED, 726 .ctl_name = CTL_UNNUMBERED,
716 .procname = "nf_conntrack_dccp_timeout_timewait", 727 .procname = "nf_conntrack_dccp_timeout_timewait",
717 .data = &dccp_timeout[CT_DCCP_TIMEWAIT],
718 .maxlen = sizeof(unsigned int), 728 .maxlen = sizeof(unsigned int),
719 .mode = 0644, 729 .mode = 0644,
720 .proc_handler = proc_dointvec_jiffies, 730 .proc_handler = proc_dointvec_jiffies,
@@ -722,8 +732,7 @@ static ctl_table dccp_sysctl_table[] = {
722 { 732 {
723 .ctl_name = CTL_UNNUMBERED, 733 .ctl_name = CTL_UNNUMBERED,
724 .procname = "nf_conntrack_dccp_loose", 734 .procname = "nf_conntrack_dccp_loose",
725 .data = &nf_ct_dccp_loose, 735 .maxlen = sizeof(int),
726 .maxlen = sizeof(nf_ct_dccp_loose),
727 .mode = 0644, 736 .mode = 0644,
728 .proc_handler = proc_dointvec, 737 .proc_handler = proc_dointvec,
729 }, 738 },
@@ -746,16 +755,13 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {
746 .print_conntrack = dccp_print_conntrack, 755 .print_conntrack = dccp_print_conntrack,
747#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 756#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
748 .to_nlattr = dccp_to_nlattr, 757 .to_nlattr = dccp_to_nlattr,
758 .nlattr_size = dccp_nlattr_size,
749 .from_nlattr = nlattr_to_dccp, 759 .from_nlattr = nlattr_to_dccp,
750 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 760 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
761 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
751 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 762 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
752 .nla_policy = nf_ct_port_nla_policy, 763 .nla_policy = nf_ct_port_nla_policy,
753#endif 764#endif
754#ifdef CONFIG_SYSCTL
755 .ctl_table_users = &dccp_sysctl_table_users,
756 .ctl_table_header = &dccp_sysctl_header,
757 .ctl_table = dccp_sysctl_table,
758#endif
759}; 765};
760 766
761static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { 767static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
@@ -773,37 +779,111 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {
773 .to_nlattr = dccp_to_nlattr, 779 .to_nlattr = dccp_to_nlattr,
774 .from_nlattr = nlattr_to_dccp, 780 .from_nlattr = nlattr_to_dccp,
775 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 781 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
782 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
776 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 783 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
777 .nla_policy = nf_ct_port_nla_policy, 784 .nla_policy = nf_ct_port_nla_policy,
778#endif 785#endif
786};
787
788static __net_init int dccp_net_init(struct net *net)
789{
790 struct dccp_net *dn;
791 int err;
792
793 dn = kmalloc(sizeof(*dn), GFP_KERNEL);
794 if (!dn)
795 return -ENOMEM;
796
797 /* default values */
798 dn->dccp_loose = 1;
799 dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL;
800 dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL;
801 dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL;
802 dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ;
803 dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ;
804 dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ;
805 dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL;
806
807 err = net_assign_generic(net, dccp_net_id, dn);
808 if (err)
809 goto out;
810
811#ifdef CONFIG_SYSCTL
812 err = -ENOMEM;
813 dn->sysctl_table = kmemdup(dccp_sysctl_table,
814 sizeof(dccp_sysctl_table), GFP_KERNEL);
815 if (!dn->sysctl_table)
816 goto out;
817
818 dn->sysctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST];
819 dn->sysctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND];
820 dn->sysctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN];
821 dn->sysctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN];
822 dn->sysctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ];
823 dn->sysctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING];
824 dn->sysctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT];
825 dn->sysctl_table[7].data = &dn->dccp_loose;
826
827 dn->sysctl_header = register_net_sysctl_table(net,
828 nf_net_netfilter_sysctl_path, dn->sysctl_table);
829 if (!dn->sysctl_header) {
830 kfree(dn->sysctl_table);
831 goto out;
832 }
833#endif
834
835 return 0;
836
837out:
838 kfree(dn);
839 return err;
840}
841
842static __net_exit void dccp_net_exit(struct net *net)
843{
844 struct dccp_net *dn = dccp_pernet(net);
779#ifdef CONFIG_SYSCTL 845#ifdef CONFIG_SYSCTL
780 .ctl_table_users = &dccp_sysctl_table_users, 846 unregister_net_sysctl_table(dn->sysctl_header);
781 .ctl_table_header = &dccp_sysctl_header, 847 kfree(dn->sysctl_table);
782 .ctl_table = dccp_sysctl_table,
783#endif 848#endif
849 kfree(dn);
850
851 net_assign_generic(net, dccp_net_id, NULL);
852}
853
854static struct pernet_operations dccp_net_ops = {
855 .init = dccp_net_init,
856 .exit = dccp_net_exit,
784}; 857};
785 858
786static int __init nf_conntrack_proto_dccp_init(void) 859static int __init nf_conntrack_proto_dccp_init(void)
787{ 860{
788 int err; 861 int err;
789 862
790 err = nf_conntrack_l4proto_register(&dccp_proto4); 863 err = register_pernet_gen_subsys(&dccp_net_id, &dccp_net_ops);
791 if (err < 0) 864 if (err < 0)
792 goto err1; 865 goto err1;
793 866
794 err = nf_conntrack_l4proto_register(&dccp_proto6); 867 err = nf_conntrack_l4proto_register(&dccp_proto4);
795 if (err < 0) 868 if (err < 0)
796 goto err2; 869 goto err2;
870
871 err = nf_conntrack_l4proto_register(&dccp_proto6);
872 if (err < 0)
873 goto err3;
797 return 0; 874 return 0;
798 875
799err2: 876err3:
800 nf_conntrack_l4proto_unregister(&dccp_proto4); 877 nf_conntrack_l4proto_unregister(&dccp_proto4);
878err2:
879 unregister_pernet_gen_subsys(dccp_net_id, &dccp_net_ops);
801err1: 880err1:
802 return err; 881 return err;
803} 882}
804 883
805static void __exit nf_conntrack_proto_dccp_fini(void) 884static void __exit nf_conntrack_proto_dccp_fini(void)
806{ 885{
886 unregister_pernet_gen_subsys(dccp_net_id, &dccp_net_ops);
807 nf_conntrack_l4proto_unregister(&dccp_proto6); 887 nf_conntrack_l4proto_unregister(&dccp_proto6);
808 nf_conntrack_l4proto_unregister(&dccp_proto4); 888 nf_conntrack_l4proto_unregister(&dccp_proto4);
809} 889}
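
Editor's note, not part of the patch: the DCCP tracker now keeps its timeouts, the loose flag and the sysctl table per network namespace via the generic per-net storage. Stripped of the DCCP specifics, the pattern looks like the sketch below; every sketch_* name is hypothetical, and the 2.6.29-era register_pernet_gen_subsys()/net_generic() API plus <net/net_namespace.h>, <net/netns/generic.h> and <linux/slab.h> are assumed.

static int sketch_net_id;

struct sketch_net {
        unsigned int timeout;
};

static __net_init int sketch_net_init(struct net *net)
{
        struct sketch_net *sn;
        int err;

        sn = kmalloc(sizeof(*sn), GFP_KERNEL);
        if (!sn)
                return -ENOMEM;
        sn->timeout = 30 * HZ;          /* per-namespace default */

        err = net_assign_generic(net, sketch_net_id, sn);
        if (err)
                kfree(sn);
        return err;
}

static __net_exit void sketch_net_exit(struct net *net)
{
        kfree(net_generic(net, sketch_net_id));
        net_assign_generic(net, sketch_net_id, NULL);
}

static struct pernet_operations sketch_net_ops = {
        .init   = sketch_net_init,
        .exit   = sketch_net_exit,
};

/* registered from module init with:
 *      register_pernet_gen_subsys(&sketch_net_id, &sketch_net_ops);
 * and torn down with:
 *      unregister_pernet_gen_subsys(sketch_net_id, &sketch_net_ops);
 */
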
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 4be80d7b8795..829374f426c4 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -92,7 +92,7 @@ static struct ctl_table generic_compat_sysctl_table[] = {
92struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = 92struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly =
93{ 93{
94 .l3proto = PF_UNSPEC, 94 .l3proto = PF_UNSPEC,
95 .l4proto = 0, 95 .l4proto = 255,
96 .name = "unknown", 96 .name = "unknown",
97 .pkt_to_tuple = generic_pkt_to_tuple, 97 .pkt_to_tuple = generic_pkt_to_tuple,
98 .invert_tuple = generic_invert_tuple, 98 .invert_tuple = generic_invert_tuple,
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 1b279f9d6bf3..117b80112fcb 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -293,6 +293,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {
293 .me = THIS_MODULE, 293 .me = THIS_MODULE,
294#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 294#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
295 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 295 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
296 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
296 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 297 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
297 .nla_policy = nf_ct_port_nla_policy, 298 .nla_policy = nf_ct_port_nla_policy,
298#endif 299#endif
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 74e037901199..101b4ad9e817 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -537,6 +537,12 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
537 537
538 return 0; 538 return 0;
539} 539}
540
541static int sctp_nlattr_size(void)
542{
543 return nla_total_size(0) /* CTA_PROTOINFO_SCTP */
544 + nla_policy_len(sctp_nla_policy, CTA_PROTOINFO_SCTP_MAX + 1);
545}
540#endif 546#endif
541 547
542#ifdef CONFIG_SYSCTL 548#ifdef CONFIG_SYSCTL
@@ -668,8 +674,10 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {
668 .me = THIS_MODULE, 674 .me = THIS_MODULE,
669#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 675#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
670 .to_nlattr = sctp_to_nlattr, 676 .to_nlattr = sctp_to_nlattr,
677 .nlattr_size = sctp_nlattr_size,
671 .from_nlattr = nlattr_to_sctp, 678 .from_nlattr = nlattr_to_sctp,
672 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 679 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
680 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
673 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 681 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
674 .nla_policy = nf_ct_port_nla_policy, 682 .nla_policy = nf_ct_port_nla_policy,
675#endif 683#endif
@@ -696,8 +704,10 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {
696 .me = THIS_MODULE, 704 .me = THIS_MODULE,
697#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 705#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
698 .to_nlattr = sctp_to_nlattr, 706 .to_nlattr = sctp_to_nlattr,
707 .nlattr_size = sctp_nlattr_size,
699 .from_nlattr = nlattr_to_sctp, 708 .from_nlattr = nlattr_to_sctp,
700 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 709 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
710 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
701 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 711 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
702 .nla_policy = nf_ct_port_nla_policy, 712 .nla_policy = nf_ct_port_nla_policy,
703#endif 713#endif
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index a1edb9c1adee..b5ccf2b4b2e7 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -15,6 +15,7 @@
15#include <linux/skbuff.h> 15#include <linux/skbuff.h>
16#include <linux/ipv6.h> 16#include <linux/ipv6.h>
17#include <net/ip6_checksum.h> 17#include <net/ip6_checksum.h>
18#include <asm/unaligned.h>
18 19
19#include <net/tcp.h> 20#include <net/tcp.h>
20 21
@@ -25,6 +26,8 @@
25#include <net/netfilter/nf_conntrack_l4proto.h> 26#include <net/netfilter/nf_conntrack_l4proto.h>
26#include <net/netfilter/nf_conntrack_ecache.h> 27#include <net/netfilter/nf_conntrack_ecache.h>
27#include <net/netfilter/nf_log.h> 28#include <net/netfilter/nf_log.h>
29#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
28 31
29/* Protects ct->proto.tcp */ 32/* Protects ct->proto.tcp */
30static DEFINE_RWLOCK(tcp_lock); 33static DEFINE_RWLOCK(tcp_lock);
@@ -466,7 +469,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
466 for (i = 0; 469 for (i = 0;
467 i < (opsize - TCPOLEN_SACK_BASE); 470 i < (opsize - TCPOLEN_SACK_BASE);
468 i += TCPOLEN_SACK_PERBLOCK) { 471 i += TCPOLEN_SACK_PERBLOCK) {
469 tmp = ntohl(*((__be32 *)(ptr+i)+1)); 472 tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
470 473
471 if (after(tmp, *sack)) 474 if (after(tmp, *sack))
472 *sack = tmp; 475 *sack = tmp;
@@ -859,7 +862,7 @@ static int tcp_packet(struct nf_conn *ct,
859 */ 862 */
860 if (nf_ct_kill(ct)) 863 if (nf_ct_kill(ct))
861 return -NF_REPEAT; 864 return -NF_REPEAT;
862 return -NF_DROP; 865 return NF_DROP;
863 } 866 }
864 /* Fall through */ 867 /* Fall through */
865 case TCP_CONNTRACK_IGNORE: 868 case TCP_CONNTRACK_IGNORE:
@@ -892,7 +895,7 @@ static int tcp_packet(struct nf_conn *ct,
892 nf_log_packet(pf, 0, skb, NULL, NULL, NULL, 895 nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
893 "nf_ct_tcp: killing out of sync session "); 896 "nf_ct_tcp: killing out of sync session ");
894 nf_ct_kill(ct); 897 nf_ct_kill(ct);
895 return -NF_DROP; 898 return NF_DROP;
896 } 899 }
897 ct->proto.tcp.last_index = index; 900 ct->proto.tcp.last_index = index;
898 ct->proto.tcp.last_dir = dir; 901 ct->proto.tcp.last_dir = dir;
@@ -1181,6 +1184,17 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1181 1184
1182 return 0; 1185 return 0;
1183} 1186}
1187
1188static int tcp_nlattr_size(void)
1189{
1190 return nla_total_size(0) /* CTA_PROTOINFO_TCP */
1191 + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1);
1192}
1193
1194static int tcp_nlattr_tuple_size(void)
1195{
1196 return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1197}
1184#endif 1198#endif
1185 1199
1186#ifdef CONFIG_SYSCTL 1200#ifdef CONFIG_SYSCTL
@@ -1396,9 +1410,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =
1396 .error = tcp_error, 1410 .error = tcp_error,
1397#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 1411#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1398 .to_nlattr = tcp_to_nlattr, 1412 .to_nlattr = tcp_to_nlattr,
1413 .nlattr_size = tcp_nlattr_size,
1399 .from_nlattr = nlattr_to_tcp, 1414 .from_nlattr = nlattr_to_tcp,
1400 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 1415 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1401 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 1416 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
1417 .nlattr_tuple_size = tcp_nlattr_tuple_size,
1402 .nla_policy = nf_ct_port_nla_policy, 1418 .nla_policy = nf_ct_port_nla_policy,
1403#endif 1419#endif
1404#ifdef CONFIG_SYSCTL 1420#ifdef CONFIG_SYSCTL
@@ -1426,9 +1442,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =
1426 .error = tcp_error, 1442 .error = tcp_error,
1427#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 1443#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
1428 .to_nlattr = tcp_to_nlattr, 1444 .to_nlattr = tcp_to_nlattr,
1445 .nlattr_size = tcp_nlattr_size,
1429 .from_nlattr = nlattr_to_tcp, 1446 .from_nlattr = nlattr_to_tcp,
1430 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 1447 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
1431 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 1448 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
1449 .nlattr_tuple_size = tcp_nlattr_tuple_size,
1432 .nla_policy = nf_ct_port_nla_policy, 1450 .nla_policy = nf_ct_port_nla_policy,
1433#endif 1451#endif
1434#ifdef CONFIG_SYSCTL 1452#ifdef CONFIG_SYSCTL
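Editor's note: the new tcp_nlattr_size()/tcp_nlattr_tuple_size() callbacks (and the SCTP counterpart above) let ctnetlink precompute how much room a conntrack netlink message needs instead of guessing: nla_total_size(0) accounts for the enclosing nest header and nla_policy_len() sums the worst-case length of every attribute a policy admits. A minimal, self-contained sketch with a made-up policy follows; the demo_* names are illustrative only, the real policies are tcp_nla_policy and nf_ct_port_nla_policy.

#include <net/netlink.h>

enum {
	DEMO_ATTR_UNSPEC,
	DEMO_ATTR_STATE,
	DEMO_ATTR_FLAGS,
	__DEMO_ATTR_MAX
};
#define DEMO_ATTR_MAX (__DEMO_ATTR_MAX - 1)

static const struct nla_policy demo_policy[DEMO_ATTR_MAX + 1] = {
	[DEMO_ATTR_STATE]	= { .type = NLA_U8 },
	[DEMO_ATTR_FLAGS]	= { .type = NLA_U16 },
};

/* Worst-case payload of the nest: one nest header plus the summed
 * worst-case sizes of every attribute allowed by the policy. */
static int demo_nlattr_size(void)
{
	return nla_total_size(0)
	       + nla_policy_len(demo_policy, DEMO_ATTR_MAX + 1);
}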
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 2b8b1f579f93..70809d117b91 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -22,6 +22,8 @@
22#include <net/netfilter/nf_conntrack_l4proto.h> 22#include <net/netfilter/nf_conntrack_l4proto.h>
23#include <net/netfilter/nf_conntrack_ecache.h> 23#include <net/netfilter/nf_conntrack_ecache.h>
24#include <net/netfilter/nf_log.h> 24#include <net/netfilter/nf_log.h>
25#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
26#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
25 27
26static unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ; 28static unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ;
27static unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ; 29static unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ;
@@ -193,6 +195,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =
193#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 195#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
194 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 196 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
195 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 197 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
198 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
196 .nla_policy = nf_ct_port_nla_policy, 199 .nla_policy = nf_ct_port_nla_policy,
197#endif 200#endif
198#ifdef CONFIG_SYSCTL 201#ifdef CONFIG_SYSCTL
@@ -220,6 +223,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =
220#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 223#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
221 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 224 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
222 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 225 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
226 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
223 .nla_policy = nf_ct_port_nla_policy, 227 .nla_policy = nf_ct_port_nla_policy,
224#endif 228#endif
225#ifdef CONFIG_SYSCTL 229#ifdef CONFIG_SYSCTL
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index 4579d8de13b1..4614696c1b88 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -180,6 +180,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =
180 .error = udplite_error, 180 .error = udplite_error,
181#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) 181#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
182 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, 182 .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
183 .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
183 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, 184 .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
184 .nla_policy = nf_ct_port_nla_policy, 185 .nla_policy = nf_ct_port_nla_policy,
185#endif 186#endif
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 4da54b0b9233..193515381970 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -44,40 +44,42 @@ struct ct_iter_state {
44 unsigned int bucket; 44 unsigned int bucket;
45}; 45};
46 46
47static struct hlist_node *ct_get_first(struct seq_file *seq) 47static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
48{ 48{
49 struct net *net = seq_file_net(seq); 49 struct net *net = seq_file_net(seq);
50 struct ct_iter_state *st = seq->private; 50 struct ct_iter_state *st = seq->private;
51 struct hlist_node *n; 51 struct hlist_nulls_node *n;
52 52
53 for (st->bucket = 0; 53 for (st->bucket = 0;
54 st->bucket < nf_conntrack_htable_size; 54 st->bucket < nf_conntrack_htable_size;
55 st->bucket++) { 55 st->bucket++) {
56 n = rcu_dereference(net->ct.hash[st->bucket].first); 56 n = rcu_dereference(net->ct.hash[st->bucket].first);
57 if (n) 57 if (!is_a_nulls(n))
58 return n; 58 return n;
59 } 59 }
60 return NULL; 60 return NULL;
61} 61}
62 62
63static struct hlist_node *ct_get_next(struct seq_file *seq, 63static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
64 struct hlist_node *head) 64 struct hlist_nulls_node *head)
65{ 65{
66 struct net *net = seq_file_net(seq); 66 struct net *net = seq_file_net(seq);
67 struct ct_iter_state *st = seq->private; 67 struct ct_iter_state *st = seq->private;
68 68
69 head = rcu_dereference(head->next); 69 head = rcu_dereference(head->next);
70 while (head == NULL) { 70 while (is_a_nulls(head)) {
71 if (++st->bucket >= nf_conntrack_htable_size) 71 if (likely(get_nulls_value(head) == st->bucket)) {
72 return NULL; 72 if (++st->bucket >= nf_conntrack_htable_size)
73 return NULL;
74 }
73 head = rcu_dereference(net->ct.hash[st->bucket].first); 75 head = rcu_dereference(net->ct.hash[st->bucket].first);
74 } 76 }
75 return head; 77 return head;
76} 78}
77 79
78static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) 80static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
79{ 81{
80 struct hlist_node *head = ct_get_first(seq); 82 struct hlist_nulls_node *head = ct_get_first(seq);
81 83
82 if (head) 84 if (head)
83 while (pos && (head = ct_get_next(seq, head))) 85 while (pos && (head = ct_get_next(seq, head)))
@@ -107,67 +109,74 @@ static void ct_seq_stop(struct seq_file *s, void *v)
107/* return 0 on success, 1 in case of error */ 109/* return 0 on success, 1 in case of error */
108static int ct_seq_show(struct seq_file *s, void *v) 110static int ct_seq_show(struct seq_file *s, void *v)
109{ 111{
110 const struct nf_conntrack_tuple_hash *hash = v; 112 struct nf_conntrack_tuple_hash *hash = v;
111 const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); 113 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
112 const struct nf_conntrack_l3proto *l3proto; 114 const struct nf_conntrack_l3proto *l3proto;
113 const struct nf_conntrack_l4proto *l4proto; 115 const struct nf_conntrack_l4proto *l4proto;
116 int ret = 0;
114 117
115 NF_CT_ASSERT(ct); 118 NF_CT_ASSERT(ct);
119 if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
120 return 0;
116 121
117 /* we only want to print DIR_ORIGINAL */ 122 /* we only want to print DIR_ORIGINAL */
118 if (NF_CT_DIRECTION(hash)) 123 if (NF_CT_DIRECTION(hash))
119 return 0; 124 goto release;
120 125
121 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); 126 l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
122 NF_CT_ASSERT(l3proto); 127 NF_CT_ASSERT(l3proto);
123 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); 128 l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
124 NF_CT_ASSERT(l4proto); 129 NF_CT_ASSERT(l4proto);
125 130
131 ret = -ENOSPC;
126 if (seq_printf(s, "%-8s %u %-8s %u %ld ", 132 if (seq_printf(s, "%-8s %u %-8s %u %ld ",
127 l3proto->name, nf_ct_l3num(ct), 133 l3proto->name, nf_ct_l3num(ct),
128 l4proto->name, nf_ct_protonum(ct), 134 l4proto->name, nf_ct_protonum(ct),
129 timer_pending(&ct->timeout) 135 timer_pending(&ct->timeout)
130 ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) 136 ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
131 return -ENOSPC; 137 goto release;
132 138
133 if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) 139 if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
134 return -ENOSPC; 140 goto release;
135 141
136 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, 142 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
137 l3proto, l4proto)) 143 l3proto, l4proto))
138 return -ENOSPC; 144 goto release;
139 145
140 if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) 146 if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
141 return -ENOSPC; 147 goto release;
142 148
143 if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) 149 if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
144 if (seq_printf(s, "[UNREPLIED] ")) 150 if (seq_printf(s, "[UNREPLIED] "))
145 return -ENOSPC; 151 goto release;
146 152
147 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, 153 if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
148 l3proto, l4proto)) 154 l3proto, l4proto))
149 return -ENOSPC; 155 goto release;
150 156
151 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) 157 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
152 return -ENOSPC; 158 goto release;
153 159
154 if (test_bit(IPS_ASSURED_BIT, &ct->status)) 160 if (test_bit(IPS_ASSURED_BIT, &ct->status))
155 if (seq_printf(s, "[ASSURED] ")) 161 if (seq_printf(s, "[ASSURED] "))
156 return -ENOSPC; 162 goto release;
157 163
158#if defined(CONFIG_NF_CONNTRACK_MARK) 164#if defined(CONFIG_NF_CONNTRACK_MARK)
159 if (seq_printf(s, "mark=%u ", ct->mark)) 165 if (seq_printf(s, "mark=%u ", ct->mark))
160 return -ENOSPC; 166 goto release;
161#endif 167#endif
162 168
163#ifdef CONFIG_NF_CONNTRACK_SECMARK 169#ifdef CONFIG_NF_CONNTRACK_SECMARK
164 if (seq_printf(s, "secmark=%u ", ct->secmark)) 170 if (seq_printf(s, "secmark=%u ", ct->secmark))
165 return -ENOSPC; 171 goto release;
166#endif 172#endif
167 173
168 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) 174 if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
169 return -ENOSPC; 175 goto release;
170 176
177 ret = 0;
178release:
179 nf_ct_put(ct);
171 return 0; 180 return 0;
172} 181}
173 182
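Editor's note: the /proc iterator above is adjusted for the hlist_nulls conntrack hash: ct_get_next() checks get_nulls_value() against the bucket it was walking (an RCU lookup can land on a chain the entry has been moved to), and ct_seq_show() now pins the entry with atomic_inc_not_zero() before printing, dropping the reference through the common release label. A minimal sketch of that conditional-reference idiom, with a hypothetical struct item standing in for the conntrack entry and nf_ct_put():

#include <linux/kernel.h>
#include <linux/slab.h>
#include <asm/atomic.h>

struct item {
	atomic_t use;
	/* ... payload ... */
};

static struct item *item_get_live(struct item *i)
{
	/* Under RCU the object may already be on its way to the free
	 * list; only take a reference if the count is still non-zero. */
	if (unlikely(!atomic_inc_not_zero(&i->use)))
		return NULL;
	return i;
}

static void item_put(struct item *i)
{
	if (atomic_dec_and_test(&i->use))
		kfree(i);	/* real code would defer freeing via call_rcu() */
}

Taking the reference this way is what keeps the entry from being freed while ct_seq_show() is still printing it.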
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index fa8ae5d2659c..8bb998fe098b 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -14,58 +14,63 @@
14 LOG target modules */ 14 LOG target modules */
15 15
16#define NF_LOG_PREFIXLEN 128 16#define NF_LOG_PREFIXLEN 128
17#define NFLOGGER_NAME_LEN 64
17 18
18static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; 19static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly;
20static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly;
19static DEFINE_MUTEX(nf_log_mutex); 21static DEFINE_MUTEX(nf_log_mutex);
20 22
21/* return EBUSY if somebody else is registered, EEXIST if the same logger 23static struct nf_logger *__find_logger(int pf, const char *str_logger)
22 * is registred, 0 on success. */
23int nf_log_register(u_int8_t pf, const struct nf_logger *logger)
24{ 24{
25 int ret; 25 struct nf_logger *t;
26 26
27 if (pf >= ARRAY_SIZE(nf_loggers)) 27 list_for_each_entry(t, &nf_loggers_l[pf], list[pf]) {
28 return -EINVAL; 28 if (!strnicmp(str_logger, t->name, strlen(t->name)))
29 29 return t;
30 /* Any setup of logging members must be done before 30 }
31 * substituting pointer. */
32 ret = mutex_lock_interruptible(&nf_log_mutex);
33 if (ret < 0)
34 return ret;
35
36 if (!nf_loggers[pf])
37 rcu_assign_pointer(nf_loggers[pf], logger);
38 else if (nf_loggers[pf] == logger)
39 ret = -EEXIST;
40 else
41 ret = -EBUSY;
42 31
43 mutex_unlock(&nf_log_mutex); 32 return NULL;
44 return ret;
45} 33}
46EXPORT_SYMBOL(nf_log_register);
47 34
48void nf_log_unregister_pf(u_int8_t pf) 35/* return EEXIST if the same logger is registered, 0 on success. */
36int nf_log_register(u_int8_t pf, struct nf_logger *logger)
49{ 37{
38 const struct nf_logger *llog;
39
50 if (pf >= ARRAY_SIZE(nf_loggers)) 40 if (pf >= ARRAY_SIZE(nf_loggers))
51 return; 41 return -EINVAL;
42
52 mutex_lock(&nf_log_mutex); 43 mutex_lock(&nf_log_mutex);
53 rcu_assign_pointer(nf_loggers[pf], NULL); 44
45 if (pf == NFPROTO_UNSPEC) {
46 int i;
47 for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
48 list_add_tail(&(logger->list[i]), &(nf_loggers_l[i]));
49 } else {
50 /* register at end of list to honor first register win */
51 list_add_tail(&logger->list[pf], &nf_loggers_l[pf]);
52 llog = rcu_dereference(nf_loggers[pf]);
53 if (llog == NULL)
54 rcu_assign_pointer(nf_loggers[pf], logger);
55 }
56
54 mutex_unlock(&nf_log_mutex); 57 mutex_unlock(&nf_log_mutex);
55 58
56 /* Give time to concurrent readers. */ 59 return 0;
57 synchronize_rcu();
58} 60}
59EXPORT_SYMBOL(nf_log_unregister_pf); 61EXPORT_SYMBOL(nf_log_register);
60 62
61void nf_log_unregister(const struct nf_logger *logger) 63void nf_log_unregister(struct nf_logger *logger)
62{ 64{
65 const struct nf_logger *c_logger;
63 int i; 66 int i;
64 67
65 mutex_lock(&nf_log_mutex); 68 mutex_lock(&nf_log_mutex);
66 for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { 69 for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) {
67 if (nf_loggers[i] == logger) 70 c_logger = rcu_dereference(nf_loggers[i]);
71 if (c_logger == logger)
68 rcu_assign_pointer(nf_loggers[i], NULL); 72 rcu_assign_pointer(nf_loggers[i], NULL);
73 list_del(&logger->list[i]);
69 } 74 }
70 mutex_unlock(&nf_log_mutex); 75 mutex_unlock(&nf_log_mutex);
71 76
@@ -73,6 +78,27 @@ void nf_log_unregister(const struct nf_logger *logger)
73} 78}
74EXPORT_SYMBOL(nf_log_unregister); 79EXPORT_SYMBOL(nf_log_unregister);
75 80
81int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)
82{
83 mutex_lock(&nf_log_mutex);
84 if (__find_logger(pf, logger->name) == NULL) {
85 mutex_unlock(&nf_log_mutex);
86 return -ENOENT;
87 }
88 rcu_assign_pointer(nf_loggers[pf], logger);
89 mutex_unlock(&nf_log_mutex);
90 return 0;
91}
92EXPORT_SYMBOL(nf_log_bind_pf);
93
94void nf_log_unbind_pf(u_int8_t pf)
95{
96 mutex_lock(&nf_log_mutex);
97 rcu_assign_pointer(nf_loggers[pf], NULL);
98 mutex_unlock(&nf_log_mutex);
99}
100EXPORT_SYMBOL(nf_log_unbind_pf);
101
76void nf_log_packet(u_int8_t pf, 102void nf_log_packet(u_int8_t pf,
77 unsigned int hooknum, 103 unsigned int hooknum,
78 const struct sk_buff *skb, 104 const struct sk_buff *skb,
@@ -129,13 +155,37 @@ static int seq_show(struct seq_file *s, void *v)
129{ 155{
130 loff_t *pos = v; 156 loff_t *pos = v;
131 const struct nf_logger *logger; 157 const struct nf_logger *logger;
158 struct nf_logger *t;
159 int ret;
132 160
133 logger = rcu_dereference(nf_loggers[*pos]); 161 logger = rcu_dereference(nf_loggers[*pos]);
134 162
135 if (!logger) 163 if (!logger)
136 return seq_printf(s, "%2lld NONE\n", *pos); 164 ret = seq_printf(s, "%2lld NONE (", *pos);
165 else
166 ret = seq_printf(s, "%2lld %s (", *pos, logger->name);
167
168 if (ret < 0)
169 return ret;
170
171 mutex_lock(&nf_log_mutex);
172 list_for_each_entry(t, &nf_loggers_l[*pos], list[*pos]) {
173 ret = seq_printf(s, "%s", t->name);
174 if (ret < 0) {
175 mutex_unlock(&nf_log_mutex);
176 return ret;
177 }
178 if (&t->list[*pos] != nf_loggers_l[*pos].prev) {
179 ret = seq_printf(s, ",");
180 if (ret < 0) {
181 mutex_unlock(&nf_log_mutex);
182 return ret;
183 }
184 }
185 }
186 mutex_unlock(&nf_log_mutex);
137 187
138 return seq_printf(s, "%2lld %s\n", *pos, logger->name); 188 return seq_printf(s, ")\n");
139} 189}
140 190
141static const struct seq_operations nflog_seq_ops = { 191static const struct seq_operations nflog_seq_ops = {
@@ -158,15 +208,102 @@ static const struct file_operations nflog_file_ops = {
158 .release = seq_release, 208 .release = seq_release,
159}; 209};
160 210
211
161#endif /* PROC_FS */ 212#endif /* PROC_FS */
162 213
214#ifdef CONFIG_SYSCTL
215struct ctl_path nf_log_sysctl_path[] = {
216 { .procname = "net", .ctl_name = CTL_NET, },
217 { .procname = "netfilter", .ctl_name = NET_NETFILTER, },
218 { .procname = "nf_log", .ctl_name = CTL_UNNUMBERED, },
219 { }
220};
221
222static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3];
223static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1];
224static struct ctl_table_header *nf_log_dir_header;
225
226static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
227 void *buffer, size_t *lenp, loff_t *ppos)
228{
229 const struct nf_logger *logger;
230 int r = 0;
231 int tindex = (unsigned long)table->extra1;
232
233 if (write) {
234 if (!strcmp(buffer, "NONE")) {
235 nf_log_unbind_pf(tindex);
236 return 0;
237 }
238 mutex_lock(&nf_log_mutex);
239 logger = __find_logger(tindex, buffer);
240 if (logger == NULL) {
241 mutex_unlock(&nf_log_mutex);
242 return -ENOENT;
243 }
244 rcu_assign_pointer(nf_loggers[tindex], logger);
245 mutex_unlock(&nf_log_mutex);
246 } else {
247 rcu_read_lock();
248 logger = rcu_dereference(nf_loggers[tindex]);
249 if (!logger)
250 table->data = "NONE";
251 else
252 table->data = logger->name;
253 r = proc_dostring(table, write, filp, buffer, lenp, ppos);
254 rcu_read_unlock();
255 }
256
257 return r;
258}
259
260static __init int netfilter_log_sysctl_init(void)
261{
262 int i;
263
264 for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) {
265 snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i);
266 nf_log_sysctl_table[i].ctl_name = CTL_UNNUMBERED;
267 nf_log_sysctl_table[i].procname =
268 nf_log_sysctl_fnames[i-NFPROTO_UNSPEC];
269 nf_log_sysctl_table[i].data = NULL;
270 nf_log_sysctl_table[i].maxlen =
271 NFLOGGER_NAME_LEN * sizeof(char);
272 nf_log_sysctl_table[i].mode = 0644;
273 nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring;
274 nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i;
275 }
276
277 nf_log_dir_header = register_sysctl_paths(nf_log_sysctl_path,
278 nf_log_sysctl_table);
279 if (!nf_log_dir_header)
280 return -ENOMEM;
281
282 return 0;
283}
284#else
285static __init int netfilter_log_sysctl_init(void)
286{
287 return 0;
288}
289#endif /* CONFIG_SYSCTL */
163 290
164int __init netfilter_log_init(void) 291int __init netfilter_log_init(void)
165{ 292{
293 int i, r;
166#ifdef CONFIG_PROC_FS 294#ifdef CONFIG_PROC_FS
167 if (!proc_create("nf_log", S_IRUGO, 295 if (!proc_create("nf_log", S_IRUGO,
168 proc_net_netfilter, &nflog_file_ops)) 296 proc_net_netfilter, &nflog_file_ops))
169 return -1; 297 return -1;
170#endif 298#endif
299
300 /* Errors will trigger panic, unroll on error is unnecessary. */
301 r = netfilter_log_sysctl_init();
302 if (r < 0)
303 return r;
304
305 for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++)
306 INIT_LIST_HEAD(&(nf_loggers_l[i]));
307
171 return 0; 308 return 0;
172} 309}
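Editor's note: with this rework nf_log keeps a per-family list of every registered backend next to the single active pointer. nf_log_register() only appends the logger (and makes it the default where nothing is bound yet), while nf_log_bind_pf()/nf_log_unbind_pf() switch the active logger, either from nfnetlink_log (see the hunk further down) or through the new /proc/sys/net/netfilter/nf_log sysctl entries. A hedged sketch of a minimal backend against the new API; the logfn prototype is abbreviated from the era's nf_logfn typedef, so treat its exact signature as an assumption rather than a quote.

#include <linux/module.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_log.h>

static void demo_log_packet(u_int8_t pf, unsigned int hooknum,
			    const struct sk_buff *skb,
			    const struct net_device *in,
			    const struct net_device *out,
			    const struct nf_loginfo *li,
			    const char *prefix)
{
	printk(KERN_INFO "demo_log: %s packet on hook %u\n",
	       prefix ? prefix : "", hooknum);
}

static struct nf_logger demo_logger __read_mostly = {
	.name	= "demo_log",
	.logfn	= demo_log_packet,
	.me	= THIS_MODULE,
};

static int __init demo_init(void)
{
	int ret;

	/* Appends the logger to every per-family list and makes it the
	 * default only where no logger is bound yet. */
	ret = nf_log_register(NFPROTO_UNSPEC, &demo_logger);
	if (ret < 0)
		return ret;

	/* Explicitly make it the active IPv4 logger, which is what
	 * nfnetlink_log now does on NFULNL_CFG_CMD_PF_BIND. */
	return nf_log_bind_pf(NFPROTO_IPV4, &demo_logger);
}

static void __exit demo_exit(void)
{
	nf_log_unbind_pf(NFPROTO_IPV4);
	nf_log_unregister(&demo_logger);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");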
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index cdc97f3105a3..5490fc37c92d 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -71,6 +71,7 @@ int
71nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) 71nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
72{ 72{
73 if (inet_sk(sk)->transparent) { 73 if (inet_sk(sk)->transparent) {
74 skb_orphan(skb);
74 skb->sk = sk; 75 skb->sk = sk;
75 skb->destructor = nf_tproxy_destructor; 76 skb->destructor = nf_tproxy_destructor;
76 return 1; 77 return 1;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 9c0ba17a1ddb..2785d66a7e38 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -113,6 +113,12 @@ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
113} 113}
114EXPORT_SYMBOL_GPL(nfnetlink_send); 114EXPORT_SYMBOL_GPL(nfnetlink_send);
115 115
116void nfnetlink_set_err(u32 pid, u32 group, int error)
117{
118 netlink_set_err(nfnl, pid, group, error);
119}
120EXPORT_SYMBOL_GPL(nfnetlink_set_err);
121
116int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) 122int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags)
117{ 123{
118 return netlink_unicast(nfnl, skb, pid, flags); 124 return netlink_unicast(nfnl, skb, pid, flags);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index c712e9fc6bba..fd326ac27ec8 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -693,7 +693,7 @@ nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb,
693 return -ENOTSUPP; 693 return -ENOTSUPP;
694} 694}
695 695
696static const struct nf_logger nfulnl_logger = { 696static struct nf_logger nfulnl_logger __read_mostly = {
697 .name = "nfnetlink_log", 697 .name = "nfnetlink_log",
698 .logfn = &nfulnl_log_packet, 698 .logfn = &nfulnl_log_packet,
699 .me = THIS_MODULE, 699 .me = THIS_MODULE,
@@ -725,9 +725,9 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
725 /* Commands without queue context */ 725 /* Commands without queue context */
726 switch (cmd->command) { 726 switch (cmd->command) {
727 case NFULNL_CFG_CMD_PF_BIND: 727 case NFULNL_CFG_CMD_PF_BIND:
728 return nf_log_register(pf, &nfulnl_logger); 728 return nf_log_bind_pf(pf, &nfulnl_logger);
729 case NFULNL_CFG_CMD_PF_UNBIND: 729 case NFULNL_CFG_CMD_PF_UNBIND:
730 nf_log_unregister_pf(pf); 730 nf_log_unbind_pf(pf);
731 return 0; 731 return 0;
732 } 732 }
733 } 733 }
@@ -952,17 +952,25 @@ static int __init nfnetlink_log_init(void)
952 goto cleanup_netlink_notifier; 952 goto cleanup_netlink_notifier;
953 } 953 }
954 954
955 status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger);
956 if (status < 0) {
957 printk(KERN_ERR "log: failed to register logger\n");
958 goto cleanup_subsys;
959 }
960
955#ifdef CONFIG_PROC_FS 961#ifdef CONFIG_PROC_FS
956 if (!proc_create("nfnetlink_log", 0440, 962 if (!proc_create("nfnetlink_log", 0440,
957 proc_net_netfilter, &nful_file_ops)) 963 proc_net_netfilter, &nful_file_ops))
958 goto cleanup_subsys; 964 goto cleanup_logger;
959#endif 965#endif
960 return status; 966 return status;
961 967
962#ifdef CONFIG_PROC_FS 968#ifdef CONFIG_PROC_FS
969cleanup_logger:
970 nf_log_unregister(&nfulnl_logger);
971#endif
963cleanup_subsys: 972cleanup_subsys:
964 nfnetlink_subsys_unregister(&nfulnl_subsys); 973 nfnetlink_subsys_unregister(&nfulnl_subsys);
965#endif
966cleanup_netlink_notifier: 974cleanup_netlink_notifier:
967 netlink_unregister_notifier(&nfulnl_rtnl_notifier); 975 netlink_unregister_notifier(&nfulnl_rtnl_notifier);
968 return status; 976 return status;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 5baccfa5a0de..509a95621f9f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -625,6 +625,20 @@ void xt_free_table_info(struct xt_table_info *info)
625} 625}
626EXPORT_SYMBOL(xt_free_table_info); 626EXPORT_SYMBOL(xt_free_table_info);
627 627
628void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo,
629 struct xt_table_info *newinfo)
630{
631 unsigned int cpu;
632
633 for_each_possible_cpu(cpu) {
634 void *p = oldinfo->entries[cpu];
635 rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]);
636 newinfo->entries[cpu] = p;
637 }
638
639}
640EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu);
641
628/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ 642/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
629struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, 643struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
630 const char *name) 644 const char *name)
@@ -671,21 +685,22 @@ xt_replace_table(struct xt_table *table,
671 struct xt_table_info *oldinfo, *private; 685 struct xt_table_info *oldinfo, *private;
672 686
673 /* Do the substitution. */ 687 /* Do the substitution. */
674 write_lock_bh(&table->lock); 688 mutex_lock(&table->lock);
675 private = table->private; 689 private = table->private;
676 /* Check inside lock: is the old number correct? */ 690 /* Check inside lock: is the old number correct? */
677 if (num_counters != private->number) { 691 if (num_counters != private->number) {
678 duprintf("num_counters != table->private->number (%u/%u)\n", 692 duprintf("num_counters != table->private->number (%u/%u)\n",
679 num_counters, private->number); 693 num_counters, private->number);
680 write_unlock_bh(&table->lock); 694 mutex_unlock(&table->lock);
681 *error = -EAGAIN; 695 *error = -EAGAIN;
682 return NULL; 696 return NULL;
683 } 697 }
684 oldinfo = private; 698 oldinfo = private;
685 table->private = newinfo; 699 rcu_assign_pointer(table->private, newinfo);
686 newinfo->initial_entries = oldinfo->initial_entries; 700 newinfo->initial_entries = oldinfo->initial_entries;
687 write_unlock_bh(&table->lock); 701 mutex_unlock(&table->lock);
688 702
703 synchronize_net();
689 return oldinfo; 704 return oldinfo;
690} 705}
691EXPORT_SYMBOL_GPL(xt_replace_table); 706EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -719,7 +734,8 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table,
719 734
720 /* Simplifies replace_table code. */ 735 /* Simplifies replace_table code. */
721 table->private = bootstrap; 736 table->private = bootstrap;
722 rwlock_init(&table->lock); 737 mutex_init(&table->lock);
738
723 if (!xt_replace_table(table, 0, newinfo, &ret)) 739 if (!xt_replace_table(table, 0, newinfo, &ret))
724 goto unlock; 740 goto unlock;
725 741
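Editor's note: xt_replace_table() now serializes replacements with a mutex, publishes the new xt_table_info with rcu_assign_pointer() and waits out readers with synchronize_net(); xt_table_entry_swap_rcu() swaps the per-CPU entry arrays the same way. The packet path is then expected to read table->private under RCU rather than the old read lock. A reader-side sketch under that assumption; demo_walk_entries() is a stub, the real traversal is ipt_do_table()/ip6t_do_table(), converted elsewhere in this series.

#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/netfilter/x_tables.h>

/* Illustrative stand-in for the real rule walk. */
static unsigned int demo_walk_entries(const struct sk_buff *skb,
				      const void *entries)
{
	return NF_ACCEPT;
}

static unsigned int demo_do_table(struct sk_buff *skb, struct xt_table *table)
{
	const struct xt_table_info *private;
	unsigned int verdict;

	rcu_read_lock_bh();
	/* Pairs with rcu_assign_pointer(table->private, newinfo) in
	 * xt_replace_table(); synchronize_net() there guarantees the old
	 * info is not freed while we are still inside this section. */
	private = rcu_dereference(table->private);
	verdict = demo_walk_entries(skb, private->entries[smp_processor_id()]);
	rcu_read_unlock_bh();

	return verdict;
}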
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
new file mode 100644
index 000000000000..10e789e2d12a
--- /dev/null
+++ b/net/netfilter/xt_HL.c
@@ -0,0 +1,171 @@
1/*
2 * TTL modification target for IP tables
3 * (C) 2000,2005 by Harald Welte <laforge@netfilter.org>
4 *
5 * Hop Limit modification target for ip6tables
6 * Maciej Soltysiak <solt@dns.toxicfilms.tv>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/ip.h>
16#include <linux/ipv6.h>
17#include <net/checksum.h>
18
19#include <linux/netfilter/x_tables.h>
20#include <linux/netfilter_ipv4/ipt_TTL.h>
21#include <linux/netfilter_ipv6/ip6t_HL.h>
22
23MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
24MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
25MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target");
26MODULE_LICENSE("GPL");
27
28static unsigned int
29ttl_tg(struct sk_buff *skb, const struct xt_target_param *par)
30{
31 struct iphdr *iph;
32 const struct ipt_TTL_info *info = par->targinfo;
33 int new_ttl;
34
35 if (!skb_make_writable(skb, skb->len))
36 return NF_DROP;
37
38 iph = ip_hdr(skb);
39
40 switch (info->mode) {
41 case IPT_TTL_SET:
42 new_ttl = info->ttl;
43 break;
44 case IPT_TTL_INC:
45 new_ttl = iph->ttl + info->ttl;
46 if (new_ttl > 255)
47 new_ttl = 255;
48 break;
49 case IPT_TTL_DEC:
50 new_ttl = iph->ttl - info->ttl;
51 if (new_ttl < 0)
52 new_ttl = 0;
53 break;
54 default:
55 new_ttl = iph->ttl;
56 break;
57 }
58
59 if (new_ttl != iph->ttl) {
60 csum_replace2(&iph->check, htons(iph->ttl << 8),
61 htons(new_ttl << 8));
62 iph->ttl = new_ttl;
63 }
64
65 return XT_CONTINUE;
66}
67
68static unsigned int
69hl_tg6(struct sk_buff *skb, const struct xt_target_param *par)
70{
71 struct ipv6hdr *ip6h;
72 const struct ip6t_HL_info *info = par->targinfo;
73 int new_hl;
74
75 if (!skb_make_writable(skb, skb->len))
76 return NF_DROP;
77
78 ip6h = ipv6_hdr(skb);
79
80 switch (info->mode) {
81 case IP6T_HL_SET:
82 new_hl = info->hop_limit;
83 break;
84 case IP6T_HL_INC:
85 new_hl = ip6h->hop_limit + info->hop_limit;
86 if (new_hl > 255)
87 new_hl = 255;
88 break;
89 case IP6T_HL_DEC:
90 new_hl = ip6h->hop_limit - info->hop_limit;
91 if (new_hl < 0)
92 new_hl = 0;
93 break;
94 default:
95 new_hl = ip6h->hop_limit;
96 break;
97 }
98
99 ip6h->hop_limit = new_hl;
100
101 return XT_CONTINUE;
102}
103
104static bool ttl_tg_check(const struct xt_tgchk_param *par)
105{
106 const struct ipt_TTL_info *info = par->targinfo;
107
108 if (info->mode > IPT_TTL_MAXMODE) {
109 printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n",
110 info->mode);
111 return false;
112 }
113 if (info->mode != IPT_TTL_SET && info->ttl == 0)
114 return false;
115 return true;
116}
117
118static bool hl_tg6_check(const struct xt_tgchk_param *par)
119{
120 const struct ip6t_HL_info *info = par->targinfo;
121
122 if (info->mode > IP6T_HL_MAXMODE) {
123 printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n",
124 info->mode);
125 return false;
126 }
127 if (info->mode != IP6T_HL_SET && info->hop_limit == 0) {
128 printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't "
129 "make sense with value 0\n");
130 return false;
131 }
132 return true;
133}
134
135static struct xt_target hl_tg_reg[] __read_mostly = {
136 {
137 .name = "TTL",
138 .revision = 0,
139 .family = NFPROTO_IPV4,
140 .target = ttl_tg,
141 .targetsize = sizeof(struct ipt_TTL_info),
142 .table = "mangle",
143 .checkentry = ttl_tg_check,
144 .me = THIS_MODULE,
145 },
146 {
147 .name = "HL",
148 .revision = 0,
149 .family = NFPROTO_IPV6,
150 .target = hl_tg6,
151 .targetsize = sizeof(struct ip6t_HL_info),
152 .table = "mangle",
153 .checkentry = hl_tg6_check,
154 .me = THIS_MODULE,
155 },
156};
157
158static int __init hl_tg_init(void)
159{
160 return xt_register_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg));
161}
162
163static void __exit hl_tg_exit(void)
164{
165 xt_unregister_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg));
166}
167
168module_init(hl_tg_init);
169module_exit(hl_tg_exit);
170MODULE_ALIAS("ipt_TTL");
171MODULE_ALIAS("ip6t_HL");
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
new file mode 100644
index 000000000000..8ff7843bb921
--- /dev/null
+++ b/net/netfilter/xt_LED.c
@@ -0,0 +1,161 @@
1/*
2 * xt_LED.c - netfilter target to make LEDs blink upon packet matches
3 *
4 * Copyright (C) 2008 Adam Nielsen <a.nielsen@shikadi.net>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; version 2 of the License.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 * 02110-1301 USA.
19 *
20 */
21
22#include <linux/module.h>
23#include <linux/skbuff.h>
24#include <linux/netfilter/x_tables.h>
25#include <linux/leds.h>
26#include <linux/mutex.h>
27
28#include <linux/netfilter/xt_LED.h>
29
30MODULE_LICENSE("GPL");
31MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
32MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
33
34/*
35 * This is declared in here (the kernel module) only, to avoid having these
36 * dependencies in userspace code. This is what xt_led_info.internal_data
37 * points to.
38 */
39struct xt_led_info_internal {
40 struct led_trigger netfilter_led_trigger;
41 struct timer_list timer;
42};
43
44static unsigned int
45led_tg(struct sk_buff *skb, const struct xt_target_param *par)
46{
47 const struct xt_led_info *ledinfo = par->targinfo;
48 struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
49
50 /*
51 * If "always blink" is enabled, and there's still some time until the
52 * LED will switch off, briefly switch it off now.
53 */
54 if ((ledinfo->delay > 0) && ledinfo->always_blink &&
55 timer_pending(&ledinternal->timer))
56 led_trigger_event(&ledinternal->netfilter_led_trigger,LED_OFF);
57
58 led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL);
59
60 /* If there's a positive delay, start/update the timer */
61 if (ledinfo->delay > 0) {
62 mod_timer(&ledinternal->timer,
63 jiffies + msecs_to_jiffies(ledinfo->delay));
64
65 /* Otherwise if there was no delay given, blink as fast as possible */
66 } else if (ledinfo->delay == 0) {
67 led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
68 }
69
70 /* else the delay is negative, which means switch on and stay on */
71
72 return XT_CONTINUE;
73}
74
75static void led_timeout_callback(unsigned long data)
76{
77 struct xt_led_info *ledinfo = (struct xt_led_info *)data;
78 struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
79
80 led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
81}
82
83static bool led_tg_check(const struct xt_tgchk_param *par)
84{
85 struct xt_led_info *ledinfo = par->targinfo;
86 struct xt_led_info_internal *ledinternal;
87 int err;
88
89 if (ledinfo->id[0] == '\0') {
90 printk(KERN_ERR KBUILD_MODNAME ": No 'id' parameter given.\n");
91 return false;
92 }
93
94 ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL);
95 if (!ledinternal) {
96 printk(KERN_CRIT KBUILD_MODNAME ": out of memory\n");
97 return false;
98 }
99
100 ledinternal->netfilter_led_trigger.name = ledinfo->id;
101
102 err = led_trigger_register(&ledinternal->netfilter_led_trigger);
103 if (err) {
104 printk(KERN_CRIT KBUILD_MODNAME
105 ": led_trigger_register() failed\n");
106 if (err == -EEXIST)
107 printk(KERN_ERR KBUILD_MODNAME
108 ": Trigger name is already in use.\n");
109 goto exit_alloc;
110 }
111
112 /* See if we need to set up a timer */
113 if (ledinfo->delay > 0)
114 setup_timer(&ledinternal->timer, led_timeout_callback,
115 (unsigned long)ledinfo);
116
117 ledinfo->internal_data = ledinternal;
118
119 return true;
120
121exit_alloc:
122 kfree(ledinternal);
123
124 return false;
125}
126
127static void led_tg_destroy(const struct xt_tgdtor_param *par)
128{
129 const struct xt_led_info *ledinfo = par->targinfo;
130 struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
131
132 if (ledinfo->delay > 0)
133 del_timer_sync(&ledinternal->timer);
134
135 led_trigger_unregister(&ledinternal->netfilter_led_trigger);
136 kfree(ledinternal);
137}
138
139static struct xt_target led_tg_reg __read_mostly = {
140 .name = "LED",
141 .revision = 0,
142 .family = NFPROTO_UNSPEC,
143 .target = led_tg,
144 .targetsize = XT_ALIGN(sizeof(struct xt_led_info)),
145 .checkentry = led_tg_check,
146 .destroy = led_tg_destroy,
147 .me = THIS_MODULE,
148};
149
150static int __init led_tg_init(void)
151{
152 return xt_register_target(&led_tg_reg);
153}
154
155static void __exit led_tg_exit(void)
156{
157 xt_unregister_target(&led_tg_reg);
158}
159
160module_init(led_tg_init);
161module_exit(led_tg_exit);
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
new file mode 100644
index 000000000000..ad5bd890e4e8
--- /dev/null
+++ b/net/netfilter/xt_cluster.c
@@ -0,0 +1,164 @@
1/*
2 * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8#include <linux/module.h>
9#include <linux/skbuff.h>
10#include <linux/jhash.h>
11#include <linux/ip.h>
12#include <net/ipv6.h>
13
14#include <linux/netfilter/x_tables.h>
15#include <net/netfilter/nf_conntrack.h>
16#include <linux/netfilter/xt_cluster.h>
17
18static inline u_int32_t nf_ct_orig_ipv4_src(const struct nf_conn *ct)
19{
20 return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip;
21}
22
23static inline const void *nf_ct_orig_ipv6_src(const struct nf_conn *ct)
24{
25 return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6;
26}
27
28static inline u_int32_t
29xt_cluster_hash_ipv4(u_int32_t ip, const struct xt_cluster_match_info *info)
30{
31 return jhash_1word(ip, info->hash_seed);
32}
33
34static inline u_int32_t
35xt_cluster_hash_ipv6(const void *ip, const struct xt_cluster_match_info *info)
36{
37 return jhash2(ip, NF_CT_TUPLE_L3SIZE / sizeof(__u32), info->hash_seed);
38}
39
40static inline u_int32_t
41xt_cluster_hash(const struct nf_conn *ct,
42 const struct xt_cluster_match_info *info)
43{
44 u_int32_t hash = 0;
45
46 switch(nf_ct_l3num(ct)) {
47 case AF_INET:
48 hash = xt_cluster_hash_ipv4(nf_ct_orig_ipv4_src(ct), info);
49 break;
50 case AF_INET6:
51 hash = xt_cluster_hash_ipv6(nf_ct_orig_ipv6_src(ct), info);
52 break;
53 default:
54 WARN_ON(1);
55 break;
56 }
57 return (((u64)hash * info->total_nodes) >> 32);
58}
59
60static inline bool
61xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family)
62{
63 bool is_multicast = false;
64
65 switch(family) {
66 case NFPROTO_IPV4:
67 is_multicast = ipv4_is_multicast(ip_hdr(skb)->daddr);
68 break;
69 case NFPROTO_IPV6:
70 is_multicast = ipv6_addr_type(&ipv6_hdr(skb)->daddr) &
71 IPV6_ADDR_MULTICAST;
72 break;
73 default:
74 WARN_ON(1);
75 break;
76 }
77 return is_multicast;
78}
79
80static bool
81xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par)
82{
83 struct sk_buff *pskb = (struct sk_buff *)skb;
84 const struct xt_cluster_match_info *info = par->matchinfo;
85 const struct nf_conn *ct;
86 enum ip_conntrack_info ctinfo;
87 unsigned long hash;
88
89 /* This match assumes that all nodes see the same packets. This can be
90 * achieved if the switch that connects the cluster nodes supports some
91 * sort of 'port mirroring'. However, if your switch does not support
92 * this, your cluster nodes can reply to ARP requests using a multicast
93 * MAC address. Thus, your switch will flood the same packets to the
94 * cluster nodes with the same multicast MAC address. Using a multicast
95 * link address is an RFC 1812 (section 3.3.2) violation, but this works
96 * fine in practice.
97 *
98 * Unfortunately, if you use the multicast MAC address, the link layer
99 * sets skbuff's pkt_type to PACKET_MULTICAST, which is not accepted
100 * by TCP and others for packets coming to this node. For that reason,
101 * this match mangles skbuff's pkt_type if it detects a packet
102 * addressed to a unicast address but using PACKET_MULTICAST. Yes, I
103 * know, matches should not alter packets, but we are doing this here
104 * because we would need to add a PKTTYPE target for this sole purpose.
105 */
106 if (!xt_cluster_is_multicast_addr(skb, par->family) &&
107 skb->pkt_type == PACKET_MULTICAST) {
108 pskb->pkt_type = PACKET_HOST;
109 }
110
111 ct = nf_ct_get(skb, &ctinfo);
112 if (ct == NULL)
113 return false;
114
115 if (ct == &nf_conntrack_untracked)
116 return false;
117
118 if (ct->master)
119 hash = xt_cluster_hash(ct->master, info);
120 else
121 hash = xt_cluster_hash(ct, info);
122
123 return !!((1 << hash) & info->node_mask) ^
124 !!(info->flags & XT_CLUSTER_F_INV);
125}
126
127static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
128{
129 struct xt_cluster_match_info *info = par->matchinfo;
130
131 if (info->node_mask >= (1 << info->total_nodes)) {
132 printk(KERN_ERR "xt_cluster: this node mask cannot be "
133 "higher than the total number of nodes\n");
134 return false;
135 }
136 return true;
137}
138
139static struct xt_match xt_cluster_match __read_mostly = {
140 .name = "cluster",
141 .family = NFPROTO_UNSPEC,
142 .match = xt_cluster_mt,
143 .checkentry = xt_cluster_mt_checkentry,
144 .matchsize = sizeof(struct xt_cluster_match_info),
145 .me = THIS_MODULE,
146};
147
148static int __init xt_cluster_mt_init(void)
149{
150 return xt_register_match(&xt_cluster_match);
151}
152
153static void __exit xt_cluster_mt_fini(void)
154{
155 xt_unregister_match(&xt_cluster_match);
156}
157
158MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
159MODULE_LICENSE("GPL");
160MODULE_DESCRIPTION("Xtables: hash-based cluster match");
161MODULE_ALIAS("ipt_cluster");
162MODULE_ALIAS("ip6t_cluster");
163module_init(xt_cluster_mt_init);
164module_exit(xt_cluster_mt_fini);
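Editor's note: xt_cluster decides which node owns a flow from a hash of the original source address: jhash_1word()/jhash2() seeded with hash_seed, then scaled onto 0..total_nodes-1 with a multiply-and-shift instead of a modulo. A flow is claimed when the corresponding bit is set in node_mask, so several nodes may deliberately claim the same flows for failover. A small sketch of the IPv4 case, with illustrative names:

#include <linux/types.h>
#include <linux/jhash.h>

/* Map a flow's original IPv4 source onto one of total_nodes cluster
 * slots, mirroring xt_cluster_hash()/xt_cluster_hash_ipv4() above. */
static u_int32_t demo_pick_node(__be32 saddr, u_int32_t seed,
				u_int32_t total_nodes)
{
	u_int32_t hash = jhash_1word((__force u_int32_t)saddr, seed);

	/* Multiply-shift: scales a full 32-bit hash uniformly onto
	 * [0, total_nodes) without an expensive division. */
	return ((u64)hash * total_nodes) >> 32;
}

A node configured with node_mask containing bit demo_pick_node(saddr, seed, total_nodes) accepts the packet, which is exactly the !!((1 << hash) & info->node_mask) test in xt_cluster_mt().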
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 7f404cc64c83..680980954395 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -108,7 +108,7 @@ static int count_them(struct xt_connlimit_data *data,
108 const struct nf_conntrack_tuple_hash *found; 108 const struct nf_conntrack_tuple_hash *found;
109 struct xt_connlimit_conn *conn; 109 struct xt_connlimit_conn *conn;
110 struct xt_connlimit_conn *tmp; 110 struct xt_connlimit_conn *tmp;
111 const struct nf_conn *found_ct; 111 struct nf_conn *found_ct;
112 struct list_head *hash; 112 struct list_head *hash;
113 bool addit = true; 113 bool addit = true;
114 int matches = 0; 114 int matches = 0;
@@ -123,7 +123,7 @@ static int count_them(struct xt_connlimit_data *data,
123 123
124 /* check the saved connections */ 124 /* check the saved connections */
125 list_for_each_entry_safe(conn, tmp, hash, list) { 125 list_for_each_entry_safe(conn, tmp, hash, list) {
126 found = __nf_conntrack_find(&init_net, &conn->tuple); 126 found = nf_conntrack_find_get(&init_net, &conn->tuple);
127 found_ct = NULL; 127 found_ct = NULL;
128 128
129 if (found != NULL) 129 if (found != NULL)
@@ -151,6 +151,7 @@ static int count_them(struct xt_connlimit_data *data,
151 * we do not care about connections which are 151 * we do not care about connections which are
152 * closed already -> ditch it 152 * closed already -> ditch it
153 */ 153 */
154 nf_ct_put(found_ct);
154 list_del(&conn->list); 155 list_del(&conn->list);
155 kfree(conn); 156 kfree(conn);
156 continue; 157 continue;
@@ -160,6 +161,7 @@ static int count_them(struct xt_connlimit_data *data,
160 match->family)) 161 match->family))
161 /* same source network -> be counted! */ 162 /* same source network -> be counted! */
162 ++matches; 163 ++matches;
164 nf_ct_put(found_ct);
163 } 165 }
164 166
165 rcu_read_unlock(); 167 rcu_read_unlock();
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index f97fded024c4..a5b5369c30f9 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -149,7 +149,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
149 /* initialize hash with random val at the time we allocate 149 /* initialize hash with random val at the time we allocate
150 * the first hashtable entry */ 150 * the first hashtable entry */
151 if (!ht->rnd_initialized) { 151 if (!ht->rnd_initialized) {
152 get_random_bytes(&ht->rnd, 4); 152 get_random_bytes(&ht->rnd, sizeof(ht->rnd));
153 ht->rnd_initialized = 1; 153 ht->rnd_initialized = 1;
154 } 154 }
155 155
@@ -565,8 +565,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
565static bool 565static bool
566hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) 566hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
567{ 567{
568 const struct xt_hashlimit_info *r = 568 const struct xt_hashlimit_info *r = par->matchinfo;
569 ((const struct xt_hashlimit_info *)par->matchinfo)->u.master;
570 struct xt_hashlimit_htable *hinfo = r->hinfo; 569 struct xt_hashlimit_htable *hinfo = r->hinfo;
571 unsigned long now = jiffies; 570 unsigned long now = jiffies;
572 struct dsthash_ent *dh; 571 struct dsthash_ent *dh;
@@ -702,8 +701,6 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
702 } 701 }
703 mutex_unlock(&hlimit_mutex); 702 mutex_unlock(&hlimit_mutex);
704 703
705 /* Ugly hack: For SMP, we only want to use one set */
706 r->u.master = r;
707 return true; 704 return true;
708} 705}
709 706
diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c
new file mode 100644
index 000000000000..7726154c87b2
--- /dev/null
+++ b/net/netfilter/xt_hl.c
@@ -0,0 +1,108 @@
1/*
2 * IP tables module for matching the value of the TTL
3 * (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
4 *
5 * Hop Limit matching module
6 * (C) 2001-2002 Maciej Soltysiak <solt@dns.toxicfilms.tv>
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/ip.h>
14#include <linux/ipv6.h>
15#include <linux/module.h>
16#include <linux/skbuff.h>
17
18#include <linux/netfilter/x_tables.h>
19#include <linux/netfilter_ipv4/ipt_ttl.h>
20#include <linux/netfilter_ipv6/ip6t_hl.h>
21
22MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>");
23MODULE_DESCRIPTION("Xtables: Hoplimit/TTL field match");
24MODULE_LICENSE("GPL");
25MODULE_ALIAS("ipt_ttl");
26MODULE_ALIAS("ip6t_hl");
27
28static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par)
29{
30 const struct ipt_ttl_info *info = par->matchinfo;
31 const u8 ttl = ip_hdr(skb)->ttl;
32
33 switch (info->mode) {
34 case IPT_TTL_EQ:
35 return ttl == info->ttl;
36 case IPT_TTL_NE:
37 return ttl != info->ttl;
38 case IPT_TTL_LT:
39 return ttl < info->ttl;
40 case IPT_TTL_GT:
41 return ttl > info->ttl;
42 default:
43 printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
44 info->mode);
45 return false;
46 }
47
48 return false;
49}
50
51static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
52{
53 const struct ip6t_hl_info *info = par->matchinfo;
54 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
55
56 switch (info->mode) {
57 case IP6T_HL_EQ:
58 return ip6h->hop_limit == info->hop_limit;
59 break;
60 case IP6T_HL_NE:
61 return ip6h->hop_limit != info->hop_limit;
62 break;
63 case IP6T_HL_LT:
64 return ip6h->hop_limit < info->hop_limit;
65 break;
66 case IP6T_HL_GT:
67 return ip6h->hop_limit > info->hop_limit;
68 break;
69 default:
70 printk(KERN_WARNING "ip6t_hl: unknown mode %d\n",
71 info->mode);
72 return false;
73 }
74
75 return false;
76}
77
78static struct xt_match hl_mt_reg[] __read_mostly = {
79 {
80 .name = "ttl",
81 .revision = 0,
82 .family = NFPROTO_IPV4,
83 .match = ttl_mt,
84 .matchsize = sizeof(struct ipt_ttl_info),
85 .me = THIS_MODULE,
86 },
87 {
88 .name = "hl",
89 .revision = 0,
90 .family = NFPROTO_IPV6,
91 .match = hl_mt6,
92 .matchsize = sizeof(struct ip6t_hl_info),
93 .me = THIS_MODULE,
94 },
95};
96
97static int __init hl_mt_init(void)
98{
99 return xt_register_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg));
100}
101
102static void __exit hl_mt_exit(void)
103{
104 xt_unregister_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg));
105}
106
107module_init(hl_mt_init);
108module_exit(hl_mt_exit);
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index c908d69a5595..2e8089ecd0af 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -14,6 +14,11 @@
14#include <linux/netfilter/x_tables.h> 14#include <linux/netfilter/x_tables.h>
15#include <linux/netfilter/xt_limit.h> 15#include <linux/netfilter/xt_limit.h>
16 16
17struct xt_limit_priv {
18 unsigned long prev;
19 uint32_t credit;
20};
21
17MODULE_LICENSE("GPL"); 22MODULE_LICENSE("GPL");
18MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>"); 23MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>");
19MODULE_DESCRIPTION("Xtables: rate-limit match"); 24MODULE_DESCRIPTION("Xtables: rate-limit match");
@@ -60,18 +65,18 @@ static DEFINE_SPINLOCK(limit_lock);
60static bool 65static bool
61limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) 66limit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
62{ 67{
63 struct xt_rateinfo *r = 68 const struct xt_rateinfo *r = par->matchinfo;
64 ((const struct xt_rateinfo *)par->matchinfo)->master; 69 struct xt_limit_priv *priv = r->master;
65 unsigned long now = jiffies; 70 unsigned long now = jiffies;
66 71
67 spin_lock_bh(&limit_lock); 72 spin_lock_bh(&limit_lock);
68 r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY; 73 priv->credit += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY;
69 if (r->credit > r->credit_cap) 74 if (priv->credit > r->credit_cap)
70 r->credit = r->credit_cap; 75 priv->credit = r->credit_cap;
71 76
72 if (r->credit >= r->cost) { 77 if (priv->credit >= r->cost) {
73 /* We're not limited. */ 78 /* We're not limited. */
74 r->credit -= r->cost; 79 priv->credit -= r->cost;
75 spin_unlock_bh(&limit_lock); 80 spin_unlock_bh(&limit_lock);
76 return true; 81 return true;
77 } 82 }
@@ -95,6 +100,7 @@ user2credits(u_int32_t user)
95static bool limit_mt_check(const struct xt_mtchk_param *par) 100static bool limit_mt_check(const struct xt_mtchk_param *par)
96{ 101{
97 struct xt_rateinfo *r = par->matchinfo; 102 struct xt_rateinfo *r = par->matchinfo;
103 struct xt_limit_priv *priv;
98 104
99 /* Check for overflow. */ 105 /* Check for overflow. */
100 if (r->burst == 0 106 if (r->burst == 0
@@ -104,19 +110,30 @@ static bool limit_mt_check(const struct xt_mtchk_param *par)
104 return false; 110 return false;
105 } 111 }
106 112
107 /* For SMP, we only want to use one set of counters. */ 113 priv = kmalloc(sizeof(*priv), GFP_KERNEL);
108 r->master = r; 114 if (priv == NULL)
115 return -ENOMEM;
116
117 /* For SMP, we only want to use one set of state. */
118 r->master = priv;
109 if (r->cost == 0) { 119 if (r->cost == 0) {
110 /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * 120 /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies *
111 128. */ 121 128. */
112 r->prev = jiffies; 122 priv->prev = jiffies;
113 r->credit = user2credits(r->avg * r->burst); /* Credits full. */ 123 priv->credit = user2credits(r->avg * r->burst); /* Credits full. */
114 r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ 124 r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
115 r->cost = user2credits(r->avg); 125 r->cost = user2credits(r->avg);
116 } 126 }
117 return true; 127 return true;
118} 128}
119 129
130static void limit_mt_destroy(const struct xt_mtdtor_param *par)
131{
132 const struct xt_rateinfo *info = par->matchinfo;
133
134 kfree(info->master);
135}
136
120#ifdef CONFIG_COMPAT 137#ifdef CONFIG_COMPAT
121struct compat_xt_rateinfo { 138struct compat_xt_rateinfo {
122 u_int32_t avg; 139 u_int32_t avg;
@@ -167,6 +184,7 @@ static struct xt_match limit_mt_reg __read_mostly = {
167 .family = NFPROTO_UNSPEC, 184 .family = NFPROTO_UNSPEC,
168 .match = limit_mt, 185 .match = limit_mt,
169 .checkentry = limit_mt_check, 186 .checkentry = limit_mt_check,
187 .destroy = limit_mt_destroy,
170 .matchsize = sizeof(struct xt_rateinfo), 188 .matchsize = sizeof(struct xt_rateinfo),
171#ifdef CONFIG_COMPAT 189#ifdef CONFIG_COMPAT
172 .compatsize = sizeof(struct compat_xt_rateinfo), 190 .compatsize = sizeof(struct compat_xt_rateinfo),
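Editor's note: the limit match used to keep its token-bucket state (prev, credit) directly inside the userspace-visible struct xt_rateinfo, with the r->master self-pointer making all copies of a rule share one counter set. The hunk moves that state into a kmalloc'd xt_limit_priv allocated in checkentry() and freed in the new destroy() hook; the quota and statistic conversions further down follow the same scheme. A minimal sketch of the pattern for a hypothetical match (demo_* names are illustrative only):

#include <linux/slab.h>
#include <linux/netfilter/x_tables.h>

struct demo_priv {
	unsigned long last;		/* kernel-only mutable state */
};

struct demo_mtinfo {
	__u32 threshold;		/* parameters set by userspace */
	struct demo_priv *master;	/* filled in by the kernel */
};

static bool demo_mt_check(const struct xt_mtchk_param *par)
{
	struct demo_mtinfo *info = par->matchinfo;

	info->master = kzalloc(sizeof(*info->master), GFP_KERNEL);
	return info->master != NULL;
}

static void demo_mt_destroy(const struct xt_mtdtor_param *par)
{
	const struct demo_mtinfo *info = par->matchinfo;

	kfree(info->master);
}

Keeping only parameters in the matchinfo also means iptables can re-read the rule without seeing half-updated kernel counters, which is why quota_mt() below copies priv->quota back explicitly for display.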
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 1bcdfc12cf59..8d28ca5848bc 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -20,13 +20,13 @@ MODULE_DESCRIPTION("Xtables: Bridge physical device match");
20MODULE_ALIAS("ipt_physdev"); 20MODULE_ALIAS("ipt_physdev");
21MODULE_ALIAS("ip6t_physdev"); 21MODULE_ALIAS("ip6t_physdev");
22 22
23
23static bool 24static bool
24physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) 25physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par)
25{ 26{
26 int i; 27 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
27 static const char nulldevname[IFNAMSIZ];
28 const struct xt_physdev_info *info = par->matchinfo; 28 const struct xt_physdev_info *info = par->matchinfo;
29 bool ret; 29 unsigned long ret;
30 const char *indev, *outdev; 30 const char *indev, *outdev;
31 const struct nf_bridge_info *nf_bridge; 31 const struct nf_bridge_info *nf_bridge;
32 32
@@ -68,11 +68,7 @@ physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par)
68 if (!(info->bitmask & XT_PHYSDEV_OP_IN)) 68 if (!(info->bitmask & XT_PHYSDEV_OP_IN))
69 goto match_outdev; 69 goto match_outdev;
70 indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname; 70 indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
71 for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) { 71 ret = ifname_compare_aligned(indev, info->physindev, info->in_mask);
72 ret |= (((const unsigned int *)indev)[i]
73 ^ ((const unsigned int *)info->physindev)[i])
74 & ((const unsigned int *)info->in_mask)[i];
75 }
76 72
77 if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) 73 if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
78 return false; 74 return false;
@@ -82,13 +78,9 @@ match_outdev:
82 return true; 78 return true;
83 outdev = nf_bridge->physoutdev ? 79 outdev = nf_bridge->physoutdev ?
84 nf_bridge->physoutdev->name : nulldevname; 80 nf_bridge->physoutdev->name : nulldevname;
85 for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) { 81 ret = ifname_compare_aligned(outdev, info->physoutdev, info->out_mask);
86 ret |= (((const unsigned int *)outdev)[i]
87 ^ ((const unsigned int *)info->physoutdev)[i])
88 & ((const unsigned int *)info->out_mask)[i];
89 }
90 82
91 return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT); 83 return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT));
92} 84}
93 85
94static bool physdev_mt_check(const struct xt_mtchk_param *par) 86static bool physdev_mt_check(const struct xt_mtchk_param *par)
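Editor's note: the physdev rewrite drops the open-coded unsigned-int comparison loops in favour of ifname_compare_aligned(), which compares the IFNAMSIZ-byte names word-at-a-time under a mask and returns non-zero on any masked difference (hence ret becoming unsigned long and the !!ret at the end). A byte-wise sketch of the same semantics, for illustration only; the real helper uses unsigned long loads, which is why the buffers must be suitably aligned.

#include <linux/if.h>

/* Returns zero iff 'a' and 'b' are identical in every byte selected
 * by 'mask'. */
static unsigned long demo_ifname_compare(const char *a, const char *b,
					 const char *mask)
{
	unsigned long ret = 0;
	int i;

	for (i = 0; i < IFNAMSIZ; i++)
		ret |= (a[i] ^ b[i]) & mask[i];

	return ret;
}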
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index c84fce5e0f3e..01dd07b764ec 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -9,6 +9,10 @@
9#include <linux/netfilter/x_tables.h> 9#include <linux/netfilter/x_tables.h>
10#include <linux/netfilter/xt_quota.h> 10#include <linux/netfilter/xt_quota.h>
11 11
12struct xt_quota_priv {
13 uint64_t quota;
14};
15
12MODULE_LICENSE("GPL"); 16MODULE_LICENSE("GPL");
13MODULE_AUTHOR("Sam Johnston <samj@samj.net>"); 17MODULE_AUTHOR("Sam Johnston <samj@samj.net>");
14MODULE_DESCRIPTION("Xtables: countdown quota match"); 18MODULE_DESCRIPTION("Xtables: countdown quota match");
@@ -20,18 +24,20 @@ static DEFINE_SPINLOCK(quota_lock);
20static bool 24static bool
21quota_mt(const struct sk_buff *skb, const struct xt_match_param *par) 25quota_mt(const struct sk_buff *skb, const struct xt_match_param *par)
22{ 26{
23 struct xt_quota_info *q = 27 struct xt_quota_info *q = (void *)par->matchinfo;
24 ((const struct xt_quota_info *)par->matchinfo)->master; 28 struct xt_quota_priv *priv = q->master;
25 bool ret = q->flags & XT_QUOTA_INVERT; 29 bool ret = q->flags & XT_QUOTA_INVERT;
26 30
27 spin_lock_bh(&quota_lock); 31 spin_lock_bh(&quota_lock);
28 if (q->quota >= skb->len) { 32 if (priv->quota >= skb->len) {
29 q->quota -= skb->len; 33 priv->quota -= skb->len;
30 ret = !ret; 34 ret = !ret;
31 } else { 35 } else {
32 /* we do not allow even small packets from now on */ 36 /* we do not allow even small packets from now on */
33 q->quota = 0; 37 priv->quota = 0;
34 } 38 }
39 /* Copy quota back to matchinfo so that iptables can display it */
40 q->quota = priv->quota;
35 spin_unlock_bh(&quota_lock); 41 spin_unlock_bh(&quota_lock);
36 42
37 return ret; 43 return ret;
@@ -43,17 +49,28 @@ static bool quota_mt_check(const struct xt_mtchk_param *par)
43 49
44 if (q->flags & ~XT_QUOTA_MASK) 50 if (q->flags & ~XT_QUOTA_MASK)
45 return false; 51 return false;
46 /* For SMP, we only want to use one set of counters. */ 52
47 q->master = q; 53 q->master = kmalloc(sizeof(*q->master), GFP_KERNEL);
54 if (q->master == NULL)
55 return -ENOMEM;
56
48 return true; 57 return true;
49} 58}
50 59
60static void quota_mt_destroy(const struct xt_mtdtor_param *par)
61{
62 const struct xt_quota_info *q = par->matchinfo;
63
64 kfree(q->master);
65}
66
51static struct xt_match quota_mt_reg __read_mostly = { 67static struct xt_match quota_mt_reg __read_mostly = {
52 .name = "quota", 68 .name = "quota",
53 .revision = 0, 69 .revision = 0,
54 .family = NFPROTO_UNSPEC, 70 .family = NFPROTO_UNSPEC,
55 .match = quota_mt, 71 .match = quota_mt,
56 .checkentry = quota_mt_check, 72 .checkentry = quota_mt_check,
73 .destroy = quota_mt_destroy,
57 .matchsize = sizeof(struct xt_quota_info), 74 .matchsize = sizeof(struct xt_quota_info),
58 .me = THIS_MODULE, 75 .me = THIS_MODULE,
59}; 76};
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 0d75141139d5..d8c0f8f1a78e 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -16,6 +16,10 @@
16#include <linux/netfilter/xt_statistic.h> 16#include <linux/netfilter/xt_statistic.h>
17#include <linux/netfilter/x_tables.h> 17#include <linux/netfilter/x_tables.h>
18 18
19struct xt_statistic_priv {
20 uint32_t count;
21};
22
19MODULE_LICENSE("GPL"); 23MODULE_LICENSE("GPL");
20MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); 24MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
21MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)"); 25MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)");
@@ -27,7 +31,7 @@ static DEFINE_SPINLOCK(nth_lock);
27static bool 31static bool
28statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par) 32statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par)
29{ 33{
30 struct xt_statistic_info *info = (void *)par->matchinfo; 34 const struct xt_statistic_info *info = par->matchinfo;
31 bool ret = info->flags & XT_STATISTIC_INVERT; 35 bool ret = info->flags & XT_STATISTIC_INVERT;
32 36
33 switch (info->mode) { 37 switch (info->mode) {
@@ -36,10 +40,9 @@ statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par)
36 ret = !ret; 40 ret = !ret;
37 break; 41 break;
38 case XT_STATISTIC_MODE_NTH: 42 case XT_STATISTIC_MODE_NTH:
39 info = info->master;
40 spin_lock_bh(&nth_lock); 43 spin_lock_bh(&nth_lock);
41 if (info->u.nth.count++ == info->u.nth.every) { 44 if (info->master->count++ == info->u.nth.every) {
42 info->u.nth.count = 0; 45 info->master->count = 0;
43 ret = !ret; 46 ret = !ret;
44 } 47 }
45 spin_unlock_bh(&nth_lock); 48 spin_unlock_bh(&nth_lock);
@@ -56,16 +59,31 @@ static bool statistic_mt_check(const struct xt_mtchk_param *par)
56 if (info->mode > XT_STATISTIC_MODE_MAX || 59 if (info->mode > XT_STATISTIC_MODE_MAX ||
57 info->flags & ~XT_STATISTIC_MASK) 60 info->flags & ~XT_STATISTIC_MASK)
58 return false; 61 return false;
59 info->master = info; 62
63 info->master = kzalloc(sizeof(*info->master), GFP_KERNEL);
64 if (info->master == NULL) {
65 printk(KERN_ERR KBUILD_MODNAME ": Out of memory\n");
66 return false;
67 }
68 info->master->count = info->u.nth.count;
69
60 return true; 70 return true;
61} 71}
62 72
73static void statistic_mt_destroy(const struct xt_mtdtor_param *par)
74{
75 const struct xt_statistic_info *info = par->matchinfo;
76
77 kfree(info->master);
78}
79
63static struct xt_match xt_statistic_mt_reg __read_mostly = { 80static struct xt_match xt_statistic_mt_reg __read_mostly = {
64 .name = "statistic", 81 .name = "statistic",
65 .revision = 0, 82 .revision = 0,
66 .family = NFPROTO_UNSPEC, 83 .family = NFPROTO_UNSPEC,
67 .match = statistic_mt, 84 .match = statistic_mt,
68 .checkentry = statistic_mt_check, 85 .checkentry = statistic_mt_check,
86 .destroy = statistic_mt_destroy,
69 .matchsize = sizeof(struct xt_statistic_info), 87 .matchsize = sizeof(struct xt_statistic_info),
70 .me = THIS_MODULE, 88 .me = THIS_MODULE,
71}; 89};
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index fd9229db075c..b0e582f2d37a 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -619,8 +619,9 @@ int netlbl_enabled(void)
619} 619}
620 620
621/** 621/**
622 * netlbl_socket_setattr - Label a socket using the correct protocol 622 * netlbl_sock_setattr - Label a socket using the correct protocol
623 * @sk: the socket to label 623 * @sk: the socket to label
624 * @family: protocol family
624 * @secattr: the security attributes 625 * @secattr: the security attributes
625 * 626 *
626 * Description: 627 * Description:
@@ -633,29 +634,45 @@ int netlbl_enabled(void)
633 * 634 *
634 */ 635 */
635int netlbl_sock_setattr(struct sock *sk, 636int netlbl_sock_setattr(struct sock *sk,
637 u16 family,
636 const struct netlbl_lsm_secattr *secattr) 638 const struct netlbl_lsm_secattr *secattr)
637{ 639{
638 int ret_val = -ENOENT; 640 int ret_val;
639 struct netlbl_dom_map *dom_entry; 641 struct netlbl_dom_map *dom_entry;
640 642
641 rcu_read_lock(); 643 rcu_read_lock();
642 dom_entry = netlbl_domhsh_getentry(secattr->domain); 644 dom_entry = netlbl_domhsh_getentry(secattr->domain);
643 if (dom_entry == NULL) 645 if (dom_entry == NULL) {
646 ret_val = -ENOENT;
644 goto socket_setattr_return; 647 goto socket_setattr_return;
645 switch (dom_entry->type) { 648 }
646 case NETLBL_NLTYPE_ADDRSELECT: 649 switch (family) {
647 ret_val = -EDESTADDRREQ; 650 case AF_INET:
648 break; 651 switch (dom_entry->type) {
649 case NETLBL_NLTYPE_CIPSOV4: 652 case NETLBL_NLTYPE_ADDRSELECT:
650 ret_val = cipso_v4_sock_setattr(sk, 653 ret_val = -EDESTADDRREQ;
651 dom_entry->type_def.cipsov4, 654 break;
652 secattr); 655 case NETLBL_NLTYPE_CIPSOV4:
656 ret_val = cipso_v4_sock_setattr(sk,
657 dom_entry->type_def.cipsov4,
658 secattr);
659 break;
660 case NETLBL_NLTYPE_UNLABELED:
661 ret_val = 0;
662 break;
663 default:
664 ret_val = -ENOENT;
665 }
653 break; 666 break;
654 case NETLBL_NLTYPE_UNLABELED: 667#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
668 case AF_INET6:
669 /* since we don't support any IPv6 labeling protocols right
670 * now we can optimize everything away until we do */
655 ret_val = 0; 671 ret_val = 0;
656 break; 672 break;
673#endif /* IPv6 */
657 default: 674 default:
658 ret_val = -ENOENT; 675 ret_val = -EPROTONOSUPPORT;
659 } 676 }
660 677
661socket_setattr_return: 678socket_setattr_return:
@@ -689,9 +706,25 @@ void netlbl_sock_delattr(struct sock *sk)
689 * on failure. 706 * on failure.
690 * 707 *
691 */ 708 */
692int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) 709int netlbl_sock_getattr(struct sock *sk,
710 struct netlbl_lsm_secattr *secattr)
693{ 711{
694 return cipso_v4_sock_getattr(sk, secattr); 712 int ret_val;
713
714 switch (sk->sk_family) {
715 case AF_INET:
716 ret_val = cipso_v4_sock_getattr(sk, secattr);
717 break;
718#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
719 case AF_INET6:
720 ret_val = -ENOMSG;
721 break;
722#endif /* IPv6 */
723 default:
724 ret_val = -EPROTONOSUPPORT;
725 }
726
727 return ret_val;
695} 728}
696 729
697/** 730/**
@@ -748,7 +781,7 @@ int netlbl_conn_setattr(struct sock *sk,
748 break; 781 break;
749#endif /* IPv6 */ 782#endif /* IPv6 */
750 default: 783 default:
751 ret_val = 0; 784 ret_val = -EPROTONOSUPPORT;
752 } 785 }
753 786
754conn_setattr_return: 787conn_setattr_return:
@@ -757,6 +790,90 @@ conn_setattr_return:
757} 790}
758 791
759/** 792/**
793 * netlbl_req_setattr - Label a request socket using the correct protocol
794 * @req: the request socket to label
795 * @secattr: the security attributes
796 *
797 * Description:
798 * Attach the correct label to the given socket using the security attributes
799 * specified in @secattr. Returns zero on success, negative values on failure.
800 *
801 */
802int netlbl_req_setattr(struct request_sock *req,
803 const struct netlbl_lsm_secattr *secattr)
804{
805 int ret_val;
806 struct netlbl_dom_map *dom_entry;
807 struct netlbl_domaddr4_map *af4_entry;
808 u32 proto_type;
809 struct cipso_v4_doi *proto_cv4;
810
811 rcu_read_lock();
812 dom_entry = netlbl_domhsh_getentry(secattr->domain);
813 if (dom_entry == NULL) {
814 ret_val = -ENOENT;
815 goto req_setattr_return;
816 }
817 switch (req->rsk_ops->family) {
818 case AF_INET:
819 if (dom_entry->type == NETLBL_NLTYPE_ADDRSELECT) {
820 struct inet_request_sock *req_inet = inet_rsk(req);
821 af4_entry = netlbl_domhsh_getentry_af4(secattr->domain,
822 req_inet->rmt_addr);
823 if (af4_entry == NULL) {
824 ret_val = -ENOENT;
825 goto req_setattr_return;
826 }
827 proto_type = af4_entry->type;
828 proto_cv4 = af4_entry->type_def.cipsov4;
829 } else {
830 proto_type = dom_entry->type;
831 proto_cv4 = dom_entry->type_def.cipsov4;
832 }
833 switch (proto_type) {
834 case NETLBL_NLTYPE_CIPSOV4:
835 ret_val = cipso_v4_req_setattr(req, proto_cv4, secattr);
836 break;
837 case NETLBL_NLTYPE_UNLABELED:
838 /* just delete the protocols we support for right now
839 * but we could remove other protocols if needed */
840 cipso_v4_req_delattr(req);
841 ret_val = 0;
842 break;
843 default:
844 ret_val = -ENOENT;
845 }
846 break;
847#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
848 case AF_INET6:
849 /* since we don't support any IPv6 labeling protocols right
850 * now we can optimize everything away until we do */
851 ret_val = 0;
852 break;
853#endif /* IPv6 */
854 default:
855 ret_val = -EPROTONOSUPPORT;
856 }
857
858req_setattr_return:
859 rcu_read_unlock();
860 return ret_val;
861}
862
863/**
864 * netlbl_req_delattr - Delete all the NetLabel labels on a socket
865 * @req: the request socket
866 *
867 * Description:
868 * Remove all the NetLabel labeling from @req.
869 *
870 */
871void netlbl_req_delattr(struct request_sock *req)
872{
873 cipso_v4_req_delattr(req);
874}
875
876/**
760 * netlbl_skbuff_setattr - Label a packet using the correct protocol 877 * netlbl_skbuff_setattr - Label a packet using the correct protocol
761 * @skb: the packet 878 * @skb: the packet
762 * @family: protocol family 879 * @family: protocol family
@@ -808,7 +925,7 @@ int netlbl_skbuff_setattr(struct sk_buff *skb,
808 break; 925 break;
809#endif /* IPv6 */ 926#endif /* IPv6 */
810 default: 927 default:
811 ret_val = 0; 928 ret_val = -EPROTONOSUPPORT;
812 } 929 }
813 930
814skbuff_setattr_return: 931skbuff_setattr_return:
@@ -833,9 +950,17 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
833 u16 family, 950 u16 family,
834 struct netlbl_lsm_secattr *secattr) 951 struct netlbl_lsm_secattr *secattr)
835{ 952{
836 if (CIPSO_V4_OPTEXIST(skb) && 953 switch (family) {
837 cipso_v4_skbuff_getattr(skb, secattr) == 0) 954 case AF_INET:
838 return 0; 955 if (CIPSO_V4_OPTEXIST(skb) &&
956 cipso_v4_skbuff_getattr(skb, secattr) == 0)
957 return 0;
958 break;
959#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
960 case AF_INET6:
961 break;
962#endif /* IPv6 */
963 }
839 964
840 return netlbl_unlabel_getattr(skb, family, secattr); 965 return netlbl_unlabel_getattr(skb, family, secattr);
841} 966}
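
With the family argument added above, netlbl_sock_setattr() now decides per protocol family whether to apply CIPSO (AF_INET), do nothing for now (AF_INET6), or fail with -EPROTONOSUPPORT, instead of unconditionally assuming IPv4. A minimal caller sketch follows, assuming an LSM-style hook that already holds the socket and a populated secattr; the function name and the policy of tolerating unsupported families are illustrative, not from this patch.

/* Hypothetical caller: pass the socket's own family into the new API. */
static int example_label_new_socket(struct sock *sk,
				    const struct netlbl_lsm_secattr *secattr)
{
	int rc = netlbl_sock_setattr(sk, sk->sk_family, secattr);

	/* Example policy only: families NetLabel cannot label are allowed
	 * through unlabeled rather than treated as an error. */
	if (rc == -EPROTONOSUPPORT)
		rc = 0;
	return rc;
}
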
diff --git a/net/netlink/Makefile b/net/netlink/Makefile
index e3589c2de49e..bdd6ddf4e95b 100644
--- a/net/netlink/Makefile
+++ b/net/netlink/Makefile
@@ -2,4 +2,4 @@
2# Makefile for the netlink driver. 2# Makefile for the netlink driver.
3# 3#
4 4
5obj-y := af_netlink.o attr.o genetlink.o 5obj-y := af_netlink.o genetlink.o
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3ae3cb816563..8b6bbb3032b0 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -85,6 +85,8 @@ struct netlink_sock {
85 85
86#define NETLINK_KERNEL_SOCKET 0x1 86#define NETLINK_KERNEL_SOCKET 0x1
87#define NETLINK_RECV_PKTINFO 0x2 87#define NETLINK_RECV_PKTINFO 0x2
88#define NETLINK_BROADCAST_SEND_ERROR 0x4
89#define NETLINK_RECV_NO_ENOBUFS 0x8
88 90
89static inline struct netlink_sock *nlk_sk(struct sock *sk) 91static inline struct netlink_sock *nlk_sk(struct sock *sk)
90{ 92{
@@ -716,10 +718,15 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
716 718
717static void netlink_overrun(struct sock *sk) 719static void netlink_overrun(struct sock *sk)
718{ 720{
719 if (!test_and_set_bit(0, &nlk_sk(sk)->state)) { 721 struct netlink_sock *nlk = nlk_sk(sk);
720 sk->sk_err = ENOBUFS; 722
721 sk->sk_error_report(sk); 723 if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
724 if (!test_and_set_bit(0, &nlk_sk(sk)->state)) {
725 sk->sk_err = ENOBUFS;
726 sk->sk_error_report(sk);
727 }
722 } 728 }
729 atomic_inc(&sk->sk_drops);
723} 730}
724 731
725static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) 732static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid)
@@ -950,6 +957,7 @@ struct netlink_broadcast_data {
950 u32 pid; 957 u32 pid;
951 u32 group; 958 u32 group;
952 int failure; 959 int failure;
960 int delivery_failure;
953 int congested; 961 int congested;
954 int delivered; 962 int delivered;
955 gfp_t allocation; 963 gfp_t allocation;
@@ -994,11 +1002,15 @@ static inline int do_one_broadcast(struct sock *sk,
994 netlink_overrun(sk); 1002 netlink_overrun(sk);
995 /* Clone failed. Notify ALL listeners. */ 1003 /* Clone failed. Notify ALL listeners. */
996 p->failure = 1; 1004 p->failure = 1;
1005 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1006 p->delivery_failure = 1;
997 } else if (sk_filter(sk, p->skb2)) { 1007 } else if (sk_filter(sk, p->skb2)) {
998 kfree_skb(p->skb2); 1008 kfree_skb(p->skb2);
999 p->skb2 = NULL; 1009 p->skb2 = NULL;
1000 } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { 1010 } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
1001 netlink_overrun(sk); 1011 netlink_overrun(sk);
1012 if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
1013 p->delivery_failure = 1;
1002 } else { 1014 } else {
1003 p->congested |= val; 1015 p->congested |= val;
1004 p->delivered = 1; 1016 p->delivered = 1;
@@ -1025,6 +1037,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
1025 info.pid = pid; 1037 info.pid = pid;
1026 info.group = group; 1038 info.group = group;
1027 info.failure = 0; 1039 info.failure = 0;
1040 info.delivery_failure = 0;
1028 info.congested = 0; 1041 info.congested = 0;
1029 info.delivered = 0; 1042 info.delivered = 0;
1030 info.allocation = allocation; 1043 info.allocation = allocation;
@@ -1042,16 +1055,16 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
1042 1055
1043 netlink_unlock_table(); 1056 netlink_unlock_table();
1044 1057
1045 if (info.skb2) 1058 kfree_skb(info.skb2);
1046 kfree_skb(info.skb2); 1059
1060 if (info.delivery_failure)
1061 return -ENOBUFS;
1047 1062
1048 if (info.delivered) { 1063 if (info.delivered) {
1049 if (info.congested && (allocation & __GFP_WAIT)) 1064 if (info.congested && (allocation & __GFP_WAIT))
1050 yield(); 1065 yield();
1051 return 0; 1066 return 0;
1052 } 1067 }
1053 if (info.failure)
1054 return -ENOBUFS;
1055 return -ESRCH; 1068 return -ESRCH;
1056} 1069}
1057EXPORT_SYMBOL(netlink_broadcast); 1070EXPORT_SYMBOL(netlink_broadcast);
@@ -1110,6 +1123,7 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code)
1110 1123
1111 read_unlock(&nl_table_lock); 1124 read_unlock(&nl_table_lock);
1112} 1125}
1126EXPORT_SYMBOL(netlink_set_err);
1113 1127
1114/* must be called with netlink table grabbed */ 1128/* must be called with netlink table grabbed */
1115static void netlink_update_socket_mc(struct netlink_sock *nlk, 1129static void netlink_update_socket_mc(struct netlink_sock *nlk,
@@ -1167,6 +1181,22 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
1167 err = 0; 1181 err = 0;
1168 break; 1182 break;
1169 } 1183 }
1184 case NETLINK_BROADCAST_ERROR:
1185 if (val)
1186 nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
1187 else
1188 nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
1189 err = 0;
1190 break;
1191 case NETLINK_NO_ENOBUFS:
1192 if (val) {
1193 nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
1194 clear_bit(0, &nlk->state);
1195 wake_up_interruptible(&nlk->wait);
1196 } else
1197 nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
1198 err = 0;
1199 break;
1170 default: 1200 default:
1171 err = -ENOPROTOOPT; 1201 err = -ENOPROTOOPT;
1172 } 1202 }
@@ -1199,6 +1229,26 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
1199 return -EFAULT; 1229 return -EFAULT;
1200 err = 0; 1230 err = 0;
1201 break; 1231 break;
1232 case NETLINK_BROADCAST_ERROR:
1233 if (len < sizeof(int))
1234 return -EINVAL;
1235 len = sizeof(int);
1236 val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
1237 if (put_user(len, optlen) ||
1238 put_user(val, optval))
1239 return -EFAULT;
1240 err = 0;
1241 break;
1242 case NETLINK_NO_ENOBUFS:
1243 if (len < sizeof(int))
1244 return -EINVAL;
1245 len = sizeof(int);
1246 val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
1247 if (put_user(len, optlen) ||
1248 put_user(val, optval))
1249 return -EFAULT;
1250 err = 0;
1251 break;
1202 default: 1252 default:
1203 err = -ENOPROTOOPT; 1253 err = -ENOPROTOOPT;
1204 } 1254 }
@@ -1525,8 +1575,7 @@ EXPORT_SYMBOL(netlink_set_nonroot);
1525 1575
1526static void netlink_destroy_callback(struct netlink_callback *cb) 1576static void netlink_destroy_callback(struct netlink_callback *cb)
1527{ 1577{
1528 if (cb->skb) 1578 kfree_skb(cb->skb);
1529 kfree_skb(cb->skb);
1530 kfree(cb); 1579 kfree(cb);
1531} 1580}
1532 1581
@@ -1743,12 +1792,18 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid,
1743 exclude_pid = pid; 1792 exclude_pid = pid;
1744 } 1793 }
1745 1794
1746 /* errors reported via destination sk->sk_err */ 1795 /* errors reported via destination sk->sk_err, but propagate
1747 nlmsg_multicast(sk, skb, exclude_pid, group, flags); 1796 * delivery errors if NETLINK_BROADCAST_ERROR flag is set */
1797 err = nlmsg_multicast(sk, skb, exclude_pid, group, flags);
1748 } 1798 }
1749 1799
1750 if (report) 1800 if (report) {
1751 err = nlmsg_unicast(sk, skb, pid); 1801 int err2;
1802
1803 err2 = nlmsg_unicast(sk, skb, pid);
1804 if (!err || err == -ESRCH)
1805 err = err2;
1806 }
1752 1807
1753 return err; 1808 return err;
1754} 1809}
@@ -1849,12 +1904,12 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
1849 if (v == SEQ_START_TOKEN) 1904 if (v == SEQ_START_TOKEN)
1850 seq_puts(seq, 1905 seq_puts(seq,
1851 "sk Eth Pid Groups " 1906 "sk Eth Pid Groups "
1852 "Rmem Wmem Dump Locks\n"); 1907 "Rmem Wmem Dump Locks Drops\n");
1853 else { 1908 else {
1854 struct sock *s = v; 1909 struct sock *s = v;
1855 struct netlink_sock *nlk = nlk_sk(s); 1910 struct netlink_sock *nlk = nlk_sk(s);
1856 1911
1857 seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n", 1912 seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %-8d %-8d\n",
1858 s, 1913 s,
1859 s->sk_protocol, 1914 s->sk_protocol,
1860 nlk->pid, 1915 nlk->pid,
@@ -1862,7 +1917,8 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
1862 atomic_read(&s->sk_rmem_alloc), 1917 atomic_read(&s->sk_rmem_alloc),
1863 atomic_read(&s->sk_wmem_alloc), 1918 atomic_read(&s->sk_wmem_alloc),
1864 nlk->cb, 1919 nlk->cb,
1865 atomic_read(&s->sk_refcnt) 1920 atomic_read(&s->sk_refcnt),
1921 atomic_read(&s->sk_drops)
1866 ); 1922 );
1867 1923
1868 } 1924 }
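
The af_netlink.c changes above add two per-socket options, NETLINK_BROADCAST_ERROR (report failed multicast delivery back to the in-kernel broadcaster) and NETLINK_NO_ENOBUFS (suppress ENOBUFS on receive-queue overruns, with drops now counted in the new Drops column of /proc/net/netlink). A minimal userspace sketch of how a listener might opt in follows; the program is illustrative and not part of the patch, and the fallback #defines assume the numeric values used by the kernel in case older userspace headers lack them.

#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK		270	/* assumed value, matches the kernel */
#endif
#ifndef NETLINK_BROADCAST_ERROR
#define NETLINK_BROADCAST_ERROR	4	/* assumed value */
#endif
#ifndef NETLINK_NO_ENOBUFS
#define NETLINK_NO_ENOBUFS	5	/* assumed value */
#endif

int main(void)
{
	int one = 1;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* Tell the kernel to propagate delivery failures to broadcasters. */
	if (setsockopt(fd, SOL_NETLINK, NETLINK_BROADCAST_ERROR,
		       &one, sizeof(one)) < 0)
		perror("NETLINK_BROADCAST_ERROR");
	/* Do not raise ENOBUFS on this socket when its queue overruns. */
	if (setsockopt(fd, SOL_NETLINK, NETLINK_NO_ENOBUFS,
		       &one, sizeof(one)) < 0)
		perror("NETLINK_NO_ENOBUFS");
	close(fd);
	return 0;
}
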
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
deleted file mode 100644
index 56c3ce7fe29a..000000000000
--- a/net/netlink/attr.c
+++ /dev/null
@@ -1,473 +0,0 @@
1/*
2 * NETLINK Netlink attributes
3 *
4 * Authors: Thomas Graf <tgraf@suug.ch>
5 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
6 */
7
8#include <linux/module.h>
9#include <linux/kernel.h>
10#include <linux/errno.h>
11#include <linux/jiffies.h>
12#include <linux/netdevice.h>
13#include <linux/skbuff.h>
14#include <linux/string.h>
15#include <linux/types.h>
16#include <net/netlink.h>
17
18static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = {
19 [NLA_U8] = sizeof(u8),
20 [NLA_U16] = sizeof(u16),
21 [NLA_U32] = sizeof(u32),
22 [NLA_U64] = sizeof(u64),
23 [NLA_NESTED] = NLA_HDRLEN,
24};
25
26static int validate_nla(struct nlattr *nla, int maxtype,
27 const struct nla_policy *policy)
28{
29 const struct nla_policy *pt;
30 int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla);
31
32 if (type <= 0 || type > maxtype)
33 return 0;
34
35 pt = &policy[type];
36
37 BUG_ON(pt->type > NLA_TYPE_MAX);
38
39 switch (pt->type) {
40 case NLA_FLAG:
41 if (attrlen > 0)
42 return -ERANGE;
43 break;
44
45 case NLA_NUL_STRING:
46 if (pt->len)
47 minlen = min_t(int, attrlen, pt->len + 1);
48 else
49 minlen = attrlen;
50
51 if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL)
52 return -EINVAL;
53 /* fall through */
54
55 case NLA_STRING:
56 if (attrlen < 1)
57 return -ERANGE;
58
59 if (pt->len) {
60 char *buf = nla_data(nla);
61
62 if (buf[attrlen - 1] == '\0')
63 attrlen--;
64
65 if (attrlen > pt->len)
66 return -ERANGE;
67 }
68 break;
69
70 case NLA_BINARY:
71 if (pt->len && attrlen > pt->len)
72 return -ERANGE;
73 break;
74
75 case NLA_NESTED_COMPAT:
76 if (attrlen < pt->len)
77 return -ERANGE;
78 if (attrlen < NLA_ALIGN(pt->len))
79 break;
80 if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN)
81 return -ERANGE;
82 nla = nla_data(nla) + NLA_ALIGN(pt->len);
83 if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla))
84 return -ERANGE;
85 break;
86 case NLA_NESTED:
87 /* a nested attributes is allowed to be empty; if its not,
88 * it must have a size of at least NLA_HDRLEN.
89 */
90 if (attrlen == 0)
91 break;
92 default:
93 if (pt->len)
94 minlen = pt->len;
95 else if (pt->type != NLA_UNSPEC)
96 minlen = nla_attr_minlen[pt->type];
97
98 if (attrlen < minlen)
99 return -ERANGE;
100 }
101
102 return 0;
103}
104
105/**
106 * nla_validate - Validate a stream of attributes
107 * @head: head of attribute stream
108 * @len: length of attribute stream
109 * @maxtype: maximum attribute type to be expected
110 * @policy: validation policy
111 *
112 * Validates all attributes in the specified attribute stream against the
113 * specified policy. Attributes with a type exceeding maxtype will be
114 * ignored. See documenation of struct nla_policy for more details.
115 *
116 * Returns 0 on success or a negative error code.
117 */
118int nla_validate(struct nlattr *head, int len, int maxtype,
119 const struct nla_policy *policy)
120{
121 struct nlattr *nla;
122 int rem, err;
123
124 nla_for_each_attr(nla, head, len, rem) {
125 err = validate_nla(nla, maxtype, policy);
126 if (err < 0)
127 goto errout;
128 }
129
130 err = 0;
131errout:
132 return err;
133}
134
135/**
136 * nla_parse - Parse a stream of attributes into a tb buffer
137 * @tb: destination array with maxtype+1 elements
138 * @maxtype: maximum attribute type to be expected
139 * @head: head of attribute stream
140 * @len: length of attribute stream
141 * @policy: validation policy
142 *
143 * Parses a stream of attributes and stores a pointer to each attribute in
144 * the tb array accessable via the attribute type. Attributes with a type
145 * exceeding maxtype will be silently ignored for backwards compatibility
146 * reasons. policy may be set to NULL if no validation is required.
147 *
148 * Returns 0 on success or a negative error code.
149 */
150int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
151 const struct nla_policy *policy)
152{
153 struct nlattr *nla;
154 int rem, err;
155
156 memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
157
158 nla_for_each_attr(nla, head, len, rem) {
159 u16 type = nla_type(nla);
160
161 if (type > 0 && type <= maxtype) {
162 if (policy) {
163 err = validate_nla(nla, maxtype, policy);
164 if (err < 0)
165 goto errout;
166 }
167
168 tb[type] = nla;
169 }
170 }
171
172 if (unlikely(rem > 0))
173 printk(KERN_WARNING "netlink: %d bytes leftover after parsing "
174 "attributes.\n", rem);
175
176 err = 0;
177errout:
178 return err;
179}
180
181/**
182 * nla_find - Find a specific attribute in a stream of attributes
183 * @head: head of attribute stream
184 * @len: length of attribute stream
185 * @attrtype: type of attribute to look for
186 *
187 * Returns the first attribute in the stream matching the specified type.
188 */
189struct nlattr *nla_find(struct nlattr *head, int len, int attrtype)
190{
191 struct nlattr *nla;
192 int rem;
193
194 nla_for_each_attr(nla, head, len, rem)
195 if (nla_type(nla) == attrtype)
196 return nla;
197
198 return NULL;
199}
200
201/**
202 * nla_strlcpy - Copy string attribute payload into a sized buffer
203 * @dst: where to copy the string to
204 * @nla: attribute to copy the string from
205 * @dstsize: size of destination buffer
206 *
207 * Copies at most dstsize - 1 bytes into the destination buffer.
208 * The result is always a valid NUL-terminated string. Unlike
209 * strlcpy the destination buffer is always padded out.
210 *
211 * Returns the length of the source buffer.
212 */
213size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize)
214{
215 size_t srclen = nla_len(nla);
216 char *src = nla_data(nla);
217
218 if (srclen > 0 && src[srclen - 1] == '\0')
219 srclen--;
220
221 if (dstsize > 0) {
222 size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen;
223
224 memset(dst, 0, dstsize);
225 memcpy(dst, src, len);
226 }
227
228 return srclen;
229}
230
231/**
232 * nla_memcpy - Copy a netlink attribute into another memory area
233 * @dest: where to copy to memcpy
234 * @src: netlink attribute to copy from
235 * @count: size of the destination area
236 *
237 * Note: The number of bytes copied is limited by the length of
238 * attribute's payload. memcpy
239 *
240 * Returns the number of bytes copied.
241 */
242int nla_memcpy(void *dest, const struct nlattr *src, int count)
243{
244 int minlen = min_t(int, count, nla_len(src));
245
246 memcpy(dest, nla_data(src), minlen);
247
248 return minlen;
249}
250
251/**
252 * nla_memcmp - Compare an attribute with sized memory area
253 * @nla: netlink attribute
254 * @data: memory area
255 * @size: size of memory area
256 */
257int nla_memcmp(const struct nlattr *nla, const void *data,
258 size_t size)
259{
260 int d = nla_len(nla) - size;
261
262 if (d == 0)
263 d = memcmp(nla_data(nla), data, size);
264
265 return d;
266}
267
268/**
269 * nla_strcmp - Compare a string attribute against a string
270 * @nla: netlink string attribute
271 * @str: another string
272 */
273int nla_strcmp(const struct nlattr *nla, const char *str)
274{
275 int len = strlen(str) + 1;
276 int d = nla_len(nla) - len;
277
278 if (d == 0)
279 d = memcmp(nla_data(nla), str, len);
280
281 return d;
282}
283
284/**
285 * __nla_reserve - reserve room for attribute on the skb
286 * @skb: socket buffer to reserve room on
287 * @attrtype: attribute type
288 * @attrlen: length of attribute payload
289 *
290 * Adds a netlink attribute header to a socket buffer and reserves
291 * room for the payload but does not copy it.
292 *
293 * The caller is responsible to ensure that the skb provides enough
294 * tailroom for the attribute header and payload.
295 */
296struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
297{
298 struct nlattr *nla;
299
300 nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen));
301 nla->nla_type = attrtype;
302 nla->nla_len = nla_attr_size(attrlen);
303
304 memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen));
305
306 return nla;
307}
308
309/**
310 * __nla_reserve_nohdr - reserve room for attribute without header
311 * @skb: socket buffer to reserve room on
312 * @attrlen: length of attribute payload
313 *
314 * Reserves room for attribute payload without a header.
315 *
316 * The caller is responsible to ensure that the skb provides enough
317 * tailroom for the payload.
318 */
319void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
320{
321 void *start;
322
323 start = skb_put(skb, NLA_ALIGN(attrlen));
324 memset(start, 0, NLA_ALIGN(attrlen));
325
326 return start;
327}
328
329/**
330 * nla_reserve - reserve room for attribute on the skb
331 * @skb: socket buffer to reserve room on
332 * @attrtype: attribute type
333 * @attrlen: length of attribute payload
334 *
335 * Adds a netlink attribute header to a socket buffer and reserves
336 * room for the payload but does not copy it.
337 *
338 * Returns NULL if the tailroom of the skb is insufficient to store
339 * the attribute header and payload.
340 */
341struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen)
342{
343 if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
344 return NULL;
345
346 return __nla_reserve(skb, attrtype, attrlen);
347}
348
349/**
350 * nla_reserve_nohdr - reserve room for attribute without header
351 * @skb: socket buffer to reserve room on
352 * @attrlen: length of attribute payload
353 *
354 * Reserves room for attribute payload without a header.
355 *
356 * Returns NULL if the tailroom of the skb is insufficient to store
357 * the attribute payload.
358 */
359void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen)
360{
361 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
362 return NULL;
363
364 return __nla_reserve_nohdr(skb, attrlen);
365}
366
367/**
368 * __nla_put - Add a netlink attribute to a socket buffer
369 * @skb: socket buffer to add attribute to
370 * @attrtype: attribute type
371 * @attrlen: length of attribute payload
372 * @data: head of attribute payload
373 *
374 * The caller is responsible to ensure that the skb provides enough
375 * tailroom for the attribute header and payload.
376 */
377void __nla_put(struct sk_buff *skb, int attrtype, int attrlen,
378 const void *data)
379{
380 struct nlattr *nla;
381
382 nla = __nla_reserve(skb, attrtype, attrlen);
383 memcpy(nla_data(nla), data, attrlen);
384}
385
386/**
387 * __nla_put_nohdr - Add a netlink attribute without header
388 * @skb: socket buffer to add attribute to
389 * @attrlen: length of attribute payload
390 * @data: head of attribute payload
391 *
392 * The caller is responsible to ensure that the skb provides enough
393 * tailroom for the attribute payload.
394 */
395void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
396{
397 void *start;
398
399 start = __nla_reserve_nohdr(skb, attrlen);
400 memcpy(start, data, attrlen);
401}
402
403/**
404 * nla_put - Add a netlink attribute to a socket buffer
405 * @skb: socket buffer to add attribute to
406 * @attrtype: attribute type
407 * @attrlen: length of attribute payload
408 * @data: head of attribute payload
409 *
410 * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
411 * the attribute header and payload.
412 */
413int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
414{
415 if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
416 return -EMSGSIZE;
417
418 __nla_put(skb, attrtype, attrlen, data);
419 return 0;
420}
421
422/**
423 * nla_put_nohdr - Add a netlink attribute without header
424 * @skb: socket buffer to add attribute to
425 * @attrlen: length of attribute payload
426 * @data: head of attribute payload
427 *
428 * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
429 * the attribute payload.
430 */
431int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
432{
433 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
434 return -EMSGSIZE;
435
436 __nla_put_nohdr(skb, attrlen, data);
437 return 0;
438}
439
440/**
441 * nla_append - Add a netlink attribute without header or padding
442 * @skb: socket buffer to add attribute to
443 * @attrlen: length of attribute payload
444 * @data: head of attribute payload
445 *
446 * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
447 * the attribute payload.
448 */
449int nla_append(struct sk_buff *skb, int attrlen, const void *data)
450{
451 if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
452 return -EMSGSIZE;
453
454 memcpy(skb_put(skb, attrlen), data, attrlen);
455 return 0;
456}
457
458EXPORT_SYMBOL(nla_validate);
459EXPORT_SYMBOL(nla_parse);
460EXPORT_SYMBOL(nla_find);
461EXPORT_SYMBOL(nla_strlcpy);
462EXPORT_SYMBOL(__nla_reserve);
463EXPORT_SYMBOL(__nla_reserve_nohdr);
464EXPORT_SYMBOL(nla_reserve);
465EXPORT_SYMBOL(nla_reserve_nohdr);
466EXPORT_SYMBOL(__nla_put);
467EXPORT_SYMBOL(__nla_put_nohdr);
468EXPORT_SYMBOL(nla_put);
469EXPORT_SYMBOL(nla_put_nohdr);
470EXPORT_SYMBOL(nla_memcpy);
471EXPORT_SYMBOL(nla_memcmp);
472EXPORT_SYMBOL(nla_strcmp);
473EXPORT_SYMBOL(nla_append);
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index e9c05b8f4f45..4e705f87969f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1082,7 +1082,11 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock,
1082 1082
1083 SOCK_DEBUG(sk, "NET/ROM: sendto: Addresses built.\n"); 1083 SOCK_DEBUG(sk, "NET/ROM: sendto: Addresses built.\n");
1084 1084
1085 /* Build a packet */ 1085 /* Build a packet - the conventional user limit is 236 bytes. We can
1086 do ludicrously large NetROM frames but must not overflow */
1087 if (len > 65536)
1088 return -EMSGSIZE;
1089
1086 SOCK_DEBUG(sk, "NET/ROM: sendto: building packet.\n"); 1090 SOCK_DEBUG(sk, "NET/ROM: sendto: building packet.\n");
1087 size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN; 1091 size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN;
1088 1092
@@ -1432,7 +1436,7 @@ static int __init nr_proto_init(void)
1432 struct net_device *dev; 1436 struct net_device *dev;
1433 1437
1434 sprintf(name, "nr%d", i); 1438 sprintf(name, "nr%d", i);
1435 dev = alloc_netdev(sizeof(struct nr_private), name, nr_setup); 1439 dev = alloc_netdev(0, name, nr_setup);
1436 if (!dev) { 1440 if (!dev) {
1437 printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n"); 1441 printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n");
1438 goto fail; 1442 goto fail;
diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c
index 6caf459665f2..351372463fed 100644
--- a/net/netrom/nr_dev.c
+++ b/net/netrom/nr_dev.c
@@ -42,7 +42,7 @@
42 42
43int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) 43int nr_rx_ip(struct sk_buff *skb, struct net_device *dev)
44{ 44{
45 struct net_device_stats *stats = netdev_priv(dev); 45 struct net_device_stats *stats = &dev->stats;
46 46
47 if (!netif_running(dev)) { 47 if (!netif_running(dev)) {
48 stats->rx_dropped++; 48 stats->rx_dropped++;
@@ -171,8 +171,7 @@ static int nr_close(struct net_device *dev)
171 171
172static int nr_xmit(struct sk_buff *skb, struct net_device *dev) 172static int nr_xmit(struct sk_buff *skb, struct net_device *dev)
173{ 173{
174 struct nr_private *nr = netdev_priv(dev); 174 struct net_device_stats *stats = &dev->stats;
175 struct net_device_stats *stats = &nr->stats;
176 unsigned int len = skb->len; 175 unsigned int len = skb->len;
177 176
178 if (!nr_route_frame(skb, NULL)) { 177 if (!nr_route_frame(skb, NULL)) {
@@ -187,34 +186,27 @@ static int nr_xmit(struct sk_buff *skb, struct net_device *dev)
187 return 0; 186 return 0;
188} 187}
189 188
190static struct net_device_stats *nr_get_stats(struct net_device *dev)
191{
192 struct nr_private *nr = netdev_priv(dev);
193
194 return &nr->stats;
195}
196
197static const struct header_ops nr_header_ops = { 189static const struct header_ops nr_header_ops = {
198 .create = nr_header, 190 .create = nr_header,
199 .rebuild= nr_rebuild_header, 191 .rebuild= nr_rebuild_header,
200}; 192};
201 193
194static const struct net_device_ops nr_netdev_ops = {
195 .ndo_open = nr_open,
196 .ndo_stop = nr_close,
197 .ndo_start_xmit = nr_xmit,
198 .ndo_set_mac_address = nr_set_mac_address,
199};
202 200
203void nr_setup(struct net_device *dev) 201void nr_setup(struct net_device *dev)
204{ 202{
205 dev->mtu = NR_MAX_PACKET_SIZE; 203 dev->mtu = NR_MAX_PACKET_SIZE;
206 dev->hard_start_xmit = nr_xmit; 204 dev->netdev_ops = &nr_netdev_ops;
207 dev->open = nr_open;
208 dev->stop = nr_close;
209
210 dev->header_ops = &nr_header_ops; 205 dev->header_ops = &nr_header_ops;
211 dev->hard_header_len = NR_NETWORK_LEN + NR_TRANSPORT_LEN; 206 dev->hard_header_len = NR_NETWORK_LEN + NR_TRANSPORT_LEN;
212 dev->addr_len = AX25_ADDR_LEN; 207 dev->addr_len = AX25_ADDR_LEN;
213 dev->type = ARPHRD_NETROM; 208 dev->type = ARPHRD_NETROM;
214 dev->set_mac_address = nr_set_mac_address;
215 209
216 /* New-style flags. */ 210 /* New-style flags. */
217 dev->flags = IFF_NOARP; 211 dev->flags = IFF_NOARP;
218
219 dev->get_stats = nr_get_stats;
220} 212}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1fc4a7885c41..74776de523ec 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -584,7 +584,7 @@ drop_n_restore:
584 skb->len = skb_len; 584 skb->len = skb_len;
585 } 585 }
586drop: 586drop:
587 kfree_skb(skb); 587 consume_skb(skb);
588 return 0; 588 return 0;
589} 589}
590 590
@@ -756,8 +756,7 @@ ring_is_full:
756 spin_unlock(&sk->sk_receive_queue.lock); 756 spin_unlock(&sk->sk_receive_queue.lock);
757 757
758 sk->sk_data_ready(sk, 0); 758 sk->sk_data_ready(sk, 0);
759 if (copy_skb) 759 kfree_skb(copy_skb);
760 kfree_skb(copy_skb);
761 goto drop_n_restore; 760 goto drop_n_restore;
762} 761}
763 762
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 13cb323f8c38..a662e62a99cf 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -275,8 +275,6 @@ static inline int can_respond(struct sk_buff *skb)
275 return 0; 275 return 0;
276 276
277 ph = pn_hdr(skb); 277 ph = pn_hdr(skb);
278 if (phonet_address_get(skb->dev, ph->pn_rdev) != ph->pn_rdev)
279 return 0; /* we are not the destination */
280 if (ph->pn_res == PN_PREFIX && !pskb_may_pull(skb, 5)) 278 if (ph->pn_res == PN_PREFIX && !pskb_may_pull(skb, 5))
281 return 0; 279 return 0;
282 if (ph->pn_res == PN_COMMGR) /* indications */ 280 if (ph->pn_res == PN_COMMGR) /* indications */
@@ -344,8 +342,8 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
344 struct packet_type *pkttype, 342 struct packet_type *pkttype,
345 struct net_device *orig_dev) 343 struct net_device *orig_dev)
346{ 344{
345 struct net *net = dev_net(dev);
347 struct phonethdr *ph; 346 struct phonethdr *ph;
348 struct sock *sk;
349 struct sockaddr_pn sa; 347 struct sockaddr_pn sa;
350 u16 len; 348 u16 len;
351 349
@@ -364,29 +362,28 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
364 skb_reset_transport_header(skb); 362 skb_reset_transport_header(skb);
365 363
366 pn_skb_get_dst_sockaddr(skb, &sa); 364 pn_skb_get_dst_sockaddr(skb, &sa);
367 if (pn_sockaddr_get_addr(&sa) == 0)
368 goto out; /* currently, we cannot be device 0 */
369 365
370 sk = pn_find_sock_by_sa(dev_net(dev), &sa); 366 /* check if we are the destination */
371 if (sk == NULL) { 367 if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) {
368 /* Phonet packet input */
369 struct sock *sk = pn_find_sock_by_sa(net, &sa);
370
371 if (sk)
372 return sk_receive_skb(sk, skb, 0);
373
372 if (can_respond(skb)) { 374 if (can_respond(skb)) {
373 send_obj_unreachable(skb); 375 send_obj_unreachable(skb);
374 send_reset_indications(skb); 376 send_reset_indications(skb);
375 } 377 }
376 goto out;
377 } 378 }
378 379
379 /* Push data to the socket (or other sockets connected to it). */
380 return sk_receive_skb(sk, skb, 0);
381
382out: 380out:
383 kfree_skb(skb); 381 kfree_skb(skb);
384 return NET_RX_DROP; 382 return NET_RX_DROP;
385} 383}
386 384
387static struct packet_type phonet_packet_type = { 385static struct packet_type phonet_packet_type __read_mostly = {
388 .type = __constant_htons(ETH_P_PHONET), 386 .type = cpu_to_be16(ETH_P_PHONET),
389 .dev = NULL,
390 .func = phonet_rcv, 387 .func = phonet_rcv,
391}; 388};
392 389
@@ -428,16 +425,18 @@ static int __init phonet_init(void)
428{ 425{
429 int err; 426 int err;
430 427
428 err = phonet_device_init();
429 if (err)
430 return err;
431
431 err = sock_register(&phonet_proto_family); 432 err = sock_register(&phonet_proto_family);
432 if (err) { 433 if (err) {
433 printk(KERN_ALERT 434 printk(KERN_ALERT
434 "phonet protocol family initialization failed\n"); 435 "phonet protocol family initialization failed\n");
435 return err; 436 goto err_sock;
436 } 437 }
437 438
438 phonet_device_init();
439 dev_add_pack(&phonet_packet_type); 439 dev_add_pack(&phonet_packet_type);
440 phonet_netlink_register();
441 phonet_sysctl_init(); 440 phonet_sysctl_init();
442 441
443 err = isi_register(); 442 err = isi_register();
@@ -449,6 +448,7 @@ err:
449 phonet_sysctl_exit(); 448 phonet_sysctl_exit();
450 sock_unregister(PF_PHONET); 449 sock_unregister(PF_PHONET);
451 dev_remove_pack(&phonet_packet_type); 450 dev_remove_pack(&phonet_packet_type);
451err_sock:
452 phonet_device_exit(); 452 phonet_device_exit();
453 return err; 453 return err;
454} 454}
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 5491bf5e354b..80a322d77909 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -28,32 +28,41 @@
28#include <linux/netdevice.h> 28#include <linux/netdevice.h>
29#include <linux/phonet.h> 29#include <linux/phonet.h>
30#include <net/sock.h> 30#include <net/sock.h>
31#include <net/netns/generic.h>
31#include <net/phonet/pn_dev.h> 32#include <net/phonet/pn_dev.h>
32 33
33/* when accessing, remember to lock with spin_lock(&pndevs.lock); */ 34struct phonet_net {
34struct phonet_device_list pndevs = { 35 struct phonet_device_list pndevs;
35 .list = LIST_HEAD_INIT(pndevs.list),
36 .lock = __SPIN_LOCK_UNLOCKED(pndevs.lock),
37}; 36};
38 37
38int phonet_net_id;
39
40struct phonet_device_list *phonet_device_list(struct net *net)
41{
42 struct phonet_net *pnn = net_generic(net, phonet_net_id);
43 return &pnn->pndevs;
44}
45
39/* Allocate new Phonet device. */ 46/* Allocate new Phonet device. */
40static struct phonet_device *__phonet_device_alloc(struct net_device *dev) 47static struct phonet_device *__phonet_device_alloc(struct net_device *dev)
41{ 48{
49 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
42 struct phonet_device *pnd = kmalloc(sizeof(*pnd), GFP_ATOMIC); 50 struct phonet_device *pnd = kmalloc(sizeof(*pnd), GFP_ATOMIC);
43 if (pnd == NULL) 51 if (pnd == NULL)
44 return NULL; 52 return NULL;
45 pnd->netdev = dev; 53 pnd->netdev = dev;
46 bitmap_zero(pnd->addrs, 64); 54 bitmap_zero(pnd->addrs, 64);
47 55
48 list_add(&pnd->list, &pndevs.list); 56 list_add(&pnd->list, &pndevs->list);
49 return pnd; 57 return pnd;
50} 58}
51 59
52static struct phonet_device *__phonet_get(struct net_device *dev) 60static struct phonet_device *__phonet_get(struct net_device *dev)
53{ 61{
62 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
54 struct phonet_device *pnd; 63 struct phonet_device *pnd;
55 64
56 list_for_each_entry(pnd, &pndevs.list, list) { 65 list_for_each_entry(pnd, &pndevs->list, list) {
57 if (pnd->netdev == dev) 66 if (pnd->netdev == dev)
58 return pnd; 67 return pnd;
59 } 68 }
@@ -68,32 +77,33 @@ static void __phonet_device_free(struct phonet_device *pnd)
68 77
69struct net_device *phonet_device_get(struct net *net) 78struct net_device *phonet_device_get(struct net *net)
70{ 79{
80 struct phonet_device_list *pndevs = phonet_device_list(net);
71 struct phonet_device *pnd; 81 struct phonet_device *pnd;
72 struct net_device *dev; 82 struct net_device *dev;
73 83
74 spin_lock_bh(&pndevs.lock); 84 spin_lock_bh(&pndevs->lock);
75 list_for_each_entry(pnd, &pndevs.list, list) { 85 list_for_each_entry(pnd, &pndevs->list, list) {
76 dev = pnd->netdev; 86 dev = pnd->netdev;
77 BUG_ON(!dev); 87 BUG_ON(!dev);
78 88
79 if (net_eq(dev_net(dev), net) && 89 if ((dev->reg_state == NETREG_REGISTERED) &&
80 (dev->reg_state == NETREG_REGISTERED) &&
81 ((pnd->netdev->flags & IFF_UP)) == IFF_UP) 90 ((pnd->netdev->flags & IFF_UP)) == IFF_UP)
82 break; 91 break;
83 dev = NULL; 92 dev = NULL;
84 } 93 }
85 if (dev) 94 if (dev)
86 dev_hold(dev); 95 dev_hold(dev);
87 spin_unlock_bh(&pndevs.lock); 96 spin_unlock_bh(&pndevs->lock);
88 return dev; 97 return dev;
89} 98}
90 99
91int phonet_address_add(struct net_device *dev, u8 addr) 100int phonet_address_add(struct net_device *dev, u8 addr)
92{ 101{
102 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
93 struct phonet_device *pnd; 103 struct phonet_device *pnd;
94 int err = 0; 104 int err = 0;
95 105
96 spin_lock_bh(&pndevs.lock); 106 spin_lock_bh(&pndevs->lock);
97 /* Find or create Phonet-specific device data */ 107 /* Find or create Phonet-specific device data */
98 pnd = __phonet_get(dev); 108 pnd = __phonet_get(dev);
99 if (pnd == NULL) 109 if (pnd == NULL)
@@ -102,31 +112,33 @@ int phonet_address_add(struct net_device *dev, u8 addr)
102 err = -ENOMEM; 112 err = -ENOMEM;
103 else if (test_and_set_bit(addr >> 2, pnd->addrs)) 113 else if (test_and_set_bit(addr >> 2, pnd->addrs))
104 err = -EEXIST; 114 err = -EEXIST;
105 spin_unlock_bh(&pndevs.lock); 115 spin_unlock_bh(&pndevs->lock);
106 return err; 116 return err;
107} 117}
108 118
109int phonet_address_del(struct net_device *dev, u8 addr) 119int phonet_address_del(struct net_device *dev, u8 addr)
110{ 120{
121 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
111 struct phonet_device *pnd; 122 struct phonet_device *pnd;
112 int err = 0; 123 int err = 0;
113 124
114 spin_lock_bh(&pndevs.lock); 125 spin_lock_bh(&pndevs->lock);
115 pnd = __phonet_get(dev); 126 pnd = __phonet_get(dev);
116 if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) 127 if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs))
117 err = -EADDRNOTAVAIL; 128 err = -EADDRNOTAVAIL;
118 else if (bitmap_empty(pnd->addrs, 64)) 129 else if (bitmap_empty(pnd->addrs, 64))
119 __phonet_device_free(pnd); 130 __phonet_device_free(pnd);
120 spin_unlock_bh(&pndevs.lock); 131 spin_unlock_bh(&pndevs->lock);
121 return err; 132 return err;
122} 133}
123 134
124/* Gets a source address toward a destination, through a interface. */ 135/* Gets a source address toward a destination, through a interface. */
125u8 phonet_address_get(struct net_device *dev, u8 addr) 136u8 phonet_address_get(struct net_device *dev, u8 addr)
126{ 137{
138 struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev));
127 struct phonet_device *pnd; 139 struct phonet_device *pnd;
128 140
129 spin_lock_bh(&pndevs.lock); 141 spin_lock_bh(&pndevs->lock);
130 pnd = __phonet_get(dev); 142 pnd = __phonet_get(dev);
131 if (pnd) { 143 if (pnd) {
132 BUG_ON(bitmap_empty(pnd->addrs, 64)); 144 BUG_ON(bitmap_empty(pnd->addrs, 64));
@@ -136,30 +148,31 @@ u8 phonet_address_get(struct net_device *dev, u8 addr)
136 addr = find_first_bit(pnd->addrs, 64) << 2; 148 addr = find_first_bit(pnd->addrs, 64) << 2;
137 } else 149 } else
138 addr = PN_NO_ADDR; 150 addr = PN_NO_ADDR;
139 spin_unlock_bh(&pndevs.lock); 151 spin_unlock_bh(&pndevs->lock);
140 return addr; 152 return addr;
141} 153}
142 154
143int phonet_address_lookup(struct net *net, u8 addr) 155int phonet_address_lookup(struct net *net, u8 addr)
144{ 156{
157 struct phonet_device_list *pndevs = phonet_device_list(net);
145 struct phonet_device *pnd; 158 struct phonet_device *pnd;
159 int err = -EADDRNOTAVAIL;
146 160
147 spin_lock_bh(&pndevs.lock); 161 spin_lock_bh(&pndevs->lock);
148 list_for_each_entry(pnd, &pndevs.list, list) { 162 list_for_each_entry(pnd, &pndevs->list, list) {
149 if (!net_eq(dev_net(pnd->netdev), net))
150 continue;
151 /* Don't allow unregistering devices! */ 163 /* Don't allow unregistering devices! */
152 if ((pnd->netdev->reg_state != NETREG_REGISTERED) || 164 if ((pnd->netdev->reg_state != NETREG_REGISTERED) ||
153 ((pnd->netdev->flags & IFF_UP)) != IFF_UP) 165 ((pnd->netdev->flags & IFF_UP)) != IFF_UP)
154 continue; 166 continue;
155 167
156 if (test_bit(addr >> 2, pnd->addrs)) { 168 if (test_bit(addr >> 2, pnd->addrs)) {
157 spin_unlock_bh(&pndevs.lock); 169 err = 0;
158 return 0; 170 goto found;
159 } 171 }
160 } 172 }
161 spin_unlock_bh(&pndevs.lock); 173found:
162 return -EADDRNOTAVAIL; 174 spin_unlock_bh(&pndevs->lock);
175 return err;
163} 176}
164 177
165/* notify Phonet of device events */ 178/* notify Phonet of device events */
@@ -169,14 +182,16 @@ static int phonet_device_notify(struct notifier_block *me, unsigned long what,
169 struct net_device *dev = arg; 182 struct net_device *dev = arg;
170 183
171 if (what == NETDEV_UNREGISTER) { 184 if (what == NETDEV_UNREGISTER) {
185 struct phonet_device_list *pndevs;
172 struct phonet_device *pnd; 186 struct phonet_device *pnd;
173 187
174 /* Destroy phonet-specific device data */ 188 /* Destroy phonet-specific device data */
175 spin_lock_bh(&pndevs.lock); 189 pndevs = phonet_device_list(dev_net(dev));
190 spin_lock_bh(&pndevs->lock);
176 pnd = __phonet_get(dev); 191 pnd = __phonet_get(dev);
177 if (pnd) 192 if (pnd)
178 __phonet_device_free(pnd); 193 __phonet_device_free(pnd);
179 spin_unlock_bh(&pndevs.lock); 194 spin_unlock_bh(&pndevs->lock);
180 } 195 }
181 return 0; 196 return 0;
182 197
@@ -187,24 +202,52 @@ static struct notifier_block phonet_device_notifier = {
187 .priority = 0, 202 .priority = 0,
188}; 203};
189 204
190/* Initialize Phonet devices list */ 205/* Per-namespace Phonet devices handling */
191void phonet_device_init(void) 206static int phonet_init_net(struct net *net)
192{ 207{
193 register_netdevice_notifier(&phonet_device_notifier); 208 struct phonet_net *pnn = kmalloc(sizeof(*pnn), GFP_KERNEL);
209 if (!pnn)
210 return -ENOMEM;
211
212 INIT_LIST_HEAD(&pnn->pndevs.list);
213 spin_lock_init(&pnn->pndevs.lock);
214 net_assign_generic(net, phonet_net_id, pnn);
215 return 0;
194} 216}
195 217
196void phonet_device_exit(void) 218static void phonet_exit_net(struct net *net)
197{ 219{
220 struct phonet_net *pnn = net_generic(net, phonet_net_id);
198 struct phonet_device *pnd, *n; 221 struct phonet_device *pnd, *n;
199 222
200 rtnl_unregister_all(PF_PHONET); 223 list_for_each_entry_safe(pnd, n, &pnn->pndevs.list, list)
201 rtnl_lock();
202 spin_lock_bh(&pndevs.lock);
203
204 list_for_each_entry_safe(pnd, n, &pndevs.list, list)
205 __phonet_device_free(pnd); 224 __phonet_device_free(pnd);
206 225
207 spin_unlock_bh(&pndevs.lock); 226 kfree(pnn);
208 rtnl_unlock(); 227}
228
229static struct pernet_operations phonet_net_ops = {
230 .init = phonet_init_net,
231 .exit = phonet_exit_net,
232};
233
234/* Initialize Phonet devices list */
235int __init phonet_device_init(void)
236{
237 int err = register_pernet_gen_device(&phonet_net_id, &phonet_net_ops);
238 if (err)
239 return err;
240
241 register_netdevice_notifier(&phonet_device_notifier);
242 err = phonet_netlink_register();
243 if (err)
244 phonet_device_exit();
245 return err;
246}
247
248void phonet_device_exit(void)
249{
250 rtnl_unregister_all(PF_PHONET);
209 unregister_netdevice_notifier(&phonet_device_notifier); 251 unregister_netdevice_notifier(&phonet_device_notifier);
252 unregister_pernet_gen_device(phonet_net_id, &phonet_net_ops);
210} 253}
diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c
index 242fe8f8c322..cec4e5951681 100644
--- a/net/phonet/pn_netlink.c
+++ b/net/phonet/pn_netlink.c
@@ -47,8 +47,9 @@ static void rtmsg_notify(int event, struct net_device *dev, u8 addr)
47 kfree_skb(skb); 47 kfree_skb(skb);
48 goto errout; 48 goto errout;
49 } 49 }
50 err = rtnl_notify(skb, dev_net(dev), 0, 50 rtnl_notify(skb, dev_net(dev), 0,
51 RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); 51 RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL);
52 return;
52errout: 53errout:
53 if (err < 0) 54 if (err < 0)
54 rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); 55 rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err);
@@ -123,17 +124,16 @@ nla_put_failure:
123 124
124static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) 125static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
125{ 126{
126 struct net *net = sock_net(skb->sk); 127 struct phonet_device_list *pndevs;
127 struct phonet_device *pnd; 128 struct phonet_device *pnd;
128 int dev_idx = 0, dev_start_idx = cb->args[0]; 129 int dev_idx = 0, dev_start_idx = cb->args[0];
129 int addr_idx = 0, addr_start_idx = cb->args[1]; 130 int addr_idx = 0, addr_start_idx = cb->args[1];
130 131
131 spin_lock_bh(&pndevs.lock); 132 pndevs = phonet_device_list(sock_net(skb->sk));
132 list_for_each_entry(pnd, &pndevs.list, list) { 133 spin_lock_bh(&pndevs->lock);
134 list_for_each_entry(pnd, &pndevs->list, list) {
133 u8 addr; 135 u8 addr;
134 136
135 if (!net_eq(dev_net(pnd->netdev), net))
136 continue;
137 if (dev_idx > dev_start_idx) 137 if (dev_idx > dev_start_idx)
138 addr_start_idx = 0; 138 addr_start_idx = 0;
139 if (dev_idx++ < dev_start_idx) 139 if (dev_idx++ < dev_start_idx)
@@ -153,16 +153,21 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
153 } 153 }
154 154
155out: 155out:
156 spin_unlock_bh(&pndevs.lock); 156 spin_unlock_bh(&pndevs->lock);
157 cb->args[0] = dev_idx; 157 cb->args[0] = dev_idx;
158 cb->args[1] = addr_idx; 158 cb->args[1] = addr_idx;
159 159
160 return skb->len; 160 return skb->len;
161} 161}
162 162
163void __init phonet_netlink_register(void) 163int __init phonet_netlink_register(void)
164{ 164{
165 rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL); 165 int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL);
166 rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL); 166 if (err)
167 rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit); 167 return err;
168
169 /* Further __rtnl_register() cannot fail */
170 __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL);
171 __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit);
172 return 0;
168} 173}
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
new file mode 100644
index 000000000000..796773b5df9b
--- /dev/null
+++ b/net/rds/Kconfig
@@ -0,0 +1,14 @@
1
2config RDS
3 tristate "Reliable Datagram Sockets (RDS) (EXPERIMENTAL)"
4 depends on INET && INFINIBAND_IPOIB && EXPERIMENTAL
5 depends on INFINIBAND && INFINIBAND_ADDR_TRANS
6 ---help---
7 RDS provides reliable, sequenced delivery of datagrams
8 over Infiniband.
9
10config RDS_DEBUG
11 bool "Debugging messages"
12 depends on RDS
13 default n
14
diff --git a/net/rds/Makefile b/net/rds/Makefile
new file mode 100644
index 000000000000..51f27585fa08
--- /dev/null
+++ b/net/rds/Makefile
@@ -0,0 +1,14 @@
1obj-$(CONFIG_RDS) += rds.o
2rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \
3 recv.o send.o stats.o sysctl.o threads.o transport.o \
4 loop.o page.o rdma.o \
5 rdma_transport.o \
6 ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \
7 ib_sysctl.o ib_rdma.o \
8 iw.o iw_cm.o iw_recv.o iw_ring.o iw_send.o iw_stats.o \
9 iw_sysctl.o iw_rdma.o
10
11ifeq ($(CONFIG_RDS_DEBUG), y)
12EXTRA_CFLAGS += -DDEBUG
13endif
14
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
new file mode 100644
index 000000000000..20cf16fc572f
--- /dev/null
+++ b/net/rds/af_rds.c
@@ -0,0 +1,586 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/module.h>
34#include <linux/errno.h>
35#include <linux/kernel.h>
36#include <linux/in.h>
37#include <linux/poll.h>
38#include <linux/version.h>
39#include <net/sock.h>
40
41#include "rds.h"
42#include "rdma.h"
43#include "rdma_transport.h"
44
45/* this is just used for stats gathering :/ */
46static DEFINE_SPINLOCK(rds_sock_lock);
47static unsigned long rds_sock_count;
48static LIST_HEAD(rds_sock_list);
49DECLARE_WAIT_QUEUE_HEAD(rds_poll_waitq);
50
51/*
52 * This is called as the final descriptor referencing this socket is closed.
53 * We have to unbind the socket so that another socket can be bound to the
54 * address it was using.
55 *
56 * We have to be careful about racing with the incoming path. sock_orphan()
57 * sets SOCK_DEAD and we use that as an indicator to the rx path that new
58 * messages shouldn't be queued.
59 */
60static int rds_release(struct socket *sock)
61{
62 struct sock *sk = sock->sk;
63 struct rds_sock *rs;
64 unsigned long flags;
65
66 if (sk == NULL)
67 goto out;
68
69 rs = rds_sk_to_rs(sk);
70
71 sock_orphan(sk);
72 /* Note - rds_clear_recv_queue grabs rs_recv_lock, so
73 * that ensures the recv path has completed messing
74 * with the socket. */
75 rds_clear_recv_queue(rs);
76 rds_cong_remove_socket(rs);
77 rds_remove_bound(rs);
78 rds_send_drop_to(rs, NULL);
79 rds_rdma_drop_keys(rs);
80 rds_notify_queue_get(rs, NULL);
81
82 spin_lock_irqsave(&rds_sock_lock, flags);
83 list_del_init(&rs->rs_item);
84 rds_sock_count--;
85 spin_unlock_irqrestore(&rds_sock_lock, flags);
86
87 sock->sk = NULL;
88 sock_put(sk);
89out:
90 return 0;
91}
92
93/*
94 * Careful not to race with rds_release -> sock_orphan which clears sk_sleep.
95 * _bh() isn't OK here, we're called from interrupt handlers. It's probably OK
96 * to wake the waitqueue after sk_sleep is clear as we hold a sock ref, but
97 * this seems more conservative.
98 * NB - normally, one would use sk_callback_lock for this, but we can
99 * get here from interrupts, whereas the network code grabs sk_callback_lock
100 * with _lock_bh only - so relying on sk_callback_lock introduces livelocks.
101 */
102void rds_wake_sk_sleep(struct rds_sock *rs)
103{
104 unsigned long flags;
105
106 read_lock_irqsave(&rs->rs_recv_lock, flags);
107 __rds_wake_sk_sleep(rds_rs_to_sk(rs));
108 read_unlock_irqrestore(&rs->rs_recv_lock, flags);
109}
110
111static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
112 int *uaddr_len, int peer)
113{
114 struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
115 struct rds_sock *rs = rds_sk_to_rs(sock->sk);
116
117 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
118
 119	/* racy, don't care */
120 if (peer) {
121 if (!rs->rs_conn_addr)
122 return -ENOTCONN;
123
124 sin->sin_port = rs->rs_conn_port;
125 sin->sin_addr.s_addr = rs->rs_conn_addr;
126 } else {
127 sin->sin_port = rs->rs_bound_port;
128 sin->sin_addr.s_addr = rs->rs_bound_addr;
129 }
130
131 sin->sin_family = AF_INET;
132
133 *uaddr_len = sizeof(*sin);
134 return 0;
135}
136
137/*
138 * RDS' poll is without a doubt the least intuitive part of the interface,
139 * as POLLIN and POLLOUT do not behave entirely as you would expect from
140 * a network protocol.
141 *
142 * POLLIN is asserted if
143 * - there is data on the receive queue.
144 * - to signal that a previously congested destination may have become
145 * uncongested
146 * - A notification has been queued to the socket (this can be a congestion
147 * update, or a RDMA completion).
148 *
149 * POLLOUT is asserted if there is room on the send queue. This does not mean
150 * however, that the next sendmsg() call will succeed. If the application tries
151 * to send to a congested destination, the system call may still fail (and
152 * return ENOBUFS).
153 */
154static unsigned int rds_poll(struct file *file, struct socket *sock,
155 poll_table *wait)
156{
157 struct sock *sk = sock->sk;
158 struct rds_sock *rs = rds_sk_to_rs(sk);
159 unsigned int mask = 0;
160 unsigned long flags;
161
162 poll_wait(file, sk->sk_sleep, wait);
163
164 poll_wait(file, &rds_poll_waitq, wait);
165
166 read_lock_irqsave(&rs->rs_recv_lock, flags);
167 if (!rs->rs_cong_monitor) {
168 /* When a congestion map was updated, we signal POLLIN for
169 * "historical" reasons. Applications can also poll for
 170	 * POLLWRBAND instead. */
171 if (rds_cong_updated_since(&rs->rs_cong_track))
172 mask |= (POLLIN | POLLRDNORM | POLLWRBAND);
173 } else {
174 spin_lock(&rs->rs_lock);
175 if (rs->rs_cong_notify)
176 mask |= (POLLIN | POLLRDNORM);
177 spin_unlock(&rs->rs_lock);
178 }
179 if (!list_empty(&rs->rs_recv_queue)
180 || !list_empty(&rs->rs_notify_queue))
181 mask |= (POLLIN | POLLRDNORM);
182 if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
183 mask |= (POLLOUT | POLLWRNORM);
184 read_unlock_irqrestore(&rs->rs_recv_lock, flags);
185
186 return mask;
187}
188
189static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
190{
191 return -ENOIOCTLCMD;
192}
193
194static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval,
195 int len)
196{
197 struct sockaddr_in sin;
198 int ret = 0;
199
200 /* racing with another thread binding seems ok here */
201 if (rs->rs_bound_addr == 0) {
202 ret = -ENOTCONN; /* XXX not a great errno */
203 goto out;
204 }
205
206 if (len < sizeof(struct sockaddr_in)) {
207 ret = -EINVAL;
208 goto out;
209 }
210
211 if (copy_from_user(&sin, optval, sizeof(sin))) {
212 ret = -EFAULT;
213 goto out;
214 }
215
216 rds_send_drop_to(rs, &sin);
217out:
218 return ret;
219}
220
221static int rds_set_bool_option(unsigned char *optvar, char __user *optval,
222 int optlen)
223{
224 int value;
225
226 if (optlen < sizeof(int))
227 return -EINVAL;
228 if (get_user(value, (int __user *) optval))
229 return -EFAULT;
230 *optvar = !!value;
231 return 0;
232}
233
234static int rds_cong_monitor(struct rds_sock *rs, char __user *optval,
235 int optlen)
236{
237 int ret;
238
239 ret = rds_set_bool_option(&rs->rs_cong_monitor, optval, optlen);
240 if (ret == 0) {
241 if (rs->rs_cong_monitor) {
242 rds_cong_add_socket(rs);
243 } else {
244 rds_cong_remove_socket(rs);
245 rs->rs_cong_mask = 0;
246 rs->rs_cong_notify = 0;
247 }
248 }
249 return ret;
250}
251
252static int rds_setsockopt(struct socket *sock, int level, int optname,
253 char __user *optval, int optlen)
254{
255 struct rds_sock *rs = rds_sk_to_rs(sock->sk);
256 int ret;
257
258 if (level != SOL_RDS) {
259 ret = -ENOPROTOOPT;
260 goto out;
261 }
262
263 switch (optname) {
264 case RDS_CANCEL_SENT_TO:
265 ret = rds_cancel_sent_to(rs, optval, optlen);
266 break;
267 case RDS_GET_MR:
268 ret = rds_get_mr(rs, optval, optlen);
269 break;
270 case RDS_FREE_MR:
271 ret = rds_free_mr(rs, optval, optlen);
272 break;
273 case RDS_RECVERR:
274 ret = rds_set_bool_option(&rs->rs_recverr, optval, optlen);
275 break;
276 case RDS_CONG_MONITOR:
277 ret = rds_cong_monitor(rs, optval, optlen);
278 break;
279 default:
280 ret = -ENOPROTOOPT;
281 }
282out:
283 return ret;
284}
285
286static int rds_getsockopt(struct socket *sock, int level, int optname,
287 char __user *optval, int __user *optlen)
288{
289 struct rds_sock *rs = rds_sk_to_rs(sock->sk);
290 int ret = -ENOPROTOOPT, len;
291
292 if (level != SOL_RDS)
293 goto out;
294
295 if (get_user(len, optlen)) {
296 ret = -EFAULT;
297 goto out;
298 }
299
300 switch (optname) {
301 case RDS_INFO_FIRST ... RDS_INFO_LAST:
302 ret = rds_info_getsockopt(sock, optname, optval,
303 optlen);
304 break;
305
306 case RDS_RECVERR:
307 if (len < sizeof(int))
308 ret = -EINVAL;
309 else
310 if (put_user(rs->rs_recverr, (int __user *) optval)
311 || put_user(sizeof(int), optlen))
312 ret = -EFAULT;
313 else
314 ret = 0;
315 break;
316 default:
317 break;
318 }
319
320out:
321 return ret;
322
323}
324
325static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
326 int addr_len, int flags)
327{
328 struct sock *sk = sock->sk;
329 struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
330 struct rds_sock *rs = rds_sk_to_rs(sk);
331 int ret = 0;
332
333 lock_sock(sk);
334
335 if (addr_len != sizeof(struct sockaddr_in)) {
336 ret = -EINVAL;
337 goto out;
338 }
339
340 if (sin->sin_family != AF_INET) {
341 ret = -EAFNOSUPPORT;
342 goto out;
343 }
344
345 if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
346 ret = -EDESTADDRREQ;
347 goto out;
348 }
349
350 rs->rs_conn_addr = sin->sin_addr.s_addr;
351 rs->rs_conn_port = sin->sin_port;
352
353out:
354 release_sock(sk);
355 return ret;
356}
357
358static struct proto rds_proto = {
359 .name = "RDS",
360 .owner = THIS_MODULE,
361 .obj_size = sizeof(struct rds_sock),
362};
363
364static struct proto_ops rds_proto_ops = {
365 .family = AF_RDS,
366 .owner = THIS_MODULE,
367 .release = rds_release,
368 .bind = rds_bind,
369 .connect = rds_connect,
370 .socketpair = sock_no_socketpair,
371 .accept = sock_no_accept,
372 .getname = rds_getname,
373 .poll = rds_poll,
374 .ioctl = rds_ioctl,
375 .listen = sock_no_listen,
376 .shutdown = sock_no_shutdown,
377 .setsockopt = rds_setsockopt,
378 .getsockopt = rds_getsockopt,
379 .sendmsg = rds_sendmsg,
380 .recvmsg = rds_recvmsg,
381 .mmap = sock_no_mmap,
382 .sendpage = sock_no_sendpage,
383};
384
385static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
386{
387 unsigned long flags;
388 struct rds_sock *rs;
389
390 sock_init_data(sock, sk);
391 sock->ops = &rds_proto_ops;
392 sk->sk_protocol = protocol;
393
394 rs = rds_sk_to_rs(sk);
395 spin_lock_init(&rs->rs_lock);
396 rwlock_init(&rs->rs_recv_lock);
397 INIT_LIST_HEAD(&rs->rs_send_queue);
398 INIT_LIST_HEAD(&rs->rs_recv_queue);
399 INIT_LIST_HEAD(&rs->rs_notify_queue);
400 INIT_LIST_HEAD(&rs->rs_cong_list);
401 spin_lock_init(&rs->rs_rdma_lock);
402 rs->rs_rdma_keys = RB_ROOT;
403
404 spin_lock_irqsave(&rds_sock_lock, flags);
405 list_add_tail(&rs->rs_item, &rds_sock_list);
406 rds_sock_count++;
407 spin_unlock_irqrestore(&rds_sock_lock, flags);
408
409 return 0;
410}
411
412static int rds_create(struct net *net, struct socket *sock, int protocol)
413{
414 struct sock *sk;
415
416 if (sock->type != SOCK_SEQPACKET || protocol)
417 return -ESOCKTNOSUPPORT;
418
419 sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto);
420 if (!sk)
421 return -ENOMEM;
422
423 return __rds_create(sock, sk, protocol);
424}
425
426void rds_sock_addref(struct rds_sock *rs)
427{
428 sock_hold(rds_rs_to_sk(rs));
429}
430
431void rds_sock_put(struct rds_sock *rs)
432{
433 sock_put(rds_rs_to_sk(rs));
434}
435
436static struct net_proto_family rds_family_ops = {
437 .family = AF_RDS,
438 .create = rds_create,
439 .owner = THIS_MODULE,
440};
441
442static void rds_sock_inc_info(struct socket *sock, unsigned int len,
443 struct rds_info_iterator *iter,
444 struct rds_info_lengths *lens)
445{
446 struct rds_sock *rs;
447 struct sock *sk;
448 struct rds_incoming *inc;
449 unsigned long flags;
450 unsigned int total = 0;
451
452 len /= sizeof(struct rds_info_message);
453
454 spin_lock_irqsave(&rds_sock_lock, flags);
455
456 list_for_each_entry(rs, &rds_sock_list, rs_item) {
457 sk = rds_rs_to_sk(rs);
458 read_lock(&rs->rs_recv_lock);
459
460 /* XXX too lazy to maintain counts.. */
461 list_for_each_entry(inc, &rs->rs_recv_queue, i_item) {
462 total++;
463 if (total <= len)
464 rds_inc_info_copy(inc, iter, inc->i_saddr,
465 rs->rs_bound_addr, 1);
466 }
467
468 read_unlock(&rs->rs_recv_lock);
469 }
470
471 spin_unlock_irqrestore(&rds_sock_lock, flags);
472
473 lens->nr = total;
474 lens->each = sizeof(struct rds_info_message);
475}
476
477static void rds_sock_info(struct socket *sock, unsigned int len,
478 struct rds_info_iterator *iter,
479 struct rds_info_lengths *lens)
480{
481 struct rds_info_socket sinfo;
482 struct rds_sock *rs;
483 unsigned long flags;
484
485 len /= sizeof(struct rds_info_socket);
486
487 spin_lock_irqsave(&rds_sock_lock, flags);
488
489 if (len < rds_sock_count)
490 goto out;
491
492 list_for_each_entry(rs, &rds_sock_list, rs_item) {
493 sinfo.sndbuf = rds_sk_sndbuf(rs);
494 sinfo.rcvbuf = rds_sk_rcvbuf(rs);
495 sinfo.bound_addr = rs->rs_bound_addr;
496 sinfo.connected_addr = rs->rs_conn_addr;
497 sinfo.bound_port = rs->rs_bound_port;
498 sinfo.connected_port = rs->rs_conn_port;
499 sinfo.inum = sock_i_ino(rds_rs_to_sk(rs));
500
501 rds_info_copy(iter, &sinfo, sizeof(sinfo));
502 }
503
504out:
505 lens->nr = rds_sock_count;
506 lens->each = sizeof(struct rds_info_socket);
507
508 spin_unlock_irqrestore(&rds_sock_lock, flags);
509}
510
511static void __exit rds_exit(void)
512{
513 rds_rdma_exit();
514 sock_unregister(rds_family_ops.family);
515 proto_unregister(&rds_proto);
516 rds_conn_exit();
517 rds_cong_exit();
518 rds_sysctl_exit();
519 rds_threads_exit();
520 rds_stats_exit();
521 rds_page_exit();
522 rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info);
523 rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
524}
525module_exit(rds_exit);
526
527static int __init rds_init(void)
528{
529 int ret;
530
531 ret = rds_conn_init();
532 if (ret)
533 goto out;
534 ret = rds_threads_init();
535 if (ret)
536 goto out_conn;
537 ret = rds_sysctl_init();
538 if (ret)
539 goto out_threads;
540 ret = rds_stats_init();
541 if (ret)
542 goto out_sysctl;
543 ret = proto_register(&rds_proto, 1);
544 if (ret)
545 goto out_stats;
546 ret = sock_register(&rds_family_ops);
547 if (ret)
548 goto out_proto;
549
550 rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info);
551 rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info);
552
553 /* ib/iwarp transports currently compiled-in */
554 ret = rds_rdma_init();
555 if (ret)
556 goto out_sock;
557 goto out;
558
559out_sock:
560 sock_unregister(rds_family_ops.family);
561out_proto:
562 proto_unregister(&rds_proto);
563out_stats:
564 rds_stats_exit();
565out_sysctl:
566 rds_sysctl_exit();
567out_threads:
568 rds_threads_exit();
569out_conn:
570 rds_conn_exit();
571 rds_cong_exit();
572 rds_page_exit();
573out:
574 return ret;
575}
576module_init(rds_init);
577
578#define DRV_VERSION "4.0"
579#define DRV_RELDATE "Feb 12, 2009"
580
581MODULE_AUTHOR("Oracle Corporation <rds-devel@oss.oracle.com>");
582MODULE_DESCRIPTION("RDS: Reliable Datagram Sockets"
583 " v" DRV_VERSION " (" DRV_RELDATE ")");
584MODULE_VERSION(DRV_VERSION);
585MODULE_LICENSE("Dual BSD/GPL");
586MODULE_ALIAS_NETPROTO(PF_RDS);
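The comments above rds_poll() and rds_cong_monitor() describe POLLIN/POLLOUT semantics that differ from most datagram sockets. A hedged userspace sketch of those semantics follows; the AF_RDS, SOL_RDS and RDS_CONG_MONITOR fallback values are assumptions for the example and should normally come from a kernel-provided header.

#include <poll.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <unistd.h>

#ifndef AF_RDS
#define AF_RDS 21			/* assumed value, for illustration */
#endif
#ifndef SOL_RDS
#define SOL_RDS 276			/* assumed value, for illustration */
#endif
#ifndef RDS_CONG_MONITOR
#define RDS_CONG_MONITOR 6		/* assumed value, for illustration */
#endif

static int rds_poll_example(const char *local_ip)
{
	struct sockaddr_in sin;
	struct pollfd pfd;
	int on = 1;
	int fd = socket(AF_RDS, SOCK_SEQPACKET, 0);

	if (fd < 0)
		return -1;

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(4000);		/* arbitrary example port */
	inet_aton(local_ip, &sin.sin_addr);

	/* rds_bind() rejects INADDR_ANY, so an explicit local address is needed */
	if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0)
		goto fail;

	/* opt in to explicit congestion notifications instead of the
	 * "historical" POLLIN on every congestion map update */
	if (setsockopt(fd, SOL_RDS, RDS_CONG_MONITOR, &on, sizeof(on)) < 0)
		goto fail;

	pfd.fd = fd;
	pfd.events = POLLIN | POLLOUT;
	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLOUT))
		printf("room on the send queue; sendmsg() may still return ENOBUFS\n");

	close(fd);
	return 0;
fail:
	close(fd);
	return -1;
}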
diff --git a/net/rds/bind.c b/net/rds/bind.c
new file mode 100644
index 000000000000..c17cc39160ce
--- /dev/null
+++ b/net/rds/bind.c
@@ -0,0 +1,199 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <net/sock.h>
35#include <linux/in.h>
36#include <linux/if_arp.h>
37#include "rds.h"
38
39/*
40 * XXX this probably still needs more work.. no INADDR_ANY, and rbtrees aren't
41 * particularly zippy.
42 *
43 * This is now called for every incoming frame so we arguably care much more
44 * about it than we used to.
45 */
46static DEFINE_SPINLOCK(rds_bind_lock);
47static struct rb_root rds_bind_tree = RB_ROOT;
48
49static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port,
50 struct rds_sock *insert)
51{
52 struct rb_node **p = &rds_bind_tree.rb_node;
53 struct rb_node *parent = NULL;
54 struct rds_sock *rs;
55 u64 cmp;
56 u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
57
58 while (*p) {
59 parent = *p;
60 rs = rb_entry(parent, struct rds_sock, rs_bound_node);
61
62 cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
63 be16_to_cpu(rs->rs_bound_port);
64
65 if (needle < cmp)
66 p = &(*p)->rb_left;
67 else if (needle > cmp)
68 p = &(*p)->rb_right;
69 else
70 return rs;
71 }
72
73 if (insert) {
74 rb_link_node(&insert->rs_bound_node, parent, p);
75 rb_insert_color(&insert->rs_bound_node, &rds_bind_tree);
76 }
77 return NULL;
78}
79
80/*
81 * Return the rds_sock bound at the given local address.
82 *
83 * The rx path can race with rds_release. We notice if rds_release() has
84 * marked this socket and don't return a rs ref to the rx path.
85 */
86struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
87{
88 struct rds_sock *rs;
89 unsigned long flags;
90
91 spin_lock_irqsave(&rds_bind_lock, flags);
92 rs = rds_bind_tree_walk(addr, port, NULL);
93 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
94 rds_sock_addref(rs);
95 else
96 rs = NULL;
97 spin_unlock_irqrestore(&rds_bind_lock, flags);
98
99 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
100 ntohs(port));
101 return rs;
102}
103
104/* returns -ve errno or +ve port */
105static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
106{
107 unsigned long flags;
108 int ret = -EADDRINUSE;
109 u16 rover, last;
110
111 if (*port != 0) {
112 rover = be16_to_cpu(*port);
113 last = rover;
114 } else {
115 rover = max_t(u16, net_random(), 2);
116 last = rover - 1;
117 }
118
119 spin_lock_irqsave(&rds_bind_lock, flags);
120
121 do {
122 if (rover == 0)
123 rover++;
124 if (rds_bind_tree_walk(addr, cpu_to_be16(rover), rs) == NULL) {
125 *port = cpu_to_be16(rover);
126 ret = 0;
127 break;
128 }
129 } while (rover++ != last);
130
131 if (ret == 0) {
132 rs->rs_bound_addr = addr;
133 rs->rs_bound_port = *port;
134 rds_sock_addref(rs);
135
136 rdsdebug("rs %p binding to %pI4:%d\n",
137 rs, &addr, (int)ntohs(*port));
138 }
139
140 spin_unlock_irqrestore(&rds_bind_lock, flags);
141
142 return ret;
143}
144
145void rds_remove_bound(struct rds_sock *rs)
146{
147 unsigned long flags;
148
149 spin_lock_irqsave(&rds_bind_lock, flags);
150
151 if (rs->rs_bound_addr) {
152 rdsdebug("rs %p unbinding from %pI4:%d\n",
153 rs, &rs->rs_bound_addr,
154 ntohs(rs->rs_bound_port));
155
156 rb_erase(&rs->rs_bound_node, &rds_bind_tree);
157 rds_sock_put(rs);
158 rs->rs_bound_addr = 0;
159 }
160
161 spin_unlock_irqrestore(&rds_bind_lock, flags);
162}
163
164int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
165{
166 struct sock *sk = sock->sk;
167 struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
168 struct rds_sock *rs = rds_sk_to_rs(sk);
169 struct rds_transport *trans;
170 int ret = 0;
171
172 lock_sock(sk);
173
174 if (addr_len != sizeof(struct sockaddr_in) ||
175 sin->sin_family != AF_INET ||
176 rs->rs_bound_addr ||
177 sin->sin_addr.s_addr == htonl(INADDR_ANY)) {
178 ret = -EINVAL;
179 goto out;
180 }
181
182 ret = rds_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port);
183 if (ret)
184 goto out;
185
186 trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
187 if (trans == NULL) {
188 ret = -EADDRNOTAVAIL;
189 rds_remove_bound(rs);
190 goto out;
191 }
192
193 rs->rs_transport = trans;
194 ret = 0;
195
196out:
197 release_sock(sk);
198 return ret;
199}
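rds_bind_tree_walk() above keys the rb-tree on a single 64-bit value built from the bound address and port. A standalone sketch of that key packing, using illustrative helper names:

#include <stdint.h>
#include <arpa/inet.h>

/* Address (host order) in the high 32 bits, port (host order) in the low
 * 16 bits, so the tree orders entries by address first and then by port. */
static uint64_t rds_bind_key(uint32_t addr_be, uint16_t port_be)
{
	return ((uint64_t)ntohl(addr_be) << 32) | ntohs(port_be);
}

/* The walk compares keys exactly like this: smaller keys descend left,
 * larger keys descend right, an equal key means the port is already bound. */
static int rds_bind_key_cmp(uint64_t needle, uint64_t node)
{
	if (needle < node)
		return -1;
	if (needle > node)
		return 1;
	return 0;
}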
diff --git a/net/rds/cong.c b/net/rds/cong.c
new file mode 100644
index 000000000000..710e4599d76c
--- /dev/null
+++ b/net/rds/cong.c
@@ -0,0 +1,404 @@
1/*
2 * Copyright (c) 2007 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/types.h>
34#include <linux/rbtree.h>
35
36#include <asm-generic/bitops/le.h>
37
38#include "rds.h"
39
40/*
41 * This file implements the receive side of the unconventional congestion
42 * management in RDS.
43 *
44 * Messages waiting in the receive queue on the receiving socket are accounted
 45 * against the socket's SO_RCVBUF option value. Only the payload bytes in the
46 * message are accounted for. If the number of bytes queued equals or exceeds
47 * rcvbuf then the socket is congested. All sends attempted to this socket's
48 * address should return block or return -EWOULDBLOCK.
 49 * address should block or return -EWOULDBLOCK.
50 * Applications are expected to be reasonably tuned such that this situation
51 * very rarely occurs. An application encountering this "back-pressure" is
52 * considered a bug.
53 *
54 * This is implemented by having each node maintain bitmaps which indicate
55 * which ports on bound addresses are congested. As the bitmap changes it is
56 * sent through all the connections which terminate in the local address of the
57 * bitmap which changed.
58 *
59 * The bitmaps are allocated as connections are brought up. This avoids
60 * allocation in the interrupt handling path which queues messages on sockets.
61 * The dense bitmaps let transports send the entire bitmap on any bitmap change
62 * reasonably efficiently. This is much easier to implement than some
63 * finer-grained communication of per-port congestion. The sender does a very
64 * inexpensive bit test to test if the port it's about to send to is congested
65 * or not.
66 */
67
68/*
69 * Interaction with poll is a tad tricky. We want all processes stuck in
70 * poll to wake up and check whether a congested destination became uncongested.
71 * The really sad thing is we have no idea which destinations the application
72 * wants to send to - we don't even know which rds_connections are involved.
73 * So until we implement a more flexible rds poll interface, we have to make
74 * do with this:
75 * We maintain a global counter that is incremented each time a congestion map
76 * update is received. Each rds socket tracks this value, and if rds_poll
77 * finds that the saved generation number is smaller than the global generation
78 * number, it wakes up the process.
79 */
80static atomic_t rds_cong_generation = ATOMIC_INIT(0);
81
82/*
83 * Congestion monitoring
84 */
85static LIST_HEAD(rds_cong_monitor);
86static DEFINE_RWLOCK(rds_cong_monitor_lock);
87
88/*
89 * Yes, a global lock. It's used so infrequently that it's worth keeping it
90 * global to simplify the locking. It's only used in the following
91 * circumstances:
92 *
93 * - on connection buildup to associate a conn with its maps
94 * - on map changes to inform conns of a new map to send
95 *
96 * It's sadly ordered under the socket callback lock and the connection lock.
97 * Receive paths can mark ports congested from interrupt context so the
98 * lock masks interrupts.
99 */
100static DEFINE_SPINLOCK(rds_cong_lock);
101static struct rb_root rds_cong_tree = RB_ROOT;
102
103static struct rds_cong_map *rds_cong_tree_walk(__be32 addr,
104 struct rds_cong_map *insert)
105{
106 struct rb_node **p = &rds_cong_tree.rb_node;
107 struct rb_node *parent = NULL;
108 struct rds_cong_map *map;
109
110 while (*p) {
111 parent = *p;
112 map = rb_entry(parent, struct rds_cong_map, m_rb_node);
113
114 if (addr < map->m_addr)
115 p = &(*p)->rb_left;
116 else if (addr > map->m_addr)
117 p = &(*p)->rb_right;
118 else
119 return map;
120 }
121
122 if (insert) {
123 rb_link_node(&insert->m_rb_node, parent, p);
124 rb_insert_color(&insert->m_rb_node, &rds_cong_tree);
125 }
126 return NULL;
127}
128
129/*
 130 * There is only ever one bitmap for any address. Connections try to allocate
 131 * these bitmaps in the process of getting pointers to them. The bitmaps are only
132 * ever freed as the module is removed after all connections have been freed.
133 */
134static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
135{
136 struct rds_cong_map *map;
137 struct rds_cong_map *ret = NULL;
138 unsigned long zp;
139 unsigned long i;
140 unsigned long flags;
141
142 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL);
143 if (map == NULL)
144 return NULL;
145
146 map->m_addr = addr;
147 init_waitqueue_head(&map->m_waitq);
148 INIT_LIST_HEAD(&map->m_conn_list);
149
150 for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
151 zp = get_zeroed_page(GFP_KERNEL);
152 if (zp == 0)
153 goto out;
154 map->m_page_addrs[i] = zp;
155 }
156
157 spin_lock_irqsave(&rds_cong_lock, flags);
158 ret = rds_cong_tree_walk(addr, map);
159 spin_unlock_irqrestore(&rds_cong_lock, flags);
160
161 if (ret == NULL) {
162 ret = map;
163 map = NULL;
164 }
165
166out:
167 if (map) {
168 for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
169 free_page(map->m_page_addrs[i]);
170 kfree(map);
171 }
172
173 rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr));
174
175 return ret;
176}
177
178/*
179 * Put the conn on its local map's list. This is called when the conn is
180 * really added to the hash. It's nested under the rds_conn_lock, sadly.
181 */
182void rds_cong_add_conn(struct rds_connection *conn)
183{
184 unsigned long flags;
185
186 rdsdebug("conn %p now on map %p\n", conn, conn->c_lcong);
187 spin_lock_irqsave(&rds_cong_lock, flags);
188 list_add_tail(&conn->c_map_item, &conn->c_lcong->m_conn_list);
189 spin_unlock_irqrestore(&rds_cong_lock, flags);
190}
191
192void rds_cong_remove_conn(struct rds_connection *conn)
193{
194 unsigned long flags;
195
196 rdsdebug("removing conn %p from map %p\n", conn, conn->c_lcong);
197 spin_lock_irqsave(&rds_cong_lock, flags);
198 list_del_init(&conn->c_map_item);
199 spin_unlock_irqrestore(&rds_cong_lock, flags);
200}
201
202int rds_cong_get_maps(struct rds_connection *conn)
203{
204 conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
205 conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
206
207 if (conn->c_lcong == NULL || conn->c_fcong == NULL)
208 return -ENOMEM;
209
210 return 0;
211}
212
213void rds_cong_queue_updates(struct rds_cong_map *map)
214{
215 struct rds_connection *conn;
216 unsigned long flags;
217
218 spin_lock_irqsave(&rds_cong_lock, flags);
219
220 list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
221 if (!test_and_set_bit(0, &conn->c_map_queued)) {
222 rds_stats_inc(s_cong_update_queued);
223 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
224 }
225 }
226
227 spin_unlock_irqrestore(&rds_cong_lock, flags);
228}
229
230void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask)
231{
232 rdsdebug("waking map %p for %pI4\n",
233 map, &map->m_addr);
234 rds_stats_inc(s_cong_update_received);
235 atomic_inc(&rds_cong_generation);
236 if (waitqueue_active(&map->m_waitq))
237 wake_up(&map->m_waitq);
238 if (waitqueue_active(&rds_poll_waitq))
239 wake_up_all(&rds_poll_waitq);
240
241 if (portmask && !list_empty(&rds_cong_monitor)) {
242 unsigned long flags;
243 struct rds_sock *rs;
244
245 read_lock_irqsave(&rds_cong_monitor_lock, flags);
246 list_for_each_entry(rs, &rds_cong_monitor, rs_cong_list) {
247 spin_lock(&rs->rs_lock);
248 rs->rs_cong_notify |= (rs->rs_cong_mask & portmask);
249 rs->rs_cong_mask &= ~portmask;
250 spin_unlock(&rs->rs_lock);
251 if (rs->rs_cong_notify)
252 rds_wake_sk_sleep(rs);
253 }
254 read_unlock_irqrestore(&rds_cong_monitor_lock, flags);
255 }
256}
257
258int rds_cong_updated_since(unsigned long *recent)
259{
260 unsigned long gen = atomic_read(&rds_cong_generation);
261
262 if (likely(*recent == gen))
263 return 0;
264 *recent = gen;
265 return 1;
266}
267
268/*
 269 * We're called under the locking that protects the socket's receive buffer
270 * consumption. This makes it a lot easier for the caller to only call us
271 * when it knows that an existing set bit needs to be cleared, and vice versa.
272 * We can't block and we need to deal with concurrent sockets working against
273 * the same per-address map.
274 */
275void rds_cong_set_bit(struct rds_cong_map *map, __be16 port)
276{
277 unsigned long i;
278 unsigned long off;
279
280 rdsdebug("setting congestion for %pI4:%u in map %p\n",
281 &map->m_addr, ntohs(port), map);
282
283 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
284 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
285
286 generic___set_le_bit(off, (void *)map->m_page_addrs[i]);
287}
288
289void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port)
290{
291 unsigned long i;
292 unsigned long off;
293
294 rdsdebug("clearing congestion for %pI4:%u in map %p\n",
295 &map->m_addr, ntohs(port), map);
296
297 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
298 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
299
300 generic___clear_le_bit(off, (void *)map->m_page_addrs[i]);
301}
302
303static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port)
304{
305 unsigned long i;
306 unsigned long off;
307
308 i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS;
309 off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS;
310
311 return generic_test_le_bit(off, (void *)map->m_page_addrs[i]);
312}
313
314void rds_cong_add_socket(struct rds_sock *rs)
315{
316 unsigned long flags;
317
318 write_lock_irqsave(&rds_cong_monitor_lock, flags);
319 if (list_empty(&rs->rs_cong_list))
320 list_add(&rs->rs_cong_list, &rds_cong_monitor);
321 write_unlock_irqrestore(&rds_cong_monitor_lock, flags);
322}
323
324void rds_cong_remove_socket(struct rds_sock *rs)
325{
326 unsigned long flags;
327 struct rds_cong_map *map;
328
329 write_lock_irqsave(&rds_cong_monitor_lock, flags);
330 list_del_init(&rs->rs_cong_list);
331 write_unlock_irqrestore(&rds_cong_monitor_lock, flags);
332
333 /* update congestion map for now-closed port */
334 spin_lock_irqsave(&rds_cong_lock, flags);
335 map = rds_cong_tree_walk(rs->rs_bound_addr, NULL);
336 spin_unlock_irqrestore(&rds_cong_lock, flags);
337
338 if (map && rds_cong_test_bit(map, rs->rs_bound_port)) {
339 rds_cong_clear_bit(map, rs->rs_bound_port);
340 rds_cong_queue_updates(map);
341 }
342}
343
344int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock,
345 struct rds_sock *rs)
346{
347 if (!rds_cong_test_bit(map, port))
348 return 0;
349 if (nonblock) {
350 if (rs && rs->rs_cong_monitor) {
351 unsigned long flags;
352
353 /* It would have been nice to have an atomic set_bit on
354 * a uint64_t. */
355 spin_lock_irqsave(&rs->rs_lock, flags);
356 rs->rs_cong_mask |= RDS_CONG_MONITOR_MASK(ntohs(port));
357 spin_unlock_irqrestore(&rs->rs_lock, flags);
358
359 /* Test again - a congestion update may have arrived in
360 * the meantime. */
361 if (!rds_cong_test_bit(map, port))
362 return 0;
363 }
364 rds_stats_inc(s_cong_send_error);
365 return -ENOBUFS;
366 }
367
368 rds_stats_inc(s_cong_send_blocked);
369 rdsdebug("waiting on map %p for port %u\n", map, be16_to_cpu(port));
370
371 return wait_event_interruptible(map->m_waitq,
372 !rds_cong_test_bit(map, port));
373}
374
375void rds_cong_exit(void)
376{
377 struct rb_node *node;
378 struct rds_cong_map *map;
379 unsigned long i;
380
381 while ((node = rb_first(&rds_cong_tree))) {
382 map = rb_entry(node, struct rds_cong_map, m_rb_node);
383 rdsdebug("freeing map %p\n", map);
384 rb_erase(&map->m_rb_node, &rds_cong_tree);
385 for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++)
386 free_page(map->m_page_addrs[i]);
387 kfree(map);
388 }
389}
390
391/*
392 * Allocate a RDS message containing a congestion update.
393 */
394struct rds_message *rds_cong_update_alloc(struct rds_connection *conn)
395{
396 struct rds_cong_map *map = conn->c_lcong;
397 struct rds_message *rm;
398
399 rm = rds_message_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES);
400 if (!IS_ERR(rm))
401 rm->m_inc.i_hdr.h_flags = RDS_FLAG_CONG_BITMAP;
402
403 return rm;
404}
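rds_cong_set_bit() and rds_cong_clear_bit() above map a 16-bit port to a page index and a bit offset within the per-address bitmap. A hedged arithmetic sketch, assuming a 4 KiB page size (the kernel code uses PAGE_SIZE and RDS_CONG_MAP_PAGE_BITS):

#include <stdint.h>

#define EXAMPLE_PAGE_SIZE	4096UL
#define EXAMPLE_PAGE_BITS	(EXAMPLE_PAGE_SIZE * 8)	/* 32768 bits per page */
/* one bit per possible port: 65536 bits = 8192 bytes = 2 pages at 4 KiB */
#define EXAMPLE_MAP_PAGES	((65536UL + EXAMPLE_PAGE_BITS - 1) / EXAMPLE_PAGE_BITS)

static void cong_bit_position(uint16_t port_host, unsigned long *page,
			      unsigned long *bit)
{
	*page = port_host / EXAMPLE_PAGE_BITS;	/* which page holds the bit */
	*bit = port_host % EXAMPLE_PAGE_BITS;	/* offset of the bit in that page */
}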
diff --git a/net/rds/connection.c b/net/rds/connection.c
new file mode 100644
index 000000000000..273f064930a8
--- /dev/null
+++ b/net/rds/connection.c
@@ -0,0 +1,487 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/list.h>
35#include <net/inet_hashtables.h>
36
37#include "rds.h"
38#include "loop.h"
39#include "rdma.h"
40
41#define RDS_CONNECTION_HASH_BITS 12
42#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS)
43#define RDS_CONNECTION_HASH_MASK (RDS_CONNECTION_HASH_ENTRIES - 1)
44
45/* converting this to RCU is a chore for another day.. */
46static DEFINE_SPINLOCK(rds_conn_lock);
47static unsigned long rds_conn_count;
48static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES];
49static struct kmem_cache *rds_conn_slab;
50
51static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
52{
53 /* Pass NULL, don't need struct net for hash */
54 unsigned long hash = inet_ehashfn(NULL,
55 be32_to_cpu(laddr), 0,
56 be32_to_cpu(faddr), 0);
57 return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK];
58}
59
60#define rds_conn_info_set(var, test, suffix) do { \
61 if (test) \
62 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \
63} while (0)
64
65static inline int rds_conn_is_sending(struct rds_connection *conn)
66{
67 int ret = 0;
68
69 if (!mutex_trylock(&conn->c_send_lock))
70 ret = 1;
71 else
72 mutex_unlock(&conn->c_send_lock);
73
74 return ret;
75}
76
77static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
78 __be32 laddr, __be32 faddr,
79 struct rds_transport *trans)
80{
81 struct rds_connection *conn, *ret = NULL;
82 struct hlist_node *pos;
83
84 hlist_for_each_entry(conn, pos, head, c_hash_node) {
85 if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
86 conn->c_trans == trans) {
87 ret = conn;
88 break;
89 }
90 }
91 rdsdebug("returning conn %p for %pI4 -> %pI4\n", ret,
92 &laddr, &faddr);
93 return ret;
94}
95
96/*
97 * This is called by transports as they're bringing down a connection.
98 * It clears partial message state so that the transport can start sending
99 * and receiving over this connection again in the future. It is up to
100 * the transport to have serialized this call with its send and recv.
101 */
102void rds_conn_reset(struct rds_connection *conn)
103{
104 rdsdebug("connection %pI4 to %pI4 reset\n",
105 &conn->c_laddr, &conn->c_faddr);
106
107 rds_stats_inc(s_conn_reset);
108 rds_send_reset(conn);
109 conn->c_flags = 0;
110
111 /* Do not clear next_rx_seq here, else we cannot distinguish
112 * retransmitted packets from new packets, and will hand all
113 * of them to the application. That is not consistent with the
114 * reliability guarantees of RDS. */
115}
116
117/*
 118 * There is only ever one 'conn' for a given pair of addresses in the
119 * system at a time. They contain messages to be retransmitted and so
120 * span the lifetime of the actual underlying transport connections.
121 *
122 * For now they are not garbage collected once they're created. They
123 * are torn down as the module is removed, if ever.
124 */
125static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
126 struct rds_transport *trans, gfp_t gfp,
127 int is_outgoing)
128{
129 struct rds_connection *conn, *tmp, *parent = NULL;
130 struct hlist_head *head = rds_conn_bucket(laddr, faddr);
131 unsigned long flags;
132 int ret;
133
134 spin_lock_irqsave(&rds_conn_lock, flags);
135 conn = rds_conn_lookup(head, laddr, faddr, trans);
136 if (conn
137 && conn->c_loopback
138 && conn->c_trans != &rds_loop_transport
139 && !is_outgoing) {
140 /* This is a looped back IB connection, and we're
141 * called by the code handling the incoming connect.
142 * We need a second connection object into which we
143 * can stick the other QP. */
144 parent = conn;
145 conn = parent->c_passive;
146 }
147 spin_unlock_irqrestore(&rds_conn_lock, flags);
148 if (conn)
149 goto out;
150
151 conn = kmem_cache_alloc(rds_conn_slab, gfp);
152 if (conn == NULL) {
153 conn = ERR_PTR(-ENOMEM);
154 goto out;
155 }
156
157 memset(conn, 0, sizeof(*conn));
158
159 INIT_HLIST_NODE(&conn->c_hash_node);
160 conn->c_version = RDS_PROTOCOL_3_0;
161 conn->c_laddr = laddr;
162 conn->c_faddr = faddr;
163 spin_lock_init(&conn->c_lock);
164 conn->c_next_tx_seq = 1;
165
166 mutex_init(&conn->c_send_lock);
167 INIT_LIST_HEAD(&conn->c_send_queue);
168 INIT_LIST_HEAD(&conn->c_retrans);
169
170 ret = rds_cong_get_maps(conn);
171 if (ret) {
172 kmem_cache_free(rds_conn_slab, conn);
173 conn = ERR_PTR(ret);
174 goto out;
175 }
176
177 /*
178 * This is where a connection becomes loopback. If *any* RDS sockets
179 * can bind to the destination address then we'd rather the messages
180 * flow through loopback rather than either transport.
181 */
182 if (rds_trans_get_preferred(faddr)) {
183 conn->c_loopback = 1;
184 if (is_outgoing && trans->t_prefer_loopback) {
185 /* "outgoing" connection - and the transport
186 * says it wants the connection handled by the
187 * loopback transport. This is what TCP does.
188 */
189 trans = &rds_loop_transport;
190 }
191 }
192
193 conn->c_trans = trans;
194
195 ret = trans->conn_alloc(conn, gfp);
196 if (ret) {
197 kmem_cache_free(rds_conn_slab, conn);
198 conn = ERR_PTR(ret);
199 goto out;
200 }
201
202 atomic_set(&conn->c_state, RDS_CONN_DOWN);
203 conn->c_reconnect_jiffies = 0;
204 INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
205 INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
206 INIT_DELAYED_WORK(&conn->c_conn_w, rds_connect_worker);
207 INIT_WORK(&conn->c_down_w, rds_shutdown_worker);
208 mutex_init(&conn->c_cm_lock);
209 conn->c_flags = 0;
210
211 rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
212 conn, &laddr, &faddr,
213 trans->t_name ? trans->t_name : "[unknown]",
214 is_outgoing ? "(outgoing)" : "");
215
216 spin_lock_irqsave(&rds_conn_lock, flags);
217 if (parent == NULL) {
218 tmp = rds_conn_lookup(head, laddr, faddr, trans);
219 if (tmp == NULL)
220 hlist_add_head(&conn->c_hash_node, head);
221 } else {
222 tmp = parent->c_passive;
223 if (!tmp)
224 parent->c_passive = conn;
225 }
226
227 if (tmp) {
228 trans->conn_free(conn->c_transport_data);
229 kmem_cache_free(rds_conn_slab, conn);
230 conn = tmp;
231 } else {
232 rds_cong_add_conn(conn);
233 rds_conn_count++;
234 }
235
236 spin_unlock_irqrestore(&rds_conn_lock, flags);
237
238out:
239 return conn;
240}
241
242struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
243 struct rds_transport *trans, gfp_t gfp)
244{
245 return __rds_conn_create(laddr, faddr, trans, gfp, 0);
246}
247
248struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
249 struct rds_transport *trans, gfp_t gfp)
250{
251 return __rds_conn_create(laddr, faddr, trans, gfp, 1);
252}
253
254void rds_conn_destroy(struct rds_connection *conn)
255{
256 struct rds_message *rm, *rtmp;
257
258 rdsdebug("freeing conn %p for %pI4 -> "
259 "%pI4\n", conn, &conn->c_laddr,
260 &conn->c_faddr);
261
262 hlist_del_init(&conn->c_hash_node);
263
264 /* wait for the rds thread to shut it down */
265 atomic_set(&conn->c_state, RDS_CONN_ERROR);
266 cancel_delayed_work(&conn->c_conn_w);
267 queue_work(rds_wq, &conn->c_down_w);
268 flush_workqueue(rds_wq);
269
270 /* tear down queued messages */
271 list_for_each_entry_safe(rm, rtmp,
272 &conn->c_send_queue,
273 m_conn_item) {
274 list_del_init(&rm->m_conn_item);
275 BUG_ON(!list_empty(&rm->m_sock_item));
276 rds_message_put(rm);
277 }
278 if (conn->c_xmit_rm)
279 rds_message_put(conn->c_xmit_rm);
280
281 conn->c_trans->conn_free(conn->c_transport_data);
282
283 /*
284 * The congestion maps aren't freed up here. They're
285 * freed by rds_cong_exit() after all the connections
286 * have been freed.
287 */
288 rds_cong_remove_conn(conn);
289
290 BUG_ON(!list_empty(&conn->c_retrans));
291 kmem_cache_free(rds_conn_slab, conn);
292
293 rds_conn_count--;
294}
295
296static void rds_conn_message_info(struct socket *sock, unsigned int len,
297 struct rds_info_iterator *iter,
298 struct rds_info_lengths *lens,
299 int want_send)
300{
301 struct hlist_head *head;
302 struct hlist_node *pos;
303 struct list_head *list;
304 struct rds_connection *conn;
305 struct rds_message *rm;
306 unsigned long flags;
307 unsigned int total = 0;
308 size_t i;
309
310 len /= sizeof(struct rds_info_message);
311
312 spin_lock_irqsave(&rds_conn_lock, flags);
313
314 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
315 i++, head++) {
316 hlist_for_each_entry(conn, pos, head, c_hash_node) {
317 if (want_send)
318 list = &conn->c_send_queue;
319 else
320 list = &conn->c_retrans;
321
322 spin_lock(&conn->c_lock);
323
324 /* XXX too lazy to maintain counts.. */
325 list_for_each_entry(rm, list, m_conn_item) {
326 total++;
327 if (total <= len)
328 rds_inc_info_copy(&rm->m_inc, iter,
329 conn->c_laddr,
330 conn->c_faddr, 0);
331 }
332
333 spin_unlock(&conn->c_lock);
334 }
335 }
336
337 spin_unlock_irqrestore(&rds_conn_lock, flags);
338
339 lens->nr = total;
340 lens->each = sizeof(struct rds_info_message);
341}
342
343static void rds_conn_message_info_send(struct socket *sock, unsigned int len,
344 struct rds_info_iterator *iter,
345 struct rds_info_lengths *lens)
346{
347 rds_conn_message_info(sock, len, iter, lens, 1);
348}
349
350static void rds_conn_message_info_retrans(struct socket *sock,
351 unsigned int len,
352 struct rds_info_iterator *iter,
353 struct rds_info_lengths *lens)
354{
355 rds_conn_message_info(sock, len, iter, lens, 0);
356}
357
358void rds_for_each_conn_info(struct socket *sock, unsigned int len,
359 struct rds_info_iterator *iter,
360 struct rds_info_lengths *lens,
361 int (*visitor)(struct rds_connection *, void *),
362 size_t item_len)
363{
364 uint64_t buffer[(item_len + 7) / 8];
365 struct hlist_head *head;
366 struct hlist_node *pos;
367 struct hlist_node *tmp;
368 struct rds_connection *conn;
369 unsigned long flags;
370 size_t i;
371
372 spin_lock_irqsave(&rds_conn_lock, flags);
373
374 lens->nr = 0;
375 lens->each = item_len;
376
377 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
378 i++, head++) {
379 hlist_for_each_entry_safe(conn, pos, tmp, head, c_hash_node) {
380
381 /* XXX no c_lock usage.. */
382 if (!visitor(conn, buffer))
383 continue;
384
385 /* We copy as much as we can fit in the buffer,
386 * but we count all items so that the caller
387 * can resize the buffer. */
388 if (len >= item_len) {
389 rds_info_copy(iter, buffer, item_len);
390 len -= item_len;
391 }
392 lens->nr++;
393 }
394 }
395
396 spin_unlock_irqrestore(&rds_conn_lock, flags);
397}
398
399static int rds_conn_info_visitor(struct rds_connection *conn,
400 void *buffer)
401{
402 struct rds_info_connection *cinfo = buffer;
403
404 cinfo->next_tx_seq = conn->c_next_tx_seq;
405 cinfo->next_rx_seq = conn->c_next_rx_seq;
406 cinfo->laddr = conn->c_laddr;
407 cinfo->faddr = conn->c_faddr;
408 strncpy(cinfo->transport, conn->c_trans->t_name,
409 sizeof(cinfo->transport));
410 cinfo->flags = 0;
411
412 rds_conn_info_set(cinfo->flags,
413 rds_conn_is_sending(conn), SENDING);
414 /* XXX Future: return the state rather than these funky bits */
415 rds_conn_info_set(cinfo->flags,
416 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING,
417 CONNECTING);
418 rds_conn_info_set(cinfo->flags,
419 atomic_read(&conn->c_state) == RDS_CONN_UP,
420 CONNECTED);
421 return 1;
422}
423
424static void rds_conn_info(struct socket *sock, unsigned int len,
425 struct rds_info_iterator *iter,
426 struct rds_info_lengths *lens)
427{
428 rds_for_each_conn_info(sock, len, iter, lens,
429 rds_conn_info_visitor,
430 sizeof(struct rds_info_connection));
431}
432
433int __init rds_conn_init(void)
434{
435 rds_conn_slab = kmem_cache_create("rds_connection",
436 sizeof(struct rds_connection),
437 0, 0, NULL);
438 if (rds_conn_slab == NULL)
439 return -ENOMEM;
440
441 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info);
442 rds_info_register_func(RDS_INFO_SEND_MESSAGES,
443 rds_conn_message_info_send);
444 rds_info_register_func(RDS_INFO_RETRANS_MESSAGES,
445 rds_conn_message_info_retrans);
446
447 return 0;
448}
449
450void rds_conn_exit(void)
451{
452 rds_loop_exit();
453
454 WARN_ON(!hlist_empty(rds_conn_hash));
455
456 kmem_cache_destroy(rds_conn_slab);
457
458 rds_info_deregister_func(RDS_INFO_CONNECTIONS, rds_conn_info);
459 rds_info_deregister_func(RDS_INFO_SEND_MESSAGES,
460 rds_conn_message_info_send);
461 rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES,
462 rds_conn_message_info_retrans);
463}
464
465/*
466 * Force a disconnect
467 */
468void rds_conn_drop(struct rds_connection *conn)
469{
470 atomic_set(&conn->c_state, RDS_CONN_ERROR);
471 queue_work(rds_wq, &conn->c_down_w);
472}
473
474/*
475 * An error occurred on the connection
476 */
477void
478__rds_conn_error(struct rds_connection *conn, const char *fmt, ...)
479{
480 va_list ap;
481
482 va_start(ap, fmt);
483 vprintk(fmt, ap);
484 va_end(ap);
485
486 rds_conn_drop(conn);
487}
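__rds_conn_create() above looks the connection up under rds_conn_lock, allocates outside the lock (allocation may sleep), then re-checks under the lock before inserting and frees its copy if another CPU won the race. A minimal, self-contained userspace sketch of that pattern, with illustrative names and a pthread mutex standing in for the kernel spinlock:

#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>

struct ex_conn {
	uint32_t laddr, faddr;
	struct ex_conn *next;
};

static struct ex_conn *ex_list;
static pthread_mutex_t ex_lock = PTHREAD_MUTEX_INITIALIZER;

static struct ex_conn *ex_lookup(uint32_t laddr, uint32_t faddr)
{
	struct ex_conn *c;

	for (c = ex_list; c; c = c->next)
		if (c->laddr == laddr && c->faddr == faddr)
			return c;
	return NULL;
}

static struct ex_conn *ex_get_conn(uint32_t laddr, uint32_t faddr)
{
	struct ex_conn *conn, *tmp;

	pthread_mutex_lock(&ex_lock);
	conn = ex_lookup(laddr, faddr);
	pthread_mutex_unlock(&ex_lock);
	if (conn)
		return conn;

	conn = calloc(1, sizeof(*conn));	/* done outside the lock */
	if (!conn)
		return NULL;
	conn->laddr = laddr;
	conn->faddr = faddr;

	pthread_mutex_lock(&ex_lock);
	tmp = ex_lookup(laddr, faddr);		/* another thread may have raced us */
	if (tmp) {
		free(conn);			/* lose the race gracefully */
		conn = tmp;
	} else {
		conn->next = ex_list;
		ex_list = conn;
	}
	pthread_mutex_unlock(&ex_lock);
	return conn;
}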
diff --git a/net/rds/ib.c b/net/rds/ib.c
new file mode 100644
index 000000000000..06a7b798d9a7
--- /dev/null
+++ b/net/rds/ib.c
@@ -0,0 +1,323 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/if.h>
36#include <linux/netdevice.h>
37#include <linux/inetdevice.h>
38#include <linux/if_arp.h>
39#include <linux/delay.h>
40
41#include "rds.h"
42#include "ib.h"
43
44unsigned int fmr_pool_size = RDS_FMR_POOL_SIZE;
45unsigned int fmr_message_size = RDS_FMR_SIZE + 1; /* +1 allows for unaligned MRs */
46
47module_param(fmr_pool_size, int, 0444);
48MODULE_PARM_DESC(fmr_pool_size, " Max number of fmr per HCA");
49module_param(fmr_message_size, int, 0444);
50MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
51
52struct list_head rds_ib_devices;
53
54DEFINE_SPINLOCK(ib_nodev_conns_lock);
55LIST_HEAD(ib_nodev_conns);
56
57void rds_ib_add_one(struct ib_device *device)
58{
59 struct rds_ib_device *rds_ibdev;
60 struct ib_device_attr *dev_attr;
61
62 /* Only handle IB (no iWARP) devices */
63 if (device->node_type != RDMA_NODE_IB_CA)
64 return;
65
66 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
67 if (!dev_attr)
68 return;
69
70 if (ib_query_device(device, dev_attr)) {
71 rdsdebug("Query device failed for %s\n", device->name);
72 goto free_attr;
73 }
74
75 rds_ibdev = kmalloc(sizeof *rds_ibdev, GFP_KERNEL);
76 if (!rds_ibdev)
77 goto free_attr;
78
79 spin_lock_init(&rds_ibdev->spinlock);
80
81 rds_ibdev->max_wrs = dev_attr->max_qp_wr;
82 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
83
84 rds_ibdev->fmr_page_shift = max(9, ffs(dev_attr->page_size_cap) - 1);
85 rds_ibdev->fmr_page_size = 1 << rds_ibdev->fmr_page_shift;
86 rds_ibdev->fmr_page_mask = ~((u64) rds_ibdev->fmr_page_size - 1);
87 rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32;
88 rds_ibdev->max_fmrs = dev_attr->max_fmr ?
89 min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) :
90 fmr_pool_size;
91
92 rds_ibdev->dev = device;
93 rds_ibdev->pd = ib_alloc_pd(device);
94 if (IS_ERR(rds_ibdev->pd))
95 goto free_dev;
96
97 rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd,
98 IB_ACCESS_LOCAL_WRITE);
99 if (IS_ERR(rds_ibdev->mr))
100 goto err_pd;
101
102 rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev);
103 if (IS_ERR(rds_ibdev->mr_pool)) {
104 rds_ibdev->mr_pool = NULL;
105 goto err_mr;
106 }
107
108 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
109 INIT_LIST_HEAD(&rds_ibdev->conn_list);
110 list_add_tail(&rds_ibdev->list, &rds_ib_devices);
111
112 ib_set_client_data(device, &rds_ib_client, rds_ibdev);
113
114 goto free_attr;
115
116err_mr:
117 ib_dereg_mr(rds_ibdev->mr);
118err_pd:
119 ib_dealloc_pd(rds_ibdev->pd);
120free_dev:
121 kfree(rds_ibdev);
122free_attr:
123 kfree(dev_attr);
124}
125
126void rds_ib_remove_one(struct ib_device *device)
127{
128 struct rds_ib_device *rds_ibdev;
129 struct rds_ib_ipaddr *i_ipaddr, *i_next;
130
131 rds_ibdev = ib_get_client_data(device, &rds_ib_client);
132 if (!rds_ibdev)
133 return;
134
135 list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
136 list_del(&i_ipaddr->list);
137 kfree(i_ipaddr);
138 }
139
140 rds_ib_remove_conns(rds_ibdev);
141
142 if (rds_ibdev->mr_pool)
143 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
144
145 ib_dereg_mr(rds_ibdev->mr);
146
147 while (ib_dealloc_pd(rds_ibdev->pd)) {
148 rdsdebug("Failed to dealloc pd %p\n", rds_ibdev->pd);
149 msleep(1);
150 }
151
152 list_del(&rds_ibdev->list);
153 kfree(rds_ibdev);
154}
155
156struct ib_client rds_ib_client = {
157 .name = "rds_ib",
158 .add = rds_ib_add_one,
159 .remove = rds_ib_remove_one
160};
161
162static int rds_ib_conn_info_visitor(struct rds_connection *conn,
163 void *buffer)
164{
165 struct rds_info_rdma_connection *iinfo = buffer;
166 struct rds_ib_connection *ic;
167
168 /* We will only ever look at IB transports */
169 if (conn->c_trans != &rds_ib_transport)
170 return 0;
171
172 iinfo->src_addr = conn->c_laddr;
173 iinfo->dst_addr = conn->c_faddr;
174
175 memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
176 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
177 if (rds_conn_state(conn) == RDS_CONN_UP) {
178 struct rds_ib_device *rds_ibdev;
179 struct rdma_dev_addr *dev_addr;
180
181 ic = conn->c_transport_data;
182 dev_addr = &ic->i_cm_id->route.addr.dev_addr;
183
184 ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
185 ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
186
187 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
188 iinfo->max_send_wr = ic->i_send_ring.w_nr;
189 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
190 iinfo->max_send_sge = rds_ibdev->max_sge;
191 rds_ib_get_mr_info(rds_ibdev, iinfo);
192 }
193 return 1;
194}
195
196static void rds_ib_ic_info(struct socket *sock, unsigned int len,
197 struct rds_info_iterator *iter,
198 struct rds_info_lengths *lens)
199{
200 rds_for_each_conn_info(sock, len, iter, lens,
201 rds_ib_conn_info_visitor,
202 sizeof(struct rds_info_rdma_connection));
203}
204
205
206/*
207 * Early RDS/IB was built to only bind to an address if there is an IPoIB
208 * device with that address set.
209 *
210 * If it were me, I'd advocate for something more flexible. Sending and
211 * receiving should be device-agnostic. Transports would try and maintain
212 * connections between peers who have messages queued. Userspace would be
213 * allowed to influence which paths have priority. We could call userspace
214 * asserting this policy "routing".
215 */
216static int rds_ib_laddr_check(__be32 addr)
217{
218 int ret;
219 struct rdma_cm_id *cm_id;
220 struct sockaddr_in sin;
221
222 /* Create a CMA ID and try to bind it. This catches both
223 * IB and iWARP capable NICs.
224 */
225 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP);
226 if (!cm_id)
227 return -EADDRNOTAVAIL;
228
229 memset(&sin, 0, sizeof(sin));
230 sin.sin_family = AF_INET;
231 sin.sin_addr.s_addr = addr;
232
233 /* rdma_bind_addr will only succeed for IB & iWARP devices */
234 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
235 /* due to this, we will claim to support iWARP devices unless we
236 check node_type. */
237 if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA)
238 ret = -EADDRNOTAVAIL;
239
240 rdsdebug("addr %pI4 ret %d node type %d\n",
241 &addr, ret,
242 cm_id->device ? cm_id->device->node_type : -1);
243
244 rdma_destroy_id(cm_id);
245
246 return ret;
247}
248
249void rds_ib_exit(void)
250{
251 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
252 rds_ib_remove_nodev_conns();
253 ib_unregister_client(&rds_ib_client);
254 rds_ib_sysctl_exit();
255 rds_ib_recv_exit();
256 rds_trans_unregister(&rds_ib_transport);
257}
258
259struct rds_transport rds_ib_transport = {
260 .laddr_check = rds_ib_laddr_check,
261 .xmit_complete = rds_ib_xmit_complete,
262 .xmit = rds_ib_xmit,
263 .xmit_cong_map = NULL,
264 .xmit_rdma = rds_ib_xmit_rdma,
265 .recv = rds_ib_recv,
266 .conn_alloc = rds_ib_conn_alloc,
267 .conn_free = rds_ib_conn_free,
268 .conn_connect = rds_ib_conn_connect,
269 .conn_shutdown = rds_ib_conn_shutdown,
270 .inc_copy_to_user = rds_ib_inc_copy_to_user,
271 .inc_purge = rds_ib_inc_purge,
272 .inc_free = rds_ib_inc_free,
273 .cm_initiate_connect = rds_ib_cm_initiate_connect,
274 .cm_handle_connect = rds_ib_cm_handle_connect,
275 .cm_connect_complete = rds_ib_cm_connect_complete,
276 .stats_info_copy = rds_ib_stats_info_copy,
277 .exit = rds_ib_exit,
278 .get_mr = rds_ib_get_mr,
279 .sync_mr = rds_ib_sync_mr,
280 .free_mr = rds_ib_free_mr,
281 .flush_mrs = rds_ib_flush_mrs,
282 .t_owner = THIS_MODULE,
283 .t_name = "infiniband",
284};
285
286int __init rds_ib_init(void)
287{
288 int ret;
289
290 INIT_LIST_HEAD(&rds_ib_devices);
291
292 ret = ib_register_client(&rds_ib_client);
293 if (ret)
294 goto out;
295
296 ret = rds_ib_sysctl_init();
297 if (ret)
298 goto out_ibreg;
299
300 ret = rds_ib_recv_init();
301 if (ret)
302 goto out_sysctl;
303
304 ret = rds_trans_register(&rds_ib_transport);
305 if (ret)
306 goto out_recv;
307
308 rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
309
310 goto out;
311
312out_recv:
313 rds_ib_recv_exit();
314out_sysctl:
315 rds_ib_sysctl_exit();
316out_ibreg:
317 ib_unregister_client(&rds_ib_client);
318out:
319 return ret;
320}
321
322MODULE_LICENSE("GPL");
323
diff --git a/net/rds/ib.h b/net/rds/ib.h
new file mode 100644
index 000000000000..8be563a1363a
--- /dev/null
+++ b/net/rds/ib.h
@@ -0,0 +1,367 @@
1#ifndef _RDS_IB_H
2#define _RDS_IB_H
3
4#include <rdma/ib_verbs.h>
5#include <rdma/rdma_cm.h>
6#include "rds.h"
7#include "rdma_transport.h"
8
9#define RDS_FMR_SIZE 256
10#define RDS_FMR_POOL_SIZE 4096
11
12#define RDS_IB_MAX_SGE 8
13#define RDS_IB_RECV_SGE 2
14
15#define RDS_IB_DEFAULT_RECV_WR 1024
16#define RDS_IB_DEFAULT_SEND_WR 256
17
18#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
19
20extern struct list_head rds_ib_devices;
21
22/*
23 * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to
24 * try to minimize the amount of memory tied up in both the device and
25 * socket receive queues.
26 */
27/* page offset of the final full frag that fits in the page */
28#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE)
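(Editorial worked example; RDS_FRAG_SIZE is defined in rds.h and the values below are assumptions.) With PAGE_SIZE = 4096 and RDS_FRAG_SIZE = 4096 there is one fragment per page, so RDS_PAGE_LAST_OFF = (1 - 1) * 4096 = 0 and the only fragment starts at offset 0. With a hypothetical RDS_FRAG_SIZE of 1024, RDS_PAGE_LAST_OFF = (4 - 1) * 1024 = 3072, the offset of the last full fragment that still fits in the page.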
29struct rds_page_frag {
30 struct list_head f_item;
31 struct page *f_page;
32 unsigned long f_offset;
33 dma_addr_t f_mapped;
34};
35
36struct rds_ib_incoming {
37 struct list_head ii_frags;
38 struct rds_incoming ii_inc;
39};
40
41struct rds_ib_connect_private {
42 /* Add new fields at the end, and don't permute existing fields. */
43 __be32 dp_saddr;
44 __be32 dp_daddr;
45 u8 dp_protocol_major;
46 u8 dp_protocol_minor;
47 __be16 dp_protocol_minor_mask; /* bitmask */
48 __be32 dp_reserved1;
49 __be64 dp_ack_seq;
50 __be32 dp_credit; /* non-zero enables flow ctl */
51};
52
53struct rds_ib_send_work {
54 struct rds_message *s_rm;
55 struct rds_rdma_op *s_op;
56 struct ib_send_wr s_wr;
57 struct ib_sge s_sge[RDS_IB_MAX_SGE];
58 unsigned long s_queued;
59};
60
61struct rds_ib_recv_work {
62 struct rds_ib_incoming *r_ibinc;
63 struct rds_page_frag *r_frag;
64 struct ib_recv_wr r_wr;
65 struct ib_sge r_sge[2];
66};
67
68struct rds_ib_work_ring {
69 u32 w_nr;
70 u32 w_alloc_ptr;
71 u32 w_alloc_ctr;
72 u32 w_free_ptr;
73 atomic_t w_free_ctr;
74};
75
76struct rds_ib_device;
77
78struct rds_ib_connection {
79
80 struct list_head ib_node;
81 struct rds_ib_device *rds_ibdev;
82 struct rds_connection *conn;
83
84 /* alphabet soup, IBTA style */
85 struct rdma_cm_id *i_cm_id;
86 struct ib_pd *i_pd;
87 struct ib_mr *i_mr;
88 struct ib_cq *i_send_cq;
89 struct ib_cq *i_recv_cq;
90
91 /* tx */
92 struct rds_ib_work_ring i_send_ring;
93 struct rds_message *i_rm;
94 struct rds_header *i_send_hdrs;
95 u64 i_send_hdrs_dma;
96 struct rds_ib_send_work *i_sends;
97
98 /* rx */
99 struct mutex i_recv_mutex;
100 struct rds_ib_work_ring i_recv_ring;
101 struct rds_ib_incoming *i_ibinc;
102 u32 i_recv_data_rem;
103 struct rds_header *i_recv_hdrs;
104 u64 i_recv_hdrs_dma;
105 struct rds_ib_recv_work *i_recvs;
106 struct rds_page_frag i_frag;
107 u64 i_ack_recv; /* last ACK received */
108
109 /* sending acks */
110 unsigned long i_ack_flags;
111 u64 i_ack_next; /* next ACK to send */
112 struct rds_header *i_ack;
113 struct ib_send_wr i_ack_wr;
114 struct ib_sge i_ack_sge;
115 u64 i_ack_dma;
116 unsigned long i_ack_queued;
117
118 /* Flow control related information
119 *
120 * Our algorithm uses a pair of variables that we need to access
121 * atomically - one for the send credits, and one for the posted
122 * recv credits we need to transfer to the remote.
123 * Rather than protect them using a slow spinlock, we put both into
124 * a single atomic_t and update it using cmpxchg
125 */
126 atomic_t i_credits;
127
128 /* Protocol version specific information */
129 unsigned int i_flowctl:1; /* enable/disable flow ctl */
130
131 /* Batched completions */
132 unsigned int i_unsignaled_wrs;
133 long i_unsignaled_bytes;
134};
135
136/* This assumes that atomic_t is at least 32 bits */
137#define IB_GET_SEND_CREDITS(v) ((v) & 0xffff)
138#define IB_GET_POST_CREDITS(v) ((v) >> 16)
139#define IB_SET_SEND_CREDITS(v) ((v) & 0xffff)
140#define IB_SET_POST_CREDITS(v) ((v) << 16)
141
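The macros above pack send credits into the low 16 bits and posted receive credits into the high 16 bits of one word so both can be updated in a single cmpxchg. The following is a minimal userspace sketch of that scheme (editorial, not part of the patch; the helper name add_send_credits_sketch is hypothetical), using C11 atomics in place of the kernel's atomic_t:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Same layout as the IB_*_CREDITS macros above: send credits in the
 * low 16 bits, posted recv credits in the high 16 bits. */
#define SKETCH_GET_SEND(v)	((v) & 0xffff)
#define SKETCH_GET_POST(v)	((v) >> 16)
#define SKETCH_SET_POST(v)	((v) << 16)

/* Hypothetical helper: atomically add send credits, retrying with a
 * compare-and-swap loop instead of taking a spinlock. */
static void add_send_credits_sketch(atomic_uint *credits, unsigned int n)
{
	unsigned int oldv = atomic_load(credits);
	unsigned int newv;

	do {
		newv = oldv + n;	/* send credits live in the low half */
	} while (!atomic_compare_exchange_weak(credits, &oldv, newv));
}

int main(void)
{
	atomic_uint credits = SKETCH_SET_POST(8) | 16;

	add_send_credits_sketch(&credits, 4);
	printf("send %u post %u\n",
	       SKETCH_GET_SEND(atomic_load(&credits)),
	       SKETCH_GET_POST(atomic_load(&credits)));	/* send 20 post 8 */
	return 0;
}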
142struct rds_ib_ipaddr {
143 struct list_head list;
144 __be32 ipaddr;
145};
146
147struct rds_ib_device {
148 struct list_head list;
149 struct list_head ipaddr_list;
150 struct list_head conn_list;
151 struct ib_device *dev;
152 struct ib_pd *pd;
153 struct ib_mr *mr;
154 struct rds_ib_mr_pool *mr_pool;
155 int fmr_page_shift;
156 int fmr_page_size;
157 u64 fmr_page_mask;
158 unsigned int fmr_max_remaps;
159 unsigned int max_fmrs;
160 int max_sge;
161 unsigned int max_wrs;
162 spinlock_t spinlock; /* protect the above */
163};
164
165/* bits for i_ack_flags */
166#define IB_ACK_IN_FLIGHT 0
167#define IB_ACK_REQUESTED 1
168
169/* Magic WR_ID for ACKs */
170#define RDS_IB_ACK_WR_ID (~(u64) 0)
171
172struct rds_ib_statistics {
173 uint64_t s_ib_connect_raced;
174 uint64_t s_ib_listen_closed_stale;
175 uint64_t s_ib_tx_cq_call;
176 uint64_t s_ib_tx_cq_event;
177 uint64_t s_ib_tx_ring_full;
178 uint64_t s_ib_tx_throttle;
179 uint64_t s_ib_tx_sg_mapping_failure;
180 uint64_t s_ib_tx_stalled;
181 uint64_t s_ib_tx_credit_updates;
182 uint64_t s_ib_rx_cq_call;
183 uint64_t s_ib_rx_cq_event;
184 uint64_t s_ib_rx_ring_empty;
185 uint64_t s_ib_rx_refill_from_cq;
186 uint64_t s_ib_rx_refill_from_thread;
187 uint64_t s_ib_rx_alloc_limit;
188 uint64_t s_ib_rx_credit_updates;
189 uint64_t s_ib_ack_sent;
190 uint64_t s_ib_ack_send_failure;
191 uint64_t s_ib_ack_send_delayed;
192 uint64_t s_ib_ack_send_piggybacked;
193 uint64_t s_ib_ack_received;
194 uint64_t s_ib_rdma_mr_alloc;
195 uint64_t s_ib_rdma_mr_free;
196 uint64_t s_ib_rdma_mr_used;
197 uint64_t s_ib_rdma_mr_pool_flush;
198 uint64_t s_ib_rdma_mr_pool_wait;
199 uint64_t s_ib_rdma_mr_pool_depleted;
200};
201
202extern struct workqueue_struct *rds_ib_wq;
203
204/*
205 * Fake ib_dma_sync_sg_for_{cpu,device} as long as ib_verbs.h
206 * doesn't define it.
207 */
208static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev,
209 struct scatterlist *sg, unsigned int sg_dma_len, int direction)
210{
211 unsigned int i;
212
213 for (i = 0; i < sg_dma_len; ++i) {
214 ib_dma_sync_single_for_cpu(dev,
215 ib_sg_dma_address(dev, &sg[i]),
216 ib_sg_dma_len(dev, &sg[i]),
217 direction);
218 }
219}
220#define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu
221
222static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
223 struct scatterlist *sg, unsigned int sg_dma_len, int direction)
224{
225 unsigned int i;
226
227 for (i = 0; i < sg_dma_len; ++i) {
228 ib_dma_sync_single_for_device(dev,
229 ib_sg_dma_address(dev, &sg[i]),
230 ib_sg_dma_len(dev, &sg[i]),
231 direction);
232 }
233}
234#define ib_dma_sync_sg_for_device rds_ib_dma_sync_sg_for_device
235
236
237/* ib.c */
238extern struct rds_transport rds_ib_transport;
239extern void rds_ib_add_one(struct ib_device *device);
240extern void rds_ib_remove_one(struct ib_device *device);
241extern struct ib_client rds_ib_client;
242
243extern unsigned int fmr_pool_size;
244extern unsigned int fmr_message_size;
245
246extern spinlock_t ib_nodev_conns_lock;
247extern struct list_head ib_nodev_conns;
248
249/* ib_cm.c */
250int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp);
251void rds_ib_conn_free(void *arg);
252int rds_ib_conn_connect(struct rds_connection *conn);
253void rds_ib_conn_shutdown(struct rds_connection *conn);
254void rds_ib_state_change(struct sock *sk);
255int __init rds_ib_listen_init(void);
256void rds_ib_listen_stop(void);
257void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
258int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
259 struct rdma_cm_event *event);
260int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id);
261void rds_ib_cm_connect_complete(struct rds_connection *conn,
262 struct rdma_cm_event *event);
263
264
265#define rds_ib_conn_error(conn, fmt...) \
266 __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt)
267
268/* ib_rdma.c */
269int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
270int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
271void rds_ib_remove_nodev_conns(void);
272void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev);
273struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
274void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
275void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
276void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
277 struct rds_sock *rs, u32 *key_ret);
278void rds_ib_sync_mr(void *trans_private, int dir);
279void rds_ib_free_mr(void *trans_private, int invalidate);
280void rds_ib_flush_mrs(void);
281
282/* ib_recv.c */
283int __init rds_ib_recv_init(void);
284void rds_ib_recv_exit(void);
285int rds_ib_recv(struct rds_connection *conn);
286int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
287 gfp_t page_gfp, int prefill);
288void rds_ib_inc_purge(struct rds_incoming *inc);
289void rds_ib_inc_free(struct rds_incoming *inc);
290int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
291 size_t size);
292void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context);
293void rds_ib_recv_init_ring(struct rds_ib_connection *ic);
294void rds_ib_recv_clear_ring(struct rds_ib_connection *ic);
295void rds_ib_recv_init_ack(struct rds_ib_connection *ic);
296void rds_ib_attempt_ack(struct rds_ib_connection *ic);
297void rds_ib_ack_send_complete(struct rds_ib_connection *ic);
298u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic);
299
300/* ib_ring.c */
301void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr);
302void rds_ib_ring_resize(struct rds_ib_work_ring *ring, u32 nr);
303u32 rds_ib_ring_alloc(struct rds_ib_work_ring *ring, u32 val, u32 *pos);
304void rds_ib_ring_free(struct rds_ib_work_ring *ring, u32 val);
305void rds_ib_ring_unalloc(struct rds_ib_work_ring *ring, u32 val);
306int rds_ib_ring_empty(struct rds_ib_work_ring *ring);
307int rds_ib_ring_low(struct rds_ib_work_ring *ring);
308u32 rds_ib_ring_oldest(struct rds_ib_work_ring *ring);
309u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);
310extern wait_queue_head_t rds_ib_ring_empty_wait;
311
312/* ib_send.c */
313void rds_ib_xmit_complete(struct rds_connection *conn);
314int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
315 unsigned int hdr_off, unsigned int sg, unsigned int off);
316void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context);
317void rds_ib_send_init_ring(struct rds_ib_connection *ic);
318void rds_ib_send_clear_ring(struct rds_ib_connection *ic);
319int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op);
320void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits);
321void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted);
322int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted,
323 u32 *adv_credits, int need_posted);
324
325/* ib_stats.c */
326DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
327#define rds_ib_stats_inc(member) rds_stats_inc_which(rds_ib_stats, member)
328unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
329 unsigned int avail);
330
331/* ib_sysctl.c */
332int __init rds_ib_sysctl_init(void);
333void rds_ib_sysctl_exit(void);
334extern unsigned long rds_ib_sysctl_max_send_wr;
335extern unsigned long rds_ib_sysctl_max_recv_wr;
336extern unsigned long rds_ib_sysctl_max_unsig_wrs;
337extern unsigned long rds_ib_sysctl_max_unsig_bytes;
338extern unsigned long rds_ib_sysctl_max_recv_allocation;
339extern unsigned int rds_ib_sysctl_flow_control;
340extern ctl_table rds_ib_sysctl_table[];
341
342/*
343 * Helper functions for getting/setting the header and data SGEs in
344 * RDS packets (not RDMA)
345 */
346static inline struct ib_sge *
347rds_ib_header_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
348{
349 return &sge[0];
350}
351
352static inline struct ib_sge *
353rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
354{
355 return &sge[1];
356}
357
358static inline void rds_ib_set_64bit(u64 *ptr, u64 val)
359{
360#if BITS_PER_LONG == 64
361 *ptr = val;
362#else
363 set_64bit(ptr, val);
364#endif
365}
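A brief userspace analogue of rds_ib_set_64bit() (editorial sketch, not part of the patch): on a 64-bit kernel an aligned 64-bit store is already a single write, but on 32-bit it may be split into two 32-bit stores, letting a concurrent reader observe a torn value; set_64bit() performs one indivisible 64-bit store instead. In portable C11 the same guarantee comes from an atomic store:

#include <stdatomic.h>
#include <stdint.h>

/* Editorial sketch: the atomic store below is indivisible even on a
 * 32-bit target (the toolchain may emit cmpxchg8b or call libatomic),
 * so no reader ever sees only half of the new value. */
static void set_64bit_sketch(_Atomic uint64_t *ptr, uint64_t val)
{
	atomic_store(ptr, val);
}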
366
367#endif
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
new file mode 100644
index 000000000000..0532237bd128
--- /dev/null
+++ b/net/rds/ib_cm.c
@@ -0,0 +1,726 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/vmalloc.h>
36
37#include "rds.h"
38#include "ib.h"
39
40/*
41 * Set the selected protocol version
42 */
43static void rds_ib_set_protocol(struct rds_connection *conn, unsigned int version)
44{
45 conn->c_version = version;
46}
47
48/*
49 * Set up flow control
50 */
51static void rds_ib_set_flow_control(struct rds_connection *conn, u32 credits)
52{
53 struct rds_ib_connection *ic = conn->c_transport_data;
54
55 if (rds_ib_sysctl_flow_control && credits != 0) {
56 /* We're doing flow control */
57 ic->i_flowctl = 1;
58 rds_ib_send_add_credits(conn, credits);
59 } else {
60 ic->i_flowctl = 0;
61 }
62}
63
64/*
65 * Tune RNR behavior. Without flow control, we use a rather
66 * low timeout, but not the absolute minimum - this should
67 * be tunable.
68 *
69 * We already set the RNR retry count to 7 (which is the
70 * smallest infinite number :-) above.
71 * If flow control is off, we want to change this back to 0
72 * so that we learn quickly when our credit accounting is
73 * buggy.
74 *
75 * Caller passes in a qp_attr pointer - don't waste stack space
76 * by allocating this twice.
77 */
78static void
79rds_ib_tune_rnr(struct rds_ib_connection *ic, struct ib_qp_attr *attr)
80{
81 int ret;
82
83 attr->min_rnr_timer = IB_RNR_TIMER_000_32;
84 ret = ib_modify_qp(ic->i_cm_id->qp, attr, IB_QP_MIN_RNR_TIMER);
85 if (ret)
86 printk(KERN_NOTICE "ib_modify_qp(IB_QP_MIN_RNR_TIMER): err=%d\n", -ret);
87}
88
89/*
90 * Connection established.
91 * We get here for both outgoing and incoming connections.
92 */
93void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
94{
95 const struct rds_ib_connect_private *dp = NULL;
96 struct rds_ib_connection *ic = conn->c_transport_data;
97 struct rds_ib_device *rds_ibdev;
98 struct ib_qp_attr qp_attr;
99 int err;
100
101 if (event->param.conn.private_data_len) {
102 dp = event->param.conn.private_data;
103
104 rds_ib_set_protocol(conn,
105 RDS_PROTOCOL(dp->dp_protocol_major,
106 dp->dp_protocol_minor));
107 rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
108 }
109
110 printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
111 &conn->c_laddr,
112 RDS_PROTOCOL_MAJOR(conn->c_version),
113 RDS_PROTOCOL_MINOR(conn->c_version),
114 ic->i_flowctl ? ", flow control" : "");
115
116 /* Tune RNR behavior */
117 rds_ib_tune_rnr(ic, &qp_attr);
118
119 qp_attr.qp_state = IB_QPS_RTS;
120 err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
121 if (err)
122 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
123
124 /* update ib_device with this local ipaddr & conn */
125 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
126 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
127 if (err)
128 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err);
129 err = rds_ib_add_conn(rds_ibdev, conn);
130 if (err)
131 printk(KERN_ERR "rds_ib_add_conn failed (%d)\n", err);
132
133 /* If the peer gave us the last packet it saw, process this as if
134 * we had received a regular ACK. */
135 if (dp && dp->dp_ack_seq)
136 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
137
138 rds_connect_complete(conn);
139}
140
141static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
142 struct rdma_conn_param *conn_param,
143 struct rds_ib_connect_private *dp,
144 u32 protocol_version)
145{
146 memset(conn_param, 0, sizeof(struct rdma_conn_param));
147 /* XXX tune these? */
148 conn_param->responder_resources = 1;
149 conn_param->initiator_depth = 1;
150 conn_param->retry_count = 7;
151 conn_param->rnr_retry_count = 7;
152
153 if (dp) {
154 struct rds_ib_connection *ic = conn->c_transport_data;
155
156 memset(dp, 0, sizeof(*dp));
157 dp->dp_saddr = conn->c_laddr;
158 dp->dp_daddr = conn->c_faddr;
159 dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
160 dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
161 dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS);
162 dp->dp_ack_seq = rds_ib_piggyb_ack(ic);
163
164 /* Advertise flow control */
165 if (ic->i_flowctl) {
166 unsigned int credits;
167
168 credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
169 dp->dp_credit = cpu_to_be32(credits);
170 atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits);
171 }
172
173 conn_param->private_data = dp;
174 conn_param->private_data_len = sizeof(*dp);
175 }
176}
177
178static void rds_ib_cq_event_handler(struct ib_event *event, void *data)
179{
180 rdsdebug("event %u data %p\n", event->event, data);
181}
182
183static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
184{
185 struct rds_connection *conn = data;
186 struct rds_ib_connection *ic = conn->c_transport_data;
187
188 rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event);
189
190 switch (event->event) {
191 case IB_EVENT_COMM_EST:
192 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
193 break;
194 default:
195 printk(KERN_WARNING "RDS/ib: unhandled QP event %u "
196 "on connection to %pI4\n", event->event,
197 &conn->c_faddr);
198 break;
199 }
200}
201
202/*
203 * This needs to be very careful to not leave IS_ERR pointers around for
204 * cleanup to trip over.
205 */
206static int rds_ib_setup_qp(struct rds_connection *conn)
207{
208 struct rds_ib_connection *ic = conn->c_transport_data;
209 struct ib_device *dev = ic->i_cm_id->device;
210 struct ib_qp_init_attr attr;
211 struct rds_ib_device *rds_ibdev;
212 int ret;
213
214 /* rds_ib_add_one creates a rds_ib_device object per IB device,
215 * and allocates a protection domain, memory range and FMR pool
216 * for each. If that fails for any reason, it will not register
217 * the rds_ibdev at all.
218 */
219 rds_ibdev = ib_get_client_data(dev, &rds_ib_client);
220 if (rds_ibdev == NULL) {
221 if (printk_ratelimit())
222 printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n",
223 dev->name);
224 return -EOPNOTSUPP;
225 }
226
227 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
228 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
229 if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1)
230 rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1);
231
232 /* Protection domain and memory range */
233 ic->i_pd = rds_ibdev->pd;
234 ic->i_mr = rds_ibdev->mr;
235
236 ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler,
237 rds_ib_cq_event_handler, conn,
238 ic->i_send_ring.w_nr + 1, 0);
239 if (IS_ERR(ic->i_send_cq)) {
240 ret = PTR_ERR(ic->i_send_cq);
241 ic->i_send_cq = NULL;
242 rdsdebug("ib_create_cq send failed: %d\n", ret);
243 goto out;
244 }
245
246 ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler,
247 rds_ib_cq_event_handler, conn,
248 ic->i_recv_ring.w_nr, 0);
249 if (IS_ERR(ic->i_recv_cq)) {
250 ret = PTR_ERR(ic->i_recv_cq);
251 ic->i_recv_cq = NULL;
252 rdsdebug("ib_create_cq recv failed: %d\n", ret);
253 goto out;
254 }
255
256 ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
257 if (ret) {
258 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
259 goto out;
260 }
261
262 ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
263 if (ret) {
264 rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
265 goto out;
266 }
267
268 /* XXX negotiate max send/recv with remote? */
269 memset(&attr, 0, sizeof(attr));
270 attr.event_handler = rds_ib_qp_event_handler;
271 attr.qp_context = conn;
272 /* + 1 to allow for the single ack message */
273 attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1;
274 attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1;
275 attr.cap.max_send_sge = rds_ibdev->max_sge;
276 attr.cap.max_recv_sge = RDS_IB_RECV_SGE;
277 attr.sq_sig_type = IB_SIGNAL_REQ_WR;
278 attr.qp_type = IB_QPT_RC;
279 attr.send_cq = ic->i_send_cq;
280 attr.recv_cq = ic->i_recv_cq;
281
282 /*
283 * XXX this can fail if max_*_wr is too large? Are we supposed
284 * to back off until we get a value that the hardware can support?
285 */
286 ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
287 if (ret) {
288 rdsdebug("rdma_create_qp failed: %d\n", ret);
289 goto out;
290 }
291
292 ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
293 ic->i_send_ring.w_nr *
294 sizeof(struct rds_header),
295 &ic->i_send_hdrs_dma, GFP_KERNEL);
296 if (ic->i_send_hdrs == NULL) {
297 ret = -ENOMEM;
298 rdsdebug("ib_dma_alloc_coherent send failed\n");
299 goto out;
300 }
301
302 ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
303 ic->i_recv_ring.w_nr *
304 sizeof(struct rds_header),
305 &ic->i_recv_hdrs_dma, GFP_KERNEL);
306 if (ic->i_recv_hdrs == NULL) {
307 ret = -ENOMEM;
308 rdsdebug("ib_dma_alloc_coherent recv failed\n");
309 goto out;
310 }
311
312 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
313 &ic->i_ack_dma, GFP_KERNEL);
314 if (ic->i_ack == NULL) {
315 ret = -ENOMEM;
316 rdsdebug("ib_dma_alloc_coherent ack failed\n");
317 goto out;
318 }
319
320 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work));
321 if (ic->i_sends == NULL) {
322 ret = -ENOMEM;
323 rdsdebug("send allocation failed\n");
324 goto out;
325 }
326 rds_ib_send_init_ring(ic);
327
328 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work));
329 if (ic->i_recvs == NULL) {
330 ret = -ENOMEM;
331 rdsdebug("recv allocation failed\n");
332 goto out;
333 }
334
335 rds_ib_recv_init_ring(ic);
336 rds_ib_recv_init_ack(ic);
337
338 /* Post receive buffers - as a side effect, this will update
339 * the posted credit count. */
340 rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);
341
342 rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr,
343 ic->i_send_cq, ic->i_recv_cq);
344
345out:
346 return ret;
347}
348
349static u32 rds_ib_protocol_compatible(const struct rds_ib_connect_private *dp)
350{
351 u16 common;
352 u32 version = 0;
353
354 /* rdma_cm private data is odd - when there is any private data in the
355 * request, we will be given a pretty large buffer without telling us the
356 * original size. The only way to tell the difference is by looking at
357 * the contents, which are initialized to zero.
358 * If the protocol version fields aren't set, this is a connection attempt
359 * from an older version. This could be 3.0 or 2.0 - we can't tell.
360 * We really should have changed this for OFED 1.3 :-( */
361 if (dp->dp_protocol_major == 0)
362 return RDS_PROTOCOL_3_0;
363
364 common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IB_SUPPORTED_PROTOCOLS;
365 if (dp->dp_protocol_major == 3 && common) {
366 version = RDS_PROTOCOL_3_0;
367 while ((common >>= 1) != 0)
368 version++;
369 } else if (printk_ratelimit()) {
370 printk(KERN_NOTICE "RDS: Connection from %pI4 using "
371 "incompatible protocol version %u.%u\n",
372 &dp->dp_saddr,
373 dp->dp_protocol_major,
374 dp->dp_protocol_minor);
375 }
376 return version;
377}
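As a worked example of the negotiation above (editorial sketch; the helper below is hypothetical and only restates the loop): each bit in dp_protocol_minor_mask advertises one supported minor version, and the agreed version is the highest minor set on both sides. With RDS_IB_SUPPORTED_PROTOCOLS = 0x0003 and a peer mask of 0x0003, common = 0x3 and the loop shifts it right once before it reaches zero, so the result is RDS_PROTOCOL_3_0 plus one minor step.

#include <stdint.h>
#include <stdio.h>

/* Editorial sketch: bit n of each mask means "minor version n supported";
 * return the highest minor both peers advertise (0 also covers the
 * no-overlap case, which the real code reports as incompatible). */
static unsigned int highest_common_minor(uint16_t ours, uint16_t theirs)
{
	uint16_t common = ours & theirs;
	unsigned int minor = 0;

	while ((common >>= 1) != 0)
		minor++;
	return minor;
}

int main(void)
{
	printf("%u\n", highest_common_minor(0x0003, 0x0003));	/* prints 1 */
	return 0;
}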
378
379int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
380 struct rdma_cm_event *event)
381{
382 __be64 lguid = cm_id->route.path_rec->sgid.global.interface_id;
383 __be64 fguid = cm_id->route.path_rec->dgid.global.interface_id;
384 const struct rds_ib_connect_private *dp = event->param.conn.private_data;
385 struct rds_ib_connect_private dp_rep;
386 struct rds_connection *conn = NULL;
387 struct rds_ib_connection *ic = NULL;
388 struct rdma_conn_param conn_param;
389 u32 version;
390 int err, destroy = 1;
391
392 /* Check whether the remote protocol version matches ours. */
393 version = rds_ib_protocol_compatible(dp);
394 if (!version)
395 goto out;
396
397 rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u lguid 0x%llx fguid "
398 "0x%llx\n", &dp->dp_saddr, &dp->dp_daddr,
399 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version),
400 (unsigned long long)be64_to_cpu(lguid),
401 (unsigned long long)be64_to_cpu(fguid));
402
403 conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport,
404 GFP_KERNEL);
405 if (IS_ERR(conn)) {
406 rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
407 conn = NULL;
408 goto out;
409 }
410
411 /*
412 * The connection request may occur while the
413 * previous connection exists, e.g. in case of failover.
414 * But as connections may be initiated simultaneously
415 * by both hosts, we have a random backoff mechanism -
416 * see the comment above rds_queue_reconnect()
417 */
418 mutex_lock(&conn->c_cm_lock);
419 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
420 if (rds_conn_state(conn) == RDS_CONN_UP) {
421 rdsdebug("incoming connect while connecting\n");
422 rds_conn_drop(conn);
423 rds_ib_stats_inc(s_ib_listen_closed_stale);
424 } else
425 if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
426 /* Wait and see - our connect may still be succeeding */
427 rds_ib_stats_inc(s_ib_connect_raced);
428 }
429 mutex_unlock(&conn->c_cm_lock);
430 goto out;
431 }
432
433 ic = conn->c_transport_data;
434
435 rds_ib_set_protocol(conn, version);
436 rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
437
438 /* If the peer gave us the last packet it saw, process this as if
439 * we had received a regular ACK. */
440 if (dp->dp_ack_seq)
441 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
442
443 BUG_ON(cm_id->context);
444 BUG_ON(ic->i_cm_id);
445
446 ic->i_cm_id = cm_id;
447 cm_id->context = conn;
448
449 /* We got halfway through setting up the ib_connection; if we
450 * fail now, we have to take the long route out of this mess. */
451 destroy = 0;
452
453 err = rds_ib_setup_qp(conn);
454 if (err) {
455 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
456 goto out;
457 }
458
459 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version);
460
461 /* rdma_accept() calls rdma_reject() internally if it fails */
462 err = rdma_accept(cm_id, &conn_param);
463 mutex_unlock(&conn->c_cm_lock);
464 if (err) {
465 rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
466 goto out;
467 }
468
469 return 0;
470
471out:
472 rdma_reject(cm_id, NULL, 0);
473 return destroy;
474}
475
476
477int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
478{
479 struct rds_connection *conn = cm_id->context;
480 struct rds_ib_connection *ic = conn->c_transport_data;
481 struct rdma_conn_param conn_param;
482 struct rds_ib_connect_private dp;
483 int ret;
484
485 /* If the peer doesn't do protocol negotiation, we must
486 * default to RDSv3.0 */
487 rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0);
488 ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */
489
490 ret = rds_ib_setup_qp(conn);
491 if (ret) {
492 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", ret);
493 goto out;
494 }
495
496 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION);
497
498 ret = rdma_connect(cm_id, &conn_param);
499 if (ret)
500 rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
501
502out:
503 /* Beware - returning non-zero tells the rdma_cm to destroy
504 * the cm_id. We should certainly not do it as long as we still
505 * "own" the cm_id. */
506 if (ret) {
507 if (ic->i_cm_id == cm_id)
508 ret = 0;
509 }
510 return ret;
511}
512
513int rds_ib_conn_connect(struct rds_connection *conn)
514{
515 struct rds_ib_connection *ic = conn->c_transport_data;
516 struct sockaddr_in src, dest;
517 int ret;
518
519 /* XXX I wonder what effect the port space has */
520 /* delegate cm event handler to rdma_transport */
521 ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
522 RDMA_PS_TCP);
523 if (IS_ERR(ic->i_cm_id)) {
524 ret = PTR_ERR(ic->i_cm_id);
525 ic->i_cm_id = NULL;
526 rdsdebug("rdma_create_id() failed: %d\n", ret);
527 goto out;
528 }
529
530 rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn);
531
532 src.sin_family = AF_INET;
533 src.sin_addr.s_addr = (__force u32)conn->c_laddr;
534 src.sin_port = (__force u16)htons(0);
535
536 dest.sin_family = AF_INET;
537 dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
538 dest.sin_port = (__force u16)htons(RDS_PORT);
539
540 ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
541 (struct sockaddr *)&dest,
542 RDS_RDMA_RESOLVE_TIMEOUT_MS);
543 if (ret) {
544 rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id,
545 ret);
546 rdma_destroy_id(ic->i_cm_id);
547 ic->i_cm_id = NULL;
548 }
549
550out:
551 return ret;
552}
553
554/*
555 * This is so careful about only cleaning up resources that were built up
556 * so that it can be called at any point during startup. In fact it
557 * can be called multiple times for a given connection.
558 */
559void rds_ib_conn_shutdown(struct rds_connection *conn)
560{
561 struct rds_ib_connection *ic = conn->c_transport_data;
562 int err = 0;
563
564 rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id,
565 ic->i_pd, ic->i_send_cq, ic->i_recv_cq,
566 ic->i_cm_id ? ic->i_cm_id->qp : NULL);
567
568 if (ic->i_cm_id) {
569 struct ib_device *dev = ic->i_cm_id->device;
570
571 rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
572 err = rdma_disconnect(ic->i_cm_id);
573 if (err) {
574 /* Actually this may happen quite frequently, when
575 * an outgoing connect raced with an incoming connect.
576 */
577 rdsdebug("failed to disconnect, cm: %p err %d\n",
578 ic->i_cm_id, err);
579 }
580
581 wait_event(rds_ib_ring_empty_wait,
582 rds_ib_ring_empty(&ic->i_send_ring) &&
583 rds_ib_ring_empty(&ic->i_recv_ring));
584
585 if (ic->i_send_hdrs)
586 ib_dma_free_coherent(dev,
587 ic->i_send_ring.w_nr *
588 sizeof(struct rds_header),
589 ic->i_send_hdrs,
590 ic->i_send_hdrs_dma);
591
592 if (ic->i_recv_hdrs)
593 ib_dma_free_coherent(dev,
594 ic->i_recv_ring.w_nr *
595 sizeof(struct rds_header),
596 ic->i_recv_hdrs,
597 ic->i_recv_hdrs_dma);
598
599 if (ic->i_ack)
600 ib_dma_free_coherent(dev, sizeof(struct rds_header),
601 ic->i_ack, ic->i_ack_dma);
602
603 if (ic->i_sends)
604 rds_ib_send_clear_ring(ic);
605 if (ic->i_recvs)
606 rds_ib_recv_clear_ring(ic);
607
608 if (ic->i_cm_id->qp)
609 rdma_destroy_qp(ic->i_cm_id);
610 if (ic->i_send_cq)
611 ib_destroy_cq(ic->i_send_cq);
612 if (ic->i_recv_cq)
613 ib_destroy_cq(ic->i_recv_cq);
614 rdma_destroy_id(ic->i_cm_id);
615
616 /*
617 * Move connection back to the nodev list.
618 */
619 if (ic->rds_ibdev) {
620
621 spin_lock_irq(&ic->rds_ibdev->spinlock);
622 BUG_ON(list_empty(&ic->ib_node));
623 list_del(&ic->ib_node);
624 spin_unlock_irq(&ic->rds_ibdev->spinlock);
625
626 spin_lock_irq(&ib_nodev_conns_lock);
627 list_add_tail(&ic->ib_node, &ib_nodev_conns);
628 spin_unlock_irq(&ib_nodev_conns_lock);
629 ic->rds_ibdev = NULL;
630 }
631
632 ic->i_cm_id = NULL;
633 ic->i_pd = NULL;
634 ic->i_mr = NULL;
635 ic->i_send_cq = NULL;
636 ic->i_recv_cq = NULL;
637 ic->i_send_hdrs = NULL;
638 ic->i_recv_hdrs = NULL;
639 ic->i_ack = NULL;
640 }
641 BUG_ON(ic->rds_ibdev);
642
643 /* Clear pending transmit */
644 if (ic->i_rm) {
645 rds_message_put(ic->i_rm);
646 ic->i_rm = NULL;
647 }
648
649 /* Clear the ACK state */
650 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
651 rds_ib_set_64bit(&ic->i_ack_next, 0);
652 ic->i_ack_recv = 0;
653
654 /* Clear flow control state */
655 ic->i_flowctl = 0;
656 atomic_set(&ic->i_credits, 0);
657
658 rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
659 rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
660
661 if (ic->i_ibinc) {
662 rds_inc_put(&ic->i_ibinc->ii_inc);
663 ic->i_ibinc = NULL;
664 }
665
666 vfree(ic->i_sends);
667 ic->i_sends = NULL;
668 vfree(ic->i_recvs);
669 ic->i_recvs = NULL;
670}
671
672int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
673{
674 struct rds_ib_connection *ic;
675 unsigned long flags;
676
677 /* XXX too lazy? */
678 ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL);
679 if (ic == NULL)
680 return -ENOMEM;
681
682 INIT_LIST_HEAD(&ic->ib_node);
683 mutex_init(&ic->i_recv_mutex);
684
685 /*
686 * rds_ib_conn_shutdown() waits for these to be emptied so they
687 * must be initialized before it can be called.
688 */
689 rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr);
690 rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr);
691
692 ic->conn = conn;
693 conn->c_transport_data = ic;
694
695 spin_lock_irqsave(&ib_nodev_conns_lock, flags);
696 list_add_tail(&ic->ib_node, &ib_nodev_conns);
697 spin_unlock_irqrestore(&ib_nodev_conns_lock, flags);
698
699
700 rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data);
701 return 0;
702}
703
704void rds_ib_conn_free(void *arg)
705{
706 struct rds_ib_connection *ic = arg;
707 rdsdebug("ic %p\n", ic);
708 list_del(&ic->ib_node);
709 kfree(ic);
710}
711
712
713/*
714 * An error occurred on the connection
715 */
716void
717__rds_ib_conn_error(struct rds_connection *conn, const char *fmt, ...)
718{
719 va_list ap;
720
721 rds_conn_drop(conn);
722
723 va_start(ap, fmt);
724 vprintk(fmt, ap);
725 va_end(ap);
726}
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
new file mode 100644
index 000000000000..69a6289ed672
--- /dev/null
+++ b/net/rds/ib_rdma.c
@@ -0,0 +1,641 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34
35#include "rds.h"
36#include "rdma.h"
37#include "ib.h"
38
39
40/*
41 * This is stored as mr->r_trans_private.
42 */
43struct rds_ib_mr {
44 struct rds_ib_device *device;
45 struct rds_ib_mr_pool *pool;
46 struct ib_fmr *fmr;
47 struct list_head list;
48 unsigned int remap_count;
49
50 struct scatterlist *sg;
51 unsigned int sg_len;
52 u64 *dma;
53 int sg_dma_len;
54};
55
56/*
57 * Our own little FMR pool
58 */
59struct rds_ib_mr_pool {
60 struct mutex flush_lock; /* serialize fmr invalidate */
61 struct work_struct flush_worker; /* flush worker */
62
63 spinlock_t list_lock; /* protect variables below */
64 atomic_t item_count; /* total # of MRs */
65 atomic_t dirty_count; /* # of dirty MRs */
66 struct list_head drop_list; /* MRs that have reached their max_maps limit */
67 struct list_head free_list; /* unused MRs */
68 struct list_head clean_list; /* unused & unmapped MRs */
69 atomic_t free_pinned; /* memory pinned by free MRs */
70 unsigned long max_items;
71 unsigned long max_items_soft;
72 unsigned long max_free_pinned;
73 struct ib_fmr_attr fmr_attr;
74};
75
76static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all);
77static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr);
78static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
79
80static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
81{
82 struct rds_ib_device *rds_ibdev;
83 struct rds_ib_ipaddr *i_ipaddr;
84
85 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
86 spin_lock_irq(&rds_ibdev->spinlock);
87 list_for_each_entry(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
88 if (i_ipaddr->ipaddr == ipaddr) {
89 spin_unlock_irq(&rds_ibdev->spinlock);
90 return rds_ibdev;
91 }
92 }
93 spin_unlock_irq(&rds_ibdev->spinlock);
94 }
95
96 return NULL;
97}
98
99static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
100{
101 struct rds_ib_ipaddr *i_ipaddr;
102
103 i_ipaddr = kmalloc(sizeof *i_ipaddr, GFP_KERNEL);
104 if (!i_ipaddr)
105 return -ENOMEM;
106
107 i_ipaddr->ipaddr = ipaddr;
108
109 spin_lock_irq(&rds_ibdev->spinlock);
110 list_add_tail(&i_ipaddr->list, &rds_ibdev->ipaddr_list);
111 spin_unlock_irq(&rds_ibdev->spinlock);
112
113 return 0;
114}
115
116static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
117{
118 struct rds_ib_ipaddr *i_ipaddr, *next;
119
120 spin_lock_irq(&rds_ibdev->spinlock);
121 list_for_each_entry_safe(i_ipaddr, next, &rds_ibdev->ipaddr_list, list) {
122 if (i_ipaddr->ipaddr == ipaddr) {
123 list_del(&i_ipaddr->list);
124 kfree(i_ipaddr);
125 break;
126 }
127 }
128 spin_unlock_irq(&rds_ibdev->spinlock);
129}
130
131int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
132{
133 struct rds_ib_device *rds_ibdev_old;
134
135 rds_ibdev_old = rds_ib_get_device(ipaddr);
136 if (rds_ibdev_old)
137 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
138
139 return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
140}
141
142int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
143{
144 struct rds_ib_connection *ic = conn->c_transport_data;
145
146 /* conn was previously on the nodev_conns_list */
147 spin_lock_irq(&ib_nodev_conns_lock);
148 BUG_ON(list_empty(&ib_nodev_conns));
149 BUG_ON(list_empty(&ic->ib_node));
150 list_del(&ic->ib_node);
151 spin_unlock_irq(&ib_nodev_conns_lock);
152
153 spin_lock_irq(&rds_ibdev->spinlock);
154 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
155 spin_unlock_irq(&rds_ibdev->spinlock);
156
157 ic->rds_ibdev = rds_ibdev;
158
159 return 0;
160}
161
162void rds_ib_remove_nodev_conns(void)
163{
164 struct rds_ib_connection *ic, *_ic;
165 LIST_HEAD(tmp_list);
166
167 /* avoid calling conn_destroy with irqs off */
168 spin_lock_irq(&ib_nodev_conns_lock);
169 list_splice(&ib_nodev_conns, &tmp_list);
170 INIT_LIST_HEAD(&ib_nodev_conns);
171 spin_unlock_irq(&ib_nodev_conns_lock);
172
173 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) {
174 if (ic->conn->c_passive)
175 rds_conn_destroy(ic->conn->c_passive);
176 rds_conn_destroy(ic->conn);
177 }
178}
179
180void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev)
181{
182 struct rds_ib_connection *ic, *_ic;
183 LIST_HEAD(tmp_list);
184
185 /* avoid calling conn_destroy with irqs off */
186 spin_lock_irq(&rds_ibdev->spinlock);
187 list_splice(&rds_ibdev->conn_list, &tmp_list);
188 INIT_LIST_HEAD(&rds_ibdev->conn_list);
189 spin_unlock_irq(&rds_ibdev->spinlock);
190
191 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) {
192 if (ic->conn->c_passive)
193 rds_conn_destroy(ic->conn->c_passive);
194 rds_conn_destroy(ic->conn);
195 }
196}
197
198struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
199{
200 struct rds_ib_mr_pool *pool;
201
202 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
203 if (!pool)
204 return ERR_PTR(-ENOMEM);
205
206 INIT_LIST_HEAD(&pool->free_list);
207 INIT_LIST_HEAD(&pool->drop_list);
208 INIT_LIST_HEAD(&pool->clean_list);
209 mutex_init(&pool->flush_lock);
210 spin_lock_init(&pool->list_lock);
211 INIT_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
212
213 pool->fmr_attr.max_pages = fmr_message_size;
214 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
215 pool->fmr_attr.page_shift = rds_ibdev->fmr_page_shift;
216 pool->max_free_pinned = rds_ibdev->max_fmrs * fmr_message_size / 4;
217
218 /* We never allow more than max_items MRs to be allocated.
219 * When we exceed max_items_soft, we start freeing
220 * items more aggressively.
221 * Make sure that max_items > max_items_soft > max_items / 2
222 */
223 pool->max_items_soft = rds_ibdev->max_fmrs * 3 / 4;
224 pool->max_items = rds_ibdev->max_fmrs;
225
226 return pool;
227}
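(Editorial worked example; the device limits are hypothetical.) With max_fmrs = 4096 and fmr_message_size = 256, the pool above gets max_items = 4096, max_items_soft = 4096 * 3 / 4 = 3072 (which satisfies max_items > max_items_soft > max_items / 2), and max_free_pinned = 4096 * 256 / 4 = 262144 pages that free-but-still-pinned MRs may hold before rds_ib_free_mr() queues a flush.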
228
229void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo)
230{
231 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
232
233 iinfo->rdma_mr_max = pool->max_items;
234 iinfo->rdma_mr_size = pool->fmr_attr.max_pages;
235}
236
237void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
238{
239 flush_workqueue(rds_wq);
240 rds_ib_flush_mr_pool(pool, 1);
241 BUG_ON(atomic_read(&pool->item_count));
242 BUG_ON(atomic_read(&pool->free_pinned));
243 kfree(pool);
244}
245
246static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
247{
248 struct rds_ib_mr *ibmr = NULL;
249 unsigned long flags;
250
251 spin_lock_irqsave(&pool->list_lock, flags);
252 if (!list_empty(&pool->clean_list)) {
253 ibmr = list_entry(pool->clean_list.next, struct rds_ib_mr, list);
254 list_del_init(&ibmr->list);
255 }
256 spin_unlock_irqrestore(&pool->list_lock, flags);
257
258 return ibmr;
259}
260
261static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
262{
263 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
264 struct rds_ib_mr *ibmr = NULL;
265 int err = 0, iter = 0;
266
267 while (1) {
268 ibmr = rds_ib_reuse_fmr(pool);
269 if (ibmr)
270 return ibmr;
271
272 /* No clean MRs - now we have the choice of either
273 * allocating a fresh MR up to the limit imposed by the
274 * driver, or flushing any dirty unused MRs.
275 * We try to avoid stalling in the send path if possible,
276 * so we allocate as long as we're allowed to.
277 *
278 * We're fussy with enforcing the FMR limit, though. If the driver
279 * tells us we can't use more than N fmrs, we shouldn't start
280 * arguing with it */
281 if (atomic_inc_return(&pool->item_count) <= pool->max_items)
282 break;
283
284 atomic_dec(&pool->item_count);
285
286 if (++iter > 2) {
287 rds_ib_stats_inc(s_ib_rdma_mr_pool_depleted);
288 return ERR_PTR(-EAGAIN);
289 }
290
291 /* We do have some empty MRs. Flush them out. */
292 rds_ib_stats_inc(s_ib_rdma_mr_pool_wait);
293 rds_ib_flush_mr_pool(pool, 0);
294 }
295
296 ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL);
297 if (!ibmr) {
298 err = -ENOMEM;
299 goto out_no_cigar;
300 }
301
302 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
303 (IB_ACCESS_LOCAL_WRITE |
304 IB_ACCESS_REMOTE_READ |
305 IB_ACCESS_REMOTE_WRITE),
306 &pool->fmr_attr);
307 if (IS_ERR(ibmr->fmr)) {
308 err = PTR_ERR(ibmr->fmr);
309 ibmr->fmr = NULL;
310 printk(KERN_WARNING "RDS/IB: ib_alloc_fmr failed (err=%d)\n", err);
311 goto out_no_cigar;
312 }
313
314 rds_ib_stats_inc(s_ib_rdma_mr_alloc);
315 return ibmr;
316
317out_no_cigar:
318 if (ibmr) {
319 if (ibmr->fmr)
320 ib_dealloc_fmr(ibmr->fmr);
321 kfree(ibmr);
322 }
323 atomic_dec(&pool->item_count);
324 return ERR_PTR(err);
325}
326
327static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr,
328 struct scatterlist *sg, unsigned int nents)
329{
330 struct ib_device *dev = rds_ibdev->dev;
331 struct scatterlist *scat = sg;
332 u64 io_addr = 0;
333 u64 *dma_pages;
334 u32 len;
335 int page_cnt, sg_dma_len;
336 int i, j;
337 int ret;
338
339 sg_dma_len = ib_dma_map_sg(dev, sg, nents,
340 DMA_BIDIRECTIONAL);
341 if (unlikely(!sg_dma_len)) {
342 printk(KERN_WARNING "RDS/IB: dma_map_sg failed!\n");
343 return -EBUSY;
344 }
345
346 len = 0;
347 page_cnt = 0;
348
349 for (i = 0; i < sg_dma_len; ++i) {
350 unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
351 u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
352
353 if (dma_addr & ~rds_ibdev->fmr_page_mask) {
354 if (i > 0)
355 return -EINVAL;
356 else
357 ++page_cnt;
358 }
359 if ((dma_addr + dma_len) & ~rds_ibdev->fmr_page_mask) {
360 if (i < sg_dma_len - 1)
361 return -EINVAL;
362 else
363 ++page_cnt;
364 }
365
366 len += dma_len;
367 }
368
369 page_cnt += len >> rds_ibdev->fmr_page_shift;
370 if (page_cnt > fmr_message_size)
371 return -EINVAL;
372
373 dma_pages = kmalloc(sizeof(u64) * page_cnt, GFP_ATOMIC);
374 if (!dma_pages)
375 return -ENOMEM;
376
377 page_cnt = 0;
378 for (i = 0; i < sg_dma_len; ++i) {
379 unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]);
380 u64 dma_addr = ib_sg_dma_address(dev, &scat[i]);
381
382 for (j = 0; j < dma_len; j += rds_ibdev->fmr_page_size)
383 dma_pages[page_cnt++] =
384 (dma_addr & rds_ibdev->fmr_page_mask) + j;
385 }
386
387 ret = ib_map_phys_fmr(ibmr->fmr,
388 dma_pages, page_cnt, io_addr);
389 if (ret)
390 goto out;
391
392 /* Success - we successfully remapped the MR, so we can
393 * safely tear down the old mapping. */
394 rds_ib_teardown_mr(ibmr);
395
396 ibmr->sg = scat;
397 ibmr->sg_len = nents;
398 ibmr->sg_dma_len = sg_dma_len;
399 ibmr->remap_count++;
400
401 rds_ib_stats_inc(s_ib_rdma_mr_used);
402 ret = 0;
403
404out:
405 kfree(dma_pages);
406
407 return ret;
408}
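(Editorial worked example of the alignment rules above, assuming a 4 KiB FMR page size.) Because the FMR maps whole pages, only the first scatterlist entry may begin at an unaligned address and only the last may end at one. A three-entry mapping of [0x1800, 0x2000), [0x2000, 0x5000) and [0x5000, 0x5400) is accepted: the partial first and last entries contribute page_cnt = 2, the total length 0x3c00 shifted by the page shift contributes 3 more, and the resulting 5 entries in dma_pages cover pages 0x1000 through 0x5000. If the middle entry instead ended at 0x4c00, the unaligned boundary would make the function return -EINVAL.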
409
410void rds_ib_sync_mr(void *trans_private, int direction)
411{
412 struct rds_ib_mr *ibmr = trans_private;
413 struct rds_ib_device *rds_ibdev = ibmr->device;
414
415 switch (direction) {
416 case DMA_FROM_DEVICE:
417 ib_dma_sync_sg_for_cpu(rds_ibdev->dev, ibmr->sg,
418 ibmr->sg_dma_len, DMA_BIDIRECTIONAL);
419 break;
420 case DMA_TO_DEVICE:
421 ib_dma_sync_sg_for_device(rds_ibdev->dev, ibmr->sg,
422 ibmr->sg_dma_len, DMA_BIDIRECTIONAL);
423 break;
424 }
425}
426
427static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
428{
429 struct rds_ib_device *rds_ibdev = ibmr->device;
430
431 if (ibmr->sg_dma_len) {
432 ib_dma_unmap_sg(rds_ibdev->dev,
433 ibmr->sg, ibmr->sg_len,
434 DMA_BIDIRECTIONAL);
435 ibmr->sg_dma_len = 0;
436 }
437
438 /* Release the s/g list */
439 if (ibmr->sg_len) {
440 unsigned int i;
441
442 for (i = 0; i < ibmr->sg_len; ++i) {
443 struct page *page = sg_page(&ibmr->sg[i]);
444
445 /* FIXME we need a way to tell a r/w MR
446 * from a r/o MR */
447 set_page_dirty(page);
448 put_page(page);
449 }
450 kfree(ibmr->sg);
451
452 ibmr->sg = NULL;
453 ibmr->sg_len = 0;
454 }
455}
456
457static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
458{
459 unsigned int pinned = ibmr->sg_len;
460
461 __rds_ib_teardown_mr(ibmr);
462 if (pinned) {
463 struct rds_ib_device *rds_ibdev = ibmr->device;
464 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
465
466 atomic_sub(pinned, &pool->free_pinned);
467 }
468}
469
470static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int free_all)
471{
472 unsigned int item_count;
473
474 item_count = atomic_read(&pool->item_count);
475 if (free_all)
476 return item_count;
477
478 return 0;
479}
480
481/*
482 * Flush our pool of MRs.
483 * At a minimum, all currently unused MRs are unmapped.
484 * If the number of MRs allocated exceeds the limit, we also try
485 * to free as many MRs as needed to get back to this limit.
486 */
487static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
488{
489 struct rds_ib_mr *ibmr, *next;
490 LIST_HEAD(unmap_list);
491 LIST_HEAD(fmr_list);
492 unsigned long unpinned = 0;
493 unsigned long flags;
494 unsigned int nfreed = 0, ncleaned = 0, free_goal;
495 int ret = 0;
496
497 rds_ib_stats_inc(s_ib_rdma_mr_pool_flush);
498
499 mutex_lock(&pool->flush_lock);
500
501 spin_lock_irqsave(&pool->list_lock, flags);
502 /* Get the list of all MRs to be dropped. Ordering matters -
503 * we want to put drop_list ahead of free_list. */
504 list_splice_init(&pool->free_list, &unmap_list);
505 list_splice_init(&pool->drop_list, &unmap_list);
506 if (free_all)
507 list_splice_init(&pool->clean_list, &unmap_list);
508 spin_unlock_irqrestore(&pool->list_lock, flags);
509
510 free_goal = rds_ib_flush_goal(pool, free_all);
511
512 if (list_empty(&unmap_list))
513 goto out;
514
515 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
516 list_for_each_entry(ibmr, &unmap_list, list)
517 list_add(&ibmr->fmr->list, &fmr_list);
518 ret = ib_unmap_fmr(&fmr_list);
519 if (ret)
520 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret);
521
522 /* Now we can destroy the DMA mapping and unpin any pages */
523 list_for_each_entry_safe(ibmr, next, &unmap_list, list) {
524 unpinned += ibmr->sg_len;
525 __rds_ib_teardown_mr(ibmr);
526 if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) {
527 rds_ib_stats_inc(s_ib_rdma_mr_free);
528 list_del(&ibmr->list);
529 ib_dealloc_fmr(ibmr->fmr);
530 kfree(ibmr);
531 nfreed++;
532 }
533 ncleaned++;
534 }
535
536 spin_lock_irqsave(&pool->list_lock, flags);
537 list_splice(&unmap_list, &pool->clean_list);
538 spin_unlock_irqrestore(&pool->list_lock, flags);
539
540 atomic_sub(unpinned, &pool->free_pinned);
541 atomic_sub(ncleaned, &pool->dirty_count);
542 atomic_sub(nfreed, &pool->item_count);
543
544out:
545 mutex_unlock(&pool->flush_lock);
546 return ret;
547}
548
549static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
550{
551 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker);
552
553 rds_ib_flush_mr_pool(pool, 0);
554}
555
556void rds_ib_free_mr(void *trans_private, int invalidate)
557{
558 struct rds_ib_mr *ibmr = trans_private;
559 struct rds_ib_device *rds_ibdev = ibmr->device;
560 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
561 unsigned long flags;
562
563 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
564
565 /* Return it to the pool's free list */
566 spin_lock_irqsave(&pool->list_lock, flags);
567 if (ibmr->remap_count >= pool->fmr_attr.max_maps)
568 list_add(&ibmr->list, &pool->drop_list);
569 else
570 list_add(&ibmr->list, &pool->free_list);
571
572 atomic_add(ibmr->sg_len, &pool->free_pinned);
573 atomic_inc(&pool->dirty_count);
574 spin_unlock_irqrestore(&pool->list_lock, flags);
575
576 /* If we've pinned too many pages, request a flush */
577 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned
578 || atomic_read(&pool->dirty_count) >= pool->max_items / 10)
579 queue_work(rds_wq, &pool->flush_worker);
580
581 if (invalidate) {
582 if (likely(!in_interrupt())) {
583 rds_ib_flush_mr_pool(pool, 0);
584 } else {
585 /* We get here if the user created an MR marked
586 * as use_once and invalidate at the same time. */
587 queue_work(rds_wq, &pool->flush_worker);
588 }
589 }
590}
591
592void rds_ib_flush_mrs(void)
593{
594 struct rds_ib_device *rds_ibdev;
595
596 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
597 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
598
599 if (pool)
600 rds_ib_flush_mr_pool(pool, 0);
601 }
602}
603
604void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
605 struct rds_sock *rs, u32 *key_ret)
606{
607 struct rds_ib_device *rds_ibdev;
608 struct rds_ib_mr *ibmr = NULL;
609 int ret;
610
611 rds_ibdev = rds_ib_get_device(rs->rs_bound_addr);
612 if (!rds_ibdev) {
613 ret = -ENODEV;
614 goto out;
615 }
616
617 if (!rds_ibdev->mr_pool) {
618 ret = -ENODEV;
619 goto out;
620 }
621
622 ibmr = rds_ib_alloc_fmr(rds_ibdev);
623 if (IS_ERR(ibmr))
624 return ibmr;
625
626 ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
627 if (ret == 0)
628 *key_ret = ibmr->fmr->rkey;
629 else
630 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret);
631
632 ibmr->device = rds_ibdev;
633
634 out:
635 if (ret) {
636 if (ibmr)
637 rds_ib_free_mr(ibmr, 0);
638 ibmr = ERR_PTR(ret);
639 }
640 return ibmr;
641}
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
new file mode 100644
index 000000000000..5061b5502162
--- /dev/null
+++ b/net/rds/ib_recv.c
@@ -0,0 +1,869 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/pci.h>
35#include <linux/dma-mapping.h>
36#include <rdma/rdma_cm.h>
37
38#include "rds.h"
39#include "ib.h"
40
41static struct kmem_cache *rds_ib_incoming_slab;
42static struct kmem_cache *rds_ib_frag_slab;
43static atomic_t rds_ib_allocation = ATOMIC_INIT(0);
44
45static void rds_ib_frag_drop_page(struct rds_page_frag *frag)
46{
47 rdsdebug("frag %p page %p\n", frag, frag->f_page);
48 __free_page(frag->f_page);
49 frag->f_page = NULL;
50}
51
52static void rds_ib_frag_free(struct rds_page_frag *frag)
53{
54 rdsdebug("frag %p page %p\n", frag, frag->f_page);
55 BUG_ON(frag->f_page != NULL);
56 kmem_cache_free(rds_ib_frag_slab, frag);
57}
58
59/*
60 * We map a page at a time. Its fragments are posted in order. This
61 * is called in fragment order as the fragments get send completion events.
62 * Only the last frag in the page performs the unmapping.
63 *
64 * It's OK for ring cleanup to call this in whatever order it likes because
65 * DMA is not in flight and so we can unmap while other ring entries still
66 * hold page references in their frags.
67 */
68static void rds_ib_recv_unmap_page(struct rds_ib_connection *ic,
69 struct rds_ib_recv_work *recv)
70{
71 struct rds_page_frag *frag = recv->r_frag;
72
73 rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page);
74 if (frag->f_mapped)
75 ib_dma_unmap_page(ic->i_cm_id->device,
76 frag->f_mapped,
77 RDS_FRAG_SIZE, DMA_FROM_DEVICE);
78 frag->f_mapped = 0;
79}
80
81void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
82{
83 struct rds_ib_recv_work *recv;
84 u32 i;
85
86 for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
87 struct ib_sge *sge;
88
89 recv->r_ibinc = NULL;
90 recv->r_frag = NULL;
91
92 recv->r_wr.next = NULL;
93 recv->r_wr.wr_id = i;
94 recv->r_wr.sg_list = recv->r_sge;
95 recv->r_wr.num_sge = RDS_IB_RECV_SGE;
96
97 sge = rds_ib_data_sge(ic, recv->r_sge);
98 sge->addr = 0;
99 sge->length = RDS_FRAG_SIZE;
100 sge->lkey = ic->i_mr->lkey;
101
102 sge = rds_ib_header_sge(ic, recv->r_sge);
103 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
104 sge->length = sizeof(struct rds_header);
105 sge->lkey = ic->i_mr->lkey;
106 }
107}
108
109static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
110 struct rds_ib_recv_work *recv)
111{
112 if (recv->r_ibinc) {
113 rds_inc_put(&recv->r_ibinc->ii_inc);
114 recv->r_ibinc = NULL;
115 }
116 if (recv->r_frag) {
117 rds_ib_recv_unmap_page(ic, recv);
118 if (recv->r_frag->f_page)
119 rds_ib_frag_drop_page(recv->r_frag);
120 rds_ib_frag_free(recv->r_frag);
121 recv->r_frag = NULL;
122 }
123}
124
125void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
126{
127 u32 i;
128
129 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
130 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
131
132 if (ic->i_frag.f_page)
133 rds_ib_frag_drop_page(&ic->i_frag);
134}
135
136static int rds_ib_recv_refill_one(struct rds_connection *conn,
137 struct rds_ib_recv_work *recv,
138 gfp_t kptr_gfp, gfp_t page_gfp)
139{
140 struct rds_ib_connection *ic = conn->c_transport_data;
141 dma_addr_t dma_addr;
142 struct ib_sge *sge;
143 int ret = -ENOMEM;
144
145 if (recv->r_ibinc == NULL) {
146 if (atomic_read(&rds_ib_allocation) >= rds_ib_sysctl_max_recv_allocation) {
147 rds_ib_stats_inc(s_ib_rx_alloc_limit);
148 goto out;
149 }
150 recv->r_ibinc = kmem_cache_alloc(rds_ib_incoming_slab,
151 kptr_gfp);
152 if (recv->r_ibinc == NULL)
153 goto out;
154 atomic_inc(&rds_ib_allocation);
155 INIT_LIST_HEAD(&recv->r_ibinc->ii_frags);
156 rds_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr);
157 }
158
159 if (recv->r_frag == NULL) {
160 recv->r_frag = kmem_cache_alloc(rds_ib_frag_slab, kptr_gfp);
161 if (recv->r_frag == NULL)
162 goto out;
163 INIT_LIST_HEAD(&recv->r_frag->f_item);
164 recv->r_frag->f_page = NULL;
165 }
166
167 if (ic->i_frag.f_page == NULL) {
168 ic->i_frag.f_page = alloc_page(page_gfp);
169 if (ic->i_frag.f_page == NULL)
170 goto out;
171 ic->i_frag.f_offset = 0;
172 }
173
174 dma_addr = ib_dma_map_page(ic->i_cm_id->device,
175 ic->i_frag.f_page,
176 ic->i_frag.f_offset,
177 RDS_FRAG_SIZE,
178 DMA_FROM_DEVICE);
179 if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr))
180 goto out;
181
182 /*
183 * Once we get the RDS_PAGE_LAST_OFF frag then rds_ib_frag_unmap()
184 * must be called on this recv. This happens as completions hit
185 * in order or on connection shutdown.
186 */
187 recv->r_frag->f_page = ic->i_frag.f_page;
188 recv->r_frag->f_offset = ic->i_frag.f_offset;
189 recv->r_frag->f_mapped = dma_addr;
190
191 sge = rds_ib_data_sge(ic, recv->r_sge);
192 sge->addr = dma_addr;
193 sge->length = RDS_FRAG_SIZE;
194
195 sge = rds_ib_header_sge(ic, recv->r_sge);
196 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
197 sge->length = sizeof(struct rds_header);
198
199 get_page(recv->r_frag->f_page);
200
201 if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) {
202 ic->i_frag.f_offset += RDS_FRAG_SIZE;
203 } else {
204 put_page(ic->i_frag.f_page);
205 ic->i_frag.f_page = NULL;
206 ic->i_frag.f_offset = 0;
207 }
208
209 ret = 0;
210out:
211 return ret;
212}
213
214/*
215 * This tries to allocate and post unused work requests after making sure that
216 * they have all the allocations they need to queue received fragments into
217 * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc
218 * pairs don't go unmatched.
219 *
220 * -1 is returned if posting fails due to temporary resource exhaustion.
221 */
222int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
223 gfp_t page_gfp, int prefill)
224{
225 struct rds_ib_connection *ic = conn->c_transport_data;
226 struct rds_ib_recv_work *recv;
227 struct ib_recv_wr *failed_wr;
228 unsigned int posted = 0;
229 int ret = 0;
230 u32 pos;
231
232 while ((prefill || rds_conn_up(conn))
233 && rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
234 if (pos >= ic->i_recv_ring.w_nr) {
235 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
236 pos);
237 ret = -EINVAL;
238 break;
239 }
240
241 recv = &ic->i_recvs[pos];
242 ret = rds_ib_recv_refill_one(conn, recv, kptr_gfp, page_gfp);
243 if (ret) {
244 ret = -1;
245 break;
246 }
247
248 /* XXX when can this fail? */
249 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
250 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
251 recv->r_ibinc, recv->r_frag->f_page,
252 (long) recv->r_frag->f_mapped, ret);
253 if (ret) {
254 rds_ib_conn_error(conn, "recv post on "
255 "%pI4 returned %d, disconnecting and "
256 "reconnecting\n", &conn->c_faddr,
257 ret);
258 ret = -1;
259 break;
260 }
261
262 posted++;
263 }
264
265 /* We're doing flow control - update the window. */
266 if (ic->i_flowctl && posted)
267 rds_ib_advertise_credits(conn, posted);
268
269 if (ret)
270 rds_ib_ring_unalloc(&ic->i_recv_ring, 1);
271 return ret;
272}
273
274void rds_ib_inc_purge(struct rds_incoming *inc)
275{
276 struct rds_ib_incoming *ibinc;
277 struct rds_page_frag *frag;
278 struct rds_page_frag *pos;
279
280 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
281 rdsdebug("purging ibinc %p inc %p\n", ibinc, inc);
282
283 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
284 list_del_init(&frag->f_item);
285 rds_ib_frag_drop_page(frag);
286 rds_ib_frag_free(frag);
287 }
288}
289
290void rds_ib_inc_free(struct rds_incoming *inc)
291{
292 struct rds_ib_incoming *ibinc;
293
294 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
295
296 rds_ib_inc_purge(inc);
297 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
298 BUG_ON(!list_empty(&ibinc->ii_frags));
299 kmem_cache_free(rds_ib_incoming_slab, ibinc);
300 atomic_dec(&rds_ib_allocation);
301 BUG_ON(atomic_read(&rds_ib_allocation) < 0);
302}
303
304int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
305 size_t size)
306{
307 struct rds_ib_incoming *ibinc;
308 struct rds_page_frag *frag;
309 struct iovec *iov = first_iov;
310 unsigned long to_copy;
311 unsigned long frag_off = 0;
312 unsigned long iov_off = 0;
313 int copied = 0;
314 int ret;
315 u32 len;
316
317 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
318 frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
319 len = be32_to_cpu(inc->i_hdr.h_len);
320
321 while (copied < size && copied < len) {
322 if (frag_off == RDS_FRAG_SIZE) {
323 frag = list_entry(frag->f_item.next,
324 struct rds_page_frag, f_item);
325 frag_off = 0;
326 }
327 while (iov_off == iov->iov_len) {
328 iov_off = 0;
329 iov++;
330 }
331
332 to_copy = min(iov->iov_len - iov_off, RDS_FRAG_SIZE - frag_off);
333 to_copy = min_t(size_t, to_copy, size - copied);
334 to_copy = min_t(unsigned long, to_copy, len - copied);
335
336 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag "
337 "[%p, %lu] + %lu\n",
338 to_copy, iov->iov_base, iov->iov_len, iov_off,
339 frag->f_page, frag->f_offset, frag_off);
340
341 /* XXX needs + offset for multiple recvs per page */
342 ret = rds_page_copy_to_user(frag->f_page,
343 frag->f_offset + frag_off,
344 iov->iov_base + iov_off,
345 to_copy);
346 if (ret) {
347 copied = ret;
348 break;
349 }
350
351 iov_off += to_copy;
352 frag_off += to_copy;
353 copied += to_copy;
354 }
355
356 return copied;
357}
358
359/* ic starts out kzalloc()ed */
360void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
361{
362 struct ib_send_wr *wr = &ic->i_ack_wr;
363 struct ib_sge *sge = &ic->i_ack_sge;
364
365 sge->addr = ic->i_ack_dma;
366 sge->length = sizeof(struct rds_header);
367 sge->lkey = ic->i_mr->lkey;
368
369 wr->sg_list = sge;
370 wr->num_sge = 1;
371 wr->opcode = IB_WR_SEND;
372 wr->wr_id = RDS_IB_ACK_WR_ID;
373 wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
374}
375
376/*
377 * You'd think that with reliable IB connections you wouldn't need to ack
378 * messages that have been received. The problem is that IB hardware generates
379 * an ack message before it has DMAed the message into memory. This creates a
380 * potential message loss if the HCA is disabled for any reason between when it
381 * sends the ack and before the message is DMAed and processed. This is only a
382 * potential issue if another HCA is available for fail-over.
383 *
384 * When the remote host receives our ack they'll free the sent message from
385 * their send queue. To decrease the latency of this we always send an ack
386 * immediately after we've received messages.
387 *
388 * For simplicity, we only have one ack in flight at a time. This puts
389 * pressure on senders to have deep enough send queues to absorb the latency of
390 * a single ack frame being in flight. This might not be good enough.
391 *
392 * This is implemented by having a long-lived send_wr and sge which point to a
393 * statically allocated ack frame. This ack wr does not fall under the ring
394 * accounting that the tx and rx wrs do. The QP attribute specifically makes
395 * room for it beyond the ring size. Send completion notices its special
396 * wr_id and avoids working with the ring in that case.
397 */
398static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
399 int ack_required)
400{
401 rds_ib_set_64bit(&ic->i_ack_next, seq);
402 if (ack_required) {
403 smp_mb__before_clear_bit();
404 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
405 }
406}
407
408static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
409{
410 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
411 smp_mb__after_clear_bit();
412
413 return ic->i_ack_next;
414}
415
416static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits)
417{
418 struct rds_header *hdr = ic->i_ack;
419 struct ib_send_wr *failed_wr;
420 u64 seq;
421 int ret;
422
423 seq = rds_ib_get_ack(ic);
424
425 rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
426 rds_message_populate_header(hdr, 0, 0, 0);
427 hdr->h_ack = cpu_to_be64(seq);
428 hdr->h_credit = adv_credits;
429 rds_message_make_checksum(hdr);
430 ic->i_ack_queued = jiffies;
431
432 ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr);
433 if (unlikely(ret)) {
434 /* Failed to send. Release the WR, and
435 * force another ACK.
436 */
437 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
438 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
439
440 rds_ib_stats_inc(s_ib_ack_send_failure);
441 /* Need to finesse this later. */
442 BUG();
443 } else
444 rds_ib_stats_inc(s_ib_ack_sent);
445}
446
447/*
448 * There are 3 ways of getting acknowledgements to the peer:
449 * 1. We call rds_ib_attempt_ack from the recv completion handler
450 * to send an ACK-only frame.
451 * However, there can be only one such frame in the send queue
452 * at any time, so we may have to postpone it.
453 * 2. When another (data) packet is transmitted while there's
454 * an ACK in the queue, we piggyback the ACK sequence number
455 * on the data packet.
456 * 3. If the ACK WR is done sending, we get called from the
457 * send queue completion handler, and check whether there's
458 * another ACK pending (postponed because the WR was on the
459 * queue). If so, we transmit it.
460 *
461 * We maintain 2 variables:
462 * - i_ack_flags, which keeps track of whether the ACK WR
463 * is currently in the send queue or not (IB_ACK_IN_FLIGHT)
464 * - i_ack_next, which is the last sequence number we received
465 *
466 * Potentially, send queue and receive queue handlers can run concurrently.
467 *
468 * Reconnecting complicates this picture just slightly. When we
469 * reconnect, we may be seeing duplicate packets. The peer
470 * is retransmitting them, because it hasn't seen an ACK for
471 * them. It is important that we ACK these.
472 *
473 * ACK mitigation adds a header flag "ACK_REQUIRED"; any packet with
474 * this flag set *MUST* be acknowledged immediately.
475 */
476
477/*
478 * When we get here, we're called from the recv queue handler.
479 * Check whether we ought to transmit an ACK.
480 */
481void rds_ib_attempt_ack(struct rds_ib_connection *ic)
482{
483 unsigned int adv_credits;
484
485 if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
486 return;
487
488 if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
489 rds_ib_stats_inc(s_ib_ack_send_delayed);
490 return;
491 }
492
493 /* Can we get a send credit? */
494 if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0)) {
495 rds_ib_stats_inc(s_ib_tx_throttle);
496 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
497 return;
498 }
499
500 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
501 rds_ib_send_ack(ic, adv_credits);
502}
503
504/*
505 * We get here from the send completion handler, when the
506 * adapter tells us the ACK frame was sent.
507 */
508void rds_ib_ack_send_complete(struct rds_ib_connection *ic)
509{
510 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
511 rds_ib_attempt_ack(ic);
512}
513
514/*
515 * This is called by the regular xmit code when it wants to piggyback
516 * an ACK on an outgoing frame.
517 */
518u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
519{
520 if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
521 rds_ib_stats_inc(s_ib_ack_send_piggybacked);
522 return rds_ib_get_ack(ic);
523}
524
525/*
526 * It's kind of lame that we're copying from the posted receive pages into
527 * long-lived bitmaps. We could have posted the bitmaps and rdma written into
528 * them. But receiving new congestion bitmaps should be a *rare* event, so
529 * hopefully we won't need to invest that complexity in making it more
530 * efficient. By copying we can share a simpler core with TCP which has to
531 * copy.
532 */
533static void rds_ib_cong_recv(struct rds_connection *conn,
534 struct rds_ib_incoming *ibinc)
535{
536 struct rds_cong_map *map;
537 unsigned int map_off;
538 unsigned int map_page;
539 struct rds_page_frag *frag;
540 unsigned long frag_off;
541 unsigned long to_copy;
542 unsigned long copied;
543 uint64_t uncongested = 0;
544 void *addr;
545
546 /* catch completely corrupt packets */
547 if (be32_to_cpu(ibinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES)
548 return;
549
550 map = conn->c_fcong;
551 map_page = 0;
552 map_off = 0;
553
554 frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item);
555 frag_off = 0;
556
557 copied = 0;
558
559 while (copied < RDS_CONG_MAP_BYTES) {
560 uint64_t *src, *dst;
561 unsigned int k;
562
563 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
564 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
565
566 addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0);
567
568 src = addr + frag_off;
569 dst = (void *)map->m_page_addrs[map_page] + map_off;
570 for (k = 0; k < to_copy; k += 8) {
571 /* Record ports that became uncongested, ie
572 * bits that changed from 0 to 1. */
573 uncongested |= ~(*src) & *dst;
574 *dst++ = *src++;
575 }
576 kunmap_atomic(addr, KM_SOFTIRQ0);
577
578 copied += to_copy;
579
580 map_off += to_copy;
581 if (map_off == PAGE_SIZE) {
582 map_off = 0;
583 map_page++;
584 }
585
586 frag_off += to_copy;
587 if (frag_off == RDS_FRAG_SIZE) {
588 frag = list_entry(frag->f_item.next,
589 struct rds_page_frag, f_item);
590 frag_off = 0;
591 }
592 }
593
594 /* the congestion map is in little endian order */
595 uncongested = le64_to_cpu(uncongested);
596
597 rds_cong_map_updated(map, uncongested);
598}
599
600/*
601 * Rings are posted with all the allocations they'll need to queue the
602 * incoming message to the receiving socket so this can't fail.
603 * All fragments start with a header, so we can make sure we're not receiving
604 * garbage, and we can tell a small 8 byte fragment from an ACK frame.
605 */
606struct rds_ib_ack_state {
607 u64 ack_next;
608 u64 ack_recv;
609 unsigned int ack_required:1;
610 unsigned int ack_next_valid:1;
611 unsigned int ack_recv_valid:1;
612};
613
614static void rds_ib_process_recv(struct rds_connection *conn,
615 struct rds_ib_recv_work *recv, u32 byte_len,
616 struct rds_ib_ack_state *state)
617{
618 struct rds_ib_connection *ic = conn->c_transport_data;
619 struct rds_ib_incoming *ibinc = ic->i_ibinc;
620 struct rds_header *ihdr, *hdr;
621
622 /* XXX shut down the connection if port 0,0 are seen? */
623
624 rdsdebug("ic %p ibinc %p recv %p byte len %u\n", ic, ibinc, recv,
625 byte_len);
626
627 if (byte_len < sizeof(struct rds_header)) {
628 rds_ib_conn_error(conn, "incoming message "
629 "from %pI4 didn't inclue a "
630 "header, disconnecting and "
631 "reconnecting\n",
632 &conn->c_faddr);
633 return;
634 }
635 byte_len -= sizeof(struct rds_header);
636
637 ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
638
639 /* Validate the checksum. */
640 if (!rds_message_verify_checksum(ihdr)) {
641 rds_ib_conn_error(conn, "incoming message "
642 "from %pI4 has corrupted header - "
643 "forcing a reconnect\n",
644 &conn->c_faddr);
645 rds_stats_inc(s_recv_drop_bad_checksum);
646 return;
647 }
648
649 /* Process the ACK sequence which comes with every packet */
650 state->ack_recv = be64_to_cpu(ihdr->h_ack);
651 state->ack_recv_valid = 1;
652
653 /* Process the credits update if there was one */
654 if (ihdr->h_credit)
655 rds_ib_send_add_credits(conn, ihdr->h_credit);
656
657 if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && byte_len == 0) {
658 /* This is an ACK-only packet. The reason it gets
659 * special treatment here is that historically, ACKs
660 * were rather special beasts.
661 */
662 rds_ib_stats_inc(s_ib_ack_received);
663
664 /*
665 * Usually the frags make their way on to incs and are then freed as
666 * the inc is freed. We don't go that route, so we have to drop the
667 * page ref ourselves. We can't just leave the page on the recv
668 * because that confuses the dma mapping of pages and each recv's use
669 * of a partial page. We can leave the frag, though, it will be
670 * reused.
671 *
672 * FIXME: Fold this into the code path below.
673 */
674 rds_ib_frag_drop_page(recv->r_frag);
675 return;
676 }
677
678 /*
679 * If we don't already have an inc on the connection then this
680 * fragment has a header and starts a message; copy its header
681 * into the inc and save the inc so we can hang upcoming fragments
682 * off its list.
683 */
684 if (ibinc == NULL) {
685 ibinc = recv->r_ibinc;
686 recv->r_ibinc = NULL;
687 ic->i_ibinc = ibinc;
688
689 hdr = &ibinc->ii_inc.i_hdr;
690 memcpy(hdr, ihdr, sizeof(*hdr));
691 ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
692
693 rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc,
694 ic->i_recv_data_rem, hdr->h_flags);
695 } else {
696 hdr = &ibinc->ii_inc.i_hdr;
697 /* We can't just use memcmp here; fragments of a
698 * single message may carry different ACKs */
699 if (hdr->h_sequence != ihdr->h_sequence
700 || hdr->h_len != ihdr->h_len
701 || hdr->h_sport != ihdr->h_sport
702 || hdr->h_dport != ihdr->h_dport) {
703 rds_ib_conn_error(conn,
704 "fragment header mismatch; forcing reconnect\n");
705 return;
706 }
707 }
708
709 list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags);
710 recv->r_frag = NULL;
711
712 if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
713 ic->i_recv_data_rem -= RDS_FRAG_SIZE;
714 else {
715 ic->i_recv_data_rem = 0;
716 ic->i_ibinc = NULL;
717
718 if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
719 rds_ib_cong_recv(conn, ibinc);
720 else {
721 rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
722 &ibinc->ii_inc, GFP_ATOMIC,
723 KM_SOFTIRQ0);
724 state->ack_next = be64_to_cpu(hdr->h_sequence);
725 state->ack_next_valid = 1;
726 }
727
728 /* Evaluate the ACK_REQUIRED flag *after* we received
729 * the complete frame, and after bumping the next_rx
730 * sequence. */
731 if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) {
732 rds_stats_inc(s_recv_ack_required);
733 state->ack_required = 1;
734 }
735
736 rds_inc_put(&ibinc->ii_inc);
737 }
738}
739
740/*
741 * Plucking the oldest entry from the ring can be done concurrently with
742 * the thread refilling the ring. Each ring operation is protected by
743 * spinlocks and the transient state of refilling doesn't change the
744 * recording of which entry is oldest.
745 *
746 * This relies on IB only calling one cq comp_handler for each cq so that
747 * there will only be one caller of rds_recv_incoming() per RDS connection.
748 */
749void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context)
750{
751 struct rds_connection *conn = context;
752 struct rds_ib_connection *ic = conn->c_transport_data;
753 struct ib_wc wc;
754 struct rds_ib_ack_state state = { 0, };
755 struct rds_ib_recv_work *recv;
756
757 rdsdebug("conn %p cq %p\n", conn, cq);
758
759 rds_ib_stats_inc(s_ib_rx_cq_call);
760
761 ib_req_notify_cq(cq, IB_CQ_SOLICITED);
762
763 while (ib_poll_cq(cq, 1, &wc) > 0) {
764 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
765 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
766 be32_to_cpu(wc.ex.imm_data));
767 rds_ib_stats_inc(s_ib_rx_cq_event);
768
769 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
770
771 rds_ib_recv_unmap_page(ic, recv);
772
773 /*
774 * Also process recvs in connecting state because it is possible
775 * to get a recv completion _before_ the rdmacm ESTABLISHED
776 * event is processed.
777 */
778 if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
779 /* We expect errors as the qp is drained during shutdown */
780 if (wc.status == IB_WC_SUCCESS) {
781 rds_ib_process_recv(conn, recv, wc.byte_len, &state);
782 } else {
783 rds_ib_conn_error(conn, "recv completion on "
784 "%pI4 had status %u, disconnecting and "
785 "reconnecting\n", &conn->c_faddr,
786 wc.status);
787 }
788 }
789
790 rds_ib_ring_free(&ic->i_recv_ring, 1);
791 }
792
793 if (state.ack_next_valid)
794 rds_ib_set_ack(ic, state.ack_next, state.ack_required);
795 if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
796 rds_send_drop_acked(conn, state.ack_recv, NULL);
797 ic->i_ack_recv = state.ack_recv;
798 }
799 if (rds_conn_up(conn))
800 rds_ib_attempt_ack(ic);
801
802 /* If we ever end up with a really empty receive ring, we're
803 * in deep trouble, as the sender will definitely see RNR
804 * timeouts. */
805 if (rds_ib_ring_empty(&ic->i_recv_ring))
806 rds_ib_stats_inc(s_ib_rx_ring_empty);
807
808 /*
809 * If the ring is running low, then schedule the thread to refill.
810 */
811 if (rds_ib_ring_low(&ic->i_recv_ring))
812 queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
813}
814
815int rds_ib_recv(struct rds_connection *conn)
816{
817 struct rds_ib_connection *ic = conn->c_transport_data;
818 int ret = 0;
819
820 rdsdebug("conn %p\n", conn);
821
822 /*
823 * If we get a temporary posting failure in this context then
824 * we're really low and we want the caller to back off for a bit.
825 */
826 mutex_lock(&ic->i_recv_mutex);
827 if (rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0))
828 ret = -ENOMEM;
829 else
830 rds_ib_stats_inc(s_ib_rx_refill_from_thread);
831 mutex_unlock(&ic->i_recv_mutex);
832
833 if (rds_conn_up(conn))
834 rds_ib_attempt_ack(ic);
835
836 return ret;
837}
838
839int __init rds_ib_recv_init(void)
840{
841 struct sysinfo si;
842 int ret = -ENOMEM;
843
844 /* Default to roughly a third of all available RAM for recv memory */
845 si_meminfo(&si);
846 rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
847
848 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
849 sizeof(struct rds_ib_incoming),
850 0, 0, NULL);
851 if (rds_ib_incoming_slab == NULL)
852 goto out;
853
854 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
855 sizeof(struct rds_page_frag),
856 0, 0, NULL);
857 if (rds_ib_frag_slab == NULL)
858 kmem_cache_destroy(rds_ib_incoming_slab);
859 else
860 ret = 0;
861out:
862 return ret;
863}
864
865void rds_ib_recv_exit(void)
866{
867 kmem_cache_destroy(rds_ib_incoming_slab);
868 kmem_cache_destroy(rds_ib_frag_slab);
869}
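
The single-ACK-in-flight scheme described in the comments above (IB_ACK_REQUESTED marks that an ACK is wanted, IB_ACK_IN_FLIGHT marks that the dedicated ACK work request is currently posted) can be modelled outside the kernel. The sketch below is an illustrative userspace analogue only, with C11 atomics standing in for the kernel's set_bit()/test_and_set_bit(); the names are hypothetical stand-ins for rds_ib_attempt_ack() and friends, not code from this patch.

/*
 * Illustrative userspace model (not part of the patch): how two flag
 * bits keep at most one ACK frame in the send queue at a time.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum { ACK_REQUESTED = 1 << 0, ACK_IN_FLIGHT = 1 << 1 };

static _Atomic unsigned int ack_flags;

static void note_ack_needed(void)          /* rds_ib_set_ack() analogue */
{
	atomic_fetch_or(&ack_flags, ACK_REQUESTED);
}

static bool try_send_ack(void)             /* rds_ib_attempt_ack() analogue */
{
	if (!(atomic_load(&ack_flags) & ACK_REQUESTED))
		return false;
	/* Claim the single in-flight slot; back off if an ACK is already queued. */
	if (atomic_fetch_or(&ack_flags, ACK_IN_FLIGHT) & ACK_IN_FLIGHT)
		return false;
	atomic_fetch_and(&ack_flags, ~ACK_REQUESTED);
	return true;                       /* the real code posts the ACK WR here */
}

static void ack_send_complete(void)        /* rds_ib_ack_send_complete() analogue */
{
	atomic_fetch_and(&ack_flags, ~ACK_IN_FLIGHT);
	try_send_ack();                    /* pick up any ACK postponed meanwhile */
}

int main(void)
{
	note_ack_needed();
	printf("first attempt sent: %d\n", try_send_ack());   /* prints 1 */
	note_ack_needed();
	printf("second attempt sent: %d\n", try_send_ack());  /* prints 0: one in flight */
	ack_send_complete();                                   /* retries and sends it */
	return 0;
}
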
diff --git a/net/rds/ib_ring.c b/net/rds/ib_ring.c
new file mode 100644
index 000000000000..99a6ccae964c
--- /dev/null
+++ b/net/rds/ib_ring.c
@@ -0,0 +1,168 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34
35#include "rds.h"
36#include "ib.h"
37
38/*
39 * Locking for IB rings.
40 * We assume that allocation is always protected by a mutex
41 * in the caller (this is a valid assumption for the current
42 * implementation).
43 *
44 * Freeing always happens in an interrupt, and hence only
45 * races with allocations, but not with other free()s.
46 *
47 * The interaction between allocation and freeing is that
48 * the alloc code has to determine the number of free entries.
49 * To this end, we maintain two counters: an allocation counter
50 * and a free counter. Both are allowed to run freely, and wrap
51 * around.
52 * The number of used entries is always (alloc_ctr - free_ctr) % NR.
53 *
54 * The current implementation makes free_ctr atomic. When the
55 * caller finds an allocation fails, it should set an "alloc fail"
56 * bit and retry the allocation. The "alloc fail" bit essentially tells
57 * the CQ completion handlers to wake it up after freeing some
58 * more entries.
59 */
60
61/*
62 * This only happens on shutdown.
63 */
64DECLARE_WAIT_QUEUE_HEAD(rds_ib_ring_empty_wait);
65
66void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr)
67{
68 memset(ring, 0, sizeof(*ring));
69 ring->w_nr = nr;
70 rdsdebug("ring %p nr %u\n", ring, ring->w_nr);
71}
72
73static inline u32 __rds_ib_ring_used(struct rds_ib_work_ring *ring)
74{
75 u32 diff;
76
77 /* This assumes that atomic_t has at least as many bits as u32 */
78 diff = ring->w_alloc_ctr - (u32) atomic_read(&ring->w_free_ctr);
79 BUG_ON(diff > ring->w_nr);
80
81 return diff;
82}
83
84void rds_ib_ring_resize(struct rds_ib_work_ring *ring, u32 nr)
85{
86 /* We only ever get called from the connection setup code,
87 * prior to creating the QP. */
88 BUG_ON(__rds_ib_ring_used(ring));
89 ring->w_nr = nr;
90}
91
92static int __rds_ib_ring_empty(struct rds_ib_work_ring *ring)
93{
94 return __rds_ib_ring_used(ring) == 0;
95}
96
97u32 rds_ib_ring_alloc(struct rds_ib_work_ring *ring, u32 val, u32 *pos)
98{
99 u32 ret = 0, avail;
100
101 avail = ring->w_nr - __rds_ib_ring_used(ring);
102
103 rdsdebug("ring %p val %u next %u free %u\n", ring, val,
104 ring->w_alloc_ptr, avail);
105
106 if (val && avail) {
107 ret = min(val, avail);
108 *pos = ring->w_alloc_ptr;
109
110 ring->w_alloc_ptr = (ring->w_alloc_ptr + ret) % ring->w_nr;
111 ring->w_alloc_ctr += ret;
112 }
113
114 return ret;
115}
116
117void rds_ib_ring_free(struct rds_ib_work_ring *ring, u32 val)
118{
119 ring->w_free_ptr = (ring->w_free_ptr + val) % ring->w_nr;
120 atomic_add(val, &ring->w_free_ctr);
121
122 if (__rds_ib_ring_empty(ring) &&
123 waitqueue_active(&rds_ib_ring_empty_wait))
124 wake_up(&rds_ib_ring_empty_wait);
125}
126
127void rds_ib_ring_unalloc(struct rds_ib_work_ring *ring, u32 val)
128{
129 ring->w_alloc_ptr = (ring->w_alloc_ptr - val) % ring->w_nr;
130 ring->w_alloc_ctr -= val;
131}
132
133int rds_ib_ring_empty(struct rds_ib_work_ring *ring)
134{
135 return __rds_ib_ring_empty(ring);
136}
137
138int rds_ib_ring_low(struct rds_ib_work_ring *ring)
139{
140 return __rds_ib_ring_used(ring) <= (ring->w_nr >> 2);
141}
142
143/*
144 * returns the oldest alloced ring entry. This will be the next one
145 * freed. This can't be called if there are none allocated.
146 */
147u32 rds_ib_ring_oldest(struct rds_ib_work_ring *ring)
148{
149 return ring->w_free_ptr;
150}
151
152/*
153 * returns the number of completed work requests.
154 */
155
156u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest)
157{
158 u32 ret;
159
160 if (oldest <= (unsigned long long)wr_id)
161 ret = (unsigned long long)wr_id - oldest + 1;
162 else
163 ret = ring->w_nr - oldest + (unsigned long long)wr_id + 1;
164
165 rdsdebug("ring %p ret %u wr_id %u oldest %u\n", ring, ret,
166 wr_id, oldest);
167 return ret;
168}
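
The ring accounting above leans on free-running u32 counters: the number of in-use entries is computed as alloc_ctr - free_ctr, and unsigned wrap-around keeps that difference correct as long as fewer than 2^32 entries are ever outstanding. A minimal, self-contained C check of that property (illustrative only, not part of the patch):

/*
 * Free-running unsigned counters give the correct "used" count even
 * after they wrap, which is what __rds_ib_ring_used() above relies on.
 */
#include <stdint.h>
#include <stdio.h>
#include <assert.h>

static uint32_t ring_used(uint32_t alloc_ctr, uint32_t free_ctr)
{
	return alloc_ctr - free_ctr;	/* well-defined modulo 2^32 */
}

int main(void)
{
	/* Near the wrap point: alloc_ctr has wrapped, free_ctr has not. */
	uint32_t free_ctr  = 0xfffffff0u;
	uint32_t alloc_ctr = free_ctr + 24;	/* wraps around to 0x00000008 */

	assert(ring_used(alloc_ctr, free_ctr) == 24);
	printf("used entries: %u\n", ring_used(alloc_ctr, free_ctr));
	return 0;
}
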
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
new file mode 100644
index 000000000000..cb6c52cb1c4c
--- /dev/null
+++ b/net/rds/ib_send.c
@@ -0,0 +1,874 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/device.h>
36#include <linux/dmapool.h>
37
38#include "rds.h"
39#include "rdma.h"
40#include "ib.h"
41
42static void rds_ib_send_rdma_complete(struct rds_message *rm,
43 int wc_status)
44{
45 int notify_status;
46
47 switch (wc_status) {
48 case IB_WC_WR_FLUSH_ERR:
49 return;
50
51 case IB_WC_SUCCESS:
52 notify_status = RDS_RDMA_SUCCESS;
53 break;
54
55 case IB_WC_REM_ACCESS_ERR:
56 notify_status = RDS_RDMA_REMOTE_ERROR;
57 break;
58
59 default:
60 notify_status = RDS_RDMA_OTHER_ERROR;
61 break;
62 }
63 rds_rdma_send_complete(rm, notify_status);
64}
65
66static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
67 struct rds_rdma_op *op)
68{
69 if (op->r_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->r_sg, op->r_nents,
72 op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->r_mapped = 0;
74 }
75}
76
77static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic,
78 struct rds_ib_send_work *send,
79 int wc_status)
80{
81 struct rds_message *rm = send->s_rm;
82
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
84
85 ib_dma_unmap_sg(ic->i_cm_id->device,
86 rm->m_sg, rm->m_nents,
87 DMA_TO_DEVICE);
88
89 if (rm->m_rdma_op != NULL) {
90 rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
91
92 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics:
94 * 1. Notify when we received the ACK on the RDS message
95 * that was queued with the RDMA. This provides reliable
96 * notification of RDMA status at the expense of a one-way
97 * packet delay.
98 * 2. Notify when the IB stack gives us the completion event for
99 * the RDMA operation.
100 * 3. Notify when the IB stack gives us the completion event for
101 * the accompanying RDS messages.
102 * Here, we implement approach #3. To implement approach #2,
103 * call rds_rdma_send_complete from the cq_handler. To implement #1,
104 * don't call rds_rdma_send_complete at all, and fall back to the notify
105 * handling in the ACK processing code.
106 *
107 * Note: There's no need to explicitly sync any RDMA buffers using
108 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
109 * operation itself unmapped the RDMA buffers, which takes care
110 * of synching.
111 */
112 rds_ib_send_rdma_complete(rm, wc_status);
113
114 if (rm->m_rdma_op->r_write)
115 rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes);
116 else
117 rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes);
118 }
119
120 /* If anyone waited for this message to get flushed out, wake
121 * them up now */
122 rds_message_unmapped(rm);
123
124 rds_message_put(rm);
125 send->s_rm = NULL;
126}
127
128void rds_ib_send_init_ring(struct rds_ib_connection *ic)
129{
130 struct rds_ib_send_work *send;
131 u32 i;
132
133 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
134 struct ib_sge *sge;
135
136 send->s_rm = NULL;
137 send->s_op = NULL;
138
139 send->s_wr.wr_id = i;
140 send->s_wr.sg_list = send->s_sge;
141 send->s_wr.num_sge = 1;
142 send->s_wr.opcode = IB_WR_SEND;
143 send->s_wr.send_flags = 0;
144 send->s_wr.ex.imm_data = 0;
145
146 sge = rds_ib_data_sge(ic, send->s_sge);
147 sge->lkey = ic->i_mr->lkey;
148
149 sge = rds_ib_header_sge(ic, send->s_sge);
150 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
151 sge->length = sizeof(struct rds_header);
152 sge->lkey = ic->i_mr->lkey;
153 }
154}
155
156void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
157{
158 struct rds_ib_send_work *send;
159 u32 i;
160
161 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
162 if (send->s_wr.opcode == 0xdead)
163 continue;
164 if (send->s_rm)
165 rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
166 if (send->s_op)
167 rds_ib_send_unmap_rdma(ic, send->s_op);
168 }
169}
170
171/*
172 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
173 * operations performed in the send path. As the sender allocs and potentially
174 * unallocs the next free entry in the ring it doesn't alter which is
175 * the next to be freed, which is what this is concerned with.
176 */
177void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
178{
179 struct rds_connection *conn = context;
180 struct rds_ib_connection *ic = conn->c_transport_data;
181 struct ib_wc wc;
182 struct rds_ib_send_work *send;
183 u32 completed;
184 u32 oldest;
185 u32 i = 0;
186 int ret;
187
188 rdsdebug("cq %p conn %p\n", cq, conn);
189 rds_ib_stats_inc(s_ib_tx_cq_call);
190 ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
191 if (ret)
192 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
193
194 while (ib_poll_cq(cq, 1, &wc) > 0) {
195 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
196 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
197 be32_to_cpu(wc.ex.imm_data));
198 rds_ib_stats_inc(s_ib_tx_cq_event);
199
200 if (wc.wr_id == RDS_IB_ACK_WR_ID) {
201 if (ic->i_ack_queued + HZ/2 < jiffies)
202 rds_ib_stats_inc(s_ib_tx_stalled);
203 rds_ib_ack_send_complete(ic);
204 continue;
205 }
206
207 oldest = rds_ib_ring_oldest(&ic->i_send_ring);
208
209 completed = rds_ib_ring_completed(&ic->i_send_ring, wc.wr_id, oldest);
210
211 for (i = 0; i < completed; i++) {
212 send = &ic->i_sends[oldest];
213
214 /* In the error case, wc.opcode sometimes contains garbage */
215 switch (send->s_wr.opcode) {
216 case IB_WR_SEND:
217 if (send->s_rm)
218 rds_ib_send_unmap_rm(ic, send, wc.status);
219 break;
220 case IB_WR_RDMA_WRITE:
221 case IB_WR_RDMA_READ:
222 /* Nothing to be done - the SG list will be unmapped
223 * when the SEND completes. */
224 break;
225 default:
226 if (printk_ratelimit())
227 printk(KERN_NOTICE
228 "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
229 __func__, send->s_wr.opcode);
230 break;
231 }
232
233 send->s_wr.opcode = 0xdead;
234 send->s_wr.num_sge = 1;
235 if (send->s_queued + HZ/2 < jiffies)
236 rds_ib_stats_inc(s_ib_tx_stalled);
237
238 /* If an RDMA operation produced an error, signal this right
239 * away. If we don't, the subsequent SEND that goes with this
240 * RDMA will be canceled with ERR_WFLUSH, and the application
241 * will never learn that the RDMA failed. */
242 if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) {
243 struct rds_message *rm;
244
245 rm = rds_send_get_message(conn, send->s_op);
246 if (rm)
247 rds_ib_send_rdma_complete(rm, wc.status);
248 }
249
250 oldest = (oldest + 1) % ic->i_send_ring.w_nr;
251 }
252
253 rds_ib_ring_free(&ic->i_send_ring, completed);
254
255 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags)
256 || test_bit(0, &conn->c_map_queued))
257 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
258
259 /* We expect errors as the qp is drained during shutdown */
260 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
261 rds_ib_conn_error(conn,
262 "send completion on %pI4 "
263 "had status %u, disconnecting and reconnecting\n",
264 &conn->c_faddr, wc.status);
265 }
266 }
267}
268
269/*
270 * This is the main function for allocating credits when sending
271 * messages.
272 *
273 * Conceptually, we have two counters:
274 * - send credits: this tells us how many WRs we're allowed
275 * to submit without overrunning the receiver's queue. For
276 * each SEND WR we post, we decrement this by one.
277 *
278 * - posted credits: this tells us how many WRs we recently
279 * posted to the receive queue. This value is transferred
280 * to the peer as a "credit update" in a RDS header field.
281 * Every time we transmit credits to the peer, we subtract
282 * the amount of transferred credits from this counter.
283 *
284 * It is essential that we avoid situations where both sides have
285 * exhausted their send credits, and are unable to send new credits
286 * to the peer. We achieve this by requiring that we send at least
287 * one credit update to the peer before exhausting our credits.
288 * When new credits arrive, we subtract one credit that is withheld
289 * until we've posted new buffers and are ready to transmit these
290 * credits (see rds_ib_send_add_credits below).
291 *
292 * The RDS send code is essentially single-threaded; rds_send_xmit
293 * grabs c_send_lock to ensure exclusive access to the send ring.
294 * However, the ACK sending code is independent and can race with
295 * message SENDs.
296 *
297 * In the send path, we need to update the counters for send credits
298 * and the counter of posted buffers atomically - when we use the
299 * last available credit, we cannot allow another thread to race us
300 * and grab the posted credits counter. Hence, we have to use a
301 * spinlock to protect the credit counter, or use atomics.
302 *
303 * Spinlocks shared between the send and the receive path are bad,
304 * because they create unnecessary delays. An early implementation
305 * using a spinlock showed a 5% degradation in throughput at some
306 * loads.
307 *
308 * This implementation avoids spinlocks completely, putting both
309 * counters into a single atomic, and updating that atomic using
310 * atomic_add (in the receive path, when receiving fresh credits),
311 * and using atomic_cmpxchg when updating the two counters.
312 */
313int rds_ib_send_grab_credits(struct rds_ib_connection *ic,
314 u32 wanted, u32 *adv_credits, int need_posted)
315{
316 unsigned int avail, posted, got = 0, advertise;
317 long oldval, newval;
318
319 *adv_credits = 0;
320 if (!ic->i_flowctl)
321 return wanted;
322
323try_again:
324 advertise = 0;
325 oldval = newval = atomic_read(&ic->i_credits);
326 posted = IB_GET_POST_CREDITS(oldval);
327 avail = IB_GET_SEND_CREDITS(oldval);
328
329 rdsdebug("rds_ib_send_grab_credits(%u): credits=%u posted=%u\n",
330 wanted, avail, posted);
331
332 /* The last credit must be used to send a credit update. */
333 if (avail && !posted)
334 avail--;
335
336 if (avail < wanted) {
337 struct rds_connection *conn = ic->i_cm_id->context;
338
339 /* Oops, there aren't that many credits left! */
340 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
341 got = avail;
342 } else {
343 /* Sometimes you get what you want, lalala. */
344 got = wanted;
345 }
346 newval -= IB_SET_SEND_CREDITS(got);
347
348 /*
349 * If need_posted is non-zero, then the caller wants
350 * the posted credits regardless of whether any send credits are
351 * available.
352 */
353 if (posted && (got || need_posted)) {
354 advertise = min_t(unsigned int, posted, RDS_MAX_ADV_CREDIT);
355 newval -= IB_SET_POST_CREDITS(advertise);
356 }
357
358 /* Finally bill everything */
359 if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval)
360 goto try_again;
361
362 *adv_credits = advertise;
363 return got;
364}
365
366void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits)
367{
368 struct rds_ib_connection *ic = conn->c_transport_data;
369
370 if (credits == 0)
371 return;
372
373 rdsdebug("rds_ib_send_add_credits(%u): current=%u%s\n",
374 credits,
375 IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)),
376 test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : "");
377
378 atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits);
379 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
380 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
381
382 WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384);
383
384 rds_ib_stats_inc(s_ib_rx_credit_updates);
385}
386
387void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
388{
389 struct rds_ib_connection *ic = conn->c_transport_data;
390
391 if (posted == 0)
392 return;
393
394 atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits);
395
396 /* Decide whether to send an update to the peer now.
397 * If we would send a credit update for every single buffer we
398 * post, we would end up with an ACK storm (ACK arrives,
399 * consumes buffer, we refill the ring, send ACK to remote
400 * advertising the newly posted buffer... ad inf)
401 *
402 * Performance pretty much depends on how often we send
403 * credit updates - too frequent updates mean lots of ACKs.
404 * Too infrequent updates, and the peer will run out of
405 * credits and has to throttle.
406 * For the time being, 16 seems to be a good compromise.
407 */
408 if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16)
409 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
410}
411
412static inline void
413rds_ib_xmit_populate_wr(struct rds_ib_connection *ic,
414 struct rds_ib_send_work *send, unsigned int pos,
415 unsigned long buffer, unsigned int length,
416 int send_flags)
417{
418 struct ib_sge *sge;
419
420 WARN_ON(pos != send - ic->i_sends);
421
422 send->s_wr.send_flags = send_flags;
423 send->s_wr.opcode = IB_WR_SEND;
424 send->s_wr.num_sge = 2;
425 send->s_wr.next = NULL;
426 send->s_queued = jiffies;
427 send->s_op = NULL;
428
429 if (length != 0) {
430 sge = rds_ib_data_sge(ic, send->s_sge);
431 sge->addr = buffer;
432 sge->length = length;
433 sge->lkey = ic->i_mr->lkey;
434
435 sge = rds_ib_header_sge(ic, send->s_sge);
436 } else {
437 /* We're sending a packet with no payload. There is only
438 * one SGE */
439 send->s_wr.num_sge = 1;
440 sge = &send->s_sge[0];
441 }
442
443 sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header));
444 sge->length = sizeof(struct rds_header);
445 sge->lkey = ic->i_mr->lkey;
446}
447
448/*
449 * This can be called multiple times for a given message. The first time
450 * we see a message we map its scatterlist into the IB device so that
451 * we can provide that mapped address to the IB scatter gather entries
452 * in the IB work requests. We translate the scatterlist into a series
453 * of work requests that fragment the message. These work requests complete
454 * in order so we pass ownership of the message to the completion handler
455 * once we send the final fragment.
456 *
457 * The RDS core uses the c_send_lock to only enter this function once
458 * per connection. This makes sure that the tx ring alloc/unalloc pairs
459 * don't get out of sync and confuse the ring.
460 */
461int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
462 unsigned int hdr_off, unsigned int sg, unsigned int off)
463{
464 struct rds_ib_connection *ic = conn->c_transport_data;
465 struct ib_device *dev = ic->i_cm_id->device;
466 struct rds_ib_send_work *send = NULL;
467 struct rds_ib_send_work *first;
468 struct rds_ib_send_work *prev;
469 struct ib_send_wr *failed_wr;
470 struct scatterlist *scat;
471 u32 pos;
472 u32 i;
473 u32 work_alloc;
474 u32 credit_alloc;
475 u32 posted;
476 u32 adv_credits = 0;
477 int send_flags = 0;
478 int sent;
479 int ret;
480 int flow_controlled = 0;
481
482 BUG_ON(off % RDS_FRAG_SIZE);
483 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
484
485 /* FIXME we may overallocate here */
486 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
487 i = 1;
488 else
489 i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
490
491 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
492 if (work_alloc == 0) {
493 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
494 rds_ib_stats_inc(s_ib_tx_ring_full);
495 ret = -ENOMEM;
496 goto out;
497 }
498
499 credit_alloc = work_alloc;
500 if (ic->i_flowctl) {
501 credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0);
502 adv_credits += posted;
503 if (credit_alloc < work_alloc) {
504 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
505 work_alloc = credit_alloc;
506 flow_controlled++;
507 }
508 if (work_alloc == 0) {
509 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
510 rds_ib_stats_inc(s_ib_tx_throttle);
511 ret = -ENOMEM;
512 goto out;
513 }
514 }
515
516 /* map the message the first time we see it */
517 if (ic->i_rm == NULL) {
518 /*
519 printk(KERN_NOTICE "rds_ib_xmit prep msg dport=%u flags=0x%x len=%d\n",
520 be16_to_cpu(rm->m_inc.i_hdr.h_dport),
521 rm->m_inc.i_hdr.h_flags,
522 be32_to_cpu(rm->m_inc.i_hdr.h_len));
523 */
524 if (rm->m_nents) {
525 rm->m_count = ib_dma_map_sg(dev,
526 rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
527 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
528 if (rm->m_count == 0) {
529 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
530 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
531 ret = -ENOMEM; /* XXX ? */
532 goto out;
533 }
534 } else {
535 rm->m_count = 0;
536 }
537
538 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
539 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes;
540 rds_message_addref(rm);
541 ic->i_rm = rm;
542
543 /* Finalize the header */
544 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
545 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED;
546 if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
547 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
548
549 /* If it has a RDMA op, tell the peer we did it. This is
550 * used by the peer to release use-once RDMA MRs. */
551 if (rm->m_rdma_op) {
552 struct rds_ext_header_rdma ext_hdr;
553
554 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key);
555 rds_message_add_extension(&rm->m_inc.i_hdr,
556 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
557 }
558 if (rm->m_rdma_cookie) {
559 rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
560 rds_rdma_cookie_key(rm->m_rdma_cookie),
561 rds_rdma_cookie_offset(rm->m_rdma_cookie));
562 }
563
564 /* Note - rds_ib_piggyb_ack clears the ACK_REQUIRED bit, so
565 * we should not do this unless we have a chance of at least
566 * sticking the header into the send ring. Which is why we
567 * should call rds_ib_ring_alloc first. */
568 rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_ib_piggyb_ack(ic));
569 rds_message_make_checksum(&rm->m_inc.i_hdr);
570
571 /*
572 * Update adv_credits since we reset the ACK_REQUIRED bit.
573 */
574 rds_ib_send_grab_credits(ic, 0, &posted, 1);
575 adv_credits += posted;
576 BUG_ON(adv_credits > 255);
577 } else if (ic->i_rm != rm)
578 BUG();
579
580 send = &ic->i_sends[pos];
581 first = send;
582 prev = NULL;
583 scat = &rm->m_sg[sg];
584 sent = 0;
585 i = 0;
586
587 /* Sometimes you want to put a fence between an RDMA
588 * READ and the following SEND.
589 * We could either do this all the time
590 * or when requested by the user. Right now, we let
591 * the application choose.
592 */
593 if (rm->m_rdma_op && rm->m_rdma_op->r_fence)
594 send_flags = IB_SEND_FENCE;
595
596 /*
597 * We could be copying the header into the unused tail of the page.
598 * That would need to be changed in the future when those pages might
599 * be mapped userspace pages or page cache pages. So instead we always
600 * use a second sge and our long-lived ring of mapped headers. We send
601 * the header after the data so that the data payload can be aligned on
602 * the receiver.
603 */
604
605 /* handle a 0-len message */
606 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) {
607 rds_ib_xmit_populate_wr(ic, send, pos, 0, 0, send_flags);
608 goto add_header;
609 }
610
611 /* if there's data reference it with a chain of work reqs */
612 for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
613 unsigned int len;
614
615 send = &ic->i_sends[pos];
616
617 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
618 rds_ib_xmit_populate_wr(ic, send, pos,
619 ib_sg_dma_address(dev, scat) + off, len,
620 send_flags);
621
622 /*
623 * We want to delay signaling completions just enough to get
624 * the batching benefits but not so much that we create dead time
625 * on the wire.
626 */
627 if (ic->i_unsignaled_wrs-- == 0) {
628 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
629 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
630 }
631
632 ic->i_unsignaled_bytes -= len;
633 if (ic->i_unsignaled_bytes <= 0) {
634 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes;
635 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
636 }
637
638 /*
639 * Always signal the last one if we're stopping due to flow control.
640 */
641 if (flow_controlled && i == (work_alloc-1))
642 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
643
644 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
645 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
646
647 sent += len;
648 off += len;
649 if (off == ib_sg_dma_len(dev, scat)) {
650 scat++;
651 off = 0;
652 }
653
654add_header:
655 /* Tack on the header after the data. The header SGE should already
656 * have been set up to point to the right header buffer. */
657 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
658
659 if (0) {
660 struct rds_header *hdr = &ic->i_send_hdrs[pos];
661
662 printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n",
663 be16_to_cpu(hdr->h_dport),
664 hdr->h_flags,
665 be32_to_cpu(hdr->h_len));
666 }
667 if (adv_credits) {
668 struct rds_header *hdr = &ic->i_send_hdrs[pos];
669
670 /* add credit and redo the header checksum */
671 hdr->h_credit = adv_credits;
672 rds_message_make_checksum(hdr);
673 adv_credits = 0;
674 rds_ib_stats_inc(s_ib_tx_credit_updates);
675 }
676
677 if (prev)
678 prev->s_wr.next = &send->s_wr;
679 prev = send;
680
681 pos = (pos + 1) % ic->i_send_ring.w_nr;
682 }
683
684 /* Account the RDS header in the number of bytes we sent, but just once.
685 * The caller has no concept of fragmentation. */
686 if (hdr_off == 0)
687 sent += sizeof(struct rds_header);
688
689 /* if we finished the message then send completion owns it */
690 if (scat == &rm->m_sg[rm->m_count]) {
691 prev->s_rm = ic->i_rm;
692 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
693 ic->i_rm = NULL;
694 }
695
696 if (i < work_alloc) {
697 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
698 work_alloc = i;
699 }
700 if (ic->i_flowctl && i < credit_alloc)
701 rds_ib_send_add_credits(conn, credit_alloc - i);
702
703 /* XXX need to worry about failed_wr and partial sends. */
704 failed_wr = &first->s_wr;
705 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
706 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
707 first, &first->s_wr, ret, failed_wr);
708 BUG_ON(failed_wr != &first->s_wr);
709 if (ret) {
710 printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 "
711 "returned %d\n", &conn->c_faddr, ret);
712 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
713 if (prev->s_rm) {
714 ic->i_rm = prev->s_rm;
715 prev->s_rm = NULL;
716 }
717 /* Finesse this later */
718 BUG();
719 goto out;
720 }
721
722 ret = sent;
723out:
724 BUG_ON(adv_credits);
725 return ret;
726}
727
728int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
729{
730 struct rds_ib_connection *ic = conn->c_transport_data;
731 struct rds_ib_send_work *send = NULL;
732 struct rds_ib_send_work *first;
733 struct rds_ib_send_work *prev;
734 struct ib_send_wr *failed_wr;
735 struct rds_ib_device *rds_ibdev;
736 struct scatterlist *scat;
737 unsigned long len;
738 u64 remote_addr = op->r_remote_addr;
739 u32 pos;
740 u32 work_alloc;
741 u32 i;
742 u32 j;
743 int sent;
744 int ret;
745 int num_sge;
746
747 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
748
749 /* map the message the first time we see it */
750 if (!op->r_mapped) {
751 op->r_count = ib_dma_map_sg(ic->i_cm_id->device,
752 op->r_sg, op->r_nents, (op->r_write) ?
753 DMA_TO_DEVICE : DMA_FROM_DEVICE);
754 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count);
755 if (op->r_count == 0) {
756 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
757 ret = -ENOMEM; /* XXX ? */
758 goto out;
759 }
760
761 op->r_mapped = 1;
762 }
763
764 /*
765 * Instead of knowing how to return a partial rdma read/write we insist that there
766 * be enough work requests to send the entire message.
767 */
768 i = ceil(op->r_count, rds_ibdev->max_sge);
769
770 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
771 if (work_alloc != i) {
772 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
773 rds_ib_stats_inc(s_ib_tx_ring_full);
774 ret = -ENOMEM;
775 goto out;
776 }
777
778 send = &ic->i_sends[pos];
779 first = send;
780 prev = NULL;
781 scat = &op->r_sg[0];
782 sent = 0;
783 num_sge = op->r_count;
784
785 for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) {
786 send->s_wr.send_flags = 0;
787 send->s_queued = jiffies;
788 /*
789 * We want to delay signaling completions just enough to get
790 * the batching benefits but not so much that we create dead time on the wire.
791 */
792 if (ic->i_unsignaled_wrs-- == 0) {
793 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
794 send->s_wr.send_flags = IB_SEND_SIGNALED;
795 }
796
797 send->s_wr.opcode = op->r_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
798 send->s_wr.wr.rdma.remote_addr = remote_addr;
799 send->s_wr.wr.rdma.rkey = op->r_key;
800 send->s_op = op;
801
802 if (num_sge > rds_ibdev->max_sge) {
803 send->s_wr.num_sge = rds_ibdev->max_sge;
804 num_sge -= rds_ibdev->max_sge;
805 } else {
806 send->s_wr.num_sge = num_sge;
807 }
808
809 send->s_wr.next = NULL;
810
811 if (prev)
812 prev->s_wr.next = &send->s_wr;
813
814 for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) {
815 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
816 send->s_sge[j].addr =
817 ib_sg_dma_address(ic->i_cm_id->device, scat);
818 send->s_sge[j].length = len;
819 send->s_sge[j].lkey = ic->i_mr->lkey;
820
821 sent += len;
822 rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);
823
824 remote_addr += len;
825 scat++;
826 }
827
828 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
829 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
830
831 prev = send;
832 if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
833 send = ic->i_sends;
834 }
835
836 /* if we finished the message then send completion owns it */
837 if (scat == &op->r_sg[op->r_count])
838 prev->s_wr.send_flags = IB_SEND_SIGNALED;
839
840 if (i < work_alloc) {
841 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
842 work_alloc = i;
843 }
844
845 failed_wr = &first->s_wr;
846 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
847 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
848 first, &first->s_wr, ret, failed_wr);
849 BUG_ON(failed_wr != &first->s_wr);
850 if (ret) {
851 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
852 "returned %d\n", &conn->c_faddr, ret);
853 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
854 goto out;
855 }
856
857 if (unlikely(failed_wr != &first->s_wr)) {
858 printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
859 BUG_ON(failed_wr != &first->s_wr);
860 }
861
862
863out:
864 return ret;
865}
866
867void rds_ib_xmit_complete(struct rds_connection *conn)
868{
869 struct rds_ib_connection *ic = conn->c_transport_data;
870
871 /* We may have a pending ACK or window update we were unable
872 * to send previously (due to flow control). Try again. */
873 rds_ib_attempt_ack(ic);
874}
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
new file mode 100644
index 000000000000..02e3e3d50d4a
--- /dev/null
+++ b/net/rds/ib_stats.c
@@ -0,0 +1,95 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/percpu.h>
34#include <linux/seq_file.h>
35#include <linux/proc_fs.h>
36
37#include "rds.h"
38#include "ib.h"
39
40DEFINE_PER_CPU(struct rds_ib_statistics, rds_ib_stats) ____cacheline_aligned;
41
42static char *rds_ib_stat_names[] = {
43 "ib_connect_raced",
44 "ib_listen_closed_stale",
45 "ib_tx_cq_call",
46 "ib_tx_cq_event",
47 "ib_tx_ring_full",
48 "ib_tx_throttle",
49 "ib_tx_sg_mapping_failure",
50 "ib_tx_stalled",
51 "ib_tx_credit_updates",
52 "ib_rx_cq_call",
53 "ib_rx_cq_event",
54 "ib_rx_ring_empty",
55 "ib_rx_refill_from_cq",
56 "ib_rx_refill_from_thread",
57 "ib_rx_alloc_limit",
58 "ib_rx_credit_updates",
59 "ib_ack_sent",
60 "ib_ack_send_failure",
61 "ib_ack_send_delayed",
62 "ib_ack_send_piggybacked",
63 "ib_ack_received",
64 "ib_rdma_mr_alloc",
65 "ib_rdma_mr_free",
66 "ib_rdma_mr_used",
67 "ib_rdma_mr_pool_flush",
68 "ib_rdma_mr_pool_wait",
69 "ib_rdma_mr_pool_depleted",
70};
71
72unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
73 unsigned int avail)
74{
75 struct rds_ib_statistics stats = {0, };
76 uint64_t *src;
77 uint64_t *sum;
78 size_t i;
79 int cpu;
80
81 if (avail < ARRAY_SIZE(rds_ib_stat_names))
82 goto out;
83
84 for_each_online_cpu(cpu) {
85 src = (uint64_t *)&(per_cpu(rds_ib_stats, cpu));
86 sum = (uint64_t *)&stats;
87 for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++)
88 *(sum++) += *(src++);
89 }
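	/*
	 * The summation above treats struct rds_ib_statistics as a flat
	 * array of uint64_t counters, so its members must keep the same
	 * order (and type) as the rds_ib_stat_names table that
	 * rds_stats_info_copy() pairs them with below.
	 */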
90
91 rds_stats_info_copy(iter, (uint64_t *)&stats, rds_ib_stat_names,
92 ARRAY_SIZE(rds_ib_stat_names));
93out:
94 return ARRAY_SIZE(rds_ib_stat_names);
95}
diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c
new file mode 100644
index 000000000000..d87830db93a0
--- /dev/null
+++ b/net/rds/ib_sysctl.c
@@ -0,0 +1,137 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/sysctl.h>
35#include <linux/proc_fs.h>
36
37#include "ib.h"
38
39static struct ctl_table_header *rds_ib_sysctl_hdr;
40
41unsigned long rds_ib_sysctl_max_send_wr = RDS_IB_DEFAULT_SEND_WR;
42unsigned long rds_ib_sysctl_max_recv_wr = RDS_IB_DEFAULT_RECV_WR;
43unsigned long rds_ib_sysctl_max_recv_allocation = (128 * 1024 * 1024) / RDS_FRAG_SIZE;
44static unsigned long rds_ib_sysctl_max_wr_min = 1;
45/* hardware will fail CQ creation long before this */
46static unsigned long rds_ib_sysctl_max_wr_max = (u32)~0;
47
48unsigned long rds_ib_sysctl_max_unsig_wrs = 16;
49static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1;
50static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64;
51
52unsigned long rds_ib_sysctl_max_unsig_bytes = (16 << 20);
53static unsigned long rds_ib_sysctl_max_unsig_bytes_min = 1;
54static unsigned long rds_ib_sysctl_max_unsig_bytes_max = ~0UL;
55
56unsigned int rds_ib_sysctl_flow_control = 1;
57
58ctl_table rds_ib_sysctl_table[] = {
59 {
60 .ctl_name = CTL_UNNUMBERED,
61 .procname = "max_send_wr",
62 .data = &rds_ib_sysctl_max_send_wr,
63 .maxlen = sizeof(unsigned long),
64 .mode = 0644,
65 .proc_handler = &proc_doulongvec_minmax,
66 .extra1 = &rds_ib_sysctl_max_wr_min,
67 .extra2 = &rds_ib_sysctl_max_wr_max,
68 },
69 {
70 .ctl_name = CTL_UNNUMBERED,
71 .procname = "max_recv_wr",
72 .data = &rds_ib_sysctl_max_recv_wr,
73 .maxlen = sizeof(unsigned long),
74 .mode = 0644,
75 .proc_handler = &proc_doulongvec_minmax,
76 .extra1 = &rds_ib_sysctl_max_wr_min,
77 .extra2 = &rds_ib_sysctl_max_wr_max,
78 },
79 {
80 .ctl_name = CTL_UNNUMBERED,
81 .procname = "max_unsignaled_wr",
82 .data = &rds_ib_sysctl_max_unsig_wrs,
83 .maxlen = sizeof(unsigned long),
84 .mode = 0644,
85 .proc_handler = &proc_doulongvec_minmax,
86 .extra1 = &rds_ib_sysctl_max_unsig_wr_min,
87 .extra2 = &rds_ib_sysctl_max_unsig_wr_max,
88 },
89 {
90 .ctl_name = CTL_UNNUMBERED,
91 .procname = "max_unsignaled_bytes",
92 .data = &rds_ib_sysctl_max_unsig_bytes,
93 .maxlen = sizeof(unsigned long),
94 .mode = 0644,
95 .proc_handler = &proc_doulongvec_minmax,
96 .extra1 = &rds_ib_sysctl_max_unsig_bytes_min,
97 .extra2 = &rds_ib_sysctl_max_unsig_bytes_max,
98 },
99 {
100 .ctl_name = CTL_UNNUMBERED,
101 .procname = "max_recv_allocation",
102 .data = &rds_ib_sysctl_max_recv_allocation,
103 .maxlen = sizeof(unsigned long),
104 .mode = 0644,
105 .proc_handler = &proc_doulongvec_minmax,
106 },
107 {
108 .ctl_name = CTL_UNNUMBERED,
109 .procname = "flow_control",
110 .data = &rds_ib_sysctl_flow_control,
111 .maxlen = sizeof(rds_ib_sysctl_flow_control),
112 .mode = 0644,
113 .proc_handler = &proc_dointvec,
114 },
115 { .ctl_name = 0}
116};
117
118static struct ctl_path rds_ib_sysctl_path[] = {
119 { .procname = "net", .ctl_name = CTL_NET, },
120 { .procname = "rds", .ctl_name = CTL_UNNUMBERED, },
121 { .procname = "ib", .ctl_name = CTL_UNNUMBERED, },
122 { }
123};
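/*
 * Given the path above ("net" -> "rds" -> "ib"), the table registers under
 * /proc/sys/net/rds/ib/.  A usage sketch (values are examples only):
 *
 *	# cat /proc/sys/net/rds/ib/max_unsignaled_wr
 *	16
 *	# echo 32 > /proc/sys/net/rds/ib/max_unsignaled_wr
 */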
124
125void rds_ib_sysctl_exit(void)
126{
127 if (rds_ib_sysctl_hdr)
128 unregister_sysctl_table(rds_ib_sysctl_hdr);
129}
130
131int __init rds_ib_sysctl_init(void)
132{
133 rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table);
134 if (rds_ib_sysctl_hdr == NULL)
135 return -ENOMEM;
136 return 0;
137}
diff --git a/net/rds/info.c b/net/rds/info.c
new file mode 100644
index 000000000000..1d885535214d
--- /dev/null
+++ b/net/rds/info.c
@@ -0,0 +1,241 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/percpu.h>
34#include <linux/seq_file.h>
35#include <linux/proc_fs.h>
36
37#include "rds.h"
38
39/*
40 * This file implements a getsockopt() call which copies a set of fixed
41 * sized structs into a user-specified buffer as a means of providing
42 * read-only information about RDS.
43 *
44 * For a given information source there are a given number of fixed sized
45 * structs at a given time. The structs are only copied if the user-specified
46 * buffer is big enough. The destination pages that make up the buffer
47 * are pinned for the duration of the copy.
48 *
49 * This gives us the following benefits:
50 *
51 * - simple implementation, no copy "position" across multiple calls
52 * - consistent snapshot of an info source
53 * - atomic copy works well with whatever locking info source has
54 * - one portable tool to get rds info across implementations
55 * - long-lived tool can get info without allocating
56 *
57 * at the following costs:
58 *
59 * - info source copy must be pinned, may be "large"
60 */
61
62struct rds_info_iterator {
63 struct page **pages;
64 void *addr;
65 unsigned long offset;
66};
67
68static DEFINE_SPINLOCK(rds_info_lock);
69static rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1];
70
71void rds_info_register_func(int optname, rds_info_func func)
72{
73 int offset = optname - RDS_INFO_FIRST;
74
75 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST);
76
77 spin_lock(&rds_info_lock);
78 BUG_ON(rds_info_funcs[offset] != NULL);
79 rds_info_funcs[offset] = func;
80 spin_unlock(&rds_info_lock);
81}
82
83void rds_info_deregister_func(int optname, rds_info_func func)
84{
85 int offset = optname - RDS_INFO_FIRST;
86
87 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST);
88
89 spin_lock(&rds_info_lock);
90 BUG_ON(rds_info_funcs[offset] != func);
91 rds_info_funcs[offset] = NULL;
92 spin_unlock(&rds_info_lock);
93}
94
95/*
96 * Typically we hold an atomic kmap across multiple rds_info_copy() calls
 97 * because the kmap is so expensive. This must be called before any blocking
 98 * operation while the mapping is held, and again as the iterator is torn down.
99 */
100void rds_info_iter_unmap(struct rds_info_iterator *iter)
101{
102 if (iter->addr != NULL) {
103 kunmap_atomic(iter->addr, KM_USER0);
104 iter->addr = NULL;
105 }
106}
107
108/*
109 * get_user_pages() called flush_dcache_page() on the pages for us.
110 */
111void rds_info_copy(struct rds_info_iterator *iter, void *data,
112 unsigned long bytes)
113{
114 unsigned long this;
115
116 while (bytes) {
117 if (iter->addr == NULL)
118 iter->addr = kmap_atomic(*iter->pages, KM_USER0);
119
120 this = min(bytes, PAGE_SIZE - iter->offset);
121
122 rdsdebug("page %p addr %p offset %lu this %lu data %p "
123 "bytes %lu\n", *iter->pages, iter->addr,
124 iter->offset, this, data, bytes);
125
126 memcpy(iter->addr + iter->offset, data, this);
127
128 data += this;
129 bytes -= this;
130 iter->offset += this;
131
132 if (iter->offset == PAGE_SIZE) {
133 kunmap_atomic(iter->addr, KM_USER0);
134 iter->addr = NULL;
135 iter->offset = 0;
136 iter->pages++;
137 }
138 }
139}
140
141/*
142 * @optval points to the userspace buffer that the information snapshot
143 * will be copied into.
144 *
145 * @optlen on input is the size of the buffer in userspace. @optlen
146 * on output is the size of the requested snapshot in bytes.
147 *
148 * This function returns -errno if there is a failure, particularly -ENOSPC
149 * if the given userspace buffer was not large enough to fit the snapshot.
150 * On success it returns the positive number of bytes of each array element
151 * in the snapshot.
152 */
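/*
 * A rough userspace sketch of the probe-then-fetch pattern this implies;
 * SOL_RDS and RDS_INFO_COUNTERS are assumed to come from <linux/rds.h>,
 * error handling is omitted, and the first call probes the required size
 * (it fails with ENOSPC and writes the size back through optlen):
 *
 *	socklen_t len = 0;
 *	getsockopt(fd, SOL_RDS, RDS_INFO_COUNTERS, NULL, &len);
 *	buf = malloc(len);
 *	each = getsockopt(fd, SOL_RDS, RDS_INFO_COUNTERS, buf, &len);
 *
 * On success the second call returns the per-entry size, so buf holds
 * len / each entries.
 */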
153int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
154 int __user *optlen)
155{
156 struct rds_info_iterator iter;
157 struct rds_info_lengths lens;
158 unsigned long nr_pages = 0;
159 unsigned long start;
160 unsigned long i;
161 rds_info_func func;
162 struct page **pages = NULL;
163 int ret;
164 int len;
165 int total;
166
167 if (get_user(len, optlen)) {
168 ret = -EFAULT;
169 goto out;
170 }
171
172 /* check for all kinds of wrapping and the like */
173 start = (unsigned long)optval;
174 if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) {
175 ret = -EINVAL;
176 goto out;
177 }
178
179 /* a 0 len call is just trying to probe its length */
180 if (len == 0)
181 goto call_func;
182
183 nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK))
184 >> PAGE_SHIFT;
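	/*
	 * Worked example with 4K pages: a buffer starting at 0x1ff0 with
	 * len 0x40 ends at 0x2030, so PAGE_ALIGN(start + len) is 0x3000,
	 * start & PAGE_MASK is 0x1000, and (0x3000 - 0x1000) >> 12 == 2
	 * pages get pinned even though len is well below PAGE_SIZE.
	 */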
185
186 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
187 if (pages == NULL) {
188 ret = -ENOMEM;
189 goto out;
190 }
191 down_read(&current->mm->mmap_sem);
192 ret = get_user_pages(current, current->mm, start, nr_pages, 1, 0,
193 pages, NULL);
194 up_read(&current->mm->mmap_sem);
195 if (ret != nr_pages) {
196 if (ret > 0)
197 nr_pages = ret;
198 else
199 nr_pages = 0;
200 ret = -EAGAIN; /* XXX ? */
201 goto out;
202 }
203
204 rdsdebug("len %d nr_pages %lu\n", len, nr_pages);
205
206call_func:
207 func = rds_info_funcs[optname - RDS_INFO_FIRST];
208 if (func == NULL) {
209 ret = -ENOPROTOOPT;
210 goto out;
211 }
212
213 iter.pages = pages;
214 iter.addr = NULL;
215 iter.offset = start & (PAGE_SIZE - 1);
216
217 func(sock, len, &iter, &lens);
218 BUG_ON(lens.each == 0);
219
220 total = lens.nr * lens.each;
221
222 rds_info_iter_unmap(&iter);
223
224 if (total > len) {
225 len = total;
226 ret = -ENOSPC;
227 } else {
228 len = total;
229 ret = lens.each;
230 }
231
232 if (put_user(len, optlen))
233 ret = -EFAULT;
234
235out:
236 for (i = 0; pages != NULL && i < nr_pages; i++)
237 put_page(pages[i]);
238 kfree(pages);
239
240 return ret;
241}
diff --git a/net/rds/info.h b/net/rds/info.h
new file mode 100644
index 000000000000..b6c052ca7d22
--- /dev/null
+++ b/net/rds/info.h
@@ -0,0 +1,30 @@
1#ifndef _RDS_INFO_H
2#define _RDS_INFO_H
3
4struct rds_info_lengths {
5 unsigned int nr;
6 unsigned int each;
7};
8
9struct rds_info_iterator;
10
11/*
12 * These functions must fill in the fields of @lens to reflect the size
13 * of the available info source. If the snapshot fits in @len then it
14 * should be copied using @iter. The caller will deduce if it was copied
15 * or not by comparing the lengths.
16 */
17typedef void (*rds_info_func)(struct socket *sock, unsigned int len,
18 struct rds_info_iterator *iter,
19 struct rds_info_lengths *lens);
20
21void rds_info_register_func(int optname, rds_info_func func);
22void rds_info_deregister_func(int optname, rds_info_func func);
23int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
24 int __user *optlen);
25void rds_info_copy(struct rds_info_iterator *iter, void *data,
26 unsigned long bytes);
27void rds_info_iter_unmap(struct rds_info_iterator *iter);
28
29
30#endif
diff --git a/net/rds/iw.c b/net/rds/iw.c
new file mode 100644
index 000000000000..1b56905c4c08
--- /dev/null
+++ b/net/rds/iw.c
@@ -0,0 +1,333 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/if.h>
36#include <linux/netdevice.h>
37#include <linux/inetdevice.h>
38#include <linux/if_arp.h>
39#include <linux/delay.h>
40
41#include "rds.h"
42#include "iw.h"
43
44unsigned int fastreg_pool_size = RDS_FASTREG_POOL_SIZE;
45unsigned int fastreg_message_size = RDS_FASTREG_SIZE + 1; /* +1 allows for unaligned MRs */
46
47module_param(fastreg_pool_size, int, 0444);
48MODULE_PARM_DESC(fastreg_pool_size, " Max number of fastreg MRs per device");
49module_param(fastreg_message_size, int, 0444);
50MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MRs)");
51
52struct list_head rds_iw_devices;
53
54DEFINE_SPINLOCK(iw_nodev_conns_lock);
55LIST_HEAD(iw_nodev_conns);
56
57void rds_iw_add_one(struct ib_device *device)
58{
59 struct rds_iw_device *rds_iwdev;
60 struct ib_device_attr *dev_attr;
61
62 /* Only handle iwarp devices */
63 if (device->node_type != RDMA_NODE_RNIC)
64 return;
65
66 dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
67 if (!dev_attr)
68 return;
69
70 if (ib_query_device(device, dev_attr)) {
71 rdsdebug("Query device failed for %s\n", device->name);
72 goto free_attr;
73 }
74
75 rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL);
76 if (!rds_iwdev)
77 goto free_attr;
78
79 spin_lock_init(&rds_iwdev->spinlock);
80
81 rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY);
82 rds_iwdev->max_wrs = dev_attr->max_qp_wr;
83 rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE);
84
85 rds_iwdev->page_shift = max(PAGE_SHIFT, ffs(dev_attr->page_size_cap) - 1);
86
87 rds_iwdev->dev = device;
88 rds_iwdev->pd = ib_alloc_pd(device);
89 if (IS_ERR(rds_iwdev->pd))
90 goto free_dev;
91
92 if (!rds_iwdev->dma_local_lkey) {
93 if (device->node_type != RDMA_NODE_RNIC) {
94 rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd,
95 IB_ACCESS_LOCAL_WRITE);
96 } else {
97 rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd,
98 IB_ACCESS_REMOTE_READ |
99 IB_ACCESS_REMOTE_WRITE |
100 IB_ACCESS_LOCAL_WRITE);
101 }
102 if (IS_ERR(rds_iwdev->mr))
103 goto err_pd;
104 } else
105 rds_iwdev->mr = NULL;
106
107 rds_iwdev->mr_pool = rds_iw_create_mr_pool(rds_iwdev);
108 if (IS_ERR(rds_iwdev->mr_pool)) {
109 rds_iwdev->mr_pool = NULL;
110 goto err_mr;
111 }
112
113 INIT_LIST_HEAD(&rds_iwdev->cm_id_list);
114 INIT_LIST_HEAD(&rds_iwdev->conn_list);
115 list_add_tail(&rds_iwdev->list, &rds_iw_devices);
116
117 ib_set_client_data(device, &rds_iw_client, rds_iwdev);
118
119 goto free_attr;
120
121err_mr:
122 if (rds_iwdev->mr)
123 ib_dereg_mr(rds_iwdev->mr);
124err_pd:
125 ib_dealloc_pd(rds_iwdev->pd);
126free_dev:
127 kfree(rds_iwdev);
128free_attr:
129 kfree(dev_attr);
130}
131
132void rds_iw_remove_one(struct ib_device *device)
133{
134 struct rds_iw_device *rds_iwdev;
135 struct rds_iw_cm_id *i_cm_id, *next;
136
137 rds_iwdev = ib_get_client_data(device, &rds_iw_client);
138 if (!rds_iwdev)
139 return;
140
141 spin_lock_irq(&rds_iwdev->spinlock);
142 list_for_each_entry_safe(i_cm_id, next, &rds_iwdev->cm_id_list, list) {
143 list_del(&i_cm_id->list);
144 kfree(i_cm_id);
145 }
146 spin_unlock_irq(&rds_iwdev->spinlock);
147
148 rds_iw_remove_conns(rds_iwdev);
149
150 if (rds_iwdev->mr_pool)
151 rds_iw_destroy_mr_pool(rds_iwdev->mr_pool);
152
153 if (rds_iwdev->mr)
154 ib_dereg_mr(rds_iwdev->mr);
155
156 while (ib_dealloc_pd(rds_iwdev->pd)) {
157 rdsdebug("Failed to dealloc pd %p\n", rds_iwdev->pd);
158 msleep(1);
159 }
160
161 list_del(&rds_iwdev->list);
162 kfree(rds_iwdev);
163}
164
165struct ib_client rds_iw_client = {
166 .name = "rds_iw",
167 .add = rds_iw_add_one,
168 .remove = rds_iw_remove_one
169};
170
171static int rds_iw_conn_info_visitor(struct rds_connection *conn,
172 void *buffer)
173{
174 struct rds_info_rdma_connection *iinfo = buffer;
175 struct rds_iw_connection *ic;
176
177 /* We will only ever look at IB transports */
178 if (conn->c_trans != &rds_iw_transport)
179 return 0;
180
181 iinfo->src_addr = conn->c_laddr;
182 iinfo->dst_addr = conn->c_faddr;
183
184 memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid));
185 memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
186 if (rds_conn_state(conn) == RDS_CONN_UP) {
187 struct rds_iw_device *rds_iwdev;
188 struct rdma_dev_addr *dev_addr;
189
190 ic = conn->c_transport_data;
191 dev_addr = &ic->i_cm_id->route.addr.dev_addr;
192
193 ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
194 ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
195
196 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
197 iinfo->max_send_wr = ic->i_send_ring.w_nr;
198 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
199 iinfo->max_send_sge = rds_iwdev->max_sge;
200 rds_iw_get_mr_info(rds_iwdev, iinfo);
201 }
202 return 1;
203}
204
205static void rds_iw_ic_info(struct socket *sock, unsigned int len,
206 struct rds_info_iterator *iter,
207 struct rds_info_lengths *lens)
208{
209 rds_for_each_conn_info(sock, len, iter, lens,
210 rds_iw_conn_info_visitor,
211 sizeof(struct rds_info_rdma_connection));
212}
213
214
215/*
216 * Early RDS/IB was built to only bind to an address if there is an IPoIB
217 * device with that address set.
218 *
219 * If it were me, I'd advocate for something more flexible. Sending and
220 * receiving should be device-agnostic. Transports would try and maintain
221 * connections between peers who have messages queued. Userspace would be
222 * allowed to influence which paths have priority. We could call userspace
223 * asserting this policy "routing".
224 */
225static int rds_iw_laddr_check(__be32 addr)
226{
227 int ret;
228 struct rdma_cm_id *cm_id;
229 struct sockaddr_in sin;
230
231 /* Create a CMA ID and try to bind it. This catches both
232 * IB and iWARP capable NICs.
233 */
234 cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP);
235 if (!cm_id)
236 return -EADDRNOTAVAIL;
237
238 memset(&sin, 0, sizeof(sin));
239 sin.sin_family = AF_INET;
240 sin.sin_addr.s_addr = addr;
241
242 /* rdma_bind_addr will only succeed for IB & iWARP devices */
243 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
244 /* due to this, we will claim to support IB devices unless we
245 check node_type. */
246 if (ret || cm_id->device->node_type != RDMA_NODE_RNIC)
247 ret = -EADDRNOTAVAIL;
248
249 rdsdebug("addr %pI4 ret %d node type %d\n",
250 &addr, ret,
251 cm_id->device ? cm_id->device->node_type : -1);
252
253 rdma_destroy_id(cm_id);
254
255 return ret;
256}
257
258void rds_iw_exit(void)
259{
260 rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
261 rds_iw_remove_nodev_conns();
262 ib_unregister_client(&rds_iw_client);
263 rds_iw_sysctl_exit();
264 rds_iw_recv_exit();
265 rds_trans_unregister(&rds_iw_transport);
266}
267
268struct rds_transport rds_iw_transport = {
269 .laddr_check = rds_iw_laddr_check,
270 .xmit_complete = rds_iw_xmit_complete,
271 .xmit = rds_iw_xmit,
272 .xmit_cong_map = NULL,
273 .xmit_rdma = rds_iw_xmit_rdma,
274 .recv = rds_iw_recv,
275 .conn_alloc = rds_iw_conn_alloc,
276 .conn_free = rds_iw_conn_free,
277 .conn_connect = rds_iw_conn_connect,
278 .conn_shutdown = rds_iw_conn_shutdown,
279 .inc_copy_to_user = rds_iw_inc_copy_to_user,
280 .inc_purge = rds_iw_inc_purge,
281 .inc_free = rds_iw_inc_free,
282 .cm_initiate_connect = rds_iw_cm_initiate_connect,
283 .cm_handle_connect = rds_iw_cm_handle_connect,
284 .cm_connect_complete = rds_iw_cm_connect_complete,
285 .stats_info_copy = rds_iw_stats_info_copy,
286 .exit = rds_iw_exit,
287 .get_mr = rds_iw_get_mr,
288 .sync_mr = rds_iw_sync_mr,
289 .free_mr = rds_iw_free_mr,
290 .flush_mrs = rds_iw_flush_mrs,
291 .t_owner = THIS_MODULE,
292 .t_name = "iwarp",
293 .t_prefer_loopback = 1,
294};
295
296int __init rds_iw_init(void)
297{
298 int ret;
299
300 INIT_LIST_HEAD(&rds_iw_devices);
301
302 ret = ib_register_client(&rds_iw_client);
303 if (ret)
304 goto out;
305
306 ret = rds_iw_sysctl_init();
307 if (ret)
308 goto out_ibreg;
309
310 ret = rds_iw_recv_init();
311 if (ret)
312 goto out_sysctl;
313
314 ret = rds_trans_register(&rds_iw_transport);
315 if (ret)
316 goto out_recv;
317
318 rds_info_register_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info);
319
320 goto out;
321
322out_recv:
323 rds_iw_recv_exit();
324out_sysctl:
325 rds_iw_sysctl_exit();
326out_ibreg:
327 ib_unregister_client(&rds_iw_client);
328out:
329 return ret;
330}
331
332MODULE_LICENSE("GPL");
333
diff --git a/net/rds/iw.h b/net/rds/iw.h
new file mode 100644
index 000000000000..0ddda34f2a1c
--- /dev/null
+++ b/net/rds/iw.h
@@ -0,0 +1,395 @@
1#ifndef _RDS_IW_H
2#define _RDS_IW_H
3
4#include <rdma/ib_verbs.h>
5#include <rdma/rdma_cm.h>
6#include "rds.h"
7#include "rdma_transport.h"
8
9#define RDS_FASTREG_SIZE 20
10#define RDS_FASTREG_POOL_SIZE 2048
11
12#define RDS_IW_MAX_SGE 8
13#define RDS_IW_RECV_SGE 2
14
15#define RDS_IW_DEFAULT_RECV_WR 1024
16#define RDS_IW_DEFAULT_SEND_WR 256
17
18#define RDS_IW_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
19
20extern struct list_head rds_iw_devices;
21
22/*
23 * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to
 24 * try and minimize the amount of memory tied up in both the device and
25 * socket receive queues.
26 */
27/* page offset of the final full frag that fits in the page */
28#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE)
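/*
 * For instance, if RDS_FRAG_SIZE were 2048 on a 4K-page system this would
 * evaluate to (4096 / 2048 - 1) * 2048 == 2048, the offset of the last
 * whole fragment that still fits inside the page.
 */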
29struct rds_page_frag {
30 struct list_head f_item;
31 struct page *f_page;
32 unsigned long f_offset;
33 dma_addr_t f_mapped;
34};
35
36struct rds_iw_incoming {
37 struct list_head ii_frags;
38 struct rds_incoming ii_inc;
39};
40
41struct rds_iw_connect_private {
42 /* Add new fields at the end, and don't permute existing fields. */
43 __be32 dp_saddr;
44 __be32 dp_daddr;
45 u8 dp_protocol_major;
46 u8 dp_protocol_minor;
47 __be16 dp_protocol_minor_mask; /* bitmask */
48 __be32 dp_reserved1;
49 __be64 dp_ack_seq;
50 __be32 dp_credit; /* non-zero enables flow ctl */
51};
52
53struct rds_iw_scatterlist {
54 struct scatterlist *list;
55 unsigned int len;
56 int dma_len;
57 unsigned int dma_npages;
58 unsigned int bytes;
59};
60
61struct rds_iw_mapping {
62 spinlock_t m_lock; /* protect the mapping struct */
63 struct list_head m_list;
64 struct rds_iw_mr *m_mr;
65 uint32_t m_rkey;
66 struct rds_iw_scatterlist m_sg;
67};
68
69struct rds_iw_send_work {
70 struct rds_message *s_rm;
71
72 /* We should really put these into a union: */
73 struct rds_rdma_op *s_op;
74 struct rds_iw_mapping *s_mapping;
75 struct ib_mr *s_mr;
76 struct ib_fast_reg_page_list *s_page_list;
77 unsigned char s_remap_count;
78
79 struct ib_send_wr s_wr;
80 struct ib_sge s_sge[RDS_IW_MAX_SGE];
81 unsigned long s_queued;
82};
83
84struct rds_iw_recv_work {
85 struct rds_iw_incoming *r_iwinc;
86 struct rds_page_frag *r_frag;
87 struct ib_recv_wr r_wr;
88 struct ib_sge r_sge[2];
89};
90
91struct rds_iw_work_ring {
92 u32 w_nr;
93 u32 w_alloc_ptr;
94 u32 w_alloc_ctr;
95 u32 w_free_ptr;
96 atomic_t w_free_ctr;
97};
98
99struct rds_iw_device;
100
101struct rds_iw_connection {
102
103 struct list_head iw_node;
104 struct rds_iw_device *rds_iwdev;
105 struct rds_connection *conn;
106
107 /* alphabet soup, IBTA style */
108 struct rdma_cm_id *i_cm_id;
109 struct ib_pd *i_pd;
110 struct ib_mr *i_mr;
111 struct ib_cq *i_send_cq;
112 struct ib_cq *i_recv_cq;
113
114 /* tx */
115 struct rds_iw_work_ring i_send_ring;
116 struct rds_message *i_rm;
117 struct rds_header *i_send_hdrs;
118 u64 i_send_hdrs_dma;
119 struct rds_iw_send_work *i_sends;
120
121 /* rx */
122 struct mutex i_recv_mutex;
123 struct rds_iw_work_ring i_recv_ring;
124 struct rds_iw_incoming *i_iwinc;
125 u32 i_recv_data_rem;
126 struct rds_header *i_recv_hdrs;
127 u64 i_recv_hdrs_dma;
128 struct rds_iw_recv_work *i_recvs;
129 struct rds_page_frag i_frag;
130 u64 i_ack_recv; /* last ACK received */
131
132 /* sending acks */
133 unsigned long i_ack_flags;
134 u64 i_ack_next; /* next ACK to send */
135 struct rds_header *i_ack;
136 struct ib_send_wr i_ack_wr;
137 struct ib_sge i_ack_sge;
138 u64 i_ack_dma;
139 unsigned long i_ack_queued;
140
141 /* Flow control related information
142 *
143 * Our algorithm uses a pair of variables that we need to access
144 * atomically - one for the send credits, and one for the posted
145 * recv credits we need to transfer to the remote.
146 * Rather than protect them using a slow spinlock, we put both into
147 * a single atomic_t and update it using cmpxchg
148 */
149 atomic_t i_credits;
150
151 /* Protocol version specific information */
152 unsigned int i_flowctl:1; /* enable/disable flow ctl */
153 unsigned int i_dma_local_lkey:1;
154 unsigned int i_fastreg_posted:1; /* fastreg posted on this connection */
155 /* Batched completions */
156 unsigned int i_unsignaled_wrs;
157 long i_unsignaled_bytes;
158};
159
160/* This assumes that atomic_t is at least 32 bits */
161#define IB_GET_SEND_CREDITS(v) ((v) & 0xffff)
162#define IB_GET_POST_CREDITS(v) ((v) >> 16)
163#define IB_SET_SEND_CREDITS(v) ((v) & 0xffff)
164#define IB_SET_POST_CREDITS(v) ((v) << 16)
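/*
 * Illustrative packing: if i_credits holds 0x00050003, IB_GET_SEND_CREDITS()
 * yields 3 send credits and IB_GET_POST_CREDITS() yields 5 posted recv
 * credits; advertising 2 of those is atomic_sub(IB_SET_POST_CREDITS(2), ...)
 * as done when filling the connect params.
 */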
165
166struct rds_iw_cm_id {
167 struct list_head list;
168 struct rdma_cm_id *cm_id;
169};
170
171struct rds_iw_device {
172 struct list_head list;
173 struct list_head cm_id_list;
174 struct list_head conn_list;
175 struct ib_device *dev;
176 struct ib_pd *pd;
177 struct ib_mr *mr;
178 struct rds_iw_mr_pool *mr_pool;
179 int page_shift;
180 int max_sge;
181 unsigned int max_wrs;
182 unsigned int dma_local_lkey:1;
183 spinlock_t spinlock; /* protect the above */
184};
185
186/* bits for i_ack_flags */
187#define IB_ACK_IN_FLIGHT 0
188#define IB_ACK_REQUESTED 1
189
190/* Magic WR_ID for ACKs */
191#define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL)
192#define RDS_IW_FAST_REG_WR_ID ((u64)0xefefefefefefefefULL)
193#define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL)
194
195struct rds_iw_statistics {
196 uint64_t s_iw_connect_raced;
197 uint64_t s_iw_listen_closed_stale;
198 uint64_t s_iw_tx_cq_call;
199 uint64_t s_iw_tx_cq_event;
200 uint64_t s_iw_tx_ring_full;
201 uint64_t s_iw_tx_throttle;
202 uint64_t s_iw_tx_sg_mapping_failure;
203 uint64_t s_iw_tx_stalled;
204 uint64_t s_iw_tx_credit_updates;
205 uint64_t s_iw_rx_cq_call;
206 uint64_t s_iw_rx_cq_event;
207 uint64_t s_iw_rx_ring_empty;
208 uint64_t s_iw_rx_refill_from_cq;
209 uint64_t s_iw_rx_refill_from_thread;
210 uint64_t s_iw_rx_alloc_limit;
211 uint64_t s_iw_rx_credit_updates;
212 uint64_t s_iw_ack_sent;
213 uint64_t s_iw_ack_send_failure;
214 uint64_t s_iw_ack_send_delayed;
215 uint64_t s_iw_ack_send_piggybacked;
216 uint64_t s_iw_ack_received;
217 uint64_t s_iw_rdma_mr_alloc;
218 uint64_t s_iw_rdma_mr_free;
219 uint64_t s_iw_rdma_mr_used;
220 uint64_t s_iw_rdma_mr_pool_flush;
221 uint64_t s_iw_rdma_mr_pool_wait;
222 uint64_t s_iw_rdma_mr_pool_depleted;
223};
224
225extern struct workqueue_struct *rds_iw_wq;
226
227/*
228 * Fake ib_dma_sync_sg_for_{cpu,device} as long as ib_verbs.h
229 * doesn't define it.
230 */
231static inline void rds_iw_dma_sync_sg_for_cpu(struct ib_device *dev,
232 struct scatterlist *sg, unsigned int sg_dma_len, int direction)
233{
234 unsigned int i;
235
236 for (i = 0; i < sg_dma_len; ++i) {
237 ib_dma_sync_single_for_cpu(dev,
238 ib_sg_dma_address(dev, &sg[i]),
239 ib_sg_dma_len(dev, &sg[i]),
240 direction);
241 }
242}
243#define ib_dma_sync_sg_for_cpu rds_iw_dma_sync_sg_for_cpu
244
245static inline void rds_iw_dma_sync_sg_for_device(struct ib_device *dev,
246 struct scatterlist *sg, unsigned int sg_dma_len, int direction)
247{
248 unsigned int i;
249
250 for (i = 0; i < sg_dma_len; ++i) {
251 ib_dma_sync_single_for_device(dev,
252 ib_sg_dma_address(dev, &sg[i]),
253 ib_sg_dma_len(dev, &sg[i]),
254 direction);
255 }
256}
257#define ib_dma_sync_sg_for_device rds_iw_dma_sync_sg_for_device
258
259static inline u32 rds_iw_local_dma_lkey(struct rds_iw_connection *ic)
260{
261 return ic->i_dma_local_lkey ? ic->i_cm_id->device->local_dma_lkey : ic->i_mr->lkey;
262}
263
264/* ib.c */
265extern struct rds_transport rds_iw_transport;
266extern void rds_iw_add_one(struct ib_device *device);
267extern void rds_iw_remove_one(struct ib_device *device);
268extern struct ib_client rds_iw_client;
269
270extern unsigned int fastreg_pool_size;
271extern unsigned int fastreg_message_size;
272
273extern spinlock_t iw_nodev_conns_lock;
274extern struct list_head iw_nodev_conns;
275
276/* ib_cm.c */
277int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp);
278void rds_iw_conn_free(void *arg);
279int rds_iw_conn_connect(struct rds_connection *conn);
280void rds_iw_conn_shutdown(struct rds_connection *conn);
281void rds_iw_state_change(struct sock *sk);
282int __init rds_iw_listen_init(void);
283void rds_iw_listen_stop(void);
284void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...);
285int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
286 struct rdma_cm_event *event);
287int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id);
288void rds_iw_cm_connect_complete(struct rds_connection *conn,
289 struct rdma_cm_event *event);
290
291
292#define rds_iw_conn_error(conn, fmt...) \
293 __rds_iw_conn_error(conn, KERN_WARNING "RDS/IW: " fmt)
294
295/* ib_rdma.c */
296int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
297int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn);
298void rds_iw_remove_nodev_conns(void);
299void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev);
300struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *);
301void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo);
302void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *);
303void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
304 struct rds_sock *rs, u32 *key_ret);
305void rds_iw_sync_mr(void *trans_private, int dir);
306void rds_iw_free_mr(void *trans_private, int invalidate);
307void rds_iw_flush_mrs(void);
308void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
309
310/* ib_recv.c */
311int __init rds_iw_recv_init(void);
312void rds_iw_recv_exit(void);
313int rds_iw_recv(struct rds_connection *conn);
314int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
315 gfp_t page_gfp, int prefill);
316void rds_iw_inc_purge(struct rds_incoming *inc);
317void rds_iw_inc_free(struct rds_incoming *inc);
318int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
319 size_t size);
320void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context);
321void rds_iw_recv_init_ring(struct rds_iw_connection *ic);
322void rds_iw_recv_clear_ring(struct rds_iw_connection *ic);
323void rds_iw_recv_init_ack(struct rds_iw_connection *ic);
324void rds_iw_attempt_ack(struct rds_iw_connection *ic);
325void rds_iw_ack_send_complete(struct rds_iw_connection *ic);
326u64 rds_iw_piggyb_ack(struct rds_iw_connection *ic);
327
328/* ib_ring.c */
329void rds_iw_ring_init(struct rds_iw_work_ring *ring, u32 nr);
330void rds_iw_ring_resize(struct rds_iw_work_ring *ring, u32 nr);
331u32 rds_iw_ring_alloc(struct rds_iw_work_ring *ring, u32 val, u32 *pos);
332void rds_iw_ring_free(struct rds_iw_work_ring *ring, u32 val);
333void rds_iw_ring_unalloc(struct rds_iw_work_ring *ring, u32 val);
334int rds_iw_ring_empty(struct rds_iw_work_ring *ring);
335int rds_iw_ring_low(struct rds_iw_work_ring *ring);
336u32 rds_iw_ring_oldest(struct rds_iw_work_ring *ring);
337u32 rds_iw_ring_completed(struct rds_iw_work_ring *ring, u32 wr_id, u32 oldest);
338extern wait_queue_head_t rds_iw_ring_empty_wait;
339
340/* ib_send.c */
341void rds_iw_xmit_complete(struct rds_connection *conn);
342int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
343 unsigned int hdr_off, unsigned int sg, unsigned int off);
344void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context);
345void rds_iw_send_init_ring(struct rds_iw_connection *ic);
346void rds_iw_send_clear_ring(struct rds_iw_connection *ic);
347int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op);
348void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits);
349void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted);
350int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted,
351 u32 *adv_credits, int need_posted);
352
353/* ib_stats.c */
354DECLARE_PER_CPU(struct rds_iw_statistics, rds_iw_stats);
355#define rds_iw_stats_inc(member) rds_stats_inc_which(rds_iw_stats, member)
356unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter,
357 unsigned int avail);
358
359/* ib_sysctl.c */
360int __init rds_iw_sysctl_init(void);
361void rds_iw_sysctl_exit(void);
362extern unsigned long rds_iw_sysctl_max_send_wr;
363extern unsigned long rds_iw_sysctl_max_recv_wr;
364extern unsigned long rds_iw_sysctl_max_unsig_wrs;
365extern unsigned long rds_iw_sysctl_max_unsig_bytes;
366extern unsigned long rds_iw_sysctl_max_recv_allocation;
367extern unsigned int rds_iw_sysctl_flow_control;
368extern ctl_table rds_iw_sysctl_table[];
369
370/*
371 * Helper functions for getting/setting the header and data SGEs in
372 * RDS packets (not RDMA)
373 */
374static inline struct ib_sge *
375rds_iw_header_sge(struct rds_iw_connection *ic, struct ib_sge *sge)
376{
377 return &sge[0];
378}
379
380static inline struct ib_sge *
381rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge)
382{
383 return &sge[1];
384}
385
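/*
 * A plausible reading of the #if below (not spelled out in the code): on
 * 32-bit builds a plain 64-bit store is not atomic and could be observed
 * half-written, so set_64bit() is used instead.
 */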
386static inline void rds_iw_set_64bit(u64 *ptr, u64 val)
387{
388#if BITS_PER_LONG == 64
389 *ptr = val;
390#else
391 set_64bit(ptr, val);
392#endif
393}
394
395#endif
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
new file mode 100644
index 000000000000..57ecb3d4b8a5
--- /dev/null
+++ b/net/rds/iw_cm.c
@@ -0,0 +1,750 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/vmalloc.h>
36
37#include "rds.h"
38#include "iw.h"
39
40/*
41 * Set the selected protocol version
42 */
43static void rds_iw_set_protocol(struct rds_connection *conn, unsigned int version)
44{
45 conn->c_version = version;
46}
47
48/*
49 * Set up flow control
50 */
51static void rds_iw_set_flow_control(struct rds_connection *conn, u32 credits)
52{
53 struct rds_iw_connection *ic = conn->c_transport_data;
54
55 if (rds_iw_sysctl_flow_control && credits != 0) {
56 /* We're doing flow control */
57 ic->i_flowctl = 1;
58 rds_iw_send_add_credits(conn, credits);
59 } else {
60 ic->i_flowctl = 0;
61 }
62}
63
64/*
65 * Connection established.
 66 * We get here for both outgoing and incoming connections.
67 */
68void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event)
69{
70 const struct rds_iw_connect_private *dp = NULL;
71 struct rds_iw_connection *ic = conn->c_transport_data;
72 struct rds_iw_device *rds_iwdev;
73 int err;
74
75 if (event->param.conn.private_data_len) {
76 dp = event->param.conn.private_data;
77
78 rds_iw_set_protocol(conn,
79 RDS_PROTOCOL(dp->dp_protocol_major,
80 dp->dp_protocol_minor));
81 rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
82 }
83
84 /* update ib_device with this local ipaddr & conn */
85 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
86 err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id);
87 if (err)
88 printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err);
89 err = rds_iw_add_conn(rds_iwdev, conn);
90 if (err)
91 printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err);
92
93 /* If the peer gave us the last packet it saw, process this as if
94 * we had received a regular ACK. */
95 if (dp && dp->dp_ack_seq)
96 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
97
98 printk(KERN_NOTICE "RDS/IW: connected to %pI4<->%pI4 version %u.%u%s\n",
99 &conn->c_laddr, &conn->c_faddr,
100 RDS_PROTOCOL_MAJOR(conn->c_version),
101 RDS_PROTOCOL_MINOR(conn->c_version),
102 ic->i_flowctl ? ", flow control" : "");
103
104 rds_connect_complete(conn);
105}
106
107static void rds_iw_cm_fill_conn_param(struct rds_connection *conn,
108 struct rdma_conn_param *conn_param,
109 struct rds_iw_connect_private *dp,
110 u32 protocol_version)
111{
112 struct rds_iw_connection *ic = conn->c_transport_data;
113
114 memset(conn_param, 0, sizeof(struct rdma_conn_param));
115 /* XXX tune these? */
116 conn_param->responder_resources = 1;
117 conn_param->initiator_depth = 1;
118
119 if (dp) {
120 memset(dp, 0, sizeof(*dp));
121 dp->dp_saddr = conn->c_laddr;
122 dp->dp_daddr = conn->c_faddr;
123 dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version);
124 dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version);
125 dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IW_SUPPORTED_PROTOCOLS);
126 dp->dp_ack_seq = rds_iw_piggyb_ack(ic);
127
128 /* Advertise flow control */
129 if (ic->i_flowctl) {
130 unsigned int credits;
131
132 credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits));
133 dp->dp_credit = cpu_to_be32(credits);
134 atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits);
135 }
136
137 conn_param->private_data = dp;
138 conn_param->private_data_len = sizeof(*dp);
139 }
140}
141
142static void rds_iw_cq_event_handler(struct ib_event *event, void *data)
143{
144 rdsdebug("event %u data %p\n", event->event, data);
145}
146
147static void rds_iw_qp_event_handler(struct ib_event *event, void *data)
148{
149 struct rds_connection *conn = data;
150 struct rds_iw_connection *ic = conn->c_transport_data;
151
152 rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event);
153
154 switch (event->event) {
155 case IB_EVENT_COMM_EST:
156 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
157 break;
158 case IB_EVENT_QP_REQ_ERR:
159 case IB_EVENT_QP_FATAL:
160 default:
161 rds_iw_conn_error(conn, "RDS/IW: Fatal QP Event %u - connection %pI4->%pI4...reconnecting\n",
162 event->event, &conn->c_laddr,
163 &conn->c_faddr);
164 break;
165 }
166}
167
168/*
169 * Create a QP
170 */
171static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr,
172 struct rds_iw_device *rds_iwdev,
173 struct rds_iw_work_ring *send_ring,
174 void (*send_cq_handler)(struct ib_cq *, void *),
175 struct rds_iw_work_ring *recv_ring,
176 void (*recv_cq_handler)(struct ib_cq *, void *),
177 void *context)
178{
179 struct ib_device *dev = rds_iwdev->dev;
180 unsigned int send_size, recv_size;
181 int ret;
182
183 /* The offset of 1 is to accommodate the additional ACK WR. */
184 send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1);
185 recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1);
186 rds_iw_ring_resize(send_ring, send_size - 1);
187 rds_iw_ring_resize(recv_ring, recv_size - 1);
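	/*
	 * E.g. assuming the sysctls keep their RDS_IW_DEFAULT_* values of
	 * 256 and 1024, and hardware advertising, say, 16384 max_qp_wr:
	 * send_size becomes 257 and recv_size 1025, the QP is sized for
	 * those totals, and the rings are trimmed back to 256 and 1024
	 * usable entries, leaving the extra slot for the ACK WR mentioned
	 * above.
	 */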
188
189 memset(attr, 0, sizeof(*attr));
190 attr->event_handler = rds_iw_qp_event_handler;
191 attr->qp_context = context;
192 attr->cap.max_send_wr = send_size;
193 attr->cap.max_recv_wr = recv_size;
194 attr->cap.max_send_sge = rds_iwdev->max_sge;
195 attr->cap.max_recv_sge = RDS_IW_RECV_SGE;
196 attr->sq_sig_type = IB_SIGNAL_REQ_WR;
197 attr->qp_type = IB_QPT_RC;
198
199 attr->send_cq = ib_create_cq(dev, send_cq_handler,
200 rds_iw_cq_event_handler,
201 context, send_size, 0);
202 if (IS_ERR(attr->send_cq)) {
203 ret = PTR_ERR(attr->send_cq);
204 attr->send_cq = NULL;
205 rdsdebug("ib_create_cq send failed: %d\n", ret);
206 goto out;
207 }
208
209 attr->recv_cq = ib_create_cq(dev, recv_cq_handler,
210 rds_iw_cq_event_handler,
211 context, recv_size, 0);
212 if (IS_ERR(attr->recv_cq)) {
213 ret = PTR_ERR(attr->recv_cq);
214 attr->recv_cq = NULL;
215 rdsdebug("ib_create_cq recv failed: %d\n", ret);
216 goto out;
217 }
218
219 ret = ib_req_notify_cq(attr->send_cq, IB_CQ_NEXT_COMP);
220 if (ret) {
221 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
222 goto out;
223 }
224
225 ret = ib_req_notify_cq(attr->recv_cq, IB_CQ_SOLICITED);
226 if (ret) {
227 rdsdebug("ib_req_notify_cq recv failed: %d\n", ret);
228 goto out;
229 }
230
231out:
232 if (ret) {
233 if (attr->send_cq)
234 ib_destroy_cq(attr->send_cq);
235 if (attr->recv_cq)
236 ib_destroy_cq(attr->recv_cq);
237 }
238 return ret;
239}
240
241/*
242 * This needs to be very careful to not leave IS_ERR pointers around for
243 * cleanup to trip over.
244 */
245static int rds_iw_setup_qp(struct rds_connection *conn)
246{
247 struct rds_iw_connection *ic = conn->c_transport_data;
248 struct ib_device *dev = ic->i_cm_id->device;
249 struct ib_qp_init_attr attr;
250 struct rds_iw_device *rds_iwdev;
251 int ret;
252
253 /* rds_iw_add_one creates a rds_iw_device object per IB device,
254 * and allocates a protection domain, memory range and MR pool
255 * for each. If that fails for any reason, it will not register
256 * the rds_iwdev at all.
257 */
258 rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
259 if (rds_iwdev == NULL) {
260 if (printk_ratelimit())
261 printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
262 dev->name);
263 return -EOPNOTSUPP;
264 }
265
266 /* Protection domain and memory range */
267 ic->i_pd = rds_iwdev->pd;
268 ic->i_mr = rds_iwdev->mr;
269
270 ret = rds_iw_init_qp_attrs(&attr, rds_iwdev,
271 &ic->i_send_ring, rds_iw_send_cq_comp_handler,
272 &ic->i_recv_ring, rds_iw_recv_cq_comp_handler,
273 conn);
274 if (ret < 0)
275 goto out;
276
277 ic->i_send_cq = attr.send_cq;
278 ic->i_recv_cq = attr.recv_cq;
279
280 /*
281 * XXX this can fail if max_*_wr is too large? Are we supposed
282 * to back off until we get a value that the hardware can support?
283 */
284 ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr);
285 if (ret) {
286 rdsdebug("rdma_create_qp failed: %d\n", ret);
287 goto out;
288 }
289
290 ic->i_send_hdrs = ib_dma_alloc_coherent(dev,
291 ic->i_send_ring.w_nr *
292 sizeof(struct rds_header),
293 &ic->i_send_hdrs_dma, GFP_KERNEL);
294 if (ic->i_send_hdrs == NULL) {
295 ret = -ENOMEM;
296 rdsdebug("ib_dma_alloc_coherent send failed\n");
297 goto out;
298 }
299
300 ic->i_recv_hdrs = ib_dma_alloc_coherent(dev,
301 ic->i_recv_ring.w_nr *
302 sizeof(struct rds_header),
303 &ic->i_recv_hdrs_dma, GFP_KERNEL);
304 if (ic->i_recv_hdrs == NULL) {
305 ret = -ENOMEM;
306 rdsdebug("ib_dma_alloc_coherent recv failed\n");
307 goto out;
308 }
309
310 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
311 &ic->i_ack_dma, GFP_KERNEL);
312 if (ic->i_ack == NULL) {
313 ret = -ENOMEM;
314 rdsdebug("ib_dma_alloc_coherent ack failed\n");
315 goto out;
316 }
317
318 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work));
319 if (ic->i_sends == NULL) {
320 ret = -ENOMEM;
321 rdsdebug("send allocation failed\n");
322 goto out;
323 }
324 rds_iw_send_init_ring(ic);
325
326 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work));
327 if (ic->i_recvs == NULL) {
328 ret = -ENOMEM;
329 rdsdebug("recv allocation failed\n");
330 goto out;
331 }
332
333 rds_iw_recv_init_ring(ic);
334 rds_iw_recv_init_ack(ic);
335
336 /* Post receive buffers - as a side effect, this will update
337 * the posted credit count. */
338 rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1);
339
340 rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr,
341 ic->i_send_cq, ic->i_recv_cq);
342
343out:
344 return ret;
345}
346
347static u32 rds_iw_protocol_compatible(const struct rds_iw_connect_private *dp)
348{
349 u16 common;
350 u32 version = 0;
351
352 /* rdma_cm private data is odd - when there is any private data in the
353 * request, we will be given a pretty large buffer without telling us the
354 * original size. The only way to tell the difference is by looking at
355 * the contents, which are initialized to zero.
356 * If the protocol version fields aren't set, this is a connection attempt
357 * from an older version. This could be 3.0 or 2.0 - we can't tell.
358 * We really should have changed this for OFED 1.3 :-( */
359 if (dp->dp_protocol_major == 0)
360 return RDS_PROTOCOL_3_0;
361
362 common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IW_SUPPORTED_PROTOCOLS;
363 if (dp->dp_protocol_major == 3 && common) {
364 version = RDS_PROTOCOL_3_0;
365 while ((common >>= 1) != 0)
366 version++;
367 } else if (printk_ratelimit()) {
368 printk(KERN_NOTICE "RDS: Connection from %pI4 using "
369 "incompatible protocol version %u.%u\n",
370 &dp->dp_saddr,
371 dp->dp_protocol_major,
372 dp->dp_protocol_minor);
373 }
374 return version;
375}
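/*
 * Worked example of the negotiation above: a peer sending major 3 with
 * dp_protocol_minor_mask 0x0003 shares minors {0, 1} with
 * RDS_IW_SUPPORTED_PROTOCOLS (0x0003), so common == 0x3 and the loop bumps
 * the result one past RDS_PROTOCOL_3_0 -- assuming RDS_PROTOCOL() keeps the
 * minor in the low bits, that selects 3.1, the highest minor both sides
 * support.
 */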
376
377int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
378 struct rdma_cm_event *event)
379{
380 const struct rds_iw_connect_private *dp = event->param.conn.private_data;
381 struct rds_iw_connect_private dp_rep;
382 struct rds_connection *conn = NULL;
383 struct rds_iw_connection *ic = NULL;
384 struct rdma_conn_param conn_param;
385 struct rds_iw_device *rds_iwdev;
386 u32 version;
387 int err, destroy = 1;
388
389 /* Check whether the remote protocol version matches ours. */
390 version = rds_iw_protocol_compatible(dp);
391 if (!version)
392 goto out;
393
394 rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u\n",
395 &dp->dp_saddr, &dp->dp_daddr,
396 RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version));
397
398 conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_iw_transport,
399 GFP_KERNEL);
400 if (IS_ERR(conn)) {
401 rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
402 conn = NULL;
403 goto out;
404 }
405
406 /*
407 * The connection request may occur while the
408 * previous connection still exists, e.g. in case of failover.
409 * But as connections may be initiated simultaneously
410 * by both hosts, we have a random backoff mechanism -
411 * see the comment above rds_queue_reconnect()
412 */
413 mutex_lock(&conn->c_cm_lock);
414 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
415 if (rds_conn_state(conn) == RDS_CONN_UP) {
416 rdsdebug("incoming connect while connecting\n");
417 rds_conn_drop(conn);
418 rds_iw_stats_inc(s_iw_listen_closed_stale);
419 } else
420 if (rds_conn_state(conn) == RDS_CONN_CONNECTING) {
421 /* Wait and see - our connect may still be succeeding */
422 rds_iw_stats_inc(s_iw_connect_raced);
423 }
424 mutex_unlock(&conn->c_cm_lock);
425 goto out;
426 }
427
428 ic = conn->c_transport_data;
429
430 rds_iw_set_protocol(conn, version);
431 rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit));
432
433 /* If the peer gave us the last packet it saw, process this as if
434 * we had received a regular ACK. */
435 if (dp->dp_ack_seq)
436 rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL);
437
438 BUG_ON(cm_id->context);
439 BUG_ON(ic->i_cm_id);
440
441 ic->i_cm_id = cm_id;
442 cm_id->context = conn;
443
444 rds_iwdev = ib_get_client_data(cm_id->device, &rds_iw_client);
445 ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey;
446
447 /* We got halfway through setting up the ib_connection; if we
448 * fail now, we have to take the long route out of this mess. */
449 destroy = 0;
450
451 err = rds_iw_setup_qp(conn);
452 if (err) {
453 rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err);
454 goto out;
455 }
456
457 rds_iw_cm_fill_conn_param(conn, &conn_param, &dp_rep, version);
458
459 /* rdma_accept() calls rdma_reject() internally if it fails */
460 err = rdma_accept(cm_id, &conn_param);
461 mutex_unlock(&conn->c_cm_lock);
462 if (err) {
463 rds_iw_conn_error(conn, "rdma_accept failed (%d)\n", err);
464 goto out;
465 }
466
467 return 0;
468
469out:
470 rdma_reject(cm_id, NULL, 0);
471 return destroy;
472}
473
474
475int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id)
476{
477 struct rds_connection *conn = cm_id->context;
478 struct rds_iw_connection *ic = conn->c_transport_data;
479 struct rdma_conn_param conn_param;
480 struct rds_iw_connect_private dp;
481 int ret;
482
483 /* If the peer doesn't do protocol negotiation, we must
484 * default to RDSv3.0 */
485 rds_iw_set_protocol(conn, RDS_PROTOCOL_3_0);
486 ic->i_flowctl = rds_iw_sysctl_flow_control; /* advertise flow control */
487
488 ret = rds_iw_setup_qp(conn);
489 if (ret) {
490 rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", ret);
491 goto out;
492 }
493
494 rds_iw_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION);
495
496 ret = rdma_connect(cm_id, &conn_param);
497 if (ret)
498 rds_iw_conn_error(conn, "rdma_connect failed (%d)\n", ret);
499
500out:
501 /* Beware - returning non-zero tells the rdma_cm to destroy
502 * the cm_id. We should certainly not do it as long as we still
503 * "own" the cm_id. */
504 if (ret) {
505 struct rds_iw_connection *ic = conn->c_transport_data;
506
507 if (ic->i_cm_id == cm_id)
508 ret = 0;
509 }
510 return ret;
511}
512
513int rds_iw_conn_connect(struct rds_connection *conn)
514{
515 struct rds_iw_connection *ic = conn->c_transport_data;
516 struct rds_iw_device *rds_iwdev;
517 struct sockaddr_in src, dest;
518 int ret;
519
520 /* XXX I wonder what effect the port space has */
521 /* delegate cm event handler to rdma_transport */
522 ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
523 RDMA_PS_TCP);
524 if (IS_ERR(ic->i_cm_id)) {
525 ret = PTR_ERR(ic->i_cm_id);
526 ic->i_cm_id = NULL;
527 rdsdebug("rdma_create_id() failed: %d\n", ret);
528 goto out;
529 }
530
531 rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn);
532
533 src.sin_family = AF_INET;
534 src.sin_addr.s_addr = (__force u32)conn->c_laddr;
535 src.sin_port = (__force u16)htons(0);
536
537 /* First, bind to the local address and device. */
538 ret = rdma_bind_addr(ic->i_cm_id, (struct sockaddr *) &src);
539 if (ret) {
540 rdsdebug("rdma_bind_addr(%pI4) failed: %d\n",
541 &conn->c_laddr, ret);
542 rdma_destroy_id(ic->i_cm_id);
543 ic->i_cm_id = NULL;
544 goto out;
545 }
546
547 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
548 ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey;
549
550 dest.sin_family = AF_INET;
551 dest.sin_addr.s_addr = (__force u32)conn->c_faddr;
552 dest.sin_port = (__force u16)htons(RDS_PORT);
553
554 ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src,
555 (struct sockaddr *)&dest,
556 RDS_RDMA_RESOLVE_TIMEOUT_MS);
557 if (ret) {
558 rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id,
559 ret);
560 rdma_destroy_id(ic->i_cm_id);
561 ic->i_cm_id = NULL;
562 }
563
564out:
565 return ret;
566}
567
568/*
569 * This only cleans up resources that were actually built up, so that it
570 * can be called at any point during startup. In fact it
571 * can be called multiple times for a given connection.
572 */
573void rds_iw_conn_shutdown(struct rds_connection *conn)
574{
575 struct rds_iw_connection *ic = conn->c_transport_data;
576 int err = 0;
577 struct ib_qp_attr qp_attr;
578
579 rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id,
580 ic->i_pd, ic->i_send_cq, ic->i_recv_cq,
581 ic->i_cm_id ? ic->i_cm_id->qp : NULL);
582
583 if (ic->i_cm_id) {
584 struct ib_device *dev = ic->i_cm_id->device;
585
586 rdsdebug("disconnecting cm %p\n", ic->i_cm_id);
587 err = rdma_disconnect(ic->i_cm_id);
588 if (err) {
589 /* Actually this may happen quite frequently, when
590 * an outgoing connect raced with an incoming connect.
591 */
592 rdsdebug("rds_iw_conn_shutdown: failed to disconnect,"
593 " cm: %p err %d\n", ic->i_cm_id, err);
594 }
595
596 if (ic->i_cm_id->qp) {
597 qp_attr.qp_state = IB_QPS_ERR;
598 ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE);
599 }
600
601 wait_event(rds_iw_ring_empty_wait,
602 rds_iw_ring_empty(&ic->i_send_ring) &&
603 rds_iw_ring_empty(&ic->i_recv_ring));
604
605 if (ic->i_send_hdrs)
606 ib_dma_free_coherent(dev,
607 ic->i_send_ring.w_nr *
608 sizeof(struct rds_header),
609 ic->i_send_hdrs,
610 ic->i_send_hdrs_dma);
611
612 if (ic->i_recv_hdrs)
613 ib_dma_free_coherent(dev,
614 ic->i_recv_ring.w_nr *
615 sizeof(struct rds_header),
616 ic->i_recv_hdrs,
617 ic->i_recv_hdrs_dma);
618
619 if (ic->i_ack)
620 ib_dma_free_coherent(dev, sizeof(struct rds_header),
621 ic->i_ack, ic->i_ack_dma);
622
623 if (ic->i_sends)
624 rds_iw_send_clear_ring(ic);
625 if (ic->i_recvs)
626 rds_iw_recv_clear_ring(ic);
627
628 if (ic->i_cm_id->qp)
629 rdma_destroy_qp(ic->i_cm_id);
630 if (ic->i_send_cq)
631 ib_destroy_cq(ic->i_send_cq);
632 if (ic->i_recv_cq)
633 ib_destroy_cq(ic->i_recv_cq);
634
635 /*
636 * If associated with an rds_iw_device:
637 * Move connection back to the nodev list.
638 * Remove cm_id from the device cm_id list.
639 */
640 if (ic->rds_iwdev) {
641
642 spin_lock_irq(&ic->rds_iwdev->spinlock);
643 BUG_ON(list_empty(&ic->iw_node));
644 list_del(&ic->iw_node);
645 spin_unlock_irq(&ic->rds_iwdev->spinlock);
646
647 spin_lock_irq(&iw_nodev_conns_lock);
648 list_add_tail(&ic->iw_node, &iw_nodev_conns);
649 spin_unlock_irq(&iw_nodev_conns_lock);
650 rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id);
651 ic->rds_iwdev = NULL;
652 }
653
654 rdma_destroy_id(ic->i_cm_id);
655
656 ic->i_cm_id = NULL;
657 ic->i_pd = NULL;
658 ic->i_mr = NULL;
659 ic->i_send_cq = NULL;
660 ic->i_recv_cq = NULL;
661 ic->i_send_hdrs = NULL;
662 ic->i_recv_hdrs = NULL;
663 ic->i_ack = NULL;
664 }
665 BUG_ON(ic->rds_iwdev);
666
667 /* Clear pending transmit */
668 if (ic->i_rm) {
669 rds_message_put(ic->i_rm);
670 ic->i_rm = NULL;
671 }
672
673 /* Clear the ACK state */
674 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
675 rds_iw_set_64bit(&ic->i_ack_next, 0);
676 ic->i_ack_recv = 0;
677
678 /* Clear flow control state */
679 ic->i_flowctl = 0;
680 atomic_set(&ic->i_credits, 0);
681
682 rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr);
683 rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr);
684
685 if (ic->i_iwinc) {
686 rds_inc_put(&ic->i_iwinc->ii_inc);
687 ic->i_iwinc = NULL;
688 }
689
690 vfree(ic->i_sends);
691 ic->i_sends = NULL;
692 vfree(ic->i_recvs);
693 ic->i_recvs = NULL;
694 rdsdebug("shutdown complete\n");
695}
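
The comment above rds_iw_conn_shutdown() describes the pattern that makes the function safe: every teardown step is guarded by a check that the resource actually exists, and the pointer is cleared afterwards, so the call works on a half-built connection and can be repeated. A minimal userspace sketch of that guard-and-clear pattern (hypothetical struct and fields, plain malloc/free standing in for the real teardown calls):

/* Illustrative sketch only - hypothetical types, not part of net/rds. */
#include <stdio.h>
#include <stdlib.h>

struct conn_res {
	void *qp;		/* each resource may or may not exist yet */
	void *send_ring;
};

/* Tear down whatever happens to exist and forget it, so that a second
 * call, or a call after a half-finished setup, is harmless. */
static void conn_shutdown(struct conn_res *c)
{
	if (c->qp) {
		free(c->qp);
		c->qp = NULL;
	}
	if (c->send_ring) {
		free(c->send_ring);
		c->send_ring = NULL;
	}
}

int main(void)
{
	struct conn_res c = { malloc(16), NULL };	/* setup stopped halfway */

	conn_shutdown(&c);	/* frees only the qp */
	conn_shutdown(&c);	/* safe no-op */
	printf("shutdown complete\n");
	return 0;
}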
696
697int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
698{
699 struct rds_iw_connection *ic;
700 unsigned long flags;
701
702 /* XXX too lazy? */
703 ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL);
704 if (ic == NULL)
705 return -ENOMEM;
706
707 INIT_LIST_HEAD(&ic->iw_node);
708 mutex_init(&ic->i_recv_mutex);
709
710 /*
711 * rds_iw_conn_shutdown() waits for these to be emptied so they
712 * must be initialized before it can be called.
713 */
714 rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr);
715 rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr);
716
717 ic->conn = conn;
718 conn->c_transport_data = ic;
719
720 spin_lock_irqsave(&iw_nodev_conns_lock, flags);
721 list_add_tail(&ic->iw_node, &iw_nodev_conns);
722 spin_unlock_irqrestore(&iw_nodev_conns_lock, flags);
723
724
725 rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data);
726 return 0;
727}
728
729void rds_iw_conn_free(void *arg)
730{
731 struct rds_iw_connection *ic = arg;
732 rdsdebug("ic %p\n", ic);
733 list_del(&ic->iw_node);
734 kfree(ic);
735}
736
737/*
738 * An error occurred on the connection
739 */
740void
741__rds_iw_conn_error(struct rds_connection *conn, const char *fmt, ...)
742{
743 va_list ap;
744
745 rds_conn_drop(conn);
746
747 va_start(ap, fmt);
748 vprintk(fmt, ap);
749 va_end(ap);
750}
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
new file mode 100644
index 000000000000..1c02a8f952d0
--- /dev/null
+++ b/net/rds/iw_rdma.c
@@ -0,0 +1,888 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34
35#include "rds.h"
36#include "rdma.h"
37#include "iw.h"
38
39
40/*
41 * This is stored as mr->r_trans_private.
42 */
43struct rds_iw_mr {
44 struct rds_iw_device *device;
45 struct rds_iw_mr_pool *pool;
46 struct rdma_cm_id *cm_id;
47
48 struct ib_mr *mr;
49 struct ib_fast_reg_page_list *page_list;
50
51 struct rds_iw_mapping mapping;
52 unsigned char remap_count;
53};
54
55/*
56 * Our own little MR pool
57 */
58struct rds_iw_mr_pool {
59 struct rds_iw_device *device; /* back ptr to the device that owns us */
60
61 struct mutex flush_lock; /* serialize fmr invalidate */
62 struct work_struct flush_worker; /* flush worker */
63
64 spinlock_t list_lock; /* protect variables below */
65 atomic_t item_count; /* total # of MRs */
66 atomic_t dirty_count; /* # dirty of MRs */
67 struct list_head dirty_list; /* dirty mappings */
68	struct list_head	clean_list;		/* unused & unmapped MRs */
69 atomic_t free_pinned; /* memory pinned by free MRs */
70 unsigned long max_message_size; /* in pages */
71 unsigned long max_items;
72 unsigned long max_items_soft;
73 unsigned long max_free_pinned;
74 int max_pages;
75};
76
77static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all);
78static void rds_iw_mr_pool_flush_worker(struct work_struct *work);
79static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
80static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
81 struct rds_iw_mr *ibmr,
82 struct scatterlist *sg, unsigned int nents);
83static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
84static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
85 struct list_head *unmap_list,
86 struct list_head *kill_list);
87static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr);
88
89static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id)
90{
91 struct rds_iw_device *iwdev;
92 struct rds_iw_cm_id *i_cm_id;
93
94 *rds_iwdev = NULL;
95 *cm_id = NULL;
96
97 list_for_each_entry(iwdev, &rds_iw_devices, list) {
98 spin_lock_irq(&iwdev->spinlock);
99 list_for_each_entry(i_cm_id, &iwdev->cm_id_list, list) {
100 struct sockaddr_in *src_addr, *dst_addr;
101
102 src_addr = (struct sockaddr_in *)&i_cm_id->cm_id->route.addr.src_addr;
103 dst_addr = (struct sockaddr_in *)&i_cm_id->cm_id->route.addr.dst_addr;
104
105 rdsdebug("local ipaddr = %x port %d, "
106 "remote ipaddr = %x port %d"
107 "..looking for %x port %d, "
108 "remote ipaddr = %x port %d\n",
109 src_addr->sin_addr.s_addr,
110 src_addr->sin_port,
111 dst_addr->sin_addr.s_addr,
112 dst_addr->sin_port,
113 rs->rs_bound_addr,
114 rs->rs_bound_port,
115 rs->rs_conn_addr,
116 rs->rs_conn_port);
117#ifdef WORKING_TUPLE_DETECTION
118 if (src_addr->sin_addr.s_addr == rs->rs_bound_addr &&
119 src_addr->sin_port == rs->rs_bound_port &&
120 dst_addr->sin_addr.s_addr == rs->rs_conn_addr &&
121 dst_addr->sin_port == rs->rs_conn_port) {
122#else
123 /* FIXME - needs to compare the local and remote
124 * ipaddr/port tuple, but the ipaddr is the only
125			 * available information in the rds_sock (as the rest are
126			 * zero'ed). It doesn't appear to be properly populated
127 * during connection setup...
128 */
129 if (src_addr->sin_addr.s_addr == rs->rs_bound_addr) {
130#endif
131 spin_unlock_irq(&iwdev->spinlock);
132 *rds_iwdev = iwdev;
133 *cm_id = i_cm_id->cm_id;
134 return 0;
135 }
136 }
137 spin_unlock_irq(&iwdev->spinlock);
138 }
139
140 return 1;
141}
142
143static int rds_iw_add_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id)
144{
145 struct rds_iw_cm_id *i_cm_id;
146
147 i_cm_id = kmalloc(sizeof *i_cm_id, GFP_KERNEL);
148 if (!i_cm_id)
149 return -ENOMEM;
150
151 i_cm_id->cm_id = cm_id;
152
153 spin_lock_irq(&rds_iwdev->spinlock);
154 list_add_tail(&i_cm_id->list, &rds_iwdev->cm_id_list);
155 spin_unlock_irq(&rds_iwdev->spinlock);
156
157 return 0;
158}
159
160void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id)
161{
162 struct rds_iw_cm_id *i_cm_id;
163
164 spin_lock_irq(&rds_iwdev->spinlock);
165 list_for_each_entry(i_cm_id, &rds_iwdev->cm_id_list, list) {
166 if (i_cm_id->cm_id == cm_id) {
167 list_del(&i_cm_id->list);
168 kfree(i_cm_id);
169 break;
170 }
171 }
172 spin_unlock_irq(&rds_iwdev->spinlock);
173}
174
175
176int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id)
177{
178 struct sockaddr_in *src_addr, *dst_addr;
179 struct rds_iw_device *rds_iwdev_old;
180 struct rds_sock rs;
181 struct rdma_cm_id *pcm_id;
182 int rc;
183
184 src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr;
185 dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr;
186
187 rs.rs_bound_addr = src_addr->sin_addr.s_addr;
188 rs.rs_bound_port = src_addr->sin_port;
189 rs.rs_conn_addr = dst_addr->sin_addr.s_addr;
190 rs.rs_conn_port = dst_addr->sin_port;
191
192 rc = rds_iw_get_device(&rs, &rds_iwdev_old, &pcm_id);
193 if (rc)
194 rds_iw_remove_cm_id(rds_iwdev, cm_id);
195
196 return rds_iw_add_cm_id(rds_iwdev, cm_id);
197}
198
199int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn)
200{
201 struct rds_iw_connection *ic = conn->c_transport_data;
202
203 /* conn was previously on the nodev_conns_list */
204 spin_lock_irq(&iw_nodev_conns_lock);
205 BUG_ON(list_empty(&iw_nodev_conns));
206 BUG_ON(list_empty(&ic->iw_node));
207 list_del(&ic->iw_node);
208 spin_unlock_irq(&iw_nodev_conns_lock);
209
210 spin_lock_irq(&rds_iwdev->spinlock);
211 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
212 spin_unlock_irq(&rds_iwdev->spinlock);
213
214 ic->rds_iwdev = rds_iwdev;
215
216 return 0;
217}
218
219void rds_iw_remove_nodev_conns(void)
220{
221 struct rds_iw_connection *ic, *_ic;
222 LIST_HEAD(tmp_list);
223
224 /* avoid calling conn_destroy with irqs off */
225 spin_lock_irq(&iw_nodev_conns_lock);
226 list_splice(&iw_nodev_conns, &tmp_list);
227 INIT_LIST_HEAD(&iw_nodev_conns);
228 spin_unlock_irq(&iw_nodev_conns_lock);
229
230 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) {
231 if (ic->conn->c_passive)
232 rds_conn_destroy(ic->conn->c_passive);
233 rds_conn_destroy(ic->conn);
234 }
235}
236
237void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev)
238{
239 struct rds_iw_connection *ic, *_ic;
240 LIST_HEAD(tmp_list);
241
242 /* avoid calling conn_destroy with irqs off */
243 spin_lock_irq(&rds_iwdev->spinlock);
244 list_splice(&rds_iwdev->conn_list, &tmp_list);
245 INIT_LIST_HEAD(&rds_iwdev->conn_list);
246 spin_unlock_irq(&rds_iwdev->spinlock);
247
248 list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) {
249 if (ic->conn->c_passive)
250 rds_conn_destroy(ic->conn->c_passive);
251 rds_conn_destroy(ic->conn);
252 }
253}
254
255static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg,
256 struct scatterlist *list, unsigned int sg_len)
257{
258 sg->list = list;
259 sg->len = sg_len;
260 sg->dma_len = 0;
261 sg->dma_npages = 0;
262 sg->bytes = 0;
263}
264
265static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev,
266 struct rds_iw_scatterlist *sg,
267 unsigned int dma_page_shift)
268{
269 struct ib_device *dev = rds_iwdev->dev;
270 u64 *dma_pages = NULL;
271 u64 dma_mask;
272 unsigned int dma_page_size;
273 int i, j, ret;
274
275 dma_page_size = 1 << dma_page_shift;
276 dma_mask = dma_page_size - 1;
277
278 WARN_ON(sg->dma_len);
279
280 sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
281 if (unlikely(!sg->dma_len)) {
282 printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n");
283 return ERR_PTR(-EBUSY);
284 }
285
286 sg->bytes = 0;
287 sg->dma_npages = 0;
288
289 ret = -EINVAL;
290 for (i = 0; i < sg->dma_len; ++i) {
291 unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]);
292 u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]);
293 u64 end_addr;
294
295 sg->bytes += dma_len;
296
297 end_addr = dma_addr + dma_len;
298 if (dma_addr & dma_mask) {
299 if (i > 0)
300 goto out_unmap;
301 dma_addr &= ~dma_mask;
302 }
303 if (end_addr & dma_mask) {
304 if (i < sg->dma_len - 1)
305 goto out_unmap;
306 end_addr = (end_addr + dma_mask) & ~dma_mask;
307 }
308
309 sg->dma_npages += (end_addr - dma_addr) >> dma_page_shift;
310 }
311
312 /* Now gather the dma addrs into one list */
313 if (sg->dma_npages > fastreg_message_size)
314 goto out_unmap;
315
316 dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC);
317 if (!dma_pages) {
318 ret = -ENOMEM;
319 goto out_unmap;
320 }
321
322 for (i = j = 0; i < sg->dma_len; ++i) {
323 unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]);
324 u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]);
325 u64 end_addr;
326
327 end_addr = dma_addr + dma_len;
328 dma_addr &= ~dma_mask;
329 for (; dma_addr < end_addr; dma_addr += dma_page_size)
330 dma_pages[j++] = dma_addr;
331 BUG_ON(j > sg->dma_npages);
332 }
333
334 return dma_pages;
335
336out_unmap:
337 ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL);
338 sg->dma_len = 0;
339 kfree(dma_pages);
340 return ERR_PTR(ret);
341}
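
rds_iw_map_scatterlist() accepts a scatterlist only if, apart from a possibly unaligned head and tail, every boundary falls on a DMA page: the first entry may start mid-page and the last may end mid-page, while any interior misalignment is rejected, and each entry contributes its page-rounded span to dma_npages. A standalone sketch of just that arithmetic (made-up segment values, a 4 KB page shift assumed):

/* Illustrative sketch only - not part of net/rds; segment values are made up. */
#include <stdio.h>
#include <stdint.h>

struct seg { uint64_t addr; uint64_t len; };

/* Number of DMA pages covered, or -1 if an interior entry is misaligned. */
static long count_dma_pages(const struct seg *sg, int n, unsigned int page_shift)
{
	uint64_t mask = (1ULL << page_shift) - 1;
	long npages = 0;
	int i;

	for (i = 0; i < n; i++) {
		uint64_t start = sg[i].addr;
		uint64_t end = start + sg[i].len;

		if ((start & mask) && i > 0)
			return -1;	/* gap before this entry */
		if ((end & mask) && i < n - 1)
			return -1;	/* gap after this entry */

		start &= ~mask;			/* round head down */
		end = (end + mask) & ~mask;	/* round tail up */
		npages += (end - start) >> page_shift;
	}
	return npages;
}

int main(void)
{
	/* 4 KB pages: the first entry starts mid-page, the last ends mid-page. */
	struct seg sg[] = { { 0x1100, 0x2f00 }, { 0x8000, 0x0900 } };

	printf("%ld dma pages\n", count_dma_pages(sg, 2, 12));	/* 3 + 1 = 4 */
	return 0;
}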
342
343
344struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *rds_iwdev)
345{
346 struct rds_iw_mr_pool *pool;
347
348 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
349 if (!pool) {
350 printk(KERN_WARNING "RDS/IW: rds_iw_create_mr_pool alloc error\n");
351 return ERR_PTR(-ENOMEM);
352 }
353
354 pool->device = rds_iwdev;
355 INIT_LIST_HEAD(&pool->dirty_list);
356 INIT_LIST_HEAD(&pool->clean_list);
357 mutex_init(&pool->flush_lock);
358 spin_lock_init(&pool->list_lock);
359 INIT_WORK(&pool->flush_worker, rds_iw_mr_pool_flush_worker);
360
361 pool->max_message_size = fastreg_message_size;
362 pool->max_items = fastreg_pool_size;
363 pool->max_free_pinned = pool->max_items * pool->max_message_size / 4;
364 pool->max_pages = fastreg_message_size;
365
366 /* We never allow more than max_items MRs to be allocated.
367	 * When we exceed max_items_soft, we start freeing
368 * items more aggressively.
369 * Make sure that max_items > max_items_soft > max_items / 2
370 */
371 pool->max_items_soft = pool->max_items * 3 / 4;
372
373 return pool;
374}
375
376void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo)
377{
378 struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool;
379
380 iinfo->rdma_mr_max = pool->max_items;
381 iinfo->rdma_mr_size = pool->max_pages;
382}
383
384void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *pool)
385{
386 flush_workqueue(rds_wq);
387 rds_iw_flush_mr_pool(pool, 1);
388 BUG_ON(atomic_read(&pool->item_count));
389 BUG_ON(atomic_read(&pool->free_pinned));
390 kfree(pool);
391}
392
393static inline struct rds_iw_mr *rds_iw_reuse_fmr(struct rds_iw_mr_pool *pool)
394{
395 struct rds_iw_mr *ibmr = NULL;
396 unsigned long flags;
397
398 spin_lock_irqsave(&pool->list_lock, flags);
399 if (!list_empty(&pool->clean_list)) {
400 ibmr = list_entry(pool->clean_list.next, struct rds_iw_mr, mapping.m_list);
401 list_del_init(&ibmr->mapping.m_list);
402 }
403 spin_unlock_irqrestore(&pool->list_lock, flags);
404
405 return ibmr;
406}
407
408static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev)
409{
410 struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool;
411 struct rds_iw_mr *ibmr = NULL;
412 int err = 0, iter = 0;
413
414 while (1) {
415 ibmr = rds_iw_reuse_fmr(pool);
416 if (ibmr)
417 return ibmr;
418
419 /* No clean MRs - now we have the choice of either
420 * allocating a fresh MR up to the limit imposed by the
421		 * driver, or flushing any dirty unused MRs.
422 * We try to avoid stalling in the send path if possible,
423 * so we allocate as long as we're allowed to.
424 *
425 * We're fussy with enforcing the FMR limit, though. If the driver
426 * tells us we can't use more than N fmrs, we shouldn't start
427 * arguing with it */
428 if (atomic_inc_return(&pool->item_count) <= pool->max_items)
429 break;
430
431 atomic_dec(&pool->item_count);
432
433 if (++iter > 2) {
434 rds_iw_stats_inc(s_iw_rdma_mr_pool_depleted);
435 return ERR_PTR(-EAGAIN);
436 }
437
438 /* We do have some empty MRs. Flush them out. */
439 rds_iw_stats_inc(s_iw_rdma_mr_pool_wait);
440 rds_iw_flush_mr_pool(pool, 0);
441 }
442
443 ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL);
444 if (!ibmr) {
445 err = -ENOMEM;
446 goto out_no_cigar;
447 }
448
449 spin_lock_init(&ibmr->mapping.m_lock);
450 INIT_LIST_HEAD(&ibmr->mapping.m_list);
451 ibmr->mapping.m_mr = ibmr;
452
453 err = rds_iw_init_fastreg(pool, ibmr);
454 if (err)
455 goto out_no_cigar;
456
457 rds_iw_stats_inc(s_iw_rdma_mr_alloc);
458 return ibmr;
459
460out_no_cigar:
461 if (ibmr) {
462 rds_iw_destroy_fastreg(pool, ibmr);
463 kfree(ibmr);
464 }
465 atomic_dec(&pool->item_count);
466 return ERR_PTR(err);
467}
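
rds_iw_alloc_mr() prefers reuse over allocation and allocation over flushing: take a clean MR if one is available, otherwise allocate a fresh one while the hard item_count cap allows it, and only then flush the pool and retry a couple of times before giving up with EAGAIN. A simplified, single-threaded sketch of that decision loop (plain counters and a stub flush stand in for the real pool):

/* Illustrative sketch only - plain ints stand in for the pool's atomics. */
#include <stdio.h>

#define MAX_ITEMS	64	/* hard cap imposed by the driver */
#define MAX_TRIES	3

static int item_count;		/* MRs currently allocated */
static int clean_count;		/* MRs waiting on the clean list */

/* Stand-in for rds_iw_flush_mr_pool(): pretend two MRs come back clean. */
static void flush_pool(void)
{
	clean_count += 2;
}

/* 0 on success, -1 when the pool stays depleted after a few flushes. */
static int alloc_mr(void)
{
	int tries;

	for (tries = 0; tries < MAX_TRIES; tries++) {
		if (clean_count > 0) {		/* 1: reuse before allocating */
			clean_count--;
			return 0;
		}
		if (++item_count <= MAX_ITEMS)	/* 2: fresh MR under the cap */
			return 0;
		item_count--;			/* over the cap: undo ... */
		flush_pool();			/* ... and flush, then retry */
	}
	return -1;
}

int main(void)
{
	item_count = MAX_ITEMS;		/* pool already at its hard limit */
	printf("alloc_mr() = %d\n", alloc_mr());	/* succeeds after one flush */
	return 0;
}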
468
469void rds_iw_sync_mr(void *trans_private, int direction)
470{
471 struct rds_iw_mr *ibmr = trans_private;
472 struct rds_iw_device *rds_iwdev = ibmr->device;
473
474 switch (direction) {
475 case DMA_FROM_DEVICE:
476 ib_dma_sync_sg_for_cpu(rds_iwdev->dev, ibmr->mapping.m_sg.list,
477 ibmr->mapping.m_sg.dma_len, DMA_BIDIRECTIONAL);
478 break;
479 case DMA_TO_DEVICE:
480 ib_dma_sync_sg_for_device(rds_iwdev->dev, ibmr->mapping.m_sg.list,
481 ibmr->mapping.m_sg.dma_len, DMA_BIDIRECTIONAL);
482 break;
483 }
484}
485
486static inline unsigned int rds_iw_flush_goal(struct rds_iw_mr_pool *pool, int free_all)
487{
488 unsigned int item_count;
489
490 item_count = atomic_read(&pool->item_count);
491 if (free_all)
492 return item_count;
493
494 return 0;
495}
496
497/*
498 * Flush our pool of MRs.
499 * At a minimum, all currently unused MRs are unmapped.
500 * If the number of MRs allocated exceeds the limit, we also try
501 * to free as many MRs as needed to get back to this limit.
502 */
503static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all)
504{
505 struct rds_iw_mr *ibmr, *next;
506 LIST_HEAD(unmap_list);
507 LIST_HEAD(kill_list);
508 unsigned long flags;
509 unsigned int nfreed = 0, ncleaned = 0, free_goal;
510 int ret = 0;
511
512 rds_iw_stats_inc(s_iw_rdma_mr_pool_flush);
513
514 mutex_lock(&pool->flush_lock);
515
516 spin_lock_irqsave(&pool->list_lock, flags);
517 /* Get the list of all mappings to be destroyed */
518 list_splice_init(&pool->dirty_list, &unmap_list);
519 if (free_all)
520 list_splice_init(&pool->clean_list, &kill_list);
521 spin_unlock_irqrestore(&pool->list_lock, flags);
522
523 free_goal = rds_iw_flush_goal(pool, free_all);
524
525 /* Batched invalidate of dirty MRs.
526 * For FMR based MRs, the mappings on the unmap list are
527 * actually members of an ibmr (ibmr->mapping). They either
528 * migrate to the kill_list, or have been cleaned and should be
529 * moved to the clean_list.
530 * For fastregs, they will be dynamically allocated, and
531 * will be destroyed by the unmap function.
532 */
533 if (!list_empty(&unmap_list)) {
534 ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, &kill_list);
535 /* If we've been asked to destroy all MRs, move those
536 * that were simply cleaned to the kill list */
537 if (free_all)
538 list_splice_init(&unmap_list, &kill_list);
539 }
540
541 /* Destroy any MRs that are past their best before date */
542 list_for_each_entry_safe(ibmr, next, &kill_list, mapping.m_list) {
543 rds_iw_stats_inc(s_iw_rdma_mr_free);
544 list_del(&ibmr->mapping.m_list);
545 rds_iw_destroy_fastreg(pool, ibmr);
546 kfree(ibmr);
547 nfreed++;
548 }
549
550	/* Anything that remains is a laundered ibmr, which we can add
551 * back to the clean list. */
552 if (!list_empty(&unmap_list)) {
553 spin_lock_irqsave(&pool->list_lock, flags);
554 list_splice(&unmap_list, &pool->clean_list);
555 spin_unlock_irqrestore(&pool->list_lock, flags);
556 }
557
558 atomic_sub(ncleaned, &pool->dirty_count);
559 atomic_sub(nfreed, &pool->item_count);
560
561 mutex_unlock(&pool->flush_lock);
562 return ret;
563}
564
565static void rds_iw_mr_pool_flush_worker(struct work_struct *work)
566{
567 struct rds_iw_mr_pool *pool = container_of(work, struct rds_iw_mr_pool, flush_worker);
568
569 rds_iw_flush_mr_pool(pool, 0);
570}
571
572void rds_iw_free_mr(void *trans_private, int invalidate)
573{
574 struct rds_iw_mr *ibmr = trans_private;
575 struct rds_iw_mr_pool *pool = ibmr->device->mr_pool;
576
577 rdsdebug("RDS/IW: free_mr nents %u\n", ibmr->mapping.m_sg.len);
578 if (!pool)
579 return;
580
581 /* Return it to the pool's free list */
582 rds_iw_free_fastreg(pool, ibmr);
583
584 /* If we've pinned too many pages, request a flush */
585 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned
586 || atomic_read(&pool->dirty_count) >= pool->max_items / 10)
587 queue_work(rds_wq, &pool->flush_worker);
588
589 if (invalidate) {
590 if (likely(!in_interrupt())) {
591 rds_iw_flush_mr_pool(pool, 0);
592 } else {
593			/* We get here if the user created an MR marked
594 * as use_once and invalidate at the same time. */
595 queue_work(rds_wq, &pool->flush_worker);
596 }
597 }
598}
599
600void rds_iw_flush_mrs(void)
601{
602 struct rds_iw_device *rds_iwdev;
603
604 list_for_each_entry(rds_iwdev, &rds_iw_devices, list) {
605 struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool;
606
607 if (pool)
608 rds_iw_flush_mr_pool(pool, 0);
609 }
610}
611
612void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
613 struct rds_sock *rs, u32 *key_ret)
614{
615 struct rds_iw_device *rds_iwdev;
616 struct rds_iw_mr *ibmr = NULL;
617 struct rdma_cm_id *cm_id;
618 int ret;
619
620 ret = rds_iw_get_device(rs, &rds_iwdev, &cm_id);
621 if (ret || !cm_id) {
622 ret = -ENODEV;
623 goto out;
624 }
625
626 if (!rds_iwdev->mr_pool) {
627 ret = -ENODEV;
628 goto out;
629 }
630
631 ibmr = rds_iw_alloc_mr(rds_iwdev);
632 if (IS_ERR(ibmr))
633 return ibmr;
634
635 ibmr->cm_id = cm_id;
636 ibmr->device = rds_iwdev;
637
638 ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents);
639 if (ret == 0)
640 *key_ret = ibmr->mr->rkey;
641 else
642 printk(KERN_WARNING "RDS/IW: failed to map mr (errno=%d)\n", ret);
643
644out:
645 if (ret) {
646 if (ibmr)
647 rds_iw_free_mr(ibmr, 0);
648 ibmr = ERR_PTR(ret);
649 }
650 return ibmr;
651}
652
653/*
654 * iWARP fastreg handling
655 *
656 * The life cycle of a fastreg registration is a bit different from
657 * FMRs.
658 * The idea behind fastreg is to have one MR, to which we bind different
659 * mappings over time. To avoid stalling on the expensive map and invalidate
660 * operations, these operations are pipelined on the same send queue on
661 * which we want to send the message containing the r_key.
662 *
663 * This creates a bit of a problem for us, as we do not have the destination
664 * IP in GET_MR, so the connection must be set up prior to the GET_MR call for
665 * RDMA to be correctly setup. If a fastreg request is present, rds_iw_xmit
666 * will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request
667 * before queuing the SEND. When completions for these arrive, they are
668 * dispatched to the MR, which then has a bit set showing that RDMA can be performed.
669 *
670 * There is another interesting aspect that's related to invalidation.
671 * The application can request that a mapping is invalidated in FREE_MR.
672 * The expectation there is that this invalidation step includes ALL
673 * PREVIOUSLY FREED MRs.
674 */
675static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool,
676 struct rds_iw_mr *ibmr)
677{
678 struct rds_iw_device *rds_iwdev = pool->device;
679 struct ib_fast_reg_page_list *page_list = NULL;
680 struct ib_mr *mr;
681 int err;
682
683 mr = ib_alloc_fast_reg_mr(rds_iwdev->pd, pool->max_message_size);
684 if (IS_ERR(mr)) {
685 err = PTR_ERR(mr);
686
687 printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed (err=%d)\n", err);
688 return err;
689 }
690
691 /* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages
692 * is not filled in.
693 */
694 page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size);
695 if (IS_ERR(page_list)) {
696 err = PTR_ERR(page_list);
697
698 printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err);
699 ib_dereg_mr(mr);
700 return err;
701 }
702
703 ibmr->page_list = page_list;
704 ibmr->mr = mr;
705 return 0;
706}
707
708static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping)
709{
710 struct rds_iw_mr *ibmr = mapping->m_mr;
711 struct ib_send_wr f_wr, *failed_wr;
712 int ret;
713
714 /*
715 * Perform a WR for the fast_reg_mr. Each individual page
716 * in the sg list is added to the fast reg page list and placed
717	 * inside the fast_reg_mr WR. The key used is a rolling 8-bit
718 * counter, which should guarantee uniqueness.
719 */
720 ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++);
721 mapping->m_rkey = ibmr->mr->rkey;
722
723 memset(&f_wr, 0, sizeof(f_wr));
724 f_wr.wr_id = RDS_IW_FAST_REG_WR_ID;
725 f_wr.opcode = IB_WR_FAST_REG_MR;
726 f_wr.wr.fast_reg.length = mapping->m_sg.bytes;
727 f_wr.wr.fast_reg.rkey = mapping->m_rkey;
728 f_wr.wr.fast_reg.page_list = ibmr->page_list;
729 f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len;
730 f_wr.wr.fast_reg.page_shift = ibmr->device->page_shift;
731 f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE |
732 IB_ACCESS_REMOTE_READ |
733 IB_ACCESS_REMOTE_WRITE;
734 f_wr.wr.fast_reg.iova_start = 0;
735 f_wr.send_flags = IB_SEND_SIGNALED;
736
737 failed_wr = &f_wr;
738 ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr);
739 BUG_ON(failed_wr != &f_wr);
740 if (ret && printk_ratelimit())
741 printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
742 __func__, __LINE__, ret);
743 return ret;
744}
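
The "rolling 8-bit counter" works because an rkey is effectively a 24-bit MR index plus a low key byte, and ib_update_fast_reg_key() replaces only that low byte (that is the behaviour assumed here), so each re-registration of the same MR advertises a different rkey and stale keys from earlier mappings stop matching. A tiny sketch of the key arithmetic under that assumption:

/* Illustrative sketch only - assumes the key's low byte is the part that rolls. */
#include <stdio.h>
#include <stdint.h>

/* Keep the MR's 24-bit index, replace only the low "key" byte. */
static uint32_t update_key(uint32_t rkey, uint8_t newkey)
{
	return (rkey & 0xffffff00u) | newkey;
}

int main(void)
{
	uint32_t rkey = 0x00abcd00u;	/* made-up initial rkey */
	uint8_t remap_count = 0;
	int i;

	/* Each re-registration of the same MR advertises a fresh rkey,
	 * so a peer holding an old rkey no longer hits the new mapping. */
	for (i = 0; i < 3; i++) {
		rkey = update_key(rkey, remap_count++);
		printf("registration %d: rkey 0x%08x\n", i, (unsigned)rkey);
	}
	return 0;
}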
745
746static int rds_iw_rdma_fastreg_inv(struct rds_iw_mr *ibmr)
747{
748 struct ib_send_wr s_wr, *failed_wr;
749 int ret = 0;
750
751 if (!ibmr->cm_id->qp || !ibmr->mr)
752 goto out;
753
754 memset(&s_wr, 0, sizeof(s_wr));
755 s_wr.wr_id = RDS_IW_LOCAL_INV_WR_ID;
756 s_wr.opcode = IB_WR_LOCAL_INV;
757 s_wr.ex.invalidate_rkey = ibmr->mr->rkey;
758 s_wr.send_flags = IB_SEND_SIGNALED;
759
760 failed_wr = &s_wr;
761 ret = ib_post_send(ibmr->cm_id->qp, &s_wr, &failed_wr);
762 if (ret && printk_ratelimit()) {
763 printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n",
764 __func__, __LINE__, ret);
765 goto out;
766 }
767out:
768 return ret;
769}
770
771static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool,
772 struct rds_iw_mr *ibmr,
773 struct scatterlist *sg,
774 unsigned int sg_len)
775{
776 struct rds_iw_device *rds_iwdev = pool->device;
777 struct rds_iw_mapping *mapping = &ibmr->mapping;
778 u64 *dma_pages;
779 int i, ret = 0;
780
781 rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len);
782
783 dma_pages = rds_iw_map_scatterlist(rds_iwdev,
784 &mapping->m_sg,
785 rds_iwdev->page_shift);
786 if (IS_ERR(dma_pages)) {
787 ret = PTR_ERR(dma_pages);
788 dma_pages = NULL;
789 goto out;
790 }
791
792 if (mapping->m_sg.dma_len > pool->max_message_size) {
793 ret = -EMSGSIZE;
794 goto out;
795 }
796
797 for (i = 0; i < mapping->m_sg.dma_npages; ++i)
798 ibmr->page_list->page_list[i] = dma_pages[i];
799
800 ret = rds_iw_rdma_build_fastreg(mapping);
801 if (ret)
802 goto out;
803
804 rds_iw_stats_inc(s_iw_rdma_mr_used);
805
806out:
807 kfree(dma_pages);
808
809 return ret;
810}
811
812/*
813 * "Free" a fastreg MR.
814 */
815static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool,
816 struct rds_iw_mr *ibmr)
817{
818 unsigned long flags;
819 int ret;
820
821 if (!ibmr->mapping.m_sg.dma_len)
822 return;
823
824 ret = rds_iw_rdma_fastreg_inv(ibmr);
825 if (ret)
826 return;
827
828 /* Try to post the LOCAL_INV WR to the queue. */
829 spin_lock_irqsave(&pool->list_lock, flags);
830
831 list_add_tail(&ibmr->mapping.m_list, &pool->dirty_list);
832 atomic_add(ibmr->mapping.m_sg.len, &pool->free_pinned);
833 atomic_inc(&pool->dirty_count);
834
835 spin_unlock_irqrestore(&pool->list_lock, flags);
836}
837
838static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool,
839 struct list_head *unmap_list,
840 struct list_head *kill_list)
841{
842 struct rds_iw_mapping *mapping, *next;
843 unsigned int ncleaned = 0;
844 LIST_HEAD(laundered);
845
846 /* Batched invalidation of fastreg MRs.
847 * Why do we do it this way, even though we could pipeline unmap
848 * and remap? The reason is the application semantics - when the
849 * application requests an invalidation of MRs, it expects all
850 * previously released R_Keys to become invalid.
851 *
852 * If we implement MR reuse naively, we risk memory corruption
853 * (this has actually been observed). So the default behavior
854	 * requires that an MR goes through an explicit unmap operation before
855 * we can reuse it again.
856 *
857 * We could probably improve on this a little, by allowing immediate
858	 * reuse of an MR on the same socket (e.g. you could add a small
859	 * cache of unused MRs to struct rds_socket - GET_MR could grab one
860 * of these without requiring an explicit invalidate).
861 */
862 while (!list_empty(unmap_list)) {
863 unsigned long flags;
864
865 spin_lock_irqsave(&pool->list_lock, flags);
866 list_for_each_entry_safe(mapping, next, unmap_list, m_list) {
867 list_move(&mapping->m_list, &laundered);
868 ncleaned++;
869 }
870 spin_unlock_irqrestore(&pool->list_lock, flags);
871 }
872
873 /* Move all laundered mappings back to the unmap list.
874 * We do not kill any WRs right now - it doesn't seem the
875 * fastreg API has a max_remap limit. */
876 list_splice_init(&laundered, unmap_list);
877
878 return ncleaned;
879}
880
881static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool,
882 struct rds_iw_mr *ibmr)
883{
884 if (ibmr->page_list)
885 ib_free_fast_reg_page_list(ibmr->page_list);
886 if (ibmr->mr)
887 ib_dereg_mr(ibmr->mr);
888}
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
new file mode 100644
index 000000000000..a1931f0027a2
--- /dev/null
+++ b/net/rds/iw_recv.c
@@ -0,0 +1,869 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/pci.h>
35#include <linux/dma-mapping.h>
36#include <rdma/rdma_cm.h>
37
38#include "rds.h"
39#include "iw.h"
40
41static struct kmem_cache *rds_iw_incoming_slab;
42static struct kmem_cache *rds_iw_frag_slab;
43static atomic_t rds_iw_allocation = ATOMIC_INIT(0);
44
45static void rds_iw_frag_drop_page(struct rds_page_frag *frag)
46{
47 rdsdebug("frag %p page %p\n", frag, frag->f_page);
48 __free_page(frag->f_page);
49 frag->f_page = NULL;
50}
51
52static void rds_iw_frag_free(struct rds_page_frag *frag)
53{
54 rdsdebug("frag %p page %p\n", frag, frag->f_page);
55 BUG_ON(frag->f_page != NULL);
56 kmem_cache_free(rds_iw_frag_slab, frag);
57}
58
59/*
60 * We map a page at a time. Its fragments are posted in order. This
61 * is called in fragment order as the fragments get send completion events.
62 * Only the last frag in the page performs the unmapping.
63 *
64 * It's OK for ring cleanup to call this in whatever order it likes because
65 * DMA is not in flight and so we can unmap while other ring entries still
66 * hold page references in their frags.
67 */
68static void rds_iw_recv_unmap_page(struct rds_iw_connection *ic,
69 struct rds_iw_recv_work *recv)
70{
71 struct rds_page_frag *frag = recv->r_frag;
72
73 rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page);
74 if (frag->f_mapped)
75 ib_dma_unmap_page(ic->i_cm_id->device,
76 frag->f_mapped,
77 RDS_FRAG_SIZE, DMA_FROM_DEVICE);
78 frag->f_mapped = 0;
79}
80
81void rds_iw_recv_init_ring(struct rds_iw_connection *ic)
82{
83 struct rds_iw_recv_work *recv;
84 u32 i;
85
86 for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) {
87 struct ib_sge *sge;
88
89 recv->r_iwinc = NULL;
90 recv->r_frag = NULL;
91
92 recv->r_wr.next = NULL;
93 recv->r_wr.wr_id = i;
94 recv->r_wr.sg_list = recv->r_sge;
95 recv->r_wr.num_sge = RDS_IW_RECV_SGE;
96
97 sge = rds_iw_data_sge(ic, recv->r_sge);
98 sge->addr = 0;
99 sge->length = RDS_FRAG_SIZE;
100 sge->lkey = 0;
101
102 sge = rds_iw_header_sge(ic, recv->r_sge);
103 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
104 sge->length = sizeof(struct rds_header);
105 sge->lkey = 0;
106 }
107}
108
109static void rds_iw_recv_clear_one(struct rds_iw_connection *ic,
110 struct rds_iw_recv_work *recv)
111{
112 if (recv->r_iwinc) {
113 rds_inc_put(&recv->r_iwinc->ii_inc);
114 recv->r_iwinc = NULL;
115 }
116 if (recv->r_frag) {
117 rds_iw_recv_unmap_page(ic, recv);
118 if (recv->r_frag->f_page)
119 rds_iw_frag_drop_page(recv->r_frag);
120 rds_iw_frag_free(recv->r_frag);
121 recv->r_frag = NULL;
122 }
123}
124
125void rds_iw_recv_clear_ring(struct rds_iw_connection *ic)
126{
127 u32 i;
128
129 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
130 rds_iw_recv_clear_one(ic, &ic->i_recvs[i]);
131
132 if (ic->i_frag.f_page)
133 rds_iw_frag_drop_page(&ic->i_frag);
134}
135
136static int rds_iw_recv_refill_one(struct rds_connection *conn,
137 struct rds_iw_recv_work *recv,
138 gfp_t kptr_gfp, gfp_t page_gfp)
139{
140 struct rds_iw_connection *ic = conn->c_transport_data;
141 dma_addr_t dma_addr;
142 struct ib_sge *sge;
143 int ret = -ENOMEM;
144
145 if (recv->r_iwinc == NULL) {
146 if (atomic_read(&rds_iw_allocation) >= rds_iw_sysctl_max_recv_allocation) {
147 rds_iw_stats_inc(s_iw_rx_alloc_limit);
148 goto out;
149 }
150 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab,
151 kptr_gfp);
152 if (recv->r_iwinc == NULL)
153 goto out;
154 atomic_inc(&rds_iw_allocation);
155 INIT_LIST_HEAD(&recv->r_iwinc->ii_frags);
156 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr);
157 }
158
159 if (recv->r_frag == NULL) {
160 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp);
161 if (recv->r_frag == NULL)
162 goto out;
163 INIT_LIST_HEAD(&recv->r_frag->f_item);
164 recv->r_frag->f_page = NULL;
165 }
166
167 if (ic->i_frag.f_page == NULL) {
168 ic->i_frag.f_page = alloc_page(page_gfp);
169 if (ic->i_frag.f_page == NULL)
170 goto out;
171 ic->i_frag.f_offset = 0;
172 }
173
174 dma_addr = ib_dma_map_page(ic->i_cm_id->device,
175 ic->i_frag.f_page,
176 ic->i_frag.f_offset,
177 RDS_FRAG_SIZE,
178 DMA_FROM_DEVICE);
179 if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr))
180 goto out;
181
182 /*
183 * Once we get the RDS_PAGE_LAST_OFF frag then rds_iw_frag_unmap()
184 * must be called on this recv. This happens as completions hit
185 * in order or on connection shutdown.
186 */
187 recv->r_frag->f_page = ic->i_frag.f_page;
188 recv->r_frag->f_offset = ic->i_frag.f_offset;
189 recv->r_frag->f_mapped = dma_addr;
190
191 sge = rds_iw_data_sge(ic, recv->r_sge);
192 sge->addr = dma_addr;
193 sge->length = RDS_FRAG_SIZE;
194
195 sge = rds_iw_header_sge(ic, recv->r_sge);
196 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
197 sge->length = sizeof(struct rds_header);
198
199 get_page(recv->r_frag->f_page);
200
201 if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) {
202 ic->i_frag.f_offset += RDS_FRAG_SIZE;
203 } else {
204 put_page(ic->i_frag.f_page);
205 ic->i_frag.f_page = NULL;
206 ic->i_frag.f_offset = 0;
207 }
208
209 ret = 0;
210out:
211 return ret;
212}
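
rds_iw_recv_refill_one() carves the current page into RDS_FRAG_SIZE slices: each posted receive takes its own page reference, f_offset advances one fragment at a time, and the allocator drops its own reference once the slice at RDS_PAGE_LAST_OFF has been handed out, leaving exactly one reference per posted recv. A compact sketch of that offset/refcount bookkeeping (illustrative 4 KB page and 2 KB fragments; the real constants may differ):

/* Illustrative sketch only - 4 KB page and 2 KB fragments are just examples. */
#include <stdio.h>

#define PAGE_SZ		4096
#define FRAG_SZ		2048
#define PAGE_LAST_OFF	(PAGE_SZ - FRAG_SZ)

int main(void)
{
	int refcount = 1;	/* the allocator's own reference on the page */
	int offset = 0;
	int frag;

	for (frag = 0; frag < PAGE_SZ / FRAG_SZ; frag++) {
		refcount++;	/* get_page(): this posted recv holds the page */
		printf("recv %d uses offset %d\n", frag, offset);

		if (offset < PAGE_LAST_OFF) {
			offset += FRAG_SZ;	/* next recv gets the next slice */
		} else {
			refcount--;		/* put_page(): last slice handed out */
			offset = 0;		/* a fresh page will be allocated */
		}
	}
	printf("page refcount: %d (one per posted recv)\n", refcount);
	return 0;
}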
213
214/*
215 * This tries to allocate and post unused work requests after making sure that
216 * they have all the allocations they need to queue received fragments into
217 * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc
218 * pairs don't go unmatched.
219 *
220 * -1 is returned if posting fails due to temporary resource exhaustion.
221 */
222int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
223 gfp_t page_gfp, int prefill)
224{
225 struct rds_iw_connection *ic = conn->c_transport_data;
226 struct rds_iw_recv_work *recv;
227 struct ib_recv_wr *failed_wr;
228 unsigned int posted = 0;
229 int ret = 0;
230 u32 pos;
231
232 while ((prefill || rds_conn_up(conn))
233 && rds_iw_ring_alloc(&ic->i_recv_ring, 1, &pos)) {
234 if (pos >= ic->i_recv_ring.w_nr) {
235 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
236 pos);
237 ret = -EINVAL;
238 break;
239 }
240
241 recv = &ic->i_recvs[pos];
242 ret = rds_iw_recv_refill_one(conn, recv, kptr_gfp, page_gfp);
243 if (ret) {
244 ret = -1;
245 break;
246 }
247
248 /* XXX when can this fail? */
249 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
250 rdsdebug("recv %p iwinc %p page %p addr %lu ret %d\n", recv,
251 recv->r_iwinc, recv->r_frag->f_page,
252 (long) recv->r_frag->f_mapped, ret);
253 if (ret) {
254 rds_iw_conn_error(conn, "recv post on "
255 "%pI4 returned %d, disconnecting and "
256 "reconnecting\n", &conn->c_faddr,
257 ret);
258 ret = -1;
259 break;
260 }
261
262 posted++;
263 }
264
265 /* We're doing flow control - update the window. */
266 if (ic->i_flowctl && posted)
267 rds_iw_advertise_credits(conn, posted);
268
269 if (ret)
270 rds_iw_ring_unalloc(&ic->i_recv_ring, 1);
271 return ret;
272}
273
274void rds_iw_inc_purge(struct rds_incoming *inc)
275{
276 struct rds_iw_incoming *iwinc;
277 struct rds_page_frag *frag;
278 struct rds_page_frag *pos;
279
280 iwinc = container_of(inc, struct rds_iw_incoming, ii_inc);
281 rdsdebug("purging iwinc %p inc %p\n", iwinc, inc);
282
283 list_for_each_entry_safe(frag, pos, &iwinc->ii_frags, f_item) {
284 list_del_init(&frag->f_item);
285 rds_iw_frag_drop_page(frag);
286 rds_iw_frag_free(frag);
287 }
288}
289
290void rds_iw_inc_free(struct rds_incoming *inc)
291{
292 struct rds_iw_incoming *iwinc;
293
294 iwinc = container_of(inc, struct rds_iw_incoming, ii_inc);
295
296 rds_iw_inc_purge(inc);
297 rdsdebug("freeing iwinc %p inc %p\n", iwinc, inc);
298 BUG_ON(!list_empty(&iwinc->ii_frags));
299 kmem_cache_free(rds_iw_incoming_slab, iwinc);
300 atomic_dec(&rds_iw_allocation);
301 BUG_ON(atomic_read(&rds_iw_allocation) < 0);
302}
303
304int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
305 size_t size)
306{
307 struct rds_iw_incoming *iwinc;
308 struct rds_page_frag *frag;
309 struct iovec *iov = first_iov;
310 unsigned long to_copy;
311 unsigned long frag_off = 0;
312 unsigned long iov_off = 0;
313 int copied = 0;
314 int ret;
315 u32 len;
316
317 iwinc = container_of(inc, struct rds_iw_incoming, ii_inc);
318 frag = list_entry(iwinc->ii_frags.next, struct rds_page_frag, f_item);
319 len = be32_to_cpu(inc->i_hdr.h_len);
320
321 while (copied < size && copied < len) {
322 if (frag_off == RDS_FRAG_SIZE) {
323 frag = list_entry(frag->f_item.next,
324 struct rds_page_frag, f_item);
325 frag_off = 0;
326 }
327 while (iov_off == iov->iov_len) {
328 iov_off = 0;
329 iov++;
330 }
331
332 to_copy = min(iov->iov_len - iov_off, RDS_FRAG_SIZE - frag_off);
333 to_copy = min_t(size_t, to_copy, size - copied);
334 to_copy = min_t(unsigned long, to_copy, len - copied);
335
336 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag "
337 "[%p, %lu] + %lu\n",
338 to_copy, iov->iov_base, iov->iov_len, iov_off,
339 frag->f_page, frag->f_offset, frag_off);
340
341 /* XXX needs + offset for multiple recvs per page */
342 ret = rds_page_copy_to_user(frag->f_page,
343 frag->f_offset + frag_off,
344 iov->iov_base + iov_off,
345 to_copy);
346 if (ret) {
347 copied = ret;
348 break;
349 }
350
351 iov_off += to_copy;
352 frag_off += to_copy;
353 copied += to_copy;
354 }
355
356 return copied;
357}
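
rds_iw_inc_copy_to_user() walks two lists in lockstep - fixed-size fragments on one side, caller-supplied iovecs on the other - and every step copies the minimum of three remaining lengths: the rest of the current iovec, the rest of the current fragment, and the rest of the message. A self-contained userspace sketch of that triple-min walk (toy 4-byte fragments and made-up buffer sizes):

/* Illustrative sketch only - toy sizes; the real code copies page fragments. */
#include <stdio.h>
#include <string.h>

#define FRAG_SIZE 4	/* stand-in for RDS_FRAG_SIZE */

int main(void)
{
	/* The message, spread over fixed-size fragments... */
	const char frags[3][FRAG_SIZE] = { "abcd", "efgh", "ijkl" };
	size_t msg_len = 10;			/* h_len: only 10 bytes are valid */

	/* ...copied into variably sized user buffers (the iovec side). */
	char buf0[3], buf1[16];
	struct { char *base; size_t len; } iov[2] = {
		{ buf0, sizeof(buf0) }, { buf1, sizeof(buf1) },
	};

	size_t copied = 0, frag_off = 0, iov_off = 0;
	int fi = 0, ii = 0;

	while (copied < msg_len) {
		size_t to_copy;

		if (frag_off == FRAG_SIZE) {		/* next fragment */
			fi++;
			frag_off = 0;
		}
		while (iov_off == iov[ii].len) {	/* next iovec */
			ii++;
			iov_off = 0;
		}

		/* Advance by the tightest of the three remaining lengths. */
		to_copy = iov[ii].len - iov_off;
		if (to_copy > FRAG_SIZE - frag_off)
			to_copy = FRAG_SIZE - frag_off;
		if (to_copy > msg_len - copied)
			to_copy = msg_len - copied;

		memcpy(iov[ii].base + iov_off, &frags[fi][frag_off], to_copy);
		iov_off += to_copy;
		frag_off += to_copy;
		copied += to_copy;
	}

	printf("copied %zu bytes, second buffer holds \"%.7s\"\n", copied, buf1);
	return 0;
}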
358
359/* ic starts out kzalloc()ed */
360void rds_iw_recv_init_ack(struct rds_iw_connection *ic)
361{
362 struct ib_send_wr *wr = &ic->i_ack_wr;
363 struct ib_sge *sge = &ic->i_ack_sge;
364
365 sge->addr = ic->i_ack_dma;
366 sge->length = sizeof(struct rds_header);
367 sge->lkey = rds_iw_local_dma_lkey(ic);
368
369 wr->sg_list = sge;
370 wr->num_sge = 1;
371 wr->opcode = IB_WR_SEND;
372 wr->wr_id = RDS_IW_ACK_WR_ID;
373 wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED;
374}
375
376/*
377 * You'd think that with reliable IB connections you wouldn't need to ack
378 * messages that have been received. The problem is that IB hardware generates
379 * an ack message before it has DMAed the message into memory. This creates a
380 * potential message loss if the HCA is disabled for any reason between when it
381 * sends the ack and before the message is DMAed and processed. This is only a
382 * potential issue if another HCA is available for fail-over.
383 *
384 * When the remote host receives our ack they'll free the sent message from
385 * their send queue. To decrease the latency of this we always send an ack
386 * immediately after we've received messages.
387 *
388 * For simplicity, we only have one ack in flight at a time. This puts
389 * pressure on senders to have deep enough send queues to absorb the latency of
390 * a single ack frame being in flight. This might not be good enough.
391 *
392 * This is implemented by having a long-lived send_wr and sge which point to a
393 * statically allocated ack frame. This ack wr does not fall under the ring
394 * accounting that the tx and rx wrs do. The QP attribute specifically makes
395 * room for it beyond the ring size. Send completion notices its special
396 * wr_id and avoids working with the ring in that case.
397 */
398static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq,
399 int ack_required)
400{
401 rds_iw_set_64bit(&ic->i_ack_next, seq);
402 if (ack_required) {
403 smp_mb__before_clear_bit();
404 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
405 }
406}
407
408static u64 rds_iw_get_ack(struct rds_iw_connection *ic)
409{
410 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
411 smp_mb__after_clear_bit();
412
413 return ic->i_ack_next;
414}
415
416static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits)
417{
418 struct rds_header *hdr = ic->i_ack;
419 struct ib_send_wr *failed_wr;
420 u64 seq;
421 int ret;
422
423 seq = rds_iw_get_ack(ic);
424
425 rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq);
426 rds_message_populate_header(hdr, 0, 0, 0);
427 hdr->h_ack = cpu_to_be64(seq);
428 hdr->h_credit = adv_credits;
429 rds_message_make_checksum(hdr);
430 ic->i_ack_queued = jiffies;
431
432 ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr);
433 if (unlikely(ret)) {
434 /* Failed to send. Release the WR, and
435 * force another ACK.
436 */
437 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
438 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
439
440 rds_iw_stats_inc(s_iw_ack_send_failure);
441 /* Need to finesse this later. */
442 BUG();
443 } else
444 rds_iw_stats_inc(s_iw_ack_sent);
445}
446
447/*
448 * There are 3 ways of getting acknowledgements to the peer:
449 * 1. We call rds_iw_attempt_ack from the recv completion handler
450 * to send an ACK-only frame.
451 * However, there can be only one such frame in the send queue
452 * at any time, so we may have to postpone it.
453 * 2. When another (data) packet is transmitted while there's
454 * an ACK in the queue, we piggyback the ACK sequence number
455 * on the data packet.
456 * 3. If the ACK WR is done sending, we get called from the
457 * send queue completion handler, and check whether there's
458 * another ACK pending (postponed because the WR was on the
459 * queue). If so, we transmit it.
460 *
461 * We maintain 2 variables:
462 * - i_ack_flags, which keeps track of whether the ACK WR
463 * is currently in the send queue or not (IB_ACK_IN_FLIGHT)
464 * - i_ack_next, which is the last sequence number we received
465 *
466 * Potentially, send queue and receive queue handlers can run concurrently.
467 *
468 * Reconnecting complicates this picture just slightly. When we
469 * reconnect, we may be seeing duplicate packets. The peer
470 * is retransmitting them, because it hasn't seen an ACK for
471 * them. It is important that we ACK these.
472 *
473 * ACK mitigation adds a header flag "ACK_REQUIRED"; any packet with
474 * this flag set *MUST* be acknowledged immediately.
475 */
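
Most of the scheme above is carried by the two flag bits: IB_ACK_REQUESTED means the peer is owed an ACK, IB_ACK_IN_FLIGHT means the single ACK WR is currently posted, and rds_iw_attempt_ack()/rds_iw_ack_send_complete() hand a postponed ACK back and forth between them. A single-threaded sketch of that hand-off (plain booleans instead of the atomic bitops, and send credits ignored):

/* Illustrative sketch only - plain bools, single threaded, no send credits. */
#include <stdio.h>
#include <stdbool.h>

static bool ack_requested;	/* the peer is owed an ACK		  */
static bool ack_in_flight;	/* the single ACK WR is currently posted */

static void send_ack(void)
{
	printf("ACK frame posted\n");
}

/* Recv completion path (case 1 above): send an ACK if one is owed and
 * the one-and-only ACK WR is free. */
static void attempt_ack(void)
{
	if (!ack_requested)
		return;
	if (ack_in_flight) {
		printf("ACK delayed\n");	/* retried on WR completion */
		return;
	}
	ack_in_flight = true;
	ack_requested = false;
	send_ack();
}

/* ACK WR completion (case 3 above): the WR is free again, retry. */
static void ack_send_complete(void)
{
	ack_in_flight = false;
	attempt_ack();
}

int main(void)
{
	ack_requested = true;
	attempt_ack();		/* posts the ACK		*/
	ack_requested = true;	/* more data arrived meanwhile	*/
	attempt_ack();		/* delayed: WR still in flight	*/
	ack_send_complete();	/* posts the pending ACK	*/
	return 0;
}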
476
477/*
478 * When we get here, we're called from the recv queue handler.
479 * Check whether we ought to transmit an ACK.
480 */
481void rds_iw_attempt_ack(struct rds_iw_connection *ic)
482{
483 unsigned int adv_credits;
484
485 if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
486 return;
487
488 if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) {
489 rds_iw_stats_inc(s_iw_ack_send_delayed);
490 return;
491 }
492
493 /* Can we get a send credit? */
494 if (!rds_iw_send_grab_credits(ic, 1, &adv_credits, 0)) {
495 rds_iw_stats_inc(s_iw_tx_throttle);
496 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
497 return;
498 }
499
500 clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
501 rds_iw_send_ack(ic, adv_credits);
502}
503
504/*
505 * We get here from the send completion handler, when the
506 * adapter tells us the ACK frame was sent.
507 */
508void rds_iw_ack_send_complete(struct rds_iw_connection *ic)
509{
510 clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags);
511 rds_iw_attempt_ack(ic);
512}
513
514/*
515 * This is called by the regular xmit code when it wants to piggyback
516 * an ACK on an outgoing frame.
517 */
518u64 rds_iw_piggyb_ack(struct rds_iw_connection *ic)
519{
520 if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags))
521 rds_iw_stats_inc(s_iw_ack_send_piggybacked);
522 return rds_iw_get_ack(ic);
523}
524
525/*
526 * It's kind of lame that we're copying from the posted receive pages into
527 * long-lived bitmaps. We could have posted the bitmaps and rdma written into
528 * them. But receiving new congestion bitmaps should be a *rare* event, so
529 * hopefully we won't need to invest that complexity in making it more
530 * efficient. By copying we can share a simpler core with TCP which has to
531 * copy.
532 */
533static void rds_iw_cong_recv(struct rds_connection *conn,
534 struct rds_iw_incoming *iwinc)
535{
536 struct rds_cong_map *map;
537 unsigned int map_off;
538 unsigned int map_page;
539 struct rds_page_frag *frag;
540 unsigned long frag_off;
541 unsigned long to_copy;
542 unsigned long copied;
543 uint64_t uncongested = 0;
544 void *addr;
545
546 /* catch completely corrupt packets */
547 if (be32_to_cpu(iwinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES)
548 return;
549
550 map = conn->c_fcong;
551 map_page = 0;
552 map_off = 0;
553
554 frag = list_entry(iwinc->ii_frags.next, struct rds_page_frag, f_item);
555 frag_off = 0;
556
557 copied = 0;
558
559 while (copied < RDS_CONG_MAP_BYTES) {
560 uint64_t *src, *dst;
561 unsigned int k;
562
563 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
564 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
565
566 addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0);
567
568 src = addr + frag_off;
569 dst = (void *)map->m_page_addrs[map_page] + map_off;
570 for (k = 0; k < to_copy; k += 8) {
571 /* Record ports that became uncongested, ie
572 * bits that changed from 0 to 1. */
573 uncongested |= ~(*src) & *dst;
574 *dst++ = *src++;
575 }
576 kunmap_atomic(addr, KM_SOFTIRQ0);
577
578 copied += to_copy;
579
580 map_off += to_copy;
581 if (map_off == PAGE_SIZE) {
582 map_off = 0;
583 map_page++;
584 }
585
586 frag_off += to_copy;
587 if (frag_off == RDS_FRAG_SIZE) {
588 frag = list_entry(frag->f_item.next,
589 struct rds_page_frag, f_item);
590 frag_off = 0;
591 }
592 }
593
594 /* the congestion map is in little endian order */
595 uncongested = le64_to_cpu(uncongested);
596
597 rds_cong_map_updated(map, uncongested);
598}
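
The copy loop in rds_iw_cong_recv() doubles as a change detector: before each stored word is overwritten, ~incoming & stored picks out the bits that are set in the stored copy but clear in the incoming word, and those are the ports the code reports to rds_cong_map_updated() as newly uncongested. A one-word sketch of that bit trick (made-up bit values):

/* Illustrative sketch only - one 64-bit word of the congestion bitmap. */
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t stored   = 0xf0;	/* stored copy: bits 4-7 set	     */
	uint64_t incoming = 0x30;	/* incoming word: only 4-5 still set */
	uint64_t uncongested;

	uncongested = ~incoming & stored;	/* set before, clear now	  */
	stored = incoming;			/* overwrite, as the copy loop does */

	/* Prints 0xc0: bits 6 and 7 just cleared - the ports the code
	 * counts as newly uncongested. */
	printf("uncongested mask: 0x%llx\n", (unsigned long long)uncongested);
	return 0;
}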
599
600/*
601 * Rings are posted with all the allocations they'll need to queue the
602 * incoming message to the receiving socket so this can't fail.
603 * All fragments start with a header, so we can make sure we're not receiving
604 * garbage, and we can tell a small 8 byte fragment from an ACK frame.
605 */
606struct rds_iw_ack_state {
607 u64 ack_next;
608 u64 ack_recv;
609 unsigned int ack_required:1;
610 unsigned int ack_next_valid:1;
611 unsigned int ack_recv_valid:1;
612};
613
614static void rds_iw_process_recv(struct rds_connection *conn,
615 struct rds_iw_recv_work *recv, u32 byte_len,
616 struct rds_iw_ack_state *state)
617{
618 struct rds_iw_connection *ic = conn->c_transport_data;
619 struct rds_iw_incoming *iwinc = ic->i_iwinc;
620 struct rds_header *ihdr, *hdr;
621
622 /* XXX shut down the connection if port 0,0 are seen? */
623
624 rdsdebug("ic %p iwinc %p recv %p byte len %u\n", ic, iwinc, recv,
625 byte_len);
626
627 if (byte_len < sizeof(struct rds_header)) {
628 rds_iw_conn_error(conn, "incoming message "
629				  "from %pI4 didn't include a "
630 "header, disconnecting and "
631 "reconnecting\n",
632 &conn->c_faddr);
633 return;
634 }
635 byte_len -= sizeof(struct rds_header);
636
637 ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
638
639 /* Validate the checksum. */
640 if (!rds_message_verify_checksum(ihdr)) {
641 rds_iw_conn_error(conn, "incoming message "
642 "from %pI4 has corrupted header - "
643 "forcing a reconnect\n",
644 &conn->c_faddr);
645 rds_stats_inc(s_recv_drop_bad_checksum);
646 return;
647 }
648
649 /* Process the ACK sequence which comes with every packet */
650 state->ack_recv = be64_to_cpu(ihdr->h_ack);
651 state->ack_recv_valid = 1;
652
653 /* Process the credits update if there was one */
654 if (ihdr->h_credit)
655 rds_iw_send_add_credits(conn, ihdr->h_credit);
656
657 if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && byte_len == 0) {
658		/* This is an ACK-only packet. The reason it gets
659 * special treatment here is that historically, ACKs
660 * were rather special beasts.
661 */
662 rds_iw_stats_inc(s_iw_ack_received);
663
664 /*
665 * Usually the frags make their way on to incs and are then freed as
666 * the inc is freed. We don't go that route, so we have to drop the
667 * page ref ourselves. We can't just leave the page on the recv
668 * because that confuses the dma mapping of pages and each recv's use
669 * of a partial page. We can leave the frag, though, it will be
670 * reused.
671 *
672 * FIXME: Fold this into the code path below.
673 */
674 rds_iw_frag_drop_page(recv->r_frag);
675 return;
676 }
677
678 /*
679 * If we don't already have an inc on the connection then this
680	 * fragment has a header and starts a message; copy its header
681 * into the inc and save the inc so we can hang upcoming fragments
682 * off its list.
683 */
684 if (iwinc == NULL) {
685 iwinc = recv->r_iwinc;
686 recv->r_iwinc = NULL;
687 ic->i_iwinc = iwinc;
688
689 hdr = &iwinc->ii_inc.i_hdr;
690 memcpy(hdr, ihdr, sizeof(*hdr));
691 ic->i_recv_data_rem = be32_to_cpu(hdr->h_len);
692
693 rdsdebug("ic %p iwinc %p rem %u flag 0x%x\n", ic, iwinc,
694 ic->i_recv_data_rem, hdr->h_flags);
695 } else {
696 hdr = &iwinc->ii_inc.i_hdr;
697 /* We can't just use memcmp here; fragments of a
698 * single message may carry different ACKs */
699 if (hdr->h_sequence != ihdr->h_sequence
700 || hdr->h_len != ihdr->h_len
701 || hdr->h_sport != ihdr->h_sport
702 || hdr->h_dport != ihdr->h_dport) {
703 rds_iw_conn_error(conn,
704 "fragment header mismatch; forcing reconnect\n");
705 return;
706 }
707 }
708
709 list_add_tail(&recv->r_frag->f_item, &iwinc->ii_frags);
710 recv->r_frag = NULL;
711
712 if (ic->i_recv_data_rem > RDS_FRAG_SIZE)
713 ic->i_recv_data_rem -= RDS_FRAG_SIZE;
714 else {
715 ic->i_recv_data_rem = 0;
716 ic->i_iwinc = NULL;
717
718 if (iwinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP)
719 rds_iw_cong_recv(conn, iwinc);
720 else {
721 rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr,
722 &iwinc->ii_inc, GFP_ATOMIC,
723 KM_SOFTIRQ0);
724 state->ack_next = be64_to_cpu(hdr->h_sequence);
725 state->ack_next_valid = 1;
726 }
727
728 /* Evaluate the ACK_REQUIRED flag *after* we received
729 * the complete frame, and after bumping the next_rx
730 * sequence. */
731 if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) {
732 rds_stats_inc(s_recv_ack_required);
733 state->ack_required = 1;
734 }
735
736 rds_inc_put(&iwinc->ii_inc);
737 }
738}
739
740/*
741 * Plucking the oldest entry from the ring can be done concurrently with
742 * the thread refilling the ring. Each ring operation is protected by
743 * spinlocks and the transient state of refilling doesn't change the
744 * recording of which entry is oldest.
745 *
746 * This relies on IB only calling one cq comp_handler for each cq so that
747 * there will only be one caller of rds_recv_incoming() per RDS connection.
748 */
749void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context)
750{
751 struct rds_connection *conn = context;
752 struct rds_iw_connection *ic = conn->c_transport_data;
753 struct ib_wc wc;
754 struct rds_iw_ack_state state = { 0, };
755 struct rds_iw_recv_work *recv;
756
757 rdsdebug("conn %p cq %p\n", conn, cq);
758
759 rds_iw_stats_inc(s_iw_rx_cq_call);
760
761 ib_req_notify_cq(cq, IB_CQ_SOLICITED);
762
763 while (ib_poll_cq(cq, 1, &wc) > 0) {
764 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
765 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
766 be32_to_cpu(wc.ex.imm_data));
767 rds_iw_stats_inc(s_iw_rx_cq_event);
768
769 recv = &ic->i_recvs[rds_iw_ring_oldest(&ic->i_recv_ring)];
770
771 rds_iw_recv_unmap_page(ic, recv);
772
773 /*
774 * Also process recvs in connecting state because it is possible
775 * to get a recv completion _before_ the rdmacm ESTABLISHED
776 * event is processed.
777 */
778 if (rds_conn_up(conn) || rds_conn_connecting(conn)) {
779 /* We expect errors as the qp is drained during shutdown */
780 if (wc.status == IB_WC_SUCCESS) {
781 rds_iw_process_recv(conn, recv, wc.byte_len, &state);
782 } else {
783 rds_iw_conn_error(conn, "recv completion on "
784 "%pI4 had status %u, disconnecting and "
785 "reconnecting\n", &conn->c_faddr,
786 wc.status);
787 }
788 }
789
790 rds_iw_ring_free(&ic->i_recv_ring, 1);
791 }
792
793 if (state.ack_next_valid)
794 rds_iw_set_ack(ic, state.ack_next, state.ack_required);
795 if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
796 rds_send_drop_acked(conn, state.ack_recv, NULL);
797 ic->i_ack_recv = state.ack_recv;
798 }
799 if (rds_conn_up(conn))
800 rds_iw_attempt_ack(ic);
801
802 /* If we ever end up with a really empty receive ring, we're
803 * in deep trouble, as the sender will definitely see RNR
804 * timeouts. */
805 if (rds_iw_ring_empty(&ic->i_recv_ring))
806 rds_iw_stats_inc(s_iw_rx_ring_empty);
807
808 /*
809 * If the ring is running low, then schedule the thread to refill.
810 */
811 if (rds_iw_ring_low(&ic->i_recv_ring))
812 queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
813}
814
815int rds_iw_recv(struct rds_connection *conn)
816{
817 struct rds_iw_connection *ic = conn->c_transport_data;
818 int ret = 0;
819
820 rdsdebug("conn %p\n", conn);
821
822 /*
823 * If we get a temporary posting failure in this context then
824 * we're really low and we want the caller to back off for a bit.
825 */
826 mutex_lock(&ic->i_recv_mutex);
827 if (rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0))
828 ret = -ENOMEM;
829 else
830 rds_iw_stats_inc(s_iw_rx_refill_from_thread);
831 mutex_unlock(&ic->i_recv_mutex);
832
833 if (rds_conn_up(conn))
834 rds_iw_attempt_ack(ic);
835
836 return ret;
837}
838
839int __init rds_iw_recv_init(void)
840{
841 struct sysinfo si;
842 int ret = -ENOMEM;
843
844 /* Default to roughly one third of all available RAM for recv memory */
845 si_meminfo(&si);
846 rds_iw_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE;
847
848 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming",
849 sizeof(struct rds_iw_incoming),
850 0, 0, NULL);
851 if (rds_iw_incoming_slab == NULL)
852 goto out;
853
854 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag",
855 sizeof(struct rds_page_frag),
856 0, 0, NULL);
857 if (rds_iw_frag_slab == NULL)
858 kmem_cache_destroy(rds_iw_incoming_slab);
859 else
860 ret = 0;
861out:
862 return ret;
863}
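As a rough worked example of the allocation cap set above (assuming 4 KiB pages and the usual 4 KiB RDS_FRAG_SIZE, so the two sizes cancel): on a machine with 4 GiB of RAM, si.totalram is 1,048,576 pages, giving a limit of roughly 349,525 receive fragments, i.e. about 1.3 GiB of receive buffers.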
864
865void rds_iw_recv_exit(void)
866{
867 kmem_cache_destroy(rds_iw_incoming_slab);
868 kmem_cache_destroy(rds_iw_frag_slab);
869}
diff --git a/net/rds/iw_ring.c b/net/rds/iw_ring.c
new file mode 100644
index 000000000000..d422d4b5deef
--- /dev/null
+++ b/net/rds/iw_ring.c
@@ -0,0 +1,169 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34
35#include "rds.h"
36#include "iw.h"
37
38/*
39 * Locking for IB rings.
40 * We assume that allocation is always protected by a mutex
41 * in the caller (this is a valid assumption for the current
42 * implementation).
43 *
44 * Freeing always happens in an interrupt, and hence only
45 * races with allocations, but not with other free()s.
46 *
47 * The interaction between allocation and freeing is that
48 * the alloc code has to determine the number of free entries.
49 * To this end, we maintain two counters; an allocation counter
50 * and a free counter. Both are allowed to run freely, and wrap
51 * around.
52 * The number of used entries is always (alloc_ctr - free_ctr) % NR.
53 *
54 * The current implementation makes free_ctr atomic. When the
55 * caller finds an allocation fails, it should set an "alloc fail"
56 * bit and retry the allocation. The "alloc fail" bit essentially tells
57 * the CQ completion handlers to wake it up after freeing some
58 * more entries.
59 */
60
61/*
62 * This only happens on shutdown.
63 */
64DECLARE_WAIT_QUEUE_HEAD(rds_iw_ring_empty_wait);
65
66void rds_iw_ring_init(struct rds_iw_work_ring *ring, u32 nr)
67{
68 memset(ring, 0, sizeof(*ring));
69 ring->w_nr = nr;
70 rdsdebug("ring %p nr %u\n", ring, ring->w_nr);
71}
72
73static inline u32 __rds_iw_ring_used(struct rds_iw_work_ring *ring)
74{
75 u32 diff;
76
77 /* This assumes that atomic_t has at least as many bits as u32 */
78 diff = ring->w_alloc_ctr - (u32) atomic_read(&ring->w_free_ctr);
79 BUG_ON(diff > ring->w_nr);
80
81 return diff;
82}
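The locking comment above leans on plain unsigned modular arithmetic: both counters run freely and wrap, and the number of in-flight entries falls out of a u32 subtraction. A minimal stand-alone sketch of why the wrap is harmless (hypothetical counter values, user-space C, not part of the patch):

	#include <stdint.h>
	#include <assert.h>

	int main(void)
	{
		/* the allocation counter has wrapped past UINT32_MAX, the free counter has not */
		uint32_t alloc_ctr = 5;
		uint32_t free_ctr  = 0xfffffffdU;

		/* unsigned subtraction is modulo 2^32, so 5 - 0xfffffffd yields 8 entries in use */
		uint32_t used = alloc_ctr - free_ctr;

		assert(used == 8);
		return 0;
	}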
83
84void rds_iw_ring_resize(struct rds_iw_work_ring *ring, u32 nr)
85{
86 /* We only ever get called from the connection setup code,
87 * prior to creating the QP. */
88 BUG_ON(__rds_iw_ring_used(ring));
89 ring->w_nr = nr;
90}
91
92static int __rds_iw_ring_empty(struct rds_iw_work_ring *ring)
93{
94 return __rds_iw_ring_used(ring) == 0;
95}
96
97u32 rds_iw_ring_alloc(struct rds_iw_work_ring *ring, u32 val, u32 *pos)
98{
99 u32 ret = 0, avail;
100
101 avail = ring->w_nr - __rds_iw_ring_used(ring);
102
103 rdsdebug("ring %p val %u next %u free %u\n", ring, val,
104 ring->w_alloc_ptr, avail);
105
106 if (val && avail) {
107 ret = min(val, avail);
108 *pos = ring->w_alloc_ptr;
109
110 ring->w_alloc_ptr = (ring->w_alloc_ptr + ret) % ring->w_nr;
111 ring->w_alloc_ctr += ret;
112 }
113
114 return ret;
115}
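As a worked instance: with w_nr = 8, w_alloc_ptr = 6 and 3 entries already in use (so avail = 5), a request for val = 3 returns ret = 3 with *pos = 6, and the allocation pointer wraps to (6 + 3) % 8 = 1.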
116
117void rds_iw_ring_free(struct rds_iw_work_ring *ring, u32 val)
118{
119 ring->w_free_ptr = (ring->w_free_ptr + val) % ring->w_nr;
120 atomic_add(val, &ring->w_free_ctr);
121
122 if (__rds_iw_ring_empty(ring) &&
123 waitqueue_active(&rds_iw_ring_empty_wait))
124 wake_up(&rds_iw_ring_empty_wait);
125}
126
127void rds_iw_ring_unalloc(struct rds_iw_work_ring *ring, u32 val)
128{
129 ring->w_alloc_ptr = (ring->w_alloc_ptr - val) % ring->w_nr;
130 ring->w_alloc_ctr -= val;
131}
132
133int rds_iw_ring_empty(struct rds_iw_work_ring *ring)
134{
135 return __rds_iw_ring_empty(ring);
136}
137
138int rds_iw_ring_low(struct rds_iw_work_ring *ring)
139{
140 return __rds_iw_ring_used(ring) <= (ring->w_nr >> 2);
141}
142
143
144/*
145 * returns the oldest alloced ring entry. This will be the next one
146 * freed. This can't be called if there are none allocated.
147 */
148u32 rds_iw_ring_oldest(struct rds_iw_work_ring *ring)
149{
150 return ring->w_free_ptr;
151}
152
153/*
154 * returns the number of completed work requests.
155 */
156
157u32 rds_iw_ring_completed(struct rds_iw_work_ring *ring, u32 wr_id, u32 oldest)
158{
159 u32 ret;
160
161 if (oldest <= (unsigned long long)wr_id)
162 ret = (unsigned long long)wr_id - oldest + 1;
163 else
164 ret = ring->w_nr - oldest + (unsigned long long)wr_id + 1;
165
166 rdsdebug("ring %p ret %u wr_id %u oldest %u\n", ring, ret,
167 wr_id, oldest);
168 return ret;
169}
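As a worked instance of the wrapped branch (a hypothetical ring with w_nr = 8): for oldest = 5 and wr_id = 2, the completion covers entries 5, 6, 7, 0, 1, 2, so ret = w_nr - oldest + wr_id + 1 = 8 - 5 + 2 + 1 = 6.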
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
new file mode 100644
index 000000000000..22dd38ffd608
--- /dev/null
+++ b/net/rds/iw_send.c
@@ -0,0 +1,975 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35#include <linux/device.h>
36#include <linux/dmapool.h>
37
38#include "rds.h"
39#include "rdma.h"
40#include "iw.h"
41
42static void rds_iw_send_rdma_complete(struct rds_message *rm,
43 int wc_status)
44{
45 int notify_status;
46
47 switch (wc_status) {
48 case IB_WC_WR_FLUSH_ERR:
49 return;
50
51 case IB_WC_SUCCESS:
52 notify_status = RDS_RDMA_SUCCESS;
53 break;
54
55 case IB_WC_REM_ACCESS_ERR:
56 notify_status = RDS_RDMA_REMOTE_ERROR;
57 break;
58
59 default:
60 notify_status = RDS_RDMA_OTHER_ERROR;
61 break;
62 }
63 rds_rdma_send_complete(rm, notify_status);
64}
65
66static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic,
67 struct rds_rdma_op *op)
68{
69 if (op->r_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->r_sg, op->r_nents,
72 op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->r_mapped = 0;
74 }
75}
76
77static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
78 struct rds_iw_send_work *send,
79 int wc_status)
80{
81 struct rds_message *rm = send->s_rm;
82
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
84
85 ib_dma_unmap_sg(ic->i_cm_id->device,
86 rm->m_sg, rm->m_nents,
87 DMA_TO_DEVICE);
88
89 if (rm->m_rdma_op != NULL) {
90 rds_iw_send_unmap_rdma(ic, rm->m_rdma_op);
91
92 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics:
94 * 1. Notify when we received the ACK on the RDS message
95 * that was queued with the RDMA. This provides reliable
96 * notification of RDMA status at the expense of a one-way
97 * packet delay.
98 * 2. Notify when the IB stack gives us the completion event for
99 * the RDMA operation.
100 * 3. Notify when the IB stack gives us the completion event for
101 * the accompanying RDS messages.
102 * Here, we implement approach #3. To implement approach #2,
103 * call rds_rdma_send_complete from the cq_handler. To implement #1,
104 * don't call rds_rdma_send_complete at all, and fall back to the notify
105 * handling in the ACK processing code.
106 *
107 * Note: There's no need to explicitly sync any RDMA buffers using
108 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
109 * operation itself unmapped the RDMA buffers, which takes care
110 * of synching.
111 */
112 rds_iw_send_rdma_complete(rm, wc_status);
113
114 if (rm->m_rdma_op->r_write)
115 rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes);
116 else
117 rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes);
118 }
119
120 /* If anyone waited for this message to get flushed out, wake
121 * them up now */
122 rds_message_unmapped(rm);
123
124 rds_message_put(rm);
125 send->s_rm = NULL;
126}
127
128void rds_iw_send_init_ring(struct rds_iw_connection *ic)
129{
130 struct rds_iw_send_work *send;
131 u32 i;
132
133 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
134 struct ib_sge *sge;
135
136 send->s_rm = NULL;
137 send->s_op = NULL;
138 send->s_mapping = NULL;
139
140 send->s_wr.next = NULL;
141 send->s_wr.wr_id = i;
142 send->s_wr.sg_list = send->s_sge;
143 send->s_wr.num_sge = 1;
144 send->s_wr.opcode = IB_WR_SEND;
145 send->s_wr.send_flags = 0;
146 send->s_wr.ex.imm_data = 0;
147
148 sge = rds_iw_data_sge(ic, send->s_sge);
149 sge->lkey = 0;
150
151 sge = rds_iw_header_sge(ic, send->s_sge);
152 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
153 sge->length = sizeof(struct rds_header);
154 sge->lkey = 0;
155
156 send->s_mr = ib_alloc_fast_reg_mr(ic->i_pd, fastreg_message_size);
157 if (IS_ERR(send->s_mr)) {
158 printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed\n");
159 break;
160 }
161
162 send->s_page_list = ib_alloc_fast_reg_page_list(
163 ic->i_cm_id->device, fastreg_message_size);
164 if (IS_ERR(send->s_page_list)) {
165 printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n");
166 break;
167 }
168 }
169}
170
171void rds_iw_send_clear_ring(struct rds_iw_connection *ic)
172{
173 struct rds_iw_send_work *send;
174 u32 i;
175
176 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
177 BUG_ON(!send->s_mr);
178 ib_dereg_mr(send->s_mr);
179 BUG_ON(!send->s_page_list);
180 ib_free_fast_reg_page_list(send->s_page_list);
181 if (send->s_wr.opcode == 0xdead)
182 continue;
183 if (send->s_rm)
184 rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
185 if (send->s_op)
186 rds_iw_send_unmap_rdma(ic, send->s_op);
187 }
188}
189
190/*
191 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
192 * operations performed in the send path. As the sender allocs and potentially
193 * unallocs the next free entry in the ring it doesn't alter which is
194 * the next to be freed, which is what this is concerned with.
195 */
196void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context)
197{
198 struct rds_connection *conn = context;
199 struct rds_iw_connection *ic = conn->c_transport_data;
200 struct ib_wc wc;
201 struct rds_iw_send_work *send;
202 u32 completed;
203 u32 oldest;
204 u32 i;
205 int ret;
206
207 rdsdebug("cq %p conn %p\n", cq, conn);
208 rds_iw_stats_inc(s_iw_tx_cq_call);
209 ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
210 if (ret)
211 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
212
213 while (ib_poll_cq(cq, 1, &wc) > 0) {
214 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
215 (unsigned long long)wc.wr_id, wc.status, wc.byte_len,
216 be32_to_cpu(wc.ex.imm_data));
217 rds_iw_stats_inc(s_iw_tx_cq_event);
218
219 if (wc.status != IB_WC_SUCCESS) {
220 printk(KERN_ERR "WC Error: status = %d opcode = %d\n", wc.status, wc.opcode);
221 break;
222 }
223
224 if (wc.opcode == IB_WC_LOCAL_INV && wc.wr_id == RDS_IW_LOCAL_INV_WR_ID) {
225 ic->i_fastreg_posted = 0;
226 continue;
227 }
228
229 if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) {
230 ic->i_fastreg_posted = 1;
231 continue;
232 }
233
234 if (wc.wr_id == RDS_IW_ACK_WR_ID) {
235 if (ic->i_ack_queued + HZ/2 < jiffies)
236 rds_iw_stats_inc(s_iw_tx_stalled);
237 rds_iw_ack_send_complete(ic);
238 continue;
239 }
240
241 oldest = rds_iw_ring_oldest(&ic->i_send_ring);
242
243 completed = rds_iw_ring_completed(&ic->i_send_ring, wc.wr_id, oldest);
244
245 for (i = 0; i < completed; i++) {
246 send = &ic->i_sends[oldest];
247
248 /* In the error case, wc.opcode sometimes contains garbage */
249 switch (send->s_wr.opcode) {
250 case IB_WR_SEND:
251 if (send->s_rm)
252 rds_iw_send_unmap_rm(ic, send, wc.status);
253 break;
254 case IB_WR_FAST_REG_MR:
255 case IB_WR_RDMA_WRITE:
256 case IB_WR_RDMA_READ:
257 case IB_WR_RDMA_READ_WITH_INV:
258 /* Nothing to be done - the SG list will be unmapped
259 * when the SEND completes. */
260 break;
261 default:
262 if (printk_ratelimit())
263 printk(KERN_NOTICE
264 "RDS/IW: %s: unexpected opcode 0x%x in WR!\n",
265 __func__, send->s_wr.opcode);
266 break;
267 }
268
269 send->s_wr.opcode = 0xdead;
270 send->s_wr.num_sge = 1;
271 if (send->s_queued + HZ/2 < jiffies)
272 rds_iw_stats_inc(s_iw_tx_stalled);
273
274 /* If an RDMA operation produced an error, signal this right
275 * away. If we don't, the subsequent SEND that goes with this
276 * RDMA will be canceled with ERR_WFLUSH, and the application
277 * will never learn that the RDMA failed. */
278 if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) {
279 struct rds_message *rm;
280
281 rm = rds_send_get_message(conn, send->s_op);
282 if (rm)
283 rds_iw_send_rdma_complete(rm, wc.status);
284 }
285
286 oldest = (oldest + 1) % ic->i_send_ring.w_nr;
287 }
288
289 rds_iw_ring_free(&ic->i_send_ring, completed);
290
291 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags)
292 || test_bit(0, &conn->c_map_queued))
293 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
294
295 /* We expect errors as the qp is drained during shutdown */
296 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
297 rds_iw_conn_error(conn,
298 "send completion on %pI4 "
299 "had status %u, disconnecting and reconnecting\n",
300 &conn->c_faddr, wc.status);
301 }
302 }
303}
304
305/*
306 * This is the main function for allocating credits when sending
307 * messages.
308 *
309 * Conceptually, we have two counters:
310 * - send credits: this tells us how many WRs we're allowed
311 * to submit without overrunning the receiver's queue. For
312 * each SEND WR we post, we decrement this by one.
313 *
314 * - posted credits: this tells us how many WRs we recently
315 * posted to the receive queue. This value is transferred
316 * to the peer as a "credit update" in a RDS header field.
317 * Every time we transmit credits to the peer, we subtract
318 * the amount of transferred credits from this counter.
319 *
320 * It is essential that we avoid situations where both sides have
321 * exhausted their send credits, and are unable to send new credits
322 * to the peer. We achieve this by requiring that we send at least
323 * one credit update to the peer before exhausting our credits.
324 * When new credits arrive, we subtract one credit that is withheld
325 * until we've posted new buffers and are ready to transmit these
326 * credits (see rds_iw_send_add_credits below).
327 *
328 * The RDS send code is essentially single-threaded; rds_send_xmit
329 * grabs c_send_lock to ensure exclusive access to the send ring.
330 * However, the ACK sending code is independent and can race with
331 * message SENDs.
332 *
333 * In the send path, we need to update the counters for send credits
334 * and the counter of posted buffers atomically - when we use the
335 * last available credit, we cannot allow another thread to race us
336 * and grab the posted credits counter. Hence, we have to use a
337 * spinlock to protect the credit counter, or use atomics.
338 *
339 * Spinlocks shared between the send and the receive path are bad,
340 * because they create unnecessary delays. An early implementation
341 * using a spinlock showed a 5% degradation in throughput at some
342 * loads.
343 *
344 * This implementation avoids spinlocks completely, putting both
345 * counters into a single atomic, and updating that atomic using
346 * atomic_add (in the receive path, when receiving fresh credits),
347 * and using atomic_cmpxchg when updating the two counters.
348 */
349int rds_iw_send_grab_credits(struct rds_iw_connection *ic,
350 u32 wanted, u32 *adv_credits, int need_posted)
351{
352 unsigned int avail, posted, got = 0, advertise;
353 long oldval, newval;
354
355 *adv_credits = 0;
356 if (!ic->i_flowctl)
357 return wanted;
358
359try_again:
360 advertise = 0;
361 oldval = newval = atomic_read(&ic->i_credits);
362 posted = IB_GET_POST_CREDITS(oldval);
363 avail = IB_GET_SEND_CREDITS(oldval);
364
365 rdsdebug("rds_iw_send_grab_credits(%u): credits=%u posted=%u\n",
366 wanted, avail, posted);
367
368 /* The last credit must be used to send a credit update. */
369 if (avail && !posted)
370 avail--;
371
372 if (avail < wanted) {
373 struct rds_connection *conn = ic->i_cm_id->context;
374
375 /* Oops, there aren't that many credits left! */
376 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
377 got = avail;
378 } else {
379 /* Sometimes you get what you want, lalala. */
380 got = wanted;
381 }
382 newval -= IB_SET_SEND_CREDITS(got);
383
384 /*
385 * If need_posted is non-zero, then the caller wants the
386 * posted credits advertised regardless of whether any send
387 * credits are available.
388 */
389 if (posted && (got || need_posted)) {
390 advertise = min_t(unsigned int, posted, RDS_MAX_ADV_CREDIT);
391 newval -= IB_SET_POST_CREDITS(advertise);
392 }
393
394 /* Finally bill everything */
395 if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval)
396 goto try_again;
397
398 *adv_credits = advertise;
399 return got;
400}
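The IB_GET_*/IB_SET_* credit helpers used here are defined in iw.h and are not part of this hunk. A minimal user-space model of the packing the function relies on — assuming, purely for illustration, send credits in the low 16 bits and posted credits in the high 16 bits — shows how both counters fit into the single word that atomic_cmpxchg() updates:

	#include <stdint.h>
	#include <stdio.h>

	/* hypothetical layout; the real macros live in net/rds/iw.h */
	#define SET_SEND_CREDITS(v)	((uint32_t)(v) & 0xffff)
	#define SET_POST_CREDITS(v)	((uint32_t)(v) << 16)
	#define GET_SEND_CREDITS(v)	((v) & 0xffffU)
	#define GET_POST_CREDITS(v)	((v) >> 16)

	int main(void)
	{
		/* 100 send credits available, 3 receive buffers posted but not yet advertised */
		uint32_t credits = SET_SEND_CREDITS(100) | SET_POST_CREDITS(3);

		/* both counters round-trip out of the one packed word */
		printf("send=%u posted=%u\n",
		       (unsigned)GET_SEND_CREDITS(credits),
		       (unsigned)GET_POST_CREDITS(credits));
		return 0;
	}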
401
402void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits)
403{
404 struct rds_iw_connection *ic = conn->c_transport_data;
405
406 if (credits == 0)
407 return;
408
409 rdsdebug("rds_iw_send_add_credits(%u): current=%u%s\n",
410 credits,
411 IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)),
412 test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : "");
413
414 atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits);
415 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags))
416 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
417
418 WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384);
419
420 rds_iw_stats_inc(s_iw_rx_credit_updates);
421}
422
423void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted)
424{
425 struct rds_iw_connection *ic = conn->c_transport_data;
426
427 if (posted == 0)
428 return;
429
430 atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits);
431
432 /* Decide whether to send an update to the peer now.
433 * If we would send a credit update for every single buffer we
434 * post, we would end up with an ACK storm (ACK arrives,
435 * consumes buffer, we refill the ring, send ACK to remote
436 * advertising the newly posted buffer... ad inf)
437 *
438 * Performance pretty much depends on how often we send
439 * credit updates - too frequent updates mean lots of ACKs.
440 * Too infrequent updates, and the peer will run out of
441 * credits and have to throttle.
442 * For the time being, 16 seems to be a good compromise.
443 */
444 if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16)
445 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
446}
447
448static inline void
449rds_iw_xmit_populate_wr(struct rds_iw_connection *ic,
450 struct rds_iw_send_work *send, unsigned int pos,
451 unsigned long buffer, unsigned int length,
452 int send_flags)
453{
454 struct ib_sge *sge;
455
456 WARN_ON(pos != send - ic->i_sends);
457
458 send->s_wr.send_flags = send_flags;
459 send->s_wr.opcode = IB_WR_SEND;
460 send->s_wr.num_sge = 2;
461 send->s_wr.next = NULL;
462 send->s_queued = jiffies;
463 send->s_op = NULL;
464
465 if (length != 0) {
466 sge = rds_iw_data_sge(ic, send->s_sge);
467 sge->addr = buffer;
468 sge->length = length;
469 sge->lkey = rds_iw_local_dma_lkey(ic);
470
471 sge = rds_iw_header_sge(ic, send->s_sge);
472 } else {
473 /* We're sending a packet with no payload. There is only
474 * one SGE */
475 send->s_wr.num_sge = 1;
476 sge = &send->s_sge[0];
477 }
478
479 sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header));
480 sge->length = sizeof(struct rds_header);
481 sge->lkey = rds_iw_local_dma_lkey(ic);
482}
483
484/*
485 * This can be called multiple times for a given message. The first time
486 * we see a message we map its scatterlist into the IB device so that
487 * we can provide that mapped address to the IB scatter gather entries
488 * in the IB work requests. We translate the scatterlist into a series
489 * of work requests that fragment the message. These work requests complete
490 * in order so we pass ownership of the message to the completion handler
491 * once we send the final fragment.
492 *
493 * The RDS core uses the c_send_lock to only enter this function once
494 * per connection. This makes sure that the tx ring alloc/unalloc pairs
495 * don't get out of sync and confuse the ring.
496 */
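For example, assuming the usual 4 KiB RDS_FRAG_SIZE, a message with h_len = 10000 bytes is split across ceil(10000 / 4096) = 3 work requests; each WR carries at most one fragment of payload plus the RDS header in a second SGE, and only once the final fragment is posted does the completion handler take ownership of the rds_message.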
497int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
498 unsigned int hdr_off, unsigned int sg, unsigned int off)
499{
500 struct rds_iw_connection *ic = conn->c_transport_data;
501 struct ib_device *dev = ic->i_cm_id->device;
502 struct rds_iw_send_work *send = NULL;
503 struct rds_iw_send_work *first;
504 struct rds_iw_send_work *prev;
505 struct ib_send_wr *failed_wr;
506 struct scatterlist *scat;
507 u32 pos;
508 u32 i;
509 u32 work_alloc;
510 u32 credit_alloc;
511 u32 posted;
512 u32 adv_credits = 0;
513 int send_flags = 0;
514 int sent;
515 int ret;
516 int flow_controlled = 0;
517
518 BUG_ON(off % RDS_FRAG_SIZE);
519 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
520
521 /* Fastreg support */
522 if (rds_rdma_cookie_key(rm->m_rdma_cookie)
523 && !ic->i_fastreg_posted) {
524 ret = -EAGAIN;
525 goto out;
526 }
527
528 /* FIXME we may overallocate here */
529 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
530 i = 1;
531 else
532 i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE);
533
534 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
535 if (work_alloc == 0) {
536 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
537 rds_iw_stats_inc(s_iw_tx_ring_full);
538 ret = -ENOMEM;
539 goto out;
540 }
541
542 credit_alloc = work_alloc;
543 if (ic->i_flowctl) {
544 credit_alloc = rds_iw_send_grab_credits(ic, work_alloc, &posted, 0);
545 adv_credits += posted;
546 if (credit_alloc < work_alloc) {
547 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
548 work_alloc = credit_alloc;
549 flow_controlled++;
550 }
551 if (work_alloc == 0) {
552 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
553 rds_iw_stats_inc(s_iw_tx_throttle);
554 ret = -ENOMEM;
555 goto out;
556 }
557 }
558
559 /* map the message the first time we see it */
560 if (ic->i_rm == NULL) {
561 /*
562 printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n",
563 be16_to_cpu(rm->m_inc.i_hdr.h_dport),
564 rm->m_inc.i_hdr.h_flags,
565 be32_to_cpu(rm->m_inc.i_hdr.h_len));
566 */
567 if (rm->m_nents) {
568 rm->m_count = ib_dma_map_sg(dev,
569 rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
570 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
571 if (rm->m_count == 0) {
572 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
573 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
574 ret = -ENOMEM; /* XXX ? */
575 goto out;
576 }
577 } else {
578 rm->m_count = 0;
579 }
580
581 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
582 ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
583 rds_message_addref(rm);
584 ic->i_rm = rm;
585
586 /* Finalize the header */
587 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
588 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED;
589 if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags))
590 rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED;
591
592 /* If it has a RDMA op, tell the peer we did it. This is
593 * used by the peer to release use-once RDMA MRs. */
594 if (rm->m_rdma_op) {
595 struct rds_ext_header_rdma ext_hdr;
596
597 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key);
598 rds_message_add_extension(&rm->m_inc.i_hdr,
599 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
600 }
601 if (rm->m_rdma_cookie) {
602 rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr,
603 rds_rdma_cookie_key(rm->m_rdma_cookie),
604 rds_rdma_cookie_offset(rm->m_rdma_cookie));
605 }
606
607 /* Note - rds_iw_piggyb_ack clears the ACK_REQUIRED bit, so
608 * we should not do this unless we have a chance of at least
609 * sticking the header into the send ring, which is why we
610 * call rds_iw_ring_alloc first. */
611 rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_iw_piggyb_ack(ic));
612 rds_message_make_checksum(&rm->m_inc.i_hdr);
613
614 /*
615 * Update adv_credits since we reset the ACK_REQUIRED bit.
616 */
617 rds_iw_send_grab_credits(ic, 0, &posted, 1);
618 adv_credits += posted;
619 BUG_ON(adv_credits > 255);
620 } else if (ic->i_rm != rm)
621 BUG();
622
623 send = &ic->i_sends[pos];
624 first = send;
625 prev = NULL;
626 scat = &rm->m_sg[sg];
627 sent = 0;
628 i = 0;
629
630 /* Sometimes you want to put a fence between an RDMA
631 * READ and the following SEND.
632 * We could either do this all the time
633 * or when requested by the user. Right now, we let
634 * the application choose.
635 */
636 if (rm->m_rdma_op && rm->m_rdma_op->r_fence)
637 send_flags = IB_SEND_FENCE;
638
639 /*
640 * We could be copying the header into the unused tail of the page.
641 * That would need to be changed in the future when those pages might
642 * be mapped userspace pages or page cache pages. So instead we always
643 * use a second sge and our long-lived ring of mapped headers. We send
644 * the header after the data so that the data payload can be aligned on
645 * the receiver.
646 */
647
648 /* handle a 0-len message */
649 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) {
650 rds_iw_xmit_populate_wr(ic, send, pos, 0, 0, send_flags);
651 goto add_header;
652 }
653
654 /* if there's data reference it with a chain of work reqs */
655 for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
656 unsigned int len;
657
658 send = &ic->i_sends[pos];
659
660 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
661 rds_iw_xmit_populate_wr(ic, send, pos,
662 ib_sg_dma_address(dev, scat) + off, len,
663 send_flags);
664
665 /*
666 * We want to delay signaling completions just enough to get
667 * the batching benefits but not so much that we create dead time
668 * on the wire.
669 */
670 if (ic->i_unsignaled_wrs-- == 0) {
671 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
672 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
673 }
674
675 ic->i_unsignaled_bytes -= len;
676 if (ic->i_unsignaled_bytes <= 0) {
677 ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes;
678 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
679 }
680
681 /*
682 * Always signal the last one if we're stopping due to flow control.
683 */
684 if (flow_controlled && i == (work_alloc-1))
685 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
686
687 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
688 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
689
690 sent += len;
691 off += len;
692 if (off == ib_sg_dma_len(dev, scat)) {
693 scat++;
694 off = 0;
695 }
696
697add_header:
698 /* Tack on the header after the data. The header SGE should already
699 * have been set up to point to the right header buffer. */
700 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
701
702 if (0) {
703 struct rds_header *hdr = &ic->i_send_hdrs[pos];
704
705 printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n",
706 be16_to_cpu(hdr->h_dport),
707 hdr->h_flags,
708 be32_to_cpu(hdr->h_len));
709 }
710 if (adv_credits) {
711 struct rds_header *hdr = &ic->i_send_hdrs[pos];
712
713 /* add credit and redo the header checksum */
714 hdr->h_credit = adv_credits;
715 rds_message_make_checksum(hdr);
716 adv_credits = 0;
717 rds_iw_stats_inc(s_iw_tx_credit_updates);
718 }
719
720 if (prev)
721 prev->s_wr.next = &send->s_wr;
722 prev = send;
723
724 pos = (pos + 1) % ic->i_send_ring.w_nr;
725 }
726
727 /* Account the RDS header in the number of bytes we sent, but just once.
728 * The caller has no concept of fragmentation. */
729 if (hdr_off == 0)
730 sent += sizeof(struct rds_header);
731
732 /* if we finished the message then send completion owns it */
733 if (scat == &rm->m_sg[rm->m_count]) {
734 prev->s_rm = ic->i_rm;
735 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
736 ic->i_rm = NULL;
737 }
738
739 if (i < work_alloc) {
740 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
741 work_alloc = i;
742 }
743 if (ic->i_flowctl && i < credit_alloc)
744 rds_iw_send_add_credits(conn, credit_alloc - i);
745
746 /* XXX need to worry about failed_wr and partial sends. */
747 failed_wr = &first->s_wr;
748 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
749 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
750 first, &first->s_wr, ret, failed_wr);
751 BUG_ON(failed_wr != &first->s_wr);
752 if (ret) {
753 printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 "
754 "returned %d\n", &conn->c_faddr, ret);
755 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
756 if (prev->s_rm) {
757 ic->i_rm = prev->s_rm;
758 prev->s_rm = NULL;
759 }
760 goto out;
761 }
762
763 ret = sent;
764out:
765 BUG_ON(adv_credits);
766 return ret;
767}
768
769static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr)
770{
771 BUG_ON(nent > send->s_page_list->max_page_list_len);
772 /*
773 * Perform a WR for the fast_reg_mr. Each individual page
774 * in the sg list is added to the fast reg page list and placed
775 * inside the fast_reg_mr WR.
776 */
777 send->s_wr.opcode = IB_WR_FAST_REG_MR;
778 send->s_wr.wr.fast_reg.length = len;
779 send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey;
780 send->s_wr.wr.fast_reg.page_list = send->s_page_list;
781 send->s_wr.wr.fast_reg.page_list_len = nent;
782 send->s_wr.wr.fast_reg.page_shift = rds_iwdev->page_shift;
783 send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE;
784 send->s_wr.wr.fast_reg.iova_start = sg_addr;
785
786 ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
787}
788
789int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
790{
791 struct rds_iw_connection *ic = conn->c_transport_data;
792 struct rds_iw_send_work *send = NULL;
793 struct rds_iw_send_work *first;
794 struct rds_iw_send_work *prev;
795 struct ib_send_wr *failed_wr;
796 struct rds_iw_device *rds_iwdev;
797 struct scatterlist *scat;
798 unsigned long len;
799 u64 remote_addr = op->r_remote_addr;
800 u32 pos, fr_pos;
801 u32 work_alloc;
802 u32 i;
803 u32 j;
804 int sent;
805 int ret;
806 int num_sge;
807
808 rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
809
810 /* map the message the first time we see it */
811 if (!op->r_mapped) {
812 op->r_count = ib_dma_map_sg(ic->i_cm_id->device,
813 op->r_sg, op->r_nents, (op->r_write) ?
814 DMA_TO_DEVICE : DMA_FROM_DEVICE);
815 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count);
816 if (op->r_count == 0) {
817 rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
818 ret = -ENOMEM; /* XXX ? */
819 goto out;
820 }
821
822 op->r_mapped = 1;
823 }
824
825 if (!op->r_write) {
826 /* Alloc space on the send queue for the fastreg */
827 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos);
828 if (work_alloc != 1) {
829 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
830 rds_iw_stats_inc(s_iw_tx_ring_full);
831 ret = -ENOMEM;
832 goto out;
833 }
834 }
835
836 /*
837 * Instead of knowing how to return a partial rdma read/write we insist that there
838 * be enough work requests to send the entire message.
839 */
840 i = ceil(op->r_count, rds_iwdev->max_sge);
841
842 work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
843 if (work_alloc != i) {
844 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
845 rds_iw_stats_inc(s_iw_tx_ring_full);
846 ret = -ENOMEM;
847 goto out;
848 }
849
850 send = &ic->i_sends[pos];
851 if (!op->r_write) {
852 first = prev = &ic->i_sends[fr_pos];
853 } else {
854 first = send;
855 prev = NULL;
856 }
857 scat = &op->r_sg[0];
858 sent = 0;
859 num_sge = op->r_count;
860
861 for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) {
862 send->s_wr.send_flags = 0;
863 send->s_queued = jiffies;
864
865 /*
866 * We want to delay signaling completions just enough to get
867 * the batching benefits but not so much that we create dead time on the wire.
868 */
869 if (ic->i_unsignaled_wrs-- == 0) {
870 ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
871 send->s_wr.send_flags = IB_SEND_SIGNALED;
872 }
873
874 /* To avoid the need to have the plumbing to invalidate the fastreg_mr used
875 * for local access after RDS is finished with it, using
876 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
877 */
878 if (op->r_write)
879 send->s_wr.opcode = IB_WR_RDMA_WRITE;
880 else
881 send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
882
883 send->s_wr.wr.rdma.remote_addr = remote_addr;
884 send->s_wr.wr.rdma.rkey = op->r_key;
885 send->s_op = op;
886
887 if (num_sge > rds_iwdev->max_sge) {
888 send->s_wr.num_sge = rds_iwdev->max_sge;
889 num_sge -= rds_iwdev->max_sge;
890 } else
891 send->s_wr.num_sge = num_sge;
892
893 send->s_wr.next = NULL;
894
895 if (prev)
896 prev->s_wr.next = &send->s_wr;
897
898 for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) {
899 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
900
901 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
902 send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat);
903 else {
904 send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat);
905 send->s_sge[j].length = len;
906 send->s_sge[j].lkey = rds_iw_local_dma_lkey(ic);
907 }
908
909 sent += len;
910 rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr);
911 remote_addr += len;
912
913 scat++;
914 }
915
916 if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) {
917 send->s_wr.num_sge = 1;
918 send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr;
919 send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes;
920 send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey;
921 }
922
923 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
924 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
925
926 prev = send;
927 if (++send == &ic->i_sends[ic->i_send_ring.w_nr])
928 send = ic->i_sends;
929 }
930
931 /* if we finished the message then send completion owns it */
932 if (scat == &op->r_sg[op->r_count])
933 first->s_wr.send_flags = IB_SEND_SIGNALED;
934
935 if (i < work_alloc) {
936 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i);
937 work_alloc = i;
938 }
939
940 /* On iWARP, local memory access by a remote system (i.e., RDMA Read) is not
941 * recommended. Putting the lkey on the wire is a security hole, as it can
942 * allow access to all of memory on the remote system. Some
943 * adapters do not allow using the lkey for this at all. To bypass this, use a
944 * fastreg_mr (or possibly a dma_mr).
945 */
946 if (!op->r_write) {
947 rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
948 op->r_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
949 work_alloc++;
950 }
951
952 failed_wr = &first->s_wr;
953 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
954 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
955 first, &first->s_wr, ret, failed_wr);
956 BUG_ON(failed_wr != &first->s_wr);
957 if (ret) {
958 printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 "
959 "returned %d\n", &conn->c_faddr, ret);
960 rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
961 goto out;
962 }
963
964out:
965 return ret;
966}
967
968void rds_iw_xmit_complete(struct rds_connection *conn)
969{
970 struct rds_iw_connection *ic = conn->c_transport_data;
971
972 /* We may have a pending ACK or window update we were unable
973 * to send previously (due to flow control). Try again. */
974 rds_iw_attempt_ack(ic);
975}
diff --git a/net/rds/iw_stats.c b/net/rds/iw_stats.c
new file mode 100644
index 000000000000..ccc7e8f0bf0e
--- /dev/null
+++ b/net/rds/iw_stats.c
@@ -0,0 +1,95 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/percpu.h>
34#include <linux/seq_file.h>
35#include <linux/proc_fs.h>
36
37#include "rds.h"
38#include "iw.h"
39
40DEFINE_PER_CPU(struct rds_iw_statistics, rds_iw_stats) ____cacheline_aligned;
41
42static char *rds_iw_stat_names[] = {
43 "iw_connect_raced",
44 "iw_listen_closed_stale",
45 "iw_tx_cq_call",
46 "iw_tx_cq_event",
47 "iw_tx_ring_full",
48 "iw_tx_throttle",
49 "iw_tx_sg_mapping_failure",
50 "iw_tx_stalled",
51 "iw_tx_credit_updates",
52 "iw_rx_cq_call",
53 "iw_rx_cq_event",
54 "iw_rx_ring_empty",
55 "iw_rx_refill_from_cq",
56 "iw_rx_refill_from_thread",
57 "iw_rx_alloc_limit",
58 "iw_rx_credit_updates",
59 "iw_ack_sent",
60 "iw_ack_send_failure",
61 "iw_ack_send_delayed",
62 "iw_ack_send_piggybacked",
63 "iw_ack_received",
64 "iw_rdma_mr_alloc",
65 "iw_rdma_mr_free",
66 "iw_rdma_mr_used",
67 "iw_rdma_mr_pool_flush",
68 "iw_rdma_mr_pool_wait",
69 "iw_rdma_mr_pool_depleted",
70};
71
72unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter,
73 unsigned int avail)
74{
75 struct rds_iw_statistics stats = {0, };
76 uint64_t *src;
77 uint64_t *sum;
78 size_t i;
79 int cpu;
80
81 if (avail < ARRAY_SIZE(rds_iw_stat_names))
82 goto out;
83
84 for_each_online_cpu(cpu) {
85 src = (uint64_t *)&(per_cpu(rds_iw_stats, cpu));
86 sum = (uint64_t *)&stats;
87 for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++)
88 *(sum++) += *(src++);
89 }
90
91 rds_stats_info_copy(iter, (uint64_t *)&stats, rds_iw_stat_names,
92 ARRAY_SIZE(rds_iw_stat_names));
93out:
94 return ARRAY_SIZE(rds_iw_stat_names);
95}
diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c
new file mode 100644
index 000000000000..9590678cd616
--- /dev/null
+++ b/net/rds/iw_sysctl.c
@@ -0,0 +1,137 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/sysctl.h>
35#include <linux/proc_fs.h>
36
37#include "iw.h"
38
39static struct ctl_table_header *rds_iw_sysctl_hdr;
40
41unsigned long rds_iw_sysctl_max_send_wr = RDS_IW_DEFAULT_SEND_WR;
42unsigned long rds_iw_sysctl_max_recv_wr = RDS_IW_DEFAULT_RECV_WR;
43unsigned long rds_iw_sysctl_max_recv_allocation = (128 * 1024 * 1024) / RDS_FRAG_SIZE;
44static unsigned long rds_iw_sysctl_max_wr_min = 1;
45/* hardware will fail CQ creation long before this */
46static unsigned long rds_iw_sysctl_max_wr_max = (u32)~0;
47
48unsigned long rds_iw_sysctl_max_unsig_wrs = 16;
49static unsigned long rds_iw_sysctl_max_unsig_wr_min = 1;
50static unsigned long rds_iw_sysctl_max_unsig_wr_max = 64;
51
52unsigned long rds_iw_sysctl_max_unsig_bytes = (16 << 20);
53static unsigned long rds_iw_sysctl_max_unsig_bytes_min = 1;
54static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL;
55
56unsigned int rds_iw_sysctl_flow_control = 1;
57
58ctl_table rds_iw_sysctl_table[] = {
59 {
60 .ctl_name = CTL_UNNUMBERED,
61 .procname = "max_send_wr",
62 .data = &rds_iw_sysctl_max_send_wr,
63 .maxlen = sizeof(unsigned long),
64 .mode = 0644,
65 .proc_handler = &proc_doulongvec_minmax,
66 .extra1 = &rds_iw_sysctl_max_wr_min,
67 .extra2 = &rds_iw_sysctl_max_wr_max,
68 },
69 {
70 .ctl_name = CTL_UNNUMBERED,
71 .procname = "max_recv_wr",
72 .data = &rds_iw_sysctl_max_recv_wr,
73 .maxlen = sizeof(unsigned long),
74 .mode = 0644,
75 .proc_handler = &proc_doulongvec_minmax,
76 .extra1 = &rds_iw_sysctl_max_wr_min,
77 .extra2 = &rds_iw_sysctl_max_wr_max,
78 },
79 {
80 .ctl_name = CTL_UNNUMBERED,
81 .procname = "max_unsignaled_wr",
82 .data = &rds_iw_sysctl_max_unsig_wrs,
83 .maxlen = sizeof(unsigned long),
84 .mode = 0644,
85 .proc_handler = &proc_doulongvec_minmax,
86 .extra1 = &rds_iw_sysctl_max_unsig_wr_min,
87 .extra2 = &rds_iw_sysctl_max_unsig_wr_max,
88 },
89 {
90 .ctl_name = CTL_UNNUMBERED,
91 .procname = "max_unsignaled_bytes",
92 .data = &rds_iw_sysctl_max_unsig_bytes,
93 .maxlen = sizeof(unsigned long),
94 .mode = 0644,
95 .proc_handler = &proc_doulongvec_minmax,
96 .extra1 = &rds_iw_sysctl_max_unsig_bytes_min,
97 .extra2 = &rds_iw_sysctl_max_unsig_bytes_max,
98 },
99 {
100 .ctl_name = CTL_UNNUMBERED,
101 .procname = "max_recv_allocation",
102 .data = &rds_iw_sysctl_max_recv_allocation,
103 .maxlen = sizeof(unsigned long),
104 .mode = 0644,
105 .proc_handler = &proc_doulongvec_minmax,
106 },
107 {
108 .ctl_name = CTL_UNNUMBERED,
109 .procname = "flow_control",
110 .data = &rds_iw_sysctl_flow_control,
111 .maxlen = sizeof(rds_iw_sysctl_flow_control),
112 .mode = 0644,
113 .proc_handler = &proc_dointvec,
114 },
115 { .ctl_name = 0}
116};
117
118static struct ctl_path rds_iw_sysctl_path[] = {
119 { .procname = "net", .ctl_name = CTL_NET, },
120 { .procname = "rds", .ctl_name = CTL_UNNUMBERED, },
121 { .procname = "iw", .ctl_name = CTL_UNNUMBERED, },
122 { }
123};
124
125void rds_iw_sysctl_exit(void)
126{
127 if (rds_iw_sysctl_hdr)
128 unregister_sysctl_table(rds_iw_sysctl_hdr);
129}
130
131int __init rds_iw_sysctl_init(void)
132{
133 rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table);
134 if (rds_iw_sysctl_hdr == NULL)
135 return -ENOMEM;
136 return 0;
137}
diff --git a/net/rds/loop.c b/net/rds/loop.c
new file mode 100644
index 000000000000..4a61997f554d
--- /dev/null
+++ b/net/rds/loop.c
@@ -0,0 +1,188 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/in.h>
35
36#include "rds.h"
37#include "loop.h"
38
39static DEFINE_SPINLOCK(loop_conns_lock);
40static LIST_HEAD(loop_conns);
41
42/*
43 * This 'loopback' transport is a special case for flows that originate
44 * and terminate on the same machine.
45 *
46 * Connection build-up notices if the destination address is thought of
47 * as a local address by a transport. At that time it decides to use the
48 * loopback transport instead of the bound transport of the sending socket.
49 *
50 * The loopback transport's sending path just hands the sent rds_message
51 * straight to the receiving path via an embedded rds_incoming.
52 */
53
54/*
55 * Usually a message transits both the sender and receiver's conns as it
56 * flows to the receiver. In the loopback case, though, the receive path
57 * is handed the sending conn so the sense of the addresses is reversed.
58 */
59static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
60 unsigned int hdr_off, unsigned int sg,
61 unsigned int off)
62{
63 BUG_ON(hdr_off || sg || off);
64
65 rds_inc_init(&rm->m_inc, conn, conn->c_laddr);
66 rds_message_addref(rm); /* for the inc */
67
68 rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc,
69 GFP_KERNEL, KM_USER0);
70
71 rds_send_drop_acked(conn, be64_to_cpu(rm->m_inc.i_hdr.h_sequence),
72 NULL);
73
74 rds_inc_put(&rm->m_inc);
75
76 return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len);
77}
78
79static int rds_loop_xmit_cong_map(struct rds_connection *conn,
80 struct rds_cong_map *map,
81 unsigned long offset)
82{
83 unsigned long i;
84
85 BUG_ON(offset);
86 BUG_ON(map != conn->c_lcong);
87
88 for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
89 memcpy((void *)conn->c_fcong->m_page_addrs[i],
90 (void *)map->m_page_addrs[i], PAGE_SIZE);
91 }
92
93 rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
94
95 return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
96}
97
98/* we need to at least give the thread something to succeed */
99static int rds_loop_recv(struct rds_connection *conn)
100{
101 return 0;
102}
103
104struct rds_loop_connection {
105 struct list_head loop_node;
106 struct rds_connection *conn;
107};
108
109/*
110 * Even the loopback transport needs to keep track of its connections,
111 * so it can call rds_conn_destroy() on them on exit. N.B. there are
112 * 1+ loopback addresses (127.*.*.*) so it's not a bug to have
113 * multiple loopback conns allocated, although rather useless.
114 */
115static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
116{
117 struct rds_loop_connection *lc;
118 unsigned long flags;
119
120 lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL);
121 if (lc == NULL)
122 return -ENOMEM;
123
124 INIT_LIST_HEAD(&lc->loop_node);
125 lc->conn = conn;
126 conn->c_transport_data = lc;
127
128 spin_lock_irqsave(&loop_conns_lock, flags);
129 list_add_tail(&lc->loop_node, &loop_conns);
130 spin_unlock_irqrestore(&loop_conns_lock, flags);
131
132 return 0;
133}
134
135static void rds_loop_conn_free(void *arg)
136{
137 struct rds_loop_connection *lc = arg;
138 rdsdebug("lc %p\n", lc);
139 list_del(&lc->loop_node);
140 kfree(lc);
141}
142
143static int rds_loop_conn_connect(struct rds_connection *conn)
144{
145 rds_connect_complete(conn);
146 return 0;
147}
148
149static void rds_loop_conn_shutdown(struct rds_connection *conn)
150{
151}
152
153void rds_loop_exit(void)
154{
155 struct rds_loop_connection *lc, *_lc;
156 LIST_HEAD(tmp_list);
157
158 /* avoid calling conn_destroy with irqs off */
159 spin_lock_irq(&loop_conns_lock);
160 list_splice(&loop_conns, &tmp_list);
161 INIT_LIST_HEAD(&loop_conns);
162 spin_unlock_irq(&loop_conns_lock);
163
164 list_for_each_entry_safe(lc, _lc, &tmp_list, loop_node) {
165 WARN_ON(lc->conn->c_passive);
166 rds_conn_destroy(lc->conn);
167 }
168}
169
170/*
171 * This is missing .xmit_* because loop doesn't go through generic
172 * rds_send_xmit() and doesn't call rds_recv_incoming(). .listen_stop and
173 * .laddr_check are missing because transport.c doesn't iterate over
174 * rds_loop_transport.
175 */
176struct rds_transport rds_loop_transport = {
177 .xmit = rds_loop_xmit,
178 .xmit_cong_map = rds_loop_xmit_cong_map,
179 .recv = rds_loop_recv,
180 .conn_alloc = rds_loop_conn_alloc,
181 .conn_free = rds_loop_conn_free,
182 .conn_connect = rds_loop_conn_connect,
183 .conn_shutdown = rds_loop_conn_shutdown,
184 .inc_copy_to_user = rds_message_inc_copy_to_user,
185 .inc_purge = rds_message_inc_purge,
186 .inc_free = rds_message_inc_free,
187 .t_name = "loopback",
188};
diff --git a/net/rds/loop.h b/net/rds/loop.h
new file mode 100644
index 000000000000..f32b0939a04d
--- /dev/null
+++ b/net/rds/loop.h
@@ -0,0 +1,9 @@
1#ifndef _RDS_LOOP_H
2#define _RDS_LOOP_H
3
4/* loop.c */
5extern struct rds_transport rds_loop_transport;
6
7void rds_loop_exit(void);
8
9#endif
diff --git a/net/rds/message.c b/net/rds/message.c
new file mode 100644
index 000000000000..5a15dc8d0cd7
--- /dev/null
+++ b/net/rds/message.c
@@ -0,0 +1,402 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34
35#include "rds.h"
36#include "rdma.h"
37
38static DECLARE_WAIT_QUEUE_HEAD(rds_message_flush_waitq);
39
40static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = {
41[RDS_EXTHDR_NONE] = 0,
42[RDS_EXTHDR_VERSION] = sizeof(struct rds_ext_header_version),
43[RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma),
44[RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest),
45};
46
47
48void rds_message_addref(struct rds_message *rm)
49{
50 rdsdebug("addref rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
51 atomic_inc(&rm->m_refcount);
52}
53
54/*
55 * This relies on dma_map_sg() not touching sg[].page during merging.
56 */
57static void rds_message_purge(struct rds_message *rm)
58{
59 unsigned long i;
60
61 if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
62 return;
63
64 for (i = 0; i < rm->m_nents; i++) {
65 rdsdebug("putting data page %p\n", (void *)sg_page(&rm->m_sg[i]));
66 /* XXX will have to put_page for page refs */
67 __free_page(sg_page(&rm->m_sg[i]));
68 }
69 rm->m_nents = 0;
70
71 if (rm->m_rdma_op)
72 rds_rdma_free_op(rm->m_rdma_op);
73 if (rm->m_rdma_mr)
74 rds_mr_put(rm->m_rdma_mr);
75}
76
77void rds_message_inc_purge(struct rds_incoming *inc)
78{
79 struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
80 rds_message_purge(rm);
81}
82
83void rds_message_put(struct rds_message *rm)
84{
85 rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
86
87 if (atomic_dec_and_test(&rm->m_refcount)) {
88 BUG_ON(!list_empty(&rm->m_sock_item));
89 BUG_ON(!list_empty(&rm->m_conn_item));
90 rds_message_purge(rm);
91
92 kfree(rm);
93 }
94}
95
96void rds_message_inc_free(struct rds_incoming *inc)
97{
98 struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
99 rds_message_put(rm);
100}
101
102void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
103 __be16 dport, u64 seq)
104{
105 hdr->h_flags = 0;
106 hdr->h_sport = sport;
107 hdr->h_dport = dport;
108 hdr->h_sequence = cpu_to_be64(seq);
109 hdr->h_exthdr[0] = RDS_EXTHDR_NONE;
110}
111
112int rds_message_add_extension(struct rds_header *hdr,
113 unsigned int type, const void *data, unsigned int len)
114{
115 unsigned int ext_len = sizeof(u8) + len;
116 unsigned char *dst;
117
118 /* For now, refuse to add more than one extension header */
119 if (hdr->h_exthdr[0] != RDS_EXTHDR_NONE)
120 return 0;
121
122 if (type >= __RDS_EXTHDR_MAX
123 || len != rds_exthdr_size[type])
124 return 0;
125
126 if (ext_len >= RDS_HEADER_EXT_SPACE)
127 return 0;
128 dst = hdr->h_exthdr;
129
130 *dst++ = type;
131 memcpy(dst, data, len);
132
133 dst[len] = RDS_EXTHDR_NONE;
134 return 1;
135}
136
137/*
138 * If a message has extension headers, retrieve them here.
139 * Call like this:
140 *
141 * unsigned int pos = 0;
142 *
143 * while (1) {
144 * buflen = sizeof(buffer);
145 * type = rds_message_next_extension(hdr, &pos, buffer, &buflen);
146 * if (type == RDS_EXTHDR_NONE)
147 * break;
148 * ...
149 * }
150 */
151int rds_message_next_extension(struct rds_header *hdr,
152 unsigned int *pos, void *buf, unsigned int *buflen)
153{
154 unsigned int offset, ext_type, ext_len;
155 u8 *src = hdr->h_exthdr;
156
157 offset = *pos;
158 if (offset >= RDS_HEADER_EXT_SPACE)
159 goto none;
160
161 /* Get the extension type and length. For now, the
162 * length is implied by the extension type. */
163 ext_type = src[offset++];
164
165 if (ext_type == RDS_EXTHDR_NONE || ext_type >= __RDS_EXTHDR_MAX)
166 goto none;
167 ext_len = rds_exthdr_size[ext_type];
168 if (offset + ext_len > RDS_HEADER_EXT_SPACE)
169 goto none;
170
171 *pos = offset + ext_len;
172 if (ext_len < *buflen)
173 *buflen = ext_len;
174 memcpy(buf, src + offset, *buflen);
175 return ext_type;
176
177none:
178 *pos = RDS_HEADER_EXT_SPACE;
179 *buflen = 0;
180 return RDS_EXTHDR_NONE;
181}
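The loop sketched in the comment above generalizes to any extension type. As a hedged illustration (the helper and the union are hypothetical, not part of this patch), a consumer that scans a header for the RDMA destination extension declared in rds.h could look like:

static int rds_find_rdma_dest_ext(struct rds_header *hdr, u32 *rkey, u32 *off)
{
	union {
		struct rds_ext_header_version version;
		struct rds_ext_header_rdma rdma;
		struct rds_ext_header_rdma_dest rdma_dest;
	} buf;
	unsigned int pos = 0, type, len;

	while (1) {
		len = sizeof(buf);	/* reset; the callee clamps it per extension */
		type = rds_message_next_extension(hdr, &pos, &buf, &len);
		if (type == RDS_EXTHDR_NONE)
			break;
		if (type == RDS_EXTHDR_RDMA_DEST) {
			*rkey = be32_to_cpu(buf.rdma_dest.h_rdma_rkey);
			*off = be32_to_cpu(buf.rdma_dest.h_rdma_offset);
			return 1;
		}
	}
	return 0;
}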
182
183int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version)
184{
185 struct rds_ext_header_version ext_hdr;
186
187 ext_hdr.h_version = cpu_to_be32(version);
188 return rds_message_add_extension(hdr, RDS_EXTHDR_VERSION, &ext_hdr, sizeof(ext_hdr));
189}
190
191int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version)
192{
193 struct rds_ext_header_version ext_hdr;
194 unsigned int pos = 0, len = sizeof(ext_hdr);
195
196 /* We assume the version extension is the only one present */
197 if (rds_message_next_extension(hdr, &pos, &ext_hdr, &len) != RDS_EXTHDR_VERSION)
198 return 0;
199 *version = be32_to_cpu(ext_hdr.h_version);
200 return 1;
201}
202
203int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset)
204{
205 struct rds_ext_header_rdma_dest ext_hdr;
206
207 ext_hdr.h_rdma_rkey = cpu_to_be32(r_key);
208 ext_hdr.h_rdma_offset = cpu_to_be32(offset);
209 return rds_message_add_extension(hdr, RDS_EXTHDR_RDMA_DEST, &ext_hdr, sizeof(ext_hdr));
210}
211
212struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp)
213{
214 struct rds_message *rm;
215
216 rm = kzalloc(sizeof(struct rds_message) +
217 (nents * sizeof(struct scatterlist)), gfp);
218 if (!rm)
219 goto out;
220
221 if (nents)
222 sg_init_table(rm->m_sg, nents);
223 atomic_set(&rm->m_refcount, 1);
224 INIT_LIST_HEAD(&rm->m_sock_item);
225 INIT_LIST_HEAD(&rm->m_conn_item);
226 spin_lock_init(&rm->m_rs_lock);
227
228out:
229 return rm;
230}
231
232struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
233{
234 struct rds_message *rm;
235 unsigned int i;
236
237 rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL);
238 if (rm == NULL)
239 return ERR_PTR(-ENOMEM);
240
241 set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
242 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
243 rm->m_nents = ceil(total_len, PAGE_SIZE);
244
245 for (i = 0; i < rm->m_nents; ++i) {
246 sg_set_page(&rm->m_sg[i],
247 virt_to_page(page_addrs[i]),
248 PAGE_SIZE, 0);
249 }
250
251 return rm;
252}
253
254struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
255 size_t total_len)
256{
257 unsigned long to_copy;
258 unsigned long iov_off;
259 unsigned long sg_off;
260 struct rds_message *rm;
261 struct iovec *iov;
262 struct scatterlist *sg;
263 int ret;
264
265 rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL);
266 if (rm == NULL) {
267 ret = -ENOMEM;
268 goto out;
269 }
270
271 rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
272
273 /*
274 * now allocate and copy in the data payload.
275 */
276 sg = rm->m_sg;
277 iov = first_iov;
278 iov_off = 0;
279 sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
280
281 while (total_len) {
282 if (sg_page(sg) == NULL) {
283 ret = rds_page_remainder_alloc(sg, total_len,
284 GFP_HIGHUSER);
285 if (ret)
286 goto out;
287 rm->m_nents++;
288 sg_off = 0;
289 }
290
291 while (iov_off == iov->iov_len) {
292 iov_off = 0;
293 iov++;
294 }
295
296 to_copy = min(iov->iov_len - iov_off, sg->length - sg_off);
297 to_copy = min_t(size_t, to_copy, total_len);
298
299 rdsdebug("copying %lu bytes from user iov [%p, %zu] + %lu to "
300 "sg [%p, %u, %u] + %lu\n",
301 to_copy, iov->iov_base, iov->iov_len, iov_off,
302 (void *)sg_page(sg), sg->offset, sg->length, sg_off);
303
304 ret = rds_page_copy_from_user(sg_page(sg), sg->offset + sg_off,
305 iov->iov_base + iov_off,
306 to_copy);
307 if (ret)
308 goto out;
309
310 iov_off += to_copy;
311 total_len -= to_copy;
312 sg_off += to_copy;
313
314 if (sg_off == sg->length)
315 sg++;
316 }
317
318 ret = 0;
319out:
320 if (ret) {
321 if (rm)
322 rds_message_put(rm);
323 rm = ERR_PTR(ret);
324 }
325 return rm;
326}
327
328int rds_message_inc_copy_to_user(struct rds_incoming *inc,
329 struct iovec *first_iov, size_t size)
330{
331 struct rds_message *rm;
332 struct iovec *iov;
333 struct scatterlist *sg;
334 unsigned long to_copy;
335 unsigned long iov_off;
336 unsigned long vec_off;
337 int copied;
338 int ret;
339 u32 len;
340
341 rm = container_of(inc, struct rds_message, m_inc);
342 len = be32_to_cpu(rm->m_inc.i_hdr.h_len);
343
344 iov = first_iov;
345 iov_off = 0;
346 sg = rm->m_sg;
347 vec_off = 0;
348 copied = 0;
349
350 while (copied < size && copied < len) {
351 while (iov_off == iov->iov_len) {
352 iov_off = 0;
353 iov++;
354 }
355
356 to_copy = min(iov->iov_len - iov_off, sg->length - vec_off);
357 to_copy = min_t(size_t, to_copy, size - copied);
358 to_copy = min_t(unsigned long, to_copy, len - copied);
359
360 rdsdebug("copying %lu bytes to user iov [%p, %zu] + %lu to "
361 "sg [%p, %u, %u] + %lu\n",
362 to_copy, iov->iov_base, iov->iov_len, iov_off,
363 sg_page(sg), sg->offset, sg->length, vec_off);
364
365 ret = rds_page_copy_to_user(sg_page(sg), sg->offset + vec_off,
366 iov->iov_base + iov_off,
367 to_copy);
368 if (ret) {
369 copied = ret;
370 break;
371 }
372
373 iov_off += to_copy;
374 vec_off += to_copy;
375 copied += to_copy;
376
377 if (vec_off == sg->length) {
378 vec_off = 0;
379 sg++;
380 }
381 }
382
383 return copied;
384}
385
386/*
387 * If the message is still on the send queue, wait until the transport
388 * is done with it. This is particularly important for RDMA operations.
389 */
390void rds_message_wait(struct rds_message *rm)
391{
392 wait_event(rds_message_flush_waitq,
393 !test_bit(RDS_MSG_MAPPED, &rm->m_flags));
394}
395
396void rds_message_unmapped(struct rds_message *rm)
397{
398 clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
399 if (waitqueue_active(&rds_message_flush_waitq))
400 wake_up(&rds_message_flush_waitq);
401}
402
diff --git a/net/rds/page.c b/net/rds/page.c
new file mode 100644
index 000000000000..c460743a89ad
--- /dev/null
+++ b/net/rds/page.c
@@ -0,0 +1,221 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/highmem.h>
34
35#include "rds.h"
36
37struct rds_page_remainder {
38 struct page *r_page;
39 unsigned long r_offset;
40};
41
42DEFINE_PER_CPU(struct rds_page_remainder, rds_page_remainders) ____cacheline_aligned;
43
44/*
45 * returns 0 on success or -errno on failure.
46 *
47 * We don't have to worry about flush_dcache_page() as this only works
48 * with private pages. If, say, we were to do directed receive to pinned
49 * user pages we'd have to worry more about cache coherence. (Though
50 * the flush_dcache_page() in get_user_pages() would probably be enough).
51 */
52int rds_page_copy_user(struct page *page, unsigned long offset,
53 void __user *ptr, unsigned long bytes,
54 int to_user)
55{
56 unsigned long ret;
57 void *addr;
58
59 if (to_user)
60 rds_stats_add(s_copy_to_user, bytes);
61 else
62 rds_stats_add(s_copy_from_user, bytes);
63
64 addr = kmap_atomic(page, KM_USER0);
65 if (to_user)
66 ret = __copy_to_user_inatomic(ptr, addr + offset, bytes);
67 else
68 ret = __copy_from_user_inatomic(addr + offset, ptr, bytes);
69 kunmap_atomic(addr, KM_USER0);
70
71 if (ret) {
72 addr = kmap(page);
73 if (to_user)
74 ret = copy_to_user(ptr, addr + offset, bytes);
75 else
76 ret = copy_from_user(addr + offset, ptr, bytes);
77 kunmap(page);
78 if (ret)
79 return -EFAULT;
80 }
81
82 return 0;
83}
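The callers in message.c above use rds_page_copy_to_user() and rds_page_copy_from_user(); presumably these are thin wrappers that fix the to_user argument, along these lines (hypothetical definitions, shown only to connect the call sites to this function):

#define rds_page_copy_to_user(page, off, ptr, bytes) \
	rds_page_copy_user(page, off, ptr, bytes, 1)
#define rds_page_copy_from_user(page, off, ptr, bytes) \
	rds_page_copy_user(page, off, ptr, bytes, 0)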
84
85/*
86 * Message allocation uses this to build up regions of a message.
87 *
88 * @bytes - the number of bytes needed.
89 * @gfp - the waiting behaviour of the allocation
90 *
91 * @gfp is always ored with __GFP_HIGHMEM. Callers must be prepared to
92 * kmap the pages, etc.
93 *
94 * If @bytes is at least a full page then this just returns a page from
95 * alloc_page().
96 *
97 * If @bytes is a partial page then this stores the unused region of the
98 * page in a per-cpu structure. Future partial-page allocations may be
99 * satisfied from that cached region. This lets us waste less memory on
100 * small allocations with minimal complexity. It works because the transmit
101 * path passes read-only page regions down to devices. They hold a page
102 * reference until they are done with the region.
103 */
104int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
105 gfp_t gfp)
106{
107 struct rds_page_remainder *rem;
108 unsigned long flags;
109 struct page *page;
110 int ret;
111
112 gfp |= __GFP_HIGHMEM;
113
114 /* jump straight to allocation if we're trying for a huge page */
115 if (bytes >= PAGE_SIZE) {
116 page = alloc_page(gfp);
117 if (page == NULL) {
118 ret = -ENOMEM;
119 } else {
120 sg_set_page(scat, page, PAGE_SIZE, 0);
121 ret = 0;
122 }
123 goto out;
124 }
125
126 rem = &per_cpu(rds_page_remainders, get_cpu());
127 local_irq_save(flags);
128
129 while (1) {
130 /* avoid a tiny region getting stuck by tossing it */
131 if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) {
132 rds_stats_inc(s_page_remainder_miss);
133 __free_page(rem->r_page);
134 rem->r_page = NULL;
135 }
136
137 /* hand out a fragment from the cached page */
138 if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) {
139 sg_set_page(scat, rem->r_page, bytes, rem->r_offset);
140 get_page(sg_page(scat));
141
142 if (rem->r_offset != 0)
143 rds_stats_inc(s_page_remainder_hit);
144
145 rem->r_offset += bytes;
146 if (rem->r_offset == PAGE_SIZE) {
147 __free_page(rem->r_page);
148 rem->r_page = NULL;
149 }
150 ret = 0;
151 break;
152 }
153
154 /* alloc if there is nothing for us to use */
155 local_irq_restore(flags);
156 put_cpu();
157
158 page = alloc_page(gfp);
159
160 rem = &per_cpu(rds_page_remainders, get_cpu());
161 local_irq_save(flags);
162
163 if (page == NULL) {
164 ret = -ENOMEM;
165 break;
166 }
167
168 /* did someone race to fill the remainder before us? */
169 if (rem->r_page) {
170 __free_page(page);
171 continue;
172 }
173
174 /* otherwise install our page and loop around to alloc */
175 rem->r_page = page;
176 rem->r_offset = 0;
177 }
178
179 local_irq_restore(flags);
180 put_cpu();
181out:
182 rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret,
183 ret ? NULL : sg_page(scat), ret ? 0 : scat->offset,
184 ret ? 0 : scat->length);
185 return ret;
186}
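A minimal sketch of what the remainder cache buys a caller (illustration only; the function below is hypothetical and not part of this patch): two sub-page allocations issued back to back on one CPU are typically carved out of the same page, with each scatterlist entry holding its own page reference.

static void rds_page_remainder_demo(void)
{
	struct scatterlist sg[2];

	sg_init_table(sg, 2);

	if (rds_page_remainder_alloc(&sg[0], 100, GFP_KERNEL))
		return;
	if (rds_page_remainder_alloc(&sg[1], 200, GFP_KERNEL)) {
		__free_page(sg_page(&sg[0]));
		return;
	}

	/*
	 * Typically sg_page(&sg[0]) == sg_page(&sg[1]) with offsets 0 and
	 * 100; the cached remainder keeps its own reference until the page
	 * is used up, so dropping ours here is safe.
	 */
	__free_page(sg_page(&sg[0]));
	__free_page(sg_page(&sg[1]));
}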
187
188static int rds_page_remainder_cpu_notify(struct notifier_block *self,
189 unsigned long action, void *hcpu)
190{
191 struct rds_page_remainder *rem;
192 long cpu = (long)hcpu;
193
194 rem = &per_cpu(rds_page_remainders, cpu);
195
196 rdsdebug("cpu %ld action 0x%lx\n", cpu, action);
197
198 switch (action) {
199 case CPU_DEAD:
200 if (rem->r_page)
201 __free_page(rem->r_page);
202 rem->r_page = NULL;
203 break;
204 }
205
206 return 0;
207}
208
209static struct notifier_block rds_page_remainder_nb = {
210 .notifier_call = rds_page_remainder_cpu_notify,
211};
212
213void rds_page_exit(void)
214{
215 int i;
216
217 for_each_possible_cpu(i)
218 rds_page_remainder_cpu_notify(&rds_page_remainder_nb,
219 (unsigned long)CPU_DEAD,
220 (void *)(long)i);
221}
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
new file mode 100644
index 000000000000..eaeeb91e1119
--- /dev/null
+++ b/net/rds/rdma.c
@@ -0,0 +1,679 @@
1/*
2 * Copyright (c) 2007 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/pagemap.h>
34#include <linux/rbtree.h>
35#include <linux/dma-mapping.h> /* for DMA_*_DEVICE */
36
37#include "rdma.h"
38
39/*
40 * XXX
41 * - build with sparse
42 * - should we limit the size of a mr region? let transport return failure?
43 * - should we detect duplicate keys on a socket? hmm.
44 * - an rdma is an mlock, apply rlimit?
45 */
46
47/*
48 * get the number of pages by looking at the page indices that the start and
49 * end addresses fall in.
50 *
51 * Returns 0 if the vec is invalid. It is invalid if the number of bytes
52 * causes the address to wrap or overflows an unsigned int. This comes
53 * from being stored in the 'length' member of 'struct scatterlist'.
54 */
55static unsigned int rds_pages_in_vec(struct rds_iovec *vec)
56{
57 if ((vec->addr + vec->bytes <= vec->addr) ||
58 (vec->bytes > (u64)UINT_MAX))
59 return 0;
60
61 return ((vec->addr + vec->bytes + PAGE_SIZE - 1) >> PAGE_SHIFT) -
62 (vec->addr >> PAGE_SHIFT);
63}
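A quick worked example of that arithmetic, with hypothetical values and assuming 4 KiB pages (PAGE_SHIFT == 12): a 32-byte vector starting 16 bytes before a page boundary straddles two pages.

	vec.addr = 0x1ff0, vec.bytes = 0x20           (last byte at 0x200f)
	end   = (0x1ff0 + 0x20 + 0xfff) >> 12  = 3
	start =  0x1ff0 >> 12                  = 1
	pages = end - start                    = 2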
64
65static struct rds_mr *rds_mr_tree_walk(struct rb_root *root, u64 key,
66 struct rds_mr *insert)
67{
68 struct rb_node **p = &root->rb_node;
69 struct rb_node *parent = NULL;
70 struct rds_mr *mr;
71
72 while (*p) {
73 parent = *p;
74 mr = rb_entry(parent, struct rds_mr, r_rb_node);
75
76 if (key < mr->r_key)
77 p = &(*p)->rb_left;
78 else if (key > mr->r_key)
79 p = &(*p)->rb_right;
80 else
81 return mr;
82 }
83
84 if (insert) {
85 rb_link_node(&insert->r_rb_node, parent, p);
86 rb_insert_color(&insert->r_rb_node, root);
87 atomic_inc(&insert->r_refcount);
88 }
89 return NULL;
90}
91
92/*
93 * Destroy the transport-specific part of a MR.
94 */
95static void rds_destroy_mr(struct rds_mr *mr)
96{
97 struct rds_sock *rs = mr->r_sock;
98 void *trans_private = NULL;
99 unsigned long flags;
100
101 rdsdebug("RDS: destroy mr key is %x refcnt %u\n",
102 mr->r_key, atomic_read(&mr->r_refcount));
103
104 if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state))
105 return;
106
107 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
108 if (!RB_EMPTY_NODE(&mr->r_rb_node))
109 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
110 trans_private = mr->r_trans_private;
111 mr->r_trans_private = NULL;
112 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
113
114 if (trans_private)
115 mr->r_trans->free_mr(trans_private, mr->r_invalidate);
116}
117
118void __rds_put_mr_final(struct rds_mr *mr)
119{
120 rds_destroy_mr(mr);
121 kfree(mr);
122}
123
124/*
125 * By the time this is called we can't have any more ioctls called on
126 * the socket so we don't need to worry about racing with others.
127 */
128void rds_rdma_drop_keys(struct rds_sock *rs)
129{
130 struct rds_mr *mr;
131 struct rb_node *node;
132
133 /* Release any MRs associated with this socket */
134 while ((node = rb_first(&rs->rs_rdma_keys))) {
135 mr = container_of(node, struct rds_mr, r_rb_node);
136 if (mr->r_trans == rs->rs_transport)
137 mr->r_invalidate = 0;
138 rds_mr_put(mr);
139 }
140
141 if (rs->rs_transport && rs->rs_transport->flush_mrs)
142 rs->rs_transport->flush_mrs();
143}
144
145/*
146 * Helper function to pin user pages.
147 */
148static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
149 struct page **pages, int write)
150{
151 int ret;
152
153 down_read(&current->mm->mmap_sem);
154 ret = get_user_pages(current, current->mm, user_addr,
155 nr_pages, write, 0, pages, NULL);
156 up_read(&current->mm->mmap_sem);
157
158 if (0 <= ret && (unsigned) ret < nr_pages) {
159 while (ret--)
160 put_page(pages[ret]);
161 ret = -EFAULT;
162 }
163
164 return ret;
165}
166
167static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
168 u64 *cookie_ret, struct rds_mr **mr_ret)
169{
170 struct rds_mr *mr = NULL, *found;
171 unsigned int nr_pages;
172 struct page **pages = NULL;
173 struct scatterlist *sg;
174 void *trans_private;
175 unsigned long flags;
176 rds_rdma_cookie_t cookie;
177 unsigned int nents;
178 long i;
179 int ret;
180
181 if (rs->rs_bound_addr == 0) {
182 ret = -ENOTCONN; /* XXX not a great errno */
183 goto out;
184 }
185
186 if (rs->rs_transport->get_mr == NULL) {
187 ret = -EOPNOTSUPP;
188 goto out;
189 }
190
191 nr_pages = rds_pages_in_vec(&args->vec);
192 if (nr_pages == 0) {
193 ret = -EINVAL;
194 goto out;
195 }
196
197 rdsdebug("RDS: get_mr addr %llx len %llu nr_pages %u\n",
198 args->vec.addr, args->vec.bytes, nr_pages);
199
200 /* XXX clamp nr_pages to limit the size of this alloc? */
201 pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
202 if (pages == NULL) {
203 ret = -ENOMEM;
204 goto out;
205 }
206
207 mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL);
208 if (mr == NULL) {
209 ret = -ENOMEM;
210 goto out;
211 }
212
213 atomic_set(&mr->r_refcount, 1);
214 RB_CLEAR_NODE(&mr->r_rb_node);
215 mr->r_trans = rs->rs_transport;
216 mr->r_sock = rs;
217
218 if (args->flags & RDS_RDMA_USE_ONCE)
219 mr->r_use_once = 1;
220 if (args->flags & RDS_RDMA_INVALIDATE)
221 mr->r_invalidate = 1;
222 if (args->flags & RDS_RDMA_READWRITE)
223 mr->r_write = 1;
224
225 /*
226 * Pin the pages that make up the user buffer and transfer the page
227 * pointers to the mr's sg array. We check to see if we've mapped
228 * the whole region after transferring the partial page references
229 * to the sg array so that we can have one page ref cleanup path.
230 *
231 * For now we have no flag that tells us whether the mapping is
232 * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to
233 * the zero page.
234 */
235 ret = rds_pin_pages(args->vec.addr & PAGE_MASK, nr_pages, pages, 1);
236 if (ret < 0)
237 goto out;
238
239 nents = ret;
240 sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
241 if (sg == NULL) {
242 ret = -ENOMEM;
243 goto out;
244 }
245 WARN_ON(!nents);
246 sg_init_table(sg, nents);
247
248 /* Stick all pages into the scatterlist */
249 for (i = 0 ; i < nents; i++)
250 sg_set_page(&sg[i], pages[i], PAGE_SIZE, 0);
251
252 rdsdebug("RDS: trans_private nents is %u\n", nents);
253
254 /* Obtain a transport specific MR. If this succeeds, the
255 * s/g list is now owned by the MR.
256 * Note that dma_map() implies that pending writes are
257 * flushed to RAM, so no dma_sync is needed here. */
258 trans_private = rs->rs_transport->get_mr(sg, nents, rs,
259 &mr->r_key);
260
261 if (IS_ERR(trans_private)) {
262 for (i = 0 ; i < nents; i++)
263 put_page(sg_page(&sg[i]));
264 kfree(sg);
265 ret = PTR_ERR(trans_private);
266 goto out;
267 }
268
269 mr->r_trans_private = trans_private;
270
271 rdsdebug("RDS: get_mr put_user key is %x cookie_addr %p\n",
272 mr->r_key, (void *)(unsigned long) args->cookie_addr);
273
274 /* The user may pass us an unaligned address, but we can only
275 * map page aligned regions. So we keep the offset, and build
276 * a 64bit cookie containing <R_Key, offset> and pass that
277 * around. */
278 cookie = rds_rdma_make_cookie(mr->r_key, args->vec.addr & ~PAGE_MASK);
279 if (cookie_ret)
280 *cookie_ret = cookie;
281
282 if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long) args->cookie_addr)) {
283 ret = -EFAULT;
284 goto out;
285 }
286
287 /* Inserting the new MR into the rbtree bumps its
288 * reference count. */
289 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
290 found = rds_mr_tree_walk(&rs->rs_rdma_keys, mr->r_key, mr);
291 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
292
293 BUG_ON(found && found != mr);
294
295 rdsdebug("RDS: get_mr key is %x\n", mr->r_key);
296 if (mr_ret) {
297 atomic_inc(&mr->r_refcount);
298 *mr_ret = mr;
299 }
300
301 ret = 0;
302out:
303 kfree(pages);
304 if (mr)
305 rds_mr_put(mr);
306 return ret;
307}
308
309int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen)
310{
311 struct rds_get_mr_args args;
312
313 if (optlen != sizeof(struct rds_get_mr_args))
314 return -EINVAL;
315
316 if (copy_from_user(&args, (struct rds_get_mr_args __user *)optval,
317 sizeof(struct rds_get_mr_args)))
318 return -EFAULT;
319
320 return __rds_rdma_map(rs, &args, NULL, NULL);
321}
322
323/*
324 * Free the MR indicated by the given R_Key
325 */
326int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen)
327{
328 struct rds_free_mr_args args;
329 struct rds_mr *mr;
330 unsigned long flags;
331
332 if (optlen != sizeof(struct rds_free_mr_args))
333 return -EINVAL;
334
335 if (copy_from_user(&args, (struct rds_free_mr_args __user *)optval,
336 sizeof(struct rds_free_mr_args)))
337 return -EFAULT;
338
339 /* Special case - a null cookie means flush all unused MRs */
340 if (args.cookie == 0) {
341 if (!rs->rs_transport || !rs->rs_transport->flush_mrs)
342 return -EINVAL;
343 rs->rs_transport->flush_mrs();
344 return 0;
345 }
346
347 /* Look up the MR given its R_key and remove it from the rbtree
348 * so nobody else finds it.
349 * This should also prevent races with rds_rdma_unuse.
350 */
351 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
352 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, rds_rdma_cookie_key(args.cookie), NULL);
353 if (mr) {
354 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
355 RB_CLEAR_NODE(&mr->r_rb_node);
356 if (args.flags & RDS_RDMA_INVALIDATE)
357 mr->r_invalidate = 1;
358 }
359 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
360
361 if (!mr)
362 return -EINVAL;
363
364 /*
365 * call rds_destroy_mr() ourselves so that we're sure it's done by the time
366 * we return. If we let rds_mr_put() do it it might not happen until
367 * someone else drops their ref.
368 */
369 rds_destroy_mr(mr);
370 rds_mr_put(mr);
371 return 0;
372}
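The two handlers above are the setsockopt side of memory registration. A hedged userspace sketch of the round trip, assuming the SOL_RDS level and the RDS_GET_MR/RDS_FREE_MR option names from the userspace <linux/rds.h> header (the helper functions are hypothetical); the socket must already be bound, or rds_get_mr() fails with -ENOTCONN.

#include <linux/rds.h>		/* assumed to define SOL_RDS, RDS_GET_MR, RDS_FREE_MR */
#include <sys/socket.h>
#include <stdint.h>

static int register_buffer(int sock, void *buf, size_t len, uint64_t *cookie)
{
	struct rds_get_mr_args mr = {
		.vec         = { .addr = (uintptr_t)buf, .bytes = len },
		.cookie_addr = (uintptr_t)cookie,	/* kernel put_user()s the cookie here */
		.flags       = 0,			/* or e.g. RDS_RDMA_USE_ONCE */
	};

	return setsockopt(sock, SOL_RDS, RDS_GET_MR, &mr, sizeof(mr));
}

static int release_buffer(int sock, uint64_t cookie)
{
	struct rds_free_mr_args args = { .cookie = cookie, .flags = 0 };

	return setsockopt(sock, SOL_RDS, RDS_FREE_MR, &args, sizeof(args));
}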
373
374/*
375 * This is called when we receive an extension header that
376 * tells us this MR was used. It allows us to implement
377 * use_once semantics
378 */
379void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
380{
381 struct rds_mr *mr;
382 unsigned long flags;
383 int zot_me = 0;
384
385 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
386 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
387 if (mr && (mr->r_use_once || force)) {
388 rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
389 RB_CLEAR_NODE(&mr->r_rb_node);
390 zot_me = 1;
391 } else if (mr)
392 atomic_inc(&mr->r_refcount);
393 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
394
395 /* May have to issue a dma_sync on this memory region.
396 * Note we could avoid this if the operation was a RDMA READ,
397 * but at this point we can't tell. */
398 if (mr != NULL) {
399 if (mr->r_trans->sync_mr)
400 mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
401
402 /* If the MR was marked as invalidate, this will
403 * trigger an async flush. */
404 if (zot_me)
405 rds_destroy_mr(mr);
406 rds_mr_put(mr);
407 }
408}
409
410void rds_rdma_free_op(struct rds_rdma_op *ro)
411{
412 unsigned int i;
413
414 for (i = 0; i < ro->r_nents; i++) {
415 struct page *page = sg_page(&ro->r_sg[i]);
416
417 /* Mark page dirty if it was possibly modified, which
418 * is the case for a RDMA_READ which copies from remote
419 * to local memory */
420 if (!ro->r_write)
421 set_page_dirty(page);
422 put_page(page);
423 }
424
425 kfree(ro->r_notifier);
426 kfree(ro);
427}
428
429/*
430 * args is a pointer to an in-kernel copy in the sendmsg cmsg.
431 */
432static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
433 struct rds_rdma_args *args)
434{
435 struct rds_iovec vec;
436 struct rds_rdma_op *op = NULL;
437 unsigned int nr_pages;
438 unsigned int max_pages;
439 unsigned int nr_bytes;
440 struct page **pages = NULL;
441 struct rds_iovec __user *local_vec;
442 struct scatterlist *sg;
443 unsigned int nr;
444 unsigned int i, j;
445 int ret;
446
447
448 if (rs->rs_bound_addr == 0) {
449 ret = -ENOTCONN; /* XXX not a great errno */
450 goto out;
451 }
452
453 if (args->nr_local > (u64)UINT_MAX) {
454 ret = -EMSGSIZE;
455 goto out;
456 }
457
458 nr_pages = 0;
459 max_pages = 0;
460
461 local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
462
463 /* figure out the number of pages in the vector */
464 for (i = 0; i < args->nr_local; i++) {
465 if (copy_from_user(&vec, &local_vec[i],
466 sizeof(struct rds_iovec))) {
467 ret = -EFAULT;
468 goto out;
469 }
470
471 nr = rds_pages_in_vec(&vec);
472 if (nr == 0) {
473 ret = -EINVAL;
474 goto out;
475 }
476
477 max_pages = max(nr, max_pages);
478 nr_pages += nr;
479 }
480
481 pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL);
482 if (pages == NULL) {
483 ret = -ENOMEM;
484 goto out;
485 }
486
487 op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL);
488 if (op == NULL) {
489 ret = -ENOMEM;
490 goto out;
491 }
492
493 op->r_write = !!(args->flags & RDS_RDMA_READWRITE);
494 op->r_fence = !!(args->flags & RDS_RDMA_FENCE);
495 op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
496 op->r_recverr = rs->rs_recverr;
497 WARN_ON(!nr_pages);
498 sg_init_table(op->r_sg, nr_pages);
499
500 if (op->r_notify || op->r_recverr) {
501 /* We allocate an uninitialized notifier here, because
502 * we don't want to do that in the completion handler. We
503 * would have to use GFP_ATOMIC there, and don't want to deal
504 * with failed allocations.
505 */
506 op->r_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
507 if (!op->r_notifier) {
508 ret = -ENOMEM;
509 goto out;
510 }
511 op->r_notifier->n_user_token = args->user_token;
512 op->r_notifier->n_status = RDS_RDMA_SUCCESS;
513 }
514
515 /* The cookie contains the R_Key of the remote memory region, and
516 * optionally an offset into it. This is how we implement RDMA into
517 * unaligned memory.
518 * When setting up the RDMA, we need to add that offset to the
519 * destination address (which is really an offset into the MR)
520 * FIXME: We may want to move this into ib_rdma.c
521 */
522 op->r_key = rds_rdma_cookie_key(args->cookie);
523 op->r_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie);
524
525 nr_bytes = 0;
526
527 rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n",
528 (unsigned long long)args->nr_local,
529 (unsigned long long)args->remote_vec.addr,
530 op->r_key);
531
532 for (i = 0; i < args->nr_local; i++) {
533 if (copy_from_user(&vec, &local_vec[i],
534 sizeof(struct rds_iovec))) {
535 ret = -EFAULT;
536 goto out;
537 }
538
539 nr = rds_pages_in_vec(&vec);
540 if (nr == 0) {
541 ret = -EINVAL;
542 goto out;
543 }
544
545 rs->rs_user_addr = vec.addr;
546 rs->rs_user_bytes = vec.bytes;
547
548 /* did the user change the vec under us? */
549 if (nr > max_pages || op->r_nents + nr > nr_pages) {
550 ret = -EINVAL;
551 goto out;
552 }
553 /* If it's a WRITE operation, we want to pin the pages for reading.
554 * If it's a READ operation, we need to pin the pages for writing.
555 */
556 ret = rds_pin_pages(vec.addr & PAGE_MASK, nr, pages, !op->r_write);
557 if (ret < 0)
558 goto out;
559
560 rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n",
561 nr_bytes, nr, vec.bytes, vec.addr);
562
563 nr_bytes += vec.bytes;
564
565 for (j = 0; j < nr; j++) {
566 unsigned int offset = vec.addr & ~PAGE_MASK;
567
568 sg = &op->r_sg[op->r_nents + j];
569 sg_set_page(sg, pages[j],
570 min_t(unsigned int, vec.bytes, PAGE_SIZE - offset),
571 offset);
572
573 rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n",
574 sg->offset, sg->length, vec.addr, vec.bytes);
575
576 vec.addr += sg->length;
577 vec.bytes -= sg->length;
578 }
579
580 op->r_nents += nr;
581 }
582
583
584 if (nr_bytes > args->remote_vec.bytes) {
585 rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n",
586 nr_bytes,
587 (unsigned int) args->remote_vec.bytes);
588 ret = -EINVAL;
589 goto out;
590 }
591 op->r_bytes = nr_bytes;
592
593 ret = 0;
594out:
595 kfree(pages);
596 if (ret) {
597 if (op)
598 rds_rdma_free_op(op);
599 op = ERR_PTR(ret);
600 }
601 return op;
602}
603
604/*
605 * The application asks for a RDMA transfer.
606 * Extract all arguments and set up the rdma_op
607 */
608int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
609 struct cmsghdr *cmsg)
610{
611 struct rds_rdma_op *op;
612
613 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))
614 || rm->m_rdma_op != NULL)
615 return -EINVAL;
616
617 op = rds_rdma_prepare(rs, CMSG_DATA(cmsg));
618 if (IS_ERR(op))
619 return PTR_ERR(op);
620 rds_stats_inc(s_send_rdma);
621 rm->m_rdma_op = op;
622 return 0;
623}
624
625/*
626 * The application wants us to pass an RDMA destination (aka MR)
627 * to the remote
628 */
629int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
630 struct cmsghdr *cmsg)
631{
632 unsigned long flags;
633 struct rds_mr *mr;
634 u32 r_key;
635 int err = 0;
636
637 if (cmsg->cmsg_len < CMSG_LEN(sizeof(rds_rdma_cookie_t))
638 || rm->m_rdma_cookie != 0)
639 return -EINVAL;
640
641 memcpy(&rm->m_rdma_cookie, CMSG_DATA(cmsg), sizeof(rm->m_rdma_cookie));
642
643 /* We are reusing a previously mapped MR here. Most likely, the
644 * application has written to the buffer, so we need to explicitly
645 * flush those writes to RAM. Otherwise the HCA may not see them
646 * when doing a DMA from that buffer.
647 */
648 r_key = rds_rdma_cookie_key(rm->m_rdma_cookie);
649
650 spin_lock_irqsave(&rs->rs_rdma_lock, flags);
651 mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
652 if (mr == NULL)
653 err = -EINVAL; /* invalid r_key */
654 else
655 atomic_inc(&mr->r_refcount);
656 spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
657
658 if (mr) {
659 mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
660 rm->m_rdma_mr = mr;
661 }
662 return err;
663}
664
665/*
666 * The application passes us an address range it wants to enable RDMA
667 * to/from. We map the area, and save the <R_Key,offset> pair
668 * in rm->m_rdma_cookie. This causes it to be sent along to the peer
669 * in an extension header.
670 */
671int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
672 struct cmsghdr *cmsg)
673{
674 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_get_mr_args))
675 || rm->m_rdma_cookie != 0)
676 return -EINVAL;
677
678 return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr);
679}
diff --git a/net/rds/rdma.h b/net/rds/rdma.h
new file mode 100644
index 000000000000..425512098b0b
--- /dev/null
+++ b/net/rds/rdma.h
@@ -0,0 +1,84 @@
1#ifndef _RDS_RDMA_H
2#define _RDS_RDMA_H
3
4#include <linux/rbtree.h>
5#include <linux/spinlock.h>
6#include <linux/scatterlist.h>
7
8#include "rds.h"
9
10struct rds_mr {
11 struct rb_node r_rb_node;
12 atomic_t r_refcount;
13 u32 r_key;
14
15 /* A copy of the creation flags */
16 unsigned int r_use_once:1;
17 unsigned int r_invalidate:1;
18 unsigned int r_write:1;
19
20 /* This is for RDS_MR_DEAD.
21 * It would be nice & consistent to make this part of the above
22 * bit field here, but we need to use test_and_set_bit.
23 */
24 unsigned long r_state;
25 struct rds_sock *r_sock; /* back pointer to the socket that owns us */
26 struct rds_transport *r_trans;
27 void *r_trans_private;
28};
29
30/* Flags for mr->r_state */
31#define RDS_MR_DEAD 0
32
33struct rds_rdma_op {
34 u32 r_key;
35 u64 r_remote_addr;
36 unsigned int r_write:1;
37 unsigned int r_fence:1;
38 unsigned int r_notify:1;
39 unsigned int r_recverr:1;
40 unsigned int r_mapped:1;
41 struct rds_notifier *r_notifier;
42 unsigned int r_bytes;
43 unsigned int r_nents;
44 unsigned int r_count;
45 struct scatterlist r_sg[0];
46};
47
48static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
49{
50 return r_key | (((u64) offset) << 32);
51}
52
53static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
54{
55 return cookie;
56}
57
58static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
59{
60 return cookie >> 32;
61}
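These helpers pack the <R_Key, offset> pair that rdma.c builds for unaligned user buffers. A quick round trip with hypothetical values:

	cookie = rds_rdma_make_cookie(0x1234, 0xfff);	/* == 0x00000fff00001234 */
	rds_rdma_cookie_key(cookie)    == 0x1234
	rds_rdma_cookie_offset(cookie) == 0xfff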
62
63int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
64int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
65void rds_rdma_drop_keys(struct rds_sock *rs);
66int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
67 struct cmsghdr *cmsg);
68int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
69 struct cmsghdr *cmsg);
72int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
73 struct cmsghdr *cmsg);
74void rds_rdma_free_op(struct rds_rdma_op *ro);
75void rds_rdma_send_complete(struct rds_message *rm, int);
76
77extern void __rds_put_mr_final(struct rds_mr *mr);
78static inline void rds_mr_put(struct rds_mr *mr)
79{
80 if (atomic_dec_and_test(&mr->r_refcount))
81 __rds_put_mr_final(mr);
82}
83
84#endif
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
new file mode 100644
index 000000000000..7b19024f9706
--- /dev/null
+++ b/net/rds/rdma_transport.c
@@ -0,0 +1,214 @@
1/*
2 * Copyright (c) 2009 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <rdma/rdma_cm.h>
34
35#include "rdma_transport.h"
36
37static struct rdma_cm_id *rds_iw_listen_id;
38
39int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
40 struct rdma_cm_event *event)
41{
42 /* this can be null in the listening path */
43 struct rds_connection *conn = cm_id->context;
44 struct rds_transport *trans;
45 int ret = 0;
46
47 rdsdebug("conn %p id %p handling event %u\n", conn, cm_id,
48 event->event);
49
50 if (cm_id->device->node_type == RDMA_NODE_RNIC)
51 trans = &rds_iw_transport;
52 else
53 trans = &rds_ib_transport;
54
55 /* Prevent shutdown from tearing down the connection
56 * while we're executing. */
57 if (conn) {
58 mutex_lock(&conn->c_cm_lock);
59
60 /* If the connection is being shut down, bail out
61 * right away. We return 0 so cm_id doesn't get
62 * destroyed prematurely */
63 if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING) {
64 /* Reject incoming connections while we're tearing
65 * down an existing one. */
66 if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
67 ret = 1;
68 goto out;
69 }
70 }
71
72 switch (event->event) {
73 case RDMA_CM_EVENT_CONNECT_REQUEST:
74 ret = trans->cm_handle_connect(cm_id, event);
75 break;
76
77 case RDMA_CM_EVENT_ADDR_RESOLVED:
78 /* XXX do we need to clean up if this fails? */
79 ret = rdma_resolve_route(cm_id,
80 RDS_RDMA_RESOLVE_TIMEOUT_MS);
81 break;
82
83 case RDMA_CM_EVENT_ROUTE_RESOLVED:
84 /* XXX worry about racing with listen acceptance */
85 ret = trans->cm_initiate_connect(cm_id);
86 break;
87
88 case RDMA_CM_EVENT_ESTABLISHED:
89 trans->cm_connect_complete(conn, event);
90 break;
91
92 case RDMA_CM_EVENT_ADDR_ERROR:
93 case RDMA_CM_EVENT_ROUTE_ERROR:
94 case RDMA_CM_EVENT_CONNECT_ERROR:
95 case RDMA_CM_EVENT_UNREACHABLE:
96 case RDMA_CM_EVENT_REJECTED:
97 case RDMA_CM_EVENT_DEVICE_REMOVAL:
98 case RDMA_CM_EVENT_ADDR_CHANGE:
99 if (conn)
100 rds_conn_drop(conn);
101 break;
102
103 case RDMA_CM_EVENT_DISCONNECTED:
104 printk(KERN_WARNING "RDS/IW: DISCONNECT event - dropping connection "
105 "%pI4->%pI4\n", &conn->c_laddr,
106 &conn->c_faddr);
107 rds_conn_drop(conn);
108 break;
109
110 default:
111 /* things like device disconnect? */
112 printk(KERN_ERR "unknown event %u\n", event->event);
113 BUG();
114 break;
115 }
116
117out:
118 if (conn)
119 mutex_unlock(&conn->c_cm_lock);
120
121 rdsdebug("id %p event %u handling ret %d\n", cm_id, event->event, ret);
122
123 return ret;
124}
125
126static int __init rds_rdma_listen_init(void)
127{
128 struct sockaddr_in sin;
129 struct rdma_cm_id *cm_id;
130 int ret;
131
132 cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP);
133 if (IS_ERR(cm_id)) {
134 ret = PTR_ERR(cm_id);
135 printk(KERN_ERR "RDS/IW: failed to setup listener, "
136 "rdma_create_id() returned %d\n", ret);
137 goto out;
138 }
139
140 sin.sin_family = AF_INET;
141 sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY);
142 sin.sin_port = (__force u16)htons(RDS_PORT);
143
144 /*
145 * XXX I bet this binds the cm_id to a device. If we want to support
146 * fail-over we'll have to take this into consideration.
147 */
148 ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
149 if (ret) {
150 printk(KERN_ERR "RDS/IW: failed to setup listener, "
151 "rdma_bind_addr() returned %d\n", ret);
152 goto out;
153 }
154
155 ret = rdma_listen(cm_id, 128);
156 if (ret) {
157 printk(KERN_ERR "RDS/IW: failed to setup listener, "
158 "rdma_listen() returned %d\n", ret);
159 goto out;
160 }
161
162 rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT);
163
164 rds_iw_listen_id = cm_id;
165 cm_id = NULL;
166out:
167 if (cm_id)
168 rdma_destroy_id(cm_id);
169 return ret;
170}
171
172static void rds_rdma_listen_stop(void)
173{
174 if (rds_iw_listen_id) {
175 rdsdebug("cm %p\n", rds_iw_listen_id);
176 rdma_destroy_id(rds_iw_listen_id);
177 rds_iw_listen_id = NULL;
178 }
179}
180
181int __init rds_rdma_init(void)
182{
183 int ret;
184
185 ret = rds_rdma_listen_init();
186 if (ret)
187 goto out;
188
189 ret = rds_iw_init();
190 if (ret)
191 goto err_iw_init;
192
193 ret = rds_ib_init();
194 if (ret)
195 goto err_ib_init;
196
197 goto out;
198
199err_ib_init:
200 rds_iw_exit();
201err_iw_init:
202 rds_rdma_listen_stop();
203out:
204 return ret;
205}
206
207void rds_rdma_exit(void)
208{
209 /* stop listening first to ensure no new connections are attempted */
210 rds_rdma_listen_stop();
211 rds_ib_exit();
212 rds_iw_exit();
213}
214
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
new file mode 100644
index 000000000000..2f2c7d976c21
--- /dev/null
+++ b/net/rds/rdma_transport.h
@@ -0,0 +1,28 @@
1#ifndef _RDMA_TRANSPORT_H
2#define _RDMA_TRANSPORT_H
3
4#include <rdma/ib_verbs.h>
5#include <rdma/rdma_cm.h>
6#include "rds.h"
7
8#define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000
9
10int rds_rdma_conn_connect(struct rds_connection *conn);
11int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
12 struct rdma_cm_event *event);
13
14/* from rdma_transport.c */
15int rds_rdma_init(void);
16void rds_rdma_exit(void);
17
18/* from ib.c */
19extern struct rds_transport rds_ib_transport;
20int rds_ib_init(void);
21void rds_ib_exit(void);
22
23/* from iw.c */
24extern struct rds_transport rds_iw_transport;
25int rds_iw_init(void);
26void rds_iw_exit(void);
27
28#endif
diff --git a/net/rds/rds.h b/net/rds/rds.h
new file mode 100644
index 000000000000..060400704979
--- /dev/null
+++ b/net/rds/rds.h
@@ -0,0 +1,686 @@
1#ifndef _RDS_RDS_H
2#define _RDS_RDS_H
3
4#include <net/sock.h>
5#include <linux/scatterlist.h>
6#include <linux/highmem.h>
7#include <rdma/rdma_cm.h>
8#include <linux/mutex.h>
9#include <linux/rds.h>
10
11#include "info.h"
12
13/*
14 * RDS Network protocol version
15 */
16#define RDS_PROTOCOL_3_0 0x0300
17#define RDS_PROTOCOL_3_1 0x0301
18#define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1
19#define RDS_PROTOCOL_MAJOR(v) ((v) >> 8)
20#define RDS_PROTOCOL_MINOR(v) ((v) & 255)
21#define RDS_PROTOCOL(maj, min) (((maj) << 8) | min)
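For example, RDS_PROTOCOL(3, 1) evaluates to 0x0301 (i.e. RDS_PROTOCOL_3_1), and RDS_PROTOCOL_MAJOR()/RDS_PROTOCOL_MINOR() recover 3 and 1 from that value.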
22
23/*
24 * XXX randomly chosen, but at least seems to be unused:
25 * # 18464-18768 Unassigned
26 * We should do better. We want a reserved port to discourage unpriv'ed
27 * userspace from listening.
28 */
29#define RDS_PORT 18634
30
31#ifdef DEBUG
32#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args)
33#else
34/* sigh, pr_debug() causes unused variable warnings */
35static inline void __attribute__ ((format (printf, 1, 2)))
36rdsdebug(char *fmt, ...)
37{
38}
39#endif
40
41/* XXX is there one of these somewhere? */
42#define ceil(x, y) \
43 ({ unsigned long __x = (x), __y = (y); (__x + __y - 1) / __y; })
44
45#define RDS_FRAG_SHIFT 12
46#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT))
47
48#define RDS_CONG_MAP_BYTES (65536 / 8)
49#define RDS_CONG_MAP_LONGS (RDS_CONG_MAP_BYTES / sizeof(unsigned long))
50#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE)
51#define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8)
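With 4 KiB pages and 64-bit longs (assumptions about the build target), these work out to 8192 bytes, 1024 longs, 2 pages and 32768 bits per page: one congestion bit for each of the 65536 possible ports.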
52
53struct rds_cong_map {
54 struct rb_node m_rb_node;
55 __be32 m_addr;
56 wait_queue_head_t m_waitq;
57 struct list_head m_conn_list;
58 unsigned long m_page_addrs[RDS_CONG_MAP_PAGES];
59};
60
61
62/*
63 * This is how we will track the connection state:
64 * A connection is always in one of the following
65 * states. Updates to the state are atomic and imply
66 * a memory barrier.
67 */
68enum {
69 RDS_CONN_DOWN = 0,
70 RDS_CONN_CONNECTING,
71 RDS_CONN_DISCONNECTING,
72 RDS_CONN_UP,
73 RDS_CONN_ERROR,
74};
75
76/* Bits for c_flags */
77#define RDS_LL_SEND_FULL 0
78#define RDS_RECONNECT_PENDING 1
79
80struct rds_connection {
81 struct hlist_node c_hash_node;
82 __be32 c_laddr;
83 __be32 c_faddr;
84 unsigned int c_loopback:1;
85 struct rds_connection *c_passive;
86
87 struct rds_cong_map *c_lcong;
88 struct rds_cong_map *c_fcong;
89
90 struct mutex c_send_lock; /* protect send ring */
91 struct rds_message *c_xmit_rm;
92 unsigned long c_xmit_sg;
93 unsigned int c_xmit_hdr_off;
94 unsigned int c_xmit_data_off;
95 unsigned int c_xmit_rdma_sent;
96
97 spinlock_t c_lock; /* protect msg queues */
98 u64 c_next_tx_seq;
99 struct list_head c_send_queue;
100 struct list_head c_retrans;
101
102 u64 c_next_rx_seq;
103
104 struct rds_transport *c_trans;
105 void *c_transport_data;
106
107 atomic_t c_state;
108 unsigned long c_flags;
109 unsigned long c_reconnect_jiffies;
110 struct delayed_work c_send_w;
111 struct delayed_work c_recv_w;
112 struct delayed_work c_conn_w;
113 struct work_struct c_down_w;
114 struct mutex c_cm_lock; /* protect conn state & cm */
115
116 struct list_head c_map_item;
117 unsigned long c_map_queued;
118 unsigned long c_map_offset;
119 unsigned long c_map_bytes;
120
121 unsigned int c_unacked_packets;
122 unsigned int c_unacked_bytes;
123
124 /* Protocol version */
125 unsigned int c_version;
126};
127
128#define RDS_FLAG_CONG_BITMAP 0x01
129#define RDS_FLAG_ACK_REQUIRED 0x02
130#define RDS_FLAG_RETRANSMITTED 0x04
131#define RDS_MAX_ADV_CREDIT 127
132
133/*
134 * Maximum space available for extension headers.
135 */
136#define RDS_HEADER_EXT_SPACE 16
137
138struct rds_header {
139 __be64 h_sequence;
140 __be64 h_ack;
141 __be32 h_len;
142 __be16 h_sport;
143 __be16 h_dport;
144 u8 h_flags;
145 u8 h_credit;
146 u8 h_padding[4];
147 __sum16 h_csum;
148
149 u8 h_exthdr[RDS_HEADER_EXT_SPACE];
150};
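Laid out this way the wire header is a fixed 48 bytes with no implicit padding: 8 + 8 + 4 + 2 + 2 + 1 + 1 + 4 + 2 bytes of fixed fields plus the 16-byte extension area.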
151
152/*
153 * Reserved - indicates end of extensions
154 */
155#define RDS_EXTHDR_NONE 0
156
157/*
158 * This extension header is included in the very
159 * first message that is sent on a new connection,
160 * and identifies the protocol level. This will help
161 * rolling updates if a future change requires breaking
162 * the protocol.
163 * NB: This is no longer true for IB, where we do a version
164 * negotiation during the connection setup phase (protocol
165 * version information is included in the RDMA CM private data).
166 */
167#define RDS_EXTHDR_VERSION 1
168struct rds_ext_header_version {
169 __be32 h_version;
170};
171
172/*
173 * This extension header is included in the RDS message
174 * chasing an RDMA operation.
175 */
176#define RDS_EXTHDR_RDMA 2
177struct rds_ext_header_rdma {
178 __be32 h_rdma_rkey;
179};
180
181/*
182 * This extension header tells the peer about the
183 * destination <R_Key,offset> of the requested RDMA
184 * operation.
185 */
186#define RDS_EXTHDR_RDMA_DEST 3
187struct rds_ext_header_rdma_dest {
188 __be32 h_rdma_rkey;
189 __be32 h_rdma_offset;
190};
191
192#define __RDS_EXTHDR_MAX 16 /* for now */
193
194struct rds_incoming {
195 atomic_t i_refcount;
196 struct list_head i_item;
197 struct rds_connection *i_conn;
198 struct rds_header i_hdr;
199 unsigned long i_rx_jiffies;
200 __be32 i_saddr;
201
202 rds_rdma_cookie_t i_rdma_cookie;
203};
204
205/*
206 * m_sock_item and m_conn_item are on lists that are serialized under
207 * conn->c_lock. m_sock_item has additional meaning in that once it is empty
208 * the message will not be put back on the retransmit list after being sent.
209 * Messages that are canceled while being sent rely on this.
210 *
211 * m_inc is used by loopback so that it can pass an incoming message straight
212 * back up into the rx path. It embeds a wire header which is also used by
213 * the send path, which is kind of awkward.
214 *
215 * m_sock_item indicates the message's presence on a socket's send or receive
216 * queue. m_rs will point to that socket.
217 *
218 * m_daddr is used by cancellation to prune messages to a given destination.
219 *
220 * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock
221 * nesting. As paths iterate over messages on a sock, or conn, they must
222 * also lock the conn, or sock, to remove the message from those lists too.
223 * Testing the flag to determine if the message is still on the lists lets
224 * us avoid testing the list_head directly. That means each path can use
225 * the message's list_head to keep it on a local list while juggling locks
226 * without confusing the other path.
227 *
228 * m_ack_seq is an optional field set by transports that need a different
229 * sequence number range to invalidate. They can use this in a callback
230 * that they pass to rds_send_drop_acked() to see if each message has been
231 * acked. The HAS_ACK_SEQ flag can be used to detect messages which haven't
232 * had ack_seq set yet.
233 */
234#define RDS_MSG_ON_SOCK 1
235#define RDS_MSG_ON_CONN 2
236#define RDS_MSG_HAS_ACK_SEQ 3
237#define RDS_MSG_ACK_REQUIRED 4
238#define RDS_MSG_RETRANSMITTED 5
239#define RDS_MSG_MAPPED 6
240#define RDS_MSG_PAGEVEC 7
241
242struct rds_message {
243 atomic_t m_refcount;
244 struct list_head m_sock_item;
245 struct list_head m_conn_item;
246 struct rds_incoming m_inc;
247 u64 m_ack_seq;
248 __be32 m_daddr;
249 unsigned long m_flags;
250
251 /* Never access m_rs without holding m_rs_lock.
252 * Lock nesting is
253 * rm->m_rs_lock
254 * -> rs->rs_lock
255 */
256 spinlock_t m_rs_lock;
257 struct rds_sock *m_rs;
258 struct rds_rdma_op *m_rdma_op;
259 rds_rdma_cookie_t m_rdma_cookie;
260 struct rds_mr *m_rdma_mr;
261 unsigned int m_nents;
262 unsigned int m_count;
263 struct scatterlist m_sg[0];
264};
265
266/*
267 * The RDS notifier is used (optionally) to tell the application about
268 * completed RDMA operations. Rather than keeping the whole rds message
269 * around on the queue, we allocate a small notifier that is put on the
270 * socket's notifier_list. Notifications are delivered to the application
271 * through control messages.
272 */
273struct rds_notifier {
274 struct list_head n_list;
275 uint64_t n_user_token;
276 int n_status;
277};
278
279/**
280 * struct rds_transport - transport specific behavioural hooks
281 *
282 * @xmit: .xmit is called by rds_send_xmit() to tell the transport to send
283 * part of a message. The caller serializes on the send_sem so this
284 * doesn't need to be reentrant for a given conn. The header must be
285 * sent before the data payload. .xmit must be prepared to send a
286 * message with no data payload. .xmit should return the number of
287 * bytes that were sent down the connection, including header bytes.
288 * Returning 0 tells the caller that it doesn't need to perform any
289 * additional work now. This is usually the case when the transport has
290 * filled the sending queue for its connection and will handle
291 * triggering the rds thread to continue the send when space becomes
292 * available. Returning -EAGAIN tells the caller to retry the send
293 * immediately. Returning -ENOMEM tells the caller to retry the send at
294 * some point in the future.
295 *
296 * @conn_shutdown: conn_shutdown stops traffic on the given connection. Once
297 * it returns, rds_recv_incoming() can no longer be called on the connection.
298 * This will only be called once after conn_connect has returned
299 * successfully. The caller serializes this with
300 * the send and connecting paths (xmit_* and conn_*). The
301 * transport is responsible for other serialization, including
302 * rds_recv_incoming(). This is called in process context but
303 * should try hard not to block.
304 *
305 * @xmit_cong_map: This asks the transport to send the local bitmap down the
306 * given connection. XXX get a better story about the bitmap
307 * flag and header.
308 */
309
310struct rds_transport {
311 char t_name[TRANSNAMSIZ];
312 struct list_head t_item;
313 struct module *t_owner;
314 unsigned int t_prefer_loopback:1;
315
316 int (*laddr_check)(__be32 addr);
317 int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
318 void (*conn_free)(void *data);
319 int (*conn_connect)(struct rds_connection *conn);
320 void (*conn_shutdown)(struct rds_connection *conn);
321 void (*xmit_prepare)(struct rds_connection *conn);
322 void (*xmit_complete)(struct rds_connection *conn);
323 int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
324 unsigned int hdr_off, unsigned int sg, unsigned int off);
325 int (*xmit_cong_map)(struct rds_connection *conn,
326 struct rds_cong_map *map, unsigned long offset);
327 int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op);
328 int (*recv)(struct rds_connection *conn);
329 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov,
330 size_t size);
331 void (*inc_purge)(struct rds_incoming *inc);
332 void (*inc_free)(struct rds_incoming *inc);
333
334 int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
335 struct rdma_cm_event *event);
336 int (*cm_initiate_connect)(struct rdma_cm_id *cm_id);
337 void (*cm_connect_complete)(struct rds_connection *conn,
338 struct rdma_cm_event *event);
339
340 unsigned int (*stats_info_copy)(struct rds_info_iterator *iter,
341 unsigned int avail);
342 void (*exit)(void);
343 void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg,
344 struct rds_sock *rs, u32 *key_ret);
345 void (*sync_mr)(void *trans_private, int direction);
346 void (*free_mr)(void *trans_private, int invalidate);
347 void (*flush_mrs)(void);
348};
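A skeletal .xmit hook, only to make the return-value contract above concrete (the transport and its ring helper are hypothetical, not part of this patch):

static int example_xmit(struct rds_connection *conn, struct rds_message *rm,
			unsigned int hdr_off, unsigned int sg, unsigned int off)
{
	/* example_ring_put() is an assumed helper that queues header and
	 * payload bytes starting at the given offsets and returns how many
	 * bytes it consumed, 0 if its ring is full, or a -errno. */
	int ret = example_ring_put(conn->c_transport_data, rm, hdr_off, sg, off);

	if (ret > 0)
		return ret;	/* bytes sent, header bytes included */
	if (ret == 0)
		return 0;	/* ring full: the transport re-arms the send worker itself */
	return ret;		/* -EAGAIN: caller retries now; -ENOMEM: retries later */
}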
349
350struct rds_sock {
351 struct sock rs_sk;
352
353 u64 rs_user_addr;
354 u64 rs_user_bytes;
355
356 /*
357 * bound_addr used for both incoming and outgoing, no INADDR_ANY
358 * support.
359 */
360 struct rb_node rs_bound_node;
361 __be32 rs_bound_addr;
362 __be32 rs_conn_addr;
363 __be16 rs_bound_port;
364 __be16 rs_conn_port;
365
366 /*
367 * This is only used to communicate the transport between bind and
368 * initiating connections. All other trans use is referenced through
369 * the connection.
370 */
371 struct rds_transport *rs_transport;
372
373 /*
374 * rds_sendmsg caches the conn it used the last time around.
375 * This helps avoid costly lookups.
376 */
377 struct rds_connection *rs_conn;
378
379 /* flag indicating we were congested or not */
380 int rs_congested;
381
382 /* rs_lock protects all these adjacent members before the newline */
383 spinlock_t rs_lock;
384 struct list_head rs_send_queue;
385 u32 rs_snd_bytes;
386 int rs_rcv_bytes;
387 struct list_head rs_notify_queue; /* currently used for failed RDMAs */
388
389 /* Congestion wake_up. If rs_cong_monitor is set, we use cong_mask
390 * to decide whether the application should be woken up.
391 * If not set, we use rs_cong_track to find out whether a cong map
392 * update arrived.
393 */
394 uint64_t rs_cong_mask;
395 uint64_t rs_cong_notify;
396 struct list_head rs_cong_list;
397 unsigned long rs_cong_track;
398
399 /*
400 * rs_recv_lock protects the receive queue, and is
401 * used to serialize with rds_release.
402 */
403 rwlock_t rs_recv_lock;
404 struct list_head rs_recv_queue;
405
406 /* just for stats reporting */
407 struct list_head rs_item;
408
409 /* these have their own lock */
410 spinlock_t rs_rdma_lock;
411 struct rb_root rs_rdma_keys;
412
413 /* Socket options - in case there will be more */
414 unsigned char rs_recverr,
415 rs_cong_monitor;
416};
417
418static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk)
419{
420 return container_of(sk, struct rds_sock, rs_sk);
421}
422static inline struct sock *rds_rs_to_sk(struct rds_sock *rs)
423{
424 return &rs->rs_sk;
425}
426
427/*
428 * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value
429 * to account for overhead. We don't account for overhead, we just apply
430 * the number of payload bytes to the specified value.
431 */
432static inline int rds_sk_sndbuf(struct rds_sock *rs)
433{
434 return rds_rs_to_sk(rs)->sk_sndbuf / 2;
435}
436static inline int rds_sk_rcvbuf(struct rds_sock *rs)
437{
438 return rds_rs_to_sk(rs)->sk_rcvbuf / 2;
439}
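/*
 * Concretely: an application that sets SO_SNDBUF to 65536 ends up with
 * sk_sndbuf = 131072 in the core stack (the usual doubling for overhead),
 * and rds_sk_sndbuf() reports 65536 again, so the full requested value is
 * available for RDS payload bytes.
 */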
440
441struct rds_statistics {
442 uint64_t s_conn_reset;
443 uint64_t s_recv_drop_bad_checksum;
444 uint64_t s_recv_drop_old_seq;
445 uint64_t s_recv_drop_no_sock;
446 uint64_t s_recv_drop_dead_sock;
447 uint64_t s_recv_deliver_raced;
448 uint64_t s_recv_delivered;
449 uint64_t s_recv_queued;
450 uint64_t s_recv_immediate_retry;
451 uint64_t s_recv_delayed_retry;
452 uint64_t s_recv_ack_required;
453 uint64_t s_recv_rdma_bytes;
454 uint64_t s_recv_ping;
455 uint64_t s_send_queue_empty;
456 uint64_t s_send_queue_full;
457 uint64_t s_send_sem_contention;
458 uint64_t s_send_sem_queue_raced;
459 uint64_t s_send_immediate_retry;
460 uint64_t s_send_delayed_retry;
461 uint64_t s_send_drop_acked;
462 uint64_t s_send_ack_required;
463 uint64_t s_send_queued;
464 uint64_t s_send_rdma;
465 uint64_t s_send_rdma_bytes;
466 uint64_t s_send_pong;
467 uint64_t s_page_remainder_hit;
468 uint64_t s_page_remainder_miss;
469 uint64_t s_copy_to_user;
470 uint64_t s_copy_from_user;
471 uint64_t s_cong_update_queued;
472 uint64_t s_cong_update_received;
473 uint64_t s_cong_send_error;
474 uint64_t s_cong_send_blocked;
475};
476
477/* af_rds.c */
478void rds_sock_addref(struct rds_sock *rs);
479void rds_sock_put(struct rds_sock *rs);
480void rds_wake_sk_sleep(struct rds_sock *rs);
481static inline void __rds_wake_sk_sleep(struct sock *sk)
482{
483 wait_queue_head_t *waitq = sk->sk_sleep;
484
485 if (!sock_flag(sk, SOCK_DEAD) && waitq)
486 wake_up(waitq);
487}
488extern wait_queue_head_t rds_poll_waitq;
489
490
491/* bind.c */
492int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
493void rds_remove_bound(struct rds_sock *rs);
494struct rds_sock *rds_find_bound(__be32 addr, __be16 port);
495
496/* cong.c */
497int rds_cong_get_maps(struct rds_connection *conn);
498void rds_cong_add_conn(struct rds_connection *conn);
499void rds_cong_remove_conn(struct rds_connection *conn);
500void rds_cong_set_bit(struct rds_cong_map *map, __be16 port);
501void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port);
502int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock, struct rds_sock *rs);
503void rds_cong_queue_updates(struct rds_cong_map *map);
504void rds_cong_map_updated(struct rds_cong_map *map, uint64_t);
505int rds_cong_updated_since(unsigned long *recent);
506void rds_cong_add_socket(struct rds_sock *);
507void rds_cong_remove_socket(struct rds_sock *);
508void rds_cong_exit(void);
509struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
510
511/* conn.c */
512int __init rds_conn_init(void);
513void rds_conn_exit(void);
514struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
515 struct rds_transport *trans, gfp_t gfp);
516struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
517 struct rds_transport *trans, gfp_t gfp);
518void rds_conn_destroy(struct rds_connection *conn);
519void rds_conn_reset(struct rds_connection *conn);
520void rds_conn_drop(struct rds_connection *conn);
521void rds_for_each_conn_info(struct socket *sock, unsigned int len,
522 struct rds_info_iterator *iter,
523 struct rds_info_lengths *lens,
524 int (*visitor)(struct rds_connection *, void *),
525 size_t item_len);
526void __rds_conn_error(struct rds_connection *conn, const char *, ...)
527 __attribute__ ((format (printf, 2, 3)));
528#define rds_conn_error(conn, fmt...) \
529 __rds_conn_error(conn, KERN_WARNING "RDS: " fmt)
530
531static inline int
532rds_conn_transition(struct rds_connection *conn, int old, int new)
533{
534 return atomic_cmpxchg(&conn->c_state, old, new) == old;
535}
536
537static inline int
538rds_conn_state(struct rds_connection *conn)
539{
540 return atomic_read(&conn->c_state);
541}
542
543static inline int
544rds_conn_up(struct rds_connection *conn)
545{
546 return atomic_read(&conn->c_state) == RDS_CONN_UP;
547}
548
549static inline int
550rds_conn_connecting(struct rds_connection *conn)
551{
552 return atomic_read(&conn->c_state) == RDS_CONN_CONNECTING;
553}
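/*
 * The cmpxchg in rds_conn_transition() keeps these state changes
 * single-writer.  For instance, the connect path is expected to claim the
 * connection with something like (sketch):
 *
 *	if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING))
 *		conn->c_trans->conn_connect(conn);
 *
 * so a racing worker that loses the transition simply backs off instead of
 * starting a second connect attempt.
 */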
554
555/* message.c */
556struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
557struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
558 size_t total_len);
559struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
560void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
561 __be16 dport, u64 seq);
562int rds_message_add_extension(struct rds_header *hdr,
563 unsigned int type, const void *data, unsigned int len);
564int rds_message_next_extension(struct rds_header *hdr,
565 unsigned int *pos, void *buf, unsigned int *buflen);
566int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version);
567int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version);
568int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
569int rds_message_inc_copy_to_user(struct rds_incoming *inc,
570 struct iovec *first_iov, size_t size);
571void rds_message_inc_purge(struct rds_incoming *inc);
572void rds_message_inc_free(struct rds_incoming *inc);
573void rds_message_addref(struct rds_message *rm);
574void rds_message_put(struct rds_message *rm);
575void rds_message_wait(struct rds_message *rm);
576void rds_message_unmapped(struct rds_message *rm);
577
578static inline void rds_message_make_checksum(struct rds_header *hdr)
579{
580 hdr->h_csum = 0;
581 hdr->h_csum = ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2);
582}
583
584static inline int rds_message_verify_checksum(const struct rds_header *hdr)
585{
586 return !hdr->h_csum || ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2) == 0;
587}
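/*
 * ip_fast_csum() takes its length in 32-bit words, hence sizeof(*hdr) >> 2.
 * Folding a header that rds_message_make_checksum() filled in yields 0,
 * which is what the verify helper checks; a zero h_csum on the wire is
 * treated as "no checksum supplied" and accepted.
 */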
588
589
590/* page.c */
591int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
592 gfp_t gfp);
593int rds_page_copy_user(struct page *page, unsigned long offset,
594 void __user *ptr, unsigned long bytes,
595 int to_user);
596#define rds_page_copy_to_user(page, offset, ptr, bytes) \
597 rds_page_copy_user(page, offset, ptr, bytes, 1)
598#define rds_page_copy_from_user(page, offset, ptr, bytes) \
599 rds_page_copy_user(page, offset, ptr, bytes, 0)
600void rds_page_exit(void);
601
602/* recv.c */
603void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
604 __be32 saddr);
605void rds_inc_addref(struct rds_incoming *inc);
606void rds_inc_put(struct rds_incoming *inc);
607void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
608 struct rds_incoming *inc, gfp_t gfp, enum km_type km);
609int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
610 size_t size, int msg_flags);
611void rds_clear_recv_queue(struct rds_sock *rs);
612int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg);
613void rds_inc_info_copy(struct rds_incoming *inc,
614 struct rds_info_iterator *iter,
615 __be32 saddr, __be32 daddr, int flip);
616
617/* send.c */
618int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
619 size_t payload_len);
620void rds_send_reset(struct rds_connection *conn);
621int rds_send_xmit(struct rds_connection *conn);
622struct sockaddr_in;
623void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
624typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
625void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
626 is_acked_func is_acked);
627int rds_send_acked_before(struct rds_connection *conn, u64 seq);
628void rds_send_remove_from_sock(struct list_head *messages, int status);
629int rds_send_pong(struct rds_connection *conn, __be16 dport);
630struct rds_message *rds_send_get_message(struct rds_connection *,
631 struct rds_rdma_op *);
632
633/* rdma.c */
634void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
635
636/* stats.c */
637DECLARE_PER_CPU(struct rds_statistics, rds_stats);
638#define rds_stats_inc_which(which, member) do { \
639 per_cpu(which, get_cpu()).member++; \
640 put_cpu(); \
641} while (0)
642#define rds_stats_inc(member) rds_stats_inc_which(rds_stats, member)
643#define rds_stats_add_which(which, member, count) do { \
644 per_cpu(which, get_cpu()).member += count; \
645 put_cpu(); \
646} while (0)
647#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count)
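/* A transport can keep private counters and reuse the _which variants; a
 * sketch with hypothetical names:
 *
 *	DEFINE_PER_CPU(struct rds_foo_statistics, rds_foo_stats);
 *	#define rds_foo_stats_inc(member) \
 *		rds_stats_inc_which(rds_foo_stats, member)
 *
 * Each increment runs with preemption disabled via get_cpu()/put_cpu(), so
 * the counters need no lock; readers sum the per-cpu copies.
 */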
648int __init rds_stats_init(void);
649void rds_stats_exit(void);
650void rds_stats_info_copy(struct rds_info_iterator *iter,
651 uint64_t *values, char **names, size_t nr);
652
653/* sysctl.c */
654int __init rds_sysctl_init(void);
655void rds_sysctl_exit(void);
656extern unsigned long rds_sysctl_sndbuf_min;
657extern unsigned long rds_sysctl_sndbuf_default;
658extern unsigned long rds_sysctl_sndbuf_max;
659extern unsigned long rds_sysctl_reconnect_min_jiffies;
660extern unsigned long rds_sysctl_reconnect_max_jiffies;
661extern unsigned int rds_sysctl_max_unacked_packets;
662extern unsigned int rds_sysctl_max_unacked_bytes;
663extern unsigned int rds_sysctl_ping_enable;
664extern unsigned long rds_sysctl_trace_flags;
665extern unsigned int rds_sysctl_trace_level;
666
667/* threads.c */
668int __init rds_threads_init(void);
669void rds_threads_exit(void);
670extern struct workqueue_struct *rds_wq;
671void rds_connect_worker(struct work_struct *);
672void rds_shutdown_worker(struct work_struct *);
673void rds_send_worker(struct work_struct *);
674void rds_recv_worker(struct work_struct *);
675void rds_connect_complete(struct rds_connection *conn);
676
677/* transport.c */
678int rds_trans_register(struct rds_transport *trans);
679void rds_trans_unregister(struct rds_transport *trans);
680struct rds_transport *rds_trans_get_preferred(__be32 addr);
681unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
682 unsigned int avail);
683int __init rds_trans_init(void);
684void rds_trans_exit(void);
685
686#endif
diff --git a/net/rds/recv.c b/net/rds/recv.c
new file mode 100644
index 000000000000..f2118c51cfa3
--- /dev/null
+++ b/net/rds/recv.c
@@ -0,0 +1,542 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <net/sock.h>
35#include <linux/in.h>
36
37#include "rds.h"
38#include "rdma.h"
39
40void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
41 __be32 saddr)
42{
43 atomic_set(&inc->i_refcount, 1);
44 INIT_LIST_HEAD(&inc->i_item);
45 inc->i_conn = conn;
46 inc->i_saddr = saddr;
47 inc->i_rdma_cookie = 0;
48}
49
50void rds_inc_addref(struct rds_incoming *inc)
51{
52 rdsdebug("addref inc %p ref %d\n", inc, atomic_read(&inc->i_refcount));
53 atomic_inc(&inc->i_refcount);
54}
55
56void rds_inc_put(struct rds_incoming *inc)
57{
58 rdsdebug("put inc %p ref %d\n", inc, atomic_read(&inc->i_refcount));
59 if (atomic_dec_and_test(&inc->i_refcount)) {
60 BUG_ON(!list_empty(&inc->i_item));
61
62 inc->i_conn->c_trans->inc_free(inc);
63 }
64}
65
66static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk,
67 struct rds_cong_map *map,
68 int delta, __be16 port)
69{
70 int now_congested;
71
72 if (delta == 0)
73 return;
74
75 rs->rs_rcv_bytes += delta;
76 now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs);
77
78 rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d "
79 "now_cong %d delta %d\n",
80 rs, &rs->rs_bound_addr,
81 ntohs(rs->rs_bound_port), rs->rs_rcv_bytes,
82 rds_sk_rcvbuf(rs), now_congested, delta);
83
84 /* wasn't -> am congested */
85 if (!rs->rs_congested && now_congested) {
86 rs->rs_congested = 1;
87 rds_cong_set_bit(map, port);
88 rds_cong_queue_updates(map);
89 }
90 /* was -> aren't congested */
91 /* Require more free space before reporting uncongested to prevent
92 bouncing cong/uncong state too often */
93 else if (rs->rs_congested && (rs->rs_rcv_bytes < (rds_sk_rcvbuf(rs)/2))) {
94 rs->rs_congested = 0;
95 rds_cong_clear_bit(map, port);
96 rds_cong_queue_updates(map);
97 }
98
99 /* do nothing if no change in cong state */
100}
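/*
 * Note the hysteresis: with, say, 64KB of payload rcvbuf a port is flagged
 * congested once more than 64KB is queued, but only cleared again after the
 * queue drains below 32KB, so a socket hovering near the limit does not
 * flip its congestion bit on every delivery.
 */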
101
102/*
103 * Process all extension headers that come with this message.
104 */
105static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock *rs)
106{
107 struct rds_header *hdr = &inc->i_hdr;
108 unsigned int pos = 0, type, len;
109 union {
110 struct rds_ext_header_version version;
111 struct rds_ext_header_rdma rdma;
112 struct rds_ext_header_rdma_dest rdma_dest;
113 } buffer;
114
115 while (1) {
116 len = sizeof(buffer);
117 type = rds_message_next_extension(hdr, &pos, &buffer, &len);
118 if (type == RDS_EXTHDR_NONE)
119 break;
120 /* Process extension header here */
121 switch (type) {
122 case RDS_EXTHDR_RDMA:
123 rds_rdma_unuse(rs, be32_to_cpu(buffer.rdma.h_rdma_rkey), 0);
124 break;
125
126 case RDS_EXTHDR_RDMA_DEST:
127 /* We ignore the size for now. We could stash it
128 * somewhere and use it for error checking. */
129 inc->i_rdma_cookie = rds_rdma_make_cookie(
130 be32_to_cpu(buffer.rdma_dest.h_rdma_rkey),
131 be32_to_cpu(buffer.rdma_dest.h_rdma_offset));
132
133 break;
134 }
135 }
136}
137
138/*
139 * The transport must make sure that this is serialized against other
140 * rx and conn reset on this specific conn.
141 *
142 * We currently assert that only one fragmented message will be sent
143 * down a connection at a time. This lets us reassemble in the conn
144 * instead of per-flow which means that we don't have to go digging through
145 * flows to tear down partial reassembly progress on conn failure and
146 * we save flow lookup and locking for each frag arrival. It does mean
147 * that small messages will wait behind large ones. Fragmenting at all
148 * is only to reduce the memory consumption of pre-posted buffers.
149 *
150 * The caller passes in saddr and daddr instead of us getting it from the
151 * conn. This lets loopback, who only has one conn for both directions,
152 * tell us which roles the addrs in the conn are playing for this message.
153 */
154void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
155 struct rds_incoming *inc, gfp_t gfp, enum km_type km)
156{
157 struct rds_sock *rs = NULL;
158 struct sock *sk;
159 unsigned long flags;
160
161 inc->i_conn = conn;
162 inc->i_rx_jiffies = jiffies;
163
164 rdsdebug("conn %p next %llu inc %p seq %llu len %u sport %u dport %u "
165 "flags 0x%x rx_jiffies %lu\n", conn,
166 (unsigned long long)conn->c_next_rx_seq,
167 inc,
168 (unsigned long long)be64_to_cpu(inc->i_hdr.h_sequence),
169 be32_to_cpu(inc->i_hdr.h_len),
170 be16_to_cpu(inc->i_hdr.h_sport),
171 be16_to_cpu(inc->i_hdr.h_dport),
172 inc->i_hdr.h_flags,
173 inc->i_rx_jiffies);
174
175 /*
176 * Sequence numbers should only increase. Messages get their
177 * sequence number as they're queued in a sending conn. They
178 * can be dropped, though, if the sending socket is closed before
179 * they hit the wire. So sequence numbers can skip forward
180 * under normal operation. They can also drop back in the conn
181 * failover case as previously sent messages are resent down the
182 * new instance of a conn. We drop those, otherwise we have
183 * to assume that the next valid seq does not come after a
184 * hole in the fragment stream.
185 *
186 * The headers don't give us a way to realize if fragments of
187 * a message have been dropped. We assume that frags that arrive
188 * to a flow are part of the current message on the flow that is
189 * being reassembled. This means that senders can't drop messages
190 * from the sending conn until all their frags are sent.
191 *
192 * XXX we could spend more on the wire to get more robust failure
193 * detection, arguably worth it to avoid data corruption.
194 */
195 if (be64_to_cpu(inc->i_hdr.h_sequence) < conn->c_next_rx_seq
196 && (inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED)) {
197 rds_stats_inc(s_recv_drop_old_seq);
198 goto out;
199 }
200 conn->c_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1;
201
202 if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) {
203 rds_stats_inc(s_recv_ping);
204 rds_send_pong(conn, inc->i_hdr.h_sport);
205 goto out;
206 }
207
208 rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
209 if (rs == NULL) {
210 rds_stats_inc(s_recv_drop_no_sock);
211 goto out;
212 }
213
214 /* Process extension headers */
215 rds_recv_incoming_exthdrs(inc, rs);
216
217 /* We can be racing with rds_release() which marks the socket dead. */
218 sk = rds_rs_to_sk(rs);
219
220 /* serialize with rds_release -> sock_orphan */
221 write_lock_irqsave(&rs->rs_recv_lock, flags);
222 if (!sock_flag(sk, SOCK_DEAD)) {
223 rdsdebug("adding inc %p to rs %p's recv queue\n", inc, rs);
224 rds_stats_inc(s_recv_queued);
225 rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
226 be32_to_cpu(inc->i_hdr.h_len),
227 inc->i_hdr.h_dport);
228 rds_inc_addref(inc);
229 list_add_tail(&inc->i_item, &rs->rs_recv_queue);
230 __rds_wake_sk_sleep(sk);
231 } else {
232 rds_stats_inc(s_recv_drop_dead_sock);
233 }
234 write_unlock_irqrestore(&rs->rs_recv_lock, flags);
235
236out:
237 if (rs)
238 rds_sock_put(rs);
239}
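/*
 * A transport's completion path is expected to feed messages in roughly
 * this shape (sketch; the embedding struct and GFP/KM arguments are
 * illustrative only):
 *
 *	rds_inc_init(&myinc->ii_inc, conn, saddr);
 *	... copy the wire header into myinc->ii_inc.i_hdr, attach data ...
 *	rds_recv_incoming(conn, saddr, daddr, &myinc->ii_inc,
 *			  GFP_ATOMIC, KM_SOFTIRQ0);
 *	rds_inc_put(&myinc->ii_inc);
 *
 * The final put drops the transport's reference; if the message was queued
 * on a socket above, the socket's own reference keeps it alive.
 */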
240
241/*
242 * be very careful here. This is called as the condition in
243 * wait_event_*() and so needs to cope with being called many times.
244 */
245static int rds_next_incoming(struct rds_sock *rs, struct rds_incoming **inc)
246{
247 unsigned long flags;
248
249 if (*inc == NULL) {
250 read_lock_irqsave(&rs->rs_recv_lock, flags);
251 if (!list_empty(&rs->rs_recv_queue)) {
252 *inc = list_entry(rs->rs_recv_queue.next,
253 struct rds_incoming,
254 i_item);
255 rds_inc_addref(*inc);
256 }
257 read_unlock_irqrestore(&rs->rs_recv_lock, flags);
258 }
259
260 return *inc != NULL;
261}
262
263static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc,
264 int drop)
265{
266 struct sock *sk = rds_rs_to_sk(rs);
267 int ret = 0;
268 unsigned long flags;
269
270 write_lock_irqsave(&rs->rs_recv_lock, flags);
271 if (!list_empty(&inc->i_item)) {
272 ret = 1;
273 if (drop) {
274 /* XXX make sure this i_conn is reliable */
275 rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
276 -be32_to_cpu(inc->i_hdr.h_len),
277 inc->i_hdr.h_dport);
278 list_del_init(&inc->i_item);
279 rds_inc_put(inc);
280 }
281 }
282 write_unlock_irqrestore(&rs->rs_recv_lock, flags);
283
284 rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop);
285 return ret;
286}
287
288/*
289 * Pull errors off the error queue.
290 * If msghdr is NULL, we will just purge the error queue.
291 */
292int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr)
293{
294 struct rds_notifier *notifier;
295 struct rds_rdma_notify cmsg;
296 unsigned int count = 0, max_messages = ~0U;
297 unsigned long flags;
298 LIST_HEAD(copy);
299 int err = 0;
300
301
302 /* put_cmsg copies to user space and thus may sleep. We can't do this
303 * with rs_lock held, so first grab as many notifications as we can stuff
304 * in the user provided cmsg buffer. We don't try to copy more, to avoid
305 * losing notifications - except when the buffer is so small that it wouldn't
306 * even hold a single notification. Then we give the caller as much of this single
307 * msg as we can squeeze in, and set MSG_CTRUNC.
308 */
309 if (msghdr) {
310 max_messages = msghdr->msg_controllen / CMSG_SPACE(sizeof(cmsg));
311 if (!max_messages)
312 max_messages = 1;
313 }
314
315 spin_lock_irqsave(&rs->rs_lock, flags);
316 while (!list_empty(&rs->rs_notify_queue) && count < max_messages) {
317 notifier = list_entry(rs->rs_notify_queue.next,
318 struct rds_notifier, n_list);
319 list_move(&notifier->n_list, &copy);
320 count++;
321 }
322 spin_unlock_irqrestore(&rs->rs_lock, flags);
323
324 if (!count)
325 return 0;
326
327 while (!list_empty(&copy)) {
328 notifier = list_entry(copy.next, struct rds_notifier, n_list);
329
330 if (msghdr) {
331 cmsg.user_token = notifier->n_user_token;
332 cmsg.status = notifier->n_status;
333
334 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS,
335 sizeof(cmsg), &cmsg);
336 if (err)
337 break;
338 }
339
340 list_del_init(&notifier->n_list);
341 kfree(notifier);
342 }
343
344 /* If we bailed out because of an error in put_cmsg,
345 * we may be left with one or more notifications that we
346 * didn't process. Return them to the head of the list. */
347 if (!list_empty(&copy)) {
348 spin_lock_irqsave(&rs->rs_lock, flags);
349 list_splice(&copy, &rs->rs_notify_queue);
350 spin_unlock_irqrestore(&rs->rs_lock, flags);
351 }
352
353 return err;
354}
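/*
 * The matching userspace loop looks roughly like this (sketch; assumes
 * SOL_RDS, RDS_CMSG_RDMA_STATUS and struct rds_rdma_notify from the RDS
 * user-visible header).  n.user_token names the RDMA op, n.status carries
 * its result:
 *
 *	char ctl[8 * CMSG_SPACE(sizeof(struct rds_rdma_notify))];
 *	struct msghdr msg = { .msg_control = ctl,
 *			      .msg_controllen = sizeof(ctl) };
 *	struct cmsghdr *c;
 *	struct rds_rdma_notify n;
 *
 *	if (recvmsg(fd, &msg, 0) >= 0)
 *		for (c = CMSG_FIRSTHDR(&msg); c; c = CMSG_NXTHDR(&msg, c))
 *			if (c->cmsg_level == SOL_RDS &&
 *			    c->cmsg_type == RDS_CMSG_RDMA_STATUS)
 *				memcpy(&n, CMSG_DATA(c), sizeof(n));
 *
 * If msg_controllen cannot hold even one notification, a truncated copy is
 * returned and MSG_CTRUNC is set, as described above.
 */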
355
356/*
357 * Queue a congestion notification
358 */
359static int rds_notify_cong(struct rds_sock *rs, struct msghdr *msghdr)
360{
361 uint64_t notify = rs->rs_cong_notify;
362 unsigned long flags;
363 int err;
364
365 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_CONG_UPDATE,
366 sizeof(notify), &notify);
367 if (err)
368 return err;
369
370 spin_lock_irqsave(&rs->rs_lock, flags);
371 rs->rs_cong_notify &= ~notify;
372 spin_unlock_irqrestore(&rs->rs_lock, flags);
373
374 return 0;
375}
376
377/*
378 * Receive any control messages.
379 */
380static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg)
381{
382 int ret = 0;
383
384 if (inc->i_rdma_cookie) {
385 ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST,
386 sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie);
387 if (ret)
388 return ret;
389 }
390
391 return 0;
392}
393
394int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
395 size_t size, int msg_flags)
396{
397 struct sock *sk = sock->sk;
398 struct rds_sock *rs = rds_sk_to_rs(sk);
399 long timeo;
400 int ret = 0, nonblock = msg_flags & MSG_DONTWAIT;
401 struct sockaddr_in *sin;
402 struct rds_incoming *inc = NULL;
403
404 /* udp_recvmsg()->sock_recvtimeo() gets away without locking too.. */
405 timeo = sock_rcvtimeo(sk, nonblock);
406
407 rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo);
408
409 if (msg_flags & MSG_OOB)
410 goto out;
411
412 /* If there are pending notifications, do those - and nothing else */
413 if (!list_empty(&rs->rs_notify_queue)) {
414 ret = rds_notify_queue_get(rs, msg);
415 goto out;
416 }
417
418 if (rs->rs_cong_notify) {
419 ret = rds_notify_cong(rs, msg);
420 goto out;
421 }
422
423 while (1) {
424 if (!rds_next_incoming(rs, &inc)) {
425 if (nonblock) {
426 ret = -EAGAIN;
427 break;
428 }
429
430 timeo = wait_event_interruptible_timeout(*sk->sk_sleep,
431 rds_next_incoming(rs, &inc),
432 timeo);
433 rdsdebug("recvmsg woke inc %p timeo %ld\n", inc,
434 timeo);
435 if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT)
436 continue;
437
438 ret = timeo;
439 if (ret == 0)
440 ret = -ETIMEDOUT;
441 break;
442 }
443
444 rdsdebug("copying inc %p from %pI4:%u to user\n", inc,
445 &inc->i_conn->c_faddr,
446 ntohs(inc->i_hdr.h_sport));
447 ret = inc->i_conn->c_trans->inc_copy_to_user(inc, msg->msg_iov,
448 size);
449 if (ret < 0)
450 break;
451
452 /*
453 * if the message we just copied isn't at the head of the
454 * recv queue then someone else raced us to return it, try
455 * to get the next message.
456 */
457 if (!rds_still_queued(rs, inc, !(msg_flags & MSG_PEEK))) {
458 rds_inc_put(inc);
459 inc = NULL;
460 rds_stats_inc(s_recv_deliver_raced);
461 continue;
462 }
463
464 if (ret < be32_to_cpu(inc->i_hdr.h_len)) {
465 if (msg_flags & MSG_TRUNC)
466 ret = be32_to_cpu(inc->i_hdr.h_len);
467 msg->msg_flags |= MSG_TRUNC;
468 }
469
470 if (rds_cmsg_recv(inc, msg)) {
471 ret = -EFAULT;
472 goto out;
473 }
474
475 rds_stats_inc(s_recv_delivered);
476
477 sin = (struct sockaddr_in *)msg->msg_name;
478 if (sin) {
479 sin->sin_family = AF_INET;
480 sin->sin_port = inc->i_hdr.h_sport;
481 sin->sin_addr.s_addr = inc->i_saddr;
482 memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
483 }
484 break;
485 }
486
487 if (inc)
488 rds_inc_put(inc);
489
490out:
491 return ret;
492}
493
494/*
495 * The socket is being shut down and we're asked to drop messages that were
496 * queued for recvmsg. The caller has unbound the socket so the receive path
497 * won't queue any more incoming fragments or messages on the socket.
498 */
499void rds_clear_recv_queue(struct rds_sock *rs)
500{
501 struct sock *sk = rds_rs_to_sk(rs);
502 struct rds_incoming *inc, *tmp;
503 unsigned long flags;
504
505 write_lock_irqsave(&rs->rs_recv_lock, flags);
506 list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) {
507 rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong,
508 -be32_to_cpu(inc->i_hdr.h_len),
509 inc->i_hdr.h_dport);
510 list_del_init(&inc->i_item);
511 rds_inc_put(inc);
512 }
513 write_unlock_irqrestore(&rs->rs_recv_lock, flags);
514}
515
516/*
517 * inc->i_saddr isn't used here because it is only set in the receive
518 * path.
519 */
520void rds_inc_info_copy(struct rds_incoming *inc,
521 struct rds_info_iterator *iter,
522 __be32 saddr, __be32 daddr, int flip)
523{
524 struct rds_info_message minfo;
525
526 minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence);
527 minfo.len = be32_to_cpu(inc->i_hdr.h_len);
528
529 if (flip) {
530 minfo.laddr = daddr;
531 minfo.faddr = saddr;
532 minfo.lport = inc->i_hdr.h_dport;
533 minfo.fport = inc->i_hdr.h_sport;
534 } else {
535 minfo.laddr = saddr;
536 minfo.faddr = daddr;
537 minfo.lport = inc->i_hdr.h_sport;
538 minfo.fport = inc->i_hdr.h_dport;
539 }
540
541 rds_info_copy(iter, &minfo, sizeof(minfo));
542}
diff --git a/net/rds/send.c b/net/rds/send.c
new file mode 100644
index 000000000000..1b37364656f0
--- /dev/null
+++ b/net/rds/send.c
@@ -0,0 +1,1003 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <net/sock.h>
35#include <linux/in.h>
36#include <linux/list.h>
37
38#include "rds.h"
39#include "rdma.h"
40
41/* When transmitting messages in rds_send_xmit, we need to emerge from
42 * time to time and briefly release the CPU. Otherwise the softlockup watchdog
43 * will kick our shin.
44 * Also, it seems fairer to not let one busy connection stall all the
45 * others.
46 *
47 * send_batch_count is the number of times we'll loop in send_xmit. Setting
48 * it to 0 will restore the old behavior (where we looped until we had
49 * drained the queue).
50 */
51static int send_batch_count = 64;
52module_param(send_batch_count, int, 0444);
53MODULE_PARM_DESC(send_batch_count, "batch factor when working the send queue");
54
55/*
56 * Reset the send state. Caller must hold c_send_lock when calling here.
57 */
58void rds_send_reset(struct rds_connection *conn)
59{
60 struct rds_message *rm, *tmp;
61 unsigned long flags;
62
63 if (conn->c_xmit_rm) {
64 /* Tell the user the RDMA op is no longer mapped by the
65 * transport. This isn't entirely true (it's flushed out
66 * independently) but as the connection is down, there's
67 * no ongoing RDMA to/from that memory */
68 rds_message_unmapped(conn->c_xmit_rm);
69 rds_message_put(conn->c_xmit_rm);
70 conn->c_xmit_rm = NULL;
71 }
72 conn->c_xmit_sg = 0;
73 conn->c_xmit_hdr_off = 0;
74 conn->c_xmit_data_off = 0;
75 conn->c_xmit_rdma_sent = 0;
76
77 conn->c_map_queued = 0;
78
79 conn->c_unacked_packets = rds_sysctl_max_unacked_packets;
80 conn->c_unacked_bytes = rds_sysctl_max_unacked_bytes;
81
82 /* Mark messages as retransmissions, and move them to the send q */
83 spin_lock_irqsave(&conn->c_lock, flags);
84 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
85 set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
86 set_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags);
87 }
88 list_splice_init(&conn->c_retrans, &conn->c_send_queue);
89 spin_unlock_irqrestore(&conn->c_lock, flags);
90}
91
92/*
93 * We're making the conscious trade-off here to only send one message
94 * down the connection at a time.
95 * Pro:
96 * - tx queueing is a simple fifo list
97 * - reassembly is optional and easily done by transports per conn
98 * - no per flow rx lookup at all, straight to the socket
99 * - less per-frag memory and wire overhead
100 * Con:
101 * - queued acks can be delayed behind large messages
102 * Depends:
103 * - small message latency is higher behind queued large messages
104 * - large message latency isn't starved by intervening small sends
105 */
106int rds_send_xmit(struct rds_connection *conn)
107{
108 struct rds_message *rm;
109 unsigned long flags;
110 unsigned int tmp;
111 unsigned int send_quota = send_batch_count;
112 struct scatterlist *sg;
113 int ret = 0;
114 int was_empty = 0;
115 LIST_HEAD(to_be_dropped);
116
117 /*
118 * sendmsg calls here after having queued its message on the send
119 * queue. We only have one task feeding the connection at a time. If
120 * another thread is already feeding the queue then we back off. This
121 * avoids blocking the caller and trading per-connection data between
122 * caches per message.
123 *
124 * The sem holder will issue a retry if they notice that someone queued
125 * a message after they stopped walking the send queue but before they
126 * dropped the sem.
127 */
128 if (!mutex_trylock(&conn->c_send_lock)) {
129 rds_stats_inc(s_send_sem_contention);
130 ret = -ENOMEM;
131 goto out;
132 }
133
134 if (conn->c_trans->xmit_prepare)
135 conn->c_trans->xmit_prepare(conn);
136
137 /*
138 * spin trying to push headers and data down the connection until
139 * the connection doesn't make forward progress.
140 */
141 while (--send_quota) {
142 /*
143 * See if we need to send a congestion map update if we're
144 * between sending messages. The send_sem protects our sole
145 * use of c_map_offset and _bytes.
146 * Note this is used only by transports that define a special
147 * xmit_cong_map function. For all others, we allocate
148 * a cong_map message and treat it just like any other send.
149 */
150 if (conn->c_map_bytes) {
151 ret = conn->c_trans->xmit_cong_map(conn, conn->c_lcong,
152 conn->c_map_offset);
153 if (ret <= 0)
154 break;
155
156 conn->c_map_offset += ret;
157 conn->c_map_bytes -= ret;
158 if (conn->c_map_bytes)
159 continue;
160 }
161
162 /* If we're done sending the current message, clear the
163 * offset and S/G temporaries.
164 */
165 rm = conn->c_xmit_rm;
166 if (rm != NULL &&
167 conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
168 conn->c_xmit_sg == rm->m_nents) {
169 conn->c_xmit_rm = NULL;
170 conn->c_xmit_sg = 0;
171 conn->c_xmit_hdr_off = 0;
172 conn->c_xmit_data_off = 0;
173 conn->c_xmit_rdma_sent = 0;
174
175 /* Release the reference to the previous message. */
176 rds_message_put(rm);
177 rm = NULL;
178 }
179
180 /* If we're asked to send a cong map update, do so.
181 */
182 if (rm == NULL && test_and_clear_bit(0, &conn->c_map_queued)) {
183 if (conn->c_trans->xmit_cong_map != NULL) {
184 conn->c_map_offset = 0;
185 conn->c_map_bytes = sizeof(struct rds_header) +
186 RDS_CONG_MAP_BYTES;
187 continue;
188 }
189
190 rm = rds_cong_update_alloc(conn);
191 if (IS_ERR(rm)) {
192 ret = PTR_ERR(rm);
193 break;
194 }
195
196 conn->c_xmit_rm = rm;
197 }
198
199 /*
200 * Grab the next message from the send queue, if there is one.
201 *
202 * c_xmit_rm holds a ref while we're sending this message down
203 * the connection. We can use this ref while holding the
204 * send_sem; rds_send_reset() is serialized with it.
205 */
206 if (rm == NULL) {
207 unsigned int len;
208
209 spin_lock_irqsave(&conn->c_lock, flags);
210
211 if (!list_empty(&conn->c_send_queue)) {
212 rm = list_entry(conn->c_send_queue.next,
213 struct rds_message,
214 m_conn_item);
215 rds_message_addref(rm);
216
217 /*
218 * Move the message from the send queue to the retransmit
219 * list right away.
220 */
221 list_move_tail(&rm->m_conn_item, &conn->c_retrans);
222 }
223
224 spin_unlock_irqrestore(&conn->c_lock, flags);
225
226 if (rm == NULL) {
227 was_empty = 1;
228 break;
229 }
230
231 /* Unfortunately, the way Infiniband deals with
232 * RDMA to a bad MR key is by moving the entire
233 * queue pair to error state. We could possibly
234 * recover from that, but right now we drop the
235 * connection.
236 * Therefore, we never retransmit messages with RDMA ops.
237 */
238 if (rm->m_rdma_op
239 && test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
240 spin_lock_irqsave(&conn->c_lock, flags);
241 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
242 list_move(&rm->m_conn_item, &to_be_dropped);
243 spin_unlock_irqrestore(&conn->c_lock, flags);
244 rds_message_put(rm);
245 continue;
246 }
247
248 /* Require an ACK every once in a while */
249 len = ntohl(rm->m_inc.i_hdr.h_len);
250 if (conn->c_unacked_packets == 0
251 || conn->c_unacked_bytes < len) {
252 __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
253
254 conn->c_unacked_packets = rds_sysctl_max_unacked_packets;
255 conn->c_unacked_bytes = rds_sysctl_max_unacked_bytes;
256 rds_stats_inc(s_send_ack_required);
257 } else {
258 conn->c_unacked_bytes -= len;
259 conn->c_unacked_packets--;
260 }
261
262 conn->c_xmit_rm = rm;
263 }
264
265 /*
266 * Try and send an rdma message. Let's see if we can
267 * keep this simple and require that the transport either
268 * send the whole rdma or none of it.
269 */
270 if (rm->m_rdma_op && !conn->c_xmit_rdma_sent) {
271 ret = conn->c_trans->xmit_rdma(conn, rm->m_rdma_op);
272 if (ret)
273 break;
274 conn->c_xmit_rdma_sent = 1;
275 /* The transport owns the mapped memory for now.
276 * You can't unmap it while it's on the send queue */
277 set_bit(RDS_MSG_MAPPED, &rm->m_flags);
278 }
279
280 if (conn->c_xmit_hdr_off < sizeof(struct rds_header) ||
281 conn->c_xmit_sg < rm->m_nents) {
282 ret = conn->c_trans->xmit(conn, rm,
283 conn->c_xmit_hdr_off,
284 conn->c_xmit_sg,
285 conn->c_xmit_data_off);
286 if (ret <= 0)
287 break;
288
289 if (conn->c_xmit_hdr_off < sizeof(struct rds_header)) {
290 tmp = min_t(int, ret,
291 sizeof(struct rds_header) -
292 conn->c_xmit_hdr_off);
293 conn->c_xmit_hdr_off += tmp;
294 ret -= tmp;
295 }
296
297 sg = &rm->m_sg[conn->c_xmit_sg];
298 while (ret) {
299 tmp = min_t(int, ret, sg->length -
300 conn->c_xmit_data_off);
301 conn->c_xmit_data_off += tmp;
302 ret -= tmp;
303 if (conn->c_xmit_data_off == sg->length) {
304 conn->c_xmit_data_off = 0;
305 sg++;
306 conn->c_xmit_sg++;
307 BUG_ON(ret != 0 &&
308 conn->c_xmit_sg == rm->m_nents);
309 }
310 }
311 }
312 }
313
314 /* Nuke any messages we decided not to retransmit. */
315 if (!list_empty(&to_be_dropped))
316 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED);
317
318 if (conn->c_trans->xmit_complete)
319 conn->c_trans->xmit_complete(conn);
320
321 /*
322 * We might be racing with another sender who queued a message but
323 * backed off on noticing that we held the c_send_lock. If we check
324 * for queued messages after dropping the sem then either we'll
325 * see the queued message or the queuer will get the sem. If we
326 * notice the queued message then we trigger an immediate retry.
327 *
328 * We need to be careful only to do this when we stopped processing
329 * the send queue because it was empty. It's the only way we
330 * stop processing the loop when the transport hasn't taken
331 * responsibility for forward progress.
332 */
333 mutex_unlock(&conn->c_send_lock);
334
335 if (conn->c_map_bytes || (send_quota == 0 && !was_empty)) {
336 /* We exhausted the send quota, but there's work left to
337 * do. Return and (re-)schedule the send worker.
338 */
339 ret = -EAGAIN;
340 }
341
342 if (ret == 0 && was_empty) {
343 /* A simple bit test would be way faster than taking the
344 * spin lock */
345 spin_lock_irqsave(&conn->c_lock, flags);
346 if (!list_empty(&conn->c_send_queue)) {
347 rds_stats_inc(s_send_sem_queue_raced);
348 ret = -EAGAIN;
349 }
350 spin_unlock_irqrestore(&conn->c_lock, flags);
351 }
352out:
353 return ret;
354}
355
356static void rds_send_sndbuf_remove(struct rds_sock *rs, struct rds_message *rm)
357{
358 u32 len = be32_to_cpu(rm->m_inc.i_hdr.h_len);
359
360 assert_spin_locked(&rs->rs_lock);
361
362 BUG_ON(rs->rs_snd_bytes < len);
363 rs->rs_snd_bytes -= len;
364
365 if (rs->rs_snd_bytes == 0)
366 rds_stats_inc(s_send_queue_empty);
367}
368
369static inline int rds_send_is_acked(struct rds_message *rm, u64 ack,
370 is_acked_func is_acked)
371{
372 if (is_acked)
373 return is_acked(rm, ack);
374 return be64_to_cpu(rm->m_inc.i_hdr.h_sequence) <= ack;
375}
376
377/*
378 * Returns true if there are no messages on the send and retransmit queues
379 * which have a sequence number greater than or equal to the given sequence
380 * number.
381 */
382int rds_send_acked_before(struct rds_connection *conn, u64 seq)
383{
384 struct rds_message *rm, *tmp;
385 int ret = 1;
386
387 spin_lock(&conn->c_lock);
388
389 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
390 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq)
391 ret = 0;
392 break;
393 }
394
395 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
396 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq)
397 ret = 0;
398 break;
399 }
400
401 spin_unlock(&conn->c_lock);
402
403 return ret;
404}
405
406/*
407 * This is pretty similar to what happens below in the ACK
408 * handling code - except that we call here as soon as we get
409 * the IB send completion on the RDMA op and the accompanying
410 * message.
411 */
412void rds_rdma_send_complete(struct rds_message *rm, int status)
413{
414 struct rds_sock *rs = NULL;
415 struct rds_rdma_op *ro;
416 struct rds_notifier *notifier;
417
418 spin_lock(&rm->m_rs_lock);
419
420 ro = rm->m_rdma_op;
421 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)
422 && ro && ro->r_notify && ro->r_notifier) {
423 notifier = ro->r_notifier;
424 rs = rm->m_rs;
425 sock_hold(rds_rs_to_sk(rs));
426
427 notifier->n_status = status;
428 spin_lock(&rs->rs_lock);
429 list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
430 spin_unlock(&rs->rs_lock);
431
432 ro->r_notifier = NULL;
433 }
434
435 spin_unlock(&rm->m_rs_lock);
436
437 if (rs) {
438 rds_wake_sk_sleep(rs);
439 sock_put(rds_rs_to_sk(rs));
440 }
441}
442
443/*
444 * This is the same as rds_rdma_send_complete except we
445 * don't do any locking - we have all the ingredients (message,
446 * socket, socket lock) and can just move the notifier.
447 */
448static inline void
449__rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status)
450{
451 struct rds_rdma_op *ro;
452
453 ro = rm->m_rdma_op;
454 if (ro && ro->r_notify && ro->r_notifier) {
455 ro->r_notifier->n_status = status;
456 list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue);
457 ro->r_notifier = NULL;
458 }
459
460 /* No need to wake the app - caller does this */
461}
462
463/*
464 * This is called from the IB send completion when we detect
465 * an RDMA operation that failed with a remote access error.
466 * So speed is not an issue here.
467 */
468struct rds_message *rds_send_get_message(struct rds_connection *conn,
469 struct rds_rdma_op *op)
470{
471 struct rds_message *rm, *tmp, *found = NULL;
472 unsigned long flags;
473
474 spin_lock_irqsave(&conn->c_lock, flags);
475
476 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
477 if (rm->m_rdma_op == op) {
478 atomic_inc(&rm->m_refcount);
479 found = rm;
480 goto out;
481 }
482 }
483
484 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
485 if (rm->m_rdma_op == op) {
486 atomic_inc(&rm->m_refcount);
487 found = rm;
488 break;
489 }
490 }
491
492out:
493 spin_unlock_irqrestore(&conn->c_lock, flags);
494
495 return found;
496}
497
498/*
499 * This removes messages from the socket's list if they're on it. The list
500 * argument must be private to the caller, we must be able to modify it
501 * without locks. The messages must have a reference held for their
502 * position on the list. This function will drop that reference after
503 * removing the messages from the 'messages' list regardless of if it found
504 * the messages on the socket list or not.
505 */
506void rds_send_remove_from_sock(struct list_head *messages, int status)
507{
508 unsigned long flags = 0; /* silence gcc :P */
509 struct rds_sock *rs = NULL;
510 struct rds_message *rm;
511
512 local_irq_save(flags);
513 while (!list_empty(messages)) {
514 rm = list_entry(messages->next, struct rds_message,
515 m_conn_item);
516 list_del_init(&rm->m_conn_item);
517
518 /*
519 * If we see this flag cleared then we're *sure* that someone
520 * else beat us to removing it from the sock. If we race
521 * with their flag update we'll get the lock and then really
522 * see that the flag has been cleared.
523 *
524 * The message spinlock makes sure nobody clears rm->m_rs
525 * while we're messing with it. It does not prevent the
526 * message from being removed from the socket, though.
527 */
528 spin_lock(&rm->m_rs_lock);
529 if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags))
530 goto unlock_and_drop;
531
532 if (rs != rm->m_rs) {
533 if (rs) {
534 spin_unlock(&rs->rs_lock);
535 rds_wake_sk_sleep(rs);
536 sock_put(rds_rs_to_sk(rs));
537 }
538 rs = rm->m_rs;
539 spin_lock(&rs->rs_lock);
540 sock_hold(rds_rs_to_sk(rs));
541 }
542
543 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
544 struct rds_rdma_op *ro = rm->m_rdma_op;
545 struct rds_notifier *notifier;
546
547 list_del_init(&rm->m_sock_item);
548 rds_send_sndbuf_remove(rs, rm);
549
550 if (ro && ro->r_notifier
551 && (status || ro->r_notify)) {
552 notifier = ro->r_notifier;
553 list_add_tail(&notifier->n_list,
554 &rs->rs_notify_queue);
555 if (!notifier->n_status)
556 notifier->n_status = status;
557 rm->m_rdma_op->r_notifier = NULL;
558 }
559 rds_message_put(rm);
560 rm->m_rs = NULL;
561 }
562
563unlock_and_drop:
564 spin_unlock(&rm->m_rs_lock);
565 rds_message_put(rm);
566 }
567
568 if (rs) {
569 spin_unlock(&rs->rs_lock);
570 rds_wake_sk_sleep(rs);
571 sock_put(rds_rs_to_sk(rs));
572 }
573 local_irq_restore(flags);
574}
575
576/*
577 * Transports call here when they've determined that the receiver queued
578 * messages up to, and including, the given sequence number. Messages are
579 * moved to the retrans queue when rds_send_xmit picks them off the send
580 * queue. This means that in the TCP case, the message may not have been
581 * assigned the m_ack_seq yet - but that's fine as long as tcp_is_acked
582 * checks the RDS_MSG_HAS_ACK_SEQ bit.
583 *
584 * XXX It's not clear to me how this is safely serialized with socket
585 * destruction. Maybe it should bail if it sees SOCK_DEAD.
586 */
587void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
588 is_acked_func is_acked)
589{
590 struct rds_message *rm, *tmp;
591 unsigned long flags;
592 LIST_HEAD(list);
593
594 spin_lock_irqsave(&conn->c_lock, flags);
595
596 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
597 if (!rds_send_is_acked(rm, ack, is_acked))
598 break;
599
600 list_move(&rm->m_conn_item, &list);
601 clear_bit(RDS_MSG_ON_CONN, &rm->m_flags);
602 }
603
604 /* order flag updates with spin locks */
605 if (!list_empty(&list))
606 smp_mb__after_clear_bit();
607
608 spin_unlock_irqrestore(&conn->c_lock, flags);
609
610 /* now remove the messages from the sock list as needed */
611 rds_send_remove_from_sock(&list, RDS_RDMA_SUCCESS);
612}
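/*
 * A sketch of the expected caller, from a transport's ack handling (the
 * ihdr variable is illustrative):
 *
 *	rds_send_drop_acked(conn, be64_to_cpu(ihdr->h_ack), NULL);
 *
 * A non-NULL is_acked_func is only needed when, as for a TCP transport,
 * the ack carried on the wire cannot be compared directly against
 * h_sequence.
 */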
613
614void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
615{
616 struct rds_message *rm, *tmp;
617 struct rds_connection *conn;
618 unsigned long flags;
619 LIST_HEAD(list);
620 int wake = 0;
621
622 /* get all the messages we're dropping under the rs lock */
623 spin_lock_irqsave(&rs->rs_lock, flags);
624
625 list_for_each_entry_safe(rm, tmp, &rs->rs_send_queue, m_sock_item) {
626 if (dest && (dest->sin_addr.s_addr != rm->m_daddr ||
627 dest->sin_port != rm->m_inc.i_hdr.h_dport))
628 continue;
629
630 wake = 1;
631 list_move(&rm->m_sock_item, &list);
632 rds_send_sndbuf_remove(rs, rm);
633 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
634
635 /* If this is a RDMA operation, notify the app. */
636 __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
637 }
638
639 /* order flag updates with the rs lock */
640 if (wake)
641 smp_mb__after_clear_bit();
642
643 spin_unlock_irqrestore(&rs->rs_lock, flags);
644
645 if (wake)
646 rds_wake_sk_sleep(rs);
647
648 conn = NULL;
649
650 /* now remove the messages from the conn list as needed */
651 list_for_each_entry(rm, &list, m_sock_item) {
652 /* We do this here rather than in the loop above, so that
653 * we don't have to nest m_rs_lock under rs->rs_lock */
654 spin_lock(&rm->m_rs_lock);
655 rm->m_rs = NULL;
656 spin_unlock(&rm->m_rs_lock);
657
658 /*
659 * If we see this flag cleared then we're *sure* that someone
660 * else beat us to removing it from the conn. If we race
661 * with their flag update we'll get the lock and then really
662 * see that the flag has been cleared.
663 */
664 if (!test_bit(RDS_MSG_ON_CONN, &rm->m_flags))
665 continue;
666
667 if (conn != rm->m_inc.i_conn) {
668 if (conn)
669 spin_unlock_irqrestore(&conn->c_lock, flags);
670 conn = rm->m_inc.i_conn;
671 spin_lock_irqsave(&conn->c_lock, flags);
672 }
673
674 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
675 list_del_init(&rm->m_conn_item);
676 rds_message_put(rm);
677 }
678 }
679
680 if (conn)
681 spin_unlock_irqrestore(&conn->c_lock, flags);
682
683 while (!list_empty(&list)) {
684 rm = list_entry(list.next, struct rds_message, m_sock_item);
685 list_del_init(&rm->m_sock_item);
686
687 rds_message_wait(rm);
688 rds_message_put(rm);
689 }
690}
691
692/*
693 * we only want this to fire once so we use the caller's 'queued'. It's
694 * possible that another thread can race with us and remove the
695 * message from the flow with RDS_CANCEL_SENT_TO.
696 */
697static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn,
698 struct rds_message *rm, __be16 sport,
699 __be16 dport, int *queued)
700{
701 unsigned long flags;
702 u32 len;
703
704 if (*queued)
705 goto out;
706
707 len = be32_to_cpu(rm->m_inc.i_hdr.h_len);
708
709 /* this is the only place which holds both the socket's rs_lock
710 * and the connection's c_lock */
711 spin_lock_irqsave(&rs->rs_lock, flags);
712
713 /*
714 * If there is a little space in sndbuf, we don't queue anything,
715 * and userspace gets -EAGAIN. But poll() indicates there's send
716 * room. This can lead to bad behavior (spinning) if snd_bytes isn't
717 * freed up by incoming acks. So we check the *old* value of
718 * rs_snd_bytes here to allow the last msg to exceed the buffer,
719 * and poll() now knows no more data can be sent.
720 */
721 if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) {
722 rs->rs_snd_bytes += len;
723
724 /* let recv side know we are close to send space exhaustion.
725 * This is probably not the optimal way to do it, as this
726 * means we set the flag on *all* messages as soon as our
727 * throughput hits a certain threshold.
728 */
729 if (rs->rs_snd_bytes >= rds_sk_sndbuf(rs) / 2)
730 __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags);
731
732 list_add_tail(&rm->m_sock_item, &rs->rs_send_queue);
733 set_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
734 rds_message_addref(rm);
735 rm->m_rs = rs;
736
737 /* The code ordering is a little weird, but we're
738 trying to minimize the time we hold c_lock */
739 rds_message_populate_header(&rm->m_inc.i_hdr, sport, dport, 0);
740 rm->m_inc.i_conn = conn;
741 rds_message_addref(rm);
742
743 spin_lock(&conn->c_lock);
744 rm->m_inc.i_hdr.h_sequence = cpu_to_be64(conn->c_next_tx_seq++);
745 list_add_tail(&rm->m_conn_item, &conn->c_send_queue);
746 set_bit(RDS_MSG_ON_CONN, &rm->m_flags);
747 spin_unlock(&conn->c_lock);
748
749 rdsdebug("queued msg %p len %d, rs %p bytes %d seq %llu\n",
750 rm, len, rs, rs->rs_snd_bytes,
751 (unsigned long long)be64_to_cpu(rm->m_inc.i_hdr.h_sequence));
752
753 *queued = 1;
754 }
755
756 spin_unlock_irqrestore(&rs->rs_lock, flags);
757out:
758 return *queued;
759}
760
761static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
762 struct msghdr *msg, int *allocated_mr)
763{
764 struct cmsghdr *cmsg;
765 int ret = 0;
766
767 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
768 if (!CMSG_OK(msg, cmsg))
769 return -EINVAL;
770
771 if (cmsg->cmsg_level != SOL_RDS)
772 continue;
773
774 /* As a side effect, RDMA_DEST and RDMA_MAP will set
775 * rm->m_rdma_cookie and rm->m_rdma_mr.
776 */
777 switch (cmsg->cmsg_type) {
778 case RDS_CMSG_RDMA_ARGS:
779 ret = rds_cmsg_rdma_args(rs, rm, cmsg);
780 break;
781
782 case RDS_CMSG_RDMA_DEST:
783 ret = rds_cmsg_rdma_dest(rs, rm, cmsg);
784 break;
785
786 case RDS_CMSG_RDMA_MAP:
787 ret = rds_cmsg_rdma_map(rs, rm, cmsg);
788 if (!ret)
789 *allocated_mr = 1;
790 break;
791
792 default:
793 return -EINVAL;
794 }
795
796 if (ret)
797 break;
798 }
799
800 return ret;
801}
802
803int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
804 size_t payload_len)
805{
806 struct sock *sk = sock->sk;
807 struct rds_sock *rs = rds_sk_to_rs(sk);
808 struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name;
809 __be32 daddr;
810 __be16 dport;
811 struct rds_message *rm = NULL;
812 struct rds_connection *conn;
813 int ret = 0;
814 int queued = 0, allocated_mr = 0;
815 int nonblock = msg->msg_flags & MSG_DONTWAIT;
816 long timeo = sock_rcvtimeo(sk, nonblock);
817
818 /* Mirror Linux UDP's mirroring of BSD error message compatibility */
819 /* XXX: Perhaps MSG_MORE someday */
820 if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) {
821 printk(KERN_INFO "msg_flags 0x%08X\n", msg->msg_flags);
822 ret = -EOPNOTSUPP;
823 goto out;
824 }
825
826 if (msg->msg_namelen) {
827 /* XXX fail non-unicast destination IPs? */
828 if (msg->msg_namelen < sizeof(*usin) || usin->sin_family != AF_INET) {
829 ret = -EINVAL;
830 goto out;
831 }
832 daddr = usin->sin_addr.s_addr;
833 dport = usin->sin_port;
834 } else {
835 /* We only care about consistency with ->connect() */
836 lock_sock(sk);
837 daddr = rs->rs_conn_addr;
838 dport = rs->rs_conn_port;
839 release_sock(sk);
840 }
841
842 /* racing with another thread binding seems ok here */
843 if (daddr == 0 || rs->rs_bound_addr == 0) {
844 ret = -ENOTCONN; /* XXX not a great errno */
845 goto out;
846 }
847
848 rm = rds_message_copy_from_user(msg->msg_iov, payload_len);
849 if (IS_ERR(rm)) {
850 ret = PTR_ERR(rm);
851 rm = NULL;
852 goto out;
853 }
854
855 rm->m_daddr = daddr;
856
857 /* Parse any control messages the user may have included. */
858 ret = rds_cmsg_send(rs, rm, msg, &allocated_mr);
859 if (ret)
860 goto out;
861
862 /* rds_conn_create has a spinlock that runs with IRQ off.
863 * Caching the conn in the socket helps a lot. */
864 if (rs->rs_conn && rs->rs_conn->c_faddr == daddr)
865 conn = rs->rs_conn;
866 else {
867 conn = rds_conn_create_outgoing(rs->rs_bound_addr, daddr,
868 rs->rs_transport,
869 sock->sk->sk_allocation);
870 if (IS_ERR(conn)) {
871 ret = PTR_ERR(conn);
872 goto out;
873 }
874 rs->rs_conn = conn;
875 }
876
877 if ((rm->m_rdma_cookie || rm->m_rdma_op)
878 && conn->c_trans->xmit_rdma == NULL) {
879 if (printk_ratelimit())
880 printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
881 rm->m_rdma_op, conn->c_trans->xmit_rdma);
882 ret = -EOPNOTSUPP;
883 goto out;
884 }
885
886 /* If the connection is down, trigger a connect. We may
887 * have scheduled a delayed reconnect however - in this case
888 * we should not interfere.
889 */
890 if (rds_conn_state(conn) == RDS_CONN_DOWN
891 && !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
892 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
893
894 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
895 if (ret)
896 goto out;
897
898 while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port,
899 dport, &queued)) {
900 rds_stats_inc(s_send_queue_full);
901 /* XXX make sure this is reasonable */
902 if (payload_len > rds_sk_sndbuf(rs)) {
903 ret = -EMSGSIZE;
904 goto out;
905 }
906 if (nonblock) {
907 ret = -EAGAIN;
908 goto out;
909 }
910
911 timeo = wait_event_interruptible_timeout(*sk->sk_sleep,
912 rds_send_queue_rm(rs, conn, rm,
913 rs->rs_bound_port,
914 dport,
915 &queued),
916 timeo);
917 rdsdebug("sendmsg woke queued %d timeo %ld\n", queued, timeo);
918 if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT)
919 continue;
920
921 ret = timeo;
922 if (ret == 0)
923 ret = -ETIMEDOUT;
924 goto out;
925 }
926
927 /*
928 * By now we've committed to the send. We reuse rds_send_worker()
929 * to retry sends in the rds thread if the transport asks us to.
930 */
931 rds_stats_inc(s_send_queued);
932
933 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
934 rds_send_worker(&conn->c_send_w.work);
935
936 rds_message_put(rm);
937 return payload_len;
938
939out:
940 /* If the user included a RDMA_MAP cmsg, we allocated a MR on the fly.
941 * If the sendmsg goes through, we keep the MR. If it fails with EAGAIN
942 * or in any other way, we need to destroy the MR again */
943 if (allocated_mr)
944 rds_rdma_unuse(rs, rds_rdma_cookie_key(rm->m_rdma_cookie), 1);
945
946 if (rm)
947 rds_message_put(rm);
948 return ret;
949}
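/*
 * From userspace this path is driven with a plain sendmsg(); a sketch
 * (socket-level constants assumed from the RDS user header):
 *
 *	int fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
 *	struct sockaddr_in laddr = { .sin_family = AF_INET, ... };
 *	struct sockaddr_in daddr = { .sin_family = AF_INET, ... };
 *	struct iovec iov = { .iov_base = buf, .iov_len = len };
 *	struct msghdr msg = { .msg_name = &daddr,
 *			      .msg_namelen = sizeof(daddr),
 *			      .msg_iov = &iov, .msg_iovlen = 1 };
 *
 *	bind(fd, (struct sockaddr *)&laddr, sizeof(laddr));
 *	sendmsg(fd, &msg, 0);
 *
 * The socket must be bound before sending (the rs_bound_addr check above
 * returns -ENOTCONN otherwise), sendmsg() returns the payload length on
 * success, and a full send queue yields -EAGAIN for non-blocking sockets.
 */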
950
951/*
952 * Reply to a ping packet.
953 */
954int
955rds_send_pong(struct rds_connection *conn, __be16 dport)
956{
957 struct rds_message *rm;
958 unsigned long flags;
959 int ret = 0;
960
961 rm = rds_message_alloc(0, GFP_ATOMIC);
962 if (rm == NULL) {
963 ret = -ENOMEM;
964 goto out;
965 }
966
967 rm->m_daddr = conn->c_faddr;
968
969 /* If the connection is down, trigger a connect. We may
970 * have scheduled a delayed reconnect however - in this case
971 * we should not interfere.
972 */
973 if (rds_conn_state(conn) == RDS_CONN_DOWN
974 && !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
975 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
976
977 ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL);
978 if (ret)
979 goto out;
980
981 spin_lock_irqsave(&conn->c_lock, flags);
982 list_add_tail(&rm->m_conn_item, &conn->c_send_queue);
983 set_bit(RDS_MSG_ON_CONN, &rm->m_flags);
984 rds_message_addref(rm);
985 rm->m_inc.i_conn = conn;
986
987 rds_message_populate_header(&rm->m_inc.i_hdr, 0, dport,
988 conn->c_next_tx_seq);
989 conn->c_next_tx_seq++;
990 spin_unlock_irqrestore(&conn->c_lock, flags);
991
992 rds_stats_inc(s_send_queued);
993 rds_stats_inc(s_send_pong);
994
995 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
996 rds_message_put(rm);
997 return 0;
998
999out:
1000 if (rm)
1001 rds_message_put(rm);
1002 return ret;
1003}
diff --git a/net/rds/stats.c b/net/rds/stats.c
new file mode 100644
index 000000000000..637146893cf3
--- /dev/null
+++ b/net/rds/stats.c
@@ -0,0 +1,148 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/percpu.h>
34#include <linux/seq_file.h>
35#include <linux/proc_fs.h>
36
37#include "rds.h"
38
39DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
40
41/* :.,$s/unsigned long\>.*\<s_\(.*\);/"\1",/g */
42
43static char *rds_stat_names[] = {
44 "conn_reset",
45 "recv_drop_bad_checksum",
46 "recv_drop_old_seq",
47 "recv_drop_no_sock",
48 "recv_drop_dead_sock",
49 "recv_deliver_raced",
50 "recv_delivered",
51 "recv_queued",
52 "recv_immediate_retry",
53 "recv_delayed_retry",
54 "recv_ack_required",
55 "recv_rdma_bytes",
56 "recv_ping",
57 "send_queue_empty",
58 "send_queue_full",
59 "send_sem_contention",
60 "send_sem_queue_raced",
61 "send_immediate_retry",
62 "send_delayed_retry",
63 "send_drop_acked",
64 "send_ack_required",
65 "send_queued",
66 "send_rdma",
67 "send_rdma_bytes",
68 "send_pong",
69 "page_remainder_hit",
70 "page_remainder_miss",
71 "copy_to_user",
72 "copy_from_user",
73 "cong_update_queued",
74 "cong_update_received",
75 "cong_send_error",
76 "cong_send_blocked",
77};
78
79void rds_stats_info_copy(struct rds_info_iterator *iter,
80 uint64_t *values, char **names, size_t nr)
81{
82 struct rds_info_counter ctr;
83 size_t i;
84
85 for (i = 0; i < nr; i++) {
86 BUG_ON(strlen(names[i]) >= sizeof(ctr.name));
87 strncpy(ctr.name, names[i], sizeof(ctr.name) - 1);
88 ctr.value = values[i];
89
90 rds_info_copy(iter, &ctr, sizeof(ctr));
91 }
92}
93
94/*
95 * This gives global counters across all the transports. The strings
96 * are copied in so that the tool doesn't need knowledge of the specific
97 * stats that we're exporting. Some are pretty implementation dependent
98 * and may change over time. That doesn't stop them from being useful.
99 *
100 * This is the only function in the chain that knows about the byte-granular
101 * length in userspace. It converts it to the number of stat entries that the
102 * rest of the functions operate on.
103 */
104static void rds_stats_info(struct socket *sock, unsigned int len,
105 struct rds_info_iterator *iter,
106 struct rds_info_lengths *lens)
107{
108 struct rds_statistics stats = {0, };
109 uint64_t *src;
110 uint64_t *sum;
111 size_t i;
112 int cpu;
113 unsigned int avail;
114
115 avail = len / sizeof(struct rds_info_counter);
116
117 if (avail < ARRAY_SIZE(rds_stat_names)) {
118 avail = 0;
119 goto trans;
120 }
121
122 for_each_online_cpu(cpu) {
123 src = (uint64_t *)&(per_cpu(rds_stats, cpu));
124 sum = (uint64_t *)&stats;
125 for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++)
126 *(sum++) += *(src++);
127 }
128
129 rds_stats_info_copy(iter, (uint64_t *)&stats, rds_stat_names,
130 ARRAY_SIZE(rds_stat_names));
131 avail -= ARRAY_SIZE(rds_stat_names);
132
133trans:
134 lens->each = sizeof(struct rds_info_counter);
135 lens->nr = rds_trans_stats_info_copy(iter, avail) +
136 ARRAY_SIZE(rds_stat_names);
137}
138
139void rds_stats_exit(void)
140{
141 rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info);
142}
143
144int __init rds_stats_init(void)
145{
146 rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info);
147 return 0;
148}
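The loop in rds_stats_info() above sums the per-CPU copies of struct rds_statistics field by field, walking each copy as a flat array of 64-bit counters. Below is a standalone sketch of that summation in plain C, with an invented three-counter struct and a fixed CPU count standing in for the real layout.

#include <stdint.h>
#include <stdio.h>

struct demo_stats {			/* stands in for struct rds_statistics */
	uint64_t s_send_queued;
	uint64_t s_send_pong;
	uint64_t s_recv_delivered;
};

#define DEMO_NR_CPUS 4

int main(void)
{
	struct demo_stats percpu[DEMO_NR_CPUS] = {
		{ 1, 0, 2 }, { 3, 1, 0 }, { 0, 0, 5 }, { 2, 1, 1 },
	};
	struct demo_stats sum = { 0, 0, 0 };
	size_t i;
	int cpu;

	for (cpu = 0; cpu < DEMO_NR_CPUS; cpu++) {
		uint64_t *src = (uint64_t *)&percpu[cpu];
		uint64_t *dst = (uint64_t *)&sum;

		/* same field-wise walk as rds_stats_info() */
		for (i = 0; i < sizeof(sum) / sizeof(uint64_t); i++)
			dst[i] += src[i];
	}

	printf("send_queued %llu send_pong %llu recv_delivered %llu\n",
	       (unsigned long long)sum.s_send_queued,
	       (unsigned long long)sum.s_send_pong,
	       (unsigned long long)sum.s_recv_delivered);
	return 0;
}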
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
new file mode 100644
index 000000000000..307dc5c1be15
--- /dev/null
+++ b/net/rds/sysctl.c
@@ -0,0 +1,122 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/sysctl.h>
35#include <linux/proc_fs.h>
36
37#include "rds.h"
38
39static struct ctl_table_header *rds_sysctl_reg_table;
40
41static unsigned long rds_sysctl_reconnect_min = 1;
42static unsigned long rds_sysctl_reconnect_max = ~0UL;
43
44unsigned long rds_sysctl_reconnect_min_jiffies;
45unsigned long rds_sysctl_reconnect_max_jiffies = HZ;
46
47unsigned int rds_sysctl_max_unacked_packets = 8;
48unsigned int rds_sysctl_max_unacked_bytes = (16 << 20);
49
50unsigned int rds_sysctl_ping_enable = 1;
51
52static ctl_table rds_sysctl_rds_table[] = {
53 {
54 .ctl_name = CTL_UNNUMBERED,
55 .procname = "reconnect_min_delay_ms",
56 .data = &rds_sysctl_reconnect_min_jiffies,
57 .maxlen = sizeof(unsigned long),
58 .mode = 0644,
59 .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
60 .extra1 = &rds_sysctl_reconnect_min,
61 .extra2 = &rds_sysctl_reconnect_max_jiffies,
62 },
63 {
64 .ctl_name = CTL_UNNUMBERED,
65 .procname = "reconnect_max_delay_ms",
66 .data = &rds_sysctl_reconnect_max_jiffies,
67 .maxlen = sizeof(unsigned long),
68 .mode = 0644,
69 .proc_handler = &proc_doulongvec_ms_jiffies_minmax,
70 .extra1 = &rds_sysctl_reconnect_min_jiffies,
71 .extra2 = &rds_sysctl_reconnect_max,
72 },
73 {
74 .ctl_name = CTL_UNNUMBERED,
75 .procname = "max_unacked_packets",
76 .data = &rds_sysctl_max_unacked_packets,
77 .maxlen = sizeof(int),
78 .mode = 0644,
79 .proc_handler = &proc_dointvec,
80 },
81 {
82 .ctl_name = CTL_UNNUMBERED,
83 .procname = "max_unacked_bytes",
84 .data = &rds_sysctl_max_unacked_bytes,
85 .maxlen = sizeof(int),
86 .mode = 0644,
87 .proc_handler = &proc_dointvec,
88 },
89 {
90 .ctl_name = CTL_UNNUMBERED,
91 .procname = "ping_enable",
92 .data = &rds_sysctl_ping_enable,
93 .maxlen = sizeof(int),
94 .mode = 0644,
95 .proc_handler = &proc_dointvec,
96 },
97 { .ctl_name = 0}
98};
99
100static struct ctl_path rds_sysctl_path[] = {
101 { .procname = "net", .ctl_name = CTL_NET, },
102 { .procname = "rds", .ctl_name = CTL_UNNUMBERED, },
103 { }
104};
105
106
107void rds_sysctl_exit(void)
108{
109 if (rds_sysctl_reg_table)
110 unregister_sysctl_table(rds_sysctl_reg_table);
111}
112
113int __init rds_sysctl_init(void)
114{
115 rds_sysctl_reconnect_min = msecs_to_jiffies(1);
116 rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min;
117
118 rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table);
119 if (rds_sysctl_reg_table == NULL)
120 return -ENOMEM;
121 return 0;
122}
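With the ctl_path above, these knobs land under /proc/sys/net/rds/, and the two reconnect delays are exposed in milliseconds even though the kernel stores jiffies, because proc_doulongvec_ms_jiffies_minmax converts on read and write. A small userspace sketch (not part of the patch) that reads a couple of them:

#include <stdio.h>

static long read_rds_sysctl(const char *name)
{
	char path[128];
	long val = -1;
	FILE *f;

	snprintf(path, sizeof(path), "/proc/sys/net/rds/%s", name);
	f = fopen(path, "r");
	if (!f)
		return -1;
	if (fscanf(f, "%ld", &val) != 1)
		val = -1;
	fclose(f);
	return val;
}

int main(void)
{
	printf("reconnect_min_delay_ms = %ld\n",
	       read_rds_sysctl("reconnect_min_delay_ms"));
	printf("max_unacked_packets   = %ld\n",
	       read_rds_sysctl("max_unacked_packets"));
	return 0;
}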
diff --git a/net/rds/threads.c b/net/rds/threads.c
new file mode 100644
index 000000000000..828a1bf9ea92
--- /dev/null
+++ b/net/rds/threads.c
@@ -0,0 +1,265 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/random.h>
35
36#include "rds.h"
37
38/*
39 * All of connection management is simplified by serializing it through
40 * work queues that execute in a connection managing thread.
41 *
42 * TCP wants to send acks through sendpage() in response to data_ready(),
43 * but it needs a process context to do so.
44 *
45 * The receive paths need to allocate but can't drop packets (!) so we have
46 * a thread around to block allocating if the receive fast path sees an
47 * allocation failure.
48 */
49
50/* Grand Unified Theory of connection life cycle:
51 * At any point in time, the connection can be in one of these states:
52 * DOWN, CONNECTING, UP, DISCONNECTING, ERROR
53 *
54 * The following transitions are possible:
55 * ANY -> ERROR
56 * UP -> DISCONNECTING
57 * ERROR -> DISCONNECTING
58 * DISCONNECTING -> DOWN
59 * DOWN -> CONNECTING
60 * CONNECTING -> UP
61 *
62 * Transition to state DISCONNECTING/DOWN:
63 * - Inside the shutdown worker; synchronizes with xmit path
64 * through c_send_lock, and with connection management callbacks
65 * via c_cm_lock.
66 *
67 * For receive callbacks, we rely on the underlying transport
68 * (TCP, IB/RDMA) to provide the necessary synchronisation.
69 */
70struct workqueue_struct *rds_wq;
71
72void rds_connect_complete(struct rds_connection *conn)
73{
74 if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) {
75 printk(KERN_WARNING "%s: Cannot transition to state UP, "
76 "current state is %d\n",
77 __func__,
78 atomic_read(&conn->c_state));
79 atomic_set(&conn->c_state, RDS_CONN_ERROR);
80 queue_work(rds_wq, &conn->c_down_w);
81 return;
82 }
83
84 rdsdebug("conn %p for %pI4 to %pI4 complete\n",
85 conn, &conn->c_laddr, &conn->c_faddr);
86
87 conn->c_reconnect_jiffies = 0;
88 set_bit(0, &conn->c_map_queued);
89 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
90 queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
91}
92
93/*
94 * This random exponential backoff is relied on to eventually resolve racing
95 * connects.
96 *
97 * If connect attempts race then both parties drop both connections and come
98 * here to wait for a random amount of time before trying again. Eventually
99 * the backoff range will be so much greater than the time it takes to
100 * establish a connection that one of the pair will establish the connection
101 * before the other's random delay fires.
102 *
103 * Connection attempts that arrive while a connection is already established
104 * are also considered to be racing connects. This lets a connection from
105 * a rebooted machine replace an existing stale connection before the transport
106 * notices that the connection has failed.
107 *
108 * We should *always* start with a random backoff; otherwise a broken connection
109 * will always take several iterations to be re-established.
110 */
111static void rds_queue_reconnect(struct rds_connection *conn)
112{
113 unsigned long rand;
114
115 rdsdebug("conn %p for %pI4 to %pI4 reconnect jiffies %lu\n",
116 conn, &conn->c_laddr, &conn->c_faddr,
117 conn->c_reconnect_jiffies);
118
119 set_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
120 if (conn->c_reconnect_jiffies == 0) {
121 conn->c_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
122 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
123 return;
124 }
125
126 get_random_bytes(&rand, sizeof(rand));
127 rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
128 rand % conn->c_reconnect_jiffies, conn->c_reconnect_jiffies,
129 conn, &conn->c_laddr, &conn->c_faddr);
130 queue_delayed_work(rds_wq, &conn->c_conn_w,
131 rand % conn->c_reconnect_jiffies);
132
133 conn->c_reconnect_jiffies = min(conn->c_reconnect_jiffies * 2,
134 rds_sysctl_reconnect_max_jiffies);
135}
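The schedule rds_queue_reconnect() produces can be seen in isolation with the sketch below (plain C, illustrative constants): the first retry is queued immediately, each later retry waits a random slice of the current ceiling, and the ceiling doubles until it hits the sysctl maximum.

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned long min_j = 1;	/* rds_sysctl_reconnect_min_jiffies */
	unsigned long max_j = 250;	/* rds_sysctl_reconnect_max_jiffies, ~HZ */
	unsigned long backoff = 0;	/* conn->c_reconnect_jiffies */
	int attempt;

	for (attempt = 0; attempt < 6; attempt++) {
		unsigned long delay;

		if (backoff == 0) {
			backoff = min_j;
			delay = 0;	/* first retry fires immediately */
		} else {
			delay = (unsigned long)rand() % backoff;
			backoff = backoff * 2 < max_j ? backoff * 2 : max_j;
		}
		printf("attempt %d: delay %lu jiffies (next ceiling %lu)\n",
		       attempt, delay, backoff);
	}
	return 0;
}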
136
137void rds_connect_worker(struct work_struct *work)
138{
139 struct rds_connection *conn = container_of(work, struct rds_connection, c_conn_w.work);
140 int ret;
141
142 clear_bit(RDS_RECONNECT_PENDING, &conn->c_flags);
143 if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
144 ret = conn->c_trans->conn_connect(conn);
145 rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n",
146 conn, &conn->c_laddr, &conn->c_faddr, ret);
147
148 if (ret) {
149 if (rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_DOWN))
150 rds_queue_reconnect(conn);
151 else
152 rds_conn_error(conn, "RDS: connect failed\n");
153 }
154 }
155}
156
157void rds_shutdown_worker(struct work_struct *work)
158{
159 struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
160
161 /* shut it down unless it's down already */
162 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
163 /*
164 * Quiesce the connection mgmt handlers before we start tearing
165 * things down. We don't hold the mutex for the entire
166 * duration of the shutdown operation, else we may be
167 * deadlocking with the CM handler. Instead, the CM event
168 * handler is supposed to check for state DISCONNECTING
169 */
170 mutex_lock(&conn->c_cm_lock);
171 if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING)
172 && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
173 rds_conn_error(conn, "shutdown called in state %d\n",
174 atomic_read(&conn->c_state));
175 mutex_unlock(&conn->c_cm_lock);
176 return;
177 }
178 mutex_unlock(&conn->c_cm_lock);
179
180 mutex_lock(&conn->c_send_lock);
181 conn->c_trans->conn_shutdown(conn);
182 rds_conn_reset(conn);
183 mutex_unlock(&conn->c_send_lock);
184
185 if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
186 /* This can happen - e.g. when we're in the middle of tearing
187 * down the connection, and someone unloads the rds module.
188 * Quite reproducible with loopback connections.
189 * Mostly harmless.
190 */
191 rds_conn_error(conn,
192 "%s: failed to transition to state DOWN, "
193 "current state is %d\n",
194 __func__,
195 atomic_read(&conn->c_state));
196 return;
197 }
198 }
199
200 /* Then reconnect if it's still live.
201 * The passive side of an IB loopback connection is never added
202 * to the conn hash, so we never trigger a reconnect on this
203 * conn - the reconnect is always triggered by the active peer. */
204 cancel_delayed_work(&conn->c_conn_w);
205 if (!hlist_unhashed(&conn->c_hash_node))
206 rds_queue_reconnect(conn);
207}
208
209void rds_send_worker(struct work_struct *work)
210{
211 struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work);
212 int ret;
213
214 if (rds_conn_state(conn) == RDS_CONN_UP) {
215 ret = rds_send_xmit(conn);
216 rdsdebug("conn %p ret %d\n", conn, ret);
217 switch (ret) {
218 case -EAGAIN:
219 rds_stats_inc(s_send_immediate_retry);
220 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
221 break;
222 case -ENOMEM:
223 rds_stats_inc(s_send_delayed_retry);
224 queue_delayed_work(rds_wq, &conn->c_send_w, 2);
225 default:
226 break;
227 }
228 }
229}
230
231void rds_recv_worker(struct work_struct *work)
232{
233 struct rds_connection *conn = container_of(work, struct rds_connection, c_recv_w.work);
234 int ret;
235
236 if (rds_conn_state(conn) == RDS_CONN_UP) {
237 ret = conn->c_trans->recv(conn);
238 rdsdebug("conn %p ret %d\n", conn, ret);
239 switch (ret) {
240 case -EAGAIN:
241 rds_stats_inc(s_recv_immediate_retry);
242 queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
243 break;
244 case -ENOMEM:
245 rds_stats_inc(s_recv_delayed_retry);
246 queue_delayed_work(rds_wq, &conn->c_recv_w, 2);
247 default:
248 break;
249 }
250 }
251}
252
253void rds_threads_exit(void)
254{
255 destroy_workqueue(rds_wq);
256}
257
258int __init rds_threads_init(void)
259{
260 rds_wq = create_singlethread_workqueue("krdsd");
261 if (rds_wq == NULL)
262 return -ENOMEM;
263
264 return 0;
265}
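All of the workers above hang off the single krdsd workqueue and requeue themselves for the -EAGAIN/-ENOMEM retry cases. A minimal self-contained module sketch of that pattern (names invented; this is not RDS code) using the same create_singlethread_workqueue/queue_delayed_work calls:

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;
static struct delayed_work demo_work;
static int demo_passes;

static void demo_work_func(struct work_struct *work)
{
	/* pretend the transport returned -EAGAIN: requeue immediately, 3 times */
	if (++demo_passes < 3)
		queue_delayed_work(demo_wq, &demo_work, 0);
	else
		printk(KERN_INFO "demo: done after %d passes\n", demo_passes);
}

static int __init demo_init(void)
{
	demo_wq = create_singlethread_workqueue("kdemod");
	if (demo_wq == NULL)
		return -ENOMEM;
	INIT_DELAYED_WORK(&demo_work, demo_work_func);
	queue_delayed_work(demo_wq, &demo_work, 0);
	return 0;
}

static void __exit demo_exit(void)
{
	cancel_delayed_work_sync(&demo_work);
	destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");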
diff --git a/net/rds/transport.c b/net/rds/transport.c
new file mode 100644
index 000000000000..767da61ad2f3
--- /dev/null
+++ b/net/rds/transport.c
@@ -0,0 +1,117 @@
1/*
2 * Copyright (c) 2006 Oracle. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33#include <linux/kernel.h>
34#include <linux/module.h>
35#include <linux/in.h>
36
37#include "rds.h"
38#include "loop.h"
39
40static LIST_HEAD(rds_transports);
41static DECLARE_RWSEM(rds_trans_sem);
42
43int rds_trans_register(struct rds_transport *trans)
44{
45 BUG_ON(strlen(trans->t_name) + 1 > TRANSNAMSIZ);
46
47 down_write(&rds_trans_sem);
48
49 list_add_tail(&trans->t_item, &rds_transports);
50 printk(KERN_INFO "Registered RDS/%s transport\n", trans->t_name);
51
52 up_write(&rds_trans_sem);
53
54 return 0;
55}
56
57void rds_trans_unregister(struct rds_transport *trans)
58{
59 down_write(&rds_trans_sem);
60
61 list_del_init(&trans->t_item);
62 printk(KERN_INFO "Unregistered RDS/%s transport\n", trans->t_name);
63
64 up_write(&rds_trans_sem);
65}
66
67struct rds_transport *rds_trans_get_preferred(__be32 addr)
68{
69 struct rds_transport *trans;
70 struct rds_transport *ret = NULL;
71
72 if (IN_LOOPBACK(ntohl(addr)))
73 return &rds_loop_transport;
74
75 down_read(&rds_trans_sem);
76 list_for_each_entry(trans, &rds_transports, t_item) {
77 if (trans->laddr_check(addr) == 0) {
78 ret = trans;
79 break;
80 }
81 }
82 up_read(&rds_trans_sem);
83
84 return ret;
85}
86
87/*
88 * This returns the number of stats entries in the snapshot and only
89 * copies them using the iter if there is enough space for them. The
90 * caller passes in the global stats so that we can size and copy while
91 * holding the lock.
92 */
93unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
94 unsigned int avail)
95
96{
97 struct rds_transport *trans;
98 unsigned int total = 0;
99 unsigned int part;
100
101 rds_info_iter_unmap(iter);
102 down_read(&rds_trans_sem);
103
104 list_for_each_entry(trans, &rds_transports, t_item) {
105 if (trans->stats_info_copy == NULL)
106 continue;
107
108 part = trans->stats_info_copy(iter, avail);
109 avail -= min(avail, part);
110 total += part;
111 }
112
113 up_read(&rds_trans_sem);
114
115 return total;
116}
117
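A transport plugs into the registry above by filling in struct rds_transport and calling rds_trans_register(); rds_trans_get_preferred() picks the first registered transport whose laddr_check() returns 0 for the local address, and transports without a stats_info_copy hook are simply skipped. The skeleton below is a hedged sketch only: struct rds_transport is defined in rds.h and has more mandatory hooks (connection setup, xmit, recv, ...) than the two shown here.

#include <linux/module.h>
#include "rds.h"

static int demo_laddr_check(__be32 addr)
{
	return 0;	/* claim every IPv4 address, for illustration only */
}

static struct rds_transport demo_transport = {
	.t_name		= "demo",
	.laddr_check	= demo_laddr_check,
	/* .stats_info_copy is optional; see rds_trans_stats_info_copy() above */
};

static int __init demo_transport_init(void)
{
	return rds_trans_register(&demo_transport);
}

static void __exit demo_transport_exit(void)
{
	rds_trans_unregister(&demo_transport);
}

module_init(demo_transport_init);
module_exit(demo_transport_exit);
MODULE_LICENSE("GPL");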
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 01392649b462..0f36e8d59b29 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1124,6 +1124,10 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock,
1124 1124
1125 /* Build a packet */ 1125 /* Build a packet */
1126 SOCK_DEBUG(sk, "ROSE: sendto: building packet.\n"); 1126 SOCK_DEBUG(sk, "ROSE: sendto: building packet.\n");
1127 /* Sanity check the packet size */
1128 if (len > 65535)
1129 return -EMSGSIZE;
1130
1127 size = len + AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN; 1131 size = len + AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN;
1128 1132
1129 if ((skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, &err)) == NULL) 1133 if ((skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, &err)) == NULL)
@@ -1587,8 +1591,7 @@ static int __init rose_proto_init(void)
1587 char name[IFNAMSIZ]; 1591 char name[IFNAMSIZ];
1588 1592
1589 sprintf(name, "rose%d", i); 1593 sprintf(name, "rose%d", i);
1590 dev = alloc_netdev(sizeof(struct net_device_stats), 1594 dev = alloc_netdev(0, name, rose_setup);
1591 name, rose_setup);
1592 if (!dev) { 1595 if (!dev) {
1593 printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate memory\n"); 1596 printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate memory\n");
1594 rc = -ENOMEM; 1597 rc = -ENOMEM;
diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c
index 12cfcf09556b..7dcf2569613b 100644
--- a/net/rose/rose_dev.c
+++ b/net/rose/rose_dev.c
@@ -57,7 +57,7 @@ static int rose_rebuild_header(struct sk_buff *skb)
57{ 57{
58#ifdef CONFIG_INET 58#ifdef CONFIG_INET
59 struct net_device *dev = skb->dev; 59 struct net_device *dev = skb->dev;
60 struct net_device_stats *stats = netdev_priv(dev); 60 struct net_device_stats *stats = &dev->stats;
61 unsigned char *bp = (unsigned char *)skb->data; 61 unsigned char *bp = (unsigned char *)skb->data;
62 struct sk_buff *skbn; 62 struct sk_buff *skbn;
63 unsigned int len; 63 unsigned int len;
@@ -133,7 +133,7 @@ static int rose_close(struct net_device *dev)
133 133
134static int rose_xmit(struct sk_buff *skb, struct net_device *dev) 134static int rose_xmit(struct sk_buff *skb, struct net_device *dev)
135{ 135{
136 struct net_device_stats *stats = netdev_priv(dev); 136 struct net_device_stats *stats = &dev->stats;
137 137
138 if (!netif_running(dev)) { 138 if (!netif_running(dev)) {
139 printk(KERN_ERR "ROSE: rose_xmit - called when iface is down\n"); 139 printk(KERN_ERR "ROSE: rose_xmit - called when iface is down\n");
@@ -144,30 +144,28 @@ static int rose_xmit(struct sk_buff *skb, struct net_device *dev)
144 return 0; 144 return 0;
145} 145}
146 146
147static struct net_device_stats *rose_get_stats(struct net_device *dev)
148{
149 return netdev_priv(dev);
150}
151
152static const struct header_ops rose_header_ops = { 147static const struct header_ops rose_header_ops = {
153 .create = rose_header, 148 .create = rose_header,
154 .rebuild= rose_rebuild_header, 149 .rebuild= rose_rebuild_header,
155}; 150};
156 151
152static const struct net_device_ops rose_netdev_ops = {
153 .ndo_open = rose_open,
154 .ndo_stop = rose_close,
155 .ndo_start_xmit = rose_xmit,
156 .ndo_set_mac_address = rose_set_mac_address,
157};
158
157void rose_setup(struct net_device *dev) 159void rose_setup(struct net_device *dev)
158{ 160{
159 dev->mtu = ROSE_MAX_PACKET_SIZE - 2; 161 dev->mtu = ROSE_MAX_PACKET_SIZE - 2;
160 dev->hard_start_xmit = rose_xmit; 162 dev->netdev_ops = &rose_netdev_ops;
161 dev->open = rose_open;
162 dev->stop = rose_close;
163 163
164 dev->header_ops = &rose_header_ops; 164 dev->header_ops = &rose_header_ops;
165 dev->hard_header_len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN; 165 dev->hard_header_len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN;
166 dev->addr_len = ROSE_ADDR_LEN; 166 dev->addr_len = ROSE_ADDR_LEN;
167 dev->type = ARPHRD_ROSE; 167 dev->type = ARPHRD_ROSE;
168 dev->set_mac_address = rose_set_mac_address;
169 168
170 /* New-style flags. */ 169 /* New-style flags. */
171 dev->flags = IFF_NOARP; 170 dev->flags = IFF_NOARP;
172 dev->get_stats = rose_get_stats;
173} 171}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 0fc4a18fd96f..32009793307b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -444,6 +444,17 @@ out:
444} 444}
445EXPORT_SYMBOL(qdisc_calculate_pkt_len); 445EXPORT_SYMBOL(qdisc_calculate_pkt_len);
446 446
447void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
448{
449 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
450 printk(KERN_WARNING
451 "%s: %s qdisc %X: is non-work-conserving?\n",
452 txt, qdisc->ops->id, qdisc->handle >> 16);
453 qdisc->flags |= TCQ_F_WARN_NONWC;
454 }
455}
456EXPORT_SYMBOL(qdisc_warn_nonwc);
457
447static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) 458static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
448{ 459{
449 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, 460 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 9e43ed949167..d728d8111732 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1960,8 +1960,11 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1960 cbq_rmprio(q, cl); 1960 cbq_rmprio(q, cl);
1961 sch_tree_unlock(sch); 1961 sch_tree_unlock(sch);
1962 1962
1963 if (--cl->refcnt == 0) 1963 BUG_ON(--cl->refcnt == 0);
1964 cbq_destroy_class(sch, cl); 1964 /*
1965 * This shouldn't happen: we "hold" one cops->get() when called
1966 * from tc_ctl_tclass; the destroy method is done from cops->put().
1967 */
1965 1968
1966 return 0; 1969 return 0;
1967} 1970}
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index e36e94ab4e10..7597fe146866 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -155,8 +155,11 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
155 drr_purge_queue(cl); 155 drr_purge_queue(cl);
156 qdisc_class_hash_remove(&q->clhash, &cl->common); 156 qdisc_class_hash_remove(&q->clhash, &cl->common);
157 157
158 if (--cl->refcnt == 0) 158 BUG_ON(--cl->refcnt == 0);
159 drr_destroy_class(sch, cl); 159 /*
160 * This shouldn't happen: we "hold" one cops->get() when called
161 * from tc_ctl_tclass; the destroy method is done from cops->put().
162 */
160 163
161 sch_tree_unlock(sch); 164 sch_tree_unlock(sch);
162 return 0; 165 return 0;
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 45c31b1a4e1d..5022f9c1f34b 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -887,8 +887,7 @@ qdisc_peek_len(struct Qdisc *sch)
887 887
888 skb = sch->ops->peek(sch); 888 skb = sch->ops->peek(sch);
889 if (skb == NULL) { 889 if (skb == NULL) {
890 if (net_ratelimit()) 890 qdisc_warn_nonwc("qdisc_peek_len", sch);
891 printk("qdisc_peek_len: non work-conserving qdisc ?\n");
892 return 0; 891 return 0;
893 } 892 }
894 len = qdisc_pkt_len(skb); 893 len = qdisc_pkt_len(skb);
@@ -1140,8 +1139,11 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
1140 hfsc_purge_queue(sch, cl); 1139 hfsc_purge_queue(sch, cl);
1141 qdisc_class_hash_remove(&q->clhash, &cl->cl_common); 1140 qdisc_class_hash_remove(&q->clhash, &cl->cl_common);
1142 1141
1143 if (--cl->refcnt == 0) 1142 BUG_ON(--cl->refcnt == 0);
1144 hfsc_destroy_class(sch, cl); 1143 /*
1144 * This shouldn't happen: we "hold" one cops->get() when called
1145 * from tc_ctl_tclass; the destroy method is done from cops->put().
1146 */
1145 1147
1146 sch_tree_unlock(sch); 1148 sch_tree_unlock(sch);
1147 return 0; 1149 return 0;
@@ -1642,8 +1644,7 @@ hfsc_dequeue(struct Qdisc *sch)
1642 1644
1643 skb = qdisc_dequeue_peeked(cl->qdisc); 1645 skb = qdisc_dequeue_peeked(cl->qdisc);
1644 if (skb == NULL) { 1646 if (skb == NULL) {
1645 if (net_ratelimit()) 1647 qdisc_warn_nonwc("HFSC", cl->qdisc);
1646 printk("HFSC: Non-work-conserving qdisc ?\n");
1647 return NULL; 1648 return NULL;
1648 } 1649 }
1649 1650
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 2f0f0b04d3fb..88cd02626621 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -35,6 +35,7 @@
35#include <linux/list.h> 35#include <linux/list.h>
36#include <linux/compiler.h> 36#include <linux/compiler.h>
37#include <linux/rbtree.h> 37#include <linux/rbtree.h>
38#include <linux/workqueue.h>
38#include <net/netlink.h> 39#include <net/netlink.h>
39#include <net/pkt_sched.h> 40#include <net/pkt_sched.h>
40 41
@@ -114,8 +115,6 @@ struct htb_class {
114 struct tcf_proto *filter_list; 115 struct tcf_proto *filter_list;
115 int filter_cnt; 116 int filter_cnt;
116 117
117 int warned; /* only one warning about non work conserving .. */
118
119 /* token bucket parameters */ 118 /* token bucket parameters */
120 struct qdisc_rate_table *rate; /* rate table of the class itself */ 119 struct qdisc_rate_table *rate; /* rate table of the class itself */
121 struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ 120 struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */
@@ -155,6 +154,10 @@ struct htb_sched {
155 int direct_qlen; /* max qlen of above */ 154 int direct_qlen; /* max qlen of above */
156 155
157 long direct_pkts; 156 long direct_pkts;
157
158#define HTB_WARN_TOOMANYEVENTS 0x1
159 unsigned int warned; /* only one warning */
160 struct work_struct work;
158}; 161};
159 162
160/* find class in global hash table using given handle */ 163/* find class in global hash table using given handle */
@@ -658,7 +661,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl,
658 * htb_do_events - make mode changes to classes at the level 661 * htb_do_events - make mode changes to classes at the level
659 * 662 *
660 * Scans event queue for pending events and applies them. Returns time of 663 * Scans event queue for pending events and applies them. Returns time of
661 * next pending event (0 for no event in pq). 664 * next pending event (0 for no event in pq, q->now for too many events).
662 * Note: Applied are events which have cl->pq_key <= q->now. 665
663 */ 666 */
664static psched_time_t htb_do_events(struct htb_sched *q, int level, 667static psched_time_t htb_do_events(struct htb_sched *q, int level,
@@ -686,8 +689,14 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level,
686 if (cl->cmode != HTB_CAN_SEND) 689 if (cl->cmode != HTB_CAN_SEND)
687 htb_add_to_wait_tree(q, cl, diff); 690 htb_add_to_wait_tree(q, cl, diff);
688 } 691 }
689 /* too much load - let's continue on next jiffie (including above) */ 692
690 return q->now + 2 * PSCHED_TICKS_PER_SEC / HZ; 693 /* too much load - let's continue after a break for scheduling */
694 if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) {
695 printk(KERN_WARNING "htb: too many events!\n");
696 q->warned |= HTB_WARN_TOOMANYEVENTS;
697 }
698
699 return q->now;
691} 700}
692 701
693/* Returns class->node+prio from id-tree where class's id is >= id. NULL 702
@@ -809,13 +818,8 @@ next:
809 skb = cl->un.leaf.q->dequeue(cl->un.leaf.q); 818 skb = cl->un.leaf.q->dequeue(cl->un.leaf.q);
810 if (likely(skb != NULL)) 819 if (likely(skb != NULL))
811 break; 820 break;
812 if (!cl->warned) {
813 printk(KERN_WARNING
814 "htb: class %X isn't work conserving ?!\n",
815 cl->common.classid);
816 cl->warned = 1;
817 }
818 821
822 qdisc_warn_nonwc("htb", cl->un.leaf.q);
819 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> 823 htb_next_rb_node((level ? cl->parent->un.inner.ptr : q->
820 ptr[0]) + prio); 824 ptr[0]) + prio);
821 cl = htb_lookup_leaf(q->row[level] + prio, prio, 825 cl = htb_lookup_leaf(q->row[level] + prio, prio,
@@ -892,7 +896,10 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
892 } 896 }
893 } 897 }
894 sch->qstats.overlimits++; 898 sch->qstats.overlimits++;
895 qdisc_watchdog_schedule(&q->watchdog, next_event); 899 if (likely(next_event > q->now))
900 qdisc_watchdog_schedule(&q->watchdog, next_event);
901 else
902 schedule_work(&q->work);
896fin: 903fin:
897 return skb; 904 return skb;
898} 905}
@@ -962,6 +969,14 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = {
962 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, 969 [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
963}; 970};
964 971
972static void htb_work_func(struct work_struct *work)
973{
974 struct htb_sched *q = container_of(work, struct htb_sched, work);
975 struct Qdisc *sch = q->watchdog.qdisc;
976
977 __netif_schedule(qdisc_root(sch));
978}
979
965static int htb_init(struct Qdisc *sch, struct nlattr *opt) 980static int htb_init(struct Qdisc *sch, struct nlattr *opt)
966{ 981{
967 struct htb_sched *q = qdisc_priv(sch); 982 struct htb_sched *q = qdisc_priv(sch);
@@ -996,6 +1011,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
996 INIT_LIST_HEAD(q->drops + i); 1011 INIT_LIST_HEAD(q->drops + i);
997 1012
998 qdisc_watchdog_init(&q->watchdog, sch); 1013 qdisc_watchdog_init(&q->watchdog, sch);
1014 INIT_WORK(&q->work, htb_work_func);
999 skb_queue_head_init(&q->direct_queue); 1015 skb_queue_head_init(&q->direct_queue);
1000 1016
1001 q->direct_qlen = qdisc_dev(sch)->tx_queue_len; 1017 q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
@@ -1188,7 +1204,6 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl)
1188 kfree(cl); 1204 kfree(cl);
1189} 1205}
1190 1206
1191/* always caled under BH & queue lock */
1192static void htb_destroy(struct Qdisc *sch) 1207static void htb_destroy(struct Qdisc *sch)
1193{ 1208{
1194 struct htb_sched *q = qdisc_priv(sch); 1209 struct htb_sched *q = qdisc_priv(sch);
@@ -1196,6 +1211,7 @@ static void htb_destroy(struct Qdisc *sch)
1196 struct htb_class *cl; 1211 struct htb_class *cl;
1197 unsigned int i; 1212 unsigned int i;
1198 1213
1214 cancel_work_sync(&q->work);
1199 qdisc_watchdog_cancel(&q->watchdog); 1215 qdisc_watchdog_cancel(&q->watchdog);
1200 /* This line used to be after htb_destroy_class call below 1216 /* This line used to be after htb_destroy_class call below
1201 and surprisingly it worked in 2.4. But it must precede it 1217 and surprisingly it worked in 2.4. But it must precede it
@@ -1259,8 +1275,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1259 if (last_child) 1275 if (last_child)
1260 htb_parent_to_leaf(q, cl, new_q); 1276 htb_parent_to_leaf(q, cl, new_q);
1261 1277
1262 if (--cl->refcnt == 0) 1278 BUG_ON(--cl->refcnt == 0);
1263 htb_destroy_class(sch, cl); 1279 /*
1280 * This shouldn't happen: we "hold" one cops->get() when called
1281 * from tc_ctl_tclass; the destroy method is done from cops->put().
1282 */
1264 1283
1265 sch_tree_unlock(sch); 1284 sch_tree_unlock(sch);
1266 return 0; 1285 return 0;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 7e151861794b..912731203047 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -202,7 +202,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
202 int i; 202 int i;
203 203
204 if (!netif_is_multiqueue(qdisc_dev(sch))) 204 if (!netif_is_multiqueue(qdisc_dev(sch)))
205 return -EINVAL; 205 return -EOPNOTSUPP;
206 if (nla_len(opt) < sizeof(*qopt)) 206 if (nla_len(opt) < sizeof(*qopt))
207 return -EINVAL; 207 return -EINVAL;
208 208
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index a2f93c09f3cc..e22dfe85e43e 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -236,7 +236,6 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
236 struct tc_tbf_qopt *qopt; 236 struct tc_tbf_qopt *qopt;
237 struct qdisc_rate_table *rtab = NULL; 237 struct qdisc_rate_table *rtab = NULL;
238 struct qdisc_rate_table *ptab = NULL; 238 struct qdisc_rate_table *ptab = NULL;
239 struct qdisc_rate_table *tmp;
240 struct Qdisc *child = NULL; 239 struct Qdisc *child = NULL;
241 int max_size,n; 240 int max_size,n;
242 241
@@ -295,13 +294,9 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
295 q->tokens = q->buffer; 294 q->tokens = q->buffer;
296 q->ptokens = q->mtu; 295 q->ptokens = q->mtu;
297 296
298 tmp = q->R_tab; 297 swap(q->R_tab, rtab);
299 q->R_tab = rtab; 298 swap(q->P_tab, ptab);
300 rtab = tmp;
301 299
302 tmp = q->P_tab;
303 q->P_tab = ptab;
304 ptab = tmp;
305 sch_tree_unlock(sch); 300 sch_tree_unlock(sch);
306 err = 0; 301 err = 0;
307done: 302done:
diff --git a/net/sctp/debug.c b/net/sctp/debug.c
index 67715f4eb849..7ff548a30cfb 100644
--- a/net/sctp/debug.c
+++ b/net/sctp/debug.c
@@ -86,6 +86,9 @@ const char *sctp_cname(const sctp_subtype_t cid)
86 case SCTP_CID_FWD_TSN: 86 case SCTP_CID_FWD_TSN:
87 return "FWD_TSN"; 87 return "FWD_TSN";
88 88
89 case SCTP_CID_AUTH:
90 return "AUTH";
91
89 default: 92 default:
90 break; 93 break;
91 } 94 }
@@ -135,6 +138,7 @@ static const char *sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = {
135 "PRIMITIVE_ABORT", 138 "PRIMITIVE_ABORT",
136 "PRIMITIVE_SEND", 139 "PRIMITIVE_SEND",
137 "PRIMITIVE_REQUESTHEARTBEAT", 140 "PRIMITIVE_REQUESTHEARTBEAT",
141 "PRIMITIVE_ASCONF",
138}; 142};
139 143
140/* Lookup primitive debug name. */ 144/* Lookup primitive debug name. */
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 4c8d9f45ce09..905fda582b92 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -111,7 +111,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
111 if (sctp_addip_enable) { 111 if (sctp_addip_enable) {
112 auth_chunks->chunks[0] = SCTP_CID_ASCONF; 112 auth_chunks->chunks[0] = SCTP_CID_ASCONF;
113 auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; 113 auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK;
114 auth_chunks->param_hdr.length += htons(2); 114 auth_chunks->param_hdr.length =
115 htons(sizeof(sctp_paramhdr_t) + 2);
115 } 116 }
116 } 117 }
117 118
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 2e4a8646dbc3..d2e98803ffe3 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -83,14 +83,15 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb)
83{ 83{
84 struct sk_buff *list = skb_shinfo(skb)->frag_list; 84 struct sk_buff *list = skb_shinfo(skb)->frag_list;
85 struct sctphdr *sh = sctp_hdr(skb); 85 struct sctphdr *sh = sctp_hdr(skb);
86 __be32 cmp = sh->checksum; 86 __le32 cmp = sh->checksum;
87 __be32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb)); 87 __le32 val;
88 __u32 tmp = sctp_start_cksum((__u8 *)sh, skb_headlen(skb));
88 89
89 for (; list; list = list->next) 90 for (; list; list = list->next)
90 val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list), 91 tmp = sctp_update_cksum((__u8 *)list->data, skb_headlen(list),
91 val); 92 tmp);
92 93
93 val = sctp_end_cksum(val); 94 val = sctp_end_cksum(tmp);
94 95
95 if (val != cmp) { 96 if (val != cmp) {
96 /* CRC failure, dump it. */ 97 /* CRC failure, dump it. */
@@ -142,7 +143,8 @@ int sctp_rcv(struct sk_buff *skb)
142 __skb_pull(skb, skb_transport_offset(skb)); 143 __skb_pull(skb, skb_transport_offset(skb));
143 if (skb->len < sizeof(struct sctphdr)) 144 if (skb->len < sizeof(struct sctphdr))
144 goto discard_it; 145 goto discard_it;
145 if (!skb_csum_unnecessary(skb) && sctp_rcv_checksum(skb) < 0) 146 if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) &&
147 sctp_rcv_checksum(skb) < 0)
146 goto discard_it; 148 goto discard_it;
147 149
148 skb_pull(skb, sizeof(struct sctphdr)); 150 skb_pull(skb, sizeof(struct sctphdr));
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ceaa4aa066ea..a63de3f7f185 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -97,8 +97,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
97 if (addr) { 97 if (addr) {
98 addr->a.v6.sin6_family = AF_INET6; 98 addr->a.v6.sin6_family = AF_INET6;
99 addr->a.v6.sin6_port = 0; 99 addr->a.v6.sin6_port = 0;
100 memcpy(&addr->a.v6.sin6_addr, &ifa->addr, 100 ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifa->addr);
101 sizeof(struct in6_addr));
102 addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; 101 addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
103 addr->valid = 1; 102 addr->valid = 1;
104 spin_lock_bh(&sctp_local_addr_lock); 103 spin_lock_bh(&sctp_local_addr_lock);
@@ -628,9 +627,7 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr)
628static struct sock *sctp_v6_create_accept_sk(struct sock *sk, 627static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
629 struct sctp_association *asoc) 628 struct sctp_association *asoc)
630{ 629{
631 struct inet_sock *inet = inet_sk(sk);
632 struct sock *newsk; 630 struct sock *newsk;
633 struct inet_sock *newinet;
634 struct ipv6_pinfo *newnp, *np = inet6_sk(sk); 631 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
635 struct sctp6_sock *newsctp6sk; 632 struct sctp6_sock *newsctp6sk;
636 633
@@ -640,17 +637,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
640 637
641 sock_init_data(NULL, newsk); 638 sock_init_data(NULL, newsk);
642 639
643 newsk->sk_type = SOCK_STREAM; 640 sctp_copy_sock(newsk, sk, asoc);
644
645 newsk->sk_prot = sk->sk_prot;
646 newsk->sk_no_check = sk->sk_no_check;
647 newsk->sk_reuse = sk->sk_reuse;
648
649 newsk->sk_destruct = inet_sock_destruct;
650 newsk->sk_family = PF_INET6;
651 newsk->sk_protocol = IPPROTO_SCTP;
652 newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
653 newsk->sk_shutdown = sk->sk_shutdown;
654 sock_reset_flag(sk, SOCK_ZAPPED); 641 sock_reset_flag(sk, SOCK_ZAPPED);
655 642
656 newsctp6sk = (struct sctp6_sock *)newsk; 643 newsctp6sk = (struct sctp6_sock *)newsk;
@@ -658,7 +645,6 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
658 645
659 sctp_sk(newsk)->v4mapped = sctp_sk(sk)->v4mapped; 646 sctp_sk(newsk)->v4mapped = sctp_sk(sk)->v4mapped;
660 647
661 newinet = inet_sk(newsk);
662 newnp = inet6_sk(newsk); 648 newnp = inet6_sk(newsk);
663 649
664 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 650 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
@@ -666,26 +652,8 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
666 /* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname() 652 /* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname()
667 * and getpeername(). 653 * and getpeername().
668 */ 654 */
669 newinet->sport = inet->sport;
670 newnp->saddr = np->saddr;
671 newnp->rcv_saddr = np->rcv_saddr;
672 newinet->dport = htons(asoc->peer.port);
673 sctp_v6_to_sk_daddr(&asoc->peer.primary_addr, newsk); 655 sctp_v6_to_sk_daddr(&asoc->peer.primary_addr, newsk);
674 656
675 /* Init the ipv4 part of the socket since we can have sockets
676 * using v6 API for ipv4.
677 */
678 newinet->uc_ttl = -1;
679 newinet->mc_loop = 1;
680 newinet->mc_ttl = 1;
681 newinet->mc_index = 0;
682 newinet->mc_list = NULL;
683
684 if (ipv4_config.no_pmtu_disc)
685 newinet->pmtudisc = IP_PMTUDISC_DONT;
686 else
687 newinet->pmtudisc = IP_PMTUDISC_WANT;
688
689 sk_refcnt_debug_inc(newsk); 657 sk_refcnt_debug_inc(newsk);
690 658
691 if (newsk->sk_prot->init(newsk)) { 659 if (newsk->sk_prot->init(newsk)) {
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 73639355157e..7d08f522ec84 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -49,13 +49,10 @@
49#include <linux/ipv6.h> 49#include <linux/ipv6.h>
50#include <linux/init.h> 50#include <linux/init.h>
51#include <net/inet_ecn.h> 51#include <net/inet_ecn.h>
52#include <net/ip.h>
52#include <net/icmp.h> 53#include <net/icmp.h>
53#include <net/net_namespace.h> 54#include <net/net_namespace.h>
54 55
55#ifndef TEST_FRAME
56#include <net/tcp.h>
57#endif /* TEST_FRAME (not defined) */
58
59#include <linux/socket.h> /* for sa_family_t */ 56#include <linux/socket.h> /* for sa_family_t */
60#include <net/sock.h> 57#include <net/sock.h>
61 58
@@ -367,7 +364,6 @@ int sctp_packet_transmit(struct sctp_packet *packet)
367 struct sctp_transport *tp = packet->transport; 364 struct sctp_transport *tp = packet->transport;
368 struct sctp_association *asoc = tp->asoc; 365 struct sctp_association *asoc = tp->asoc;
369 struct sctphdr *sh; 366 struct sctphdr *sh;
370 __be32 crc32 = __constant_cpu_to_be32(0);
371 struct sk_buff *nskb; 367 struct sk_buff *nskb;
372 struct sctp_chunk *chunk, *tmp; 368 struct sctp_chunk *chunk, *tmp;
373 struct sock *sk; 369 struct sock *sk;
@@ -531,17 +527,16 @@ int sctp_packet_transmit(struct sctp_packet *packet)
531 * Note: Adler-32 is no longer applicable, as has been replaced 527 * Note: Adler-32 is no longer applicable, as has been replaced
532 * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. 528 * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
533 */ 529 */
534 if (!(dst->dev->features & NETIF_F_NO_CSUM)) { 530 if (!sctp_checksum_disable && !(dst->dev->features & NETIF_F_NO_CSUM)) {
535 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); 531 __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
536 crc32 = sctp_end_cksum(crc32); 532
533 /* 3) Put the resultant value into the checksum field in the
534 * common header, and leave the rest of the bits unchanged.
535 */
536 sh->checksum = sctp_end_cksum(crc32);
537 } else 537 } else
538 nskb->ip_summed = CHECKSUM_UNNECESSARY; 538 nskb->ip_summed = CHECKSUM_UNNECESSARY;
539 539
540 /* 3) Put the resultant value into the checksum field in the
541 * common header, and leave the rest of the bits unchanged.
542 */
543 sh->checksum = crc32;
544
545 /* IP layer ECN support 540 /* IP layer ECN support
546 * From RFC 2481 541 * From RFC 2481
547 * "The ECN-Capable Transport (ECT) bit would be set by the 542 * "The ECN-Capable Transport (ECT) bit would be set by the
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index bc411c896216..d765fc53e74d 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -428,7 +428,8 @@ void sctp_retransmit_mark(struct sctp_outq *q,
428 * retransmitting due to T3 timeout. 428 * retransmitting due to T3 timeout.
429 */ 429 */
430 if (reason == SCTP_RTXR_T3_RTX && 430 if (reason == SCTP_RTXR_T3_RTX &&
431 (jiffies - chunk->sent_at) < transport->last_rto) 431 time_before(jiffies, chunk->sent_at +
432 transport->last_rto))
432 continue; 433 continue;
433 434
434 /* RFC 2960 6.2.1 Processing a Received SACK 435 /* RFC 2960 6.2.1 Processing a Received SACK
@@ -1757,6 +1758,9 @@ static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn)
1757 struct sctp_chunk *chunk; 1758 struct sctp_chunk *chunk;
1758 struct list_head *lchunk, *temp; 1759 struct list_head *lchunk, *temp;
1759 1760
1761 if (!asoc->peer.prsctp_capable)
1762 return;
1763
1760 /* PR-SCTP C1) Let SackCumAck be the Cumulative TSN ACK carried in the 1764 /* PR-SCTP C1) Let SackCumAck be the Cumulative TSN ACK carried in the
1761 * received SACK. 1765 * received SACK.
1762 * 1766 *
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index c4986d0f7419..cb198af8887c 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -589,46 +589,21 @@ static int sctp_v4_is_ce(const struct sk_buff *skb)
589static struct sock *sctp_v4_create_accept_sk(struct sock *sk, 589static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
590 struct sctp_association *asoc) 590 struct sctp_association *asoc)
591{ 591{
592 struct inet_sock *inet = inet_sk(sk);
593 struct inet_sock *newinet;
594 struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, 592 struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL,
595 sk->sk_prot); 593 sk->sk_prot);
594 struct inet_sock *newinet;
596 595
597 if (!newsk) 596 if (!newsk)
598 goto out; 597 goto out;
599 598
600 sock_init_data(NULL, newsk); 599 sock_init_data(NULL, newsk);
601 600
602 newsk->sk_type = SOCK_STREAM; 601 sctp_copy_sock(newsk, sk, asoc);
603
604 newsk->sk_no_check = sk->sk_no_check;
605 newsk->sk_reuse = sk->sk_reuse;
606 newsk->sk_shutdown = sk->sk_shutdown;
607
608 newsk->sk_destruct = inet_sock_destruct;
609 newsk->sk_family = PF_INET;
610 newsk->sk_protocol = IPPROTO_SCTP;
611 newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
612 sock_reset_flag(newsk, SOCK_ZAPPED); 602 sock_reset_flag(newsk, SOCK_ZAPPED);
613 603
614 newinet = inet_sk(newsk); 604 newinet = inet_sk(newsk);
615 605
616 /* Initialize sk's sport, dport, rcv_saddr and daddr for
617 * getsockname() and getpeername()
618 */
619 newinet->sport = inet->sport;
620 newinet->saddr = inet->saddr;
621 newinet->rcv_saddr = inet->rcv_saddr;
622 newinet->dport = htons(asoc->peer.port);
623 newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; 606 newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr;
624 newinet->pmtudisc = inet->pmtudisc;
625 newinet->id = asoc->next_tsn ^ jiffies;
626
627 newinet->uc_ttl = -1;
628 newinet->mc_loop = 1;
629 newinet->mc_ttl = 1;
630 newinet->mc_index = 0;
631 newinet->mc_list = NULL;
632 607
633 sk_refcnt_debug_inc(newsk); 608 sk_refcnt_debug_inc(newsk);
634 609
@@ -1413,4 +1388,6 @@ MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132");
1413MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-132"); 1388MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-132");
1414MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>"); 1389MODULE_AUTHOR("Linux Kernel SCTP developers <lksctp-developers@lists.sourceforge.net>");
1415MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)"); 1390MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)");
1391module_param_named(no_checksums, sctp_checksum_disable, bool, 0644);
1392MODULE_PARM_DESC(no_checksums, "Disable checksums computing and verification");
1416MODULE_LICENSE("GPL"); 1393MODULE_LICENSE("GPL");
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index fd8acb48c3f2..6851ee94e974 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -100,11 +100,11 @@ int sctp_chunk_iif(const struct sctp_chunk *chunk)
100 */ 100 */
101static const struct sctp_paramhdr ecap_param = { 101static const struct sctp_paramhdr ecap_param = {
102 SCTP_PARAM_ECN_CAPABLE, 102 SCTP_PARAM_ECN_CAPABLE,
103 __constant_htons(sizeof(struct sctp_paramhdr)), 103 cpu_to_be16(sizeof(struct sctp_paramhdr)),
104}; 104};
105static const struct sctp_paramhdr prsctp_param = { 105static const struct sctp_paramhdr prsctp_param = {
106 SCTP_PARAM_FWD_TSN_SUPPORT, 106 SCTP_PARAM_FWD_TSN_SUPPORT,
107 __constant_htons(sizeof(struct sctp_paramhdr)), 107 cpu_to_be16(sizeof(struct sctp_paramhdr)),
108}; 108};
109 109
110/* A helper to initialize an op error inside a 110
@@ -224,7 +224,9 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
224 num_ext += 2; 224 num_ext += 2;
225 } 225 }
226 226
227 chunksize += sizeof(aiparam); 227 if (sp->adaptation_ind)
228 chunksize += sizeof(aiparam);
229
228 chunksize += vparam_len; 230 chunksize += vparam_len;
229 231
230 /* Account for AUTH related parameters */ 232 /* Account for AUTH related parameters */
@@ -304,10 +306,12 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
304 if (sctp_prsctp_enable) 306 if (sctp_prsctp_enable)
305 sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); 307 sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param);
306 308
307 aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; 309 if (sp->adaptation_ind) {
308 aiparam.param_hdr.length = htons(sizeof(aiparam)); 310 aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND;
309 aiparam.adaptation_ind = htonl(sp->adaptation_ind); 311 aiparam.param_hdr.length = htons(sizeof(aiparam));
310 sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); 312 aiparam.adaptation_ind = htonl(sp->adaptation_ind);
313 sctp_addto_chunk(retval, sizeof(aiparam), &aiparam);
314 }
311 315
312 /* Add SCTP-AUTH chunks to the parameter list */ 316 /* Add SCTP-AUTH chunks to the parameter list */
313 if (sctp_auth_enable) { 317 if (sctp_auth_enable) {
@@ -332,6 +336,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
332 sctp_inithdr_t initack; 336 sctp_inithdr_t initack;
333 struct sctp_chunk *retval; 337 struct sctp_chunk *retval;
334 union sctp_params addrs; 338 union sctp_params addrs;
339 struct sctp_sock *sp;
335 int addrs_len; 340 int addrs_len;
336 sctp_cookie_param_t *cookie; 341 sctp_cookie_param_t *cookie;
337 int cookie_len; 342 int cookie_len;
@@ -366,22 +371,24 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
366 /* Calculate the total size of allocation, include the reserved 371 /* Calculate the total size of allocation, include the reserved
367 * space for reporting unknown parameters if it is specified. 372 * space for reporting unknown parameters if it is specified.
368 */ 373 */
374 sp = sctp_sk(asoc->base.sk);
369 chunksize = sizeof(initack) + addrs_len + cookie_len + unkparam_len; 375 chunksize = sizeof(initack) + addrs_len + cookie_len + unkparam_len;
370 376
371 /* Tell peer that we'll do ECN only if peer advertised such cap. */ 377 /* Tell peer that we'll do ECN only if peer advertised such cap. */
372 if (asoc->peer.ecn_capable) 378 if (asoc->peer.ecn_capable)
373 chunksize += sizeof(ecap_param); 379 chunksize += sizeof(ecap_param);
374 380
375 if (sctp_prsctp_enable) 381 if (asoc->peer.prsctp_capable)
376 chunksize += sizeof(prsctp_param); 382 chunksize += sizeof(prsctp_param);
377 383
378 if (sctp_addip_enable) { 384 if (asoc->peer.asconf_capable) {
379 extensions[num_ext] = SCTP_CID_ASCONF; 385 extensions[num_ext] = SCTP_CID_ASCONF;
380 extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; 386 extensions[num_ext+1] = SCTP_CID_ASCONF_ACK;
381 num_ext += 2; 387 num_ext += 2;
382 } 388 }
383 389
384 chunksize += sizeof(aiparam); 390 if (sp->adaptation_ind)
391 chunksize += sizeof(aiparam);
385 392
386 if (asoc->peer.auth_capable) { 393 if (asoc->peer.auth_capable) {
387 auth_random = (sctp_paramhdr_t *)asoc->c.auth_random; 394 auth_random = (sctp_paramhdr_t *)asoc->c.auth_random;
@@ -432,10 +439,12 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
432 if (asoc->peer.prsctp_capable) 439 if (asoc->peer.prsctp_capable)
433 sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); 440 sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param);
434 441
435 aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; 442 if (sp->adaptation_ind) {
436 aiparam.param_hdr.length = htons(sizeof(aiparam)); 443 aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND;
437 aiparam.adaptation_ind = htonl(sctp_sk(asoc->base.sk)->adaptation_ind); 444 aiparam.param_hdr.length = htons(sizeof(aiparam));
438 sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); 445 aiparam.adaptation_ind = htonl(sp->adaptation_ind);
446 sctp_addto_chunk(retval, sizeof(aiparam), &aiparam);
447 }
439 448
440 if (asoc->peer.auth_capable) { 449 if (asoc->peer.auth_capable) {
441 sctp_addto_chunk(retval, ntohs(auth_random->length), 450 sctp_addto_chunk(retval, ntohs(auth_random->length),
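The sctp_make_init()/sctp_make_init_ack() hunks above make the Adaptation Layer Indication parameter optional: the chunk is only sized for it and only carries it when sp->adaptation_ind is non-zero, keeping both decisions behind the same test. A minimal userspace sketch of that size-and-emit-under-one-condition pattern (struct layout and helper names are illustrative, not the on-wire SCTP encoding):

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

struct param_hdr { uint16_t type, length; };
struct ai_param  { struct param_hdr hdr; uint32_t adaptation_ind; };

#define PARAM_ADAPTATION_LAYER_IND 0xC006	/* illustrative TLV type */

/* Append the optional parameter only when the value is set, so the
 * sizing and the emission can never disagree. */
static size_t build_init(uint8_t *buf, size_t base_len, uint32_t adaptation_ind)
{
	size_t len = base_len;			/* mandatory part already sized */

	if (adaptation_ind) {
		struct ai_param p = {
			.hdr.type   = htons(PARAM_ADAPTATION_LAYER_IND),
			.hdr.length = htons(sizeof(p)),
			.adaptation_ind = htonl(adaptation_ind),
		};
		memcpy(buf + len, &p, sizeof(p));
		len += sizeof(p);
	}
	return len;
}

int main(void)
{
	uint8_t buf[128] = { 0 };

	printf("without adaptation_ind: %zu bytes\n", build_init(buf, 20, 0));
	printf("with adaptation_ind:    %zu bytes\n", build_init(buf, 20, 0x01020304));
	return 0;
}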
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index b5495aecab60..e2020eb2c8ca 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -434,7 +434,8 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = {
434 * 434 *
435 */ 435 */
436static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, 436static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
437 struct sctp_transport *transport) 437 struct sctp_transport *transport,
438 int is_hb)
438{ 439{
439 /* The check for association's overall error counter exceeding the 440 /* The check for association's overall error counter exceeding the
440 * threshold is done in the state function. 441 * threshold is done in the state function.
@@ -461,9 +462,15 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc,
461 * expires, set RTO <- RTO * 2 ("back off the timer"). The 462 * expires, set RTO <- RTO * 2 ("back off the timer"). The
462 * maximum value discussed in rule C7 above (RTO.max) may be 463 * maximum value discussed in rule C7 above (RTO.max) may be
463 * used to provide an upper bound to this doubling operation. 464 * used to provide an upper bound to this doubling operation.
465 *
466 * Special Case: the first HB doesn't trigger exponential backoff.
467 * The first unacknowledged HB triggers it. We do this with a flag
468 * that indicates that we have an outstanding HB.
464 */ 469 */
465 transport->last_rto = transport->rto; 470 if (!is_hb || transport->hb_sent) {
466 transport->rto = min((transport->rto * 2), transport->asoc->rto_max); 471 transport->last_rto = transport->rto;
472 transport->rto = min((transport->rto * 2), transport->asoc->rto_max);
473 }
467} 474}
468 475
469/* Worker routine to handle INIT command failure. */ 476/* Worker routine to handle INIT command failure. */
@@ -621,6 +628,11 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
621 t->error_count = 0; 628 t->error_count = 0;
622 t->asoc->overall_error_count = 0; 629 t->asoc->overall_error_count = 0;
623 630
631 /* Clear the hb_sent flag to signal that we had a good
632 * acknowledgement.
633 */
634 t->hb_sent = 0;
635
624 /* Mark the destination transport address as active if it is not so 636 /* Mark the destination transport address as active if it is not so
625 * marked. 637 * marked.
626 */ 638 */
@@ -646,18 +658,6 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds,
646 sctp_transport_hold(t); 658 sctp_transport_hold(t);
647} 659}
648 660
649/* Helper function to do a transport reset at the expiry of the hearbeat
650 * timer.
651 */
652static void sctp_cmd_transport_reset(sctp_cmd_seq_t *cmds,
653 struct sctp_association *asoc,
654 struct sctp_transport *t)
655{
656 sctp_transport_lower_cwnd(t, SCTP_LOWER_CWND_INACTIVE);
657
658 /* Mark one strike against a transport. */
659 sctp_do_8_2_transport_strike(asoc, t);
660}
661 661
662/* Helper function to process the SACK command. */ 662/* Helper function to process the SACK command. */
663static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, 663static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds,
@@ -1458,12 +1458,19 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1458 1458
1459 case SCTP_CMD_STRIKE: 1459 case SCTP_CMD_STRIKE:
1460 /* Mark one strike against a transport. */ 1460 /* Mark one strike against a transport. */
1461 sctp_do_8_2_transport_strike(asoc, cmd->obj.transport); 1461 sctp_do_8_2_transport_strike(asoc, cmd->obj.transport,
1462 0);
1463 break;
1464
1465 case SCTP_CMD_TRANSPORT_IDLE:
1466 t = cmd->obj.transport;
1467 sctp_transport_lower_cwnd(t, SCTP_LOWER_CWND_INACTIVE);
1462 break; 1468 break;
1463 1469
1464 case SCTP_CMD_TRANSPORT_RESET: 1470 case SCTP_CMD_TRANSPORT_HB_SENT:
1465 t = cmd->obj.transport; 1471 t = cmd->obj.transport;
1466 sctp_cmd_transport_reset(commands, asoc, t); 1472 sctp_do_8_2_transport_strike(asoc, t, 1);
1473 t->hb_sent = 1;
1467 break; 1474 break;
1468 1475
1469 case SCTP_CMD_TRANSPORT_ON: 1476 case SCTP_CMD_TRANSPORT_ON:
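The sm_sideeffect.c changes fold the old sctp_cmd_transport_reset() into explicit SCTP_CMD_TRANSPORT_IDLE / SCTP_CMD_TRANSPORT_HB_SENT commands and gate RTO doubling on the new hb_sent flag: a strike caused by a HEARTBEAT only backs the timer off once a previous HEARTBEAT is already outstanding. A small standalone sketch of that rule (field and function names are simplified stand-ins):

#include <stdio.h>

struct transport {
	unsigned long rto, last_rto, rto_max;
	int hb_sent;
};

/* Double the RTO on a strike, except for the very first HEARTBEAT probe
 * on an idle path (is_hb set, hb_sent still clear). */
static void transport_strike(struct transport *t, int is_hb)
{
	if (!is_hb || t->hb_sent) {
		t->last_rto = t->rto;
		t->rto = t->rto * 2 < t->rto_max ? t->rto * 2 : t->rto_max;
	}
}

int main(void)
{
	struct transport t = { .rto = 1000, .rto_max = 60000, .hb_sent = 0 };

	transport_strike(&t, 1);	/* first HB: no backoff */
	printf("after first HB strike:  rto=%lu\n", t.rto);

	t.hb_sent = 1;			/* a HB is now outstanding */
	transport_strike(&t, 1);	/* unacknowledged HB: back off */
	printf("after second HB strike: rto=%lu\n", t.rto);
	return 0;
}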
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index f88dfded0e3a..55a61aa69662 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -988,7 +988,9 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep,
988 /* Set transport error counter and association error counter 988 /* Set transport error counter and association error counter
989 * when sending heartbeat. 989 * when sending heartbeat.
990 */ 990 */
991 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_RESET, 991 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE,
992 SCTP_TRANSPORT(transport));
993 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT,
992 SCTP_TRANSPORT(transport)); 994 SCTP_TRANSPORT(transport));
993 } 995 }
994 sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE, 996 sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE,
@@ -4955,7 +4957,7 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat(
4955 * to that address and not acknowledged within one RTO. 4957 * to that address and not acknowledged within one RTO.
4956 * 4958 *
4957 */ 4959 */
4958 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_RESET, 4960 sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT,
4959 SCTP_TRANSPORT(arg)); 4961 SCTP_TRANSPORT(arg));
4960 return SCTP_DISPOSITION_CONSUME; 4962 return SCTP_DISPOSITION_CONSUME;
4961} 4963}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ff0a8f88de04..5fb3a8c9792e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3069,9 +3069,6 @@ static int sctp_setsockopt_maxburst(struct sock *sk,
3069 int val; 3069 int val;
3070 int assoc_id = 0; 3070 int assoc_id = 0;
3071 3071
3072 if (optlen < sizeof(int))
3073 return -EINVAL;
3074
3075 if (optlen == sizeof(int)) { 3072 if (optlen == sizeof(int)) {
3076 printk(KERN_WARNING 3073 printk(KERN_WARNING
3077 "SCTP: Use of int in max_burst socket option deprecated\n"); 3074 "SCTP: Use of int in max_burst socket option deprecated\n");
@@ -3939,7 +3936,6 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc,
3939{ 3936{
3940 struct sock *sk = asoc->base.sk; 3937 struct sock *sk = asoc->base.sk;
3941 struct socket *sock; 3938 struct socket *sock;
3942 struct inet_sock *inetsk;
3943 struct sctp_af *af; 3939 struct sctp_af *af;
3944 int err = 0; 3940 int err = 0;
3945 3941
@@ -3954,18 +3950,18 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc,
3954 if (err < 0) 3950 if (err < 0)
3955 return err; 3951 return err;
3956 3952
3957 /* Populate the fields of the newsk from the oldsk and migrate the 3953 sctp_copy_sock(sock->sk, sk, asoc);
3958 * asoc to the newsk.
3959 */
3960 sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
3961 3954
3962 /* Make peeled-off sockets more like 1-1 accepted sockets. 3955 /* Make peeled-off sockets more like 1-1 accepted sockets.
3963 * Set the daddr and initialize id to something more random 3956 * Set the daddr and initialize id to something more random
3964 */ 3957 */
3965 af = sctp_get_af_specific(asoc->peer.primary_addr.sa.sa_family); 3958 af = sctp_get_af_specific(asoc->peer.primary_addr.sa.sa_family);
3966 af->to_sk_daddr(&asoc->peer.primary_addr, sk); 3959 af->to_sk_daddr(&asoc->peer.primary_addr, sk);
3967 inetsk = inet_sk(sock->sk); 3960
3968 inetsk->id = asoc->next_tsn ^ jiffies; 3961 /* Populate the fields of the newsk from the oldsk and migrate the
3962 * asoc to the newsk.
3963 */
3964 sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH);
3969 3965
3970 *sockp = sock; 3966 *sockp = sock;
3971 3967
@@ -5284,16 +5280,14 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len,
5284 struct sctp_sock *sp; 5280 struct sctp_sock *sp;
5285 struct sctp_association *asoc; 5281 struct sctp_association *asoc;
5286 5282
5287 if (len < sizeof(int))
5288 return -EINVAL;
5289
5290 if (len == sizeof(int)) { 5283 if (len == sizeof(int)) {
5291 printk(KERN_WARNING 5284 printk(KERN_WARNING
5292 "SCTP: Use of int in max_burst socket option deprecated\n"); 5285 "SCTP: Use of int in max_burst socket option deprecated\n");
5293 printk(KERN_WARNING 5286 printk(KERN_WARNING
5294 "SCTP: Use struct sctp_assoc_value instead\n"); 5287 "SCTP: Use struct sctp_assoc_value instead\n");
5295 params.assoc_id = 0; 5288 params.assoc_id = 0;
5296 } else if (len == sizeof (struct sctp_assoc_value)) { 5289 } else if (len >= sizeof(struct sctp_assoc_value)) {
5290 len = sizeof(struct sctp_assoc_value);
5297 if (copy_from_user(&params, optval, len)) 5291 if (copy_from_user(&params, optval, len))
5298 return -EFAULT; 5292 return -EFAULT;
5299 } else 5293 } else
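The two max_burst hunks keep accepting the deprecated bare int while letting sctp_getsockopt_maxburst() take any buffer at least as large as struct sctp_assoc_value, clamping the length before the copy. A simplified userspace sketch of that forward-compatible length handling (the struct name, value and return codes are placeholders, and the kernel additionally copies the request in from userspace first):

#include <stdio.h>
#include <string.h>

struct assoc_value { int assoc_id; unsigned int value; };

static int get_maxburst(void *optval, int *optlen)
{
	struct assoc_value params = { .assoc_id = 0, .value = 4 };
	int len = *optlen;

	if (len == sizeof(int)) {
		/* legacy callers get just the value (with a warning upstream) */
		memcpy(optval, &params.value, sizeof(int));
	} else if (len >= (int)sizeof(params)) {
		len = sizeof(params);	/* clamp; never write past the struct */
		memcpy(optval, &params, len);
	} else {
		return -1;		/* -EINVAL in the kernel */
	}
	*optlen = len;
	return 0;
}

int main(void)
{
	char buf[sizeof(struct assoc_value) + 8];	/* oversized buffer */
	int len = sizeof(buf);

	if (!get_maxburst(buf, &len))
		printf("returned %d bytes\n", len);
	return 0;
}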
@@ -5849,37 +5843,28 @@ static int sctp_get_port(struct sock *sk, unsigned short snum)
5849} 5843}
5850 5844
5851/* 5845/*
5852 * 3.1.3 listen() - UDP Style Syntax 5846 * Move a socket to LISTENING state.
5853 *
5854 * By default, new associations are not accepted for UDP style sockets.
5855 * An application uses listen() to mark a socket as being able to
5856 * accept new associations.
5857 */ 5847 */
5858SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) 5848SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog)
5859{ 5849{
5860 struct sctp_sock *sp = sctp_sk(sk); 5850 struct sctp_sock *sp = sctp_sk(sk);
5861 struct sctp_endpoint *ep = sp->ep; 5851 struct sctp_endpoint *ep = sp->ep;
5852 struct crypto_hash *tfm = NULL;
5862 5853
5863 /* Only UDP style sockets that are not peeled off are allowed to 5854 /* Allocate HMAC for generating cookie. */
5864 * listen(). 5855 if (!sctp_sk(sk)->hmac && sctp_hmac_alg) {
5865 */ 5856 tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC);
5866 if (!sctp_style(sk, UDP)) 5857 if (IS_ERR(tfm)) {
5867 return -EINVAL; 5858 if (net_ratelimit()) {
5868 5859 printk(KERN_INFO
5869 /* If backlog is zero, disable listening. */ 5860 "SCTP: failed to load transform for %s: %ld\n",
5870 if (!backlog) { 5861 sctp_hmac_alg, PTR_ERR(tfm));
5871 if (sctp_sstate(sk, CLOSED)) 5862 }
5872 return 0; 5863 return -ENOSYS;
5873 5864 }
5874 sctp_unhash_endpoint(ep); 5865 sctp_sk(sk)->hmac = tfm;
5875 sk->sk_state = SCTP_SS_CLOSED;
5876 return 0;
5877 } 5866 }
5878 5867
5879 /* Return if we are already listening. */
5880 if (sctp_sstate(sk, LISTENING))
5881 return 0;
5882
5883 /* 5868 /*
5884 * If a bind() or sctp_bindx() is not called prior to a listen() 5869 * If a bind() or sctp_bindx() is not called prior to a listen()
5885 * call that allows new associations to be accepted, the system 5870 * call that allows new associations to be accepted, the system
@@ -5890,7 +5875,6 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog)
5890 * extensions draft, but follows the practice as seen in TCP 5875 * extensions draft, but follows the practice as seen in TCP
5891 * sockets. 5876 * sockets.
5892 * 5877 *
5893 * Additionally, turn off fastreuse flag since we are not listening
5894 */ 5878 */
5895 sk->sk_state = SCTP_SS_LISTENING; 5879 sk->sk_state = SCTP_SS_LISTENING;
5896 if (!ep->base.bind_addr.port) { 5880 if (!ep->base.bind_addr.port) {
@@ -5901,113 +5885,71 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog)
5901 sk->sk_state = SCTP_SS_CLOSED; 5885 sk->sk_state = SCTP_SS_CLOSED;
5902 return -EADDRINUSE; 5886 return -EADDRINUSE;
5903 } 5887 }
5904 sctp_sk(sk)->bind_hash->fastreuse = 0;
5905 } 5888 }
5906 5889
5907 sctp_hash_endpoint(ep);
5908 return 0;
5909}
5910
5911/*
5912 * 4.1.3 listen() - TCP Style Syntax
5913 *
5914 * Applications uses listen() to ready the SCTP endpoint for accepting
5915 * inbound associations.
5916 */
5917SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog)
5918{
5919 struct sctp_sock *sp = sctp_sk(sk);
5920 struct sctp_endpoint *ep = sp->ep;
5921
5922 /* If backlog is zero, disable listening. */
5923 if (!backlog) {
5924 if (sctp_sstate(sk, CLOSED))
5925 return 0;
5926
5927 sctp_unhash_endpoint(ep);
5928 sk->sk_state = SCTP_SS_CLOSED;
5929 return 0;
5930 }
5931
5932 if (sctp_sstate(sk, LISTENING))
5933 return 0;
5934
5935 /*
5936 * If a bind() or sctp_bindx() is not called prior to a listen()
5937 * call that allows new associations to be accepted, the system
5938 * picks an ephemeral port and will choose an address set equivalent
5939 * to binding with a wildcard address.
5940 *
5941 * This is not currently spelled out in the SCTP sockets
5942 * extensions draft, but follows the practice as seen in TCP
5943 * sockets.
5944 */
5945 sk->sk_state = SCTP_SS_LISTENING;
5946 if (!ep->base.bind_addr.port) {
5947 if (sctp_autobind(sk))
5948 return -EAGAIN;
5949 } else
5950 sctp_sk(sk)->bind_hash->fastreuse = 0;
5951
5952 sk->sk_max_ack_backlog = backlog; 5890 sk->sk_max_ack_backlog = backlog;
5953 sctp_hash_endpoint(ep); 5891 sctp_hash_endpoint(ep);
5954 return 0; 5892 return 0;
5955} 5893}
5956 5894
5957/* 5895/*
5896 * 4.1.3 / 5.1.3 listen()
5897 *
5898 * By default, new associations are not accepted for UDP style sockets.
5899 * An application uses listen() to mark a socket as being able to
5900 * accept new associations.
5901 *
5902 * On TCP style sockets, applications use listen() to ready the SCTP
5903 * endpoint for accepting inbound associations.
5904 *
5905 * On both types of endpoints a backlog of '0' disables listening.
5906 *
5958 * Move a socket to LISTENING state. 5907 * Move a socket to LISTENING state.
5959 */ 5908 */
5960int sctp_inet_listen(struct socket *sock, int backlog) 5909int sctp_inet_listen(struct socket *sock, int backlog)
5961{ 5910{
5962 struct sock *sk = sock->sk; 5911 struct sock *sk = sock->sk;
5963 struct crypto_hash *tfm = NULL; 5912 struct sctp_endpoint *ep = sctp_sk(sk)->ep;
5964 int err = -EINVAL; 5913 int err = -EINVAL;
5965 5914
5966 if (unlikely(backlog < 0)) 5915 if (unlikely(backlog < 0))
5967 goto out; 5916 return err;
5968 5917
5969 sctp_lock_sock(sk); 5918 sctp_lock_sock(sk);
5970 5919
5920 /* Peeled-off sockets are not allowed to listen(). */
5921 if (sctp_style(sk, UDP_HIGH_BANDWIDTH))
5922 goto out;
5923
5971 if (sock->state != SS_UNCONNECTED) 5924 if (sock->state != SS_UNCONNECTED)
5972 goto out; 5925 goto out;
5973 5926
5974 /* Allocate HMAC for generating cookie. */ 5927 /* If backlog is zero, disable listening. */
5975 if (!sctp_sk(sk)->hmac && sctp_hmac_alg) { 5928 if (!backlog) {
5976 tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); 5929 if (sctp_sstate(sk, CLOSED))
5977 if (IS_ERR(tfm)) {
5978 if (net_ratelimit()) {
5979 printk(KERN_INFO
5980 "SCTP: failed to load transform for %s: %ld\n",
5981 sctp_hmac_alg, PTR_ERR(tfm));
5982 }
5983 err = -ENOSYS;
5984 goto out; 5930 goto out;
5985 }
5986 }
5987 5931
5988 switch (sock->type) { 5932 err = 0;
5989 case SOCK_SEQPACKET: 5933 sctp_unhash_endpoint(ep);
5990 err = sctp_seqpacket_listen(sk, backlog); 5934 sk->sk_state = SCTP_SS_CLOSED;
5991 break; 5935 if (sk->sk_reuse)
5992 case SOCK_STREAM: 5936 sctp_sk(sk)->bind_hash->fastreuse = 1;
5993 err = sctp_stream_listen(sk, backlog); 5937 goto out;
5994 break;
5995 default:
5996 break;
5997 } 5938 }
5998 5939
5999 if (err) 5940 /* If we are already listening, just update the backlog */
6000 goto cleanup; 5941 if (sctp_sstate(sk, LISTENING))
5942 sk->sk_max_ack_backlog = backlog;
5943 else {
5944 err = sctp_listen_start(sk, backlog);
5945 if (err)
5946 goto out;
5947 }
6001 5948
6002 /* Store away the transform reference. */ 5949 err = 0;
6003 if (!sctp_sk(sk)->hmac)
6004 sctp_sk(sk)->hmac = tfm;
6005out: 5950out:
6006 sctp_release_sock(sk); 5951 sctp_release_sock(sk);
6007 return err; 5952 return err;
6008cleanup:
6009 crypto_free_hash(tfm);
6010 goto out;
6011} 5953}
6012 5954
6013/* 5955/*
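With sctp_seqpacket_listen() and sctp_stream_listen() folded into one path, sctp_inet_listen() now handles both socket styles itself: negative backlogs are rejected, peeled-off sockets may not listen, a zero backlog shuts listening down, an already-listening socket just refreshes its backlog, and only otherwise does sctp_listen_start() allocate the cookie HMAC and hash the endpoint. A compact userspace sketch of that control flow (state and style names are illustrative, and the fastreuse/autobind details are omitted):

#include <stdio.h>

enum sk_state { SS_CLOSED, SS_LISTENING };
enum sk_style { STYLE_TCP, STYLE_UDP, STYLE_PEELED_OFF };

struct lsock { enum sk_state state; enum sk_style style; int backlog; };

static int inet_listen(struct lsock *sk, int backlog)
{
	if (backlog < 0)
		return -1;			/* -EINVAL */
	if (sk->style == STYLE_PEELED_OFF)
		return -1;			/* peeled-off sockets may not listen */

	if (!backlog) {				/* disable listening */
		sk->state = SS_CLOSED;
		return 0;
	}
	if (sk->state == SS_LISTENING) {	/* just refresh the backlog */
		sk->backlog = backlog;
		return 0;
	}
	sk->state = SS_LISTENING;		/* sctp_listen_start() in the kernel */
	sk->backlog = backlog;
	return 0;
}

int main(void)
{
	struct lsock sk = { SS_CLOSED, STYLE_TCP, 0 };

	printf("listen(5)  -> %d, state=%d\n", inet_listen(&sk, 5), sk.state);
	printf("listen(10) -> %d, backlog=%d\n", inet_listen(&sk, 10), sk.backlog);
	printf("listen(0)  -> %d, state=%d\n", inet_listen(&sk, 0), sk.state);
	return 0;
}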
@@ -6700,6 +6642,48 @@ done:
6700 sctp_skb_set_owner_r(skb, sk); 6642 sctp_skb_set_owner_r(skb, sk);
6701} 6643}
6702 6644
6645void sctp_copy_sock(struct sock *newsk, struct sock *sk,
6646 struct sctp_association *asoc)
6647{
6648 struct inet_sock *inet = inet_sk(sk);
6649 struct inet_sock *newinet = inet_sk(newsk);
6650
6651 newsk->sk_type = sk->sk_type;
6652 newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
6653 newsk->sk_flags = sk->sk_flags;
6654 newsk->sk_no_check = sk->sk_no_check;
6655 newsk->sk_reuse = sk->sk_reuse;
6656
6657 newsk->sk_shutdown = sk->sk_shutdown;
6658 newsk->sk_destruct = inet_sock_destruct;
6659 newsk->sk_family = sk->sk_family;
6660 newsk->sk_protocol = IPPROTO_SCTP;
6661 newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
6662 newsk->sk_sndbuf = sk->sk_sndbuf;
6663 newsk->sk_rcvbuf = sk->sk_rcvbuf;
6664 newsk->sk_lingertime = sk->sk_lingertime;
6665 newsk->sk_rcvtimeo = sk->sk_rcvtimeo;
6666 newsk->sk_sndtimeo = sk->sk_sndtimeo;
6667
6668 newinet = inet_sk(newsk);
6669
6670 /* Initialize sk's sport, dport, rcv_saddr and daddr for
6671 * getsockname() and getpeername()
6672 */
6673 newinet->sport = inet->sport;
6674 newinet->saddr = inet->saddr;
6675 newinet->rcv_saddr = inet->rcv_saddr;
6676 newinet->dport = htons(asoc->peer.port);
6677 newinet->pmtudisc = inet->pmtudisc;
6678 newinet->id = asoc->next_tsn ^ jiffies;
6679
6680 newinet->uc_ttl = inet->uc_ttl;
6681 newinet->mc_loop = 1;
6682 newinet->mc_ttl = 1;
6683 newinet->mc_index = 0;
6684 newinet->mc_list = NULL;
6685}
6686
6703/* Populate the fields of the newsk from the oldsk and migrate the assoc 6687/* Populate the fields of the newsk from the oldsk and migrate the assoc
6704 * and its messages to the newsk. 6688 * and its messages to the newsk.
6705 */ 6689 */
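The peeloff hunk reorders the setup so the child socket is first populated by the new sctp_copy_sock() with the parent's inherited fields plus the peer's address, port and an IP id seed, and only then is the association migrated onto it. A simplified sketch of that copy-before-migrate step (types and fields below are stand-ins for struct sock/inet_sock, and jiffies is approximated with time()):

#include <stdio.h>
#include <time.h>

struct psock {
	unsigned short sport, dport;
	unsigned int   saddr, daddr;
	unsigned short ip_id;
	int            rcvbuf, sndbuf;
};

struct assoc { unsigned short peer_port; unsigned int peer_addr, next_tsn; };

static void copy_sock(struct psock *newsk, const struct psock *oldsk,
		      const struct assoc *asoc)
{
	newsk->rcvbuf = oldsk->rcvbuf;		/* inherited limits */
	newsk->sndbuf = oldsk->sndbuf;
	newsk->sport  = oldsk->sport;		/* so getsockname() works */
	newsk->saddr  = oldsk->saddr;
	newsk->dport  = asoc->peer_port;	/* so getpeername() works */
	newsk->daddr  = asoc->peer_addr;
	/* seed the IP id the way the kernel does: association TSN xor a clock */
	newsk->ip_id  = (unsigned short)(asoc->next_tsn ^ (unsigned)time(NULL));
}

int main(void)
{
	struct psock parent = { .sport = 9, .saddr = 1, .rcvbuf = 4096, .sndbuf = 4096 };
	struct assoc a = { .peer_port = 5000, .peer_addr = 7, .next_tsn = 42 };
	struct psock child = { 0 };

	copy_sock(&child, &parent, &a);		/* then migrate the association */
	printf("child dport=%u rcvbuf=%d\n", child.dport, child.rcvbuf);
	return 0;
}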
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index e745c118f239..e5dde45c79d3 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -79,6 +79,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
79 peer->rttvar = 0; 79 peer->rttvar = 0;
80 peer->srtt = 0; 80 peer->srtt = 0;
81 peer->rto_pending = 0; 81 peer->rto_pending = 0;
82 peer->hb_sent = 0;
82 peer->fast_recovery = 0; 83 peer->fast_recovery = 0;
83 84
84 peer->last_time_heard = jiffies; 85 peer->last_time_heard = jiffies;
@@ -542,8 +543,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
542 * congestion indications more than once every window of 543 * congestion indications more than once every window of
543 * data (or more loosely more than once every round-trip time). 544 * data (or more loosely more than once every round-trip time).
544 */ 545 */
545 if ((jiffies - transport->last_time_ecne_reduced) > 546 if (time_after(jiffies, transport->last_time_ecne_reduced +
546 transport->rtt) { 547 transport->rtt)) {
547 transport->ssthresh = max(transport->cwnd/2, 548 transport->ssthresh = max(transport->cwnd/2,
548 4*transport->asoc->pathmtu); 549 4*transport->asoc->pathmtu);
549 transport->cwnd = transport->ssthresh; 550 transport->cwnd = transport->ssthresh;
@@ -560,7 +561,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
560 * to be done every RTO interval, we do it every hearbeat 561 * to be done every RTO interval, we do it every hearbeat
561 * interval. 562 * interval.
562 */ 563 */
563 if ((jiffies - transport->last_time_used) > transport->rto) 564 if (time_after(jiffies, transport->last_time_used +
565 transport->rto))
564 transport->cwnd = max(transport->cwnd/2, 566 transport->cwnd = max(transport->cwnd/2,
565 4*transport->asoc->pathmtu); 567 4*transport->asoc->pathmtu);
566 break; 568 break;
@@ -608,6 +610,7 @@ void sctp_transport_reset(struct sctp_transport *t)
608 t->flight_size = 0; 610 t->flight_size = 0;
609 t->error_count = 0; 611 t->error_count = 0;
610 t->rto_pending = 0; 612 t->rto_pending = 0;
613 t->hb_sent = 0;
611 t->fast_recovery = 0; 614 t->fast_recovery = 0;
612 615
613 /* Initialize the state information for SFR-CACC */ 616 /* Initialize the state information for SFR-CACC */
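The transport.c hunks swap open-coded jiffies arithmetic for time_after(), which stays correct when the counter wraps. A tiny userspace demonstration of why the signed-difference form is preferred:

/* Minimal sketch (not kernel code) of the time_after() idiom: the signed
 * difference keeps working across the unsigned wraparound where a plain
 * comparison does not. */
#include <stdio.h>

#define time_after(a, b)  ((long)((b) - (a)) < 0)

int main(void)
{
	unsigned long last = (unsigned long)-10;	/* just before wraparound */
	unsigned long now  = 5;				/* just after wraparound */

	/* Naive comparison: now > last is false, so the timeout is missed. */
	printf("naive:      %d\n", now > last);

	/* time_after() sees that 15 ticks have elapsed and returns true. */
	printf("time_after: %d\n", time_after(now, last));
	return 0;
}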
diff --git a/net/socket.c b/net/socket.c
index 35dd7371752a..91d0c0254ffe 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -328,7 +328,7 @@ static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
328 dentry->d_inode->i_ino); 328 dentry->d_inode->i_ino);
329} 329}
330 330
331static struct dentry_operations sockfs_dentry_operations = { 331static const struct dentry_operations sockfs_dentry_operations = {
332 .d_delete = sockfs_delete_dentry, 332 .d_delete = sockfs_delete_dentry,
333 .d_dname = sockfs_dname, 333 .d_dname = sockfs_dname,
334}; 334};
@@ -545,6 +545,18 @@ void sock_release(struct socket *sock)
545 sock->file = NULL; 545 sock->file = NULL;
546} 546}
547 547
548int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
549 union skb_shared_tx *shtx)
550{
551 shtx->flags = 0;
552 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
553 shtx->hardware = 1;
554 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
555 shtx->software = 1;
556 return 0;
557}
558EXPORT_SYMBOL(sock_tx_timestamp);
559
548static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, 560static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
549 struct msghdr *msg, size_t size) 561 struct msghdr *msg, size_t size)
550{ 562{
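sock_tx_timestamp() above only copies flags the application has asked for. A sender-side sketch of how userspace would request them with the SO_TIMESTAMPING option this series introduces; it assumes the matching linux/net_tstamp.h is installed, and the fallback value for SO_TIMESTAMPING is illustrative only:

#include <stdio.h>
#include <sys/socket.h>
#include <linux/net_tstamp.h>

#ifndef SO_TIMESTAMPING
#define SO_TIMESTAMPING 37	/* x86 value, for illustration only */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	int flags = SOF_TIMESTAMPING_TX_HARDWARE |
		    SOF_TIMESTAMPING_TX_SOFTWARE |
		    SOF_TIMESTAMPING_SYS_HARDWARE |
		    SOF_TIMESTAMPING_RAW_HARDWARE |
		    SOF_TIMESTAMPING_SOFTWARE;

	if (fd < 0 || setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
				 &flags, sizeof(flags)) < 0)
		perror("SO_TIMESTAMPING");
	else
		printf("tx/rx timestamping requested\n");
	return 0;
}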
@@ -595,33 +607,65 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
595 return result; 607 return result;
596} 608}
597 609
610static int ktime2ts(ktime_t kt, struct timespec *ts)
611{
612 if (kt.tv64) {
613 *ts = ktime_to_timespec(kt);
614 return 1;
615 } else {
616 return 0;
617 }
618}
619
598/* 620/*
599 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) 621 * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
600 */ 622 */
601void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, 623void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
602 struct sk_buff *skb) 624 struct sk_buff *skb)
603{ 625{
604 ktime_t kt = skb->tstamp; 626 int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
605 627 struct timespec ts[3];
606 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { 628 int empty = 1;
607 struct timeval tv; 629 struct skb_shared_hwtstamps *shhwtstamps =
608 /* Race occurred between timestamp enabling and packet 630 skb_hwtstamps(skb);
609 receiving. Fill in the current time for now. */ 631
610 if (kt.tv64 == 0) 632 /* Race occurred between timestamp enabling and packet
611 kt = ktime_get_real(); 633 receiving. Fill in the current time for now. */
612 skb->tstamp = kt; 634 if (need_software_tstamp && skb->tstamp.tv64 == 0)
613 tv = ktime_to_timeval(kt); 635 __net_timestamp(skb);
614 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); 636
615 } else { 637 if (need_software_tstamp) {
616 struct timespec ts; 638 if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
617 /* Race occurred between timestamp enabling and packet 639 struct timeval tv;
618 receiving. Fill in the current time for now. */ 640 skb_get_timestamp(skb, &tv);
619 if (kt.tv64 == 0) 641 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
620 kt = ktime_get_real(); 642 sizeof(tv), &tv);
621 skb->tstamp = kt; 643 } else {
622 ts = ktime_to_timespec(kt); 644 struct timespec ts;
623 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts); 645 skb_get_timestampns(skb, &ts);
646 put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
647 sizeof(ts), &ts);
648 }
649 }
650
651
652 memset(ts, 0, sizeof(ts));
653 if (skb->tstamp.tv64 &&
654 sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
655 skb_get_timestampns(skb, ts + 0);
656 empty = 0;
624 } 657 }
658 if (shhwtstamps) {
659 if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
660 ktime2ts(shhwtstamps->syststamp, ts + 1))
661 empty = 0;
662 if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
663 ktime2ts(shhwtstamps->hwtstamp, ts + 2))
664 empty = 0;
665 }
666 if (!empty)
667 put_cmsg(msg, SOL_SOCKET,
668 SCM_TIMESTAMPING, sizeof(ts), &ts);
625} 669}
626 670
627EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 671EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
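The reworked __sock_recv_timestamp() can now deliver up to three timestamps per packet in one SCM_TIMESTAMPING control message: ts[0] software, ts[1] hardware transformed to system time, ts[2] raw hardware. A receiver-side sketch of parsing that cmsg, meant to be called on the msghdr filled in by recvmsg() on a socket with SO_TIMESTAMPING enabled; the fallback define is an assumption for illustration:

#include <stdio.h>
#include <string.h>
#include <time.h>
#include <sys/socket.h>

#ifndef SCM_TIMESTAMPING
#define SCM_TIMESTAMPING 37	/* equals SO_TIMESTAMPING; check your headers */
#endif

static void print_rx_timestamps(struct msghdr *msg)
{
	struct cmsghdr *cmsg;

	for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
		struct timespec ts[3];

		if (cmsg->cmsg_level != SOL_SOCKET ||
		    cmsg->cmsg_type != SCM_TIMESTAMPING ||
		    cmsg->cmsg_len < CMSG_LEN(sizeof(ts)))
			continue;

		memcpy(ts, CMSG_DATA(cmsg), sizeof(ts));
		printf("sw=%ld.%09ld sys-hw=%ld.%09ld raw-hw=%ld.%09ld\n",
		       (long)ts[0].tv_sec, (long)ts[0].tv_nsec,
		       (long)ts[1].tv_sec, (long)ts[1].tv_nsec,
		       (long)ts[2].tv_sec, (long)ts[2].tv_nsec);
	}
}

A timestamp slot that was not produced (for example no hardware support) simply stays zero, matching the "empty" handling in the kernel hunk above.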
@@ -1030,6 +1074,13 @@ static int sock_fasync(int fd, struct file *filp, int on)
1030 1074
1031 lock_sock(sk); 1075 lock_sock(sk);
1032 1076
1077 spin_lock(&filp->f_lock);
1078 if (on)
1079 filp->f_flags |= FASYNC;
1080 else
1081 filp->f_flags &= ~FASYNC;
1082 spin_unlock(&filp->f_lock);
1083
1033 prev = &(sock->fasync_list); 1084 prev = &(sock->fasync_list);
1034 1085
1035 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) 1086 for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
@@ -1485,8 +1536,6 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
1485 fd_install(newfd, newfile); 1536 fd_install(newfd, newfile);
1486 err = newfd; 1537 err = newfd;
1487 1538
1488 security_socket_post_accept(sock, newsock);
1489
1490out_put: 1539out_put:
1491 fput_light(sock->file, fput_needed); 1540 fput_light(sock->file, fput_needed);
1492out: 1541out:
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 577385a4a5dc..9ced0628d69c 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -480,7 +480,7 @@ static int rpc_delete_dentry(struct dentry *dentry)
480 return 1; 480 return 1;
481} 481}
482 482
483static struct dentry_operations rpc_dentry_operations = { 483static const struct dentry_operations rpc_dentry_operations = {
484 .d_delete = rpc_delete_dentry, 484 .d_delete = rpc_delete_dentry,
485}; 485};
486 486
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 385f427bedad..ff50a0546865 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -293,11 +293,6 @@ static void rpc_make_runnable(struct rpc_task *task)
293 rpc_clear_queued(task); 293 rpc_clear_queued(task);
294 if (rpc_test_and_set_running(task)) 294 if (rpc_test_and_set_running(task))
295 return; 295 return;
296 /* We might have raced */
297 if (RPC_IS_QUEUED(task)) {
298 rpc_clear_running(task);
299 return;
300 }
301 if (RPC_IS_ASYNC(task)) { 296 if (RPC_IS_ASYNC(task)) {
302 int status; 297 int status;
303 298
@@ -607,7 +602,9 @@ void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
607 */ 602 */
608static void __rpc_execute(struct rpc_task *task) 603static void __rpc_execute(struct rpc_task *task)
609{ 604{
610 int status = 0; 605 struct rpc_wait_queue *queue;
606 int task_is_async = RPC_IS_ASYNC(task);
607 int status = 0;
611 608
612 dprintk("RPC: %5u __rpc_execute flags=0x%x\n", 609 dprintk("RPC: %5u __rpc_execute flags=0x%x\n",
613 task->tk_pid, task->tk_flags); 610 task->tk_pid, task->tk_flags);
@@ -647,15 +644,25 @@ static void __rpc_execute(struct rpc_task *task)
647 */ 644 */
648 if (!RPC_IS_QUEUED(task)) 645 if (!RPC_IS_QUEUED(task))
649 continue; 646 continue;
650 rpc_clear_running(task); 647 /*
651 if (RPC_IS_ASYNC(task)) { 648 * The queue->lock protects against races with
652 /* Careful! we may have raced... */ 649 * rpc_make_runnable().
653 if (RPC_IS_QUEUED(task)) 650 *
654 return; 651 * Note that once we clear RPC_TASK_RUNNING on an asynchronous
655 if (rpc_test_and_set_running(task)) 652 * rpc_task, rpc_make_runnable() can assign it to a
656 return; 653 * different workqueue. We therefore cannot assume that the
654 * rpc_task pointer may still be dereferenced.
655 */
656 queue = task->tk_waitqueue;
657 spin_lock_bh(&queue->lock);
658 if (!RPC_IS_QUEUED(task)) {
659 spin_unlock_bh(&queue->lock);
657 continue; 660 continue;
658 } 661 }
662 rpc_clear_running(task);
663 spin_unlock_bh(&queue->lock);
664 if (task_is_async)
665 return;
659 666
660 /* sync task: sleep here */ 667 /* sync task: sleep here */
661 dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid); 668 dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid);
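The __rpc_execute() change replaces the old optimistic re-check with one done under the wait queue's lock, so RPC_TASK_RUNNING is only cleared while a concurrent rpc_make_runnable() is excluded, and an async task is never dereferenced again once it may have been handed to another workqueue. A pthread-based sketch of that check-and-clear-under-the-waker's-lock pattern (all names are illustrative):

#include <pthread.h>
#include <stdbool.h>

struct task {
	pthread_mutex_t *queue_lock;	/* the lock the waker also takes */
	bool queued;
	bool running;
	bool is_async;
};

/* Returns true if the (synchronous) caller should keep looping,
 * false if it must return because ownership may have moved elsewhere. */
static bool task_park(struct task *t)
{
	pthread_mutex_lock(t->queue_lock);
	if (!t->queued) {
		/* Raced with a wake-up: stay runnable and loop again. */
		pthread_mutex_unlock(t->queue_lock);
		return true;
	}
	t->running = false;		/* after this, a waker may own the task */
	pthread_mutex_unlock(t->queue_lock);
	return !t->is_async;		/* async tasks must not touch *t again */
}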
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 29e401bb612e..62098d101a1f 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -663,7 +663,7 @@ void xprt_connect(struct rpc_task *task)
663 xprt, (xprt_connected(xprt) ? "is" : "is not")); 663 xprt, (xprt_connected(xprt) ? "is" : "is not"));
664 664
665 if (!xprt_bound(xprt)) { 665 if (!xprt_bound(xprt)) {
666 task->tk_status = -EIO; 666 task->tk_status = -EAGAIN;
667 return; 667 return;
668 } 668 }
669 if (!xprt_lock_write(xprt, task)) 669 if (!xprt_lock_write(xprt, task))
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 5cbb404c4cdf..568330eebbfe 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -467,7 +467,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
467 int err, sent = 0; 467 int err, sent = 0;
468 468
469 if (unlikely(!sock)) 469 if (unlikely(!sock))
470 return -ENOTCONN; 470 return -ENOTSOCK;
471 471
472 clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); 472 clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
473 if (base != 0) { 473 if (base != 0) {
@@ -577,6 +577,8 @@ static int xs_udp_send_request(struct rpc_task *task)
577 req->rq_svec->iov_base, 577 req->rq_svec->iov_base,
578 req->rq_svec->iov_len); 578 req->rq_svec->iov_len);
579 579
580 if (!xprt_bound(xprt))
581 return -ENOTCONN;
580 status = xs_sendpages(transport->sock, 582 status = xs_sendpages(transport->sock,
581 xs_addr(xprt), 583 xs_addr(xprt),
582 xprt->addrlen, xdr, 584 xprt->addrlen, xdr,
@@ -594,6 +596,10 @@ static int xs_udp_send_request(struct rpc_task *task)
594 } 596 }
595 597
596 switch (status) { 598 switch (status) {
599 case -ENOTSOCK:
600 status = -ENOTCONN;
601 /* Should we call xs_close() here? */
602 break;
597 case -EAGAIN: 603 case -EAGAIN:
598 xs_nospace(task); 604 xs_nospace(task);
599 break; 605 break;
@@ -693,6 +699,10 @@ static int xs_tcp_send_request(struct rpc_task *task)
693 } 699 }
694 700
695 switch (status) { 701 switch (status) {
702 case -ENOTSOCK:
703 status = -ENOTCONN;
704 /* Should we call xs_close() here? */
705 break;
696 case -EAGAIN: 706 case -EAGAIN:
697 xs_nospace(task); 707 xs_nospace(task);
698 break; 708 break;
@@ -1215,6 +1225,23 @@ out:
1215 read_unlock(&sk->sk_callback_lock); 1225 read_unlock(&sk->sk_callback_lock);
1216} 1226}
1217 1227
1228static void xs_write_space(struct sock *sk)
1229{
1230 struct socket *sock;
1231 struct rpc_xprt *xprt;
1232
1233 if (unlikely(!(sock = sk->sk_socket)))
1234 return;
1235 clear_bit(SOCK_NOSPACE, &sock->flags);
1236
1237 if (unlikely(!(xprt = xprt_from_sock(sk))))
1238 return;
1239 if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
1240 return;
1241
1242 xprt_write_space(xprt);
1243}
1244
1218/** 1245/**
1219 * xs_udp_write_space - callback invoked when socket buffer space 1246 * xs_udp_write_space - callback invoked when socket buffer space
1220 * becomes available 1247 * becomes available
@@ -1230,23 +1257,9 @@ static void xs_udp_write_space(struct sock *sk)
1230 read_lock(&sk->sk_callback_lock); 1257 read_lock(&sk->sk_callback_lock);
1231 1258
1232 /* from net/core/sock.c:sock_def_write_space */ 1259 /* from net/core/sock.c:sock_def_write_space */
1233 if (sock_writeable(sk)) { 1260 if (sock_writeable(sk))
1234 struct socket *sock; 1261 xs_write_space(sk);
1235 struct rpc_xprt *xprt;
1236
1237 if (unlikely(!(sock = sk->sk_socket)))
1238 goto out;
1239 clear_bit(SOCK_NOSPACE, &sock->flags);
1240
1241 if (unlikely(!(xprt = xprt_from_sock(sk))))
1242 goto out;
1243 if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
1244 goto out;
1245 1262
1246 xprt_write_space(xprt);
1247 }
1248
1249 out:
1250 read_unlock(&sk->sk_callback_lock); 1263 read_unlock(&sk->sk_callback_lock);
1251} 1264}
1252 1265
@@ -1265,23 +1278,9 @@ static void xs_tcp_write_space(struct sock *sk)
1265 read_lock(&sk->sk_callback_lock); 1278 read_lock(&sk->sk_callback_lock);
1266 1279
1267 /* from net/core/stream.c:sk_stream_write_space */ 1280 /* from net/core/stream.c:sk_stream_write_space */
1268 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { 1281 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
1269 struct socket *sock; 1282 xs_write_space(sk);
1270 struct rpc_xprt *xprt;
1271
1272 if (unlikely(!(sock = sk->sk_socket)))
1273 goto out;
1274 clear_bit(SOCK_NOSPACE, &sock->flags);
1275
1276 if (unlikely(!(xprt = xprt_from_sock(sk))))
1277 goto out;
1278 if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0)
1279 goto out;
1280 1283
1281 xprt_write_space(xprt);
1282 }
1283
1284 out:
1285 read_unlock(&sk->sk_callback_lock); 1284 read_unlock(&sk->sk_callback_lock);
1286} 1285}
1287 1286
@@ -1523,7 +1522,7 @@ static void xs_udp_connect_worker4(struct work_struct *work)
1523 struct socket *sock = transport->sock; 1522 struct socket *sock = transport->sock;
1524 int err, status = -EIO; 1523 int err, status = -EIO;
1525 1524
1526 if (xprt->shutdown || !xprt_bound(xprt)) 1525 if (xprt->shutdown)
1527 goto out; 1526 goto out;
1528 1527
1529 /* Start by resetting any existing state */ 1528 /* Start by resetting any existing state */
@@ -1564,7 +1563,7 @@ static void xs_udp_connect_worker6(struct work_struct *work)
1564 struct socket *sock = transport->sock; 1563 struct socket *sock = transport->sock;
1565 int err, status = -EIO; 1564 int err, status = -EIO;
1566 1565
1567 if (xprt->shutdown || !xprt_bound(xprt)) 1566 if (xprt->shutdown)
1568 goto out; 1567 goto out;
1569 1568
1570 /* Start by resetting any existing state */ 1569 /* Start by resetting any existing state */
@@ -1648,6 +1647,9 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
1648 write_unlock_bh(&sk->sk_callback_lock); 1647 write_unlock_bh(&sk->sk_callback_lock);
1649 } 1648 }
1650 1649
1650 if (!xprt_bound(xprt))
1651 return -ENOTCONN;
1652
1651 /* Tell the socket layer to start connecting... */ 1653 /* Tell the socket layer to start connecting... */
1652 xprt->stat.connect_count++; 1654 xprt->stat.connect_count++;
1653 xprt->stat.connect_start = jiffies; 1655 xprt->stat.connect_start = jiffies;
@@ -1668,7 +1670,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
1668 struct socket *sock = transport->sock; 1670 struct socket *sock = transport->sock;
1669 int err, status = -EIO; 1671 int err, status = -EIO;
1670 1672
1671 if (xprt->shutdown || !xprt_bound(xprt)) 1673 if (xprt->shutdown)
1672 goto out; 1674 goto out;
1673 1675
1674 if (!sock) { 1676 if (!sock) {
@@ -1728,7 +1730,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work)
1728 struct socket *sock = transport->sock; 1730 struct socket *sock = transport->sock;
1729 int err, status = -EIO; 1731 int err, status = -EIO;
1730 1732
1731 if (xprt->shutdown || !xprt_bound(xprt)) 1733 if (xprt->shutdown)
1732 goto out; 1734 goto out;
1733 1735
1734 if (!sock) { 1736 if (!sock) {
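The xprtsock.c hunks also factor the duplicated bodies of the UDP and TCP write-space callbacks into one xs_write_space() helper, leaving each callback with only its own writability test. A rough illustration of the shape of that refactor, using hypothetical simplified types in place of struct sock and the transport:

#include <stdbool.h>
#include <stddef.h>

struct xsock {
	bool nospace;				/* stands in for the NOSPACE bits */
	void (*kick_waiters)(struct xsock *);	/* xprt_write_space() in the real code */
};

static void write_space(struct xsock *xs)
{
	if (!xs)
		return;
	xs->nospace = false;			/* common part, done once */
	if (xs->kick_waiters)
		xs->kick_waiters(xs);
}

static void udp_write_space(struct xsock *xs, size_t sndbuf_free)
{
	if (sndbuf_free > 0)			/* sock_writeable() analogue */
		write_space(xs);
}

static void tcp_write_space(struct xsock *xs, size_t wspace, size_t min_wspace)
{
	if (wspace >= min_wspace)		/* sk_stream_wspace() analogue */
		write_space(xs);
}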
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 972201cd5fa7..0b15d7250c40 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -61,7 +61,7 @@ static struct ctl_table_root net_sysctl_root = {
61static int net_ctl_ro_header_perms(struct ctl_table_root *root, 61static int net_ctl_ro_header_perms(struct ctl_table_root *root,
62 struct nsproxy *namespaces, struct ctl_table *table) 62 struct nsproxy *namespaces, struct ctl_table *table)
63{ 63{
64 if (namespaces->net_ns == &init_net) 64 if (net_eq(namespaces->net_ns, &init_net))
65 return table->mode; 65 return table->mode;
66 else 66 else
67 return table->mode & ~0222; 67 return table->mode & ~0222;
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 3ddaff42d1bb..a3bfd4064912 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -119,7 +119,7 @@ static struct bclink *bclink = NULL;
119static struct link *bcl = NULL; 119static struct link *bcl = NULL;
120static DEFINE_SPINLOCK(bc_lock); 120static DEFINE_SPINLOCK(bc_lock);
121 121
122char tipc_bclink_name[] = "multicast-link"; 122const char tipc_bclink_name[] = "multicast-link";
123 123
124 124
125static u32 buf_seqno(struct sk_buff *buf) 125static u32 buf_seqno(struct sk_buff *buf)
@@ -800,7 +800,7 @@ int tipc_bclink_init(void)
800 tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); 800 tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
801 bcl->b_ptr = &bcbearer->bearer; 801 bcl->b_ptr = &bcbearer->bearer;
802 bcl->state = WORKING_WORKING; 802 bcl->state = WORKING_WORKING;
803 sprintf(bcl->name, tipc_bclink_name); 803 strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
804 804
805 if (BCLINK_LOG_BUF_SIZE) { 805 if (BCLINK_LOG_BUF_SIZE) {
806 char *pb = kmalloc(BCLINK_LOG_BUF_SIZE, GFP_ATOMIC); 806 char *pb = kmalloc(BCLINK_LOG_BUF_SIZE, GFP_ATOMIC);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 2f2d731bc1c2..4c1771e95c99 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -70,7 +70,7 @@ struct port_list {
70 70
71struct tipc_node; 71struct tipc_node;
72 72
73extern char tipc_bclink_name[]; 73extern const char tipc_bclink_name[];
74 74
75 75
76/** 76/**
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
index 29ecae851668..1885a7edb0c8 100644
--- a/net/tipc/dbg.c
+++ b/net/tipc/dbg.c
@@ -258,7 +258,7 @@ void tipc_printf(struct print_buf *pb, const char *fmt, ...)
258 } 258 }
259 259
260 if (pb->echo) 260 if (pb->echo)
261 printk(print_string); 261 printk("%s", print_string);
262 262
263 spin_unlock_bh(&print_lock); 263 spin_unlock_bh(&print_lock);
264} 264}
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 20d98c56e152..2c24e7d6d950 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -703,7 +703,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
703 703
704 link_info.dest = htonl(tipc_own_addr & 0xfffff00); 704 link_info.dest = htonl(tipc_own_addr & 0xfffff00);
705 link_info.up = htonl(1); 705 link_info.up = htonl(1);
706 sprintf(link_info.str, tipc_bclink_name); 706 strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME);
707 tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); 707 tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info));
708 708
709 /* Add TLVs for any other links in scope */ 709 /* Add TLVs for any other links in scope */
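The tipc hunks replace sprintf(dst, name) and printk(name) with strlcpy() and an explicit "%s" format: passing a non-literal string as the format both risks overflow and misinterprets any '%' it contains. A small userspace demonstration; the buffer size and the strlcpy fallback are local assumptions, since glibc has historically lacked strlcpy:

#include <stdio.h>
#include <string.h>

#define TIPC_MAX_LINK_NAME 60	/* value assumed for illustration */

static size_t strlcpy_compat(char *dst, const char *src, size_t size)
{
	size_t len = strlen(src);

	if (size) {
		size_t n = len < size - 1 ? len : size - 1;
		memcpy(dst, src, n);
		dst[n] = '\0';
	}
	return len;
}

int main(void)
{
	const char name[] = "multicast-link %s";	/* hostile '%' on purpose */
	char buf[TIPC_MAX_LINK_NAME];

	/* sprintf(buf, name) would treat "%s" as a conversion and read a
	 * bogus argument; the bounded copy stores the bytes verbatim. */
	strlcpy_compat(buf, name, sizeof(buf));
	printf("%s\n", buf);
	return 0;
}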
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index d1b89820ab4f..baac91049b0e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1178,8 +1178,7 @@ out_unlock:
1178 unix_state_unlock(other); 1178 unix_state_unlock(other);
1179 1179
1180out: 1180out:
1181 if (skb) 1181 kfree_skb(skb);
1182 kfree_skb(skb);
1183 if (newsk) 1182 if (newsk)
1184 unix_release_sock(newsk, 0); 1183 unix_release_sock(newsk, 0);
1185 if (other) 1184 if (other)
diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c
index 39701dec1dba..466e2d22d256 100644
--- a/net/wanrouter/wanmain.c
+++ b/net/wanrouter/wanmain.c
@@ -86,8 +86,10 @@ static int wanrouter_device_del_if(struct wan_device *wandev,
86 86
87static struct wan_device *wanrouter_find_device(char *name); 87static struct wan_device *wanrouter_find_device(char *name);
88static int wanrouter_delete_interface(struct wan_device *wandev, char *name); 88static int wanrouter_delete_interface(struct wan_device *wandev, char *name);
89static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); 89static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
90static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); 90 __acquires(lock);
91static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
92 __releases(lock);
91 93
92 94
93 95
@@ -763,12 +765,14 @@ static int wanrouter_delete_interface(struct wan_device *wandev, char *name)
763} 765}
764 766
765static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) 767static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
768 __acquires(lock)
766{ 769{
767 spin_lock_irqsave(lock, *smp_flags); 770 spin_lock_irqsave(lock, *smp_flags);
768} 771}
769 772
770 773
771static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) 774static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags)
775 __releases(lock)
772{ 776{
773 spin_unlock_irqrestore(lock, *smp_flags); 777 spin_unlock_irqrestore(lock, *smp_flags);
774} 778}
diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c
index 267f7ff49827..c44d96b3a437 100644
--- a/net/wanrouter/wanproc.c
+++ b/net/wanrouter/wanproc.c
@@ -80,6 +80,7 @@ static struct proc_dir_entry *proc_router;
80 * Iterator 80 * Iterator
81 */ 81 */
82static void *r_start(struct seq_file *m, loff_t *pos) 82static void *r_start(struct seq_file *m, loff_t *pos)
83 __acquires(kernel_lock)
83{ 84{
84 struct wan_device *wandev; 85 struct wan_device *wandev;
85 loff_t l = *pos; 86 loff_t l = *pos;
@@ -101,6 +102,7 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos)
101} 102}
102 103
103static void r_stop(struct seq_file *m, void *v) 104static void r_stop(struct seq_file *m, void *v)
105 __releases(kernel_lock)
104{ 106{
105 unlock_kernel(); 107 unlock_kernel();
106} 108}
diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c
index cb3b4ad53683..5d149c1b5f0d 100644
--- a/net/wimax/op-msg.c
+++ b/net/wimax/op-msg.c
@@ -258,7 +258,6 @@ EXPORT_SYMBOL_GPL(wimax_msg_len);
258 */ 258 */
259int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb) 259int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb)
260{ 260{
261 int result;
262 struct device *dev = wimax_dev->net_dev->dev.parent; 261 struct device *dev = wimax_dev->net_dev->dev.parent;
263 void *msg = skb->data; 262 void *msg = skb->data;
264 size_t size = skb->len; 263 size_t size = skb->len;
@@ -266,11 +265,9 @@ int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb)
266 265
267 d_printf(1, dev, "CTX: wimax msg, %zu bytes\n", size); 266 d_printf(1, dev, "CTX: wimax msg, %zu bytes\n", size);
268 d_dump(2, dev, msg, size); 267 d_dump(2, dev, msg, size);
269 result = genlmsg_multicast(skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); 268 genlmsg_multicast(skb, 0, wimax_gnl_mcg.id, GFP_KERNEL);
270 d_printf(1, dev, "CTX: genl multicast result %d\n", result); 269 d_printf(1, dev, "CTX: genl multicast done\n");
271 if (result == -ESRCH) /* Nobody connected, ignore it */ 270 return 0;
272 result = 0; /* btw, the skb is freed already */
273 return result;
274} 271}
275EXPORT_SYMBOL_GPL(wimax_msg_send); 272EXPORT_SYMBOL_GPL(wimax_msg_send);
276 273
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 3869c0327882..a0ee76b52510 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -163,16 +163,12 @@ int wimax_gnl_re_state_change_send(
163 struct device *dev = wimax_dev_to_dev(wimax_dev); 163 struct device *dev = wimax_dev_to_dev(wimax_dev);
164 d_fnstart(3, dev, "(wimax_dev %p report_skb %p)\n", 164 d_fnstart(3, dev, "(wimax_dev %p report_skb %p)\n",
165 wimax_dev, report_skb); 165 wimax_dev, report_skb);
166 if (report_skb == NULL) 166 if (report_skb == NULL) {
167 result = -ENOMEM;
167 goto out; 168 goto out;
168 genlmsg_end(report_skb, header);
169 result = genlmsg_multicast(report_skb, 0, wimax_gnl_mcg.id, GFP_KERNEL);
170 if (result == -ESRCH) /* Nobody connected, ignore it */
171 result = 0; /* btw, the skb is freed already */
172 if (result < 0) {
173 dev_err(dev, "RE_STCH: Error sending: %d\n", result);
174 nlmsg_free(report_skb);
175 } 169 }
170 genlmsg_end(report_skb, header);
171 genlmsg_multicast(report_skb, 0, wimax_gnl_mcg.id, GFP_KERNEL);
176out: 172out:
177 d_fnend(3, dev, "(wimax_dev %p report_skb %p) = %d\n", 173 d_fnend(3, dev, "(wimax_dev %p report_skb %p) = %d\n",
178 wimax_dev, report_skb, result); 174 wimax_dev, report_skb, result);
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index e28e2b8fa436..3c3bc9e579ed 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -10,51 +10,19 @@ config CFG80211_REG_DEBUG
10 10
11 If unsure, say N. 11 If unsure, say N.
12 12
13config NL80211
14 bool "nl80211 new netlink interface support"
15 depends on CFG80211
16 default y
17 ---help---
18 This option turns on the new netlink interface
19 (nl80211) support in cfg80211.
20
21 If =n, drivers using mac80211 will be configured via
22 wireless extension support provided by that subsystem.
23
24 If unsure, say Y.
25
26config WIRELESS_OLD_REGULATORY 13config WIRELESS_OLD_REGULATORY
27 bool "Old wireless static regulatory definitions" 14 bool "Old wireless static regulatory definitions"
28 default y 15 default n
29 ---help--- 16 ---help---
30 This option enables the old static regulatory information 17 This option enables the old static regulatory information
31 and uses it within the new framework. This is available 18 and uses it within the new framework. This option is available
32 temporarily as an option to help prevent immediate issues 19 for historical reasons and it is advised to leave it off.
33 due to the switch to the new regulatory framework which 20
34 does require a new userspace application which has the 21 For details see:
35 database of regulatory information (CRDA) and another for 22
36 setting regulatory domains (iw). 23 http://wireless.kernel.org/en/developers/Regulatory
37 24
38 For more information see: 25 Say N and if you say Y, please tell us why. The default is N.
39
40 http://wireless.kernel.org/en/developers/Regulatory/CRDA
41 http://wireless.kernel.org/en/users/Documentation/iw
42
43 It is important to note though that if you *do* have CRDA present
44 and if this option is enabled CRDA *will* be called to update the
45 regulatory domain (for US and JP only). Support for letting the user
46 set the regulatory domain through iw is also supported. This option
47 mainly exists to leave around for a kernel release some old static
48 regulatory domains that were defined and to keep around the old
49 ieee80211_regdom module parameter. This is being phased out and you
50 should stop using them ASAP.
51
52 Note: You will need CRDA if you want 802.11d support
53
54 Say Y unless you have installed a new userspace application.
55 Also say Y if have one currently depending on the ieee80211_regdom
56 module parameter and cannot port it to use the new userspace
57 interfaces.
58 26
59config WIRELESS_EXT 27config WIRELESS_EXT
60 bool "Wireless extensions" 28 bool "Wireless extensions"
@@ -102,3 +70,13 @@ config LIB80211_CRYPT_CCMP
102 70
103config LIB80211_CRYPT_TKIP 71config LIB80211_CRYPT_TKIP
104 tristate 72 tristate
73
74config LIB80211_DEBUG
75 bool "lib80211 debugging messages"
76 depends on LIB80211
77 default n
78 ---help---
79 You can enable this if you want verbose debugging messages
80 from lib80211.
81
82 If unsure, say N.
diff --git a/net/wireless/Makefile b/net/wireless/Makefile
index 938a334c8dbc..6d1e7b27b752 100644
--- a/net/wireless/Makefile
+++ b/net/wireless/Makefile
@@ -5,8 +5,7 @@ obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o
5obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o 5obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o
6obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o 6obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o
7 7
8cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o 8cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o mlme.o
9cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o 9cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o
10cfg80211-$(CONFIG_NL80211) += nl80211.o
11 10
12ccflags-y += -D__CHECK_ENDIAN__ 11ccflags-y += -D__CHECK_ENDIAN__
diff --git a/net/wireless/core.c b/net/wireless/core.c
index b96fc0c3f1c4..d1f556535f6d 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -7,7 +7,6 @@
7#include <linux/if.h> 7#include <linux/if.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/err.h> 9#include <linux/err.h>
10#include <linux/mutex.h>
11#include <linux/list.h> 10#include <linux/list.h>
12#include <linux/nl80211.h> 11#include <linux/nl80211.h>
13#include <linux/debugfs.h> 12#include <linux/debugfs.h>
@@ -31,18 +30,29 @@ MODULE_DESCRIPTION("wireless configuration support");
31 * only read the list, and that can happen quite 30 * only read the list, and that can happen quite
32 * often because we need to do it for each command */ 31 * often because we need to do it for each command */
33LIST_HEAD(cfg80211_drv_list); 32LIST_HEAD(cfg80211_drv_list);
34DEFINE_MUTEX(cfg80211_drv_mutex); 33
34/*
35 * This is used to protect the cfg80211_drv_list, cfg80211_regdomain,
36 * country_ie_regdomain, the reg_beacon_list and the last regulatory
37 * request receipt (last_request).
38 */
39DEFINE_MUTEX(cfg80211_mutex);
35 40
36/* for debugfs */ 41/* for debugfs */
37static struct dentry *ieee80211_debugfs_dir; 42static struct dentry *ieee80211_debugfs_dir;
38 43
39/* requires cfg80211_drv_mutex to be held! */ 44/* requires cfg80211_mutex to be held! */
40static struct cfg80211_registered_device *cfg80211_drv_by_wiphy(int wiphy) 45struct cfg80211_registered_device *cfg80211_drv_by_wiphy_idx(int wiphy_idx)
41{ 46{
42 struct cfg80211_registered_device *result = NULL, *drv; 47 struct cfg80211_registered_device *result = NULL, *drv;
43 48
49 if (!wiphy_idx_valid(wiphy_idx))
50 return NULL;
51
52 assert_cfg80211_lock();
53
44 list_for_each_entry(drv, &cfg80211_drv_list, list) { 54 list_for_each_entry(drv, &cfg80211_drv_list, list) {
45 if (drv->idx == wiphy) { 55 if (drv->wiphy_idx == wiphy_idx) {
46 result = drv; 56 result = drv;
47 break; 57 break;
48 } 58 }
@@ -51,17 +61,44 @@ static struct cfg80211_registered_device *cfg80211_drv_by_wiphy(int wiphy)
51 return result; 61 return result;
52} 62}
53 63
64int get_wiphy_idx(struct wiphy *wiphy)
65{
66 struct cfg80211_registered_device *drv;
67 if (!wiphy)
68 return WIPHY_IDX_STALE;
69 drv = wiphy_to_dev(wiphy);
70 return drv->wiphy_idx;
71}
72
54/* requires cfg80211_drv_mutex to be held! */ 73/* requires cfg80211_drv_mutex to be held! */
55static struct cfg80211_registered_device * 74struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx)
75{
76 struct cfg80211_registered_device *drv;
77
78 if (!wiphy_idx_valid(wiphy_idx))
79 return NULL;
80
81 assert_cfg80211_lock();
82
83 drv = cfg80211_drv_by_wiphy_idx(wiphy_idx);
84 if (!drv)
85 return NULL;
86 return &drv->wiphy;
87}
88
89/* requires cfg80211_mutex to be held! */
90struct cfg80211_registered_device *
56__cfg80211_drv_from_info(struct genl_info *info) 91__cfg80211_drv_from_info(struct genl_info *info)
57{ 92{
58 int ifindex; 93 int ifindex;
59 struct cfg80211_registered_device *bywiphy = NULL, *byifidx = NULL; 94 struct cfg80211_registered_device *bywiphyidx = NULL, *byifidx = NULL;
60 struct net_device *dev; 95 struct net_device *dev;
61 int err = -EINVAL; 96 int err = -EINVAL;
62 97
98 assert_cfg80211_lock();
99
63 if (info->attrs[NL80211_ATTR_WIPHY]) { 100 if (info->attrs[NL80211_ATTR_WIPHY]) {
64 bywiphy = cfg80211_drv_by_wiphy( 101 bywiphyidx = cfg80211_drv_by_wiphy_idx(
65 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY])); 102 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY]));
66 err = -ENODEV; 103 err = -ENODEV;
67 } 104 }
@@ -78,14 +115,14 @@ __cfg80211_drv_from_info(struct genl_info *info)
78 err = -ENODEV; 115 err = -ENODEV;
79 } 116 }
80 117
81 if (bywiphy && byifidx) { 118 if (bywiphyidx && byifidx) {
82 if (bywiphy != byifidx) 119 if (bywiphyidx != byifidx)
83 return ERR_PTR(-EINVAL); 120 return ERR_PTR(-EINVAL);
84 else 121 else
85 return bywiphy; /* == byifidx */ 122 return bywiphyidx; /* == byifidx */
86 } 123 }
87 if (bywiphy) 124 if (bywiphyidx)
88 return bywiphy; 125 return bywiphyidx;
89 126
90 if (byifidx) 127 if (byifidx)
91 return byifidx; 128 return byifidx;
@@ -98,7 +135,7 @@ cfg80211_get_dev_from_info(struct genl_info *info)
98{ 135{
99 struct cfg80211_registered_device *drv; 136 struct cfg80211_registered_device *drv;
100 137
101 mutex_lock(&cfg80211_drv_mutex); 138 mutex_lock(&cfg80211_mutex);
102 drv = __cfg80211_drv_from_info(info); 139 drv = __cfg80211_drv_from_info(info);
103 140
104 /* if it is not an error we grab the lock on 141 /* if it is not an error we grab the lock on
@@ -107,7 +144,7 @@ cfg80211_get_dev_from_info(struct genl_info *info)
107 if (!IS_ERR(drv)) 144 if (!IS_ERR(drv))
108 mutex_lock(&drv->mtx); 145 mutex_lock(&drv->mtx);
109 146
110 mutex_unlock(&cfg80211_drv_mutex); 147 mutex_unlock(&cfg80211_mutex);
111 148
112 return drv; 149 return drv;
113} 150}
@@ -118,7 +155,7 @@ cfg80211_get_dev_from_ifindex(int ifindex)
118 struct cfg80211_registered_device *drv = ERR_PTR(-ENODEV); 155 struct cfg80211_registered_device *drv = ERR_PTR(-ENODEV);
119 struct net_device *dev; 156 struct net_device *dev;
120 157
121 mutex_lock(&cfg80211_drv_mutex); 158 mutex_lock(&cfg80211_mutex);
122 dev = dev_get_by_index(&init_net, ifindex); 159 dev = dev_get_by_index(&init_net, ifindex);
123 if (!dev) 160 if (!dev)
124 goto out; 161 goto out;
@@ -129,7 +166,7 @@ cfg80211_get_dev_from_ifindex(int ifindex)
129 drv = ERR_PTR(-ENODEV); 166 drv = ERR_PTR(-ENODEV);
130 dev_put(dev); 167 dev_put(dev);
131 out: 168 out:
132 mutex_unlock(&cfg80211_drv_mutex); 169 mutex_unlock(&cfg80211_mutex);
133 return drv; 170 return drv;
134} 171}
135 172
@@ -139,49 +176,43 @@ void cfg80211_put_dev(struct cfg80211_registered_device *drv)
139 mutex_unlock(&drv->mtx); 176 mutex_unlock(&drv->mtx);
140} 177}
141 178
179/* requires cfg80211_mutex to be held */
142int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, 180int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
143 char *newname) 181 char *newname)
144{ 182{
145 struct cfg80211_registered_device *drv; 183 struct cfg80211_registered_device *drv;
146 int idx, taken = -1, result, digits; 184 int wiphy_idx, taken = -1, result, digits;
147 185
148 mutex_lock(&cfg80211_drv_mutex); 186 assert_cfg80211_lock();
149 187
150 /* prohibit calling the thing phy%d when %d is not its number */ 188 /* prohibit calling the thing phy%d when %d is not its number */
151 sscanf(newname, PHY_NAME "%d%n", &idx, &taken); 189 sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken);
152 if (taken == strlen(newname) && idx != rdev->idx) { 190 if (taken == strlen(newname) && wiphy_idx != rdev->wiphy_idx) {
153 /* count number of places needed to print idx */ 191 /* count number of places needed to print wiphy_idx */
154 digits = 1; 192 digits = 1;
155 while (idx /= 10) 193 while (wiphy_idx /= 10)
156 digits++; 194 digits++;
157 /* 195 /*
158 * deny the name if it is phy<idx> where <idx> is printed 196 * deny the name if it is phy<idx> where <idx> is printed
159 * without leading zeroes. taken == strlen(newname) here 197 * without leading zeroes. taken == strlen(newname) here
160 */ 198 */
161 result = -EINVAL;
162 if (taken == strlen(PHY_NAME) + digits) 199 if (taken == strlen(PHY_NAME) + digits)
163 goto out_unlock; 200 return -EINVAL;
164 } 201 }
165 202
166 203
167 /* Ignore nop renames */ 204 /* Ignore nop renames */
168 result = 0;
169 if (strcmp(newname, dev_name(&rdev->wiphy.dev)) == 0) 205 if (strcmp(newname, dev_name(&rdev->wiphy.dev)) == 0)
170 goto out_unlock; 206 return 0;
171 207
172 /* Ensure another device does not already have this name. */ 208 /* Ensure another device does not already have this name. */
173 list_for_each_entry(drv, &cfg80211_drv_list, list) { 209 list_for_each_entry(drv, &cfg80211_drv_list, list)
174 result = -EINVAL;
175 if (strcmp(newname, dev_name(&drv->wiphy.dev)) == 0) 210 if (strcmp(newname, dev_name(&drv->wiphy.dev)) == 0)
176 goto out_unlock; 211 return -EINVAL;
177 }
178 212
179 /* this will only check for collisions in sysfs
180 * which is not even always compiled in.
181 */
182 result = device_rename(&rdev->wiphy.dev, newname); 213 result = device_rename(&rdev->wiphy.dev, newname);
183 if (result) 214 if (result)
184 goto out_unlock; 215 return result;
185 216
186 if (rdev->wiphy.debugfsdir && 217 if (rdev->wiphy.debugfsdir &&
187 !debugfs_rename(rdev->wiphy.debugfsdir->d_parent, 218 !debugfs_rename(rdev->wiphy.debugfsdir->d_parent,
@@ -191,13 +222,9 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
191 printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n", 222 printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n",
192 newname); 223 newname);
193 224
194 result = 0; 225 nl80211_notify_dev_rename(rdev);
195out_unlock:
196 mutex_unlock(&cfg80211_drv_mutex);
197 if (result == 0)
198 nl80211_notify_dev_rename(rdev);
199 226
200 return result; 227 return 0;
201} 228}
202 229
203/* exported functions */ 230/* exported functions */
@@ -220,26 +247,28 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv)
220 247
221 drv->ops = ops; 248 drv->ops = ops;
222 249
223 mutex_lock(&cfg80211_drv_mutex); 250 mutex_lock(&cfg80211_mutex);
224 251
225 drv->idx = wiphy_counter++; 252 drv->wiphy_idx = wiphy_counter++;
226 253
227 if (unlikely(drv->idx < 0)) { 254 if (unlikely(!wiphy_idx_valid(drv->wiphy_idx))) {
228 wiphy_counter--; 255 wiphy_counter--;
229 mutex_unlock(&cfg80211_drv_mutex); 256 mutex_unlock(&cfg80211_mutex);
230 /* ugh, wrapped! */ 257 /* ugh, wrapped! */
231 kfree(drv); 258 kfree(drv);
232 return NULL; 259 return NULL;
233 } 260 }
234 261
235 mutex_unlock(&cfg80211_drv_mutex); 262 mutex_unlock(&cfg80211_mutex);
236 263
237 /* give it a proper name */ 264 /* give it a proper name */
238 dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->idx); 265 dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->wiphy_idx);
239 266
240 mutex_init(&drv->mtx); 267 mutex_init(&drv->mtx);
241 mutex_init(&drv->devlist_mtx); 268 mutex_init(&drv->devlist_mtx);
242 INIT_LIST_HEAD(&drv->netdev_list); 269 INIT_LIST_HEAD(&drv->netdev_list);
270 spin_lock_init(&drv->bss_lock);
271 INIT_LIST_HEAD(&drv->bss_list);
243 272
244 device_initialize(&drv->wiphy.dev); 273 device_initialize(&drv->wiphy.dev);
245 drv->wiphy.dev.class = &ieee80211_class; 274 drv->wiphy.dev.class = &ieee80211_class;
@@ -259,6 +288,9 @@ int wiphy_register(struct wiphy *wiphy)
259 int i; 288 int i;
260 u16 ifmodes = wiphy->interface_modes; 289 u16 ifmodes = wiphy->interface_modes;
261 290
291 if (WARN_ON(wiphy->max_scan_ssids < 1))
292 return -EINVAL;
293
262 /* sanity check ifmodes */ 294 /* sanity check ifmodes */
263 WARN_ON(!ifmodes); 295 WARN_ON(!ifmodes);
264 ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; 296 ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1;
@@ -273,10 +305,16 @@ int wiphy_register(struct wiphy *wiphy)
273 305
274 sband->band = band; 306 sband->band = band;
275 307
276 if (!sband->n_channels || !sband->n_bitrates) { 308 if (WARN_ON(!sband->n_channels || !sband->n_bitrates))
277 WARN_ON(1); 309 return -EINVAL;
310
311 /*
312 * Since we use a u32 for rate bitmaps in
313 * ieee80211_get_response_rate, we cannot
314 * have more than 32 legacy rates.
315 */
316 if (WARN_ON(sband->n_bitrates > 32))
278 return -EINVAL; 317 return -EINVAL;
279 }
280 318
281 for (i = 0; i < sband->n_channels; i++) { 319 for (i = 0; i < sband->n_channels; i++) {
282 sband->channels[i].orig_flags = 320 sband->channels[i].orig_flags =
@@ -299,10 +337,10 @@ int wiphy_register(struct wiphy *wiphy)
299 /* check and set up bitrates */ 337 /* check and set up bitrates */
300 ieee80211_set_bitrate_flags(wiphy); 338 ieee80211_set_bitrate_flags(wiphy);
301 339
302 mutex_lock(&cfg80211_drv_mutex); 340 mutex_lock(&cfg80211_mutex);
303 341
304 /* set up regulatory info */ 342 /* set up regulatory info */
305 wiphy_update_regulatory(wiphy, REGDOM_SET_BY_CORE); 343 wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE);
306 344
307 res = device_add(&drv->wiphy.dev); 345 res = device_add(&drv->wiphy.dev);
308 if (res) 346 if (res)
@@ -317,9 +355,20 @@ int wiphy_register(struct wiphy *wiphy)
317 if (IS_ERR(drv->wiphy.debugfsdir)) 355 if (IS_ERR(drv->wiphy.debugfsdir))
318 drv->wiphy.debugfsdir = NULL; 356 drv->wiphy.debugfsdir = NULL;
319 357
358 if (wiphy->custom_regulatory) {
359 struct regulatory_request request;
360
361 request.wiphy_idx = get_wiphy_idx(wiphy);
362 request.initiator = NL80211_REGDOM_SET_BY_DRIVER;
363 request.alpha2[0] = '9';
364 request.alpha2[1] = '9';
365
366 nl80211_send_reg_change_event(&request);
367 }
368
320 res = 0; 369 res = 0;
321out_unlock: 370out_unlock:
322 mutex_unlock(&cfg80211_drv_mutex); 371 mutex_unlock(&cfg80211_mutex);
323 return res; 372 return res;
324} 373}
325EXPORT_SYMBOL(wiphy_register); 374EXPORT_SYMBOL(wiphy_register);
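wiphy_register() now refuses a band that advertises more than 32 legacy bitrates because, as the new comment notes, rate bitmaps are carried in a u32 in ieee80211_get_response_rate(), one bit per rate. A rough userspace model of such a bitmap walk; get_response_rate() below is an assumed, cut-down stand-in for the real helper, not its actual implementation:

#include <stdio.h>
#include <stdint.h>

/* pick the highest basic rate not faster than the given rate,
 * walking a u32 bitmap with one bit per legacy rate */
static int get_response_rate(const int *rates_kbps, int n_rates,
                             uint32_t basic_rates, int rate_idx)
{
        int i, result = 0;

        for (i = 0; i < n_rates; i++) {
                if (!(basic_rates & (1u << i)))
                        continue;
                if (rates_kbps[i] > rates_kbps[rate_idx])
                        break;
                result = i;
        }
        return result;
}

int main(void)
{
        /* 1, 2, 5.5 and 11 Mb/s in units of 100 kb/s */
        int rates[] = { 10, 20, 55, 110 };
        uint32_t basic = (1u << 0) | (1u << 2); /* 1 and 5.5 Mb/s are basic */

        printf("respond at index %d\n",
               get_response_rate(rates, 4, basic, 3)); /* -> 2 (5.5 Mb/s) */
        return 0;
}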
@@ -329,7 +378,7 @@ void wiphy_unregister(struct wiphy *wiphy)
329 struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy); 378 struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy);
330 379
331 /* protect the device list */ 380 /* protect the device list */
332 mutex_lock(&cfg80211_drv_mutex); 381 mutex_lock(&cfg80211_mutex);
333 382
334 BUG_ON(!list_empty(&drv->netdev_list)); 383 BUG_ON(!list_empty(&drv->netdev_list));
335 384
@@ -355,14 +404,17 @@ void wiphy_unregister(struct wiphy *wiphy)
355 device_del(&drv->wiphy.dev); 404 device_del(&drv->wiphy.dev);
356 debugfs_remove(drv->wiphy.debugfsdir); 405 debugfs_remove(drv->wiphy.debugfsdir);
357 406
358 mutex_unlock(&cfg80211_drv_mutex); 407 mutex_unlock(&cfg80211_mutex);
359} 408}
360EXPORT_SYMBOL(wiphy_unregister); 409EXPORT_SYMBOL(wiphy_unregister);
361 410
362void cfg80211_dev_free(struct cfg80211_registered_device *drv) 411void cfg80211_dev_free(struct cfg80211_registered_device *drv)
363{ 412{
413 struct cfg80211_internal_bss *scan, *tmp;
364 mutex_destroy(&drv->mtx); 414 mutex_destroy(&drv->mtx);
365 mutex_destroy(&drv->devlist_mtx); 415 mutex_destroy(&drv->devlist_mtx);
416 list_for_each_entry_safe(scan, tmp, &drv->bss_list, list)
417 cfg80211_put_bss(&scan->pub);
366 kfree(drv); 418 kfree(drv);
367} 419}
368 420
diff --git a/net/wireless/core.h b/net/wireless/core.h
index f7fb9f413028..d43daa236ef9 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -8,6 +8,9 @@
8#include <linux/mutex.h> 8#include <linux/mutex.h>
9#include <linux/list.h> 9#include <linux/list.h>
10#include <linux/netdevice.h> 10#include <linux/netdevice.h>
11#include <linux/kref.h>
12#include <linux/rbtree.h>
13#include <linux/mutex.h>
11#include <net/genetlink.h> 14#include <net/genetlink.h>
12#include <net/wireless.h> 15#include <net/wireless.h>
13#include <net/cfg80211.h> 16#include <net/cfg80211.h>
@@ -35,12 +38,20 @@ struct cfg80211_registered_device {
35 enum environment_cap env; 38 enum environment_cap env;
36 39
37 /* wiphy index, internal only */ 40 /* wiphy index, internal only */
38 int idx; 41 int wiphy_idx;
39 42
40 /* associate netdev list */ 43 /* associate netdev list */
41 struct mutex devlist_mtx; 44 struct mutex devlist_mtx;
42 struct list_head netdev_list; 45 struct list_head netdev_list;
43 46
47 /* BSSes/scanning */
48 spinlock_t bss_lock;
49 struct list_head bss_list;
50 struct rb_root bss_tree;
51 u32 bss_generation;
52 struct cfg80211_scan_request *scan_req; /* protected by RTNL */
53 unsigned long suspend_at;
54
44 /* must be last because of the way we do wiphy_priv(), 55 /* must be last because of the way we do wiphy_priv(),
45 * and it should at least be aligned to NETDEV_ALIGN */ 56 * and it should at least be aligned to NETDEV_ALIGN */
46 struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN))); 57 struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN)));
@@ -53,9 +64,44 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
53 return container_of(wiphy, struct cfg80211_registered_device, wiphy); 64 return container_of(wiphy, struct cfg80211_registered_device, wiphy);
54} 65}
55 66
56extern struct mutex cfg80211_drv_mutex; 67/* Note 0 is valid, hence phy0 */
68static inline
69bool wiphy_idx_valid(int wiphy_idx)
70{
71 return (wiphy_idx >= 0);
72}
73
74extern struct mutex cfg80211_mutex;
57extern struct list_head cfg80211_drv_list; 75extern struct list_head cfg80211_drv_list;
58 76
77static inline void assert_cfg80211_lock(void)
78{
79 WARN_ON(!mutex_is_locked(&cfg80211_mutex));
80}
81
82/*
83 * You can use this to mark a wiphy_idx as not having an associated wiphy.
84 * It guarantees cfg80211_drv_by_wiphy_idx(wiphy_idx) will return NULL
85 */
86#define WIPHY_IDX_STALE -1
87
88struct cfg80211_internal_bss {
89 struct list_head list;
90 struct rb_node rbn;
91 unsigned long ts;
92 struct kref ref;
93 bool hold;
94
95 /* must be last because of priv member */
96 struct cfg80211_bss pub;
97};
98
99struct cfg80211_registered_device *cfg80211_drv_by_wiphy_idx(int wiphy_idx);
100int get_wiphy_idx(struct wiphy *wiphy);
101
102struct cfg80211_registered_device *
103__cfg80211_drv_from_info(struct genl_info *info);
104
59/* 105/*
60 * This function returns a pointer to the driver 106 * This function returns a pointer to the driver
61 * that the genl_info item that is passed refers to. 107 * that the genl_info item that is passed refers to.
@@ -63,13 +109,13 @@ extern struct list_head cfg80211_drv_list;
63 * the driver's mutex! 109 * the driver's mutex!
64 * 110 *
65 * This means that you need to call cfg80211_put_dev() 111 * This means that you need to call cfg80211_put_dev()
66 * before being allowed to acquire &cfg80211_drv_mutex! 112 * before being allowed to acquire &cfg80211_mutex!
67 * 113 *
68 * This is necessary because we need to lock the global 114 * This is necessary because we need to lock the global
69 * mutex to get an item off the list safely, and then 115 * mutex to get an item off the list safely, and then
70 * we lock the drv mutex so it doesn't go away under us. 116 * we lock the drv mutex so it doesn't go away under us.
71 * 117 *
72 * We don't want to keep cfg80211_drv_mutex locked 118 * We don't want to keep cfg80211_mutex locked
73 * for all the time in order to allow requests on 119 * for all the time in order to allow requests on
74 * other interfaces to go through at the same time. 120 * other interfaces to go through at the same time.
75 * 121 *
@@ -79,6 +125,9 @@ extern struct list_head cfg80211_drv_list;
79extern struct cfg80211_registered_device * 125extern struct cfg80211_registered_device *
80cfg80211_get_dev_from_info(struct genl_info *info); 126cfg80211_get_dev_from_info(struct genl_info *info);
81 127
128/* requires cfg80211_drv_mutex to be held! */
129struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx);
130
82/* identical to cfg80211_get_dev_from_info but only operate on ifindex */ 131/* identical to cfg80211_get_dev_from_info but only operate on ifindex */
83extern struct cfg80211_registered_device * 132extern struct cfg80211_registered_device *
84cfg80211_get_dev_from_ifindex(int ifindex); 133cfg80211_get_dev_from_ifindex(int ifindex);
@@ -92,6 +141,11 @@ extern int cfg80211_dev_rename(struct cfg80211_registered_device *drv,
92 char *newname); 141 char *newname);
93 142
94void ieee80211_set_bitrate_flags(struct wiphy *wiphy); 143void ieee80211_set_bitrate_flags(struct wiphy *wiphy);
95void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby); 144void wiphy_update_regulatory(struct wiphy *wiphy,
145 enum nl80211_reg_initiator setby);
146
147void cfg80211_bss_expire(struct cfg80211_registered_device *dev);
148void cfg80211_bss_age(struct cfg80211_registered_device *dev,
149 unsigned long age_secs);
96 150
97#endif /* __NET_WIRELESS_CORE_H */ 151#endif /* __NET_WIRELESS_CORE_H */
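core.h now distinguishes a valid wiphy index (>= 0, so phy0 counts) from WIPHY_IDX_STALE, which marks entries whose wiphy has gone away so that cfg80211_drv_by_wiphy_idx() returns NULL for them. A tiny userspace model of that convention; struct fake_request stands in for users such as the regulatory_request shown earlier in this diff:

#include <stdio.h>
#include <stdbool.h>

#define WIPHY_IDX_STALE -1

struct fake_request {
        int wiphy_idx;  /* WIPHY_IDX_STALE once the wiphy is unregistered */
};

static bool wiphy_idx_valid(int wiphy_idx)
{
        return wiphy_idx >= 0;  /* note 0 is valid, hence phy0 */
}

int main(void)
{
        struct fake_request req = { .wiphy_idx = 0 };

        printf("phy0 valid: %d\n", wiphy_idx_valid(req.wiphy_idx));
        req.wiphy_idx = WIPHY_IDX_STALE;        /* wiphy went away */
        printf("stale valid: %d\n", wiphy_idx_valid(req.wiphy_idx));
        return 0;
}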
diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c
index db428194c16a..2301dc1edc4c 100644
--- a/net/wireless/lib80211_crypt_ccmp.c
+++ b/net/wireless/lib80211_crypt_ccmp.c
@@ -337,6 +337,7 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
337 pos += 8; 337 pos += 8;
338 338
339 if (ccmp_replay_check(pn, key->rx_pn)) { 339 if (ccmp_replay_check(pn, key->rx_pn)) {
340#ifdef CONFIG_LIB80211_DEBUG
340 if (net_ratelimit()) { 341 if (net_ratelimit()) {
341 printk(KERN_DEBUG "CCMP: replay detected: STA=%pM " 342 printk(KERN_DEBUG "CCMP: replay detected: STA=%pM "
342 "previous PN %02x%02x%02x%02x%02x%02x " 343 "previous PN %02x%02x%02x%02x%02x%02x "
@@ -346,6 +347,7 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
346 key->rx_pn[3], key->rx_pn[4], key->rx_pn[5], 347 key->rx_pn[3], key->rx_pn[4], key->rx_pn[5],
347 pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); 348 pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]);
348 } 349 }
350#endif
349 key->dot11RSNAStatsCCMPReplays++; 351 key->dot11RSNAStatsCCMPReplays++;
350 return -4; 352 return -4;
351 } 353 }
diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c
index 7e8e22bfed90..c36287399d7e 100644
--- a/net/wireless/lib80211_crypt_tkip.c
+++ b/net/wireless/lib80211_crypt_tkip.c
@@ -465,12 +465,14 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
465 pos += 8; 465 pos += 8;
466 466
467 if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { 467 if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) {
468#ifdef CONFIG_LIB80211_DEBUG
468 if (net_ratelimit()) { 469 if (net_ratelimit()) {
469 printk(KERN_DEBUG "TKIP: replay detected: STA=%pM" 470 printk(KERN_DEBUG "TKIP: replay detected: STA=%pM"
470 " previous TSC %08x%04x received TSC " 471 " previous TSC %08x%04x received TSC "
471 "%08x%04x\n", hdr->addr2, 472 "%08x%04x\n", hdr->addr2,
472 tkey->rx_iv32, tkey->rx_iv16, iv32, iv16); 473 tkey->rx_iv32, tkey->rx_iv16, iv32, iv16);
473 } 474 }
475#endif
474 tkey->dot11RSNAStatsTKIPReplays++; 476 tkey->dot11RSNAStatsTKIPReplays++;
475 return -4; 477 return -4;
476 } 478 }
@@ -505,10 +507,12 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
505 * it needs to be recalculated for the next packet. */ 507 * it needs to be recalculated for the next packet. */
506 tkey->rx_phase1_done = 0; 508 tkey->rx_phase1_done = 0;
507 } 509 }
510#ifdef CONFIG_LIB80211_DEBUG
508 if (net_ratelimit()) { 511 if (net_ratelimit()) {
509 printk(KERN_DEBUG "TKIP: ICV error detected: STA=" 512 printk(KERN_DEBUG "TKIP: ICV error detected: STA="
510 "%pM\n", hdr->addr2); 513 "%pM\n", hdr->addr2);
511 } 514 }
515#endif
512 tkey->dot11RSNAStatsTKIPICVErrors++; 516 tkey->dot11RSNAStatsTKIPICVErrors++;
513 return -5; 517 return -5;
514 } 518 }
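Both lib80211 hunks above wrap the per-packet replay and ICV-error printks in CONFIG_LIB80211_DEBUG while leaving the dot11RSNAStats counters untouched, so production builds keep the statistics but lose the log noise. A compressed sketch of that pattern; LIB80211_DEBUG_EXAMPLE and handle_replay() are illustrative stand-ins, not lib80211 symbols:

#include <stdio.h>

static unsigned long replay_count;

static void handle_replay(unsigned seq_seen, unsigned seq_expected)
{
#ifdef LIB80211_DEBUG_EXAMPLE
        fprintf(stderr, "replay detected: got %u, expected > %u\n",
                seq_seen, seq_expected);
#else
        (void)seq_seen;
        (void)seq_expected;
#endif
        replay_count++;         /* counter stays unconditional, as in the diff */
}

int main(void)
{
        handle_replay(5, 9);
        printf("replays: %lu\n", replay_count);
        return 0;
}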
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
new file mode 100644
index 000000000000..bec5721b6f99
--- /dev/null
+++ b/net/wireless/mlme.c
@@ -0,0 +1,46 @@
1/*
2 * cfg80211 MLME SAP interface
3 *
4 * Copyright (c) 2009, Jouni Malinen <j@w1.fi>
5 */
6
7#include <linux/kernel.h>
8#include <linux/module.h>
9#include <linux/netdevice.h>
10#include <linux/nl80211.h>
11#include <net/cfg80211.h>
12#include "core.h"
13#include "nl80211.h"
14
15void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len)
16{
17 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
18 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
19 nl80211_send_rx_auth(rdev, dev, buf, len);
20}
21EXPORT_SYMBOL(cfg80211_send_rx_auth);
22
23void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len)
24{
25 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
26 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
27 nl80211_send_rx_assoc(rdev, dev, buf, len);
28}
29EXPORT_SYMBOL(cfg80211_send_rx_assoc);
30
31void cfg80211_send_rx_deauth(struct net_device *dev, const u8 *buf, size_t len)
32{
33 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
34 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
35 nl80211_send_rx_deauth(rdev, dev, buf, len);
36}
37EXPORT_SYMBOL(cfg80211_send_rx_deauth);
38
39void cfg80211_send_rx_disassoc(struct net_device *dev, const u8 *buf,
40 size_t len)
41{
42 struct wiphy *wiphy = dev->ieee80211_ptr->wiphy;
43 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
44 nl80211_send_rx_disassoc(rdev, dev, buf, len);
45}
46EXPORT_SYMBOL(cfg80211_send_rx_disassoc);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 1e728fff474e..353e1a4ece83 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7,13 +7,13 @@
7#include <linux/if.h> 7#include <linux/if.h>
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/err.h> 9#include <linux/err.h>
10#include <linux/mutex.h>
11#include <linux/list.h> 10#include <linux/list.h>
12#include <linux/if_ether.h> 11#include <linux/if_ether.h>
13#include <linux/ieee80211.h> 12#include <linux/ieee80211.h>
14#include <linux/nl80211.h> 13#include <linux/nl80211.h>
15#include <linux/rtnetlink.h> 14#include <linux/rtnetlink.h>
16#include <linux/netlink.h> 15#include <linux/netlink.h>
16#include <linux/etherdevice.h>
17#include <net/genetlink.h> 17#include <net/genetlink.h>
18#include <net/cfg80211.h> 18#include <net/cfg80211.h>
19#include "core.h" 19#include "core.h"
@@ -105,6 +105,17 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = {
105 105
106 [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, 106 [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY,
107 .len = NL80211_HT_CAPABILITY_LEN }, 107 .len = NL80211_HT_CAPABILITY_LEN },
108
109 [NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 },
110 [NL80211_ATTR_IE] = { .type = NLA_BINARY,
111 .len = IEEE80211_MAX_DATA_LEN },
112 [NL80211_ATTR_SCAN_FREQUENCIES] = { .type = NLA_NESTED },
113 [NL80211_ATTR_SCAN_SSIDS] = { .type = NLA_NESTED },
114
115 [NL80211_ATTR_SSID] = { .type = NLA_BINARY,
116 .len = IEEE80211_MAX_SSID_LEN },
117 [NL80211_ATTR_AUTH_TYPE] = { .type = NLA_U32 },
118 [NL80211_ATTR_REASON_CODE] = { .type = NLA_U16 },
108}; 119};
109 120
110/* message building helper */ 121/* message building helper */
@@ -125,6 +136,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
125 struct nlattr *nl_freqs, *nl_freq; 136 struct nlattr *nl_freqs, *nl_freq;
126 struct nlattr *nl_rates, *nl_rate; 137 struct nlattr *nl_rates, *nl_rate;
127 struct nlattr *nl_modes; 138 struct nlattr *nl_modes;
139 struct nlattr *nl_cmds;
128 enum ieee80211_band band; 140 enum ieee80211_band band;
129 struct ieee80211_channel *chan; 141 struct ieee80211_channel *chan;
130 struct ieee80211_rate *rate; 142 struct ieee80211_rate *rate;
@@ -135,8 +147,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
135 if (!hdr) 147 if (!hdr)
136 return -1; 148 return -1;
137 149
138 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->idx); 150 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx);
139 NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); 151 NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy));
152 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS,
153 dev->wiphy.max_scan_ssids);
140 154
141 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); 155 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
142 if (!nl_modes) 156 if (!nl_modes)
@@ -234,6 +248,35 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
234 } 248 }
235 nla_nest_end(msg, nl_bands); 249 nla_nest_end(msg, nl_bands);
236 250
251 nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS);
252 if (!nl_cmds)
253 goto nla_put_failure;
254
255 i = 0;
256#define CMD(op, n) \
257 do { \
258 if (dev->ops->op) { \
259 i++; \
260 NLA_PUT_U32(msg, i, NL80211_CMD_ ## n); \
261 } \
262 } while (0)
263
264 CMD(add_virtual_intf, NEW_INTERFACE);
265 CMD(change_virtual_intf, SET_INTERFACE);
266 CMD(add_key, NEW_KEY);
267 CMD(add_beacon, NEW_BEACON);
268 CMD(add_station, NEW_STATION);
269 CMD(add_mpath, NEW_MPATH);
270 CMD(set_mesh_params, SET_MESH_PARAMS);
271 CMD(change_bss, SET_BSS);
272 CMD(auth, AUTHENTICATE);
273 CMD(assoc, ASSOCIATE);
274 CMD(deauth, DEAUTHENTICATE);
275 CMD(disassoc, DISASSOCIATE);
276
277#undef CMD
278 nla_nest_end(msg, nl_cmds);
279
237 return genlmsg_end(msg, hdr); 280 return genlmsg_end(msg, hdr);
238 281
239 nla_put_failure: 282 nla_put_failure:
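The new NL80211_ATTR_SUPPORTED_COMMANDS nest above lets userspace discover which optional cfg80211 operations a driver provides: the CMD() macro emits one command ID for every non-NULL ops pointer. The same idea in a self-contained userspace form; enum cmd, struct ops and stub() are invented for the example:

#include <stdio.h>

enum cmd { CMD_NEW_KEY = 1, CMD_NEW_STATION, CMD_AUTHENTICATE };

struct ops {
        int (*add_key)(void);
        int (*add_station)(void);
        int (*auth)(void);
};

static int stub(void) { return 0; }

int main(void)
{
        /* a backend that implements stations and auth, but not keys */
        struct ops ops = { .add_key = NULL, .add_station = stub, .auth = stub };
        int supported[8];
        int i = 0, j;

#define CMD(op, n)                                      \
        do {                                            \
                if (ops.op)                             \
                        supported[i++] = CMD_ ## n;     \
        } while (0)

        CMD(add_key, NEW_KEY);
        CMD(add_station, NEW_STATION);
        CMD(auth, AUTHENTICATE);
#undef CMD

        for (j = 0; j < i; j++)
                printf("advertising command %d\n", supported[j]);
        return 0;
}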
@@ -247,7 +290,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
247 int start = cb->args[0]; 290 int start = cb->args[0];
248 struct cfg80211_registered_device *dev; 291 struct cfg80211_registered_device *dev;
249 292
250 mutex_lock(&cfg80211_drv_mutex); 293 mutex_lock(&cfg80211_mutex);
251 list_for_each_entry(dev, &cfg80211_drv_list, list) { 294 list_for_each_entry(dev, &cfg80211_drv_list, list) {
252 if (++idx <= start) 295 if (++idx <= start)
253 continue; 296 continue;
@@ -258,7 +301,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
258 break; 301 break;
259 } 302 }
260 } 303 }
261 mutex_unlock(&cfg80211_drv_mutex); 304 mutex_unlock(&cfg80211_mutex);
262 305
263 cb->args[0] = idx; 306 cb->args[0] = idx;
264 307
@@ -323,16 +366,26 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
323 int result = 0, rem_txq_params = 0; 366 int result = 0, rem_txq_params = 0;
324 struct nlattr *nl_txq_params; 367 struct nlattr *nl_txq_params;
325 368
326 rdev = cfg80211_get_dev_from_info(info); 369 rtnl_lock();
327 if (IS_ERR(rdev)) 370
328 return PTR_ERR(rdev); 371 mutex_lock(&cfg80211_mutex);
372
373 rdev = __cfg80211_drv_from_info(info);
374 if (IS_ERR(rdev)) {
375 result = PTR_ERR(rdev);
376 goto unlock;
377 }
378
379 mutex_lock(&rdev->mtx);
329 380
330 if (info->attrs[NL80211_ATTR_WIPHY_NAME]) { 381 if (info->attrs[NL80211_ATTR_WIPHY_NAME])
331 result = cfg80211_dev_rename( 382 result = cfg80211_dev_rename(
332 rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); 383 rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME]));
333 if (result) 384
334 goto bad_res; 385 mutex_unlock(&cfg80211_mutex);
335 } 386
387 if (result)
388 goto bad_res;
336 389
337 if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) { 390 if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) {
338 struct ieee80211_txq_params txq_params; 391 struct ieee80211_txq_params txq_params;
@@ -428,7 +481,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
428 481
429 482
430 bad_res: 483 bad_res:
431 cfg80211_put_dev(rdev); 484 mutex_unlock(&rdev->mtx);
485 unlock:
486 rtnl_unlock();
432 return result; 487 return result;
433} 488}
434 489
@@ -461,7 +516,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
461 struct cfg80211_registered_device *dev; 516 struct cfg80211_registered_device *dev;
462 struct wireless_dev *wdev; 517 struct wireless_dev *wdev;
463 518
464 mutex_lock(&cfg80211_drv_mutex); 519 mutex_lock(&cfg80211_mutex);
465 list_for_each_entry(dev, &cfg80211_drv_list, list) { 520 list_for_each_entry(dev, &cfg80211_drv_list, list) {
466 if (wp_idx < wp_start) { 521 if (wp_idx < wp_start) {
467 wp_idx++; 522 wp_idx++;
@@ -488,7 +543,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
488 wp_idx++; 543 wp_idx++;
489 } 544 }
490 out: 545 out:
491 mutex_unlock(&cfg80211_drv_mutex); 546 mutex_unlock(&cfg80211_mutex);
492 547
493 cb->args[0] = wp_idx; 548 cb->args[0] = wp_idx;
494 cb->args[1] = if_idx; 549 cb->args[1] = if_idx;
@@ -564,21 +619,31 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
564 enum nl80211_iftype type; 619 enum nl80211_iftype type;
565 struct net_device *dev; 620 struct net_device *dev;
566 u32 _flags, *flags = NULL; 621 u32 _flags, *flags = NULL;
622 bool change = false;
567 623
568 memset(&params, 0, sizeof(params)); 624 memset(&params, 0, sizeof(params));
569 625
626 rtnl_lock();
627
570 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 628 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
571 if (err) 629 if (err)
572 return err; 630 goto unlock_rtnl;
631
573 ifindex = dev->ifindex; 632 ifindex = dev->ifindex;
574 type = dev->ieee80211_ptr->iftype; 633 type = dev->ieee80211_ptr->iftype;
575 dev_put(dev); 634 dev_put(dev);
576 635
577 err = -EINVAL;
578 if (info->attrs[NL80211_ATTR_IFTYPE]) { 636 if (info->attrs[NL80211_ATTR_IFTYPE]) {
579 type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); 637 enum nl80211_iftype ntype;
580 if (type > NL80211_IFTYPE_MAX) 638
639 ntype = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]);
640 if (type != ntype)
641 change = true;
642 type = ntype;
643 if (type > NL80211_IFTYPE_MAX) {
644 err = -EINVAL;
581 goto unlock; 645 goto unlock;
646 }
582 } 647 }
583 648
584 if (!drv->ops->change_virtual_intf || 649 if (!drv->ops->change_virtual_intf ||
@@ -594,6 +659,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
594 } 659 }
595 params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); 660 params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]);
596 params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); 661 params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
662 change = true;
597 } 663 }
598 664
599 if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) { 665 if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) {
@@ -603,20 +669,26 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
603 } 669 }
604 err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS], 670 err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS],
605 &_flags); 671 &_flags);
606 if (!err) 672 if (err)
607 flags = &_flags; 673 goto unlock;
674
675 flags = &_flags;
676 change = true;
608 } 677 }
609 rtnl_lock(); 678
610 err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, 679 if (change)
611 type, flags, &params); 680 err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex,
681 type, flags, &params);
682 else
683 err = 0;
612 684
613 dev = __dev_get_by_index(&init_net, ifindex); 685 dev = __dev_get_by_index(&init_net, ifindex);
614 WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != type)); 686 WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != type));
615 687
616 rtnl_unlock();
617
618 unlock: 688 unlock:
619 cfg80211_put_dev(drv); 689 cfg80211_put_dev(drv);
690 unlock_rtnl:
691 rtnl_unlock();
620 return err; 692 return err;
621} 693}
622 694
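nl80211_set_interface() is typical of the locking rework running through this file: rtnl_lock() moves to the top of the handler, the device lookup happens under it, and every exit funnels through unlock/unlock_rtnl labels instead of returning early with the lock already dropped. A generic sketch of that lock-early/goto-unwind idiom; lock(), unlock() and lookup_dev() are stand-ins, not kernel APIs:

#include <stdio.h>
#include <errno.h>

static void lock(void)   { }
static void unlock(void) { }

static int lookup_dev(int ifindex, int *dev)
{
        *dev = ifindex;
        return ifindex > 0 ? 0 : -ENODEV;
}

static int handler(int ifindex)
{
        int dev, err;

        lock();                         /* taken before any lookup */

        err = lookup_dev(ifindex, &dev);
        if (err)
                goto out_unlock;        /* nothing else to undo yet */

        if (dev == 7) {                 /* pretend this op is unsupported */
                err = -EOPNOTSUPP;
                goto out_put;
        }

        err = 0;                        /* the real operation would go here */

 out_put:
        /* the reference taken by lookup_dev() would be dropped here */
 out_unlock:
        unlock();
        return err;
}

int main(void)
{
        printf("%d %d %d\n", handler(1), handler(7), handler(0));
        return 0;
}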
@@ -639,9 +711,13 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
639 return -EINVAL; 711 return -EINVAL;
640 } 712 }
641 713
714 rtnl_lock();
715
642 drv = cfg80211_get_dev_from_info(info); 716 drv = cfg80211_get_dev_from_info(info);
643 if (IS_ERR(drv)) 717 if (IS_ERR(drv)) {
644 return PTR_ERR(drv); 718 err = PTR_ERR(drv);
719 goto unlock_rtnl;
720 }
645 721
646 if (!drv->ops->add_virtual_intf || 722 if (!drv->ops->add_virtual_intf ||
647 !(drv->wiphy.interface_modes & (1 << type))) { 723 !(drv->wiphy.interface_modes & (1 << type))) {
@@ -655,18 +731,17 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
655 params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); 731 params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
656 } 732 }
657 733
658 rtnl_lock();
659 err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? 734 err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ?
660 info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, 735 info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL,
661 &flags); 736 &flags);
662 err = drv->ops->add_virtual_intf(&drv->wiphy, 737 err = drv->ops->add_virtual_intf(&drv->wiphy,
663 nla_data(info->attrs[NL80211_ATTR_IFNAME]), 738 nla_data(info->attrs[NL80211_ATTR_IFNAME]),
664 type, err ? NULL : &flags, &params); 739 type, err ? NULL : &flags, &params);
665 rtnl_unlock();
666
667 740
668 unlock: 741 unlock:
669 cfg80211_put_dev(drv); 742 cfg80211_put_dev(drv);
743 unlock_rtnl:
744 rtnl_unlock();
670 return err; 745 return err;
671} 746}
672 747
@@ -676,9 +751,11 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
676 int ifindex, err; 751 int ifindex, err;
677 struct net_device *dev; 752 struct net_device *dev;
678 753
754 rtnl_lock();
755
679 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 756 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
680 if (err) 757 if (err)
681 return err; 758 goto unlock_rtnl;
682 ifindex = dev->ifindex; 759 ifindex = dev->ifindex;
683 dev_put(dev); 760 dev_put(dev);
684 761
@@ -687,12 +764,12 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
687 goto out; 764 goto out;
688 } 765 }
689 766
690 rtnl_lock();
691 err = drv->ops->del_virtual_intf(&drv->wiphy, ifindex); 767 err = drv->ops->del_virtual_intf(&drv->wiphy, ifindex);
692 rtnl_unlock();
693 768
694 out: 769 out:
695 cfg80211_put_dev(drv); 770 cfg80211_put_dev(drv);
771 unlock_rtnl:
772 rtnl_unlock();
696 return err; 773 return err;
697} 774}
698 775
@@ -738,15 +815,17 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
738 if (info->attrs[NL80211_ATTR_KEY_IDX]) 815 if (info->attrs[NL80211_ATTR_KEY_IDX])
739 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); 816 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
740 817
741 if (key_idx > 3) 818 if (key_idx > 5)
742 return -EINVAL; 819 return -EINVAL;
743 820
744 if (info->attrs[NL80211_ATTR_MAC]) 821 if (info->attrs[NL80211_ATTR_MAC])
745 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 822 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
746 823
824 rtnl_lock();
825
747 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 826 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
748 if (err) 827 if (err)
749 return err; 828 goto unlock_rtnl;
750 829
751 if (!drv->ops->get_key) { 830 if (!drv->ops->get_key) {
752 err = -EOPNOTSUPP; 831 err = -EOPNOTSUPP;
@@ -774,10 +853,8 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
774 if (mac_addr) 853 if (mac_addr)
775 NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr); 854 NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr);
776 855
777 rtnl_lock();
778 err = drv->ops->get_key(&drv->wiphy, dev, key_idx, mac_addr, 856 err = drv->ops->get_key(&drv->wiphy, dev, key_idx, mac_addr,
779 &cookie, get_key_callback); 857 &cookie, get_key_callback);
780 rtnl_unlock();
781 858
782 if (err) 859 if (err)
783 goto out; 860 goto out;
@@ -795,6 +872,9 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
795 out: 872 out:
796 cfg80211_put_dev(drv); 873 cfg80211_put_dev(drv);
797 dev_put(dev); 874 dev_put(dev);
875 unlock_rtnl:
876 rtnl_unlock();
877
798 return err; 878 return err;
799} 879}
800 880
@@ -804,35 +884,50 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
804 int err; 884 int err;
805 struct net_device *dev; 885 struct net_device *dev;
806 u8 key_idx; 886 u8 key_idx;
887 int (*func)(struct wiphy *wiphy, struct net_device *netdev,
888 u8 key_index);
807 889
808 if (!info->attrs[NL80211_ATTR_KEY_IDX]) 890 if (!info->attrs[NL80211_ATTR_KEY_IDX])
809 return -EINVAL; 891 return -EINVAL;
810 892
811 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); 893 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
812 894
813 if (key_idx > 3) 895 if (info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]) {
896 if (key_idx < 4 || key_idx > 5)
897 return -EINVAL;
898 } else if (key_idx > 3)
814 return -EINVAL; 899 return -EINVAL;
815 900
816 /* currently only support setting default key */ 901 /* currently only support setting default key */
817 if (!info->attrs[NL80211_ATTR_KEY_DEFAULT]) 902 if (!info->attrs[NL80211_ATTR_KEY_DEFAULT] &&
903 !info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT])
818 return -EINVAL; 904 return -EINVAL;
819 905
906 rtnl_lock();
907
820 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 908 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
821 if (err) 909 if (err)
822 return err; 910 goto unlock_rtnl;
911
912 if (info->attrs[NL80211_ATTR_KEY_DEFAULT])
913 func = drv->ops->set_default_key;
914 else
915 func = drv->ops->set_default_mgmt_key;
823 916
824 if (!drv->ops->set_default_key) { 917 if (!func) {
825 err = -EOPNOTSUPP; 918 err = -EOPNOTSUPP;
826 goto out; 919 goto out;
827 } 920 }
828 921
829 rtnl_lock(); 922 err = func(&drv->wiphy, dev, key_idx);
830 err = drv->ops->set_default_key(&drv->wiphy, dev, key_idx);
831 rtnl_unlock();
832 923
833 out: 924 out:
834 cfg80211_put_dev(drv); 925 cfg80211_put_dev(drv);
835 dev_put(dev); 926 dev_put(dev);
927
928 unlock_rtnl:
929 rtnl_unlock();
930
836 return err; 931 return err;
837} 932}
838 933
@@ -863,7 +958,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
863 if (info->attrs[NL80211_ATTR_MAC]) 958 if (info->attrs[NL80211_ATTR_MAC])
864 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 959 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
865 960
866 if (key_idx > 3) 961 if (key_idx > 5)
867 return -EINVAL; 962 return -EINVAL;
868 963
869 /* 964 /*
@@ -894,26 +989,33 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
894 if (params.key_len != 13) 989 if (params.key_len != 13)
895 return -EINVAL; 990 return -EINVAL;
896 break; 991 break;
992 case WLAN_CIPHER_SUITE_AES_CMAC:
993 if (params.key_len != 16)
994 return -EINVAL;
995 break;
897 default: 996 default:
898 return -EINVAL; 997 return -EINVAL;
899 } 998 }
900 999
1000 rtnl_lock();
1001
901 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 1002 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
902 if (err) 1003 if (err)
903 return err; 1004 goto unlock_rtnl;
904 1005
905 if (!drv->ops->add_key) { 1006 if (!drv->ops->add_key) {
906 err = -EOPNOTSUPP; 1007 err = -EOPNOTSUPP;
907 goto out; 1008 goto out;
908 } 1009 }
909 1010
910 rtnl_lock();
911 err = drv->ops->add_key(&drv->wiphy, dev, key_idx, mac_addr, &params); 1011 err = drv->ops->add_key(&drv->wiphy, dev, key_idx, mac_addr, &params);
912 rtnl_unlock();
913 1012
914 out: 1013 out:
915 cfg80211_put_dev(drv); 1014 cfg80211_put_dev(drv);
916 dev_put(dev); 1015 dev_put(dev);
1016 unlock_rtnl:
1017 rtnl_unlock();
1018
917 return err; 1019 return err;
918} 1020}
919 1021
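The key-handling hunks widen the accepted key index range from 0-3 to 0-5 — indices 4 and 5 are only valid together with NL80211_ATTR_KEY_DEFAULT_MGMT, i.e. for the new management (IGTK) keys — and nl80211_new_key() gains a 16-byte length check for WLAN_CIPHER_SUITE_AES_CMAC. A stand-alone validator sketching those checks; the cipher selectors are the usual 802.11 suite values, the lengths for suites not visible in this hunk follow the standard key sizes, and the function itself is an illustration rather than the kernel code:

#include <stdio.h>
#include <errno.h>

#define WLAN_CIPHER_SUITE_WEP40    0x000FAC01
#define WLAN_CIPHER_SUITE_TKIP     0x000FAC02
#define WLAN_CIPHER_SUITE_CCMP     0x000FAC04
#define WLAN_CIPHER_SUITE_WEP104   0x000FAC05
#define WLAN_CIPHER_SUITE_AES_CMAC 0x000FAC06

static int validate_new_key(unsigned key_idx, unsigned cipher, unsigned key_len)
{
        if (key_idx > 5)
                return -EINVAL;

        switch (cipher) {
        case WLAN_CIPHER_SUITE_WEP40:
                return key_len == 5 ? 0 : -EINVAL;
        case WLAN_CIPHER_SUITE_TKIP:
                return key_len == 32 ? 0 : -EINVAL;
        case WLAN_CIPHER_SUITE_CCMP:
                return key_len == 16 ? 0 : -EINVAL;
        case WLAN_CIPHER_SUITE_WEP104:
                return key_len == 13 ? 0 : -EINVAL;
        case WLAN_CIPHER_SUITE_AES_CMAC:
                return key_len == 16 ? 0 : -EINVAL;
        default:
                return -EINVAL;
        }
}

int main(void)
{
        printf("%d\n", validate_new_key(4, WLAN_CIPHER_SUITE_AES_CMAC, 16)); /* 0 */
        printf("%d\n", validate_new_key(6, WLAN_CIPHER_SUITE_CCMP, 16));     /* -EINVAL */
        return 0;
}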
@@ -928,28 +1030,32 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
928 if (info->attrs[NL80211_ATTR_KEY_IDX]) 1030 if (info->attrs[NL80211_ATTR_KEY_IDX])
929 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); 1031 key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
930 1032
931 if (key_idx > 3) 1033 if (key_idx > 5)
932 return -EINVAL; 1034 return -EINVAL;
933 1035
934 if (info->attrs[NL80211_ATTR_MAC]) 1036 if (info->attrs[NL80211_ATTR_MAC])
935 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 1037 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
936 1038
1039 rtnl_lock();
1040
937 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 1041 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
938 if (err) 1042 if (err)
939 return err; 1043 goto unlock_rtnl;
940 1044
941 if (!drv->ops->del_key) { 1045 if (!drv->ops->del_key) {
942 err = -EOPNOTSUPP; 1046 err = -EOPNOTSUPP;
943 goto out; 1047 goto out;
944 } 1048 }
945 1049
946 rtnl_lock();
947 err = drv->ops->del_key(&drv->wiphy, dev, key_idx, mac_addr); 1050 err = drv->ops->del_key(&drv->wiphy, dev, key_idx, mac_addr);
948 rtnl_unlock();
949 1051
950 out: 1052 out:
951 cfg80211_put_dev(drv); 1053 cfg80211_put_dev(drv);
952 dev_put(dev); 1054 dev_put(dev);
1055
1056 unlock_rtnl:
1057 rtnl_unlock();
1058
953 return err; 1059 return err;
954} 1060}
955 1061
@@ -963,9 +1069,16 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
963 struct beacon_parameters params; 1069 struct beacon_parameters params;
964 int haveinfo = 0; 1070 int haveinfo = 0;
965 1071
1072 rtnl_lock();
1073
966 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 1074 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
967 if (err) 1075 if (err)
968 return err; 1076 goto unlock_rtnl;
1077
1078 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) {
1079 err = -EOPNOTSUPP;
1080 goto out;
1081 }
969 1082
970 switch (info->genlhdr->cmd) { 1083 switch (info->genlhdr->cmd) {
971 case NL80211_CMD_NEW_BEACON: 1084 case NL80211_CMD_NEW_BEACON:
@@ -1026,13 +1139,14 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1026 goto out; 1139 goto out;
1027 } 1140 }
1028 1141
1029 rtnl_lock();
1030 err = call(&drv->wiphy, dev, &params); 1142 err = call(&drv->wiphy, dev, &params);
1031 rtnl_unlock();
1032 1143
1033 out: 1144 out:
1034 cfg80211_put_dev(drv); 1145 cfg80211_put_dev(drv);
1035 dev_put(dev); 1146 dev_put(dev);
1147 unlock_rtnl:
1148 rtnl_unlock();
1149
1036 return err; 1150 return err;
1037} 1151}
1038 1152
@@ -1042,22 +1156,29 @@ static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info)
1042 int err; 1156 int err;
1043 struct net_device *dev; 1157 struct net_device *dev;
1044 1158
1159 rtnl_lock();
1160
1045 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 1161 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
1046 if (err) 1162 if (err)
1047 return err; 1163 goto unlock_rtnl;
1048 1164
1049 if (!drv->ops->del_beacon) { 1165 if (!drv->ops->del_beacon) {
1050 err = -EOPNOTSUPP; 1166 err = -EOPNOTSUPP;
1051 goto out; 1167 goto out;
1052 } 1168 }
1053 1169
1054 rtnl_lock(); 1170 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) {
1171 err = -EOPNOTSUPP;
1172 goto out;
1173 }
1055 err = drv->ops->del_beacon(&drv->wiphy, dev); 1174 err = drv->ops->del_beacon(&drv->wiphy, dev);
1056 rtnl_unlock();
1057 1175
1058 out: 1176 out:
1059 cfg80211_put_dev(drv); 1177 cfg80211_put_dev(drv);
1060 dev_put(dev); 1178 dev_put(dev);
1179 unlock_rtnl:
1180 rtnl_unlock();
1181
1061 return err; 1182 return err;
1062} 1183}
1063 1184
@@ -1182,6 +1303,12 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
1182 1303
1183 nla_nest_end(msg, txrate); 1304 nla_nest_end(msg, txrate);
1184 } 1305 }
1306 if (sinfo->filled & STATION_INFO_RX_PACKETS)
1307 NLA_PUT_U32(msg, NL80211_STA_INFO_RX_PACKETS,
1308 sinfo->rx_packets);
1309 if (sinfo->filled & STATION_INFO_TX_PACKETS)
1310 NLA_PUT_U32(msg, NL80211_STA_INFO_TX_PACKETS,
1311 sinfo->tx_packets);
1185 nla_nest_end(msg, sinfoattr); 1312 nla_nest_end(msg, sinfoattr);
1186 1313
1187 return genlmsg_end(msg, hdr); 1314 return genlmsg_end(msg, hdr);
@@ -1217,30 +1344,32 @@ static int nl80211_dump_station(struct sk_buff *skb,
1217 return -EINVAL; 1344 return -EINVAL;
1218 } 1345 }
1219 1346
1220 netdev = dev_get_by_index(&init_net, ifidx); 1347 rtnl_lock();
1221 if (!netdev) 1348
1222 return -ENODEV; 1349 netdev = __dev_get_by_index(&init_net, ifidx);
1350 if (!netdev) {
1351 err = -ENODEV;
1352 goto out_rtnl;
1353 }
1223 1354
1224 dev = cfg80211_get_dev_from_ifindex(ifidx); 1355 dev = cfg80211_get_dev_from_ifindex(ifidx);
1225 if (IS_ERR(dev)) { 1356 if (IS_ERR(dev)) {
1226 err = PTR_ERR(dev); 1357 err = PTR_ERR(dev);
1227 goto out_put_netdev; 1358 goto out_rtnl;
1228 } 1359 }
1229 1360
1230 if (!dev->ops->dump_station) { 1361 if (!dev->ops->dump_station) {
1231 err = -ENOSYS; 1362 err = -EOPNOTSUPP;
1232 goto out_err; 1363 goto out_err;
1233 } 1364 }
1234 1365
1235 rtnl_lock();
1236
1237 while (1) { 1366 while (1) {
1238 err = dev->ops->dump_station(&dev->wiphy, netdev, sta_idx, 1367 err = dev->ops->dump_station(&dev->wiphy, netdev, sta_idx,
1239 mac_addr, &sinfo); 1368 mac_addr, &sinfo);
1240 if (err == -ENOENT) 1369 if (err == -ENOENT)
1241 break; 1370 break;
1242 if (err) 1371 if (err)
1243 goto out_err_rtnl; 1372 goto out_err;
1244 1373
1245 if (nl80211_send_station(skb, 1374 if (nl80211_send_station(skb,
1246 NETLINK_CB(cb->skb).pid, 1375 NETLINK_CB(cb->skb).pid,
@@ -1256,12 +1385,10 @@ static int nl80211_dump_station(struct sk_buff *skb,
1256 out: 1385 out:
1257 cb->args[1] = sta_idx; 1386 cb->args[1] = sta_idx;
1258 err = skb->len; 1387 err = skb->len;
1259 out_err_rtnl:
1260 rtnl_unlock();
1261 out_err: 1388 out_err:
1262 cfg80211_put_dev(dev); 1389 cfg80211_put_dev(dev);
1263 out_put_netdev: 1390 out_rtnl:
1264 dev_put(netdev); 1391 rtnl_unlock();
1265 1392
1266 return err; 1393 return err;
1267} 1394}
@@ -1282,19 +1409,18 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
1282 1409
1283 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 1410 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
1284 1411
1412 rtnl_lock();
1413
1285 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 1414 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
1286 if (err) 1415 if (err)
1287 return err; 1416 goto out_rtnl;
1288 1417
1289 if (!drv->ops->get_station) { 1418 if (!drv->ops->get_station) {
1290 err = -EOPNOTSUPP; 1419 err = -EOPNOTSUPP;
1291 goto out; 1420 goto out;
1292 } 1421 }
1293 1422
1294 rtnl_lock();
1295 err = drv->ops->get_station(&drv->wiphy, dev, mac_addr, &sinfo); 1423 err = drv->ops->get_station(&drv->wiphy, dev, mac_addr, &sinfo);
1296 rtnl_unlock();
1297
1298 if (err) 1424 if (err)
1299 goto out; 1425 goto out;
1300 1426
@@ -1311,10 +1437,12 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
1311 1437
1312 out_free: 1438 out_free:
1313 nlmsg_free(msg); 1439 nlmsg_free(msg);
1314
1315 out: 1440 out:
1316 cfg80211_put_dev(drv); 1441 cfg80211_put_dev(drv);
1317 dev_put(dev); 1442 dev_put(dev);
1443 out_rtnl:
1444 rtnl_unlock();
1445
1318 return err; 1446 return err;
1319} 1447}
1320 1448
@@ -1382,9 +1510,11 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
1382 params.plink_action = 1510 params.plink_action =
1383 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); 1511 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
1384 1512
1513 rtnl_lock();
1514
1385 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 1515 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
1386 if (err) 1516 if (err)
1387 return err; 1517 goto out_rtnl;
1388 1518
1389 err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan); 1519 err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan);
1390 if (err) 1520 if (err)
@@ -1395,15 +1525,16 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
1395 goto out; 1525 goto out;
1396 } 1526 }
1397 1527
1398 rtnl_lock();
1399 err = drv->ops->change_station(&drv->wiphy, dev, mac_addr, &params); 1528 err = drv->ops->change_station(&drv->wiphy, dev, mac_addr, &params);
1400 rtnl_unlock();
1401 1529
1402 out: 1530 out:
1403 if (params.vlan) 1531 if (params.vlan)
1404 dev_put(params.vlan); 1532 dev_put(params.vlan);
1405 cfg80211_put_dev(drv); 1533 cfg80211_put_dev(drv);
1406 dev_put(dev); 1534 dev_put(dev);
1535 out_rtnl:
1536 rtnl_unlock();
1537
1407 return err; 1538 return err;
1408} 1539}
1409 1540
@@ -1445,9 +1576,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
1445 &params.station_flags)) 1576 &params.station_flags))
1446 return -EINVAL; 1577 return -EINVAL;
1447 1578
1579 rtnl_lock();
1580
1448 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 1581 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
1449 if (err) 1582 if (err)
1450 return err; 1583 goto out_rtnl;
1451 1584
1452 err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan); 1585 err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, &params.vlan);
1453 if (err) 1586 if (err)
@@ -1458,15 +1591,21 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
1458 goto out; 1591 goto out;
1459 } 1592 }
1460 1593
1461 rtnl_lock(); 1594 if (!netif_running(dev)) {
1595 err = -ENETDOWN;
1596 goto out;
1597 }
1598
1462 err = drv->ops->add_station(&drv->wiphy, dev, mac_addr, &params); 1599 err = drv->ops->add_station(&drv->wiphy, dev, mac_addr, &params);
1463 rtnl_unlock();
1464 1600
1465 out: 1601 out:
1466 if (params.vlan) 1602 if (params.vlan)
1467 dev_put(params.vlan); 1603 dev_put(params.vlan);
1468 cfg80211_put_dev(drv); 1604 cfg80211_put_dev(drv);
1469 dev_put(dev); 1605 dev_put(dev);
1606 out_rtnl:
1607 rtnl_unlock();
1608
1470 return err; 1609 return err;
1471} 1610}
1472 1611
@@ -1480,22 +1619,25 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
1480 if (info->attrs[NL80211_ATTR_MAC]) 1619 if (info->attrs[NL80211_ATTR_MAC])
1481 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 1620 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
1482 1621
1622 rtnl_lock();
1623
1483 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 1624 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
1484 if (err) 1625 if (err)
1485 return err; 1626 goto out_rtnl;
1486 1627
1487 if (!drv->ops->del_station) { 1628 if (!drv->ops->del_station) {
1488 err = -EOPNOTSUPP; 1629 err = -EOPNOTSUPP;
1489 goto out; 1630 goto out;
1490 } 1631 }
1491 1632
1492 rtnl_lock();
1493 err = drv->ops->del_station(&drv->wiphy, dev, mac_addr); 1633 err = drv->ops->del_station(&drv->wiphy, dev, mac_addr);
1494 rtnl_unlock();
1495 1634
1496 out: 1635 out:
1497 cfg80211_put_dev(drv); 1636 cfg80211_put_dev(drv);
1498 dev_put(dev); 1637 dev_put(dev);
1638 out_rtnl:
1639 rtnl_unlock();
1640
1499 return err; 1641 return err;
1500} 1642}
1501 1643
@@ -1576,22 +1718,29 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
1576 return -EINVAL; 1718 return -EINVAL;
1577 } 1719 }
1578 1720
1579 netdev = dev_get_by_index(&init_net, ifidx); 1721 rtnl_lock();
1580 if (!netdev) 1722
1581 return -ENODEV; 1723 netdev = __dev_get_by_index(&init_net, ifidx);
1724 if (!netdev) {
1725 err = -ENODEV;
1726 goto out_rtnl;
1727 }
1582 1728
1583 dev = cfg80211_get_dev_from_ifindex(ifidx); 1729 dev = cfg80211_get_dev_from_ifindex(ifidx);
1584 if (IS_ERR(dev)) { 1730 if (IS_ERR(dev)) {
1585 err = PTR_ERR(dev); 1731 err = PTR_ERR(dev);
1586 goto out_put_netdev; 1732 goto out_rtnl;
1587 } 1733 }
1588 1734
1589 if (!dev->ops->dump_mpath) { 1735 if (!dev->ops->dump_mpath) {
1590 err = -ENOSYS; 1736 err = -EOPNOTSUPP;
1591 goto out_err; 1737 goto out_err;
1592 } 1738 }
1593 1739
1594 rtnl_lock(); 1740 if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) {
1741 err = -EOPNOTSUPP;
1742 goto out;
1743 }
1595 1744
1596 while (1) { 1745 while (1) {
1597 err = dev->ops->dump_mpath(&dev->wiphy, netdev, path_idx, 1746 err = dev->ops->dump_mpath(&dev->wiphy, netdev, path_idx,
@@ -1599,7 +1748,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
1599 if (err == -ENOENT) 1748 if (err == -ENOENT)
1600 break; 1749 break;
1601 if (err) 1750 if (err)
1602 goto out_err_rtnl; 1751 goto out_err;
1603 1752
1604 if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).pid, 1753 if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).pid,
1605 cb->nlh->nlmsg_seq, NLM_F_MULTI, 1754 cb->nlh->nlmsg_seq, NLM_F_MULTI,
@@ -1614,12 +1763,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
1614 out: 1763 out:
1615 cb->args[1] = path_idx; 1764 cb->args[1] = path_idx;
1616 err = skb->len; 1765 err = skb->len;
1617 out_err_rtnl:
1618 rtnl_unlock();
1619 out_err: 1766 out_err:
1620 cfg80211_put_dev(dev); 1767 cfg80211_put_dev(dev);
1621 out_put_netdev: 1768 out_rtnl:
1622 dev_put(netdev); 1769 rtnl_unlock();
1623 1770
1624 return err; 1771 return err;
1625} 1772}
@@ -1641,19 +1788,23 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info)
1641 1788
1642 dst = nla_data(info->attrs[NL80211_ATTR_MAC]); 1789 dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
1643 1790
1791 rtnl_lock();
1792
1644 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 1793 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
1645 if (err) 1794 if (err)
1646 return err; 1795 goto out_rtnl;
1647 1796
1648 if (!drv->ops->get_mpath) { 1797 if (!drv->ops->get_mpath) {
1649 err = -EOPNOTSUPP; 1798 err = -EOPNOTSUPP;
1650 goto out; 1799 goto out;
1651 } 1800 }
1652 1801
1653 rtnl_lock(); 1802 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) {
1654 err = drv->ops->get_mpath(&drv->wiphy, dev, dst, next_hop, &pinfo); 1803 err = -EOPNOTSUPP;
1655 rtnl_unlock(); 1804 goto out;
1805 }
1656 1806
1807 err = drv->ops->get_mpath(&drv->wiphy, dev, dst, next_hop, &pinfo);
1657 if (err) 1808 if (err)
1658 goto out; 1809 goto out;
1659 1810
@@ -1670,10 +1821,12 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info)
1670 1821
1671 out_free: 1822 out_free:
1672 nlmsg_free(msg); 1823 nlmsg_free(msg);
1673
1674 out: 1824 out:
1675 cfg80211_put_dev(drv); 1825 cfg80211_put_dev(drv);
1676 dev_put(dev); 1826 dev_put(dev);
1827 out_rtnl:
1828 rtnl_unlock();
1829
1677 return err; 1830 return err;
1678} 1831}
1679 1832
@@ -1694,22 +1847,35 @@ static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info)
1694 dst = nla_data(info->attrs[NL80211_ATTR_MAC]); 1847 dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
1695 next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); 1848 next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]);
1696 1849
1850 rtnl_lock();
1851
1697 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 1852 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
1698 if (err) 1853 if (err)
1699 return err; 1854 goto out_rtnl;
1700 1855
1701 if (!drv->ops->change_mpath) { 1856 if (!drv->ops->change_mpath) {
1702 err = -EOPNOTSUPP; 1857 err = -EOPNOTSUPP;
1703 goto out; 1858 goto out;
1704 } 1859 }
1705 1860
1706 rtnl_lock(); 1861 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) {
1862 err = -EOPNOTSUPP;
1863 goto out;
1864 }
1865
1866 if (!netif_running(dev)) {
1867 err = -ENETDOWN;
1868 goto out;
1869 }
1870
1707 err = drv->ops->change_mpath(&drv->wiphy, dev, dst, next_hop); 1871 err = drv->ops->change_mpath(&drv->wiphy, dev, dst, next_hop);
1708 rtnl_unlock();
1709 1872
1710 out: 1873 out:
1711 cfg80211_put_dev(drv); 1874 cfg80211_put_dev(drv);
1712 dev_put(dev); 1875 dev_put(dev);
1876 out_rtnl:
1877 rtnl_unlock();
1878
1713 return err; 1879 return err;
1714} 1880}
1715static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info) 1881static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info)
@@ -1729,22 +1895,35 @@ static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info)
1729 dst = nla_data(info->attrs[NL80211_ATTR_MAC]); 1895 dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
1730 next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); 1896 next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]);
1731 1897
1898 rtnl_lock();
1899
1732 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 1900 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
1733 if (err) 1901 if (err)
1734 return err; 1902 goto out_rtnl;
1735 1903
1736 if (!drv->ops->add_mpath) { 1904 if (!drv->ops->add_mpath) {
1737 err = -EOPNOTSUPP; 1905 err = -EOPNOTSUPP;
1738 goto out; 1906 goto out;
1739 } 1907 }
1740 1908
1741 rtnl_lock(); 1909 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) {
1910 err = -EOPNOTSUPP;
1911 goto out;
1912 }
1913
1914 if (!netif_running(dev)) {
1915 err = -ENETDOWN;
1916 goto out;
1917 }
1918
1742 err = drv->ops->add_mpath(&drv->wiphy, dev, dst, next_hop); 1919 err = drv->ops->add_mpath(&drv->wiphy, dev, dst, next_hop);
1743 rtnl_unlock();
1744 1920
1745 out: 1921 out:
1746 cfg80211_put_dev(drv); 1922 cfg80211_put_dev(drv);
1747 dev_put(dev); 1923 dev_put(dev);
1924 out_rtnl:
1925 rtnl_unlock();
1926
1748 return err; 1927 return err;
1749} 1928}
1750 1929
@@ -1758,22 +1937,25 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info)
1758 if (info->attrs[NL80211_ATTR_MAC]) 1937 if (info->attrs[NL80211_ATTR_MAC])
1759 dst = nla_data(info->attrs[NL80211_ATTR_MAC]); 1938 dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
1760 1939
1940 rtnl_lock();
1941
1761 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 1942 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
1762 if (err) 1943 if (err)
1763 return err; 1944 goto out_rtnl;
1764 1945
1765 if (!drv->ops->del_mpath) { 1946 if (!drv->ops->del_mpath) {
1766 err = -EOPNOTSUPP; 1947 err = -EOPNOTSUPP;
1767 goto out; 1948 goto out;
1768 } 1949 }
1769 1950
1770 rtnl_lock();
1771 err = drv->ops->del_mpath(&drv->wiphy, dev, dst); 1951 err = drv->ops->del_mpath(&drv->wiphy, dev, dst);
1772 rtnl_unlock();
1773 1952
1774 out: 1953 out:
1775 cfg80211_put_dev(drv); 1954 cfg80211_put_dev(drv);
1776 dev_put(dev); 1955 dev_put(dev);
1956 out_rtnl:
1957 rtnl_unlock();
1958
1777 return err; 1959 return err;
1778} 1960}
1779 1961
@@ -1806,22 +1988,30 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
1806 nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); 1988 nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
1807 } 1989 }
1808 1990
1991 rtnl_lock();
1992
1809 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 1993 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
1810 if (err) 1994 if (err)
1811 return err; 1995 goto out_rtnl;
1812 1996
1813 if (!drv->ops->change_bss) { 1997 if (!drv->ops->change_bss) {
1814 err = -EOPNOTSUPP; 1998 err = -EOPNOTSUPP;
1815 goto out; 1999 goto out;
1816 } 2000 }
1817 2001
1818 rtnl_lock(); 2002 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) {
2003 err = -EOPNOTSUPP;
2004 goto out;
2005 }
2006
1819 err = drv->ops->change_bss(&drv->wiphy, dev, &params); 2007 err = drv->ops->change_bss(&drv->wiphy, dev, &params);
1820 rtnl_unlock();
1821 2008
1822 out: 2009 out:
1823 cfg80211_put_dev(drv); 2010 cfg80211_put_dev(drv);
1824 dev_put(dev); 2011 dev_put(dev);
2012 out_rtnl:
2013 rtnl_unlock();
2014
1825 return err; 2015 return err;
1826} 2016}
1827 2017
@@ -1876,6 +2066,19 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
1876 int r; 2066 int r;
1877 char *data = NULL; 2067 char *data = NULL;
1878 2068
2069 /*
2070 * You should only get this when cfg80211 hasn't yet initialized
2071 * completely when built-in to the kernel right between the time
2072 * window between nl80211_init() and regulatory_init(), if that is
2073 * even possible.
2074 */
2075 mutex_lock(&cfg80211_mutex);
2076 if (unlikely(!cfg80211_regdomain)) {
2077 mutex_unlock(&cfg80211_mutex);
2078 return -EINPROGRESS;
2079 }
2080 mutex_unlock(&cfg80211_mutex);
2081
1879 if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) 2082 if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
1880 return -EINVAL; 2083 return -EINVAL;
1881 2084
@@ -1886,9 +2089,9 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
1886 if (is_world_regdom(data)) 2089 if (is_world_regdom(data))
1887 return -EINVAL; 2090 return -EINVAL;
1888#endif 2091#endif
1889 mutex_lock(&cfg80211_drv_mutex); 2092
1890 r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, 0, ENVIRON_ANY); 2093 r = regulatory_hint_user(data);
1891 mutex_unlock(&cfg80211_drv_mutex); 2094
1892 return r; 2095 return r;
1893} 2096}
1894 2097
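nl80211_req_set_reg() now bails out with -EINPROGRESS while cfg80211_regdomain is still NULL, covering the built-in case where a request could land in the window between nl80211_init() and regulatory_init(). A small userspace sketch of that check-the-shared-pointer-under-the-lock early-out; cfg_mutex, regdomain and set_reg_request() are illustrative names, and the example builds with -pthread:

#include <stdio.h>
#include <errno.h>
#include <pthread.h>

static pthread_mutex_t cfg_mutex = PTHREAD_MUTEX_INITIALIZER;
static const char *regdomain;   /* NULL until regulatory init has run */

static int set_reg_request(const char *alpha2)
{
        pthread_mutex_lock(&cfg_mutex);
        if (!regdomain) {
                pthread_mutex_unlock(&cfg_mutex);
                return -EINPROGRESS;    /* try again once core init is done */
        }
        pthread_mutex_unlock(&cfg_mutex);

        printf("hint: %s\n", alpha2);
        return 0;
}

int main(void)
{
        printf("%d\n", set_reg_request("US"));  /* -EINPROGRESS: not initialised */
        regdomain = "00";                       /* pretend regulatory_init() ran */
        printf("%d\n", set_reg_request("US"));  /* 0 */
        return 0;
}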
@@ -1903,15 +2106,20 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
1903 struct nlattr *pinfoattr; 2106 struct nlattr *pinfoattr;
1904 struct sk_buff *msg; 2107 struct sk_buff *msg;
1905 2108
2109 rtnl_lock();
2110
1906 /* Look up our device */ 2111 /* Look up our device */
1907 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 2112 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
1908 if (err) 2113 if (err)
1909 return err; 2114 goto out_rtnl;
2115
2116 if (!drv->ops->get_mesh_params) {
2117 err = -EOPNOTSUPP;
2118 goto out;
2119 }
1910 2120
1911 /* Get the mesh params */ 2121 /* Get the mesh params */
1912 rtnl_lock();
1913 err = drv->ops->get_mesh_params(&drv->wiphy, dev, &cur_params); 2122 err = drv->ops->get_mesh_params(&drv->wiphy, dev, &cur_params);
1914 rtnl_unlock();
1915 if (err) 2123 if (err)
1916 goto out; 2124 goto out;
1917 2125
@@ -1960,13 +2168,16 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
1960 err = genlmsg_unicast(msg, info->snd_pid); 2168 err = genlmsg_unicast(msg, info->snd_pid);
1961 goto out; 2169 goto out;
1962 2170
1963nla_put_failure: 2171 nla_put_failure:
1964 genlmsg_cancel(msg, hdr); 2172 genlmsg_cancel(msg, hdr);
1965 err = -EMSGSIZE; 2173 err = -EMSGSIZE;
1966out: 2174 out:
1967 /* Cleanup */ 2175 /* Cleanup */
1968 cfg80211_put_dev(drv); 2176 cfg80211_put_dev(drv);
1969 dev_put(dev); 2177 dev_put(dev);
2178 out_rtnl:
2179 rtnl_unlock();
2180
1970 return err; 2181 return err;
1971} 2182}
1972 2183
@@ -2013,9 +2224,16 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
2013 parent_attr, nl80211_meshconf_params_policy)) 2224 parent_attr, nl80211_meshconf_params_policy))
2014 return -EINVAL; 2225 return -EINVAL;
2015 2226
2227 rtnl_lock();
2228
2016 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); 2229 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
2017 if (err) 2230 if (err)
2018 return err; 2231 goto out_rtnl;
2232
2233 if (!drv->ops->set_mesh_params) {
2234 err = -EOPNOTSUPP;
2235 goto out;
2236 }
2019 2237
2020 /* This makes sure that there aren't more than 32 mesh config 2238 /* This makes sure that there aren't more than 32 mesh config
2021 * parameters (otherwise our bitfield scheme would not work.) */ 2239 * parameters (otherwise our bitfield scheme would not work.) */
@@ -2057,18 +2275,95 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
2057 nla_get_u16); 2275 nla_get_u16);
2058 2276
2059 /* Apply changes */ 2277 /* Apply changes */
2060 rtnl_lock();
2061 err = drv->ops->set_mesh_params(&drv->wiphy, dev, &cfg, mask); 2278 err = drv->ops->set_mesh_params(&drv->wiphy, dev, &cfg, mask);
2062 rtnl_unlock();
2063 2279
2280 out:
2064 /* cleanup */ 2281 /* cleanup */
2065 cfg80211_put_dev(drv); 2282 cfg80211_put_dev(drv);
2066 dev_put(dev); 2283 dev_put(dev);
2284 out_rtnl:
2285 rtnl_unlock();
2286
2067 return err; 2287 return err;
2068} 2288}
2069 2289
2070#undef FILL_IN_MESH_PARAM_IF_SET 2290#undef FILL_IN_MESH_PARAM_IF_SET
2071 2291
2292static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
2293{
2294 struct sk_buff *msg;
2295 void *hdr = NULL;
2296 struct nlattr *nl_reg_rules;
2297 unsigned int i;
2298 int err = -EINVAL;
2299
2300 mutex_lock(&cfg80211_mutex);
2301
2302 if (!cfg80211_regdomain)
2303 goto out;
2304
2305 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
2306 if (!msg) {
2307 err = -ENOBUFS;
2308 goto out;
2309 }
2310
2311 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
2312 NL80211_CMD_GET_REG);
2313 if (!hdr)
2314 goto nla_put_failure;
2315
2316 NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2,
2317 cfg80211_regdomain->alpha2);
2318
2319 nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES);
2320 if (!nl_reg_rules)
2321 goto nla_put_failure;
2322
2323 for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) {
2324 struct nlattr *nl_reg_rule;
2325 const struct ieee80211_reg_rule *reg_rule;
2326 const struct ieee80211_freq_range *freq_range;
2327 const struct ieee80211_power_rule *power_rule;
2328
2329 reg_rule = &cfg80211_regdomain->reg_rules[i];
2330 freq_range = &reg_rule->freq_range;
2331 power_rule = &reg_rule->power_rule;
2332
2333 nl_reg_rule = nla_nest_start(msg, i);
2334 if (!nl_reg_rule)
2335 goto nla_put_failure;
2336
2337 NLA_PUT_U32(msg, NL80211_ATTR_REG_RULE_FLAGS,
2338 reg_rule->flags);
2339 NLA_PUT_U32(msg, NL80211_ATTR_FREQ_RANGE_START,
2340 freq_range->start_freq_khz);
2341 NLA_PUT_U32(msg, NL80211_ATTR_FREQ_RANGE_END,
2342 freq_range->end_freq_khz);
2343 NLA_PUT_U32(msg, NL80211_ATTR_FREQ_RANGE_MAX_BW,
2344 freq_range->max_bandwidth_khz);
2345 NLA_PUT_U32(msg, NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN,
2346 power_rule->max_antenna_gain);
2347 NLA_PUT_U32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP,
2348 power_rule->max_eirp);
2349
2350 nla_nest_end(msg, nl_reg_rule);
2351 }
2352
2353 nla_nest_end(msg, nl_reg_rules);
2354
2355 genlmsg_end(msg, hdr);
2356 err = genlmsg_unicast(msg, info->snd_pid);
2357 goto out;
2358
2359nla_put_failure:
2360 genlmsg_cancel(msg, hdr);
2361 err = -EMSGSIZE;
2362out:
2363 mutex_unlock(&cfg80211_mutex);
2364 return err;
2365}
2366
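For orientation, the new NL80211_CMD_GET_REG doit above can be queried from userspace along these lines. This is only a sketch: it assumes libnl 3.x generic netlink helper names (the libnl API contemporary with this patch differed), and error handling is omitted.

/* Illustrative userspace query of the new GET_REG command (libnl 3.x names assumed). */
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/nl80211.h>
#include <stdio.h>

static int print_reg(struct nl_msg *msg, void *arg)
{
	struct nlattr *tb[NL80211_ATTR_MAX + 1];

	(void)arg;
	/* Parse the generic netlink attributes of the reply. */
	genlmsg_parse(nlmsg_hdr(msg), 0, tb, NL80211_ATTR_MAX, NULL);
	if (tb[NL80211_ATTR_REG_ALPHA2])
		printf("alpha2: %s\n", nla_get_string(tb[NL80211_ATTR_REG_ALPHA2]));
	/* NL80211_ATTR_REG_RULES would be walked with nla_for_each_nested(). */
	return NL_OK;
}

int main(void)
{
	struct nl_sock *sk = nl_socket_alloc();
	struct nl_msg *msg = nlmsg_alloc();
	int family;

	genl_connect(sk);
	family = genl_ctrl_resolve(sk, "nl80211");

	genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
		    NL80211_CMD_GET_REG, 0);
	nl_socket_modify_cb(sk, NL_CB_VALID, NL_CB_CUSTOM, print_reg, NULL);
	nl_send_auto(sk, msg);
	nl_recvmsgs_default(sk);

	nlmsg_free(msg);
	nl_socket_free(sk);
	return 0;
}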
2072static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) 2367static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
2073{ 2368{
2074 struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; 2369 struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1];
@@ -2124,9 +2419,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
2124 2419
2125 BUG_ON(rule_idx != num_rules); 2420 BUG_ON(rule_idx != num_rules);
2126 2421
2127 mutex_lock(&cfg80211_drv_mutex); 2422 mutex_lock(&cfg80211_mutex);
2128 r = set_regdom(rd); 2423 r = set_regdom(rd);
2129 mutex_unlock(&cfg80211_drv_mutex); 2424 mutex_unlock(&cfg80211_mutex);
2130 return r; 2425 return r;
2131 2426
2132 bad_reg: 2427 bad_reg:
@@ -2134,6 +2429,553 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
2134 return -EINVAL; 2429 return -EINVAL;
2135} 2430}
2136 2431
2432static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
2433{
2434 struct cfg80211_registered_device *drv;
2435 struct net_device *dev;
2436 struct cfg80211_scan_request *request;
2437 struct cfg80211_ssid *ssid;
2438 struct ieee80211_channel *channel;
2439 struct nlattr *attr;
2440 struct wiphy *wiphy;
2441 int err, tmp, n_ssids = 0, n_channels = 0, i;
2442 enum ieee80211_band band;
2443 size_t ie_len;
2444
2445 rtnl_lock();
2446
2447 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
2448 if (err)
2449 goto out_rtnl;
2450
2451 wiphy = &drv->wiphy;
2452
2453 if (!drv->ops->scan) {
2454 err = -EOPNOTSUPP;
2455 goto out;
2456 }
2457
2458 if (!netif_running(dev)) {
2459 err = -ENETDOWN;
2460 goto out;
2461 }
2462
2463 if (drv->scan_req) {
2464 err = -EBUSY;
2465 goto out;
2466 }
2467
2468 if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
2469 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], tmp)
2470 n_channels++;
2471 if (!n_channels) {
2472 err = -EINVAL;
2473 goto out;
2474 }
2475 } else {
2476 for (band = 0; band < IEEE80211_NUM_BANDS; band++)
2477 if (wiphy->bands[band])
2478 n_channels += wiphy->bands[band]->n_channels;
2479 }
2480
2481 if (info->attrs[NL80211_ATTR_SCAN_SSIDS])
2482 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp)
2483 n_ssids++;
2484
2485 if (n_ssids > wiphy->max_scan_ssids) {
2486 err = -EINVAL;
2487 goto out;
2488 }
2489
2490 if (info->attrs[NL80211_ATTR_IE])
2491 ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
2492 else
2493 ie_len = 0;
2494
2495 request = kzalloc(sizeof(*request)
2496 + sizeof(*ssid) * n_ssids
2497 + sizeof(channel) * n_channels
2498 + ie_len, GFP_KERNEL);
2499 if (!request) {
2500 err = -ENOMEM;
2501 goto out;
2502 }
2503
2504 request->channels = (void *)((char *)request + sizeof(*request));
2505 request->n_channels = n_channels;
2506 if (n_ssids)
2507 request->ssids = (void *)(request->channels + n_channels);
2508 request->n_ssids = n_ssids;
2509 if (ie_len) {
2510 if (request->ssids)
2511 request->ie = (void *)(request->ssids + n_ssids);
2512 else
2513 request->ie = (void *)(request->channels + n_channels);
2514 }
2515
2516 if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
2517 /* user specified, bail out if channel not found */
2518 request->n_channels = n_channels;
2519 i = 0;
2520 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], tmp) {
2521 request->channels[i] = ieee80211_get_channel(wiphy, nla_get_u32(attr));
2522 if (!request->channels[i]) {
2523 err = -EINVAL;
2524 goto out_free;
2525 }
2526 i++;
2527 }
2528 } else {
2529 /* all channels */
2530 i = 0;
2531 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
2532 int j;
2533 if (!wiphy->bands[band])
2534 continue;
2535 for (j = 0; j < wiphy->bands[band]->n_channels; j++) {
2536 request->channels[i] = &wiphy->bands[band]->channels[j];
2537 i++;
2538 }
2539 }
2540 }
2541
2542 i = 0;
2543 if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) {
2544 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) {
2545 if (request->ssids[i].ssid_len > IEEE80211_MAX_SSID_LEN) {
2546 err = -EINVAL;
2547 goto out_free;
2548 }
2549 memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr));
2550 request->ssids[i].ssid_len = nla_len(attr);
2551 i++;
2552 }
2553 }
2554
2555 if (info->attrs[NL80211_ATTR_IE]) {
2556 request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
2557 memcpy(request->ie, nla_data(info->attrs[NL80211_ATTR_IE]),
2558 request->ie_len);
2559 }
2560
2561 request->ifidx = dev->ifindex;
2562 request->wiphy = &drv->wiphy;
2563
2564 drv->scan_req = request;
2565 err = drv->ops->scan(&drv->wiphy, dev, request);
2566
2567 out_free:
2568 if (err) {
2569 drv->scan_req = NULL;
2570 kfree(request);
2571 }
2572 out:
2573 cfg80211_put_dev(drv);
2574 dev_put(dev);
2575 out_rtnl:
2576 rtnl_unlock();
2577
2578 return err;
2579}
2580
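nl80211_trigger_scan() above carves the whole scan request out of a single kzalloc(): the channel pointer array, the SSID array and the IE bytes are laid out in the tail of one block, so the request can be released with one kfree(). A standalone sketch of the same layout trick, using hypothetical userspace types rather than the kernel's structures:

/* Illustrative only: hypothetical types, not the kernel's. Shows the
 * "single allocation, tail arrays" layout used for the scan request. */
#include <stdlib.h>

struct chan { int freq; };
struct ssid { unsigned char data[32]; size_t len; };

struct scan_req {
	struct chan **channels;		/* points into the tail of this block */
	struct ssid *ssids;		/* ditto */
	unsigned char *ie;		/* ditto */
	size_t n_channels, n_ssids, ie_len;
};

static struct scan_req *scan_req_alloc(size_t n_channels, size_t n_ssids,
				       size_t ie_len)
{
	struct scan_req *req;

	/* One zeroed allocation; everything else lives after the struct. */
	req = calloc(1, sizeof(*req)
			+ sizeof(struct chan *) * n_channels
			+ sizeof(struct ssid) * n_ssids
			+ ie_len);
	if (!req)
		return NULL;

	req->channels = (void *)(req + 1);
	req->n_channels = n_channels;
	if (n_ssids)
		req->ssids = (void *)(req->channels + n_channels);
	req->n_ssids = n_ssids;
	if (ie_len)
		req->ie = n_ssids ? (void *)(req->ssids + n_ssids)
				  : (void *)(req->channels + n_channels);
	req->ie_len = ie_len;
	return req;
}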
2581static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags,
2582 struct cfg80211_registered_device *rdev,
2583 struct net_device *dev,
2584 struct cfg80211_bss *res)
2585{
2586 void *hdr;
2587 struct nlattr *bss;
2588
2589 hdr = nl80211hdr_put(msg, pid, seq, flags,
2590 NL80211_CMD_NEW_SCAN_RESULTS);
2591 if (!hdr)
2592 return -1;
2593
2594 NLA_PUT_U32(msg, NL80211_ATTR_SCAN_GENERATION,
2595 rdev->bss_generation);
2596 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
2597
2598 bss = nla_nest_start(msg, NL80211_ATTR_BSS);
2599 if (!bss)
2600 goto nla_put_failure;
2601 if (!is_zero_ether_addr(res->bssid))
2602 NLA_PUT(msg, NL80211_BSS_BSSID, ETH_ALEN, res->bssid);
2603 if (res->information_elements && res->len_information_elements)
2604 NLA_PUT(msg, NL80211_BSS_INFORMATION_ELEMENTS,
2605 res->len_information_elements,
2606 res->information_elements);
2607 if (res->tsf)
2608 NLA_PUT_U64(msg, NL80211_BSS_TSF, res->tsf);
2609 if (res->beacon_interval)
2610 NLA_PUT_U16(msg, NL80211_BSS_BEACON_INTERVAL, res->beacon_interval);
2611 NLA_PUT_U16(msg, NL80211_BSS_CAPABILITY, res->capability);
2612 NLA_PUT_U32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq);
2613
2614 switch (rdev->wiphy.signal_type) {
2615 case CFG80211_SIGNAL_TYPE_MBM:
2616 NLA_PUT_U32(msg, NL80211_BSS_SIGNAL_MBM, res->signal);
2617 break;
2618 case CFG80211_SIGNAL_TYPE_UNSPEC:
2619 NLA_PUT_U8(msg, NL80211_BSS_SIGNAL_UNSPEC, res->signal);
2620 break;
2621 default:
2622 break;
2623 }
2624
2625 nla_nest_end(msg, bss);
2626
2627 return genlmsg_end(msg, hdr);
2628
2629 nla_put_failure:
2630 genlmsg_cancel(msg, hdr);
2631 return -EMSGSIZE;
2632}
2633
2634static int nl80211_dump_scan(struct sk_buff *skb,
2635 struct netlink_callback *cb)
2636{
2637 struct cfg80211_registered_device *dev;
2638 struct net_device *netdev;
2639 struct cfg80211_internal_bss *scan;
2640 int ifidx = cb->args[0];
2641 int start = cb->args[1], idx = 0;
2642 int err;
2643
2644 if (!ifidx) {
2645 err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
2646 nl80211_fam.attrbuf, nl80211_fam.maxattr,
2647 nl80211_policy);
2648 if (err)
2649 return err;
2650
2651 if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX])
2652 return -EINVAL;
2653
2654 ifidx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]);
2655 if (!ifidx)
2656 return -EINVAL;
2657 cb->args[0] = ifidx;
2658 }
2659
2660 netdev = dev_get_by_index(&init_net, ifidx);
2661 if (!netdev)
2662 return -ENODEV;
2663
2664 dev = cfg80211_get_dev_from_ifindex(ifidx);
2665 if (IS_ERR(dev)) {
2666 err = PTR_ERR(dev);
2667 goto out_put_netdev;
2668 }
2669
2670 spin_lock_bh(&dev->bss_lock);
2671 cfg80211_bss_expire(dev);
2672
2673 list_for_each_entry(scan, &dev->bss_list, list) {
2674 if (++idx <= start)
2675 continue;
2676 if (nl80211_send_bss(skb,
2677 NETLINK_CB(cb->skb).pid,
2678 cb->nlh->nlmsg_seq, NLM_F_MULTI,
2679 dev, netdev, &scan->pub) < 0) {
2680 idx--;
2681 goto out;
2682 }
2683 }
2684
2685 out:
2686 spin_unlock_bh(&dev->bss_lock);
2687
2688 cb->args[1] = idx;
2689 err = skb->len;
2690 cfg80211_put_dev(dev);
2691 out_put_netdev:
2692 dev_put(netdev);
2693
2694 return err;
2695}
2696
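The NL80211_CMD_GET_SCAN handler above is dump-only, so userspace drives it with an NLM_F_DUMP request carrying the interface index, and nl80211_dump_scan() resumes between messages via cb->args[]. A rough request-side fragment, reusing the socket and family id from the GET_REG sketch earlier (libnl 3.x names assumed, if_nametoindex() from <net/if.h>, error handling omitted):

/* Illustrative scan-results dump request (libnl 3.x names assumed). */
struct nl_msg *msg = nlmsg_alloc();

genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, NLM_F_DUMP,
	    NL80211_CMD_GET_SCAN, 0);
nla_put_u32(msg, NL80211_ATTR_IFINDEX, if_nametoindex("wlan0"));
nl_send_auto(sk, msg);
/* Each reply is an NL80211_CMD_NEW_SCAN_RESULTS message carrying one BSS
 * nested under NL80211_ATTR_BSS; see nl80211_send_bss() above. */
nl_recvmsgs_default(sk);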
2697static bool nl80211_valid_auth_type(enum nl80211_auth_type auth_type)
2698{
2699 return auth_type == NL80211_AUTHTYPE_OPEN_SYSTEM ||
2700 auth_type == NL80211_AUTHTYPE_SHARED_KEY ||
2701 auth_type == NL80211_AUTHTYPE_FT ||
2702 auth_type == NL80211_AUTHTYPE_NETWORK_EAP;
2703}
2704
2705static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
2706{
2707 struct cfg80211_registered_device *drv;
2708 struct net_device *dev;
2709 struct cfg80211_auth_request req;
2710 struct wiphy *wiphy;
2711 int err;
2712
2713 rtnl_lock();
2714
2715 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
2716 if (err)
2717 goto unlock_rtnl;
2718
2719 if (!drv->ops->auth) {
2720 err = -EOPNOTSUPP;
2721 goto out;
2722 }
2723
2724 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
2725 err = -EOPNOTSUPP;
2726 goto out;
2727 }
2728
2729 if (!netif_running(dev)) {
2730 err = -ENETDOWN;
2731 goto out;
2732 }
2733
2734 if (!info->attrs[NL80211_ATTR_MAC]) {
2735 err = -EINVAL;
2736 goto out;
2737 }
2738
2739 wiphy = &drv->wiphy;
2740 memset(&req, 0, sizeof(req));
2741
2742 req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
2743
2744 if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
2745 req.chan = ieee80211_get_channel(
2746 wiphy,
2747 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
2748 if (!req.chan) {
2749 err = -EINVAL;
2750 goto out;
2751 }
2752 }
2753
2754 if (info->attrs[NL80211_ATTR_SSID]) {
2755 req.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
2756 req.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
2757 }
2758
2759 if (info->attrs[NL80211_ATTR_IE]) {
2760 req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
2761 req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
2762 }
2763
2764 if (info->attrs[NL80211_ATTR_AUTH_TYPE]) {
2765 req.auth_type =
2766 nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]);
2767 if (!nl80211_valid_auth_type(req.auth_type)) {
2768 err = -EINVAL;
2769 goto out;
2770 }
2771 }
2772
2773 err = drv->ops->auth(&drv->wiphy, dev, &req);
2774
2775out:
2776 cfg80211_put_dev(drv);
2777 dev_put(dev);
2778unlock_rtnl:
2779 rtnl_unlock();
2780 return err;
2781}
2782
2783static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
2784{
2785 struct cfg80211_registered_device *drv;
2786 struct net_device *dev;
2787 struct cfg80211_assoc_request req;
2788 struct wiphy *wiphy;
2789 int err;
2790
2791 rtnl_lock();
2792
2793 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
2794 if (err)
2795 goto unlock_rtnl;
2796
2797 if (!drv->ops->assoc) {
2798 err = -EOPNOTSUPP;
2799 goto out;
2800 }
2801
2802 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
2803 err = -EOPNOTSUPP;
2804 goto out;
2805 }
2806
2807 if (!netif_running(dev)) {
2808 err = -ENETDOWN;
2809 goto out;
2810 }
2811
2812 if (!info->attrs[NL80211_ATTR_MAC] ||
2813 !info->attrs[NL80211_ATTR_SSID]) {
2814 err = -EINVAL;
2815 goto out;
2816 }
2817
2818 wiphy = &drv->wiphy;
2819 memset(&req, 0, sizeof(req));
2820
2821 req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
2822
2823 if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
2824 req.chan = ieee80211_get_channel(
2825 wiphy,
2826 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
2827 if (!req.chan) {
2828 err = -EINVAL;
2829 goto out;
2830 }
2831 }
2832
2833 req.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
2834 req.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
2835
2836 if (info->attrs[NL80211_ATTR_IE]) {
2837 req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
2838 req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
2839 }
2840
2841 err = drv->ops->assoc(&drv->wiphy, dev, &req);
2842
2843out:
2844 cfg80211_put_dev(drv);
2845 dev_put(dev);
2846unlock_rtnl:
2847 rtnl_unlock();
2848 return err;
2849}
2850
2851static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
2852{
2853 struct cfg80211_registered_device *drv;
2854 struct net_device *dev;
2855 struct cfg80211_deauth_request req;
2856 struct wiphy *wiphy;
2857 int err;
2858
2859 rtnl_lock();
2860
2861 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
2862 if (err)
2863 goto unlock_rtnl;
2864
2865 if (!drv->ops->deauth) {
2866 err = -EOPNOTSUPP;
2867 goto out;
2868 }
2869
2870 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
2871 err = -EOPNOTSUPP;
2872 goto out;
2873 }
2874
2875 if (!netif_running(dev)) {
2876 err = -ENETDOWN;
2877 goto out;
2878 }
2879
2880 if (!info->attrs[NL80211_ATTR_MAC]) {
2881 err = -EINVAL;
2882 goto out;
2883 }
2884
2885 wiphy = &drv->wiphy;
2886 memset(&req, 0, sizeof(req));
2887
2888 req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
2889
2890 if (info->attrs[NL80211_ATTR_REASON_CODE]) {
2891 req.reason_code =
2892 nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
2893 if (req.reason_code == 0) {
2894 /* Reason Code 0 is reserved */
2895 err = -EINVAL;
2896 goto out;
2897 }
2898 }
2899
2900 if (info->attrs[NL80211_ATTR_IE]) {
2901 req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
2902 req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
2903 }
2904
2905 err = drv->ops->deauth(&drv->wiphy, dev, &req);
2906
2907out:
2908 cfg80211_put_dev(drv);
2909 dev_put(dev);
2910unlock_rtnl:
2911 rtnl_unlock();
2912 return err;
2913}
2914
2915static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
2916{
2917 struct cfg80211_registered_device *drv;
2918 struct net_device *dev;
2919 struct cfg80211_disassoc_request req;
2920 struct wiphy *wiphy;
2921 int err;
2922
2923 rtnl_lock();
2924
2925 err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev);
2926 if (err)
2927 goto unlock_rtnl;
2928
2929 if (!drv->ops->disassoc) {
2930 err = -EOPNOTSUPP;
2931 goto out;
2932 }
2933
2934 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
2935 err = -EOPNOTSUPP;
2936 goto out;
2937 }
2938
2939 if (!netif_running(dev)) {
2940 err = -ENETDOWN;
2941 goto out;
2942 }
2943
2944 if (!info->attrs[NL80211_ATTR_MAC]) {
2945 err = -EINVAL;
2946 goto out;
2947 }
2948
2949 wiphy = &drv->wiphy;
2950 memset(&req, 0, sizeof(req));
2951
2952 req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
2953
2954 if (info->attrs[NL80211_ATTR_REASON_CODE]) {
2955 req.reason_code =
2956 nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
2957 if (req.reason_code == 0) {
2958 /* Reason Code 0 is reserved */
2959 err = -EINVAL;
2960 goto out;
2961 }
2962 }
2963
2964 if (info->attrs[NL80211_ATTR_IE]) {
2965 req.ie = nla_data(info->attrs[NL80211_ATTR_IE]);
2966 req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
2967 }
2968
2969 err = drv->ops->disassoc(&drv->wiphy, dev, &req);
2970
2971out:
2972 cfg80211_put_dev(drv);
2973 dev_put(dev);
2974unlock_rtnl:
2975 rtnl_unlock();
2976 return err;
2977}
2978
2137static struct genl_ops nl80211_ops[] = { 2979static struct genl_ops nl80211_ops[] = {
2138 { 2980 {
2139 .cmd = NL80211_CMD_GET_WIPHY, 2981 .cmd = NL80211_CMD_GET_WIPHY,
@@ -2220,7 +3062,6 @@ static struct genl_ops nl80211_ops[] = {
2220 .doit = nl80211_get_station, 3062 .doit = nl80211_get_station,
2221 .dumpit = nl80211_dump_station, 3063 .dumpit = nl80211_dump_station,
2222 .policy = nl80211_policy, 3064 .policy = nl80211_policy,
2223 .flags = GENL_ADMIN_PERM,
2224 }, 3065 },
2225 { 3066 {
2226 .cmd = NL80211_CMD_SET_STATION, 3067 .cmd = NL80211_CMD_SET_STATION,
@@ -2272,6 +3113,12 @@ static struct genl_ops nl80211_ops[] = {
2272 .flags = GENL_ADMIN_PERM, 3113 .flags = GENL_ADMIN_PERM,
2273 }, 3114 },
2274 { 3115 {
3116 .cmd = NL80211_CMD_GET_REG,
3117 .doit = nl80211_get_reg,
3118 .policy = nl80211_policy,
3119 /* can be retrieved by unprivileged users */
3120 },
3121 {
2275 .cmd = NL80211_CMD_SET_REG, 3122 .cmd = NL80211_CMD_SET_REG,
2276 .doit = nl80211_set_reg, 3123 .doit = nl80211_set_reg,
2277 .policy = nl80211_policy, 3124 .policy = nl80211_policy,
@@ -2295,12 +3142,56 @@ static struct genl_ops nl80211_ops[] = {
2295 .policy = nl80211_policy, 3142 .policy = nl80211_policy,
2296 .flags = GENL_ADMIN_PERM, 3143 .flags = GENL_ADMIN_PERM,
2297 }, 3144 },
3145 {
3146 .cmd = NL80211_CMD_TRIGGER_SCAN,
3147 .doit = nl80211_trigger_scan,
3148 .policy = nl80211_policy,
3149 .flags = GENL_ADMIN_PERM,
3150 },
3151 {
3152 .cmd = NL80211_CMD_GET_SCAN,
3153 .policy = nl80211_policy,
3154 .dumpit = nl80211_dump_scan,
3155 },
3156 {
3157 .cmd = NL80211_CMD_AUTHENTICATE,
3158 .doit = nl80211_authenticate,
3159 .policy = nl80211_policy,
3160 .flags = GENL_ADMIN_PERM,
3161 },
3162 {
3163 .cmd = NL80211_CMD_ASSOCIATE,
3164 .doit = nl80211_associate,
3165 .policy = nl80211_policy,
3166 .flags = GENL_ADMIN_PERM,
3167 },
3168 {
3169 .cmd = NL80211_CMD_DEAUTHENTICATE,
3170 .doit = nl80211_deauthenticate,
3171 .policy = nl80211_policy,
3172 .flags = GENL_ADMIN_PERM,
3173 },
3174 {
3175 .cmd = NL80211_CMD_DISASSOCIATE,
3176 .doit = nl80211_disassociate,
3177 .policy = nl80211_policy,
3178 .flags = GENL_ADMIN_PERM,
3179 },
3180};
3181static struct genl_multicast_group nl80211_mlme_mcgrp = {
3182 .name = "mlme",
2298}; 3183};
2299 3184
2300/* multicast groups */ 3185/* multicast groups */
2301static struct genl_multicast_group nl80211_config_mcgrp = { 3186static struct genl_multicast_group nl80211_config_mcgrp = {
2302 .name = "config", 3187 .name = "config",
2303}; 3188};
3189static struct genl_multicast_group nl80211_scan_mcgrp = {
3190 .name = "scan",
3191};
3192static struct genl_multicast_group nl80211_regulatory_mcgrp = {
3193 .name = "regulatory",
3194};
2304 3195
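Once the three new multicast groups are registered (see nl80211_init() below), a listener joins them by name before waiting for scan, regulatory or MLME events. A fragment, again assuming libnl 3.x helper names and the socket from the earlier sketch:

/* Illustrative: subscribe to the new nl80211 multicast groups. */
int scan_grp = genl_ctrl_resolve_grp(sk, "nl80211", "scan");
int reg_grp  = genl_ctrl_resolve_grp(sk, "nl80211", "regulatory");
int mlme_grp = genl_ctrl_resolve_grp(sk, "nl80211", "mlme");

nl_socket_add_membership(sk, scan_grp);
nl_socket_add_membership(sk, reg_grp);
nl_socket_add_membership(sk, mlme_grp);
/* From here nl_recvmsgs_default(sk) delivers NEW_SCAN_RESULTS, SCAN_ABORTED,
 * REG_CHANGE and the MLME notifications sent by the functions below. */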
2305/* notification functions */ 3196/* notification functions */
2306 3197
@@ -2320,6 +3211,186 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev)
2320 genlmsg_multicast(msg, 0, nl80211_config_mcgrp.id, GFP_KERNEL); 3211 genlmsg_multicast(msg, 0, nl80211_config_mcgrp.id, GFP_KERNEL);
2321} 3212}
2322 3213
3214static int nl80211_send_scan_donemsg(struct sk_buff *msg,
3215 struct cfg80211_registered_device *rdev,
3216 struct net_device *netdev,
3217 u32 pid, u32 seq, int flags,
3218 u32 cmd)
3219{
3220 void *hdr;
3221
3222 hdr = nl80211hdr_put(msg, pid, seq, flags, cmd);
3223 if (!hdr)
3224 return -1;
3225
3226 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
3227 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
3228
3229 /* XXX: we should probably bounce back the request? */
3230
3231 return genlmsg_end(msg, hdr);
3232
3233 nla_put_failure:
3234 genlmsg_cancel(msg, hdr);
3235 return -EMSGSIZE;
3236}
3237
3238void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
3239 struct net_device *netdev)
3240{
3241 struct sk_buff *msg;
3242
3243 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
3244 if (!msg)
3245 return;
3246
3247 if (nl80211_send_scan_donemsg(msg, rdev, netdev, 0, 0, 0,
3248 NL80211_CMD_NEW_SCAN_RESULTS) < 0) {
3249 nlmsg_free(msg);
3250 return;
3251 }
3252
3253 genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL);
3254}
3255
3256void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
3257 struct net_device *netdev)
3258{
3259 struct sk_buff *msg;
3260
3261 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
3262 if (!msg)
3263 return;
3264
3265 if (nl80211_send_scan_donemsg(msg, rdev, netdev, 0, 0, 0,
3266 NL80211_CMD_SCAN_ABORTED) < 0) {
3267 nlmsg_free(msg);
3268 return;
3269 }
3270
3271 genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL);
3272}
3273
3274/*
3275 * This can happen on global regulatory changes or device specific settings
3276 * based on custom world regulatory domains.
3277 */
3278void nl80211_send_reg_change_event(struct regulatory_request *request)
3279{
3280 struct sk_buff *msg;
3281 void *hdr;
3282
3283 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
3284 if (!msg)
3285 return;
3286
3287 hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_REG_CHANGE);
3288 if (!hdr) {
3289 nlmsg_free(msg);
3290 return;
3291 }
3292
 3293	/* Userspace can always count on this one being set */
3294 NLA_PUT_U8(msg, NL80211_ATTR_REG_INITIATOR, request->initiator);
3295
3296 if (request->alpha2[0] == '0' && request->alpha2[1] == '0')
3297 NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE,
3298 NL80211_REGDOM_TYPE_WORLD);
3299 else if (request->alpha2[0] == '9' && request->alpha2[1] == '9')
3300 NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE,
3301 NL80211_REGDOM_TYPE_CUSTOM_WORLD);
3302 else if ((request->alpha2[0] == '9' && request->alpha2[1] == '8') ||
3303 request->intersect)
3304 NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE,
3305 NL80211_REGDOM_TYPE_INTERSECTION);
3306 else {
3307 NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE,
3308 NL80211_REGDOM_TYPE_COUNTRY);
3309 NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2, request->alpha2);
3310 }
3311
3312 if (wiphy_idx_valid(request->wiphy_idx))
3313 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx);
3314
3315 if (genlmsg_end(msg, hdr) < 0) {
3316 nlmsg_free(msg);
3317 return;
3318 }
3319
3320 genlmsg_multicast(msg, 0, nl80211_regulatory_mcgrp.id, GFP_KERNEL);
3321
3322 return;
3323
3324nla_put_failure:
3325 genlmsg_cancel(msg, hdr);
3326 nlmsg_free(msg);
3327}
3328
3329static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev,
3330 struct net_device *netdev,
3331 const u8 *buf, size_t len,
3332 enum nl80211_commands cmd)
3333{
3334 struct sk_buff *msg;
3335 void *hdr;
3336
3337 msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
3338 if (!msg)
3339 return;
3340
3341 hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
3342 if (!hdr) {
3343 nlmsg_free(msg);
3344 return;
3345 }
3346
3347 NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
3348 NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
3349 NLA_PUT(msg, NL80211_ATTR_FRAME, len, buf);
3350
3351 if (genlmsg_end(msg, hdr) < 0) {
3352 nlmsg_free(msg);
3353 return;
3354 }
3355
3356 genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL);
3357 return;
3358
3359 nla_put_failure:
3360 genlmsg_cancel(msg, hdr);
3361 nlmsg_free(msg);
3362}
3363
3364void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
3365 struct net_device *netdev, const u8 *buf, size_t len)
3366{
3367 nl80211_send_mlme_event(rdev, netdev, buf, len,
3368 NL80211_CMD_AUTHENTICATE);
3369}
3370
3371void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
3372 struct net_device *netdev, const u8 *buf,
3373 size_t len)
3374{
3375 nl80211_send_mlme_event(rdev, netdev, buf, len, NL80211_CMD_ASSOCIATE);
3376}
3377
3378void nl80211_send_rx_deauth(struct cfg80211_registered_device *rdev,
3379 struct net_device *netdev, const u8 *buf,
3380 size_t len)
3381{
3382 nl80211_send_mlme_event(rdev, netdev, buf, len,
3383 NL80211_CMD_DEAUTHENTICATE);
3384}
3385
3386void nl80211_send_rx_disassoc(struct cfg80211_registered_device *rdev,
3387 struct net_device *netdev, const u8 *buf,
3388 size_t len)
3389{
3390 nl80211_send_mlme_event(rdev, netdev, buf, len,
3391 NL80211_CMD_DISASSOCIATE);
3392}
3393
2323/* initialisation/exit functions */ 3394/* initialisation/exit functions */
2324 3395
2325int nl80211_init(void) 3396int nl80211_init(void)
@@ -2340,6 +3411,18 @@ int nl80211_init(void)
2340 if (err) 3411 if (err)
2341 goto err_out; 3412 goto err_out;
2342 3413
3414 err = genl_register_mc_group(&nl80211_fam, &nl80211_scan_mcgrp);
3415 if (err)
3416 goto err_out;
3417
3418 err = genl_register_mc_group(&nl80211_fam, &nl80211_regulatory_mcgrp);
3419 if (err)
3420 goto err_out;
3421
3422 err = genl_register_mc_group(&nl80211_fam, &nl80211_mlme_mcgrp);
3423 if (err)
3424 goto err_out;
3425
2343 return 0; 3426 return 0;
2344 err_out: 3427 err_out:
2345 genl_unregister_family(&nl80211_fam); 3428 genl_unregister_family(&nl80211_fam);
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index f3ea5c029aee..b77af4ab80be 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -3,22 +3,25 @@
3 3
4#include "core.h" 4#include "core.h"
5 5
6#ifdef CONFIG_NL80211
7extern int nl80211_init(void); 6extern int nl80211_init(void);
8extern void nl80211_exit(void); 7extern void nl80211_exit(void);
9extern void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev); 8extern void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev);
10#else 9extern void nl80211_send_scan_done(struct cfg80211_registered_device *rdev,
11static inline int nl80211_init(void) 10 struct net_device *netdev);
12{ 11extern void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev,
13 return 0; 12 struct net_device *netdev);
14} 13extern void nl80211_send_reg_change_event(struct regulatory_request *request);
15static inline void nl80211_exit(void) 14extern void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev,
16{ 15 struct net_device *netdev,
17} 16 const u8 *buf, size_t len);
18static inline void nl80211_notify_dev_rename( 17extern void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev,
19 struct cfg80211_registered_device *rdev) 18 struct net_device *netdev,
20{ 19 const u8 *buf, size_t len);
21} 20extern void nl80211_send_rx_deauth(struct cfg80211_registered_device *rdev,
22#endif /* CONFIG_NL80211 */ 21 struct net_device *netdev,
22 const u8 *buf, size_t len);
23extern void nl80211_send_rx_disassoc(struct cfg80211_registered_device *rdev,
24 struct net_device *netdev,
25 const u8 *buf, size_t len);
23 26
24#endif /* __NET_WIRELESS_NL80211_H */ 27#endif /* __NET_WIRELESS_NL80211_H */
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index bd0a16c3de5e..6327e1617acb 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -41,38 +41,7 @@
41#include <net/cfg80211.h> 41#include <net/cfg80211.h>
42#include "core.h" 42#include "core.h"
43#include "reg.h" 43#include "reg.h"
44 44#include "nl80211.h"
45/**
46 * struct regulatory_request - receipt of last regulatory request
47 *
48 * @wiphy: this is set if this request's initiator is
49 * %REGDOM_SET_BY_COUNTRY_IE or %REGDOM_SET_BY_DRIVER. This
50 * can be used by the wireless core to deal with conflicts
51 * and potentially inform users of which devices specifically
52 * caused the conflicts.
53 * @initiator: indicates who sent this request, could be any of
54 * of those set in reg_set_by, %REGDOM_SET_BY_*
55 * @alpha2: the ISO / IEC 3166 alpha2 country code of the requested
56 * regulatory domain. We have a few special codes:
57 * 00 - World regulatory domain
58 * 99 - built by driver but a specific alpha2 cannot be determined
59 * 98 - result of an intersection between two regulatory domains
60 * @intersect: indicates whether the wireless core should intersect
61 * the requested regulatory domain with the presently set regulatory
62 * domain.
63 * @country_ie_checksum: checksum of the last processed and accepted
64 * country IE
65 * @country_ie_env: lets us know if the AP is telling us we are outdoor,
66 * indoor, or if it doesn't matter
67 */
68struct regulatory_request {
69 struct wiphy *wiphy;
70 enum reg_set_by initiator;
71 char alpha2[2];
72 bool intersect;
73 u32 country_ie_checksum;
74 enum environment_cap country_ie_env;
75};
76 45
77/* Receipt of information from last regulatory request */ 46/* Receipt of information from last regulatory request */
78static struct regulatory_request *last_request; 47static struct regulatory_request *last_request;
@@ -86,22 +55,63 @@ static u32 supported_bandwidths[] = {
86 MHZ_TO_KHZ(20), 55 MHZ_TO_KHZ(20),
87}; 56};
88 57
89/* Central wireless core regulatory domains, we only need two, 58/*
59 * Central wireless core regulatory domains, we only need two,
90 * the current one and a world regulatory domain in case we have no 60 * the current one and a world regulatory domain in case we have no
91 * information to give us an alpha2 */ 61 * information to give us an alpha2
92static const struct ieee80211_regdomain *cfg80211_regdomain; 62 */
63const struct ieee80211_regdomain *cfg80211_regdomain;
93 64
94/* We use this as a place for the rd structure built from the 65/*
66 * We use this as a place for the rd structure built from the
95 * last parsed country IE to rest until CRDA gets back to us with 67 * last parsed country IE to rest until CRDA gets back to us with
96 * what it thinks should apply for the same country */ 68 * what it thinks should apply for the same country
69 */
97static const struct ieee80211_regdomain *country_ie_regdomain; 70static const struct ieee80211_regdomain *country_ie_regdomain;
98 71
72/* Used to queue up regulatory hints */
73static LIST_HEAD(reg_requests_list);
74static spinlock_t reg_requests_lock;
75
76/* Used to queue up beacon hints for review */
77static LIST_HEAD(reg_pending_beacons);
78static spinlock_t reg_pending_beacons_lock;
79
80/* Used to keep track of processed beacon hints */
81static LIST_HEAD(reg_beacon_list);
82
83struct reg_beacon {
84 struct list_head list;
85 struct ieee80211_channel chan;
86};
87
99/* We keep a static world regulatory domain in case of the absence of CRDA */ 88/* We keep a static world regulatory domain in case of the absence of CRDA */
100static const struct ieee80211_regdomain world_regdom = { 89static const struct ieee80211_regdomain world_regdom = {
101 .n_reg_rules = 1, 90 .n_reg_rules = 5,
102 .alpha2 = "00", 91 .alpha2 = "00",
103 .reg_rules = { 92 .reg_rules = {
104 REG_RULE(2412-10, 2462+10, 40, 6, 20, 93 /* IEEE 802.11b/g, channels 1..11 */
94 REG_RULE(2412-10, 2462+10, 40, 6, 20, 0),
95 /* IEEE 802.11b/g, channels 12..13. No HT40
96 * channel fits here. */
97 REG_RULE(2467-10, 2472+10, 20, 6, 20,
98 NL80211_RRF_PASSIVE_SCAN |
99 NL80211_RRF_NO_IBSS),
100 /* IEEE 802.11 channel 14 - Only JP enables
101 * this and for 802.11b only */
102 REG_RULE(2484-10, 2484+10, 20, 6, 20,
103 NL80211_RRF_PASSIVE_SCAN |
104 NL80211_RRF_NO_IBSS |
105 NL80211_RRF_NO_OFDM),
106 /* IEEE 802.11a, channel 36..48 */
107 REG_RULE(5180-10, 5240+10, 40, 6, 20,
108 NL80211_RRF_PASSIVE_SCAN |
109 NL80211_RRF_NO_IBSS),
110
111	/* NB: 5260 MHz - 5700 MHz requires DFS */
112
113 /* IEEE 802.11a, channel 149..165 */
114 REG_RULE(5745-10, 5825+10, 40, 6, 20,
105 NL80211_RRF_PASSIVE_SCAN | 115 NL80211_RRF_PASSIVE_SCAN |
106 NL80211_RRF_NO_IBSS), 116 NL80211_RRF_NO_IBSS),
107 } 117 }
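The REG_RULE() arguments above are band edges in MHz, not channel centers: a 2.4 GHz channel n is centered at 2407 + 5*n MHz (channel 14 being the 2484 MHz exception), so channels 1..11 give the 2412-10 .. 2462+10 range, and a 5 GHz channel n sits at 5000 + 5*n MHz (36 -> 5180, 165 -> 5825). A small helper sketch of that mapping, approximating the ieee80211_channel_to_frequency() convention of this era:

/* Illustrative helper mirroring the channel -> center-frequency convention
 * behind the REG_RULE() entries above (2.4 GHz and 5 GHz bands only). */
static int chan_to_freq_mhz(int chan)
{
	if (chan == 14)
		return 2484;		/* 802.11b-only Japanese channel */
	if (chan < 14)
		return 2407 + chan * 5;	/* ch 1 -> 2412, ch 13 -> 2472 */
	return 5000 + chan * 5;		/* ch 36 -> 5180, ch 165 -> 5825 */
}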
@@ -112,12 +122,19 @@ static const struct ieee80211_regdomain *cfg80211_world_regdom =
112 122
113#ifdef CONFIG_WIRELESS_OLD_REGULATORY 123#ifdef CONFIG_WIRELESS_OLD_REGULATORY
114static char *ieee80211_regdom = "US"; 124static char *ieee80211_regdom = "US";
125#else
126static char *ieee80211_regdom = "00";
127#endif
128
115module_param(ieee80211_regdom, charp, 0444); 129module_param(ieee80211_regdom, charp, 0444);
116MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); 130MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code");
117 131
118/* We assume 40 MHz bandwidth for the old regulatory work. 132#ifdef CONFIG_WIRELESS_OLD_REGULATORY
133/*
134 * We assume 40 MHz bandwidth for the old regulatory work.
119 * We emphasize that we are using the exact same frequencies 135 * We emphasize that we are using the exact same frequencies
120 * as before */ 136 * as before
137 */
121 138
122static const struct ieee80211_regdomain us_regdom = { 139static const struct ieee80211_regdomain us_regdom = {
123 .n_reg_rules = 6, 140 .n_reg_rules = 6,
@@ -156,8 +173,10 @@ static const struct ieee80211_regdomain jp_regdom = {
156 173
157static const struct ieee80211_regdomain eu_regdom = { 174static const struct ieee80211_regdomain eu_regdom = {
158 .n_reg_rules = 6, 175 .n_reg_rules = 6,
159 /* This alpha2 is bogus, we leave it here just for stupid 176 /*
160 * backward compatibility */ 177 * This alpha2 is bogus, we leave it here just for stupid
178 * backward compatibility
179 */
161 .alpha2 = "EU", 180 .alpha2 = "EU",
162 .reg_rules = { 181 .reg_rules = {
163 /* IEEE 802.11b/g, channels 1..13 */ 182 /* IEEE 802.11b/g, channels 1..13 */
@@ -226,8 +245,10 @@ static void reset_regdomains(void)
226 cfg80211_regdomain = NULL; 245 cfg80211_regdomain = NULL;
227} 246}
228 247
229/* Dynamic world regulatory domain requested by the wireless 248/*
230 * core upon initialization */ 249 * Dynamic world regulatory domain requested by the wireless
250 * core upon initialization
251 */
231static void update_world_regdomain(const struct ieee80211_regdomain *rd) 252static void update_world_regdomain(const struct ieee80211_regdomain *rd)
232{ 253{
233 BUG_ON(!last_request); 254 BUG_ON(!last_request);
@@ -268,8 +289,10 @@ static bool is_unknown_alpha2(const char *alpha2)
268{ 289{
269 if (!alpha2) 290 if (!alpha2)
270 return false; 291 return false;
271 /* Special case where regulatory domain was built by driver 292 /*
272 * but a specific alpha2 cannot be determined */ 293 * Special case where regulatory domain was built by driver
294 * but a specific alpha2 cannot be determined
295 */
273 if (alpha2[0] == '9' && alpha2[1] == '9') 296 if (alpha2[0] == '9' && alpha2[1] == '9')
274 return true; 297 return true;
275 return false; 298 return false;
@@ -279,9 +302,11 @@ static bool is_intersected_alpha2(const char *alpha2)
279{ 302{
280 if (!alpha2) 303 if (!alpha2)
281 return false; 304 return false;
282 /* Special case where regulatory domain is the 305 /*
306 * Special case where regulatory domain is the
283 * result of an intersection between two regulatory domain 307 * result of an intersection between two regulatory domain
284 * structures */ 308 * structures
309 */
285 if (alpha2[0] == '9' && alpha2[1] == '8') 310 if (alpha2[0] == '9' && alpha2[1] == '8')
286 return true; 311 return true;
287 return false; 312 return false;
@@ -306,8 +331,10 @@ static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y)
306 return false; 331 return false;
307} 332}
308 333
309static bool regdom_changed(const char *alpha2) 334static bool regdom_changes(const char *alpha2)
310{ 335{
336 assert_cfg80211_lock();
337
311 if (!cfg80211_regdomain) 338 if (!cfg80211_regdomain)
312 return true; 339 return true;
313 if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) 340 if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2))
@@ -334,8 +361,10 @@ static bool country_ie_integrity_changes(u32 checksum)
334 return false; 361 return false;
335} 362}
336 363
337/* This lets us keep regulatory code which is updated on a regulatory 364/*
338 * basis in userspace. */ 365 * This lets us keep regulatory code which is updated on a regulatory
366 * basis in userspace.
367 */
339static int call_crda(const char *alpha2) 368static int call_crda(const char *alpha2)
340{ 369{
341 char country_env[9 + 2] = "COUNTRY="; 370 char country_env[9 + 2] = "COUNTRY=";
@@ -447,10 +476,12 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range,
447#undef ONE_GHZ_IN_KHZ 476#undef ONE_GHZ_IN_KHZ
448} 477}
449 478
450/* Converts a country IE to a regulatory domain. A regulatory domain 479/*
480 * Converts a country IE to a regulatory domain. A regulatory domain
451 * structure has a lot of information which the IE doesn't yet have, 481 * structure has a lot of information which the IE doesn't yet have,
452 * so for the other values we use upper max values as we will intersect 482 * so for the other values we use upper max values as we will intersect
453 * with our userspace regulatory agent to get lower bounds. */ 483 * with our userspace regulatory agent to get lower bounds.
484 */
454static struct ieee80211_regdomain *country_ie_2_rd( 485static struct ieee80211_regdomain *country_ie_2_rd(
455 u8 *country_ie, 486 u8 *country_ie,
456 u8 country_ie_len, 487 u8 country_ie_len,
@@ -495,9 +526,11 @@ static struct ieee80211_regdomain *country_ie_2_rd(
495 526
496 *checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8); 527 *checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8);
497 528
498 /* We need to build a reg rule for each triplet, but first we must 529 /*
530 * We need to build a reg rule for each triplet, but first we must
499 * calculate the number of reg rules we will need. We will need one 531 * calculate the number of reg rules we will need. We will need one
500 * for each channel subband */ 532 * for each channel subband
533 */
501 while (country_ie_len >= 3) { 534 while (country_ie_len >= 3) {
502 int end_channel = 0; 535 int end_channel = 0;
503 struct ieee80211_country_ie_triplet *triplet = 536 struct ieee80211_country_ie_triplet *triplet =
@@ -535,9 +568,11 @@ static struct ieee80211_regdomain *country_ie_2_rd(
535 if (cur_sub_max_channel < cur_channel) 568 if (cur_sub_max_channel < cur_channel)
536 return NULL; 569 return NULL;
537 570
538 /* Do not allow overlapping channels. Also channels 571 /*
572 * Do not allow overlapping channels. Also channels
539 * passed in each subband must be monotonically 573 * passed in each subband must be monotonically
540 * increasing */ 574 * increasing
575 */
541 if (last_sub_max_channel) { 576 if (last_sub_max_channel) {
542 if (cur_channel <= last_sub_max_channel) 577 if (cur_channel <= last_sub_max_channel)
543 return NULL; 578 return NULL;
@@ -545,10 +580,12 @@ static struct ieee80211_regdomain *country_ie_2_rd(
545 return NULL; 580 return NULL;
546 } 581 }
547 582
548 /* When dot11RegulatoryClassesRequired is supported 583 /*
584 * When dot11RegulatoryClassesRequired is supported
549 * we can throw ext triplets as part of this soup, 585 * we can throw ext triplets as part of this soup,
550 * for now we don't care when those change as we 586 * for now we don't care when those change as we
551 * don't support them */ 587 * don't support them
588 */
552 *checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) | 589 *checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) |
553 ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) | 590 ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) |
554 ((triplet->chans.max_power ^ cur_sub_max_channel) << 24); 591 ((triplet->chans.max_power ^ cur_sub_max_channel) << 24);
@@ -559,8 +596,10 @@ static struct ieee80211_regdomain *country_ie_2_rd(
559 country_ie_len -= 3; 596 country_ie_len -= 3;
560 num_rules++; 597 num_rules++;
561 598
562 /* Note: this is not a IEEE requirement but 599 /*
563 * simply a memory requirement */ 600 * Note: this is not a IEEE requirement but
601 * simply a memory requirement
602 */
564 if (num_rules > NL80211_MAX_SUPP_REG_RULES) 603 if (num_rules > NL80211_MAX_SUPP_REG_RULES)
565 return NULL; 604 return NULL;
566 } 605 }
@@ -588,8 +627,10 @@ static struct ieee80211_regdomain *country_ie_2_rd(
588 struct ieee80211_freq_range *freq_range = NULL; 627 struct ieee80211_freq_range *freq_range = NULL;
589 struct ieee80211_power_rule *power_rule = NULL; 628 struct ieee80211_power_rule *power_rule = NULL;
590 629
591 /* Must parse if dot11RegulatoryClassesRequired is true, 630 /*
592 * we don't support this yet */ 631 * Must parse if dot11RegulatoryClassesRequired is true,
632 * we don't support this yet
633 */
593 if (triplet->ext.reg_extension_id >= 634 if (triplet->ext.reg_extension_id >=
594 IEEE80211_COUNTRY_EXTENSION_ID) { 635 IEEE80211_COUNTRY_EXTENSION_ID) {
595 country_ie += 3; 636 country_ie += 3;
@@ -611,10 +652,12 @@ static struct ieee80211_regdomain *country_ie_2_rd(
611 end_channel = triplet->chans.first_channel + 652 end_channel = triplet->chans.first_channel +
612 (4 * (triplet->chans.num_channels - 1)); 653 (4 * (triplet->chans.num_channels - 1));
613 654
614 /* The +10 is since the regulatory domain expects 655 /*
656 * The +10 is since the regulatory domain expects
615 * the actual band edge, not the center of freq for 657 * the actual band edge, not the center of freq for
616 * its start and end freqs, assuming 20 MHz bandwidth on 658 * its start and end freqs, assuming 20 MHz bandwidth on
617 * the channels passed */ 659 * the channels passed
660 */
618 freq_range->start_freq_khz = 661 freq_range->start_freq_khz =
619 MHZ_TO_KHZ(ieee80211_channel_to_frequency( 662 MHZ_TO_KHZ(ieee80211_channel_to_frequency(
620 triplet->chans.first_channel) - 10); 663 triplet->chans.first_channel) - 10);
@@ -622,9 +665,11 @@ static struct ieee80211_regdomain *country_ie_2_rd(
622 MHZ_TO_KHZ(ieee80211_channel_to_frequency( 665 MHZ_TO_KHZ(ieee80211_channel_to_frequency(
623 end_channel) + 10); 666 end_channel) + 10);
624 667
625 /* Large arbitrary values, we intersect later */ 668 /*
626 /* Increment this if we ever support >= 40 MHz channels 669 * These are large arbitrary values we use to intersect later.
627 * in IEEE 802.11 */ 670 * Increment this if we ever support >= 40 MHz channels
671 * in IEEE 802.11
672 */
628 freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40); 673 freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40);
629 power_rule->max_antenna_gain = DBI_TO_MBI(100); 674 power_rule->max_antenna_gain = DBI_TO_MBI(100);
630 power_rule->max_eirp = DBM_TO_MBM(100); 675 power_rule->max_eirp = DBM_TO_MBM(100);
@@ -640,8 +685,10 @@ static struct ieee80211_regdomain *country_ie_2_rd(
640} 685}
641 686
642 687
643/* Helper for regdom_intersect(), this does the real 688/*
644 * mathematical intersection fun */ 689 * Helper for regdom_intersect(), this does the real
690 * mathematical intersection fun
691 */
645static int reg_rules_intersect( 692static int reg_rules_intersect(
646 const struct ieee80211_reg_rule *rule1, 693 const struct ieee80211_reg_rule *rule1,
647 const struct ieee80211_reg_rule *rule2, 694 const struct ieee80211_reg_rule *rule2,
@@ -719,11 +766,13 @@ static struct ieee80211_regdomain *regdom_intersect(
719 if (!rd1 || !rd2) 766 if (!rd1 || !rd2)
720 return NULL; 767 return NULL;
721 768
722 /* First we get a count of the rules we'll need, then we actually 769 /*
770 * First we get a count of the rules we'll need, then we actually
723 * build them. This is so we can malloc() and free() a 771 * build them. This is so we can malloc() and free() a
724 * regdomain once. The reason we use reg_rules_intersect() here 772 * regdomain once. The reason we use reg_rules_intersect() here
725 * is it will return -EINVAL if the rule computed makes no sense. 773 * is it will return -EINVAL if the rule computed makes no sense.
726 * All rules that do check out OK are valid. */ 774 * All rules that do check out OK are valid.
775 */
727 776
728 for (x = 0; x < rd1->n_reg_rules; x++) { 777 for (x = 0; x < rd1->n_reg_rules; x++) {
729 rule1 = &rd1->reg_rules[x]; 778 rule1 = &rd1->reg_rules[x];
@@ -751,14 +800,18 @@ static struct ieee80211_regdomain *regdom_intersect(
751 rule1 = &rd1->reg_rules[x]; 800 rule1 = &rd1->reg_rules[x];
752 for (y = 0; y < rd2->n_reg_rules; y++) { 801 for (y = 0; y < rd2->n_reg_rules; y++) {
753 rule2 = &rd2->reg_rules[y]; 802 rule2 = &rd2->reg_rules[y];
754 /* This time around instead of using the stack lets 803 /*
804 * This time around instead of using the stack lets
755 * write to the target rule directly saving ourselves 805 * write to the target rule directly saving ourselves
756 * a memcpy() */ 806 * a memcpy()
807 */
757 intersected_rule = &rd->reg_rules[rule_idx]; 808 intersected_rule = &rd->reg_rules[rule_idx];
758 r = reg_rules_intersect(rule1, rule2, 809 r = reg_rules_intersect(rule1, rule2,
759 intersected_rule); 810 intersected_rule);
760 /* No need to memset the intersected rule here as 811 /*
761 * we're not using the stack anymore */ 812 * No need to memset the intersected rule here as
813 * we're not using the stack anymore
814 */
762 if (r) 815 if (r)
763 continue; 816 continue;
764 rule_idx++; 817 rule_idx++;
@@ -777,8 +830,10 @@ static struct ieee80211_regdomain *regdom_intersect(
777 return rd; 830 return rd;
778} 831}
779 832
780/* XXX: add support for the rest of enum nl80211_reg_rule_flags, we may 833/*
781 * want to just have the channel structure use these */ 834 * XXX: add support for the rest of enum nl80211_reg_rule_flags, we may
835 * want to just have the channel structure use these
836 */
782static u32 map_regdom_flags(u32 rd_flags) 837static u32 map_regdom_flags(u32 rd_flags)
783{ 838{
784 u32 channel_flags = 0; 839 u32 channel_flags = 0;
@@ -791,48 +846,45 @@ static u32 map_regdom_flags(u32 rd_flags)
791 return channel_flags; 846 return channel_flags;
792} 847}
793 848
794/** 849static int freq_reg_info_regd(struct wiphy *wiphy,
795 * freq_reg_info - get regulatory information for the given frequency 850 u32 center_freq,
796 * @center_freq: Frequency in KHz for which we want regulatory information for 851 u32 *bandwidth,
797 * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one 852 const struct ieee80211_reg_rule **reg_rule,
798 * you can set this to 0. If this frequency is allowed we then set 853 const struct ieee80211_regdomain *custom_regd)
799 * this value to the maximum allowed bandwidth.
800 * @reg_rule: the regulatory rule which we have for this frequency
801 *
802 * Use this function to get the regulatory rule for a specific frequency on
803 * a given wireless device. If the device has a specific regulatory domain
804 * it wants to follow we respect that unless a country IE has been received
805 * and processed already.
806 *
807 * Returns 0 if it was able to find a valid regulatory rule which does
808 * apply to the given center_freq otherwise it returns non-zero. It will
809 * also return -ERANGE if we determine the given center_freq does not even have
810 * a regulatory rule for a frequency range in the center_freq's band. See
811 * freq_in_rule_band() for our current definition of a band -- this is purely
812 * subjective and right now its 802.11 specific.
813 */
814static int freq_reg_info(u32 center_freq, u32 *bandwidth,
815 const struct ieee80211_reg_rule **reg_rule)
816{ 854{
817 int i; 855 int i;
818 bool band_rule_found = false; 856 bool band_rule_found = false;
857 const struct ieee80211_regdomain *regd;
819 u32 max_bandwidth = 0; 858 u32 max_bandwidth = 0;
820 859
821 if (!cfg80211_regdomain) 860 regd = custom_regd ? custom_regd : cfg80211_regdomain;
861
862 /*
863 * Follow the driver's regulatory domain, if present, unless a country
 864	 * IE has been processed or a user wants to help compliance further
865 */
866 if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
867 last_request->initiator != NL80211_REGDOM_SET_BY_USER &&
868 wiphy->regd)
869 regd = wiphy->regd;
870
871 if (!regd)
822 return -EINVAL; 872 return -EINVAL;
823 873
824 for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) { 874 for (i = 0; i < regd->n_reg_rules; i++) {
825 const struct ieee80211_reg_rule *rr; 875 const struct ieee80211_reg_rule *rr;
826 const struct ieee80211_freq_range *fr = NULL; 876 const struct ieee80211_freq_range *fr = NULL;
827 const struct ieee80211_power_rule *pr = NULL; 877 const struct ieee80211_power_rule *pr = NULL;
828 878
829 rr = &cfg80211_regdomain->reg_rules[i]; 879 rr = &regd->reg_rules[i];
830 fr = &rr->freq_range; 880 fr = &rr->freq_range;
831 pr = &rr->power_rule; 881 pr = &rr->power_rule;
832 882
833 /* We only need to know if one frequency rule was 883 /*
884 * We only need to know if one frequency rule was
834 * in center_freq's band, that's enough, so let's 885 * in center_freq's band, that's enough, so let's
835 * not overwrite it once found */ 886 * not overwrite it once found
887 */
836 if (!band_rule_found) 888 if (!band_rule_found)
837 band_rule_found = freq_in_rule_band(fr, center_freq); 889 band_rule_found = freq_in_rule_band(fr, center_freq);
838 890
@@ -850,6 +902,14 @@ static int freq_reg_info(u32 center_freq, u32 *bandwidth,
850 902
851 return !max_bandwidth; 903 return !max_bandwidth;
852} 904}
905EXPORT_SYMBOL(freq_reg_info);
906
907int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth,
908 const struct ieee80211_reg_rule **reg_rule)
909{
910 return freq_reg_info_regd(wiphy, center_freq,
911 bandwidth, reg_rule, NULL);
912}
853 913
854static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, 914static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
855 unsigned int chan_idx) 915 unsigned int chan_idx)
@@ -861,6 +921,11 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
861 const struct ieee80211_power_rule *power_rule = NULL; 921 const struct ieee80211_power_rule *power_rule = NULL;
862 struct ieee80211_supported_band *sband; 922 struct ieee80211_supported_band *sband;
863 struct ieee80211_channel *chan; 923 struct ieee80211_channel *chan;
924 struct wiphy *request_wiphy = NULL;
925
926 assert_cfg80211_lock();
927
928 request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
864 929
865 sband = wiphy->bands[band]; 930 sband = wiphy->bands[band];
866 BUG_ON(chan_idx >= sband->n_channels); 931 BUG_ON(chan_idx >= sband->n_channels);
@@ -868,11 +933,12 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
868 933
869 flags = chan->orig_flags; 934 flags = chan->orig_flags;
870 935
871 r = freq_reg_info(MHZ_TO_KHZ(chan->center_freq), 936 r = freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq),
872 &max_bandwidth, &reg_rule); 937 &max_bandwidth, &reg_rule);
873 938
874 if (r) { 939 if (r) {
875 /* This means no regulatory rule was found in the country IE 940 /*
941 * This means no regulatory rule was found in the country IE
876 * with a frequency range on the center_freq's band, since 942 * with a frequency range on the center_freq's band, since
877 * IEEE-802.11 allows for a country IE to have a subset of the 943 * IEEE-802.11 allows for a country IE to have a subset of the
878 * regulatory information provided in a country we ignore 944 * regulatory information provided in a country we ignore
@@ -883,7 +949,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
883 * http://tinyurl.com/11d-clarification 949 * http://tinyurl.com/11d-clarification
884 */ 950 */
885 if (r == -ERANGE && 951 if (r == -ERANGE &&
886 last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { 952 last_request->initiator ==
953 NL80211_REGDOM_SET_BY_COUNTRY_IE) {
887#ifdef CONFIG_CFG80211_REG_DEBUG 954#ifdef CONFIG_CFG80211_REG_DEBUG
888 printk(KERN_DEBUG "cfg80211: Leaving channel %d MHz " 955 printk(KERN_DEBUG "cfg80211: Leaving channel %d MHz "
889 "intact on %s - no rule found in band on " 956 "intact on %s - no rule found in band on "
@@ -891,10 +958,13 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
891 chan->center_freq, wiphy_name(wiphy)); 958 chan->center_freq, wiphy_name(wiphy));
892#endif 959#endif
893 } else { 960 } else {
894 /* In this case we know the country IE has at least one reg rule 961 /*
895 * for the band so we respect its band definitions */ 962 * In this case we know the country IE has at least one reg rule
963 * for the band so we respect its band definitions
964 */
896#ifdef CONFIG_CFG80211_REG_DEBUG 965#ifdef CONFIG_CFG80211_REG_DEBUG
897 if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) 966 if (last_request->initiator ==
967 NL80211_REGDOM_SET_BY_COUNTRY_IE)
898 printk(KERN_DEBUG "cfg80211: Disabling " 968 printk(KERN_DEBUG "cfg80211: Disabling "
899 "channel %d MHz on %s due to " 969 "channel %d MHz on %s due to "
900 "Country IE\n", 970 "Country IE\n",
@@ -908,6 +978,24 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band,
908 978
909 power_rule = &reg_rule->power_rule; 979 power_rule = &reg_rule->power_rule;
910 980
981 if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
982 request_wiphy && request_wiphy == wiphy &&
983 request_wiphy->strict_regulatory) {
984 /*
 985	 * This guarantees the driver's requested regulatory domain
986 * will always be used as a base for further regulatory
987 * settings
988 */
989 chan->flags = chan->orig_flags =
990 map_regdom_flags(reg_rule->flags);
991 chan->max_antenna_gain = chan->orig_mag =
992 (int) MBI_TO_DBI(power_rule->max_antenna_gain);
993 chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth);
994 chan->max_power = chan->orig_mpwr =
995 (int) MBM_TO_DBM(power_rule->max_eirp);
996 return;
997 }
998
911 chan->flags = flags | map_regdom_flags(reg_rule->flags); 999 chan->flags = flags | map_regdom_flags(reg_rule->flags);
912 chan->max_antenna_gain = min(chan->orig_mag, 1000 chan->max_antenna_gain = min(chan->orig_mag,
913 (int) MBI_TO_DBI(power_rule->max_antenna_gain)); 1001 (int) MBI_TO_DBI(power_rule->max_antenna_gain));
@@ -931,116 +1019,513 @@ static void handle_band(struct wiphy *wiphy, enum ieee80211_band band)
931 handle_channel(wiphy, band, i); 1019 handle_channel(wiphy, band, i);
932} 1020}
933 1021
934static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby) 1022static bool ignore_reg_update(struct wiphy *wiphy,
1023 enum nl80211_reg_initiator initiator)
935{ 1024{
936 if (!last_request) 1025 if (!last_request)
937 return true; 1026 return true;
938 if (setby == REGDOM_SET_BY_CORE && 1027 if (initiator == NL80211_REGDOM_SET_BY_CORE &&
939 wiphy->fw_handles_regulatory) 1028 wiphy->custom_regulatory)
1029 return true;
1030 /*
1031 * wiphy->regd will be set once the device has its own
1032 * desired regulatory domain set
1033 */
1034 if (wiphy->strict_regulatory && !wiphy->regd &&
1035 !is_world_regdom(last_request->alpha2))
940 return true; 1036 return true;
941 return false; 1037 return false;
942} 1038}
943 1039
944static void update_all_wiphy_regulatory(enum reg_set_by setby) 1040static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator)
945{ 1041{
946 struct cfg80211_registered_device *drv; 1042 struct cfg80211_registered_device *drv;
947 1043
948 list_for_each_entry(drv, &cfg80211_drv_list, list) 1044 list_for_each_entry(drv, &cfg80211_drv_list, list)
949 if (!ignore_reg_update(&drv->wiphy, setby)) 1045 wiphy_update_regulatory(&drv->wiphy, initiator);
950 wiphy_update_regulatory(&drv->wiphy, setby); 1046}
1047
1048static void handle_reg_beacon(struct wiphy *wiphy,
1049 unsigned int chan_idx,
1050 struct reg_beacon *reg_beacon)
1051{
1052#ifdef CONFIG_CFG80211_REG_DEBUG
1053#define REG_DEBUG_BEACON_FLAG(desc) \
1054 printk(KERN_DEBUG "cfg80211: Enabling " desc " on " \
1055 "frequency: %d MHz (Ch %d) on %s\n", \
1056 reg_beacon->chan.center_freq, \
1057 ieee80211_frequency_to_channel(reg_beacon->chan.center_freq), \
1058 wiphy_name(wiphy));
1059#else
1060#define REG_DEBUG_BEACON_FLAG(desc) do {} while (0)
1061#endif
1062 struct ieee80211_supported_band *sband;
1063 struct ieee80211_channel *chan;
1064
1065 assert_cfg80211_lock();
1066
1067 sband = wiphy->bands[reg_beacon->chan.band];
1068 chan = &sband->channels[chan_idx];
1069
1070 if (likely(chan->center_freq != reg_beacon->chan.center_freq))
1071 return;
1072
1073 if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) {
1074 chan->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN;
1075 REG_DEBUG_BEACON_FLAG("active scanning");
1076 }
1077
1078 if (chan->flags & IEEE80211_CHAN_NO_IBSS) {
1079 chan->flags &= ~IEEE80211_CHAN_NO_IBSS;
1080 REG_DEBUG_BEACON_FLAG("beaconing");
1081 }
1082
1083 chan->beacon_found = true;
1084#undef REG_DEBUG_BEACON_FLAG
951} 1085}
952 1086
953void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) 1087/*
1088 * Called when a scan on a wiphy finds a beacon on
1089 * new channel
1090 */
1091static void wiphy_update_new_beacon(struct wiphy *wiphy,
1092 struct reg_beacon *reg_beacon)
1093{
1094 unsigned int i;
1095 struct ieee80211_supported_band *sband;
1096
1097 assert_cfg80211_lock();
1098
1099 if (!wiphy->bands[reg_beacon->chan.band])
1100 return;
1101
1102 sband = wiphy->bands[reg_beacon->chan.band];
1103
1104 for (i = 0; i < sband->n_channels; i++)
1105 handle_reg_beacon(wiphy, i, reg_beacon);
1106}
1107
1108/*
1109 * Called upon reg changes or a new wiphy is added
1110 */
1111static void wiphy_update_beacon_reg(struct wiphy *wiphy)
1112{
1113 unsigned int i;
1114 struct ieee80211_supported_band *sband;
1115 struct reg_beacon *reg_beacon;
1116
1117 assert_cfg80211_lock();
1118
1119 if (list_empty(&reg_beacon_list))
1120 return;
1121
1122 list_for_each_entry(reg_beacon, &reg_beacon_list, list) {
1123 if (!wiphy->bands[reg_beacon->chan.band])
1124 continue;
1125 sband = wiphy->bands[reg_beacon->chan.band];
1126 for (i = 0; i < sband->n_channels; i++)
1127 handle_reg_beacon(wiphy, i, reg_beacon);
1128 }
1129}
1130
1131static bool reg_is_world_roaming(struct wiphy *wiphy)
1132{
1133 if (is_world_regdom(cfg80211_regdomain->alpha2) ||
1134 (wiphy->regd && is_world_regdom(wiphy->regd->alpha2)))
1135 return true;
1136 if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE &&
1137 wiphy->custom_regulatory)
1138 return true;
1139 return false;
1140}
1141
1142/* Reap the advantages of previously found beacons */
1143static void reg_process_beacons(struct wiphy *wiphy)
1144{
1145 if (!reg_is_world_roaming(wiphy))
1146 return;
1147 wiphy_update_beacon_reg(wiphy);
1148}
1149
1150void wiphy_update_regulatory(struct wiphy *wiphy,
1151 enum nl80211_reg_initiator initiator)
954{ 1152{
955 enum ieee80211_band band; 1153 enum ieee80211_band band;
1154
1155 if (ignore_reg_update(wiphy, initiator))
1156 goto out;
956 for (band = 0; band < IEEE80211_NUM_BANDS; band++) { 1157 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
957 if (wiphy->bands[band]) 1158 if (wiphy->bands[band])
958 handle_band(wiphy, band); 1159 handle_band(wiphy, band);
959 if (wiphy->reg_notifier)
960 wiphy->reg_notifier(wiphy, setby);
961 } 1160 }
1161out:
1162 reg_process_beacons(wiphy);
1163 if (wiphy->reg_notifier)
1164 wiphy->reg_notifier(wiphy, last_request);
962} 1165}
963 1166
964/* Return value which can be used by ignore_request() to indicate 1167static void handle_channel_custom(struct wiphy *wiphy,
965 * it has been determined we should intersect two regulatory domains */ 1168 enum ieee80211_band band,
1169 unsigned int chan_idx,
1170 const struct ieee80211_regdomain *regd)
1171{
1172 int r;
1173 u32 max_bandwidth = 0;
1174 const struct ieee80211_reg_rule *reg_rule = NULL;
1175 const struct ieee80211_power_rule *power_rule = NULL;
1176 struct ieee80211_supported_band *sband;
1177 struct ieee80211_channel *chan;
1178
1179 sband = wiphy->bands[band];
1180 BUG_ON(chan_idx >= sband->n_channels);
1181 chan = &sband->channels[chan_idx];
1182
1183 r = freq_reg_info_regd(wiphy, MHZ_TO_KHZ(chan->center_freq),
1184 &max_bandwidth, &reg_rule, regd);
1185
1186 if (r) {
1187 chan->flags = IEEE80211_CHAN_DISABLED;
1188 return;
1189 }
1190
1191 power_rule = &reg_rule->power_rule;
1192
1193 chan->flags |= map_regdom_flags(reg_rule->flags);
1194 chan->max_antenna_gain = (int) MBI_TO_DBI(power_rule->max_antenna_gain);
1195 chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth);
1196 chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp);
1197}
1198
1199static void handle_band_custom(struct wiphy *wiphy, enum ieee80211_band band,
1200 const struct ieee80211_regdomain *regd)
1201{
1202 unsigned int i;
1203 struct ieee80211_supported_band *sband;
1204
1205 BUG_ON(!wiphy->bands[band]);
1206 sband = wiphy->bands[band];
1207
1208 for (i = 0; i < sband->n_channels; i++)
1209 handle_channel_custom(wiphy, band, i, regd);
1210}
1211
1212/* Used by drivers prior to wiphy registration */
1213void wiphy_apply_custom_regulatory(struct wiphy *wiphy,
1214 const struct ieee80211_regdomain *regd)
1215{
1216 enum ieee80211_band band;
1217 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
1218 if (wiphy->bands[band])
1219 handle_band_custom(wiphy, band, regd);
1220 }
1221}
1222EXPORT_SYMBOL(wiphy_apply_custom_regulatory);
1223
1224static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd,
1225 const struct ieee80211_regdomain *src_regd)
1226{
1227 struct ieee80211_regdomain *regd;
1228 int size_of_regd = 0;
1229 unsigned int i;
1230
1231 size_of_regd = sizeof(struct ieee80211_regdomain) +
1232 ((src_regd->n_reg_rules + 1) * sizeof(struct ieee80211_reg_rule));
1233
1234 regd = kzalloc(size_of_regd, GFP_KERNEL);
1235 if (!regd)
1236 return -ENOMEM;
1237
1238 memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain));
1239
1240 for (i = 0; i < src_regd->n_reg_rules; i++)
1241 memcpy(&regd->reg_rules[i], &src_regd->reg_rules[i],
1242 sizeof(struct ieee80211_reg_rule));
1243
1244 *dst_regd = regd;
1245 return 0;
1246}
1247
1248/*
1249 * Return value which can be used by ignore_request() to indicate
1250 * it has been determined we should intersect two regulatory domains
1251 */
966#define REG_INTERSECT 1 1252#define REG_INTERSECT 1
967 1253
968/* This has the logic which determines when a new request 1254/* This has the logic which determines when a new request
969 * should be ignored. */ 1255 * should be ignored. */
970static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, 1256static int ignore_request(struct wiphy *wiphy,
971 const char *alpha2) 1257 struct regulatory_request *pending_request)
972{ 1258{
1259 struct wiphy *last_wiphy = NULL;
1260
1261 assert_cfg80211_lock();
1262
973 /* All initial requests are respected */ 1263 /* All initial requests are respected */
974 if (!last_request) 1264 if (!last_request)
975 return 0; 1265 return 0;
976 1266
977 switch (set_by) { 1267 switch (pending_request->initiator) {
978 case REGDOM_SET_BY_INIT: 1268 case NL80211_REGDOM_SET_BY_CORE:
979 return -EINVAL; 1269 return -EINVAL;
980 case REGDOM_SET_BY_CORE: 1270 case NL80211_REGDOM_SET_BY_COUNTRY_IE:
981 /* 1271
982 * Always respect new wireless core hints, should only happen 1272 last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
983 * when updating the world regulatory domain at init. 1273
984 */ 1274 if (unlikely(!is_an_alpha2(pending_request->alpha2)))
985 return 0;
986 case REGDOM_SET_BY_COUNTRY_IE:
987 if (unlikely(!is_an_alpha2(alpha2)))
988 return -EINVAL; 1275 return -EINVAL;
989 if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { 1276 if (last_request->initiator ==
990 if (last_request->wiphy != wiphy) { 1277 NL80211_REGDOM_SET_BY_COUNTRY_IE) {
1278 if (last_wiphy != wiphy) {
991 /* 1279 /*
992 * Two cards with two APs claiming different 1280 * Two cards with two APs claiming different
993 * Country IE alpha2s. We could 1281 * Country IE alpha2s. We could
994 * intersect them, but that seems unlikely 1282 * intersect them, but that seems unlikely
995 * to be correct. Reject second one for now. 1283 * to be correct. Reject second one for now.
996 */ 1284 */
997 if (!alpha2_equal(alpha2, 1285 if (regdom_changes(pending_request->alpha2))
998 cfg80211_regdomain->alpha2))
999 return -EOPNOTSUPP; 1286 return -EOPNOTSUPP;
1000 return -EALREADY; 1287 return -EALREADY;
1001 } 1288 }
1002 /* Two consecutive Country IE hints on the same wiphy. 1289 /*
1003 * This should be picked up early by the driver/stack */ 1290 * Two consecutive Country IE hints on the same wiphy.
1004 if (WARN_ON(!alpha2_equal(cfg80211_regdomain->alpha2, 1291 * This should be picked up early by the driver/stack
1005 alpha2))) 1292 */
1293 if (WARN_ON(regdom_changes(pending_request->alpha2)))
1006 return 0; 1294 return 0;
1007 return -EALREADY; 1295 return -EALREADY;
1008 } 1296 }
1009 return REG_INTERSECT; 1297 return REG_INTERSECT;
1010 case REGDOM_SET_BY_DRIVER: 1298 case NL80211_REGDOM_SET_BY_DRIVER:
1011 if (last_request->initiator == REGDOM_SET_BY_DRIVER) 1299 if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE) {
1300 if (is_old_static_regdom(cfg80211_regdomain))
1301 return 0;
1302 if (regdom_changes(pending_request->alpha2))
1303 return 0;
1012 return -EALREADY; 1304 return -EALREADY;
1013 return 0; 1305 }
1014 case REGDOM_SET_BY_USER: 1306
1015 if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) 1307 /*
1308 * This would happen if you unplug and plug your card
1309 * back in or if you add a new device for which the previously
1310 * loaded card also agrees on the regulatory domain.
1311 */
1312 if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
1313 !regdom_changes(pending_request->alpha2))
1314 return -EALREADY;
1315
1316 return REG_INTERSECT;
1317 case NL80211_REGDOM_SET_BY_USER:
1318 if (last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE)
1016 return REG_INTERSECT; 1319 return REG_INTERSECT;
1017 /* If the user knows better the user should set the regdom 1320 /*
1018 * to their country before the IE is picked up */ 1321 * If the user knows better the user should set the regdom
1019 if (last_request->initiator == REGDOM_SET_BY_USER && 1322 * to their country before the IE is picked up
1323 */
1324 if (last_request->initiator == NL80211_REGDOM_SET_BY_USER &&
1020 last_request->intersect) 1325 last_request->intersect)
1021 return -EOPNOTSUPP; 1326 return -EOPNOTSUPP;
1327 /*
1328 * Process user requests only after previous user/driver/core
1329 * requests have been processed
1330 */
1331 if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE ||
1332 last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER ||
1333 last_request->initiator == NL80211_REGDOM_SET_BY_USER) {
1334 if (regdom_changes(last_request->alpha2))
1335 return -EAGAIN;
1336 }
1337
1338 if (!is_old_static_regdom(cfg80211_regdomain) &&
1339 !regdom_changes(pending_request->alpha2))
1340 return -EALREADY;
1341
1022 return 0; 1342 return 0;
1023 } 1343 }
1024 1344
1025 return -EINVAL; 1345 return -EINVAL;
1026} 1346}
1027 1347
1028/* Caller must hold &cfg80211_drv_mutex */ 1348/**
1029int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, 1349 * __regulatory_hint - hint to the wireless core a regulatory domain
1030 const char *alpha2, 1350 * @wiphy: if the hint comes from country information from an AP, this
1031 u32 country_ie_checksum, 1351 * is required to be set to the wiphy that received the information
1032 enum environment_cap env) 1352 * @pending_request: the regulatory request currently being processed
1353 *
1354 * The Wireless subsystem can use this function to hint to the wireless core
1355 * what it believes should be the current regulatory domain.
1356 *
1357 * Returns zero if all went fine, %-EALREADY if a regulatory domain had
1358 * already been set or other standard error codes.
1359 *
1360 * Caller must hold &cfg80211_mutex
1361 */
1362static int __regulatory_hint(struct wiphy *wiphy,
1363 struct regulatory_request *pending_request)
1033{ 1364{
1034 struct regulatory_request *request;
1035 bool intersect = false; 1365 bool intersect = false;
1036 int r = 0; 1366 int r = 0;
1037 1367
1038 r = ignore_request(wiphy, set_by, alpha2); 1368 assert_cfg80211_lock();
1369
1370 r = ignore_request(wiphy, pending_request);
1039 1371
1040 if (r == REG_INTERSECT) 1372 if (r == REG_INTERSECT) {
1373 if (pending_request->initiator ==
1374 NL80211_REGDOM_SET_BY_DRIVER) {
1375 r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain);
1376 if (r) {
1377 kfree(pending_request);
1378 return r;
1379 }
1380 }
1041 intersect = true; 1381 intersect = true;
1042 else if (r) 1382 } else if (r) {
1383 /*
1384 * If the regulatory domain being requested by the
1385 * driver has already been set just copy it to the
1386 * wiphy
1387 */
1388 if (r == -EALREADY &&
1389 pending_request->initiator ==
1390 NL80211_REGDOM_SET_BY_DRIVER) {
1391 r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain);
1392 if (r) {
1393 kfree(pending_request);
1394 return r;
1395 }
1396 r = -EALREADY;
1397 goto new_request;
1398 }
1399 kfree(pending_request);
1400 return r;
1401 }
1402
1403new_request:
1404 kfree(last_request);
1405
1406 last_request = pending_request;
1407 last_request->intersect = intersect;
1408
1409 pending_request = NULL;
1410
1411 /* When r == REG_INTERSECT we do need to call CRDA */
1412 if (r < 0) {
1413 /*
1414 * Since CRDA will not be called in this case as we already
1415 * have applied the requested regulatory domain, we just
1416 * inform userspace we have processed the request
1417 */
1418 if (r == -EALREADY)
1419 nl80211_send_reg_change_event(last_request);
1043 return r; 1420 return r;
1421 }
1422
1423 return call_crda(last_request->alpha2);
1424}
1425
1426/* This currently only processes user and driver regulatory hints */
1427static void reg_process_hint(struct regulatory_request *reg_request)
1428{
1429 int r = 0;
1430 struct wiphy *wiphy = NULL;
1431
1432 BUG_ON(!reg_request->alpha2);
1433
1434 mutex_lock(&cfg80211_mutex);
1435
1436 if (wiphy_idx_valid(reg_request->wiphy_idx))
1437 wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx);
1438
1439 if (reg_request->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
1440 !wiphy) {
1441 kfree(reg_request);
1442 goto out;
1443 }
1444
1445 r = __regulatory_hint(wiphy, reg_request);
1446 /* This is required so that the orig_* parameters are saved */
1447 if (r == -EALREADY && wiphy && wiphy->strict_regulatory)
1448 wiphy_update_regulatory(wiphy, reg_request->initiator);
1449out:
1450 mutex_unlock(&cfg80211_mutex);
1451}
1452
1453/* Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* */
1454static void reg_process_pending_hints(void)
1455{
1456 struct regulatory_request *reg_request;
1457
1458 spin_lock(&reg_requests_lock);
1459 while (!list_empty(&reg_requests_list)) {
1460 reg_request = list_first_entry(&reg_requests_list,
1461 struct regulatory_request,
1462 list);
1463 list_del_init(&reg_request->list);
1464
1465 spin_unlock(&reg_requests_lock);
1466 reg_process_hint(reg_request);
1467 spin_lock(&reg_requests_lock);
1468 }
1469 spin_unlock(&reg_requests_lock);
1470}
1471
1472/* Processes beacon hints -- this has nothing to do with country IEs */
1473static void reg_process_pending_beacon_hints(void)
1474{
1475 struct cfg80211_registered_device *drv;
1476 struct reg_beacon *pending_beacon, *tmp;
1477
1478 mutex_lock(&cfg80211_mutex);
1479
1480 /* This goes through the _pending_ beacon list */
1481 spin_lock_bh(&reg_pending_beacons_lock);
1482
1483 if (list_empty(&reg_pending_beacons)) {
1484 spin_unlock_bh(&reg_pending_beacons_lock);
1485 goto out;
1486 }
1487
1488 list_for_each_entry_safe(pending_beacon, tmp,
1489 &reg_pending_beacons, list) {
1490
1491 list_del_init(&pending_beacon->list);
1492
1493 /* Applies the beacon hint to current wiphys */
1494 list_for_each_entry(drv, &cfg80211_drv_list, list)
1495 wiphy_update_new_beacon(&drv->wiphy, pending_beacon);
1496
1497 /* Remembers the beacon hint for new wiphys or reg changes */
1498 list_add_tail(&pending_beacon->list, &reg_beacon_list);
1499 }
1500
1501 spin_unlock_bh(&reg_pending_beacons_lock);
1502out:
1503 mutex_unlock(&cfg80211_mutex);
1504}
1505
1506static void reg_todo(struct work_struct *work)
1507{
1508 reg_process_pending_hints();
1509 reg_process_pending_beacon_hints();
1510}
1511
1512static DECLARE_WORK(reg_work, reg_todo);
1513
1514static void queue_regulatory_request(struct regulatory_request *request)
1515{
1516 spin_lock(&reg_requests_lock);
1517 list_add_tail(&request->list, &reg_requests_list);
1518 spin_unlock(&reg_requests_lock);
1519
1520 schedule_work(&reg_work);
1521}
1522
1523/* Core regulatory hint -- happens once during cfg80211_init() */
1524static int regulatory_hint_core(const char *alpha2)
1525{
1526 struct regulatory_request *request;
1527
1528 BUG_ON(last_request);
1044 1529
1045 request = kzalloc(sizeof(struct regulatory_request), 1530 request = kzalloc(sizeof(struct regulatory_request),
1046 GFP_KERNEL); 1531 GFP_KERNEL);
@@ -1049,47 +1534,84 @@ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by,
1049 1534
1050 request->alpha2[0] = alpha2[0]; 1535 request->alpha2[0] = alpha2[0];
1051 request->alpha2[1] = alpha2[1]; 1536 request->alpha2[1] = alpha2[1];
1052 request->initiator = set_by; 1537 request->initiator = NL80211_REGDOM_SET_BY_CORE;
1053 request->wiphy = wiphy;
1054 request->intersect = intersect;
1055 request->country_ie_checksum = country_ie_checksum;
1056 request->country_ie_env = env;
1057 1538
1058 kfree(last_request); 1539 queue_regulatory_request(request);
1059 last_request = request; 1540
1060 /* 1541 return 0;
1061 * Note: When CONFIG_WIRELESS_OLD_REGULATORY is enabled 1542}
1062 * AND if CRDA is NOT present nothing will happen, if someone 1543
1063 * wants to bother with 11d with OLD_REG you can add a timer. 1544/* User hints */
1064 * If after x amount of time nothing happens you can call: 1545int regulatory_hint_user(const char *alpha2)
1065 * 1546{
1066 * return set_regdom(country_ie_regdomain); 1547 struct regulatory_request *request;
1067 * 1548
1068 * to intersect with the static rd 1549 BUG_ON(!alpha2);
1069 */ 1550
1070 return call_crda(alpha2); 1551 request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL);
1552 if (!request)
1553 return -ENOMEM;
1554
1555 request->wiphy_idx = WIPHY_IDX_STALE;
1556 request->alpha2[0] = alpha2[0];
1557 request->alpha2[1] = alpha2[1];
1558 request->initiator = NL80211_REGDOM_SET_BY_USER;
1559
1560 queue_regulatory_request(request);
1561
1562 return 0;
1071} 1563}
1072 1564
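An illustrative sketch, not part of this patch: how the new regulatory_hint_user() entry point might be driven. The caller below is hypothetical; only regulatory_hint_user() itself, and the fact that it merely queues a request for the reg_todo() worker, come from the code above.

#include <net/cfg80211.h>
#include "reg.h"

/* Hypothetical caller, e.g. an nl80211 handler acting on a userspace
 * "set regulatory domain" request. The hint is only queued here;
 * reg_todo() picks it up later and ends up calling CRDA. */
static int example_handle_user_reg_request(const char *alpha2)
{
	if (!alpha2 || !alpha2[0] || !alpha2[1])
		return -EINVAL;

	return regulatory_hint_user(alpha2);	/* e.g. "US" or "DE" */
}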
1073void regulatory_hint(struct wiphy *wiphy, const char *alpha2) 1565/* Driver hints */
1566int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
1074{ 1567{
1568 struct regulatory_request *request;
1569
1075 BUG_ON(!alpha2); 1570 BUG_ON(!alpha2);
1571 BUG_ON(!wiphy);
1076 1572
1077 mutex_lock(&cfg80211_drv_mutex); 1573 request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL);
1078 __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, 0, ENVIRON_ANY); 1574 if (!request)
1079 mutex_unlock(&cfg80211_drv_mutex); 1575 return -ENOMEM;
1576
1577 request->wiphy_idx = get_wiphy_idx(wiphy);
1578
1579 /* Must have registered wiphy first */
1580 BUG_ON(!wiphy_idx_valid(request->wiphy_idx));
1581
1582 request->alpha2[0] = alpha2[0];
1583 request->alpha2[1] = alpha2[1];
1584 request->initiator = NL80211_REGDOM_SET_BY_DRIVER;
1585
1586 queue_regulatory_request(request);
1587
1588 return 0;
1080} 1589}
1081EXPORT_SYMBOL(regulatory_hint); 1590EXPORT_SYMBOL(regulatory_hint);
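Similarly, a minimal sketch of the driver-side hint; the probe flow and the example_read_eeprom_alpha2() helper are assumptions, while regulatory_hint() and its registered-wiphy requirement are taken from the function above.

#include <net/cfg80211.h>

/* Hypothetical driver path: feed the EEPROM country code to cfg80211
 * once the wiphy has been registered (the BUG_ON above insists on a
 * valid wiphy index, i.e. wiphy_register() must already have run). */
static int example_driver_set_country(struct wiphy *wiphy)
{
	char alpha2[2];

	if (example_read_eeprom_alpha2(alpha2))	/* made-up helper */
		return -EINVAL;

	return regulatory_hint(wiphy, alpha2);
}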
1082 1591
1083static bool reg_same_country_ie_hint(struct wiphy *wiphy, 1592static bool reg_same_country_ie_hint(struct wiphy *wiphy,
1084 u32 country_ie_checksum) 1593 u32 country_ie_checksum)
1085{ 1594{
1086 if (!last_request->wiphy) 1595 struct wiphy *request_wiphy;
1596
1597 assert_cfg80211_lock();
1598
1599 if (unlikely(last_request->initiator !=
1600 NL80211_REGDOM_SET_BY_COUNTRY_IE))
1601 return false;
1602
1603 request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
1604
1605 if (!request_wiphy)
1087 return false; 1606 return false;
1088 if (likely(last_request->wiphy != wiphy)) 1607
1608 if (likely(request_wiphy != wiphy))
1089 return !country_ie_integrity_changes(country_ie_checksum); 1609 return !country_ie_integrity_changes(country_ie_checksum);
1090 /* We should not have let these through at this point, they 1610 /*
1611 * We should not have let these through at this point, they
1091 * should have been picked up earlier by the first alpha2 check 1612 * should have been picked up earlier by the first alpha2 check
1092 * on the device */ 1613 * on the device
1614 */
1093 if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum))) 1615 if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum)))
1094 return true; 1616 return true;
1095 return false; 1617 return false;
@@ -1103,11 +1625,14 @@ void regulatory_hint_11d(struct wiphy *wiphy,
1103 char alpha2[2]; 1625 char alpha2[2];
1104 u32 checksum = 0; 1626 u32 checksum = 0;
1105 enum environment_cap env = ENVIRON_ANY; 1627 enum environment_cap env = ENVIRON_ANY;
1628 struct regulatory_request *request;
1106 1629
1107 if (!last_request) 1630 mutex_lock(&cfg80211_mutex);
1108 return;
1109 1631
1110 mutex_lock(&cfg80211_drv_mutex); 1632 if (unlikely(!last_request)) {
1633 mutex_unlock(&cfg80211_mutex);
1634 return;
1635 }
1111 1636
1112 /* IE len must be evenly divisible by 2 */ 1637 /* IE len must be evenly divisible by 2 */
1113 if (country_ie_len & 0x01) 1638 if (country_ie_len & 0x01)
@@ -1116,9 +1641,11 @@ void regulatory_hint_11d(struct wiphy *wiphy,
1116 if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) 1641 if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN)
1117 goto out; 1642 goto out;
1118 1643
1119 /* Pending country IE processing, this can happen after we 1644 /*
1645 * Pending country IE processing, this can happen after we
1120 * call CRDA and wait for a response if a beacon was received before 1646 * call CRDA and wait for a response if a beacon was received before
1121 * we were able to process the last regulatory_hint_11d() call */ 1647 * we were able to process the last regulatory_hint_11d() call
1648 */
1122 if (country_ie_regdomain) 1649 if (country_ie_regdomain)
1123 goto out; 1650 goto out;
1124 1651
@@ -1130,33 +1657,46 @@ void regulatory_hint_11d(struct wiphy *wiphy,
1130 else if (country_ie[2] == 'O') 1657 else if (country_ie[2] == 'O')
1131 env = ENVIRON_OUTDOOR; 1658 env = ENVIRON_OUTDOOR;
1132 1659
1133 /* We will run this for *every* beacon processed for the BSSID, so 1660 /*
1661 * We will run this for *every* beacon processed for the BSSID, so
1134 * we optimize an early check to exit out early if we don't have to 1662 * we optimize an early check to exit out early if we don't have to
1135 * do anything */ 1663 * do anything
1136 if (likely(last_request->wiphy)) { 1664 */
1665 if (likely(last_request->initiator ==
1666 NL80211_REGDOM_SET_BY_COUNTRY_IE &&
1667 wiphy_idx_valid(last_request->wiphy_idx))) {
1137 struct cfg80211_registered_device *drv_last_ie; 1668 struct cfg80211_registered_device *drv_last_ie;
1138 1669
1139 drv_last_ie = wiphy_to_dev(last_request->wiphy); 1670 drv_last_ie =
1671 cfg80211_drv_by_wiphy_idx(last_request->wiphy_idx);
1140 1672
1141 /* Lets keep this simple -- we trust the first AP 1673 /*
1142 * after we intersect with CRDA */ 1674 * Lets keep this simple -- we trust the first AP
1143 if (likely(last_request->wiphy == wiphy)) { 1675 * after we intersect with CRDA
1144 /* Ignore IEs coming in on this wiphy with 1676 */
1145 * the same alpha2 and environment cap */ 1677 if (likely(&drv_last_ie->wiphy == wiphy)) {
1678 /*
1679 * Ignore IEs coming in on this wiphy with
1680 * the same alpha2 and environment cap
1681 */
1146 if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, 1682 if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2,
1147 alpha2) && 1683 alpha2) &&
1148 env == drv_last_ie->env)) { 1684 env == drv_last_ie->env)) {
1149 goto out; 1685 goto out;
1150 } 1686 }
1151 /* the wiphy moved on to another BSSID or the AP 1687 /*
1688 * the wiphy moved on to another BSSID or the AP
1152 * was reconfigured. XXX: We need to deal with the 1689 * was reconfigured. XXX: We need to deal with the
1153 * case where the user suspends and goes 1690 * case where the user suspends and goes
1154 * to another country, and then gets IEs from an 1691 * to another country, and then gets IEs from an
1155 * AP with different settings */ 1692 * AP with different settings
1693 */
1156 goto out; 1694 goto out;
1157 } else { 1695 } else {
1158 /* Ignore IEs coming in on two separate wiphys with 1696 /*
1159 * the same alpha2 and environment cap */ 1697 * Ignore IEs coming in on two separate wiphys with
1698 * the same alpha2 and environment cap
1699 */
1160 if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, 1700 if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2,
1161 alpha2) && 1701 alpha2) &&
1162 env == drv_last_ie->env)) { 1702 env == drv_last_ie->env)) {
@@ -1171,28 +1711,97 @@ void regulatory_hint_11d(struct wiphy *wiphy,
1171 if (!rd) 1711 if (!rd)
1172 goto out; 1712 goto out;
1173 1713
1174 /* This will not happen right now but we leave it here for the 1714 /*
1715 * This will not happen right now but we leave it here for the
1175 * future when we want to add suspend/resume support and having 1716 * future when we want to add suspend/resume support and having
1176 * the user move to another country after doing so, or having the user 1717 * the user move to another country after doing so, or having the user
1177 * move to another AP. Right now we just trust the first AP. This is why 1718 * move to another AP. Right now we just trust the first AP.
1178 * this is marked as likely(). If we hit this before we add this support 1719 * If we hit this before we add this support we want to be informed of
1179 * we want to be informed of it as it would indicate a mistake in the 1720 * If we hit this before we add this support we want to be informed of
1180 * current design */ 1721 * it as it would indicate a mistake in the current design
1181 if (likely(WARN_ON(reg_same_country_ie_hint(wiphy, checksum)))) 1722 */
1182 goto out; 1723 if (WARN_ON(reg_same_country_ie_hint(wiphy, checksum)))
1724 goto free_rd_out;
1183 1725
1184 /* We keep this around for when CRDA comes back with a response so 1726 request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL);
1185 * we can intersect with that */ 1727 if (!request)
1728 goto free_rd_out;
1729
1730 /*
1731 * We keep this around for when CRDA comes back with a response so
1732 * we can intersect with that
1733 */
1186 country_ie_regdomain = rd; 1734 country_ie_regdomain = rd;
1187 1735
1188 __regulatory_hint(wiphy, REGDOM_SET_BY_COUNTRY_IE, 1736 request->wiphy_idx = get_wiphy_idx(wiphy);
1189 country_ie_regdomain->alpha2, checksum, env); 1737 request->alpha2[0] = rd->alpha2[0];
1738 request->alpha2[1] = rd->alpha2[1];
1739 request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE;
1740 request->country_ie_checksum = checksum;
1741 request->country_ie_env = env;
1742
1743 mutex_unlock(&cfg80211_mutex);
1744
1745 queue_regulatory_request(request);
1746
1747 return;
1190 1748
1749free_rd_out:
1750 kfree(rd);
1191out: 1751out:
1192 mutex_unlock(&cfg80211_drv_mutex); 1752 mutex_unlock(&cfg80211_mutex);
1193} 1753}
1194EXPORT_SYMBOL(regulatory_hint_11d); 1754EXPORT_SYMBOL(regulatory_hint_11d);
1195 1755
1756static bool freq_is_chan_12_13_14(u16 freq)
1757{
1758 if (freq == ieee80211_channel_to_frequency(12) ||
1759 freq == ieee80211_channel_to_frequency(13) ||
1760 freq == ieee80211_channel_to_frequency(14))
1761 return true;
1762 return false;
1763}
1764
1765int regulatory_hint_found_beacon(struct wiphy *wiphy,
1766 struct ieee80211_channel *beacon_chan,
1767 gfp_t gfp)
1768{
1769 struct reg_beacon *reg_beacon;
1770
1771 if (likely((beacon_chan->beacon_found ||
1772 (beacon_chan->flags & IEEE80211_CHAN_RADAR) ||
1773 (beacon_chan->band == IEEE80211_BAND_2GHZ &&
1774 !freq_is_chan_12_13_14(beacon_chan->center_freq)))))
1775 return 0;
1776
1777 reg_beacon = kzalloc(sizeof(struct reg_beacon), gfp);
1778 if (!reg_beacon)
1779 return -ENOMEM;
1780
1781#ifdef CONFIG_CFG80211_REG_DEBUG
1782 printk(KERN_DEBUG "cfg80211: Found new beacon on "
1783 "frequency: %d MHz (Ch %d) on %s\n",
1784 beacon_chan->center_freq,
1785 ieee80211_frequency_to_channel(beacon_chan->center_freq),
1786 wiphy_name(wiphy));
1787#endif
1788 memcpy(&reg_beacon->chan, beacon_chan,
1789 sizeof(struct ieee80211_channel));
1790
1791
1792 /*
1793 * Since we can be called from BH or non-BH context
1794 * we must use spin_lock_bh()
1795 */
1796 spin_lock_bh(&reg_pending_beacons_lock);
1797 list_add_tail(&reg_beacon->list, &reg_pending_beacons);
1798 spin_unlock_bh(&reg_pending_beacons_lock);
1799
1800 schedule_work(&reg_work);
1801
1802 return 0;
1803}
1804
1196static void print_rd_rules(const struct ieee80211_regdomain *rd) 1805static void print_rd_rules(const struct ieee80211_regdomain *rd)
1197{ 1806{
1198 unsigned int i; 1807 unsigned int i;
@@ -1208,8 +1817,10 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
1208 freq_range = &reg_rule->freq_range; 1817 freq_range = &reg_rule->freq_range;
1209 power_rule = &reg_rule->power_rule; 1818 power_rule = &reg_rule->power_rule;
1210 1819
1211 /* There may not be documentation for max antenna gain 1820 /*
1212 * in certain regions */ 1821 * There may not be documentation for max antenna gain
1822 * in certain regions
1823 */
1213 if (power_rule->max_antenna_gain) 1824 if (power_rule->max_antenna_gain)
1214 printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), " 1825 printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), "
1215 "(%d mBi, %d mBm)\n", 1826 "(%d mBi, %d mBm)\n",
@@ -1232,13 +1843,13 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
1232{ 1843{
1233 1844
1234 if (is_intersected_alpha2(rd->alpha2)) { 1845 if (is_intersected_alpha2(rd->alpha2)) {
1235 struct wiphy *wiphy = NULL;
1236 struct cfg80211_registered_device *drv;
1237 1846
1238 if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { 1847 if (last_request->initiator ==
1239 if (last_request->wiphy) { 1848 NL80211_REGDOM_SET_BY_COUNTRY_IE) {
1240 wiphy = last_request->wiphy; 1849 struct cfg80211_registered_device *drv;
1241 drv = wiphy_to_dev(wiphy); 1850 drv = cfg80211_drv_by_wiphy_idx(
1851 last_request->wiphy_idx);
1852 if (drv) {
1242 printk(KERN_INFO "cfg80211: Current regulatory " 1853 printk(KERN_INFO "cfg80211: Current regulatory "
1243 "domain updated by AP to: %c%c\n", 1854 "domain updated by AP to: %c%c\n",
1244 drv->country_ie_alpha2[0], 1855 drv->country_ie_alpha2[0],
@@ -1248,7 +1859,7 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
1248 "domain intersected: \n"); 1859 "domain intersected: \n");
1249 } else 1860 } else
1250 printk(KERN_INFO "cfg80211: Current regulatory " 1861 printk(KERN_INFO "cfg80211: Current regulatory "
1251 "intersected: \n"); 1862 "domain intersected: \n");
1252 } else if (is_world_regdom(rd->alpha2)) 1863 } else if (is_world_regdom(rd->alpha2))
1253 printk(KERN_INFO "cfg80211: World regulatory " 1864 printk(KERN_INFO "cfg80211: World regulatory "
1254 "domain updated:\n"); 1865 "domain updated:\n");
@@ -1304,7 +1915,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1304{ 1915{
1305 const struct ieee80211_regdomain *intersected_rd = NULL; 1916 const struct ieee80211_regdomain *intersected_rd = NULL;
1306 struct cfg80211_registered_device *drv = NULL; 1917 struct cfg80211_registered_device *drv = NULL;
1307 struct wiphy *wiphy = NULL; 1918 struct wiphy *request_wiphy;
1308 /* Some basic sanity checks first */ 1919 /* Some basic sanity checks first */
1309 1920
1310 if (is_world_regdom(rd->alpha2)) { 1921 if (is_world_regdom(rd->alpha2)) {
@@ -1321,23 +1932,27 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1321 if (!last_request) 1932 if (!last_request)
1322 return -EINVAL; 1933 return -EINVAL;
1323 1934
1324 /* Lets only bother proceeding on the same alpha2 if the current 1935 /*
1936 * Lets only bother proceeding on the same alpha2 if the current
1325 * rd is non static (it means CRDA was present and was used last) 1937 * rd is non static (it means CRDA was present and was used last)
1326 * and the pending request came in from a country IE */ 1938 * and the pending request came in from a country IE
1327 if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) { 1939 */
1328 /* If someone else asked us to change the rd lets only bother 1940 if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) {
1329 * checking if the alpha2 changes if CRDA was already called */ 1941 /*
1942 * If someone else asked us to change the rd lets only bother
1943 * checking if the alpha2 changes if CRDA was already called
1944 */
1330 if (!is_old_static_regdom(cfg80211_regdomain) && 1945 if (!is_old_static_regdom(cfg80211_regdomain) &&
1331 !regdom_changed(rd->alpha2)) 1946 !regdom_changes(rd->alpha2))
1332 return -EINVAL; 1947 return -EINVAL;
1333 } 1948 }
1334 1949
1335 wiphy = last_request->wiphy; 1950 /*
1336 1951 * Now lets set the regulatory domain, update all driver channels
1337 /* Now lets set the regulatory domain, update all driver channels
1338 * and finally inform them of what we have done, in case they want 1952 * and finally inform them of what we have done, in case they want
1339 * to review or adjust their own settings based on their own 1953 * to review or adjust their own settings based on their own
1340 * internal EEPROM data */ 1954 * internal EEPROM data
1955 */
1341 1956
1342 if (WARN_ON(!reg_is_valid_request(rd->alpha2))) 1957 if (WARN_ON(!reg_is_valid_request(rd->alpha2)))
1343 return -EINVAL; 1958 return -EINVAL;
@@ -1349,7 +1964,28 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1349 return -EINVAL; 1964 return -EINVAL;
1350 } 1965 }
1351 1966
1967 request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
1968
1352 if (!last_request->intersect) { 1969 if (!last_request->intersect) {
1970 int r;
1971
1972 if (last_request->initiator != NL80211_REGDOM_SET_BY_DRIVER) {
1973 reset_regdomains();
1974 cfg80211_regdomain = rd;
1975 return 0;
1976 }
1977
1978 /*
1979 * For a driver hint, lets copy the regulatory domain the
1980 * driver wanted to the wiphy to deal with conflicts
1981 */
1982
1983 BUG_ON(request_wiphy->regd);
1984
1985 r = reg_copy_regd(&request_wiphy->regd, rd);
1986 if (r)
1987 return r;
1988
1353 reset_regdomains(); 1989 reset_regdomains();
1354 cfg80211_regdomain = rd; 1990 cfg80211_regdomain = rd;
1355 return 0; 1991 return 0;
@@ -1357,14 +1993,22 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1357 1993
1358 /* Intersection requires a bit more work */ 1994 /* Intersection requires a bit more work */
1359 1995
1360 if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) { 1996 if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) {
1361 1997
1362 intersected_rd = regdom_intersect(rd, cfg80211_regdomain); 1998 intersected_rd = regdom_intersect(rd, cfg80211_regdomain);
1363 if (!intersected_rd) 1999 if (!intersected_rd)
1364 return -EINVAL; 2000 return -EINVAL;
1365 2001
1366 /* We can trash what CRDA provided now */ 2002 /*
1367 kfree(rd); 2003 * We can trash what CRDA provided now.
2004 * However if a driver requested this specific regulatory
2005 * domain we keep it for its private use
2006 */
2007 if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER)
2008 request_wiphy->regd = rd;
2009 else
2010 kfree(rd);
2011
1368 rd = NULL; 2012 rd = NULL;
1369 2013
1370 reset_regdomains(); 2014 reset_regdomains();
@@ -1379,29 +2023,26 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1379 */ 2023 */
1380 2024
1381 BUG_ON(!country_ie_regdomain); 2025 BUG_ON(!country_ie_regdomain);
2026 BUG_ON(rd == country_ie_regdomain);
1382 2027
1383 if (rd != country_ie_regdomain) { 2028 /*
1384 /* Intersect what CRDA returned and what we 2029 * Intersect what CRDA returned and what we
1385 * had built from the Country IE received */ 2030 * had built from the Country IE received
2031 */
1386 2032
1387 intersected_rd = regdom_intersect(rd, country_ie_regdomain); 2033 intersected_rd = regdom_intersect(rd, country_ie_regdomain);
1388 2034
1389 reg_country_ie_process_debug(rd, country_ie_regdomain, 2035 reg_country_ie_process_debug(rd,
1390 intersected_rd); 2036 country_ie_regdomain,
2037 intersected_rd);
1391 2038
1392 kfree(country_ie_regdomain); 2039 kfree(country_ie_regdomain);
1393 country_ie_regdomain = NULL; 2040 country_ie_regdomain = NULL;
1394 } else {
1395 /* This would happen when CRDA was not present and
1396 * OLD_REGULATORY was enabled. We intersect our Country
1397 * IE rd and what was set on cfg80211 originally */
1398 intersected_rd = regdom_intersect(rd, cfg80211_regdomain);
1399 }
1400 2041
1401 if (!intersected_rd) 2042 if (!intersected_rd)
1402 return -EINVAL; 2043 return -EINVAL;
1403 2044
1404 drv = wiphy_to_dev(wiphy); 2045 drv = wiphy_to_dev(request_wiphy);
1405 2046
1406 drv->country_ie_alpha2[0] = rd->alpha2[0]; 2047 drv->country_ie_alpha2[0] = rd->alpha2[0];
1407 drv->country_ie_alpha2[1] = rd->alpha2[1]; 2048 drv->country_ie_alpha2[1] = rd->alpha2[1];
@@ -1419,13 +2060,17 @@ static int __set_regdom(const struct ieee80211_regdomain *rd)
1419} 2060}
1420 2061
1421 2062
1422/* Use this call to set the current regulatory domain. Conflicts with 2063/*
2064 * Use this call to set the current regulatory domain. Conflicts with
1423 * multiple drivers can be ironed out later. Caller must've already 2065 * multiple drivers can be ironed out later. Caller must've already
1424 * kmalloc'd the rd structure. Caller must hold cfg80211_drv_mutex */ 2066 * kmalloc'd the rd structure. Caller must hold cfg80211_mutex
2067 */
1425int set_regdom(const struct ieee80211_regdomain *rd) 2068int set_regdom(const struct ieee80211_regdomain *rd)
1426{ 2069{
1427 int r; 2070 int r;
1428 2071
2072 assert_cfg80211_lock();
2073
1429 /* Note that this doesn't update the wiphys, this is done below */ 2074 /* Note that this doesn't update the wiphys, this is done below */
1430 r = __set_regdom(rd); 2075 r = __set_regdom(rd);
1431 if (r) { 2076 if (r) {
@@ -1442,56 +2087,90 @@ int set_regdom(const struct ieee80211_regdomain *rd)
1442 2087
1443 print_regdomain(cfg80211_regdomain); 2088 print_regdomain(cfg80211_regdomain);
1444 2089
2090 nl80211_send_reg_change_event(last_request);
2091
1445 return r; 2092 return r;
1446} 2093}
1447 2094
1448/* Caller must hold cfg80211_drv_mutex */ 2095/* Caller must hold cfg80211_mutex */
1449void reg_device_remove(struct wiphy *wiphy) 2096void reg_device_remove(struct wiphy *wiphy)
1450{ 2097{
1451 if (!last_request || !last_request->wiphy) 2098 struct wiphy *request_wiphy;
2099
2100 assert_cfg80211_lock();
2101
2102 request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx);
2103
2104 kfree(wiphy->regd);
2105 if (!last_request || !request_wiphy)
1452 return; 2106 return;
1453 if (last_request->wiphy != wiphy) 2107 if (request_wiphy != wiphy)
1454 return; 2108 return;
1455 last_request->wiphy = NULL; 2109 last_request->wiphy_idx = WIPHY_IDX_STALE;
1456 last_request->country_ie_env = ENVIRON_ANY; 2110 last_request->country_ie_env = ENVIRON_ANY;
1457} 2111}
1458 2112
1459int regulatory_init(void) 2113int regulatory_init(void)
1460{ 2114{
1461 int err; 2115 int err = 0;
1462 2116
1463 reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); 2117 reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0);
1464 if (IS_ERR(reg_pdev)) 2118 if (IS_ERR(reg_pdev))
1465 return PTR_ERR(reg_pdev); 2119 return PTR_ERR(reg_pdev);
1466 2120
2121 spin_lock_init(&reg_requests_lock);
2122 spin_lock_init(&reg_pending_beacons_lock);
2123
1467#ifdef CONFIG_WIRELESS_OLD_REGULATORY 2124#ifdef CONFIG_WIRELESS_OLD_REGULATORY
1468 cfg80211_regdomain = static_regdom(ieee80211_regdom); 2125 cfg80211_regdomain = static_regdom(ieee80211_regdom);
1469 2126
1470 printk(KERN_INFO "cfg80211: Using static regulatory domain info\n"); 2127 printk(KERN_INFO "cfg80211: Using static regulatory domain info\n");
1471 print_regdomain_info(cfg80211_regdomain); 2128 print_regdomain_info(cfg80211_regdomain);
1472 /* The old code still requests for a new regdomain and if 2129 /*
2130 * The old code still requests for a new regdomain and if
1473 * you have CRDA you get it updated, otherwise you get 2131 * you have CRDA you get it updated, otherwise you get
1474 * stuck with the static values. We ignore "EU" code as 2132 * stuck with the static values. Since "EU" is not a valid
1475 * that is not a valid ISO / IEC 3166 alpha2 */ 2133 * ISO / IEC 3166 alpha2 code we can't expect userspace to
1476 if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U') 2134 * give us a regulatory domain for it. We need last_request
1477 err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, 2135 * initialized though so lets just send a request which we
1478 ieee80211_regdom, 0, ENVIRON_ANY); 2136 * know will be ignored... this crap will be removed once
2137 * OLD_REG dies.
2138 */
2139 err = regulatory_hint_core(ieee80211_regdom);
1479#else 2140#else
1480 cfg80211_regdomain = cfg80211_world_regdom; 2141 cfg80211_regdomain = cfg80211_world_regdom;
1481 2142
1482 err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", 0, ENVIRON_ANY); 2143 err = regulatory_hint_core(ieee80211_regdom);
1483 if (err) 2144#endif
1484 printk(KERN_ERR "cfg80211: calling CRDA failed - " 2145 if (err) {
1485 "unable to update world regulatory domain, " 2146 if (err == -ENOMEM)
1486 "using static definition\n"); 2147 return err;
2148 /*
2149 * N.B. kobject_uevent_env() can fail mainly for when we're out
2150 * memory which is handled and propagated appropriately above
2151 * but it can also fail during a netlink_broadcast() or during
2152 * early boot for call_usermodehelper(). For now treat these
2153 * errors as non-fatal.
2154 */
2155 printk(KERN_ERR "cfg80211: kobject_uevent_env() was unable "
2156 "to call CRDA during init");
2157#ifdef CONFIG_CFG80211_REG_DEBUG
2158 /* We want to find out exactly why when debugging */
2159 WARN_ON(err);
1487#endif 2160#endif
2161 }
1488 2162
1489 return 0; 2163 return 0;
1490} 2164}
1491 2165
1492void regulatory_exit(void) 2166void regulatory_exit(void)
1493{ 2167{
1494 mutex_lock(&cfg80211_drv_mutex); 2168 struct regulatory_request *reg_request, *tmp;
2169 struct reg_beacon *reg_beacon, *btmp;
2170
2171 cancel_work_sync(&reg_work);
2172
2173 mutex_lock(&cfg80211_mutex);
1495 2174
1496 reset_regdomains(); 2175 reset_regdomains();
1497 2176
@@ -1502,5 +2181,33 @@ void regulatory_exit(void)
1502 2181
1503 platform_device_unregister(reg_pdev); 2182 platform_device_unregister(reg_pdev);
1504 2183
1505 mutex_unlock(&cfg80211_drv_mutex); 2184 spin_lock_bh(&reg_pending_beacons_lock);
2185 if (!list_empty(&reg_pending_beacons)) {
2186 list_for_each_entry_safe(reg_beacon, btmp,
2187 &reg_pending_beacons, list) {
2188 list_del(&reg_beacon->list);
2189 kfree(reg_beacon);
2190 }
2191 }
2192 spin_unlock_bh(&reg_pending_beacons_lock);
2193
2194 if (!list_empty(&reg_beacon_list)) {
2195 list_for_each_entry_safe(reg_beacon, btmp,
2196 &reg_beacon_list, list) {
2197 list_del(&reg_beacon->list);
2198 kfree(reg_beacon);
2199 }
2200 }
2201
2202 spin_lock(&reg_requests_lock);
2203 if (!list_empty(&reg_requests_list)) {
2204 list_for_each_entry_safe(reg_request, tmp,
2205 &reg_requests_list, list) {
2206 list_del(&reg_request->list);
2207 kfree(reg_request);
2208 }
2209 }
2210 spin_unlock(&reg_requests_lock);
2211
2212 mutex_unlock(&cfg80211_mutex);
1506} 2213}
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index a76ea3ff7cd6..e37829a49dc4 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -1,9 +1,13 @@
1#ifndef __NET_WIRELESS_REG_H 1#ifndef __NET_WIRELESS_REG_H
2#define __NET_WIRELESS_REG_H 2#define __NET_WIRELESS_REG_H
3 3
4extern const struct ieee80211_regdomain *cfg80211_regdomain;
5
4bool is_world_regdom(const char *alpha2); 6bool is_world_regdom(const char *alpha2);
5bool reg_is_valid_request(const char *alpha2); 7bool reg_is_valid_request(const char *alpha2);
6 8
9int regulatory_hint_user(const char *alpha2);
10
7void reg_device_remove(struct wiphy *wiphy); 11void reg_device_remove(struct wiphy *wiphy);
8 12
9int regulatory_init(void); 13int regulatory_init(void);
@@ -11,34 +15,25 @@ void regulatory_exit(void);
11 15
12int set_regdom(const struct ieee80211_regdomain *rd); 16int set_regdom(const struct ieee80211_regdomain *rd);
13 17
14enum environment_cap {
15 ENVIRON_ANY,
16 ENVIRON_INDOOR,
17 ENVIRON_OUTDOOR,
18};
19
20
21/** 18/**
22 * __regulatory_hint - hint to the wireless core a regulatory domain 19 * regulatory_hint_found_beacon - hints a beacon was found on a channel
23 * @wiphy: if the hint comes from country information from an AP, this 20 * @wiphy: the wireless device where the beacon was found on
24 * is required to be set to the wiphy that received the information 21 * @beacon_chan: the channel on which the beacon was found on
25 * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain 22 * @gfp: context flags
26 * should be in.
27 * @country_ie_checksum: checksum of processed country IE, set this to 0
28 * if the hint did not come from a country IE
29 * @country_ie_env: the environment the IE told us we are in, %ENVIRON_*
30 *
31 * The Wireless subsystem can use this function to hint to the wireless core
32 * what it believes should be the current regulatory domain by giving it an
33 * ISO/IEC 3166 alpha2 country code it knows its regulatory domain should be
34 * in.
35 * 23 *
36 * Returns zero if all went fine, %-EALREADY if a regulatory domain had 24 * This informs the wireless core that a beacon from an AP was found on
37 * already been set or other standard error codes. 25 * the channel provided. This allows the wireless core to make educated
26 * guesses on regulatory to help with world roaming. This is only used for
27 * world roaming -- when we do not know our current location. This is
28 * only useful on channels 12, 13 and 14 on the 2 GHz band as channels
29 * 1-11 are already enabled by the world regulatory domain; and on
30 * non-radar 5 GHz channels.
38 * 31 *
32 * Drivers do not need to call this, cfg80211 will do it for you after a scan
33 * on a newly found BSS.
39 */ 34 */
40extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, 35int regulatory_hint_found_beacon(struct wiphy *wiphy,
41 const char *alpha2, u32 country_ie_checksum, 36 struct ieee80211_channel *beacon_chan,
42 enum environment_cap country_ie_env); 37 gfp_t gfp);
43 38
44#endif /* __NET_WIRELESS_REG_H */ 39#endif /* __NET_WIRELESS_REG_H */
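For context, a hedged sketch of the beacon-hint path this header now exposes. In-tree the caller is cfg80211's own scan code (see scan.c below); the wrapper here is purely illustrative.

#include <net/cfg80211.h>
#include "reg.h"

/* Illustrative only: report that a beacon was seen on a channel so the
 * world-roaming logic may lift passive-scan/no-IBSS restrictions there.
 * Per the kernel-doc above, only channels 12-14 on 2 GHz and non-radar
 * 5 GHz channels gain anything from this hint. */
static int example_note_beacon(struct wiphy *wiphy,
			       struct ieee80211_channel *chan)
{
	return regulatory_hint_found_beacon(wiphy, chan, GFP_KERNEL);
}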
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
new file mode 100644
index 000000000000..2a00e362f5fe
--- /dev/null
+++ b/net/wireless/scan.c
@@ -0,0 +1,891 @@
1/*
2 * cfg80211 scan result handling
3 *
4 * Copyright 2008 Johannes Berg <johannes@sipsolutions.net>
5 */
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/netdevice.h>
9#include <linux/wireless.h>
10#include <linux/nl80211.h>
11#include <linux/etherdevice.h>
12#include <net/arp.h>
13#include <net/cfg80211.h>
14#include <net/iw_handler.h>
15#include "core.h"
16#include "nl80211.h"
17
18#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ)
19
20void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted)
21{
22 struct net_device *dev;
23#ifdef CONFIG_WIRELESS_EXT
24 union iwreq_data wrqu;
25#endif
26
27 dev = dev_get_by_index(&init_net, request->ifidx);
28 if (!dev)
29 goto out;
30
31 WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req);
32 wiphy_to_dev(request->wiphy)->scan_req = NULL;
33
34 if (aborted)
35 nl80211_send_scan_aborted(wiphy_to_dev(request->wiphy), dev);
36 else
37 nl80211_send_scan_done(wiphy_to_dev(request->wiphy), dev);
38
39#ifdef CONFIG_WIRELESS_EXT
40 if (!aborted) {
41 memset(&wrqu, 0, sizeof(wrqu));
42
43 wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL);
44 }
45#endif
46
47 dev_put(dev);
48
49 out:
50 kfree(request);
51}
52EXPORT_SYMBOL(cfg80211_scan_done);
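A sketch of the expected driver contract for this call, assuming a hypothetical driver: the ->scan() op stashes the request, and on completion the same pointer is handed back through cfg80211_scan_done(), which sends the nl80211/wext events and frees it. The example_priv structure is invented for illustration.

#include <net/cfg80211.h>

struct example_priv {				/* hypothetical driver state */
	struct cfg80211_scan_request *scan_req;
};

/* Hardware signalled scan completion: return the original request.
 * cfg80211_scan_done() notifies userspace and kfree()s the request,
 * so the driver must not touch it afterwards. */
static void example_scan_complete(struct example_priv *priv, bool aborted)
{
	if (!priv->scan_req)
		return;

	cfg80211_scan_done(priv->scan_req, aborted);
	priv->scan_req = NULL;
}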
53
54static void bss_release(struct kref *ref)
55{
56 struct cfg80211_internal_bss *bss;
57
58 bss = container_of(ref, struct cfg80211_internal_bss, ref);
59 if (bss->pub.free_priv)
60 bss->pub.free_priv(&bss->pub);
61 kfree(bss);
62}
63
64/* must hold dev->bss_lock! */
65void cfg80211_bss_age(struct cfg80211_registered_device *dev,
66 unsigned long age_secs)
67{
68 struct cfg80211_internal_bss *bss;
69 unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC);
70
71 list_for_each_entry(bss, &dev->bss_list, list) {
72 bss->ts -= age_jiffies;
73 }
74}
75
76/* must hold dev->bss_lock! */
77void cfg80211_bss_expire(struct cfg80211_registered_device *dev)
78{
79 struct cfg80211_internal_bss *bss, *tmp;
80 bool expired = false;
81
82 list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) {
83 if (bss->hold ||
84 !time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE))
85 continue;
86 list_del(&bss->list);
87 rb_erase(&bss->rbn, &dev->bss_tree);
88 kref_put(&bss->ref, bss_release);
89 expired = true;
90 }
91
92 if (expired)
93 dev->bss_generation++;
94}
95
96static u8 *find_ie(u8 num, u8 *ies, size_t len)
97{
98 while (len > 2 && ies[0] != num) {
99 len -= ies[1] + 2;
100 ies += ies[1] + 2;
101 }
102 if (len < 2)
103 return NULL;
104 if (len < 2 + ies[1])
105 return NULL;
106 return ies;
107}
108
109static int cmp_ies(u8 num, u8 *ies1, size_t len1, u8 *ies2, size_t len2)
110{
111 const u8 *ie1 = find_ie(num, ies1, len1);
112 const u8 *ie2 = find_ie(num, ies2, len2);
113 int r;
114
115 if (!ie1 && !ie2)
116 return 0;
117 if (!ie1)
118 return -1;
119
120 r = memcmp(ie1 + 2, ie2 + 2, min(ie1[1], ie2[1]));
121 if (r == 0 && ie1[1] != ie2[1])
122 return ie2[1] - ie1[1];
123 return r;
124}
125
126static bool is_bss(struct cfg80211_bss *a,
127 const u8 *bssid,
128 const u8 *ssid, size_t ssid_len)
129{
130 const u8 *ssidie;
131
132 if (bssid && compare_ether_addr(a->bssid, bssid))
133 return false;
134
135 if (!ssid)
136 return true;
137
138 ssidie = find_ie(WLAN_EID_SSID,
139 a->information_elements,
140 a->len_information_elements);
141 if (!ssidie)
142 return false;
143 if (ssidie[1] != ssid_len)
144 return false;
145 return memcmp(ssidie + 2, ssid, ssid_len) == 0;
146}
147
148static bool is_mesh(struct cfg80211_bss *a,
149 const u8 *meshid, size_t meshidlen,
150 const u8 *meshcfg)
151{
152 const u8 *ie;
153
154 if (!is_zero_ether_addr(a->bssid))
155 return false;
156
157 ie = find_ie(WLAN_EID_MESH_ID,
158 a->information_elements,
159 a->len_information_elements);
160 if (!ie)
161 return false;
162 if (ie[1] != meshidlen)
163 return false;
164 if (memcmp(ie + 2, meshid, meshidlen))
165 return false;
166
167 ie = find_ie(WLAN_EID_MESH_CONFIG,
168 a->information_elements,
169 a->len_information_elements);
170 if (ie[1] != IEEE80211_MESH_CONFIG_LEN)
171 return false;
172
173 /*
174 * Ignore mesh capability (last two bytes of the IE) when
175 * comparing since that may differ between stations taking
176 * part in the same mesh.
177 */
178 return memcmp(ie + 2, meshcfg, IEEE80211_MESH_CONFIG_LEN - 2) == 0;
179}
180
181static int cmp_bss(struct cfg80211_bss *a,
182 struct cfg80211_bss *b)
183{
184 int r;
185
186 if (a->channel != b->channel)
187 return b->channel->center_freq - a->channel->center_freq;
188
189 r = memcmp(a->bssid, b->bssid, ETH_ALEN);
190 if (r)
191 return r;
192
193 if (is_zero_ether_addr(a->bssid)) {
194 r = cmp_ies(WLAN_EID_MESH_ID,
195 a->information_elements,
196 a->len_information_elements,
197 b->information_elements,
198 b->len_information_elements);
199 if (r)
200 return r;
201 return cmp_ies(WLAN_EID_MESH_CONFIG,
202 a->information_elements,
203 a->len_information_elements,
204 b->information_elements,
205 b->len_information_elements);
206 }
207
208 return cmp_ies(WLAN_EID_SSID,
209 a->information_elements,
210 a->len_information_elements,
211 b->information_elements,
212 b->len_information_elements);
213}
214
215struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy,
216 struct ieee80211_channel *channel,
217 const u8 *bssid,
218 const u8 *ssid, size_t ssid_len,
219 u16 capa_mask, u16 capa_val)
220{
221 struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
222 struct cfg80211_internal_bss *bss, *res = NULL;
223
224 spin_lock_bh(&dev->bss_lock);
225
226 list_for_each_entry(bss, &dev->bss_list, list) {
227 if ((bss->pub.capability & capa_mask) != capa_val)
228 continue;
229 if (channel && bss->pub.channel != channel)
230 continue;
231 if (is_bss(&bss->pub, bssid, ssid, ssid_len)) {
232 res = bss;
233 kref_get(&res->ref);
234 break;
235 }
236 }
237
238 spin_unlock_bh(&dev->bss_lock);
239 if (!res)
240 return NULL;
241 return &res->pub;
242}
243EXPORT_SYMBOL(cfg80211_get_bss);
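A small usage sketch, assuming a hypothetical caller that wants to know whether an infrastructure BSS is already cached; only cfg80211_get_bss() and the matching cfg80211_put_bss() release (defined further down in this file) are taken from the patch.

#include <net/cfg80211.h>

/* Illustrative lookup of a previously scanned ESS entry. The returned
 * BSS is reference-counted, so drop it with cfg80211_put_bss(). */
static bool example_bss_cached(struct wiphy *wiphy, const u8 *bssid,
			       const u8 *ssid, size_t ssid_len)
{
	struct cfg80211_bss *bss;

	bss = cfg80211_get_bss(wiphy, NULL /* any channel */, bssid,
			       ssid, ssid_len,
			       WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
	if (!bss)
		return false;

	cfg80211_put_bss(bss);
	return true;
}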
244
245struct cfg80211_bss *cfg80211_get_mesh(struct wiphy *wiphy,
246 struct ieee80211_channel *channel,
247 const u8 *meshid, size_t meshidlen,
248 const u8 *meshcfg)
249{
250 struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
251 struct cfg80211_internal_bss *bss, *res = NULL;
252
253 spin_lock_bh(&dev->bss_lock);
254
255 list_for_each_entry(bss, &dev->bss_list, list) {
256 if (channel && bss->pub.channel != channel)
257 continue;
258 if (is_mesh(&bss->pub, meshid, meshidlen, meshcfg)) {
259 res = bss;
260 kref_get(&res->ref);
261 break;
262 }
263 }
264
265 spin_unlock_bh(&dev->bss_lock);
266 if (!res)
267 return NULL;
268 return &res->pub;
269}
270EXPORT_SYMBOL(cfg80211_get_mesh);
271
272
273static void rb_insert_bss(struct cfg80211_registered_device *dev,
274 struct cfg80211_internal_bss *bss)
275{
276 struct rb_node **p = &dev->bss_tree.rb_node;
277 struct rb_node *parent = NULL;
278 struct cfg80211_internal_bss *tbss;
279 int cmp;
280
281 while (*p) {
282 parent = *p;
283 tbss = rb_entry(parent, struct cfg80211_internal_bss, rbn);
284
285 cmp = cmp_bss(&bss->pub, &tbss->pub);
286
287 if (WARN_ON(!cmp)) {
288 /* will sort of leak this BSS */
289 return;
290 }
291
292 if (cmp < 0)
293 p = &(*p)->rb_left;
294 else
295 p = &(*p)->rb_right;
296 }
297
298 rb_link_node(&bss->rbn, parent, p);
299 rb_insert_color(&bss->rbn, &dev->bss_tree);
300}
301
302static struct cfg80211_internal_bss *
303rb_find_bss(struct cfg80211_registered_device *dev,
304 struct cfg80211_internal_bss *res)
305{
306 struct rb_node *n = dev->bss_tree.rb_node;
307 struct cfg80211_internal_bss *bss;
308 int r;
309
310 while (n) {
311 bss = rb_entry(n, struct cfg80211_internal_bss, rbn);
312 r = cmp_bss(&res->pub, &bss->pub);
313
314 if (r == 0)
315 return bss;
316 else if (r < 0)
317 n = n->rb_left;
318 else
319 n = n->rb_right;
320 }
321
322 return NULL;
323}
324
325static struct cfg80211_internal_bss *
326cfg80211_bss_update(struct cfg80211_registered_device *dev,
327 struct cfg80211_internal_bss *res,
328 bool overwrite)
329{
330 struct cfg80211_internal_bss *found = NULL;
331 const u8 *meshid, *meshcfg;
332
333 /*
334 * The reference to "res" is donated to this function.
335 */
336
337 if (WARN_ON(!res->pub.channel)) {
338 kref_put(&res->ref, bss_release);
339 return NULL;
340 }
341
342 res->ts = jiffies;
343
344 if (is_zero_ether_addr(res->pub.bssid)) {
345 /* must be mesh, verify */
346 meshid = find_ie(WLAN_EID_MESH_ID, res->pub.information_elements,
347 res->pub.len_information_elements);
348 meshcfg = find_ie(WLAN_EID_MESH_CONFIG,
349 res->pub.information_elements,
350 res->pub.len_information_elements);
351 if (!meshid || !meshcfg ||
352 meshcfg[1] != IEEE80211_MESH_CONFIG_LEN) {
353 /* bogus mesh */
354 kref_put(&res->ref, bss_release);
355 return NULL;
356 }
357 }
358
359 spin_lock_bh(&dev->bss_lock);
360
361 found = rb_find_bss(dev, res);
362
363 if (found && overwrite) {
364 list_replace(&found->list, &res->list);
365 rb_replace_node(&found->rbn, &res->rbn,
366 &dev->bss_tree);
367 kref_put(&found->ref, bss_release);
368 found = res;
369 } else if (found) {
370 kref_get(&found->ref);
371 found->pub.beacon_interval = res->pub.beacon_interval;
372 found->pub.tsf = res->pub.tsf;
373 found->pub.signal = res->pub.signal;
374 found->pub.capability = res->pub.capability;
375 found->ts = res->ts;
376 kref_put(&res->ref, bss_release);
377 } else {
378 /* this "consumes" the reference */
379 list_add_tail(&res->list, &dev->bss_list);
380 rb_insert_bss(dev, res);
381 found = res;
382 }
383
384 dev->bss_generation++;
385 spin_unlock_bh(&dev->bss_lock);
386
387 kref_get(&found->ref);
388 return found;
389}
390
391struct cfg80211_bss *
392cfg80211_inform_bss_frame(struct wiphy *wiphy,
393 struct ieee80211_channel *channel,
394 struct ieee80211_mgmt *mgmt, size_t len,
395 s32 signal, gfp_t gfp)
396{
397 struct cfg80211_internal_bss *res;
398 size_t ielen = len - offsetof(struct ieee80211_mgmt,
399 u.probe_resp.variable);
400 bool overwrite;
401 size_t privsz = wiphy->bss_priv_size;
402
403 if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC &&
404 (signal < 0 || signal > 100)))
405 return NULL;
406
407 if (WARN_ON(!mgmt || !wiphy ||
408 len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable)))
409 return NULL;
410
411 res = kzalloc(sizeof(*res) + privsz + ielen, gfp);
412 if (!res)
413 return NULL;
414
415 memcpy(res->pub.bssid, mgmt->bssid, ETH_ALEN);
416 res->pub.channel = channel;
417 res->pub.signal = signal;
418 res->pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp);
419 res->pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int);
420 res->pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info);
421 /* point to after the private area */
422 res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz;
423 memcpy(res->pub.information_elements, mgmt->u.probe_resp.variable, ielen);
424 res->pub.len_information_elements = ielen;
425
426 kref_init(&res->ref);
427
428 overwrite = ieee80211_is_probe_resp(mgmt->frame_control);
429
430 res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, overwrite);
431 if (!res)
432 return NULL;
433
434 if (res->pub.capability & WLAN_CAPABILITY_ESS)
435 regulatory_hint_found_beacon(wiphy, channel, gfp);
436
437 /* cfg80211_bss_update gives us a referenced result */
438 return &res->pub;
439}
440EXPORT_SYMBOL(cfg80211_inform_bss_frame);
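Finally, a sketch of how a driver's receive path might feed scan results in; the surrounding function is hypothetical, while the call matches the signature above. Note the signal argument must agree with wiphy->signal_type (the WARN_ON above enforces 0..100 for NL80211_BSS_SIGNAL_UNSPEC).

#include <net/cfg80211.h>

/* Hypothetical RX path: a beacon or probe response frame was received
 * during a scan. cfg80211 stores or updates the BSS entry and, for ESS
 * entries, also feeds the regulatory beacon hint seen earlier. */
static void example_rx_scan_frame(struct wiphy *wiphy,
				  struct ieee80211_channel *chan,
				  struct ieee80211_mgmt *mgmt, size_t len,
				  s32 signal)
{
	struct cfg80211_bss *bss;

	bss = cfg80211_inform_bss_frame(wiphy, chan, mgmt, len,
					signal, GFP_ATOMIC);
	if (bss)
		cfg80211_put_bss(bss);	/* keep no private reference */
}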
441
442void cfg80211_put_bss(struct cfg80211_bss *pub)
443{
444 struct cfg80211_internal_bss *bss;
445
446 if (!pub)
447 return;
448
449 bss = container_of(pub, struct cfg80211_internal_bss, pub);
450 kref_put(&bss->ref, bss_release);
451}
452EXPORT_SYMBOL(cfg80211_put_bss);
453
454void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
455{
456 struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy);
457 struct cfg80211_internal_bss *bss;
458
459 if (WARN_ON(!pub))
460 return;
461
462 bss = container_of(pub, struct cfg80211_internal_bss, pub);
463
464 spin_lock_bh(&dev->bss_lock);
465
466 list_del(&bss->list);
467 rb_erase(&bss->rbn, &dev->bss_tree);
468
469 spin_unlock_bh(&dev->bss_lock);
470
471 kref_put(&bss->ref, bss_release);
472}
473EXPORT_SYMBOL(cfg80211_unlink_bss);
474
475void cfg80211_hold_bss(struct cfg80211_bss *pub)
476{
477 struct cfg80211_internal_bss *bss;
478
479 if (!pub)
480 return;
481
482 bss = container_of(pub, struct cfg80211_internal_bss, pub);
483 bss->hold = true;
484}
485EXPORT_SYMBOL(cfg80211_hold_bss);
486
487void cfg80211_unhold_bss(struct cfg80211_bss *pub)
488{
489 struct cfg80211_internal_bss *bss;
490
491 if (!pub)
492 return;
493
494 bss = container_of(pub, struct cfg80211_internal_bss, pub);
495 bss->hold = false;
496}
497EXPORT_SYMBOL(cfg80211_unhold_bss);
498
499#ifdef CONFIG_WIRELESS_EXT
500int cfg80211_wext_siwscan(struct net_device *dev,
501 struct iw_request_info *info,
502 union iwreq_data *wrqu, char *extra)
503{
504 struct cfg80211_registered_device *rdev;
505 struct wiphy *wiphy;
506 struct iw_scan_req *wreq = NULL;
507 struct cfg80211_scan_request *creq;
508 int i, err, n_channels = 0;
509 enum ieee80211_band band;
510
511 if (!netif_running(dev))
512 return -ENETDOWN;
513
514 rdev = cfg80211_get_dev_from_ifindex(dev->ifindex);
515
516 if (IS_ERR(rdev))
517 return PTR_ERR(rdev);
518
519 if (rdev->scan_req) {
520 err = -EBUSY;
521 goto out;
522 }
523
524 wiphy = &rdev->wiphy;
525
526 for (band = 0; band < IEEE80211_NUM_BANDS; band++)
527 if (wiphy->bands[band])
528 n_channels += wiphy->bands[band]->n_channels;
529
530 creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) +
531 n_channels * sizeof(void *),
532 GFP_ATOMIC);
533 if (!creq) {
534 err = -ENOMEM;
535 goto out;
536 }
537
538 creq->wiphy = wiphy;
539 creq->ifidx = dev->ifindex;
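	/* ssids (a single entry) and the channel pointer array live in
	 * the one allocation made above, right after the request itself */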
540 creq->ssids = (void *)(creq + 1);
541 creq->channels = (void *)(creq->ssids + 1);
542 creq->n_channels = n_channels;
543 creq->n_ssids = 1;
544
545 /* all channels */
546 i = 0;
547 for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
548 int j;
549 if (!wiphy->bands[band])
550 continue;
551 for (j = 0; j < wiphy->bands[band]->n_channels; j++) {
552 creq->channels[i] = &wiphy->bands[band]->channels[j];
553 i++;
554 }
555 }
556
557 /* translate scan request */
558 if (wrqu->data.length == sizeof(struct iw_scan_req)) {
559 wreq = (struct iw_scan_req *)extra;
560
561 if (wrqu->data.flags & IW_SCAN_THIS_ESSID) {
562			if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) {
563				kfree(creq);
				err = -EINVAL;
				goto out;
			}
564 memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len);
565 creq->ssids[0].ssid_len = wreq->essid_len;
566 }
567 if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE)
568 creq->n_ssids = 0;
569 }
570
571 rdev->scan_req = creq;
572 err = rdev->ops->scan(wiphy, dev, creq);
573 if (err) {
574 rdev->scan_req = NULL;
575 kfree(creq);
576 }
577 out:
578 cfg80211_put_dev(rdev);
579 return err;
580}
581EXPORT_SYMBOL(cfg80211_wext_siwscan);
582
583static void ieee80211_scan_add_ies(struct iw_request_info *info,
584 struct cfg80211_bss *bss,
585 char **current_ev, char *end_buf)
586{
587 u8 *pos, *end, *next;
588 struct iw_event iwe;
589
590 if (!bss->information_elements ||
591 !bss->len_information_elements)
592 return;
593
594 /*
595 * If needed, fragment the IEs buffer (at IE boundaries) into short
596 * enough fragments to fit into IW_GENERIC_IE_MAX octet messages.
597 */
598 pos = bss->information_elements;
599 end = pos + bss->len_information_elements;
600
601 while (end - pos > IW_GENERIC_IE_MAX) {
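		/* each IE is a 1-byte ID, a 1-byte length and pos[1] bytes
		 * of payload; advance over whole IEs while the next one
		 * still fits in this fragment */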
602 next = pos + 2 + pos[1];
603 while (next + 2 + next[1] - pos < IW_GENERIC_IE_MAX)
604 next = next + 2 + next[1];
605
606 memset(&iwe, 0, sizeof(iwe));
607 iwe.cmd = IWEVGENIE;
608 iwe.u.data.length = next - pos;
609 *current_ev = iwe_stream_add_point(info, *current_ev,
610 end_buf, &iwe, pos);
611
612 pos = next;
613 }
614
615 if (end > pos) {
616 memset(&iwe, 0, sizeof(iwe));
617 iwe.cmd = IWEVGENIE;
618 iwe.u.data.length = end - pos;
619 *current_ev = iwe_stream_add_point(info, *current_ev,
620 end_buf, &iwe, pos);
621 }
622}
623
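/*
 * jiffies may have wrapped since the BSS was last seen; account for
 * that by going the long way around MAX_JIFFY_OFFSET.
 */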
624static inline unsigned int elapsed_jiffies_msecs(unsigned long start)
625{
626 unsigned long end = jiffies;
627
628 if (end >= start)
629 return jiffies_to_msecs(end - start);
630
631 return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1);
632}
633
634static char *
635ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info,
636 struct cfg80211_internal_bss *bss, char *current_ev,
637 char *end_buf)
638{
639 struct iw_event iwe;
640 u8 *buf, *cfg, *p;
641 u8 *ie = bss->pub.information_elements;
642 int rem = bss->pub.len_information_elements, i, sig;
643 bool ismesh = false;
644
645 memset(&iwe, 0, sizeof(iwe));
646 iwe.cmd = SIOCGIWAP;
647 iwe.u.ap_addr.sa_family = ARPHRD_ETHER;
648 memcpy(iwe.u.ap_addr.sa_data, bss->pub.bssid, ETH_ALEN);
649 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
650 IW_EV_ADDR_LEN);
651
652 memset(&iwe, 0, sizeof(iwe));
653 iwe.cmd = SIOCGIWFREQ;
654 iwe.u.freq.m = ieee80211_frequency_to_channel(bss->pub.channel->center_freq);
655 iwe.u.freq.e = 0;
656 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
657 IW_EV_FREQ_LEN);
658
659 memset(&iwe, 0, sizeof(iwe));
660 iwe.cmd = SIOCGIWFREQ;
661 iwe.u.freq.m = bss->pub.channel->center_freq;
662 iwe.u.freq.e = 6;
663 current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe,
664 IW_EV_FREQ_LEN);
665
666 if (wiphy->signal_type != CFG80211_SIGNAL_TYPE_NONE) {
667 memset(&iwe, 0, sizeof(iwe));
668 iwe.cmd = IWEVQUAL;
669 iwe.u.qual.updated = IW_QUAL_LEVEL_UPDATED |
670 IW_QUAL_NOISE_INVALID |
671 IW_QUAL_QUAL_UPDATED;
672 switch (wiphy->signal_type) {
673 case CFG80211_SIGNAL_TYPE_MBM:
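			/* signal is in mBm (100 * dBm), hence the divide */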
674 sig = bss->pub.signal / 100;
675 iwe.u.qual.level = sig;
676 iwe.u.qual.updated |= IW_QUAL_DBM;
677 if (sig < -110) /* rather bad */
678 sig = -110;
679 else if (sig > -40) /* perfect */
680 sig = -40;
681 /* will give a range of 0 .. 70 */
682 iwe.u.qual.qual = sig + 110;
683 break;
684 case CFG80211_SIGNAL_TYPE_UNSPEC:
685 iwe.u.qual.level = bss->pub.signal;
686 /* will give range 0 .. 100 */
687 iwe.u.qual.qual = bss->pub.signal;
688 break;
689 default:
690 /* not reached */
691 break;
692 }
693 current_ev = iwe_stream_add_event(info, current_ev, end_buf,
694 &iwe, IW_EV_QUAL_LEN);
695 }
696
697 memset(&iwe, 0, sizeof(iwe));
698 iwe.cmd = SIOCGIWENCODE;
699 if (bss->pub.capability & WLAN_CAPABILITY_PRIVACY)
700 iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY;
701 else
702 iwe.u.data.flags = IW_ENCODE_DISABLED;
703 iwe.u.data.length = 0;
704 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
705 &iwe, "");
706
707 while (rem >= 2) {
708 /* invalid data */
709 if (ie[1] > rem - 2)
710 break;
711
712 switch (ie[0]) {
713 case WLAN_EID_SSID:
714 memset(&iwe, 0, sizeof(iwe));
715 iwe.cmd = SIOCGIWESSID;
716 iwe.u.data.length = ie[1];
717 iwe.u.data.flags = 1;
718 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
719 &iwe, ie + 2);
720 break;
721 case WLAN_EID_MESH_ID:
722 memset(&iwe, 0, sizeof(iwe));
723 iwe.cmd = SIOCGIWESSID;
724 iwe.u.data.length = ie[1];
725 iwe.u.data.flags = 1;
726 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
727 &iwe, ie + 2);
728 break;
729 case WLAN_EID_MESH_CONFIG:
730 ismesh = true;
731 if (ie[1] != IEEE80211_MESH_CONFIG_LEN)
732 break;
733 buf = kmalloc(50, GFP_ATOMIC);
734 if (!buf)
735 break;
736 cfg = ie + 2;
737 memset(&iwe, 0, sizeof(iwe));
738 iwe.cmd = IWEVCUSTOM;
739 sprintf(buf, "Mesh network (version %d)", cfg[0]);
740 iwe.u.data.length = strlen(buf);
741 current_ev = iwe_stream_add_point(info, current_ev,
742 end_buf,
743 &iwe, buf);
744 sprintf(buf, "Path Selection Protocol ID: "
745 "0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3],
746 cfg[4]);
747 iwe.u.data.length = strlen(buf);
748 current_ev = iwe_stream_add_point(info, current_ev,
749 end_buf,
750 &iwe, buf);
751 sprintf(buf, "Path Selection Metric ID: "
752 "0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7],
753 cfg[8]);
754 iwe.u.data.length = strlen(buf);
755 current_ev = iwe_stream_add_point(info, current_ev,
756 end_buf,
757 &iwe, buf);
758 sprintf(buf, "Congestion Control Mode ID: "
759 "0x%02X%02X%02X%02X", cfg[9], cfg[10],
760 cfg[11], cfg[12]);
761 iwe.u.data.length = strlen(buf);
762 current_ev = iwe_stream_add_point(info, current_ev,
763 end_buf,
764 &iwe, buf);
765 sprintf(buf, "Channel Precedence: "
766 "0x%02X%02X%02X%02X", cfg[13], cfg[14],
767 cfg[15], cfg[16]);
768 iwe.u.data.length = strlen(buf);
769 current_ev = iwe_stream_add_point(info, current_ev,
770 end_buf,
771 &iwe, buf);
772 kfree(buf);
773 break;
774 case WLAN_EID_SUPP_RATES:
775 case WLAN_EID_EXT_SUPP_RATES:
776 /* display all supported rates in readable format */
777 p = current_ev + iwe_stream_lcp_len(info);
778
779 memset(&iwe, 0, sizeof(iwe));
780 iwe.cmd = SIOCGIWRATE;
781 /* Those two flags are ignored... */
782 iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0;
783
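			/* rates are advertised in units of 500 kbit/s;
			 * the top bit (basic-rate flag) is masked off */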
784 for (i = 0; i < ie[1]; i++) {
785 iwe.u.bitrate.value =
786 ((ie[i + 2] & 0x7f) * 500000);
787 p = iwe_stream_add_value(info, current_ev, p,
788 end_buf, &iwe, IW_EV_PARAM_LEN);
789 }
790 current_ev = p;
791 break;
792 }
793 rem -= ie[1] + 2;
794 ie += ie[1] + 2;
795 }
796
797 if (bss->pub.capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS)
798 || ismesh) {
799 memset(&iwe, 0, sizeof(iwe));
800 iwe.cmd = SIOCGIWMODE;
801 if (ismesh)
802 iwe.u.mode = IW_MODE_MESH;
803 else if (bss->pub.capability & WLAN_CAPABILITY_ESS)
804 iwe.u.mode = IW_MODE_MASTER;
805 else
806 iwe.u.mode = IW_MODE_ADHOC;
807 current_ev = iwe_stream_add_event(info, current_ev, end_buf,
808 &iwe, IW_EV_UINT_LEN);
809 }
810
811 buf = kmalloc(30, GFP_ATOMIC);
812 if (buf) {
813 memset(&iwe, 0, sizeof(iwe));
814 iwe.cmd = IWEVCUSTOM;
815 sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->pub.tsf));
816 iwe.u.data.length = strlen(buf);
817 current_ev = iwe_stream_add_point(info, current_ev, end_buf,
818 &iwe, buf);
819 memset(&iwe, 0, sizeof(iwe));
820 iwe.cmd = IWEVCUSTOM;
821 sprintf(buf, " Last beacon: %ums ago",
822 elapsed_jiffies_msecs(bss->ts));
823 iwe.u.data.length = strlen(buf);
824 current_ev = iwe_stream_add_point(info, current_ev,
825 end_buf, &iwe, buf);
826 kfree(buf);
827 }
828
829 ieee80211_scan_add_ies(info, &bss->pub, &current_ev, end_buf);
830
831 return current_ev;
832}
833
834
835static int ieee80211_scan_results(struct cfg80211_registered_device *dev,
836 struct iw_request_info *info,
837 char *buf, size_t len)
838{
839 char *current_ev = buf;
840 char *end_buf = buf + len;
841 struct cfg80211_internal_bss *bss;
842
843 spin_lock_bh(&dev->bss_lock);
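	/* drop entries that have not been seen for a while before dumping */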
844 cfg80211_bss_expire(dev);
845
846 list_for_each_entry(bss, &dev->bss_list, list) {
847 if (buf + len - current_ev <= IW_EV_ADDR_LEN) {
848 spin_unlock_bh(&dev->bss_lock);
849 return -E2BIG;
850 }
851 current_ev = ieee80211_bss(&dev->wiphy, info, bss,
852 current_ev, end_buf);
853 }
854 spin_unlock_bh(&dev->bss_lock);
855 return current_ev - buf;
856}
857
858
859int cfg80211_wext_giwscan(struct net_device *dev,
860 struct iw_request_info *info,
861 struct iw_point *data, char *extra)
862{
863 struct cfg80211_registered_device *rdev;
864 int res;
865
866 if (!netif_running(dev))
867 return -ENETDOWN;
868
869 rdev = cfg80211_get_dev_from_ifindex(dev->ifindex);
870
871 if (IS_ERR(rdev))
872 return PTR_ERR(rdev);
873
874 if (rdev->scan_req) {
875 res = -EAGAIN;
876 goto out;
877 }
878
879 res = ieee80211_scan_results(rdev, info, extra, data->length);
880 data->length = 0;
881 if (res >= 0) {
882 data->length = res;
883 res = 0;
884 }
885
886 out:
887 cfg80211_put_dev(rdev);
888 return res;
889}
890EXPORT_SYMBOL(cfg80211_wext_giwscan);
891#endif
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 79a382877641..efe3c5c92b2d 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -31,7 +31,7 @@ static ssize_t name ## _show(struct device *dev, \
31 31	return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member); \
32 32 }
33 33
34	SHOW_FMT(index, "%d", idx);
34	SHOW_FMT(index, "%d", wiphy_idx);
35 35 SHOW_FMT(macaddress, "%pM", wiphy.perm_addr);
36 36
37 37 static struct device_attribute ieee80211_dev_attrs[] = {
@@ -55,6 +55,41 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env)
55 55 }
56 56 #endif
57 57
58static int wiphy_suspend(struct device *dev, pm_message_t state)
59{
60 struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
61 int ret = 0;
62
63 rdev->suspend_at = get_seconds();
64
65 if (rdev->ops->suspend) {
66 rtnl_lock();
67 ret = rdev->ops->suspend(&rdev->wiphy);
68 rtnl_unlock();
69 }
70
71 return ret;
72}
73
74static int wiphy_resume(struct device *dev)
75{
76 struct cfg80211_registered_device *rdev = dev_to_rdev(dev);
77 int ret = 0;
78
79 /* Age scan results with time spent in suspend */
80 spin_lock_bh(&rdev->bss_lock);
81 cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at);
82 spin_unlock_bh(&rdev->bss_lock);
83
84 if (rdev->ops->resume) {
85 rtnl_lock();
86 ret = rdev->ops->resume(&rdev->wiphy);
87 rtnl_unlock();
88 }
89
90 return ret;
91}
92
58 93 struct class ieee80211_class = {
59 94 	.name = "ieee80211",
60 95 	.owner = THIS_MODULE,
@@ -63,6 +98,8 @@ struct class ieee80211_class = {
63 98 #ifdef CONFIG_HOTPLUG
64 99 	.dev_uevent = wiphy_uevent,
65 100 #endif
101	.suspend = wiphy_suspend,
102	.resume = wiphy_resume,
66 103 };
67 104
68 105 int wiphy_sysfs_init(void)
diff --git a/net/wireless/util.c b/net/wireless/util.c
index e76cc28b0345..487cdd9bcffc 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -9,7 +9,7 @@
9 9
10 10 struct ieee80211_rate *
11 11 ieee80211_get_response_rate(struct ieee80211_supported_band *sband,
12			    u64 basic_rates, int bitrate)
12			    u32 basic_rates, int bitrate)
13 13 {
14 14 	struct ieee80211_rate *result = &sband->bitrates[0];
15 15 	int i;
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 58e489fd4aed..0fd1db6e95bb 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -66,6 +66,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
66 66 	struct cfg80211_registered_device *rdev;
67 67 	struct vif_params vifparams;
68 68 	enum nl80211_iftype type;
69	int ret;
69 70
70 71 	if (!wdev)
71 72 		return -EOPNOTSUPP;
@@ -96,10 +97,16 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
96 97 		return -EINVAL;
97 98 	}
98 99
100	if (type == wdev->iftype)
101		return 0;
102
99 103 	memset(&vifparams, 0, sizeof(vifparams));
100 104
101	return rdev->ops->change_virtual_intf(wdev->wiphy, dev->ifindex, type,
105	ret = rdev->ops->change_virtual_intf(wdev->wiphy, dev->ifindex, type,
102				NULL, &vifparams);
106				NULL, &vifparams);
107	WARN_ON(!ret && wdev->iftype != type);
108
109	return ret;
103 110 }
104 111 EXPORT_SYMBOL(cfg80211_wext_siwmode);
105 112
@@ -137,3 +144,100 @@ int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info,
137 144 	return 0;
138 145 }
139 146 EXPORT_SYMBOL(cfg80211_wext_giwmode);
147
148
149int cfg80211_wext_giwrange(struct net_device *dev,
150 struct iw_request_info *info,
151 struct iw_point *data, char *extra)
152{
153 struct wireless_dev *wdev = dev->ieee80211_ptr;
154 struct iw_range *range = (struct iw_range *) extra;
155 enum ieee80211_band band;
156 int c = 0;
157
158 if (!wdev)
159 return -EOPNOTSUPP;
160
161 data->length = sizeof(struct iw_range);
162 memset(range, 0, sizeof(struct iw_range));
163
164 range->we_version_compiled = WIRELESS_EXT;
165 range->we_version_source = 21;
166 range->retry_capa = IW_RETRY_LIMIT;
167 range->retry_flags = IW_RETRY_LIMIT;
168 range->min_retry = 0;
169 range->max_retry = 255;
170 range->min_rts = 0;
171 range->max_rts = 2347;
172 range->min_frag = 256;
173 range->max_frag = 2346;
174
175 range->encoding_size[0] = 5;
176 range->encoding_size[1] = 13;
177 range->num_encoding_sizes = 2;
178 range->max_encoding_tokens = 4;
179
180 range->max_qual.updated = IW_QUAL_NOISE_INVALID;
181
182 switch (wdev->wiphy->signal_type) {
183 case CFG80211_SIGNAL_TYPE_NONE:
184 break;
185 case CFG80211_SIGNAL_TYPE_MBM:
186 range->max_qual.level = -110;
187 range->max_qual.qual = 70;
188 range->avg_qual.qual = 35;
189 range->max_qual.updated |= IW_QUAL_DBM;
190 range->max_qual.updated |= IW_QUAL_QUAL_UPDATED;
191 range->max_qual.updated |= IW_QUAL_LEVEL_UPDATED;
192 break;
193 case CFG80211_SIGNAL_TYPE_UNSPEC:
194 range->max_qual.level = 100;
195 range->max_qual.qual = 100;
196 range->avg_qual.qual = 50;
197 range->max_qual.updated |= IW_QUAL_QUAL_UPDATED;
198 range->max_qual.updated |= IW_QUAL_LEVEL_UPDATED;
199 break;
200 }
201
202 range->avg_qual.level = range->max_qual.level / 2;
203 range->avg_qual.noise = range->max_qual.noise / 2;
204 range->avg_qual.updated = range->max_qual.updated;
205
206 range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 |
207 IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP;
208
209
210 for (band = 0; band < IEEE80211_NUM_BANDS; band ++) {
211 int i;
212 struct ieee80211_supported_band *sband;
213
214 sband = wdev->wiphy->bands[band];
215
216 if (!sband)
217 continue;
218
219 for (i = 0; i < sband->n_channels && c < IW_MAX_FREQUENCIES; i++) {
220 struct ieee80211_channel *chan = &sband->channels[i];
221
222 if (!(chan->flags & IEEE80211_CHAN_DISABLED)) {
223 range->freq[c].i =
224 ieee80211_frequency_to_channel(
225 chan->center_freq);
226 range->freq[c].m = chan->center_freq;
227 range->freq[c].e = 6;
228 c++;
229 }
230 }
231 }
232 range->num_channels = c;
233 range->num_frequency = c;
234
235 IW_EVENT_CAPA_SET_KERNEL(range->event_capa);
236 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP);
237 IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN);
238
239 range->scan_capa |= IW_SCAN_CAPA_ESSID;
240
241 return 0;
242}
243EXPORT_SYMBOL(cfg80211_wext_giwrange);
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 9fc5b023d111..ed80af8ca5fb 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -951,10 +951,8 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
951 951 	/*
952 952 	 *	Incoming Call User Data.
953 953 	 */
954	if (skb->len >= 0) {
954	skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len);
955		skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len);
955	makex25->calluserdata.cudlength = skb->len;
956		makex25->calluserdata.cudlength = skb->len;
957	}
958 956
959 957 	sk->sk_ack_backlog++;
960 958
@@ -1037,6 +1035,12 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
1037 1035 		sx25.sx25_addr = x25->dest_addr;
1038 1036 	}
1039 1037
1038 /* Sanity check the packet size */
1039 if (len > 65535) {
1040 rc = -EMSGSIZE;
1041 goto out;
1042 }
1043
1040 1044 	SOCK_DEBUG(sk, "x25_sendmsg: sendto: Addresses built.\n");
1041 1045
1042 1046 	/* Build a packet */
@@ -1122,8 +1126,9 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
1122 1126 	if (msg->msg_flags & MSG_OOB)
1123 1127 		skb_queue_tail(&x25->interrupt_out_queue, skb);
1124 1128 	else {
1125		len = x25_output(sk, skb);
1129		rc = x25_output(sk, skb);
1126		if (len < 0)
1130		len = rc;
1131		if (rc < 0)
1127 1132 			kfree_skb(skb);
1128 1133 		else if (x25->qbitincl)
1129 1134 			len++;
@@ -1608,8 +1613,8 @@ static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = {
1608 1613
1609 1614 SOCKOPS_WRAP(x25_proto, AF_X25);
1610 1615
1611	static struct packet_type x25_packet_type = {
1616	static struct packet_type x25_packet_type __read_mostly = {
1612		.type = __constant_htons(ETH_P_X25),
1617		.type = cpu_to_be16(ETH_P_X25),
1613 1618 	.func = x25_lapb_receive_frame,
1614 1619 };
1615 1620
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index e25ff62ab2a6..82271720d970 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -748,12 +748,51 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
748 748 		schedule_work(&net->xfrm.state_hash_work);
749 749 }
750 750
751static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
752 struct flowi *fl, unsigned short family,
753 xfrm_address_t *daddr, xfrm_address_t *saddr,
754 struct xfrm_state **best, int *acq_in_progress,
755 int *error)
756{
757 /* Resolution logic:
758 * 1. There is a valid state with matching selector. Done.
759 * 2. Valid state with inappropriate selector. Skip.
760 *
761 * Entering area of "sysdeps".
762 *
763 * 3. If state is not valid, selector is temporary, it selects
764 * only session which triggered previous resolution. Key
765 * manager will do something to install a state with proper
766 * selector.
767 */
768 if (x->km.state == XFRM_STATE_VALID) {
769 if ((x->sel.family &&
770 !xfrm_selector_match(&x->sel, fl, x->sel.family)) ||
771 !security_xfrm_state_pol_flow_match(x, pol, fl))
772 return;
773
774 if (!*best ||
775 (*best)->km.dying > x->km.dying ||
776 ((*best)->km.dying == x->km.dying &&
777 (*best)->curlft.add_time < x->curlft.add_time))
778 *best = x;
779 } else if (x->km.state == XFRM_STATE_ACQ) {
780 *acq_in_progress = 1;
781 } else if (x->km.state == XFRM_STATE_ERROR ||
782 x->km.state == XFRM_STATE_EXPIRED) {
783 if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
784 security_xfrm_state_pol_flow_match(x, pol, fl))
785 *error = -ESRCH;
786 }
787}
788
751 789 struct xfrm_state *
752 790 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
753 791 		struct flowi *fl, struct xfrm_tmpl *tmpl,
754 792 		struct xfrm_policy *pol, int *err,
755 793 		unsigned short family)
756 794 {
795	static xfrm_address_t saddr_wildcard = { };
757 796 	struct net *net = xp_net(pol);
758 797 	unsigned int h;
759 798 	struct hlist_node *entry;
@@ -773,40 +812,27 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
773 812 		    xfrm_state_addr_check(x, daddr, saddr, family) &&
774 813 		    tmpl->mode == x->props.mode &&
775 814 		    tmpl->id.proto == x->id.proto &&
776		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
815		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
777			/* Resolution logic:
816			xfrm_state_look_at(pol, x, fl, family, daddr, saddr,
778			   1. There is a valid state with matching selector.
817					   &best, &acquire_in_progress, &error);
779 Done.
780 2. Valid state with inappropriate selector. Skip.
781
782 Entering area of "sysdeps".
783
784 3. If state is not valid, selector is temporary,
785 it selects only session which triggered
786 previous resolution. Key manager will do
787 something to install a state with proper
788 selector.
789 */
790 if (x->km.state == XFRM_STATE_VALID) {
791 if ((x->sel.family && !xfrm_selector_match(&x->sel, fl, x->sel.family)) ||
792 !security_xfrm_state_pol_flow_match(x, pol, fl))
793 continue;
794 if (!best ||
795 best->km.dying > x->km.dying ||
796 (best->km.dying == x->km.dying &&
797 best->curlft.add_time < x->curlft.add_time))
798 best = x;
799 } else if (x->km.state == XFRM_STATE_ACQ) {
800 acquire_in_progress = 1;
801 } else if (x->km.state == XFRM_STATE_ERROR ||
802 x->km.state == XFRM_STATE_EXPIRED) {
803 if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
804 security_xfrm_state_pol_flow_match(x, pol, fl))
805 error = -ESRCH;
806 }
807 }
808 818 		}
819 if (best)
820 goto found;
809 821
822 h = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family);
823 hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) {
824 if (x->props.family == family &&
825 x->props.reqid == tmpl->reqid &&
826 !(x->props.flags & XFRM_STATE_WILDRECV) &&
827 xfrm_state_addr_check(x, daddr, saddr, family) &&
828 tmpl->mode == x->props.mode &&
829 tmpl->id.proto == x->id.proto &&
830 (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
831 xfrm_state_look_at(pol, x, fl, family, daddr, saddr,
832 &best, &acquire_in_progress, &error);
833 }
834
835found:
810 836 	x = best;
811 837 	if (!x && !error && !acquire_in_progress) {
812 838 		if (tmpl->id.spi &&
@@ -1589,7 +1615,7 @@ void xfrm_state_walk_done(struct xfrm_state_walk *walk)
1589 1615
1590 1616 	spin_lock_bh(&xfrm_state_lock);
1591 1617 	list_del(&walk->all);
1592		spin_lock_bh(&xfrm_state_lock);
1618		spin_unlock_bh(&xfrm_state_lock);
1593 1619 }
1594 1620 EXPORT_SYMBOL(xfrm_state_walk_done);
1595 1621