author    Linus Torvalds <torvalds@linux-foundation.org>  2010-10-23 14:47:02 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2010-10-23 14:47:02 -0400
commit    5f05647dd81c11a6a165ccc8f0c1370b16f3bcb0 (patch)
tree      7851ef1c93aa1aba7ef327ca4b75fd35e6d10f29 /net
parent    02f36038c568111ad4fc433f6fa760ff5e38fab4 (diff)
parent    ec37a48d1d16c30b655ac5280209edf52a6775d4 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6: (1699 commits)
  bnx2/bnx2x: Unsupported Ethtool operations should return -EINVAL.
  vlan: Calling vlan_hwaccel_do_receive() is always valid.
  tproxy: use the interface primary IP address as a default value for --on-ip
  tproxy: added IPv6 support to the socket match
  cxgb3: function namespace cleanup
  tproxy: added IPv6 support to the TPROXY target
  tproxy: added IPv6 socket lookup function to nf_tproxy_core
  be2net: Changes to use only priority codes allowed by f/w
  tproxy: allow non-local binds of IPv6 sockets if IP_TRANSPARENT is enabled
  tproxy: added tproxy sockopt interface in the IPV6 layer
  tproxy: added udp6_lib_lookup function
  tproxy: added const specifiers to udp lookup functions
  tproxy: split off ipv6 defragmentation to a separate module
  l2tp: small cleanup
  nf_nat: restrict ICMP translation for embedded header
  can: mcp251x: fix generation of error frames
  can: mcp251x: fix endless loop in interrupt handler if CANINTF_MERRF is set
  can-raw: add msg_flags to distinguish local traffic
  9p: client code cleanup
  rds: make local functions/variables static
  ...

Fix up conflicts in net/core/dev.c, drivers/net/pcmcia/smc91c92_cs.c and
drivers/net/wireless/ath/ath9k/debug.c as per David
Diffstat (limited to 'net')
-rw-r--r--  net/802/fc.c | 2
-rw-r--r--  net/802/fddi.c | 12
-rw-r--r--  net/802/hippi.c | 2
-rw-r--r--  net/802/tr.c | 2
-rw-r--r--  net/8021q/vlan.c | 93
-rw-r--r--  net/8021q/vlan.h | 17
-rw-r--r--  net/8021q/vlan_core.c | 121
-rw-r--r--  net/8021q/vlan_dev.c | 10
-rw-r--r--  net/9p/client.c | 55
-rw-r--r--  net/9p/trans_fd.c | 2
-rw-r--r--  net/atm/clip.c | 4
-rw-r--r--  net/atm/common.c | 2
-rw-r--r--  net/atm/lec.c | 1
-rw-r--r--  net/ax25/af_ax25.c | 2
-rw-r--r--  net/ax25/ax25_route.c | 4
-rw-r--r--  net/bluetooth/af_bluetooth.c | 114
-rw-r--r--  net/bluetooth/cmtp/core.c | 6
-rw-r--r--  net/bluetooth/hci_core.c | 1
-rw-r--r--  net/bluetooth/hci_sysfs.c | 21
-rw-r--r--  net/bluetooth/hidp/core.c | 8
-rw-r--r--  net/bluetooth/l2cap.c | 60
-rw-r--r--  net/bluetooth/lib.c | 4
-rw-r--r--  net/bluetooth/rfcomm/core.c | 43
-rw-r--r--  net/bluetooth/rfcomm/sock.c | 104
-rw-r--r--  net/bluetooth/rfcomm/tty.c | 4
-rw-r--r--  net/bridge/br_device.c | 8
-rw-r--r--  net/bridge/br_if.c | 29
-rw-r--r--  net/bridge/br_input.c | 4
-rw-r--r--  net/bridge/br_netfilter.c | 134
-rw-r--r--  net/bridge/netfilter/ebt_vlan.c | 25
-rw-r--r--  net/bridge/netfilter/ebtables.c | 15
-rw-r--r--  net/caif/caif_dev.c | 24
-rw-r--r--  net/caif/caif_socket.c | 27
-rw-r--r--  net/caif/cfcnfg.c | 49
-rw-r--r--  net/caif/cfctrl.c | 59
-rw-r--r--  net/caif/cfdbgl.c | 4
-rw-r--r--  net/caif/cfdgml.c | 11
-rw-r--r--  net/caif/cffrml.c | 14
-rw-r--r--  net/caif/cfmuxl.c | 14
-rw-r--r--  net/caif/cfpkt_skbuff.c | 48
-rw-r--r--  net/caif/cfrfml.c | 12
-rw-r--r--  net/caif/cfserl.c | 4
-rw-r--r--  net/caif/cfsrvl.c | 17
-rw-r--r--  net/caif/cfutill.c | 12
-rw-r--r--  net/caif/cfveil.c | 11
-rw-r--r--  net/caif/cfvidl.c | 6
-rw-r--r--  net/caif/chnl_net.c | 47
-rw-r--r--  net/can/raw.c | 37
-rw-r--r--  net/core/datagram.c | 5
-rw-r--r--  net/core/dev.c | 597
-rw-r--r--  net/core/dst.c | 39
-rw-r--r--  net/core/ethtool.c | 91
-rw-r--r--  net/core/fib_rules.c | 16
-rw-r--r--  net/core/filter.c | 10
-rw-r--r--  net/core/flow.c | 82
-rw-r--r--  net/core/gen_estimator.c | 4
-rw-r--r--  net/core/iovec.c | 6
-rw-r--r--  net/core/neighbour.c | 486
-rw-r--r--  net/core/net-sysfs.c | 39
-rw-r--r--  net/core/net-sysfs.h | 4
-rw-r--r--  net/core/netpoll.c | 6
-rw-r--r--  net/core/pktgen.c | 12
-rw-r--r--  net/core/rtnetlink.c | 39
-rw-r--r--  net/core/skbuff.c | 105
-rw-r--r--  net/core/sock.c | 4
-rw-r--r--  net/core/utils.c | 15
-rw-r--r--  net/dccp/ccid.h | 52
-rw-r--r--  net/dccp/ccids/Kconfig | 31
-rw-r--r--  net/dccp/ccids/ccid2.c | 289
-rw-r--r--  net/dccp/ccids/ccid2.h | 35
-rw-r--r--  net/dccp/ccids/ccid3.c | 256
-rw-r--r--  net/dccp/ccids/ccid3.h | 51
-rw-r--r--  net/dccp/ccids/lib/loss_interval.c | 2
-rw-r--r--  net/dccp/ccids/lib/packet_history.c | 39
-rw-r--r--  net/dccp/ccids/lib/packet_history.h | 22
-rw-r--r--  net/dccp/ccids/lib/tfrc.h | 1
-rw-r--r--  net/dccp/ccids/lib/tfrc_equation.c | 14
-rw-r--r--  net/dccp/dccp.h | 46
-rw-r--r--  net/dccp/feat.c | 10
-rw-r--r--  net/dccp/feat.h | 1
-rw-r--r--  net/dccp/input.c | 20
-rw-r--r--  net/dccp/ipv4.c | 10
-rw-r--r--  net/dccp/ipv6.c | 10
-rw-r--r--  net/dccp/minisocks.c | 30
-rw-r--r--  net/dccp/options.c | 31
-rw-r--r--  net/dccp/output.c | 20
-rw-r--r--  net/dccp/proto.c | 50
-rw-r--r--  net/decnet/dn_neigh.c | 13
-rw-r--r--  net/decnet/dn_nsp_out.c | 8
-rw-r--r--  net/decnet/dn_route.c | 3
-rw-r--r--  net/econet/af_econet.c | 6
-rw-r--r--  net/ethernet/eth.c | 8
-rw-r--r--  net/ipv4/Kconfig | 8
-rw-r--r--  net/ipv4/Makefile | 1
-rw-r--r--  net/ipv4/af_inet.c | 8
-rw-r--r--  net/ipv4/arp.c | 245
-rw-r--r--  net/ipv4/datagram.c | 2
-rw-r--r--  net/ipv4/devinet.c | 11
-rw-r--r--  net/ipv4/fib_frontend.c | 192
-rw-r--r--  net/ipv4/fib_hash.c | 291
-rw-r--r--  net/ipv4/fib_lookup.h | 11
-rw-r--r--  net/ipv4/fib_rules.c | 13
-rw-r--r--  net/ipv4/fib_semantics.c | 297
-rw-r--r--  net/ipv4/fib_trie.c | 84
-rw-r--r--  net/ipv4/gre.c | 151
-rw-r--r--  net/ipv4/icmp.c | 4
-rw-r--r--  net/ipv4/igmp.c | 22
-rw-r--r--  net/ipv4/inet_diag.c | 2
-rw-r--r--  net/ipv4/inet_hashtables.c | 28
-rw-r--r--  net/ipv4/ip_fragment.c | 6
-rw-r--r--  net/ipv4/ip_gre.c | 237
-rw-r--r--  net/ipv4/ip_options.c | 3
-rw-r--r--  net/ipv4/ip_output.c | 24
-rw-r--r--  net/ipv4/ipip.c | 212
-rw-r--r--  net/ipv4/ipmr.c | 428
-rw-r--r--  net/ipv4/netfilter/Kconfig | 4
-rw-r--r--  net/ipv4/netfilter/arp_tables.c | 64
-rw-r--r--  net/ipv4/netfilter/arpt_mangle.c | 2
-rw-r--r--  net/ipv4/netfilter/ip_tables.c | 84
-rw-r--r--  net/ipv4/netfilter/ipt_CLUSTERIP.c | 31
-rw-r--r--  net/ipv4/netfilter/ipt_LOG.c | 145
-rw-r--r--  net/ipv4/netfilter/nf_nat_amanda.c | 9
-rw-r--r--  net/ipv4/netfilter/nf_nat_core.c | 51
-rw-r--r--  net/ipv4/netfilter/nf_nat_ftp.c | 9
-rw-r--r--  net/ipv4/netfilter/nf_nat_h323.c | 53
-rw-r--r--  net/ipv4/netfilter/nf_nat_helper.c | 76
-rw-r--r--  net/ipv4/netfilter/nf_nat_irc.c | 9
-rw-r--r--  net/ipv4/netfilter/nf_nat_rule.c | 17
-rw-r--r--  net/ipv4/netfilter/nf_nat_sip.c | 27
-rw-r--r--  net/ipv4/protocol.c | 31
-rw-r--r--  net/ipv4/raw.c | 2
-rw-r--r--  net/ipv4/route.c | 190
-rw-r--r--  net/ipv4/tcp.c | 11
-rw-r--r--  net/ipv4/tcp_input.c | 55
-rw-r--r--  net/ipv4/tcp_ipv4.c | 12
-rw-r--r--  net/ipv4/tcp_minisocks.c | 2
-rw-r--r--  net/ipv4/tcp_output.c | 31
-rw-r--r--  net/ipv4/tcp_timer.c | 50
-rw-r--r--  net/ipv4/tcp_westwood.c | 2
-rw-r--r--  net/ipv4/tunnel4.c | 19
-rw-r--r--  net/ipv4/udp.c | 4
-rw-r--r--  net/ipv4/xfrm4_policy.c | 4
-rw-r--r--  net/ipv4/xfrm4_tunnel.c | 4
-rw-r--r--  net/ipv6/addrconf.c | 8
-rw-r--r--  net/ipv6/addrlabel.c | 5
-rw-r--r--  net/ipv6/af_inet6.c | 9
-rw-r--r--  net/ipv6/datagram.c | 19
-rw-r--r--  net/ipv6/exthdrs_core.c | 4
-rw-r--r--  net/ipv6/fib6_rules.c | 3
-rw-r--r--  net/ipv6/ip6_fib.c | 9
-rw-r--r--  net/ipv6/ip6_output.c | 6
-rw-r--r--  net/ipv6/ip6_tunnel.c | 157
-rw-r--r--  net/ipv6/ip6mr.c | 1
-rw-r--r--  net/ipv6/ipv6_sockglue.c | 23
-rw-r--r--  net/ipv6/ndisc.c | 36
-rw-r--r--  net/ipv6/netfilter/Kconfig | 4
-rw-r--r--  net/ipv6/netfilter/Makefile | 5
-rw-r--r--  net/ipv6/netfilter/ip6_tables.c | 98
-rw-r--r--  net/ipv6/netfilter/ip6t_LOG.c | 157
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 78
-rw-r--r--  net/ipv6/netfilter/nf_conntrack_reasm.c | 16
-rw-r--r--  net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 131
-rw-r--r--  net/ipv6/protocol.c | 32
-rw-r--r--  net/ipv6/raw.c | 12
-rw-r--r--  net/ipv6/reassembly.c | 2
-rw-r--r--  net/ipv6/route.c | 50
-rw-r--r--  net/ipv6/sit.c | 165
-rw-r--r--  net/ipv6/tcp_ipv6.c | 14
-rw-r--r--  net/ipv6/tunnel6.c | 17
-rw-r--r--  net/ipv6/udp.c | 16
-rw-r--r--  net/ipv6/xfrm6_policy.c | 10
-rw-r--r--  net/ipv6/xfrm6_tunnel.c | 8
-rw-r--r--  net/irda/af_irda.c | 380
-rw-r--r--  net/irda/discovery.c | 2
-rw-r--r--  net/irda/ircomm/ircomm_tty.c | 4
-rw-r--r--  net/irda/iriap.c | 3
-rw-r--r--  net/irda/irlan/irlan_eth.c | 32
-rw-r--r--  net/irda/irlan/irlan_event.c | 2
-rw-r--r--  net/irda/irlmp.c | 2
-rw-r--r--  net/irda/irlmp_frame.c | 2
-rw-r--r--  net/irda/irnet/irnet.h | 2
-rw-r--r--  net/irda/irnet/irnet_irda.c | 22
-rw-r--r--  net/irda/irnet/irnet_ppp.c | 69
-rw-r--r--  net/irda/irnet/irnet_ppp.h | 3
-rw-r--r--  net/irda/parameters.c | 4
-rw-r--r--  net/key/af_key.c | 4
-rw-r--r--  net/l2tp/l2tp_eth.c | 1
-rw-r--r--  net/l2tp/l2tp_ip.c | 4
-rw-r--r--  net/l2tp/l2tp_ppp.c | 2
-rw-r--r--  net/mac80211/aes_ccm.c | 6
-rw-r--r--  net/mac80211/aes_cmac.c | 6
-rw-r--r--  net/mac80211/agg-rx.c | 30
-rw-r--r--  net/mac80211/agg-tx.c | 14
-rw-r--r--  net/mac80211/cfg.c | 244
-rw-r--r--  net/mac80211/chan.c | 2
-rw-r--r--  net/mac80211/debugfs.c | 7
-rw-r--r--  net/mac80211/debugfs_key.c | 55
-rw-r--r--  net/mac80211/debugfs_netdev.c | 3
-rw-r--r--  net/mac80211/debugfs_sta.c | 5
-rw-r--r--  net/mac80211/driver-ops.h | 14
-rw-r--r--  net/mac80211/driver-trace.h | 42
-rw-r--r--  net/mac80211/ht.c | 47
-rw-r--r--  net/mac80211/ibss.c | 77
-rw-r--r--  net/mac80211/ieee80211_i.h | 133
-rw-r--r--  net/mac80211/iface.c | 460
-rw-r--r--  net/mac80211/key.c | 168
-rw-r--r--  net/mac80211/key.h | 13
-rw-r--r--  net/mac80211/main.c | 194
-rw-r--r--  net/mac80211/mesh_plink.c | 17
-rw-r--r--  net/mac80211/mlme.c | 173
-rw-r--r--  net/mac80211/offchannel.c | 26
-rw-r--r--  net/mac80211/pm.c | 2
-rw-r--r--  net/mac80211/rate.c | 11
-rw-r--r--  net/mac80211/rc80211_minstrel_ht.c | 7
-rw-r--r--  net/mac80211/rc80211_pid_debugfs.c | 2
-rw-r--r--  net/mac80211/rx.c | 819
-rw-r--r--  net/mac80211/scan.c | 179
-rw-r--r--  net/mac80211/sta_info.c | 52
-rw-r--r--  net/mac80211/sta_info.h | 24
-rw-r--r--  net/mac80211/status.c | 14
-rw-r--r--  net/mac80211/tx.c | 73
-rw-r--r--  net/mac80211/util.c | 102
-rw-r--r--  net/mac80211/wep.c | 10
-rw-r--r--  net/mac80211/work.c | 39
-rw-r--r--  net/mac80211/wpa.c | 34
-rw-r--r--  net/netfilter/core.c | 6
-rw-r--r--  net/netfilter/ipvs/Kconfig | 20
-rw-r--r--  net/netfilter/ipvs/Makefile | 10
-rw-r--r--  net/netfilter/ipvs/ip_vs_app.c | 6
-rw-r--r--  net/netfilter/ipvs/ip_vs_conn.c | 286
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c | 819
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c | 392
-rw-r--r--  net/netfilter/ipvs/ip_vs_ftp.c | 194
-rw-r--r--  net/netfilter/ipvs/ip_vs_nfct.c | 292
-rw-r--r--  net/netfilter/ipvs/ip_vs_pe.c | 147
-rw-r--r--  net/netfilter/ipvs/ip_vs_pe_sip.c | 169
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto.c | 8
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 99
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_sctp.c | 27
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_tcp.c | 52
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_udp.c | 51
-rw-r--r--  net/netfilter/ipvs/ip_vs_sched.c | 47
-rw-r--r--  net/netfilter/ipvs/ip_vs_sync.c | 46
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c | 696
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 131
-rw-r--r--  net/netfilter/nf_conntrack_expect.c | 68
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 77
-rw-r--r--  net/netfilter/nf_conntrack_sip.c | 42
-rw-r--r--  net/netfilter/nf_tproxy_core.c | 35
-rw-r--r--  net/netfilter/x_tables.c | 12
-rw-r--r--  net/netfilter/xt_TPROXY.c | 366
-rw-r--r--  net/netfilter/xt_hashlimit.c | 15
-rw-r--r--  net/netfilter/xt_ipvs.c | 1
-rw-r--r--  net/netfilter/xt_socket.c | 167
-rw-r--r--  net/netlink/genetlink.c | 14
-rw-r--r--  net/packet/af_packet.c | 4
-rw-r--r--  net/phonet/Kconfig | 12
-rw-r--r--  net/phonet/af_phonet.c | 17
-rw-r--r--  net/phonet/datagram.c | 13
-rw-r--r--  net/phonet/pep.c | 388
-rw-r--r--  net/phonet/pn_dev.c | 5
-rw-r--r--  net/phonet/socket.c | 289
-rw-r--r--  net/rds/af_rds.c | 26
-rw-r--r--  net/rds/bind.c | 82
-rw-r--r--  net/rds/cong.c | 8
-rw-r--r--  net/rds/connection.c | 159
-rw-r--r--  net/rds/ib.c | 200
-rw-r--r--  net/rds/ib.h | 104
-rw-r--r--  net/rds/ib_cm.c | 184
-rw-r--r--  net/rds/ib_rdma.c | 318
-rw-r--r--  net/rds/ib_recv.c | 549
-rw-r--r--  net/rds/ib_send.c | 682
-rw-r--r--  net/rds/ib_stats.c | 2
-rw-r--r--  net/rds/ib_sysctl.c | 19
-rw-r--r--  net/rds/info.c | 12
-rw-r--r--  net/rds/iw.c | 8
-rw-r--r--  net/rds/iw.h | 15
-rw-r--r--  net/rds/iw_cm.c | 14
-rw-r--r--  net/rds/iw_rdma.c | 8
-rw-r--r--  net/rds/iw_recv.c | 24
-rw-r--r--  net/rds/iw_send.c | 93
-rw-r--r--  net/rds/iw_sysctl.c | 6
-rw-r--r--  net/rds/loop.c | 31
-rw-r--r--  net/rds/message.c | 142
-rw-r--r--  net/rds/page.c | 8
-rw-r--r--  net/rds/rdma.c | 339
-rw-r--r--  net/rds/rdma.h | 85
-rw-r--r--  net/rds/rdma_transport.c | 44
-rw-r--r--  net/rds/rdma_transport.h | 4
-rw-r--r--  net/rds/rds.h | 192
-rw-r--r--  net/rds/recv.c | 12
-rw-r--r--  net/rds/send.c | 548
-rw-r--r--  net/rds/stats.c | 6
-rw-r--r--  net/rds/sysctl.c | 4
-rw-r--r--  net/rds/tcp.c | 12
-rw-r--r--  net/rds/tcp.h | 9
-rw-r--r--  net/rds/tcp_connect.c | 2
-rw-r--r--  net/rds/tcp_listen.c | 6
-rw-r--r--  net/rds/tcp_recv.c | 17
-rw-r--r--  net/rds/tcp_send.c | 68
-rw-r--r--  net/rds/threads.c | 69
-rw-r--r--  net/rds/transport.c | 19
-rw-r--r--  net/rds/xlist.h | 80
-rw-r--r--  net/rfkill/input.c | 2
-rw-r--r--  net/rose/rose_link.c | 4
-rw-r--r--  net/sched/Kconfig | 10
-rw-r--r--  net/sched/Makefile | 1
-rw-r--r--  net/sched/act_csum.c | 595
-rw-r--r--  net/sched/act_ipt.c | 14
-rw-r--r--  net/sched/cls_flow.c | 74
-rw-r--r--  net/sched/em_meta.c | 6
-rw-r--r--  net/sched/sch_api.c | 44
-rw-r--r--  net/sched/sch_atm.c | 5
-rw-r--r--  net/sched/sch_cbq.c | 12
-rw-r--r--  net/sched/sch_drr.c | 4
-rw-r--r--  net/sched/sch_dsmark.c | 6
-rw-r--r--  net/sched/sch_fifo.c | 3
-rw-r--r--  net/sched/sch_generic.c | 24
-rw-r--r--  net/sched/sch_hfsc.c | 8
-rw-r--r--  net/sched/sch_htb.c | 12
-rw-r--r--  net/sched/sch_mq.c | 2
-rw-r--r--  net/sched/sch_multiq.c | 3
-rw-r--r--  net/sched/sch_netem.c | 3
-rw-r--r--  net/sched/sch_prio.c | 2
-rw-r--r--  net/sched/sch_sfq.c | 33
-rw-r--r--  net/sched/sch_teql.c | 8
-rw-r--r--  net/sctp/associola.c | 2
-rw-r--r--  net/sctp/chunk.c | 2
-rw-r--r--  net/sctp/inqueue.c | 2
-rw-r--r--  net/sctp/ipv6.c | 4
-rw-r--r--  net/sctp/objcnt.c | 5
-rw-r--r--  net/sctp/output.c | 2
-rw-r--r--  net/sctp/outqueue.c | 34
-rw-r--r--  net/sctp/probe.c | 4
-rw-r--r--  net/sctp/protocol.c | 19
-rw-r--r--  net/sctp/sm_make_chunk.c | 2
-rw-r--r--  net/sctp/sm_sideeffect.c | 21
-rw-r--r--  net/sctp/sm_statefuns.c | 20
-rw-r--r--  net/sctp/sm_statetable.c | 42
-rw-r--r--  net/sctp/socket.c | 85
-rw-r--r--  net/sctp/transport.c | 9
-rw-r--r--  net/socket.c | 37
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c | 2
-rw-r--r--  net/sunrpc/auth_gss/gss_generic_token.c | 44
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_seqnum.c | 2
-rw-r--r--  net/sunrpc/auth_gss/gss_mech_switch.c | 2
-rw-r--r--  net/sunrpc/sched.c | 2
-rw-r--r--  net/tipc/addr.c | 7
-rw-r--r--  net/tipc/bcast.c | 51
-rw-r--r--  net/tipc/bcast.h | 3
-rw-r--r--  net/tipc/bearer.c | 42
-rw-r--r--  net/tipc/cluster.c | 21
-rw-r--r--  net/tipc/cluster.h | 2
-rw-r--r--  net/tipc/config.c | 148
-rw-r--r--  net/tipc/config.h | 6
-rw-r--r--  net/tipc/core.c | 38
-rw-r--r--  net/tipc/core.h | 9
-rw-r--r--  net/tipc/dbg.c | 17
-rw-r--r--  net/tipc/dbg.h | 3
-rw-r--r--  net/tipc/discover.c | 44
-rw-r--r--  net/tipc/discover.h | 5
-rw-r--r--  net/tipc/eth_media.c | 48
-rw-r--r--  net/tipc/link.c | 188
-rw-r--r--  net/tipc/link.h | 24
-rw-r--r--  net/tipc/msg.c | 2
-rw-r--r--  net/tipc/msg.h | 6
-rw-r--r--  net/tipc/name_distr.c | 2
-rw-r--r--  net/tipc/name_table.c | 67
-rw-r--r--  net/tipc/net.c | 10
-rw-r--r--  net/tipc/node.c | 73
-rw-r--r--  net/tipc/node.h | 3
-rw-r--r--  net/tipc/port.c | 295
-rw-r--r--  net/tipc/port.h | 4
-rw-r--r--  net/tipc/ref.c | 17
-rw-r--r--  net/tipc/ref.h | 1
-rw-r--r--  net/tipc/socket.c | 83
-rw-r--r--  net/tipc/subscr.c | 77
-rw-r--r--  net/tipc/subscr.h | 2
-rw-r--r--  net/tipc/zone.c | 11
-rw-r--r--  net/tipc/zone.h | 1
-rw-r--r--  net/unix/af_unix.c | 10
-rw-r--r--  net/wireless/core.c | 66
-rw-r--r--  net/wireless/core.h | 34
-rw-r--r--  net/wireless/ibss.c | 21
-rw-r--r--  net/wireless/mlme.c | 225
-rw-r--r--  net/wireless/nl80211.c | 2189
-rw-r--r--  net/wireless/nl80211.h | 14
-rw-r--r--  net/wireless/radiotap.c | 61
-rw-r--r--  net/wireless/reg.c | 22
-rw-r--r--  net/wireless/scan.c | 12
-rw-r--r--  net/wireless/sme.c | 11
-rw-r--r--  net/wireless/sysfs.c | 18
-rw-r--r--  net/wireless/util.c | 40
-rw-r--r--  net/wireless/wext-compat.c | 42
-rw-r--r--  net/wireless/wext-core.c | 2
-rw-r--r--  net/wireless/wext-sme.c | 2
-rw-r--r--  net/x25/af_x25.c | 34
-rw-r--r--  net/xfrm/xfrm_policy.c | 7
398 files changed, 15632 insertions, 10876 deletions
diff --git a/net/802/fc.c b/net/802/fc.c
index 34cf1ee014b8..1e49f2d4ea96 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -70,7 +70,7 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev,
 	if(daddr)
 	{
 		memcpy(fch->daddr,daddr,dev->addr_len);
-		return(hdr_len);
+		return hdr_len;
 	}
 	return -hdr_len;
 }
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 3ef0ab0a543a..94b3ad08f39a 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -82,10 +82,10 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev,
 	if (daddr != NULL)
 	{
 		memcpy(fddi->daddr, daddr, dev->addr_len);
-		return(hl);
+		return hl;
 	}
 
-	return(-hl);
+	return -hl;
 }
 
 
@@ -108,7 +108,7 @@ static int fddi_rebuild_header(struct sk_buff *skb)
 	{
 		printk("%s: Don't know how to resolve type %04X addresses.\n",
 		       skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype));
-		return(0);
+		return 0;
 	}
 }
 
@@ -162,7 +162,7 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
 
 	/* Assume 802.2 SNAP frames, for now */
 
-	return(type);
+	return type;
 }
 
 EXPORT_SYMBOL(fddi_type_trans);
@@ -170,9 +170,9 @@ EXPORT_SYMBOL(fddi_type_trans);
 int fddi_change_mtu(struct net_device *dev, int new_mtu)
 {
 	if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
-		return(-EINVAL);
+		return -EINVAL;
 	dev->mtu = new_mtu;
-	return(0);
+	return 0;
 }
 EXPORT_SYMBOL(fddi_change_mtu);
 
diff --git a/net/802/hippi.c b/net/802/hippi.c
index cd3e8e929529..91aca8780fd0 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -152,7 +152,7 @@ int hippi_change_mtu(struct net_device *dev, int new_mtu)
 	if ((new_mtu < 68) || (new_mtu > 65280))
 		return -EINVAL;
 	dev->mtu = new_mtu;
-	return(0);
+	return 0;
 }
 EXPORT_SYMBOL(hippi_change_mtu);
 
diff --git a/net/802/tr.c b/net/802/tr.c
index 1c6e596074df..5e20cf8a074b 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -145,7 +145,7 @@ static int tr_header(struct sk_buff *skb, struct net_device *dev,
 	{
 		memcpy(trh->daddr,daddr,dev->addr_len);
 		tr_source_route(skb, trh, dev);
-		return(hdr_len);
+		return hdr_len;
 	}
 
 	return -hdr_len;
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index a2ad15250575..05b867e43757 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -44,9 +44,6 @@
 
 int vlan_net_id __read_mostly;
 
-/* Our listing of VLAN group(s) */
-static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE];
-
 const char vlan_fullname[] = "802.1Q VLAN Support";
 const char vlan_version[] = DRV_VERSION;
 static const char vlan_copyright[] = "Ben Greear <greearb@candelatech.com>";
@@ -59,40 +56,6 @@ static struct packet_type vlan_packet_type __read_mostly = {
 
 /* End of global variables definitions. */
 
-static inline unsigned int vlan_grp_hashfn(unsigned int idx)
-{
-	return ((idx >> VLAN_GRP_HASH_SHIFT) ^ idx) & VLAN_GRP_HASH_MASK;
-}
-
-/* Must be invoked with RCU read lock (no preempt) */
-static struct vlan_group *__vlan_find_group(struct net_device *real_dev)
-{
-	struct vlan_group *grp;
-	struct hlist_node *n;
-	int hash = vlan_grp_hashfn(real_dev->ifindex);
-
-	hlist_for_each_entry_rcu(grp, n, &vlan_group_hash[hash], hlist) {
-		if (grp->real_dev == real_dev)
-			return grp;
-	}
-
-	return NULL;
-}
-
-/* Find the protocol handler. Assumes VID < VLAN_VID_MASK.
- *
- * Must be invoked with RCU read lock (no preempt)
- */
-struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id)
-{
-	struct vlan_group *grp = __vlan_find_group(real_dev);
-
-	if (grp)
-		return vlan_group_get_device(grp, vlan_id);
-
-	return NULL;
-}
-
 static void vlan_group_free(struct vlan_group *grp)
 {
 	int i;
@@ -111,8 +74,6 @@ static struct vlan_group *vlan_group_alloc(struct net_device *real_dev)
 		return NULL;
 
 	grp->real_dev = real_dev;
-	hlist_add_head_rcu(&grp->hlist,
-			&vlan_group_hash[vlan_grp_hashfn(real_dev->ifindex)]);
 	return grp;
 }
 
@@ -151,7 +112,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 
 	ASSERT_RTNL();
 
-	grp = __vlan_find_group(real_dev);
+	grp = real_dev->vlgrp;
 	BUG_ON(!grp);
 
 	/* Take it out of our own structures, but be sure to interlock with
@@ -173,11 +134,10 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	if (grp->nr_vlans == 0) {
 		vlan_gvrp_uninit_applicant(real_dev);
 
-		if (real_dev->features & NETIF_F_HW_VLAN_RX)
+		rcu_assign_pointer(real_dev->vlgrp, NULL);
+		if (ops->ndo_vlan_rx_register)
 			ops->ndo_vlan_rx_register(real_dev, NULL);
 
-		hlist_del_rcu(&grp->hlist);
-
 		/* Free the group, after all cpu's are done. */
 		call_rcu(&grp->rcu, vlan_rcu_free);
 	}
@@ -196,18 +156,13 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 		return -EOPNOTSUPP;
 	}
 
-	if ((real_dev->features & NETIF_F_HW_VLAN_RX) && !ops->ndo_vlan_rx_register) {
-		pr_info("8021q: device %s has buggy VLAN hw accel\n", name);
-		return -EOPNOTSUPP;
-	}
-
 	if ((real_dev->features & NETIF_F_HW_VLAN_FILTER) &&
 	    (!ops->ndo_vlan_rx_add_vid || !ops->ndo_vlan_rx_kill_vid)) {
 		pr_info("8021q: Device %s has buggy VLAN hw accel\n", name);
 		return -EOPNOTSUPP;
 	}
 
-	if (__find_vlan_dev(real_dev, vlan_id) != NULL)
+	if (vlan_find_dev(real_dev, vlan_id) != NULL)
 		return -EEXIST;
 
 	return 0;
@@ -222,7 +177,7 @@ int register_vlan_dev(struct net_device *dev)
 	struct vlan_group *grp, *ngrp = NULL;
 	int err;
 
-	grp = __vlan_find_group(real_dev);
+	grp = real_dev->vlgrp;
 	if (!grp) {
 		ngrp = grp = vlan_group_alloc(real_dev);
 		if (!grp)
@@ -252,8 +207,11 @@ int register_vlan_dev(struct net_device *dev)
 	vlan_group_set_device(grp, vlan_id, dev);
 	grp->nr_vlans++;
 
-	if (ngrp && real_dev->features & NETIF_F_HW_VLAN_RX)
-		ops->ndo_vlan_rx_register(real_dev, ngrp);
+	if (ngrp) {
+		if (ops->ndo_vlan_rx_register)
+			ops->ndo_vlan_rx_register(real_dev, ngrp);
+		rcu_assign_pointer(real_dev->vlgrp, ngrp);
+	}
 	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
 		ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
 
@@ -264,7 +222,6 @@ out_uninit_applicant:
 	vlan_gvrp_uninit_applicant(real_dev);
 out_free_group:
 	if (ngrp) {
-		hlist_del_rcu(&ngrp->hlist);
 		/* Free the group, after all cpu's are done. */
 		call_rcu(&ngrp->rcu, vlan_rcu_free);
 	}
@@ -321,7 +278,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	if (new_dev == NULL)
 		return -ENOBUFS;
 
-	new_dev->real_num_tx_queues = real_dev->real_num_tx_queues;
+	netif_copy_real_num_queues(new_dev, real_dev);
 	dev_net_set(new_dev, net);
 	/* need 4 bytes for extra VLAN header info,
 	 * hope the underlying device can handle it.
@@ -428,7 +385,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
 	}
 
-	grp = __vlan_find_group(dev);
+	grp = dev->vlgrp;
 	if (!grp)
 		goto out;
 
@@ -439,7 +396,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	switch (event) {
 	case NETDEV_CHANGE:
 		/* Propagate real device state to vlan devices */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -450,7 +407,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_CHANGEADDR:
 		/* Adjust unicast filters on underlying device */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -464,7 +421,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;
 
 	case NETDEV_CHANGEMTU:
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -478,7 +435,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_FEAT_CHANGE:
 		/* Propagate device features to underlying device */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -490,7 +447,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_DOWN:
 		/* Put all VLANs for this dev in the down state too. */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -508,7 +465,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 
 	case NETDEV_UP:
 		/* Put all VLANs for this dev in the up state too. */
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -525,10 +482,14 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		break;
 
 	case NETDEV_UNREGISTER:
+		/* twiddle thumbs on netns device moves */
+		if (dev->reg_state != NETREG_UNREGISTERING)
+			break;
+
 		/* Delete all VLANs for this dev. */
 		grp->killall = 1;
 
-		for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) {
+		for (i = 0; i < VLAN_N_VID; i++) {
 			vlandev = vlan_group_get_device(grp, i);
 			if (!vlandev)
 				continue;
@@ -536,7 +497,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			/* unregistration of last vlan destroys group, abort
 			 * afterwards */
 			if (grp->nr_vlans == 1)
-				i = VLAN_GROUP_ARRAY_LEN;
+				i = VLAN_N_VID;
 
 			unregister_vlan_dev(vlandev, &list);
 		}
@@ -742,8 +703,6 @@ err0:
 
 static void __exit vlan_cleanup_module(void)
 {
-	unsigned int i;
-
 	vlan_ioctl_set(NULL);
 	vlan_netlink_fini();
 
@@ -751,10 +710,6 @@ static void __exit vlan_cleanup_module(void)
 
 	dev_remove_pack(&vlan_packet_type);
 
-	/* This table must be empty if there are no module references left. */
-	for (i = 0; i < VLAN_GRP_HASH_SIZE; i++)
-		BUG_ON(!hlist_empty(&vlan_group_hash[i]));
-
 	unregister_pernet_subsys(&vlan_net_ops);
 	rcu_barrier(); /* Wait for completion of call_rcu()'s */
 
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 8d9503ad01da..db01b3181fdc 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -72,23 +72,6 @@ static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
 	return netdev_priv(dev);
 }
 
-#define VLAN_GRP_HASH_SHIFT	5
-#define VLAN_GRP_HASH_SIZE	(1 << VLAN_GRP_HASH_SHIFT)
-#define VLAN_GRP_HASH_MASK	(VLAN_GRP_HASH_SIZE - 1)
-
-/* Find a VLAN device by the MAC address of its Ethernet device, and
- * it's VLAN ID. The default configuration is to have VLAN's scope
- * to be box-wide, so the MAC will be ignored. The mac will only be
- * looked at if we are configured to have a separate set of VLANs per
- * each MAC addressable interface. Note that this latter option does
- * NOT follow the spec for VLANs, but may be useful for doing very
- * large quantities of VLAN MUX/DEMUX onto FrameRelay or ATM PVCs.
- *
- * Must be invoked with rcu_read_lock (ie preempt disabled)
- * or with RTNL.
- */
-struct net_device *__find_vlan_dev(struct net_device *real_dev, u16 vlan_id);
-
 /* found in vlan_dev.c */
 int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 		  struct packet_type *ptype, struct net_device *orig_dev);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 0eb96f7e44be..69b2f79800a5 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -4,53 +4,29 @@
 #include <linux/netpoll.h>
 #include "vlan.h"
 
-/* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */
-int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
-		      u16 vlan_tci, int polling)
+bool vlan_hwaccel_do_receive(struct sk_buff **skbp)
 {
+	struct sk_buff *skb = *skbp;
+	u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
 	struct net_device *vlan_dev;
-	u16 vlan_id;
-
-	if (netpoll_rx(skb))
-		return NET_RX_DROP;
-
-	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
-		skb->deliver_no_wcard = 1;
+	struct vlan_rx_stats *rx_stats;
 
-	skb->skb_iif = skb->dev->ifindex;
-	__vlan_hwaccel_put_tag(skb, vlan_tci);
-	vlan_id = vlan_tci & VLAN_VID_MASK;
-	vlan_dev = vlan_group_get_device(grp, vlan_id);
-
-	if (vlan_dev)
-		skb->dev = vlan_dev;
-	else if (vlan_id) {
-		if (!(skb->dev->flags & IFF_PROMISC))
-			goto drop;
-		skb->pkt_type = PACKET_OTHERHOST;
+	vlan_dev = vlan_find_dev(skb->dev, vlan_id);
+	if (!vlan_dev) {
+		if (vlan_id)
+			skb->pkt_type = PACKET_OTHERHOST;
+		return false;
 	}
 
-	return (polling ? netif_receive_skb(skb) : netif_rx(skb));
+	skb = *skbp = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return false;
 
-drop:
-	dev_kfree_skb_any(skb);
-	return NET_RX_DROP;
-}
-EXPORT_SYMBOL(__vlan_hwaccel_rx);
-
-int vlan_hwaccel_do_receive(struct sk_buff *skb)
-{
-	struct net_device *dev = skb->dev;
-	struct vlan_rx_stats *rx_stats;
-
-	skb->dev = vlan_dev_info(dev)->real_dev;
-	netif_nit_deliver(skb);
-
-	skb->dev = dev;
-	skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci);
+	skb->dev = vlan_dev;
+	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
-	rx_stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_rx_stats);
+	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_rx_stats);
 
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
@@ -67,12 +43,13 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
 		 * This allows the VLAN to have a different MAC than the
 		 * underlying device, and still route correctly. */
 		if (!compare_ether_addr(eth_hdr(skb)->h_dest,
-					dev->dev_addr))
+					vlan_dev->dev_addr))
 			skb->pkt_type = PACKET_HOST;
 		break;
 	}
 	u64_stats_update_end(&rx_stats->syncp);
-	return 0;
+
+	return true;
 }
 
 struct net_device *vlan_dev_real_dev(const struct net_device *dev)
@@ -87,71 +64,27 @@ u16 vlan_dev_vlan_id(const struct net_device *dev)
 }
 EXPORT_SYMBOL(vlan_dev_vlan_id);
 
-static gro_result_t
-vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp,
-		unsigned int vlan_tci, struct sk_buff *skb)
+/* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */
+int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp,
+		      u16 vlan_tci, int polling)
 {
-	struct sk_buff *p;
-	struct net_device *vlan_dev;
-	u16 vlan_id;
-
-	if (skb_bond_should_drop(skb, ACCESS_ONCE(skb->dev->master)))
-		skb->deliver_no_wcard = 1;
-
-	skb->skb_iif = skb->dev->ifindex;
 	__vlan_hwaccel_put_tag(skb, vlan_tci);
-	vlan_id = vlan_tci & VLAN_VID_MASK;
-	vlan_dev = vlan_group_get_device(grp, vlan_id);
-
-	if (vlan_dev)
-		skb->dev = vlan_dev;
-	else if (vlan_id) {
-		if (!(skb->dev->flags & IFF_PROMISC))
-			goto drop;
-		skb->pkt_type = PACKET_OTHERHOST;
-	}
-
-	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow =
-			p->dev == skb->dev && !compare_ether_header(
-				skb_mac_header(p), skb_gro_mac_header(skb));
-		NAPI_GRO_CB(p)->flush = 0;
-	}
-
-	return dev_gro_receive(napi, skb);
-
-drop:
-	return GRO_DROP;
+	return polling ? netif_receive_skb(skb) : netif_rx(skb);
 }
+EXPORT_SYMBOL(__vlan_hwaccel_rx);
 
 gro_result_t vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp,
 			      unsigned int vlan_tci, struct sk_buff *skb)
 {
-	if (netpoll_rx_on(skb))
-		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci)
-			? GRO_DROP : GRO_NORMAL;
-
-	skb_gro_reset_offset(skb);
-
-	return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb);
+	__vlan_hwaccel_put_tag(skb, vlan_tci);
+	return napi_gro_receive(napi, skb);
 }
 EXPORT_SYMBOL(vlan_gro_receive);
 
 gro_result_t vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp,
 			    unsigned int vlan_tci)
 {
-	struct sk_buff *skb = napi_frags_skb(napi);
-
-	if (!skb)
-		return GRO_DROP;
-
-	if (netpoll_rx_on(skb)) {
-		skb->protocol = eth_type_trans(skb, skb->dev);
-		return vlan_hwaccel_receive_skb(skb, grp, vlan_tci)
-			? GRO_DROP : GRO_NORMAL;
-	}
-
-	return napi_frags_finish(napi, skb,
-				 vlan_gro_common(napi, grp, vlan_tci, skb));
+	__vlan_hwaccel_put_tag(napi->skb, vlan_tci);
+	return napi_gro_frags(napi);
 }
 EXPORT_SYMBOL(vlan_gro_frags);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 3bccdd12a264..14e3d1fa07a0 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -158,7 +158,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	vlan_id = vlan_tci & VLAN_VID_MASK;
 
 	rcu_read_lock();
-	vlan_dev = __find_vlan_dev(dev, vlan_id);
+	vlan_dev = vlan_find_dev(dev, vlan_id);
 
 	/* If the VLAN device is defined, we use it.
 	 * If not, and the VID is 0, it is a 802.1p packet (not
@@ -177,8 +177,8 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	} else {
 		skb->dev = vlan_dev;
 
-		rx_stats = per_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats,
-				       smp_processor_id());
+		rx_stats = this_cpu_ptr(vlan_dev_info(skb->dev)->vlan_rx_stats);
+
 		u64_stats_update_begin(&rx_stats->syncp);
 		rx_stats->rx_packets++;
 		rx_stats->rx_bytes += skb->len;
@@ -226,12 +226,14 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	netif_rx(skb);
+
 	rcu_read_unlock();
 	return NET_RX_SUCCESS;
 
 err_unlock:
 	rcu_read_unlock();
 err_free:
+	atomic_long_inc(&dev->rx_dropped);
 	kfree_skb(skb);
 	return NET_RX_DROP;
 }
@@ -843,7 +845,7 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
 		accum.rx_packets += rxpackets;
 		accum.rx_bytes += rxbytes;
 		accum.rx_multicast += rxmulticast;
-		/* rx_errors is an ulong, not protected by syncp */
+		/* rx_errors is ulong, not protected by syncp */
 		accum.rx_errors += p->rx_errors;
 	}
 	stats->rx_packets = accum.rx_packets;
diff --git a/net/9p/client.c b/net/9p/client.c
index 9eb72505308f..83bf0541d66f 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -61,13 +61,13 @@ static const match_table_t tokens = {
 
 inline int p9_is_proto_dotl(struct p9_client *clnt)
 {
-	return (clnt->proto_version == p9_proto_2000L);
+	return clnt->proto_version == p9_proto_2000L;
 }
 EXPORT_SYMBOL(p9_is_proto_dotl);
 
 inline int p9_is_proto_dotu(struct p9_client *clnt)
 {
-	return (clnt->proto_version == p9_proto_2000u);
+	return clnt->proto_version == p9_proto_2000u;
 }
 EXPORT_SYMBOL(p9_is_proto_dotu);
 
@@ -671,7 +671,7 @@ static void p9_fid_destroy(struct p9_fid *fid)
 	kfree(fid);
 }
 
-int p9_client_version(struct p9_client *c)
+static int p9_client_version(struct p9_client *c)
 {
 	int err = 0;
 	struct p9_req_t *req;
@@ -730,7 +730,6 @@ error:
 
 	return err;
 }
-EXPORT_SYMBOL(p9_client_version);
 
 struct p9_client *p9_client_create(const char *dev_name, char *options)
 {
@@ -887,54 +886,6 @@ error:
 }
 EXPORT_SYMBOL(p9_client_attach);
 
-struct p9_fid *
-p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname)
-{
-	int err;
-	struct p9_req_t *req;
-	struct p9_qid qid;
-	struct p9_fid *afid;
-
-	P9_DPRINTK(P9_DEBUG_9P, ">>> TAUTH uname %s aname %s\n", uname, aname);
-	err = 0;
-
-	afid = p9_fid_create(clnt);
-	if (IS_ERR(afid)) {
-		err = PTR_ERR(afid);
-		afid = NULL;
-		goto error;
-	}
-
-	req = p9_client_rpc(clnt, P9_TAUTH, "dss?d",
-			afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
-	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
-		goto error;
-	}
-
-	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);
-	if (err) {
-		p9pdu_dump(1, req->rc);
-		p9_free_req(clnt, req);
-		goto error;
-	}
-
-	P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n",
-			qid.type,
-			(unsigned long long)qid.path,
-			qid.version);
-
-	memmove(&afid->qid, &qid, sizeof(struct p9_qid));
-	p9_free_req(clnt, req);
-	return afid;
-
-error:
-	if (afid)
-		p9_fid_destroy(afid);
-	return ERR_PTR(err);
-}
-EXPORT_SYMBOL(p9_client_auth);
-
 struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,
 	int clone)
 {
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index c85109d809ca..078eb162d9bf 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -222,7 +222,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 	}
 }
 
-static unsigned int
+static int
 p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
 {
 	int ret, n;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 95fdd1185067..ff956d1115bc 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -310,9 +310,9 @@ static int clip_constructor(struct neighbour *neigh)
 	return 0;
 }
 
-static u32 clip_hash(const void *pkey, const struct net_device *dev)
+static u32 clip_hash(const void *pkey, const struct net_device *dev, __u32 rnd)
 {
-	return jhash_2words(*(u32 *) pkey, dev->ifindex, clip_tbl.hash_rnd);
+	return jhash_2words(*(u32 *) pkey, dev->ifindex, rnd);
 }
 
 static struct neigh_table clip_tbl = {
diff --git a/net/atm/common.c b/net/atm/common.c
index 940404a73b3d..1b9c52a02cd3 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -792,7 +792,7 @@ int vcc_getsockopt(struct socket *sock, int level, int optname,
 	default:
 		if (level == SOL_SOCKET)
 			return -EINVAL;
-			break;
+		break;
 	}
 	if (!vcc->dev || !vcc->dev->ops->getsockopt)
 		return -EINVAL;
diff --git a/net/atm/lec.c b/net/atm/lec.c
index d98bde1a0ac8..181d70c73d70 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -220,7 +220,6 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc)
 static int lec_open(struct net_device *dev)
 {
 	netif_start_queue(dev);
-	memset(&dev->stats, 0, sizeof(struct net_device_stats));
 
 	return 0;
 }
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index cfdfd7e2a172..26eaebf4aaa9 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1103,7 +1103,7 @@ done:
 out:
 	release_sock(sk);
 
-	return 0;
+	return err;
 }
 
 /*
diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index 7805945a5fd6..a1690845dc6e 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -412,7 +412,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
 {
 	ax25_uid_assoc *user;
 	ax25_route *ax25_rt;
-	int err;
+	int err = 0;
 
 	if ((ax25_rt = ax25_get_route(addr, NULL)) == NULL)
 		return -EHOSTUNREACH;
@@ -453,7 +453,7 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
 put:
 	ax25_put_route(ax25_rt);
 
-	return 0;
+	return err;
 }
 
 struct sk_buff *ax25_rt_build_path(struct sk_buff *skb, ax25_address *src,
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 421c45bd1b95..c4cf3f595004 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -265,6 +265,115 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 }
 EXPORT_SYMBOL(bt_sock_recvmsg);
 
+static long bt_sock_data_wait(struct sock *sk, long timeo)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	add_wait_queue(sk_sleep(sk), &wait);
+	for (;;) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (!skb_queue_empty(&sk->sk_receive_queue))
+			break;
+
+		if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN))
+			break;
+
+		if (signal_pending(current) || !timeo)
+			break;
+
+		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+	}
+
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk_sleep(sk), &wait);
+	return timeo;
+}
+
+int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
+			   struct msghdr *msg, size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	int err = 0;
+	size_t target, copied = 0;
+	long timeo;
+
+	if (flags & MSG_OOB)
+		return -EOPNOTSUPP;
+
+	msg->msg_namelen = 0;
+
+	BT_DBG("sk %p size %zu", sk, size);
+
+	lock_sock(sk);
+
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
+	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+
+	do {
+		struct sk_buff *skb;
+		int chunk;
+
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		if (!skb) {
+			if (copied >= target)
+				break;
+
+			if ((err = sock_error(sk)) != 0)
+				break;
+			if (sk->sk_shutdown & RCV_SHUTDOWN)
+				break;
+
+			err = -EAGAIN;
+			if (!timeo)
+				break;
+
+			timeo = bt_sock_data_wait(sk, timeo);
+
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeo);
+				goto out;
+			}
+			continue;
+		}
+
+		chunk = min_t(unsigned int, skb->len, size);
+		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			if (!copied)
+				copied = -EFAULT;
+			break;
+		}
+		copied += chunk;
+		size -= chunk;
+
+		sock_recv_ts_and_drops(msg, sk, skb);
+
+		if (!(flags & MSG_PEEK)) {
+			skb_pull(skb, chunk);
+			if (skb->len) {
+				skb_queue_head(&sk->sk_receive_queue, skb);
+				break;
+			}
+			kfree_skb(skb);
+
+		} else {
+			/* put message back and return */
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			break;
+		}
+	} while (size);
+
+out:
+	release_sock(sk);
+	return copied ? : err;
+}
+EXPORT_SYMBOL(bt_sock_stream_recvmsg);
+
 static inline unsigned int bt_accept_poll(struct sock *parent)
 {
 	struct list_head *p, *n;
@@ -297,13 +406,12 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w
 		mask |= POLLERR;
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		mask |= POLLRDHUP;
+		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
 
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
 		mask |= POLLHUP;
 
-	if (!skb_queue_empty(&sk->sk_receive_queue) ||
-	    (sk->sk_shutdown & RCV_SHUTDOWN))
+	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
 
 	if (sk->sk_state == BT_CLOSED)
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index d4c6af082d48..ec0a1347f933 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -321,14 +321,10 @@ static int cmtp_session(void *arg)
 int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
 {
 	struct cmtp_session *session, *s;
-	bdaddr_t src, dst;
 	int i, err;
 
 	BT_DBG("");
 
-	baswap(&src, &bt_sk(sock->sk)->src);
-	baswap(&dst, &bt_sk(sock->sk)->dst);
-
 	session = kzalloc(sizeof(struct cmtp_session), GFP_KERNEL);
 	if (!session)
 		return -ENOMEM;
@@ -347,7 +343,7 @@ int cmtp_add_connection(struct cmtp_connadd_req *req, struct socket *sock)
 
 	BT_DBG("mtu %d", session->mtu);
 
-	sprintf(session->name, "%s", batostr(&dst));
+	sprintf(session->name, "%s", batostr(&bt_sk(sock->sk)->dst));
 
 	session->sock = sock;
 	session->state = BT_CONFIG;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index c52f091ee6de..bc2a052e518b 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -562,7 +562,6 @@ static int hci_dev_do_close(struct hci_dev *hdev)
 	hci_dev_lock_bh(hdev);
 	inquiry_cache_flush(hdev);
 	hci_conn_hash_flush(hdev);
-	hci_blacklist_clear(hdev);
 	hci_dev_unlock_bh(hdev);
 
 	hci_notify(hdev, HCI_DEV_DOWN);
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 8fb967beee80..5fce3d6d07b4 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -37,9 +37,7 @@ static ssize_t show_link_type(struct device *dev, struct device_attribute *attr,
 static ssize_t show_link_address(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct hci_conn *conn = dev_get_drvdata(dev);
-	bdaddr_t bdaddr;
-	baswap(&bdaddr, &conn->dst);
-	return sprintf(buf, "%s\n", batostr(&bdaddr));
+	return sprintf(buf, "%s\n", batostr(&conn->dst));
 }
 
 static ssize_t show_link_features(struct device *dev, struct device_attribute *attr, char *buf)
@@ -196,8 +194,8 @@ static inline char *host_typetostr(int type)
 	switch (type) {
 	case HCI_BREDR:
 		return "BR/EDR";
-	case HCI_80211:
-		return "802.11";
+	case HCI_AMP:
+		return "AMP";
 	default:
 		return "UNKNOWN";
 	}
@@ -238,9 +236,7 @@ static ssize_t show_class(struct device *dev, struct device_attribute *attr, cha
 static ssize_t show_address(struct device *dev, struct device_attribute *attr, char *buf)
 {
 	struct hci_dev *hdev = dev_get_drvdata(dev);
-	bdaddr_t bdaddr;
-	baswap(&bdaddr, &hdev->bdaddr);
-	return sprintf(buf, "%s\n", batostr(&bdaddr));
+	return sprintf(buf, "%s\n", batostr(&hdev->bdaddr));
 }
 
 static ssize_t show_features(struct device *dev, struct device_attribute *attr, char *buf)
@@ -408,10 +404,8 @@ static int inquiry_cache_show(struct seq_file *f, void *p)
 
 	for (e = cache->list; e; e = e->next) {
 		struct inquiry_data *data = &e->data;
-		bdaddr_t bdaddr;
-		baswap(&bdaddr, &data->bdaddr);
 		seq_printf(f, "%s %d %d %d 0x%.2x%.2x%.2x 0x%.4x %d %d %u\n",
-			   batostr(&bdaddr),
+			   batostr(&data->bdaddr),
 			   data->pscan_rep_mode, data->pscan_period_mode,
 			   data->pscan_mode, data->dev_class[2],
 			   data->dev_class[1], data->dev_class[0],
@@ -445,13 +439,10 @@ static int blacklist_show(struct seq_file *f, void *p)
 
 	list_for_each(l, &hdev->blacklist) {
 		struct bdaddr_list *b;
-		bdaddr_t bdaddr;
 
 		b = list_entry(l, struct bdaddr_list, list);
 
-		baswap(&bdaddr, &b->bdaddr);
-
-		seq_printf(f, "%s\n", batostr(&bdaddr));
+		seq_printf(f, "%s\n", batostr(&b->bdaddr));
 	}
 
 	hci_dev_unlock_bh(hdev);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index bfe641b7dfaf..c0ee8b3928ed 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -758,7 +758,6 @@ static int hidp_setup_hid(struct hidp_session *session,
 				struct hidp_connadd_req *req)
 {
 	struct hid_device *hid;
-	bdaddr_t src, dst;
 	int err;
 
 	session->rd_data = kzalloc(req->rd_size, GFP_KERNEL);
@@ -781,9 +780,6 @@ static int hidp_setup_hid(struct hidp_session *session,
 
 	hid->driver_data = session;
 
-	baswap(&src, &bt_sk(session->ctrl_sock->sk)->src);
-	baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst);
-
 	hid->bus = BUS_BLUETOOTH;
 	hid->vendor = req->vendor;
 	hid->product = req->product;
@@ -791,8 +787,8 @@ static int hidp_setup_hid(struct hidp_session *session,
 	hid->country = req->country;
 
 	strncpy(hid->name, req->name, 128);
-	strncpy(hid->phys, batostr(&src), 64);
-	strncpy(hid->uniq, batostr(&dst), 64);
+	strncpy(hid->phys, batostr(&bt_sk(session->ctrl_sock->sk)->src), 64);
+	strncpy(hid->uniq, batostr(&bt_sk(session->ctrl_sock->sk)->dst), 64);
 
 	hid->dev.parent = hidp_get_device(session);
 	hid->ll_driver = &hidp_hid_driver;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 0b54b7dd8401..daa7a988d9a6 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1008,10 +1008,20 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
 		goto done;
 	}
 
-	if (la.l2_psm && __le16_to_cpu(la.l2_psm) < 0x1001 &&
-				!capable(CAP_NET_BIND_SERVICE)) {
-		err = -EACCES;
-		goto done;
+	if (la.l2_psm) {
+		__u16 psm = __le16_to_cpu(la.l2_psm);
+
+		/* PSM must be odd and lsb of upper byte must be 0 */
+		if ((psm & 0x0101) != 0x0001) {
+			err = -EINVAL;
+			goto done;
+		}
+
+		/* Restrict usage of well-known PSMs */
+		if (psm < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) {
+			err = -EACCES;
+			goto done;
+		}
 	}
 
 	write_lock_bh(&l2cap_sk_list.lock);
@@ -1190,6 +1200,13 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
1190 goto done; 1200 goto done;
1191 } 1201 }
1192 1202
1203 /* PSM must be odd and lsb of upper byte must be 0 */
1204 if ((__le16_to_cpu(la.l2_psm) & 0x0101) != 0x0001 &&
1205 sk->sk_type != SOCK_RAW) {
1206 err = -EINVAL;
1207 goto done;
1208 }
1209
1193 /* Set destination address and psm */ 1210 /* Set destination address and psm */
1194 bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr); 1211 bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr);
1195 l2cap_pi(sk)->psm = la.l2_psm; 1212 l2cap_pi(sk)->psm = la.l2_psm;
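
The bind- and connect-side checks above encode the L2CAP rule that a valid PSM is odd and has the least-significant bit of its upper octet clear, i.e. (psm & 0x0101) == 0x0001, with PSMs below 0x1001 additionally reserved for privileged binds. A minimal userspace sketch of the validity test, assuming host-order values (illustrative only, not the kernel code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Valid L2CAP PSM: low byte odd (bit 0 set), LSB of the high byte
     * clear -- equivalently (psm & 0x0101) == 0x0001. */
    static bool l2cap_psm_valid(uint16_t psm)
    {
            return (psm & 0x0101) == 0x0001;
    }

    int main(void)
    {
            /* 0x1001 (first dynamic PSM) is valid; 0x1002 is even and
             * 0x0101 has the high-byte LSB set -- both are rejected. */
            printf("%d %d %d\n", l2cap_psm_valid(0x1001),
                   l2cap_psm_valid(0x1002), l2cap_psm_valid(0x0101));
            return 0;
    }

An invalid PSM now fails with -EINVAL at bind or connect time, matching the hunks above.
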
@@ -1635,7 +1652,7 @@ static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, in
1635 1652
1636 *frag = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err); 1653 *frag = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err);
1637 if (!*frag) 1654 if (!*frag)
1638 return -EFAULT; 1655 return err;
1639 if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count)) 1656 if (memcpy_fromiovec(skb_put(*frag, count), msg->msg_iov, count))
1640 return -EFAULT; 1657 return -EFAULT;
1641 1658
@@ -1661,7 +1678,7 @@ static struct sk_buff *l2cap_create_connless_pdu(struct sock *sk, struct msghdr
1661 skb = bt_skb_send_alloc(sk, count + hlen, 1678 skb = bt_skb_send_alloc(sk, count + hlen,
1662 msg->msg_flags & MSG_DONTWAIT, &err); 1679 msg->msg_flags & MSG_DONTWAIT, &err);
1663 if (!skb) 1680 if (!skb)
1664 return ERR_PTR(-ENOMEM); 1681 return ERR_PTR(err);
1665 1682
1666 /* Create L2CAP header */ 1683 /* Create L2CAP header */
1667 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); 1684 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
@@ -1690,7 +1707,7 @@ static struct sk_buff *l2cap_create_basic_pdu(struct sock *sk, struct msghdr *ms
1690 skb = bt_skb_send_alloc(sk, count + hlen, 1707 skb = bt_skb_send_alloc(sk, count + hlen,
1691 msg->msg_flags & MSG_DONTWAIT, &err); 1708 msg->msg_flags & MSG_DONTWAIT, &err);
1692 if (!skb) 1709 if (!skb)
1693 return ERR_PTR(-ENOMEM); 1710 return ERR_PTR(err);
1694 1711
1695 /* Create L2CAP header */ 1712 /* Create L2CAP header */
1696 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); 1713 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
@@ -1727,7 +1744,7 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *m
1727 skb = bt_skb_send_alloc(sk, count + hlen, 1744 skb = bt_skb_send_alloc(sk, count + hlen,
1728 msg->msg_flags & MSG_DONTWAIT, &err); 1745 msg->msg_flags & MSG_DONTWAIT, &err);
1729 if (!skb) 1746 if (!skb)
1730 return ERR_PTR(-ENOMEM); 1747 return ERR_PTR(err);
1731 1748
1732 /* Create L2CAP header */ 1749 /* Create L2CAP header */
1733 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE); 1750 lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
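
These three hunks stop collapsing every bt_skb_send_alloc() failure into -ENOMEM and instead propagate the error code the allocator reported, encoded into the returned pointer. ERR_PTR()/PTR_ERR()/IS_ERR() are the kernel's real helpers for this; the sketch below re-implements them in userspace purely to illustrate the convention:

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Userspace stand-ins for the kernel's ERR_PTR()/PTR_ERR()/IS_ERR():
     * small negative errno values are folded into the pointer itself. */
    #define MAX_ERRNO 4095
    static inline void *ERR_PTR(long error) { return (void *)error; }
    static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
            return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
    }

    /* The fixed pattern: return the error the allocator reported
     * instead of a hard-coded -ENOMEM. */
    static void *create_pdu(size_t len, int *err)
    {
            void *buf = malloc(len);
            if (!buf) {
                    *err = -ENOMEM; /* a real allocator might report others */
                    return ERR_PTR(*err);
            }
            return buf;
    }

    int main(void)
    {
            int err = 0;
            void *pdu = create_pdu(64, &err);
            if (IS_ERR(pdu)) {
                    fprintf(stderr, "alloc failed: %ld\n", PTR_ERR(pdu));
                    return 1;
            }
            free(pdu);
            return 0;
    }
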
@@ -1934,6 +1951,9 @@ static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct ms
1934 1951
1935 release_sock(sk); 1952 release_sock(sk);
1936 1953
1954 if (sock->type == SOCK_STREAM)
1955 return bt_sock_stream_recvmsg(iocb, sock, msg, len, flags);
1956
1937 return bt_sock_recvmsg(iocb, sock, msg, len, flags); 1957 return bt_sock_recvmsg(iocb, sock, msg, len, flags);
1938} 1958}
1939 1959
@@ -2891,7 +2911,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd
2891 struct l2cap_chan_list *list = &conn->chan_list; 2911 struct l2cap_chan_list *list = &conn->chan_list;
2892 struct l2cap_conn_req *req = (struct l2cap_conn_req *) data; 2912 struct l2cap_conn_req *req = (struct l2cap_conn_req *) data;
2893 struct l2cap_conn_rsp rsp; 2913 struct l2cap_conn_rsp rsp;
2894 struct sock *parent, *uninitialized_var(sk); 2914 struct sock *parent, *sk = NULL;
2895 int result, status = L2CAP_CS_NO_INFO; 2915 int result, status = L2CAP_CS_NO_INFO;
2896 2916
2897 u16 dcid = 0, scid = __le16_to_cpu(req->scid); 2917 u16 dcid = 0, scid = __le16_to_cpu(req->scid);
@@ -3000,7 +3020,7 @@ sendresp:
3000 L2CAP_INFO_REQ, sizeof(info), &info); 3020 L2CAP_INFO_REQ, sizeof(info), &info);
3001 } 3021 }
3002 3022
3003 if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) && 3023 if (sk && !(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT) &&
3004 result == L2CAP_CR_SUCCESS) { 3024 result == L2CAP_CR_SUCCESS) {
3005 u8 buf[128]; 3025 u8 buf[128];
3006 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; 3026 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
@@ -3151,6 +3171,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, struct l2cap_cmd_hdr
3151 3171
3152 if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) { 3172 if (!(l2cap_pi(sk)->conf_state & L2CAP_CONF_REQ_SENT)) {
3153 u8 buf[64]; 3173 u8 buf[64];
3174 l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT;
3154 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, 3175 l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ,
3155 l2cap_build_conf_req(sk, buf), buf); 3176 l2cap_build_conf_req(sk, buf), buf);
3156 l2cap_pi(sk)->num_conf_req++; 3177 l2cap_pi(sk)->num_conf_req++;
@@ -4643,6 +4664,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
4643 4664
4644 if (flags & ACL_START) { 4665 if (flags & ACL_START) {
4645 struct l2cap_hdr *hdr; 4666 struct l2cap_hdr *hdr;
4667 struct sock *sk;
4668 u16 cid;
4646 int len; 4669 int len;
4647 4670
4648 if (conn->rx_len) { 4671 if (conn->rx_len) {
@@ -4653,7 +4676,8 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
4653 l2cap_conn_unreliable(conn, ECOMM); 4676 l2cap_conn_unreliable(conn, ECOMM);
4654 } 4677 }
4655 4678
 4656 if (skb->len < 2) { 4679 /* A start fragment always begins with the Basic L2CAP header */
4680 if (skb->len < L2CAP_HDR_SIZE) {
4657 BT_ERR("Frame is too short (len %d)", skb->len); 4681 BT_ERR("Frame is too short (len %d)", skb->len);
4658 l2cap_conn_unreliable(conn, ECOMM); 4682 l2cap_conn_unreliable(conn, ECOMM);
4659 goto drop; 4683 goto drop;
@@ -4661,6 +4685,7 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
4661 4685
4662 hdr = (struct l2cap_hdr *) skb->data; 4686 hdr = (struct l2cap_hdr *) skb->data;
4663 len = __le16_to_cpu(hdr->len) + L2CAP_HDR_SIZE; 4687 len = __le16_to_cpu(hdr->len) + L2CAP_HDR_SIZE;
4688 cid = __le16_to_cpu(hdr->cid);
4664 4689
4665 if (len == skb->len) { 4690 if (len == skb->len) {
4666 /* Complete frame received */ 4691 /* Complete frame received */
@@ -4677,6 +4702,19 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl
4677 goto drop; 4702 goto drop;
4678 } 4703 }
4679 4704
4705 sk = l2cap_get_chan_by_scid(&conn->chan_list, cid);
4706
4707 if (sk && l2cap_pi(sk)->imtu < len - L2CAP_HDR_SIZE) {
4708 BT_ERR("Frame exceeding recv MTU (len %d, MTU %d)",
4709 len, l2cap_pi(sk)->imtu);
4710 bh_unlock_sock(sk);
4711 l2cap_conn_unreliable(conn, ECOMM);
4712 goto drop;
4713 }
4714
4715 if (sk)
4716 bh_unlock_sock(sk);
4717
4680 /* Allocate skb for the complete frame (with header) */ 4718 /* Allocate skb for the complete frame (with header) */
4681 conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC); 4719 conn->rx_skb = bt_skb_alloc(len, GFP_ATOMIC);
4682 if (!conn->rx_skb) 4720 if (!conn->rx_skb)
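
The reworked start-fragment path first requires a full Basic L2CAP header (a little-endian 16-bit payload length followed by a 16-bit CID), then looks up the channel by CID and drops frames whose payload would overflow the channel's receive MTU before allocating the reassembly buffer. A hedged userspace sketch of just the header parse (parse_l2cap_hdr is a hypothetical helper, not a kernel function):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define L2CAP_HDR_SIZE 4 /* 16-bit length + 16-bit CID */

    /* Parse the Basic L2CAP header at the front of a start fragment. */
    static int parse_l2cap_hdr(const uint8_t *buf, size_t buflen,
                               uint16_t *plen, uint16_t *cid)
    {
            if (buflen < L2CAP_HDR_SIZE)
                    return -1; /* "Frame is too short", as in the hunk */
            *plen = (uint16_t)(buf[0] | (buf[1] << 8)); /* little-endian */
            *cid  = (uint16_t)(buf[2] | (buf[3] << 8));
            return 0;
    }

    int main(void)
    {
            const uint8_t frag[] = { 0x05, 0x00, 0x40, 0x00, 1, 2, 3, 4, 5 };
            uint16_t len, cid;

            if (parse_l2cap_hdr(frag, sizeof(frag), &len, &cid) == 0)
                    printf("payload len %u, cid 0x%04x\n", len, cid);
            return 0;
    }
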
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index ad2af5814e40..b826d1bf10df 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -51,8 +51,8 @@ char *batostr(bdaddr_t *ba)
51 51
52 i ^= 1; 52 i ^= 1;
53 sprintf(str[i], "%2.2X:%2.2X:%2.2X:%2.2X:%2.2X:%2.2X", 53 sprintf(str[i], "%2.2X:%2.2X:%2.2X:%2.2X:%2.2X:%2.2X",
54 ba->b[0], ba->b[1], ba->b[2], 54 ba->b[5], ba->b[4], ba->b[3],
55 ba->b[3], ba->b[4], ba->b[5]); 55 ba->b[2], ba->b[1], ba->b[0]);
56 56
57 return str[i]; 57 return str[i];
58} 58}
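
With batostr() now emitting the address bytes in reverse array order, the human-readable MSB-first form comes out directly, which is why the hci_sysfs, hidp and rfcomm callers above could drop their baswap() temporaries. A userspace sketch of the new ordering (bdaddr_t stores the address least-significant byte first; the struct here is a stand-in):

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint8_t b[6]; } bdaddr_t; /* LSB of the address first */

    /* New ordering: print b[5]..b[0], so the string reads MSB first. */
    static void batostr(char *str, const bdaddr_t *ba)
    {
            sprintf(str, "%2.2X:%2.2X:%2.2X:%2.2X:%2.2X:%2.2X",
                    ba->b[5], ba->b[4], ba->b[3],
                    ba->b[2], ba->b[1], ba->b[0]);
    }

    int main(void)
    {
            bdaddr_t ba = { { 0x55, 0x44, 0x33, 0x22, 0x11, 0x00 } };
            char str[18];

            batostr(str, &ba); /* "00:11:22:33:44:55", no baswap() needed */
            puts(str);
            return 0;
    }
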
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 7dca91bb8c57..39a5d87e33b4 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -113,11 +113,10 @@ static void rfcomm_session_del(struct rfcomm_session *s);
113#define __get_rpn_stop_bits(line) (((line) >> 2) & 0x1) 113#define __get_rpn_stop_bits(line) (((line) >> 2) & 0x1)
114#define __get_rpn_parity(line) (((line) >> 3) & 0x7) 114#define __get_rpn_parity(line) (((line) >> 3) & 0x7)
115 115
116static inline void rfcomm_schedule(uint event) 116static inline void rfcomm_schedule(void)
117{ 117{
118 if (!rfcomm_thread) 118 if (!rfcomm_thread)
119 return; 119 return;
120 //set_bit(event, &rfcomm_event);
121 set_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event); 120 set_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event);
122 wake_up_process(rfcomm_thread); 121 wake_up_process(rfcomm_thread);
123} 122}
@@ -179,13 +178,13 @@ static unsigned char rfcomm_crc_table[256] = {
179/* FCS on 2 bytes */ 178/* FCS on 2 bytes */
180static inline u8 __fcs(u8 *data) 179static inline u8 __fcs(u8 *data)
181{ 180{
182 return (0xff - __crc(data)); 181 return 0xff - __crc(data);
183} 182}
184 183
185/* FCS on 3 bytes */ 184/* FCS on 3 bytes */
186static inline u8 __fcs2(u8 *data) 185static inline u8 __fcs2(u8 *data)
187{ 186{
188 return (0xff - rfcomm_crc_table[__crc(data) ^ data[2]]); 187 return 0xff - rfcomm_crc_table[__crc(data) ^ data[2]];
189} 188}
190 189
191/* Check FCS */ 190/* Check FCS */
@@ -203,13 +202,13 @@ static inline int __check_fcs(u8 *data, int type, u8 fcs)
203static void rfcomm_l2state_change(struct sock *sk) 202static void rfcomm_l2state_change(struct sock *sk)
204{ 203{
205 BT_DBG("%p state %d", sk, sk->sk_state); 204 BT_DBG("%p state %d", sk, sk->sk_state);
206 rfcomm_schedule(RFCOMM_SCHED_STATE); 205 rfcomm_schedule();
207} 206}
208 207
209static void rfcomm_l2data_ready(struct sock *sk, int bytes) 208static void rfcomm_l2data_ready(struct sock *sk, int bytes)
210{ 209{
211 BT_DBG("%p bytes %d", sk, bytes); 210 BT_DBG("%p bytes %d", sk, bytes);
212 rfcomm_schedule(RFCOMM_SCHED_RX); 211 rfcomm_schedule();
213} 212}
214 213
215static int rfcomm_l2sock_create(struct socket **sock) 214static int rfcomm_l2sock_create(struct socket **sock)
@@ -255,7 +254,7 @@ static void rfcomm_session_timeout(unsigned long arg)
255 BT_DBG("session %p state %ld", s, s->state); 254 BT_DBG("session %p state %ld", s, s->state);
256 255
257 set_bit(RFCOMM_TIMED_OUT, &s->flags); 256 set_bit(RFCOMM_TIMED_OUT, &s->flags);
258 rfcomm_schedule(RFCOMM_SCHED_TIMEO); 257 rfcomm_schedule();
259} 258}
260 259
261static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout) 260static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout)
@@ -283,7 +282,7 @@ static void rfcomm_dlc_timeout(unsigned long arg)
283 282
284 set_bit(RFCOMM_TIMED_OUT, &d->flags); 283 set_bit(RFCOMM_TIMED_OUT, &d->flags);
285 rfcomm_dlc_put(d); 284 rfcomm_dlc_put(d);
286 rfcomm_schedule(RFCOMM_SCHED_TIMEO); 285 rfcomm_schedule();
287} 286}
288 287
289static void rfcomm_dlc_set_timer(struct rfcomm_dlc *d, long timeout) 288static void rfcomm_dlc_set_timer(struct rfcomm_dlc *d, long timeout)
@@ -465,7 +464,7 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
465 case BT_CONFIG: 464 case BT_CONFIG:
466 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { 465 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
467 set_bit(RFCOMM_AUTH_REJECT, &d->flags); 466 set_bit(RFCOMM_AUTH_REJECT, &d->flags);
468 rfcomm_schedule(RFCOMM_SCHED_AUTH); 467 rfcomm_schedule();
469 break; 468 break;
470 } 469 }
471 /* Fall through */ 470 /* Fall through */
@@ -485,7 +484,7 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err)
485 case BT_CONNECT2: 484 case BT_CONNECT2:
486 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { 485 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
487 set_bit(RFCOMM_AUTH_REJECT, &d->flags); 486 set_bit(RFCOMM_AUTH_REJECT, &d->flags);
488 rfcomm_schedule(RFCOMM_SCHED_AUTH); 487 rfcomm_schedule();
489 break; 488 break;
490 } 489 }
491 /* Fall through */ 490 /* Fall through */
@@ -533,7 +532,7 @@ int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb)
533 skb_queue_tail(&d->tx_queue, skb); 532 skb_queue_tail(&d->tx_queue, skb);
534 533
535 if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags)) 534 if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags))
536 rfcomm_schedule(RFCOMM_SCHED_TX); 535 rfcomm_schedule();
537 return len; 536 return len;
538} 537}
539 538
@@ -545,7 +544,7 @@ void __rfcomm_dlc_throttle(struct rfcomm_dlc *d)
545 d->v24_sig |= RFCOMM_V24_FC; 544 d->v24_sig |= RFCOMM_V24_FC;
546 set_bit(RFCOMM_MSC_PENDING, &d->flags); 545 set_bit(RFCOMM_MSC_PENDING, &d->flags);
547 } 546 }
548 rfcomm_schedule(RFCOMM_SCHED_TX); 547 rfcomm_schedule();
549} 548}
550 549
551void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d) 550void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d)
@@ -556,7 +555,7 @@ void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d)
556 d->v24_sig &= ~RFCOMM_V24_FC; 555 d->v24_sig &= ~RFCOMM_V24_FC;
557 set_bit(RFCOMM_MSC_PENDING, &d->flags); 556 set_bit(RFCOMM_MSC_PENDING, &d->flags);
558 } 557 }
559 rfcomm_schedule(RFCOMM_SCHED_TX); 558 rfcomm_schedule();
560} 559}
561 560
562/* 561/*
@@ -577,7 +576,7 @@ int rfcomm_dlc_set_modem_status(struct rfcomm_dlc *d, u8 v24_sig)
577 d->v24_sig = v24_sig; 576 d->v24_sig = v24_sig;
578 577
579 if (!test_and_set_bit(RFCOMM_MSC_PENDING, &d->flags)) 578 if (!test_and_set_bit(RFCOMM_MSC_PENDING, &d->flags))
580 rfcomm_schedule(RFCOMM_SCHED_TX); 579 rfcomm_schedule();
581 580
582 return 0; 581 return 0;
583} 582}
@@ -816,7 +815,7 @@ static int rfcomm_queue_disc(struct rfcomm_dlc *d)
816 cmd->fcs = __fcs2((u8 *) cmd); 815 cmd->fcs = __fcs2((u8 *) cmd);
817 816
818 skb_queue_tail(&d->tx_queue, skb); 817 skb_queue_tail(&d->tx_queue, skb);
819 rfcomm_schedule(RFCOMM_SCHED_TX); 818 rfcomm_schedule();
820 return 0; 819 return 0;
821} 820}
822 821
@@ -1415,8 +1414,8 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
1415 return 0; 1414 return 0;
1416 1415
1417 if (len == 1) { 1416 if (len == 1) {
 1418 /* This is a request, return default settings */ 1417 /* This is a request, return the default settings per ETSI TS 07.10 */
1419 bit_rate = RFCOMM_RPN_BR_115200; 1418 bit_rate = RFCOMM_RPN_BR_9600;
1420 data_bits = RFCOMM_RPN_DATA_8; 1419 data_bits = RFCOMM_RPN_DATA_8;
1421 stop_bits = RFCOMM_RPN_STOP_1; 1420 stop_bits = RFCOMM_RPN_STOP_1;
1422 parity = RFCOMM_RPN_PARITY_NONE; 1421 parity = RFCOMM_RPN_PARITY_NONE;
@@ -1431,9 +1430,9 @@ static int rfcomm_recv_rpn(struct rfcomm_session *s, int cr, int len, struct sk_
1431 1430
1432 if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_BITRATE)) { 1431 if (rpn->param_mask & cpu_to_le16(RFCOMM_RPN_PM_BITRATE)) {
1433 bit_rate = rpn->bit_rate; 1432 bit_rate = rpn->bit_rate;
1434 if (bit_rate != RFCOMM_RPN_BR_115200) { 1433 if (bit_rate > RFCOMM_RPN_BR_230400) {
1435 BT_DBG("RPN bit rate mismatch 0x%x", bit_rate); 1434 BT_DBG("RPN bit rate mismatch 0x%x", bit_rate);
1436 bit_rate = RFCOMM_RPN_BR_115200; 1435 bit_rate = RFCOMM_RPN_BR_9600;
1437 rpn_mask ^= RFCOMM_RPN_PM_BITRATE; 1436 rpn_mask ^= RFCOMM_RPN_PM_BITRATE;
1438 } 1437 }
1439 } 1438 }
@@ -1698,7 +1697,7 @@ static int rfcomm_recv_frame(struct rfcomm_session *s, struct sk_buff *skb)
1698 break; 1697 break;
1699 1698
1700 default: 1699 default:
1701 BT_ERR("Unknown packet type 0x%02x\n", type); 1700 BT_ERR("Unknown packet type 0x%02x", type);
1702 break; 1701 break;
1703 } 1702 }
1704 kfree_skb(skb); 1703 kfree_skb(skb);
@@ -1884,7 +1883,7 @@ static inline void rfcomm_accept_connection(struct rfcomm_session *s)
1884 * L2CAP MTU minus UIH header and FCS. */ 1883 * L2CAP MTU minus UIH header and FCS. */
1885 s->mtu = min(l2cap_pi(nsock->sk)->omtu, l2cap_pi(nsock->sk)->imtu) - 5; 1884 s->mtu = min(l2cap_pi(nsock->sk)->omtu, l2cap_pi(nsock->sk)->imtu) - 5;
1886 1885
1887 rfcomm_schedule(RFCOMM_SCHED_RX); 1886 rfcomm_schedule();
1888 } else 1887 } else
1889 sock_release(nsock); 1888 sock_release(nsock);
1890} 1889}
@@ -2093,7 +2092,7 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt)
2093 2092
2094 rfcomm_session_put(s); 2093 rfcomm_session_put(s);
2095 2094
2096 rfcomm_schedule(RFCOMM_SCHED_AUTH); 2095 rfcomm_schedule();
2097} 2096}
2098 2097
2099static struct hci_cb rfcomm_cb = { 2098static struct hci_cb rfcomm_cb = {
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 194b3a04cfd3..aec505f934df 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -621,121 +621,29 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
621 return sent; 621 return sent;
622} 622}
623 623
624static long rfcomm_sock_data_wait(struct sock *sk, long timeo)
625{
626 DECLARE_WAITQUEUE(wait, current);
627
628 add_wait_queue(sk_sleep(sk), &wait);
629 for (;;) {
630 set_current_state(TASK_INTERRUPTIBLE);
631
632 if (!skb_queue_empty(&sk->sk_receive_queue) ||
633 sk->sk_err ||
634 (sk->sk_shutdown & RCV_SHUTDOWN) ||
635 signal_pending(current) ||
636 !timeo)
637 break;
638
639 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
640 release_sock(sk);
641 timeo = schedule_timeout(timeo);
642 lock_sock(sk);
643 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
644 }
645
646 __set_current_state(TASK_RUNNING);
647 remove_wait_queue(sk_sleep(sk), &wait);
648 return timeo;
649}
650
651static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, 624static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
652 struct msghdr *msg, size_t size, int flags) 625 struct msghdr *msg, size_t size, int flags)
653{ 626{
654 struct sock *sk = sock->sk; 627 struct sock *sk = sock->sk;
655 struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; 628 struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc;
656 int err = 0; 629 int len;
657 size_t target, copied = 0;
658 long timeo;
659 630
660 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { 631 if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
661 rfcomm_dlc_accept(d); 632 rfcomm_dlc_accept(d);
662 return 0; 633 return 0;
663 } 634 }
664 635
665 if (flags & MSG_OOB) 636 len = bt_sock_stream_recvmsg(iocb, sock, msg, size, flags);
666 return -EOPNOTSUPP;
667
668 msg->msg_namelen = 0;
669
670 BT_DBG("sk %p size %zu", sk, size);
671 637
672 lock_sock(sk); 638 lock_sock(sk);
639 if (!(flags & MSG_PEEK) && len > 0)
640 atomic_sub(len, &sk->sk_rmem_alloc);
673 641
674 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
675 timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
676
677 do {
678 struct sk_buff *skb;
679 int chunk;
680
681 skb = skb_dequeue(&sk->sk_receive_queue);
682 if (!skb) {
683 if (copied >= target)
684 break;
685
686 if ((err = sock_error(sk)) != 0)
687 break;
688 if (sk->sk_shutdown & RCV_SHUTDOWN)
689 break;
690
691 err = -EAGAIN;
692 if (!timeo)
693 break;
694
695 timeo = rfcomm_sock_data_wait(sk, timeo);
696
697 if (signal_pending(current)) {
698 err = sock_intr_errno(timeo);
699 goto out;
700 }
701 continue;
702 }
703
704 chunk = min_t(unsigned int, skb->len, size);
705 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
706 skb_queue_head(&sk->sk_receive_queue, skb);
707 if (!copied)
708 copied = -EFAULT;
709 break;
710 }
711 copied += chunk;
712 size -= chunk;
713
714 sock_recv_ts_and_drops(msg, sk, skb);
715
716 if (!(flags & MSG_PEEK)) {
717 atomic_sub(chunk, &sk->sk_rmem_alloc);
718
719 skb_pull(skb, chunk);
720 if (skb->len) {
721 skb_queue_head(&sk->sk_receive_queue, skb);
722 break;
723 }
724 kfree_skb(skb);
725
726 } else {
727 /* put message back and return */
728 skb_queue_head(&sk->sk_receive_queue, skb);
729 break;
730 }
731 } while (size);
732
733out:
734 if (atomic_read(&sk->sk_rmem_alloc) <= (sk->sk_rcvbuf >> 2)) 642 if (atomic_read(&sk->sk_rmem_alloc) <= (sk->sk_rcvbuf >> 2))
735 rfcomm_dlc_unthrottle(rfcomm_pi(sk)->dlc); 643 rfcomm_dlc_unthrottle(rfcomm_pi(sk)->dlc);
736
737 release_sock(sk); 644 release_sock(sk);
738 return copied ? : err; 645
646 return len;
739} 647}
740 648
741static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen) 649static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, unsigned int optlen)
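
The hand-rolled receive loop is replaced by the shared bt_sock_stream_recvmsg(), leaving RFCOMM responsible only for its flow control: consumed bytes are subtracted from sk_rmem_alloc, and the DLC is unthrottled once the backlog drops to a quarter of the receive buffer. A sketch of that threshold (plain ints stand in for the kernel's atomic counters):

    #include <stdbool.h>
    #include <stdio.h>

    /* Unthrottle once the backlog is at or below a quarter of the
     * buffer: atomic_read(&sk->sk_rmem_alloc) <= (sk->sk_rcvbuf >> 2). */
    static bool should_unthrottle(int rmem_alloc, int rcvbuf)
    {
            return rmem_alloc <= (rcvbuf >> 2);
    }

    int main(void)
    {
            printf("%d %d\n", should_unthrottle(1000, 8192),  /* 1 */
                   should_unthrottle(4000, 8192));            /* 0 */
            return 0;
    }
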
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 84c2a4d013c6..a9b81f5dacd1 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -183,9 +183,7 @@ static struct device *rfcomm_get_device(struct rfcomm_dev *dev)
183static ssize_t show_address(struct device *tty_dev, struct device_attribute *attr, char *buf) 183static ssize_t show_address(struct device *tty_dev, struct device_attribute *attr, char *buf)
184{ 184{
185 struct rfcomm_dev *dev = dev_get_drvdata(tty_dev); 185 struct rfcomm_dev *dev = dev_get_drvdata(tty_dev);
186 bdaddr_t bdaddr; 186 return sprintf(buf, "%s\n", batostr(&dev->dst));
187 baswap(&bdaddr, &dev->dst);
188 return sprintf(buf, "%s\n", batostr(&bdaddr));
189} 187}
190 188
191static ssize_t show_channel(struct device *tty_dev, struct device_attribute *attr, char *buf) 189static ssize_t show_channel(struct device *tty_dev, struct device_attribute *attr, char *buf)
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index cf09fe591fc2..17cb0b633576 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -212,6 +212,11 @@ static int br_set_tx_csum(struct net_device *dev, u32 data)
212 return 0; 212 return 0;
213} 213}
214 214
215static int br_set_flags(struct net_device *netdev, u32 data)
216{
217 return ethtool_op_set_flags(netdev, data, ETH_FLAG_TXVLAN);
218}
219
215#ifdef CONFIG_NET_POLL_CONTROLLER 220#ifdef CONFIG_NET_POLL_CONTROLLER
216static void br_poll_controller(struct net_device *br_dev) 221static void br_poll_controller(struct net_device *br_dev)
217{ 222{
@@ -304,6 +309,7 @@ static const struct ethtool_ops br_ethtool_ops = {
304 .get_ufo = ethtool_op_get_ufo, 309 .get_ufo = ethtool_op_get_ufo,
305 .set_ufo = ethtool_op_set_ufo, 310 .set_ufo = ethtool_op_set_ufo,
306 .get_flags = ethtool_op_get_flags, 311 .get_flags = ethtool_op_get_flags,
312 .set_flags = br_set_flags,
307}; 313};
308 314
309static const struct net_device_ops br_netdev_ops = { 315static const struct net_device_ops br_netdev_ops = {
@@ -343,5 +349,5 @@ void br_dev_setup(struct net_device *dev)
343 349
344 dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | 350 dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
345 NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | 351 NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
346 NETIF_F_NETNS_LOCAL | NETIF_F_GSO; 352 NETIF_F_NETNS_LOCAL | NETIF_F_GSO | NETIF_F_HW_VLAN_TX;
347} 353}
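
br_set_flags() routes userspace ethtool flag changes through ethtool_op_set_flags() with a mask of what the bridge actually supports, here only ETH_FLAG_TXVLAN, so unsupported bits fail with -EINVAL instead of being silently accepted. A userspace sketch of the mask-and-reject pattern (the flag bit positions are illustrative, not copied from the headers):

    #include <errno.h>
    #include <stdio.h>

    #define ETH_FLAG_TXVLAN (1u << 7)  /* illustrative bit positions */
    #define ETH_FLAG_LRO    (1u << 15)

    /* Accept a flag update only if every requested bit is supported. */
    static int set_flags(unsigned int *dev_flags, unsigned int data,
                         unsigned int supported)
    {
            if (data & ~supported)
                    return -EINVAL; /* an unsupported flag was requested */
            *dev_flags = data;
            return 0;
    }

    int main(void)
    {
            unsigned int flags = 0;

            printf("%d\n", set_flags(&flags, ETH_FLAG_TXVLAN, ETH_FLAG_TXVLAN)); /* 0 */
            printf("%d\n", set_flags(&flags, ETH_FLAG_LRO, ETH_FLAG_TXVLAN));    /* -22 */
            return 0;
    }
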
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index c03d2c3ff03e..89ad25a76202 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -61,30 +61,27 @@ static int port_cost(struct net_device *dev)
61} 61}
62 62
63 63
 64/* 64/* Check for port carrier transitions. */
 65 * Check for port carrier transitions.
66 * Called from work queue to allow for calling functions that
67 * might sleep (such as speed check), and to debounce.
68 */
69void br_port_carrier_check(struct net_bridge_port *p) 65void br_port_carrier_check(struct net_bridge_port *p)
70{ 66{
71 struct net_device *dev = p->dev; 67 struct net_device *dev = p->dev;
72 struct net_bridge *br = p->br; 68 struct net_bridge *br = p->br;
73 69
74 if (netif_carrier_ok(dev)) 70 if (netif_running(dev) && netif_carrier_ok(dev))
75 p->path_cost = port_cost(dev); 71 p->path_cost = port_cost(dev);
76 72
77 if (netif_running(br->dev)) { 73 if (!netif_running(br->dev))
78 spin_lock_bh(&br->lock); 74 return;
79 if (netif_carrier_ok(dev)) { 75
80 if (p->state == BR_STATE_DISABLED) 76 spin_lock_bh(&br->lock);
81 br_stp_enable_port(p); 77 if (netif_running(dev) && netif_carrier_ok(dev)) {
82 } else { 78 if (p->state == BR_STATE_DISABLED)
83 if (p->state != BR_STATE_DISABLED) 79 br_stp_enable_port(p);
84 br_stp_disable_port(p); 80 } else {
85 } 81 if (p->state != BR_STATE_DISABLED)
86 spin_unlock_bh(&br->lock); 82 br_stp_disable_port(p);
87 } 83 }
84 spin_unlock_bh(&br->lock);
88} 85}
89 86
90static void release_nbp(struct kobject *kobj) 87static void release_nbp(struct kobject *kobj)
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 826cd5221536..25207a1f182b 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -141,7 +141,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
141 const unsigned char *dest = eth_hdr(skb)->h_dest; 141 const unsigned char *dest = eth_hdr(skb)->h_dest;
142 int (*rhook)(struct sk_buff *skb); 142 int (*rhook)(struct sk_buff *skb);
143 143
144 if (skb->pkt_type == PACKET_LOOPBACK) 144 if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
145 return skb; 145 return skb;
146 146
147 if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) 147 if (!is_valid_ether_addr(eth_hdr(skb)->h_source))
@@ -159,7 +159,7 @@ struct sk_buff *br_handle_frame(struct sk_buff *skb)
159 goto drop; 159 goto drop;
160 160
161 /* If STP is turned off, then forward */ 161 /* If STP is turned off, then forward */
162 if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0) 162 if (p->br->stp_enabled == BR_NO_STP)
163 goto forward; 163 goto forward;
164 164
165 if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, 165 if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 137f23259a93..865fd7634b67 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -64,22 +64,24 @@ static int brnf_filter_pppoe_tagged __read_mostly = 0;
64 64
65static inline __be16 vlan_proto(const struct sk_buff *skb) 65static inline __be16 vlan_proto(const struct sk_buff *skb)
66{ 66{
67 return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; 67 if (vlan_tx_tag_present(skb))
68 return skb->protocol;
69 else if (skb->protocol == htons(ETH_P_8021Q))
70 return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
71 else
72 return 0;
68} 73}
69 74
70#define IS_VLAN_IP(skb) \ 75#define IS_VLAN_IP(skb) \
71 (skb->protocol == htons(ETH_P_8021Q) && \ 76 (vlan_proto(skb) == htons(ETH_P_IP) && \
72 vlan_proto(skb) == htons(ETH_P_IP) && \
73 brnf_filter_vlan_tagged) 77 brnf_filter_vlan_tagged)
74 78
75#define IS_VLAN_IPV6(skb) \ 79#define IS_VLAN_IPV6(skb) \
76 (skb->protocol == htons(ETH_P_8021Q) && \ 80 (vlan_proto(skb) == htons(ETH_P_IPV6) && \
77 vlan_proto(skb) == htons(ETH_P_IPV6) &&\
78 brnf_filter_vlan_tagged) 81 brnf_filter_vlan_tagged)
79 82
80#define IS_VLAN_ARP(skb) \ 83#define IS_VLAN_ARP(skb) \
81 (skb->protocol == htons(ETH_P_8021Q) && \ 84 (vlan_proto(skb) == htons(ETH_P_ARP) && \
82 vlan_proto(skb) == htons(ETH_P_ARP) && \
83 brnf_filter_vlan_tagged) 85 brnf_filter_vlan_tagged)
84 86
85static inline __be16 pppoe_proto(const struct sk_buff *skb) 87static inline __be16 pppoe_proto(const struct sk_buff *skb)
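
With hardware-accelerated VLAN receive the tag lives in skb metadata rather than in the packet, so the encapsulated protocol is simply skb->protocol; only software-tagged frames still carry an in-band 802.1Q header to parse. A toy userspace model of the two cases (struct pkt is hypothetical; host-order values assumed):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define ETH_P_8021Q 0x8100
    #define ETH_P_IP    0x0800

    /* Toy model of the two ways a received frame can carry a VLAN tag. */
    struct pkt {
            bool     hw_tag;      /* tag stripped by the NIC, out of band */
            uint16_t protocol;    /* skb->protocol (host order here) */
            uint16_t encap_proto; /* h_vlan_encapsulated_proto if in-band */
    };

    static uint16_t vlan_proto(const struct pkt *p)
    {
            if (p->hw_tag)
                    return p->protocol;    /* NIC already stripped the tag */
            if (p->protocol == ETH_P_8021Q)
                    return p->encap_proto; /* in-band 802.1Q header */
            return 0;                      /* not VLAN-tagged at all */
    }

    int main(void)
    {
            struct pkt hw = { true,  ETH_P_IP,    0 };
            struct pkt sw = { false, ETH_P_8021Q, ETH_P_IP };

            printf("0x%04x 0x%04x\n", vlan_proto(&hw), vlan_proto(&sw));
            return 0;
    }
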
@@ -106,7 +108,6 @@ static struct dst_ops fake_dst_ops = {
106 .family = AF_INET, 108 .family = AF_INET,
107 .protocol = cpu_to_be16(ETH_P_IP), 109 .protocol = cpu_to_be16(ETH_P_IP),
108 .update_pmtu = fake_update_pmtu, 110 .update_pmtu = fake_update_pmtu,
109 .entries = ATOMIC_INIT(0),
110}; 111};
111 112
112/* 113/*
@@ -209,6 +210,72 @@ static inline void nf_bridge_update_protocol(struct sk_buff *skb)
209 skb->protocol = htons(ETH_P_PPP_SES); 210 skb->protocol = htons(ETH_P_PPP_SES);
210} 211}
211 212
213/* When handing a packet over to the IP layer
214 * check whether we have a skb that is in the
215 * expected format
216 */
217
218static int br_parse_ip_options(struct sk_buff *skb)
219{
220 struct ip_options *opt;
221 struct iphdr *iph;
222 struct net_device *dev = skb->dev;
223 u32 len;
224
225 iph = ip_hdr(skb);
226 opt = &(IPCB(skb)->opt);
227
228 /* Basic sanity checks */
229 if (iph->ihl < 5 || iph->version != 4)
230 goto inhdr_error;
231
232 if (!pskb_may_pull(skb, iph->ihl*4))
233 goto inhdr_error;
234
235 iph = ip_hdr(skb);
236 if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
237 goto inhdr_error;
238
239 len = ntohs(iph->tot_len);
240 if (skb->len < len) {
241 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
242 goto drop;
243 } else if (len < (iph->ihl*4))
244 goto inhdr_error;
245
246 if (pskb_trim_rcsum(skb, len)) {
247 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
248 goto drop;
249 }
250
251 /* Zero out the CB buffer if no options present */
252 if (iph->ihl == 5) {
253 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
254 return 0;
255 }
256
257 opt->optlen = iph->ihl*4 - sizeof(struct iphdr);
258 if (ip_options_compile(dev_net(dev), opt, skb))
259 goto inhdr_error;
260
261 /* Check correct handling of SRR option */
262 if (unlikely(opt->srr)) {
263 struct in_device *in_dev = __in_dev_get_rcu(dev);
264 if (in_dev && !IN_DEV_SOURCE_ROUTE(in_dev))
265 goto drop;
266
267 if (ip_options_rcv_srr(skb))
268 goto drop;
269 }
270
271 return 0;
272
273inhdr_error:
274 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
275drop:
276 return -1;
277}
278
212/* Fill in the header for fragmented IP packets handled by 279/* Fill in the header for fragmented IP packets handled by
213 * the IPv4 connection tracking code. 280 * the IPv4 connection tracking code.
214 */ 281 */
@@ -549,7 +616,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
549{ 616{
550 struct net_bridge_port *p; 617 struct net_bridge_port *p;
551 struct net_bridge *br; 618 struct net_bridge *br;
552 struct iphdr *iph;
553 __u32 len = nf_bridge_encap_header_len(skb); 619 __u32 len = nf_bridge_encap_header_len(skb);
554 620
555 if (unlikely(!pskb_may_pull(skb, len))) 621 if (unlikely(!pskb_may_pull(skb, len)))
@@ -578,28 +644,9 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
578 644
579 nf_bridge_pull_encap_header_rcsum(skb); 645 nf_bridge_pull_encap_header_rcsum(skb);
580 646
581 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 647 if (br_parse_ip_options(skb))
582 goto inhdr_error; 648 /* Drop invalid packet */
583 649 goto out;
584 iph = ip_hdr(skb);
585 if (iph->ihl < 5 || iph->version != 4)
586 goto inhdr_error;
587
588 if (!pskb_may_pull(skb, 4 * iph->ihl))
589 goto inhdr_error;
590
591 iph = ip_hdr(skb);
592 if (ip_fast_csum((__u8 *) iph, iph->ihl) != 0)
593 goto inhdr_error;
594
595 len = ntohs(iph->tot_len);
596 if (skb->len < len || len < 4 * iph->ihl)
597 goto inhdr_error;
598
599 pskb_trim_rcsum(skb, len);
600
601 /* BUG: Should really parse the IP options here. */
602 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
603 650
604 nf_bridge_put(skb->nf_bridge); 651 nf_bridge_put(skb->nf_bridge);
605 if (!nf_bridge_alloc(skb)) 652 if (!nf_bridge_alloc(skb))
@@ -614,8 +661,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
614 661
615 return NF_STOLEN; 662 return NF_STOLEN;
616 663
617inhdr_error:
618// IP_INC_STATS_BH(IpInHdrErrors);
619out: 664out:
620 return NF_DROP; 665 return NF_DROP;
621} 666}
@@ -759,14 +804,19 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
759#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE) 804#if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
760static int br_nf_dev_queue_xmit(struct sk_buff *skb) 805static int br_nf_dev_queue_xmit(struct sk_buff *skb)
761{ 806{
807 int ret;
808
762 if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) && 809 if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
763 skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu && 810 skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
764 !skb_is_gso(skb)) { 811 !skb_is_gso(skb)) {
765 /* BUG: Should really parse the IP options here. */ 812 if (br_parse_ip_options(skb))
766 memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); 813 /* Drop invalid packet */
767 return ip_fragment(skb, br_dev_queue_push_xmit); 814 return NF_DROP;
815 ret = ip_fragment(skb, br_dev_queue_push_xmit);
768 } else 816 } else
769 return br_dev_queue_push_xmit(skb); 817 ret = br_dev_queue_push_xmit(skb);
818
819 return ret;
770} 820}
771#else 821#else
772static int br_nf_dev_queue_xmit(struct sk_buff *skb) 822static int br_nf_dev_queue_xmit(struct sk_buff *skb)
@@ -954,15 +1004,22 @@ int __init br_netfilter_init(void)
954{ 1004{
955 int ret; 1005 int ret;
956 1006
957 ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1007 ret = dst_entries_init(&fake_dst_ops);
958 if (ret < 0) 1008 if (ret < 0)
959 return ret; 1009 return ret;
1010
1011 ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
1012 if (ret < 0) {
1013 dst_entries_destroy(&fake_dst_ops);
1014 return ret;
1015 }
960#ifdef CONFIG_SYSCTL 1016#ifdef CONFIG_SYSCTL
961 brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table); 1017 brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table);
962 if (brnf_sysctl_header == NULL) { 1018 if (brnf_sysctl_header == NULL) {
963 printk(KERN_WARNING 1019 printk(KERN_WARNING
964 "br_netfilter: can't register to sysctl.\n"); 1020 "br_netfilter: can't register to sysctl.\n");
965 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); 1021 nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops));
1022 dst_entries_destroy(&fake_dst_ops);
966 return -ENOMEM; 1023 return -ENOMEM;
967 } 1024 }
968#endif 1025#endif
@@ -976,4 +1033,5 @@ void br_netfilter_fini(void)
976#ifdef CONFIG_SYSCTL 1033#ifdef CONFIG_SYSCTL
977 unregister_sysctl_table(brnf_sysctl_header); 1034 unregister_sysctl_table(brnf_sysctl_header);
978#endif 1035#endif
1036 dst_entries_destroy(&fake_dst_ops);
979} 1037}
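
br_netfilter_init() now initializes the fake_dst_ops entry counter first and tears it down on every subsequent failure path, the usual initialize-in-order, unwind-in-reverse pattern for module init. A minimal userspace sketch of the pattern (the *_init/*_destroy functions are stand-ins that just print):

    #include <stdio.h>

    /* Stand-ins for the two subsystems the real init path sets up. */
    static int  dst_init(void)    { puts("dst init");    return 0; }
    static void dst_destroy(void) { puts("dst destroy"); }
    static int  hooks_init(void)  { puts("hooks init");  return -1; /* fail */ }

    /* Initialize in order; on failure, undo earlier steps in reverse. */
    static int module_init_sketch(void)
    {
            int ret = dst_init();
            if (ret < 0)
                    return ret;

            ret = hooks_init();
            if (ret < 0) {
                    dst_destroy(); /* unwind, as the patched code does */
                    return ret;
            }
            return 0;
    }

    int main(void)
    {
            printf("init -> %d\n", module_init_sketch());
            return 0;
    }
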
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 87b53b3a921d..eae67bf0446c 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -39,8 +39,6 @@ static bool
39ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) 39ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
40{ 40{
41 const struct ebt_vlan_info *info = par->matchinfo; 41 const struct ebt_vlan_info *info = par->matchinfo;
42 const struct vlan_hdr *fp;
43 struct vlan_hdr _frame;
44 42
45 unsigned short TCI; /* Whole TCI, given from parsed frame */ 43 unsigned short TCI; /* Whole TCI, given from parsed frame */
46 unsigned short id; /* VLAN ID, given from frame TCI */ 44 unsigned short id; /* VLAN ID, given from frame TCI */
@@ -48,9 +46,20 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
48 /* VLAN encapsulated Type/Length field, given from orig frame */ 46 /* VLAN encapsulated Type/Length field, given from orig frame */
49 __be16 encap; 47 __be16 encap;
50 48
51 fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame); 49 if (vlan_tx_tag_present(skb)) {
52 if (fp == NULL) 50 TCI = vlan_tx_tag_get(skb);
53 return false; 51 encap = skb->protocol;
52 } else {
53 const struct vlan_hdr *fp;
54 struct vlan_hdr _frame;
55
56 fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame);
57 if (fp == NULL)
58 return false;
59
60 TCI = ntohs(fp->h_vlan_TCI);
61 encap = fp->h_vlan_encapsulated_proto;
62 }
54 63
55 /* Tag Control Information (TCI) consists of the following elements: 64 /* Tag Control Information (TCI) consists of the following elements:
56 * - User_priority. The user_priority field is three bits in length, 65 * - User_priority. The user_priority field is three bits in length,
@@ -59,10 +68,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
59 * (CFI) is a single bit flag value. Currently ignored. 68 * (CFI) is a single bit flag value. Currently ignored.
60 * - VLAN Identifier (VID). The VID is encoded as 69 * - VLAN Identifier (VID). The VID is encoded as
61 * an unsigned binary number. */ 70 * an unsigned binary number. */
62 TCI = ntohs(fp->h_vlan_TCI);
63 id = TCI & VLAN_VID_MASK; 71 id = TCI & VLAN_VID_MASK;
64 prio = (TCI >> 13) & 0x7; 72 prio = (TCI >> 13) & 0x7;
65 encap = fp->h_vlan_encapsulated_proto;
66 73
67 /* Checking VLAN Identifier (VID) */ 74 /* Checking VLAN Identifier (VID) */
68 if (GET_BITMASK(EBT_VLAN_ID)) 75 if (GET_BITMASK(EBT_VLAN_ID))
@@ -111,10 +118,10 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
111 * 0 - The null VLAN ID. 118 * 0 - The null VLAN ID.
112 * 1 - The default Port VID (PVID) 119 * 1 - The default Port VID (PVID)
113 * 0x0FFF - Reserved for implementation use. 120 * 0x0FFF - Reserved for implementation use.
114 * if_vlan.h: VLAN_GROUP_ARRAY_LEN 4096. */ 121 * if_vlan.h: VLAN_N_VID 4096. */
115 if (GET_BITMASK(EBT_VLAN_ID)) { 122 if (GET_BITMASK(EBT_VLAN_ID)) {
116 if (!!info->id) { /* if id!=0 => check vid range */ 123 if (!!info->id) { /* if id!=0 => check vid range */
117 if (info->id > VLAN_GROUP_ARRAY_LEN) { 124 if (info->id > VLAN_N_VID) {
118 pr_debug("id %d is out of range (1-4096)\n", 125 pr_debug("id %d is out of range (1-4096)\n",
119 info->id); 126 info->id);
120 return -EINVAL; 127 return -EINVAL;
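
Whether the TCI arrives via vlan_tx_tag_get() or from an in-band header, the decode is identical: bits 0-11 are the VID, bit 12 the CFI, bits 13-15 the priority, exactly as the comment in the hunk describes. A self-contained sketch of the split:

    #include <stdint.h>
    #include <stdio.h>

    #define VLAN_VID_MASK 0x0fff

    /* Split a Tag Control Information word into its fields. */
    static void decode_tci(uint16_t tci, uint16_t *vid,
                           uint8_t *prio, uint8_t *cfi)
    {
            *vid  = tci & VLAN_VID_MASK; /* bits 0-11: VLAN Identifier */
            *cfi  = (tci >> 12) & 0x1;   /* bit 12: Canonical Format Indicator */
            *prio = (tci >> 13) & 0x7;   /* bits 13-15: user priority */
    }

    int main(void)
    {
            uint16_t vid;
            uint8_t prio, cfi;

            decode_tci(0xa064, &vid, &prio, &cfi);
            printf("vid=%u prio=%u cfi=%u\n", vid, prio, cfi); /* 100, 5, 0 */
            return 0;
    }
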
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index bcc102e3be4d..a1dcf83f0d58 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -124,16 +124,23 @@ ebt_dev_check(const char *entry, const struct net_device *device)
124#define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg)) 124#define FWINV2(bool,invflg) ((bool) ^ !!(e->invflags & invflg))
125/* process standard matches */ 125/* process standard matches */
126static inline int 126static inline int
127ebt_basic_match(const struct ebt_entry *e, const struct ethhdr *h, 127ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
128 const struct net_device *in, const struct net_device *out) 128 const struct net_device *in, const struct net_device *out)
129{ 129{
130 const struct ethhdr *h = eth_hdr(skb);
131 __be16 ethproto;
130 int verdict, i; 132 int verdict, i;
131 133
134 if (vlan_tx_tag_present(skb))
135 ethproto = htons(ETH_P_8021Q);
136 else
137 ethproto = h->h_proto;
138
132 if (e->bitmask & EBT_802_3) { 139 if (e->bitmask & EBT_802_3) {
133 if (FWINV2(ntohs(h->h_proto) >= 1536, EBT_IPROTO)) 140 if (FWINV2(ntohs(ethproto) >= 1536, EBT_IPROTO))
134 return 1; 141 return 1;
135 } else if (!(e->bitmask & EBT_NOPROTO) && 142 } else if (!(e->bitmask & EBT_NOPROTO) &&
136 FWINV2(e->ethproto != h->h_proto, EBT_IPROTO)) 143 FWINV2(e->ethproto != ethproto, EBT_IPROTO))
137 return 1; 144 return 1;
138 145
139 if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN)) 146 if (FWINV2(ebt_dev_check(e->in, in), EBT_IIN))
@@ -213,7 +220,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
213 base = private->entries; 220 base = private->entries;
214 i = 0; 221 i = 0;
215 while (i < nentries) { 222 while (i < nentries) {
216 if (ebt_basic_match(point, eth_hdr(skb), in, out)) 223 if (ebt_basic_match(point, skb, in, out))
217 goto letscontinue; 224 goto letscontinue;
218 225
219 if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0) 226 if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0)
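
The EBT_802_3 branch keys on the classic EtherType rule: values of 1536 (0x0600) and above name a protocol (Ethernet II), smaller values are 802.3 length fields. A one-function sketch, host-order input assumed:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* EtherType >= 1536 (0x0600) names a protocol (Ethernet II);
     * smaller values are 802.3 length fields. */
    static bool is_802_3_frame(uint16_t ethproto_host_order)
    {
            return ethproto_host_order < 1536;
    }

    int main(void)
    {
            printf("%d %d\n", is_802_3_frame(0x0800),  /* 0: IPv4 */
                   is_802_3_frame(0x0100));            /* 1: length field */
            return 0;
    }
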
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 0b586e9d1378..b99369a055d1 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -9,6 +9,8 @@
9 * and Sakari Ailus <sakari.ailus@nokia.com> 9 * and Sakari Ailus <sakari.ailus@nokia.com>
10 */ 10 */
11 11
12#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
13
12#include <linux/version.h> 14#include <linux/version.h>
13#include <linux/module.h> 15#include <linux/module.h>
14#include <linux/kernel.h> 16#include <linux/kernel.h>
@@ -171,7 +173,7 @@ static int receive(struct sk_buff *skb, struct net_device *dev,
171 net = dev_net(dev); 173 net = dev_net(dev);
172 pkt = cfpkt_fromnative(CAIF_DIR_IN, skb); 174 pkt = cfpkt_fromnative(CAIF_DIR_IN, skb);
173 caifd = caif_get(dev); 175 caifd = caif_get(dev);
174 if (!caifd || !caifd->layer.up || !caifd->layer.up->ctrlcmd) 176 if (!caifd || !caifd->layer.up || !caifd->layer.up->receive)
175 return NET_RX_DROP; 177 return NET_RX_DROP;
176 178
177 if (caifd->layer.up->receive(caifd->layer.up, pkt)) 179 if (caifd->layer.up->receive(caifd->layer.up, pkt))
@@ -214,7 +216,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
214 216
215 switch (what) { 217 switch (what) {
216 case NETDEV_REGISTER: 218 case NETDEV_REGISTER:
217 pr_info("CAIF: %s():register %s\n", __func__, dev->name); 219 netdev_info(dev, "register\n");
218 caifd = caif_device_alloc(dev); 220 caifd = caif_device_alloc(dev);
219 if (caifd == NULL) 221 if (caifd == NULL)
220 break; 222 break;
@@ -225,14 +227,13 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
225 break; 227 break;
226 228
227 case NETDEV_UP: 229 case NETDEV_UP:
228 pr_info("CAIF: %s(): up %s\n", __func__, dev->name); 230 netdev_info(dev, "up\n");
229 caifd = caif_get(dev); 231 caifd = caif_get(dev);
230 if (caifd == NULL) 232 if (caifd == NULL)
231 break; 233 break;
232 caifdev = netdev_priv(dev); 234 caifdev = netdev_priv(dev);
233 if (atomic_read(&caifd->state) == NETDEV_UP) { 235 if (atomic_read(&caifd->state) == NETDEV_UP) {
234 pr_info("CAIF: %s():%s already up\n", 236 netdev_info(dev, "already up\n");
235 __func__, dev->name);
236 break; 237 break;
237 } 238 }
238 atomic_set(&caifd->state, what); 239 atomic_set(&caifd->state, what);
@@ -273,7 +274,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
273 caifd = caif_get(dev); 274 caifd = caif_get(dev);
274 if (caifd == NULL) 275 if (caifd == NULL)
275 break; 276 break;
276 pr_info("CAIF: %s():going down %s\n", __func__, dev->name); 277 netdev_info(dev, "going down\n");
277 278
278 if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN || 279 if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN ||
279 atomic_read(&caifd->state) == NETDEV_DOWN) 280 atomic_read(&caifd->state) == NETDEV_DOWN)
@@ -295,11 +296,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
295 caifd = caif_get(dev); 296 caifd = caif_get(dev);
296 if (caifd == NULL) 297 if (caifd == NULL)
297 break; 298 break;
298 pr_info("CAIF: %s(): down %s\n", __func__, dev->name); 299 netdev_info(dev, "down\n");
299 if (atomic_read(&caifd->in_use)) 300 if (atomic_read(&caifd->in_use))
300 pr_warning("CAIF: %s(): " 301 netdev_warn(dev,
301 "Unregistering an active CAIF device: %s\n", 302 "Unregistering an active CAIF device\n");
302 __func__, dev->name);
303 cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer); 303 cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer);
304 dev_put(dev); 304 dev_put(dev);
305 atomic_set(&caifd->state, what); 305 atomic_set(&caifd->state, what);
@@ -307,7 +307,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
307 307
308 case NETDEV_UNREGISTER: 308 case NETDEV_UNREGISTER:
309 caifd = caif_get(dev); 309 caifd = caif_get(dev);
310 pr_info("CAIF: %s(): unregister %s\n", __func__, dev->name); 310 netdev_info(dev, "unregister\n");
311 atomic_set(&caifd->state, what); 311 atomic_set(&caifd->state, what);
312 caif_device_destroy(dev); 312 caif_device_destroy(dev);
313 break; 313 break;
@@ -391,7 +391,7 @@ static int __init caif_device_init(void)
391 int result; 391 int result;
392 cfg = cfcnfg_create(); 392 cfg = cfcnfg_create();
393 if (!cfg) { 393 if (!cfg) {
394 pr_warning("CAIF: %s(): can't create cfcnfg.\n", __func__); 394 pr_warn("can't create cfcnfg\n");
395 goto err_cfcnfg_create_failed; 395 goto err_cfcnfg_create_failed;
396 } 396 }
397 result = register_pernet_device(&caif_net_ops); 397 result = register_pernet_device(&caif_net_ops);
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 4bf28f25f368..2eca2dd0000f 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/fs.h> 9#include <linux/fs.h>
8#include <linux/init.h> 10#include <linux/init.h>
9#include <linux/module.h> 11#include <linux/module.h>
@@ -15,7 +17,6 @@
15#include <linux/poll.h> 17#include <linux/poll.h>
16#include <linux/tcp.h> 18#include <linux/tcp.h>
17#include <linux/uaccess.h> 19#include <linux/uaccess.h>
18#include <linux/mutex.h>
19#include <linux/debugfs.h> 20#include <linux/debugfs.h>
20#include <linux/caif/caif_socket.h> 21#include <linux/caif/caif_socket.h>
21#include <asm/atomic.h> 22#include <asm/atomic.h>
@@ -28,9 +29,6 @@
28MODULE_LICENSE("GPL"); 29MODULE_LICENSE("GPL");
29MODULE_ALIAS_NETPROTO(AF_CAIF); 30MODULE_ALIAS_NETPROTO(AF_CAIF);
30 31
31#define CAIF_DEF_SNDBUF (4096*10)
32#define CAIF_DEF_RCVBUF (4096*100)
33
34/* 32/*
35 * CAIF state is re-using the TCP socket states. 33 * CAIF state is re-using the TCP socket states.
36 * caif_states stored in sk_state reflect the state as reported by 34 * caif_states stored in sk_state reflect the state as reported by
@@ -157,9 +155,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
157 155
158 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= 156 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
159 (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) { 157 (unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) {
160 trace_printk("CAIF: %s():" 158 pr_debug("sending flow OFF (queue len = %d %d)\n",
161 " sending flow OFF (queue len = %d %d)\n",
162 __func__,
163 atomic_read(&cf_sk->sk.sk_rmem_alloc), 159 atomic_read(&cf_sk->sk.sk_rmem_alloc),
164 sk_rcvbuf_lowwater(cf_sk)); 160 sk_rcvbuf_lowwater(cf_sk));
165 set_rx_flow_off(cf_sk); 161 set_rx_flow_off(cf_sk);
@@ -172,9 +168,7 @@ static int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
172 return err; 168 return err;
173 if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) { 169 if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
174 set_rx_flow_off(cf_sk); 170 set_rx_flow_off(cf_sk);
175 trace_printk("CAIF: %s():" 171 pr_debug("sending flow OFF due to rmem_schedule\n");
176 " sending flow OFF due to rmem_schedule\n",
177 __func__);
178 dbfs_atomic_inc(&cnt.num_rx_flow_off); 172 dbfs_atomic_inc(&cnt.num_rx_flow_off);
179 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ); 173 caif_flow_ctrl(sk, CAIF_MODEMCMD_FLOW_OFF_REQ);
180 } 174 }
@@ -275,8 +269,7 @@ static void caif_ctrl_cb(struct cflayer *layr,
275 break; 269 break;
276 270
277 default: 271 default:
278 pr_debug("CAIF: %s(): Unexpected flow command %d\n", 272 pr_debug("Unexpected flow command %d\n", flow);
279 __func__, flow);
280 } 273 }
281} 274}
282 275
@@ -536,8 +529,7 @@ static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
536 529
537 /* Slight paranoia, probably not needed. */ 530 /* Slight paranoia, probably not needed. */
538 if (unlikely(loopcnt++ > 1000)) { 531 if (unlikely(loopcnt++ > 1000)) {
539 pr_warning("CAIF: %s(): transmit retries failed," 532 pr_warn("transmit retries failed, error = %d\n", ret);
540 " error = %d\n", __func__, ret);
541 break; 533 break;
542 } 534 }
543 535
@@ -912,8 +904,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
912 cf_sk->tailroom = tailroom; 904 cf_sk->tailroom = tailroom;
913 cf_sk->maxframe = mtu - (headroom + tailroom); 905 cf_sk->maxframe = mtu - (headroom + tailroom);
914 if (cf_sk->maxframe < 1) { 906 if (cf_sk->maxframe < 1) {
 915 pr_warning("CAIF: %s(): CAIF Interface MTU too small (%u)\n", 907 pr_warn("CAIF Interface MTU too small (%u)\n", dev->mtu);
916 __func__, mtu);
917 goto out; 908 goto out;
918 } 909 }
919 910
@@ -1132,10 +1123,6 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
1132 /* Store the protocol */ 1123 /* Store the protocol */
1133 sk->sk_protocol = (unsigned char) protocol; 1124 sk->sk_protocol = (unsigned char) protocol;
1134 1125
1135 /* Sendbuf dictates the amount of outbound packets not yet sent */
1136 sk->sk_sndbuf = CAIF_DEF_SNDBUF;
1137 sk->sk_rcvbuf = CAIF_DEF_RCVBUF;
1138
1139 /* 1126 /*
1140 * Lock in order to try to stop someone from opening the socket 1127 * Lock in order to try to stop someone from opening the socket
1141 * too early. 1128 * too early.
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 1c29189b344d..41adafd18914 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -3,6 +3,9 @@
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
6#include <linux/kernel.h> 9#include <linux/kernel.h>
7#include <linux/stddef.h> 10#include <linux/stddef.h>
8#include <linux/slab.h> 11#include <linux/slab.h>
@@ -78,7 +81,7 @@ struct cfcnfg *cfcnfg_create(void)
78 /* Initiate this layer */ 81 /* Initiate this layer */
79 this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC); 82 this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC);
80 if (!this) { 83 if (!this) {
81 pr_warning("CAIF: %s(): Out of memory\n", __func__); 84 pr_warn("Out of memory\n");
82 return NULL; 85 return NULL;
83 } 86 }
84 this->mux = cfmuxl_create(); 87 this->mux = cfmuxl_create();
@@ -106,7 +109,7 @@ struct cfcnfg *cfcnfg_create(void)
106 layer_set_up(this->ctrl, this); 109 layer_set_up(this->ctrl, this);
107 return this; 110 return this;
108out_of_mem: 111out_of_mem:
109 pr_warning("CAIF: %s(): Out of memory\n", __func__); 112 pr_warn("Out of memory\n");
110 kfree(this->mux); 113 kfree(this->mux);
111 kfree(this->ctrl); 114 kfree(this->ctrl);
112 kfree(this); 115 kfree(this);
@@ -194,7 +197,7 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
194 caif_assert(adap_layer != NULL); 197 caif_assert(adap_layer != NULL);
195 channel_id = adap_layer->id; 198 channel_id = adap_layer->id;
196 if (adap_layer->dn == NULL || channel_id == 0) { 199 if (adap_layer->dn == NULL || channel_id == 0) {
197 pr_err("CAIF: %s():adap_layer->id is 0\n", __func__); 200 pr_err("adap_layer->dn == NULL or adap_layer->id is 0\n");
198 ret = -ENOTCONN; 201 ret = -ENOTCONN;
199 goto end; 202 goto end;
200 } 203 }
@@ -204,9 +207,8 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
204 layer_set_up(servl, NULL); 207 layer_set_up(servl, NULL);
205 ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer); 208 ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer);
206 if (servl == NULL) { 209 if (servl == NULL) {
207 pr_err("CAIF: %s(): PROTOCOL ERROR " 210 pr_err("PROTOCOL ERROR - Error removing service_layer Channel_Id(%d)",
208 "- Error removing service_layer Channel_Id(%d)", 211 channel_id);
209 __func__, channel_id);
210 ret = -EINVAL; 212 ret = -EINVAL;
211 goto end; 213 goto end;
212 } 214 }
@@ -216,18 +218,14 @@ int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
216 218
217 phyinfo = cfcnfg_get_phyinfo(cnfg, phyid); 219 phyinfo = cfcnfg_get_phyinfo(cnfg, phyid);
218 if (phyinfo == NULL) { 220 if (phyinfo == NULL) {
219 pr_warning("CAIF: %s(): " 221 pr_warn("No interface to send disconnect to\n");
220 "No interface to send disconnect to\n",
221 __func__);
222 ret = -ENODEV; 222 ret = -ENODEV;
223 goto end; 223 goto end;
224 } 224 }
225 if (phyinfo->id != phyid || 225 if (phyinfo->id != phyid ||
226 phyinfo->phy_layer->id != phyid || 226 phyinfo->phy_layer->id != phyid ||
227 phyinfo->frm_layer->id != phyid) { 227 phyinfo->frm_layer->id != phyid) {
228 pr_err("CAIF: %s(): " 228 pr_err("Inconsistency in phy registration\n");
229 "Inconsistency in phy registration\n",
230 __func__);
231 ret = -EINVAL; 229 ret = -EINVAL;
232 goto end; 230 goto end;
233 } 231 }
@@ -276,21 +274,20 @@ int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
276{ 274{
277 struct cflayer *frml; 275 struct cflayer *frml;
278 if (adap_layer == NULL) { 276 if (adap_layer == NULL) {
279 pr_err("CAIF: %s(): adap_layer is zero", __func__); 277 pr_err("adap_layer is zero\n");
280 return -EINVAL; 278 return -EINVAL;
281 } 279 }
282 if (adap_layer->receive == NULL) { 280 if (adap_layer->receive == NULL) {
283 pr_err("CAIF: %s(): adap_layer->receive is NULL", __func__); 281 pr_err("adap_layer->receive is NULL\n");
284 return -EINVAL; 282 return -EINVAL;
285 } 283 }
286 if (adap_layer->ctrlcmd == NULL) { 284 if (adap_layer->ctrlcmd == NULL) {
287 pr_err("CAIF: %s(): adap_layer->ctrlcmd == NULL", __func__); 285 pr_err("adap_layer->ctrlcmd == NULL\n");
288 return -EINVAL; 286 return -EINVAL;
289 } 287 }
290 frml = cnfg->phy_layers[param->phyid].frm_layer; 288 frml = cnfg->phy_layers[param->phyid].frm_layer;
291 if (frml == NULL) { 289 if (frml == NULL) {
292 pr_err("CAIF: %s(): Specified PHY type does not exist!", 290 pr_err("Specified PHY type does not exist!\n");
293 __func__);
294 return -ENODEV; 291 return -ENODEV;
295 } 292 }
296 caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id); 293 caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id);
@@ -330,9 +327,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
330 struct net_device *netdev; 327 struct net_device *netdev;
331 328
332 if (adapt_layer == NULL) { 329 if (adapt_layer == NULL) {
333 pr_debug("CAIF: %s(): link setup response " 330 pr_debug("link setup response but no client exist, send linkdown back\n");
334 "but no client exist, send linkdown back\n",
335 __func__);
336 cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL); 331 cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL);
337 return; 332 return;
338 } 333 }
@@ -374,13 +369,11 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
374 servicel = cfdbgl_create(channel_id, &phyinfo->dev_info); 369 servicel = cfdbgl_create(channel_id, &phyinfo->dev_info);
375 break; 370 break;
376 default: 371 default:
377 pr_err("CAIF: %s(): Protocol error. " 372 pr_err("Protocol error. Link setup response - unknown channel type\n");
378 "Link setup response - unknown channel type\n",
379 __func__);
380 return; 373 return;
381 } 374 }
382 if (!servicel) { 375 if (!servicel) {
383 pr_warning("CAIF: %s(): Out of memory\n", __func__); 376 pr_warn("Out of memory\n");
384 return; 377 return;
385 } 378 }
386 layer_set_dn(servicel, cnfg->mux); 379 layer_set_dn(servicel, cnfg->mux);
@@ -418,7 +411,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
418 } 411 }
419 } 412 }
420 if (*phyid == 0) { 413 if (*phyid == 0) {
421 pr_err("CAIF: %s(): No Available PHY ID\n", __func__); 414 pr_err("No Available PHY ID\n");
422 return; 415 return;
423 } 416 }
424 417
@@ -427,7 +420,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
427 phy_driver = 420 phy_driver =
428 cfserl_create(CFPHYTYPE_FRAG, *phyid, stx); 421 cfserl_create(CFPHYTYPE_FRAG, *phyid, stx);
429 if (!phy_driver) { 422 if (!phy_driver) {
430 pr_warning("CAIF: %s(): Out of memory\n", __func__); 423 pr_warn("Out of memory\n");
431 return; 424 return;
432 } 425 }
433 426
@@ -436,7 +429,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
436 phy_driver = NULL; 429 phy_driver = NULL;
437 break; 430 break;
438 default: 431 default:
439 pr_err("CAIF: %s(): %d", __func__, phy_type); 432 pr_err("%d\n", phy_type);
440 return; 433 return;
441 break; 434 break;
442 } 435 }
@@ -455,7 +448,7 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
455 phy_layer->type = phy_type; 448 phy_layer->type = phy_type;
456 frml = cffrml_create(*phyid, fcs); 449 frml = cffrml_create(*phyid, fcs);
457 if (!frml) { 450 if (!frml) {
458 pr_warning("CAIF: %s(): Out of memory\n", __func__); 451 pr_warn("Out of memory\n");
459 return; 452 return;
460 } 453 }
461 cnfg->phy_layers[*phyid].frm_layer = frml; 454 cnfg->phy_layers[*phyid].frm_layer = frml;
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 563145fdc4c3..08f267a109aa 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -36,7 +38,7 @@ struct cflayer *cfctrl_create(void)
36 struct cfctrl *this = 38 struct cfctrl *this =
37 kmalloc(sizeof(struct cfctrl), GFP_ATOMIC); 39 kmalloc(sizeof(struct cfctrl), GFP_ATOMIC);
38 if (!this) { 40 if (!this) {
39 pr_warning("CAIF: %s(): Out of memory\n", __func__); 41 pr_warn("Out of memory\n");
40 return NULL; 42 return NULL;
41 } 43 }
42 caif_assert(offsetof(struct cfctrl, serv.layer) == 0); 44 caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
@@ -132,9 +134,7 @@ struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
132 list_for_each_entry_safe(p, tmp, &ctrl->list, list) { 134 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
133 if (cfctrl_req_eq(req, p)) { 135 if (cfctrl_req_eq(req, p)) {
134 if (p != first) 136 if (p != first)
135 pr_warning("CAIF: %s(): Requests are not " 137 pr_warn("Requests are not received in order\n");
136 "received in order\n",
137 __func__);
138 138
139 atomic_set(&ctrl->rsp_seq_no, 139 atomic_set(&ctrl->rsp_seq_no,
140 p->sequence_no); 140 p->sequence_no);
@@ -177,7 +177,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
177 int ret; 177 int ret;
178 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 178 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
179 if (!pkt) { 179 if (!pkt) {
180 pr_warning("CAIF: %s(): Out of memory\n", __func__); 180 pr_warn("Out of memory\n");
181 return; 181 return;
182 } 182 }
183 caif_assert(offsetof(struct cfctrl, serv.layer) == 0); 183 caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
@@ -189,8 +189,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
189 ret = 189 ret =
190 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 190 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
191 if (ret < 0) { 191 if (ret < 0) {
192 pr_err("CAIF: %s(): Could not transmit enum message\n", 192 pr_err("Could not transmit enum message\n");
193 __func__);
194 cfpkt_destroy(pkt); 193 cfpkt_destroy(pkt);
195 } 194 }
196} 195}
@@ -208,7 +207,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
208 char utility_name[16]; 207 char utility_name[16];
209 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 208 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
210 if (!pkt) { 209 if (!pkt) {
211 pr_warning("CAIF: %s(): Out of memory\n", __func__); 210 pr_warn("Out of memory\n");
212 return -ENOMEM; 211 return -ENOMEM;
213 } 212 }
214 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP); 213 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
@@ -253,13 +252,13 @@ int cfctrl_linkup_request(struct cflayer *layer,
253 param->u.utility.paramlen); 252 param->u.utility.paramlen);
254 break; 253 break;
255 default: 254 default:
256 pr_warning("CAIF: %s():Request setup of bad link type = %d\n", 255 pr_warn("Request setup of bad link type = %d\n",
257 __func__, param->linktype); 256 param->linktype);
258 return -EINVAL; 257 return -EINVAL;
259 } 258 }
260 req = kzalloc(sizeof(*req), GFP_KERNEL); 259 req = kzalloc(sizeof(*req), GFP_KERNEL);
261 if (!req) { 260 if (!req) {
262 pr_warning("CAIF: %s(): Out of memory\n", __func__); 261 pr_warn("Out of memory\n");
263 return -ENOMEM; 262 return -ENOMEM;
264 } 263 }
265 req->client_layer = user_layer; 264 req->client_layer = user_layer;
@@ -276,8 +275,7 @@ int cfctrl_linkup_request(struct cflayer *layer,
276 ret = 275 ret =
277 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 276 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
278 if (ret < 0) { 277 if (ret < 0) {
279 pr_err("CAIF: %s(): Could not transmit linksetup request\n", 278 pr_err("Could not transmit linksetup request\n");
280 __func__);
281 cfpkt_destroy(pkt); 279 cfpkt_destroy(pkt);
282 return -ENODEV; 280 return -ENODEV;
283 } 281 }
@@ -291,7 +289,7 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
291 struct cfctrl *cfctrl = container_obj(layer); 289 struct cfctrl *cfctrl = container_obj(layer);
292 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 290 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
293 if (!pkt) { 291 if (!pkt) {
294 pr_warning("CAIF: %s(): Out of memory\n", __func__); 292 pr_warn("Out of memory\n");
295 return -ENOMEM; 293 return -ENOMEM;
296 } 294 }
297 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY); 295 cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY);
@@ -300,8 +298,7 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
300 ret = 298 ret =
301 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt); 299 cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
302 if (ret < 0) { 300 if (ret < 0) {
303 pr_err("CAIF: %s(): Could not transmit link-down request\n", 301 pr_err("Could not transmit link-down request\n");
304 __func__);
305 cfpkt_destroy(pkt); 302 cfpkt_destroy(pkt);
306 } 303 }
307 return ret; 304 return ret;
@@ -313,7 +310,7 @@ void cfctrl_sleep_req(struct cflayer *layer)
313 struct cfctrl *cfctrl = container_obj(layer); 310 struct cfctrl *cfctrl = container_obj(layer);
314 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 311 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
315 if (!pkt) { 312 if (!pkt) {
316 pr_warning("CAIF: %s(): Out of memory\n", __func__); 313 pr_warn("Out of memory\n");
317 return; 314 return;
318 } 315 }
319 cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP); 316 cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP);
@@ -330,7 +327,7 @@ void cfctrl_wake_req(struct cflayer *layer)
330 struct cfctrl *cfctrl = container_obj(layer); 327 struct cfctrl *cfctrl = container_obj(layer);
331 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 328 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
332 if (!pkt) { 329 if (!pkt) {
333 pr_warning("CAIF: %s(): Out of memory\n", __func__); 330 pr_warn("Out of memory\n");
334 return; 331 return;
335 } 332 }
336 cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE); 333 cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE);
@@ -347,7 +344,7 @@ void cfctrl_getstartreason_req(struct cflayer *layer)
347 struct cfctrl *cfctrl = container_obj(layer); 344 struct cfctrl *cfctrl = container_obj(layer);
348 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN); 345 struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
349 if (!pkt) { 346 if (!pkt) {
350 pr_warning("CAIF: %s(): Out of memory\n", __func__); 347 pr_warn("Out of memory\n");
351 return; 348 return;
352 } 349 }
353 cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON); 350 cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON);
@@ -364,12 +361,11 @@ void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
364 struct cfctrl_request_info *p, *tmp; 361 struct cfctrl_request_info *p, *tmp;
365 struct cfctrl *ctrl = container_obj(layr); 362 struct cfctrl *ctrl = container_obj(layr);
366 spin_lock(&ctrl->info_list_lock); 363 spin_lock(&ctrl->info_list_lock);
367 pr_warning("CAIF: %s(): enter\n", __func__); 364 pr_warn("enter\n");
368 365
369 list_for_each_entry_safe(p, tmp, &ctrl->list, list) { 366 list_for_each_entry_safe(p, tmp, &ctrl->list, list) {
370 if (p->client_layer == adap_layer) { 367 if (p->client_layer == adap_layer) {
371 pr_warning("CAIF: %s(): cancel req :%d\n", __func__, 368 pr_warn("cancel req :%d\n", p->sequence_no);
372 p->sequence_no);
373 list_del(&p->list); 369 list_del(&p->list);
374 kfree(p); 370 kfree(p);
375 } 371 }
@@ -520,9 +516,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
520 cfpkt_extr_head(pkt, &param, len); 516 cfpkt_extr_head(pkt, &param, len);
521 break; 517 break;
522 default: 518 default:
523 pr_warning("CAIF: %s(): Request setup " 519 pr_warn("Request setup - invalid link type (%d)\n",
524 "- invalid link type (%d)", 520 serv);
525 __func__, serv);
526 goto error; 521 goto error;
527 } 522 }
528 523
@@ -532,9 +527,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
532 527
533 if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) || 528 if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
534 cfpkt_erroneous(pkt)) { 529 cfpkt_erroneous(pkt)) {
535 pr_err("CAIF: %s(): Invalid O/E bit or parse " 530 pr_err("Invalid O/E bit or parse error on CAIF control channel\n");
536 "error on CAIF control channel",
537 __func__);
538 cfctrl->res.reject_rsp(cfctrl->serv.layer.up, 531 cfctrl->res.reject_rsp(cfctrl->serv.layer.up,
539 0, 532 0,
540 req ? req->client_layer 533 req ? req->client_layer
@@ -556,8 +549,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
556 cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid); 549 cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid);
557 break; 550 break;
558 case CFCTRL_CMD_LINK_ERR: 551 case CFCTRL_CMD_LINK_ERR:
559 pr_err("CAIF: %s(): Frame Error Indication received\n", 552 pr_err("Frame Error Indication received\n");
560 __func__);
561 cfctrl->res.linkerror_ind(); 553 cfctrl->res.linkerror_ind();
562 break; 554 break;
563 case CFCTRL_CMD_ENUM: 555 case CFCTRL_CMD_ENUM:
@@ -576,7 +568,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
576 cfctrl->res.radioset_rsp(); 568 cfctrl->res.radioset_rsp();
577 break; 569 break;
578 default: 570 default:
579 pr_err("CAIF: %s(): Unrecognized Control Frame\n", __func__); 571 pr_err("Unrecognized Control Frame\n");
580 goto error; 572 goto error;
581 break; 573 break;
582 } 574 }
@@ -595,8 +587,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
595 case CAIF_CTRLCMD_FLOW_OFF_IND: 587 case CAIF_CTRLCMD_FLOW_OFF_IND:
596 spin_lock(&this->info_list_lock); 588 spin_lock(&this->info_list_lock);
597 if (!list_empty(&this->list)) { 589 if (!list_empty(&this->list)) {
598 pr_debug("CAIF: %s(): Received flow off in " 590 pr_debug("Received flow off in control layer\n");
599 "control layer", __func__);
600 } 591 }
601 spin_unlock(&this->info_list_lock); 592 spin_unlock(&this->info_list_lock);
602 break; 593 break;
@@ -620,7 +611,7 @@ static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt)
620 if (!ctrl->loop_linkused[linkid]) 611 if (!ctrl->loop_linkused[linkid])
621 goto found; 612 goto found;
622 spin_unlock(&ctrl->loop_linkid_lock); 613 spin_unlock(&ctrl->loop_linkid_lock);
623 pr_err("CAIF: %s(): Out of link-ids\n", __func__); 614 pr_err("Out of link-ids\n");
624 return -EINVAL; 615 return -EINVAL;
625found: 616found:
626 if (!ctrl->loop_linkused[linkid]) 617 if (!ctrl->loop_linkused[linkid])
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
index 676648cac8dd..496fda9ac66f 100644
--- a/net/caif/cfdbgl.c
+++ b/net/caif/cfdbgl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/slab.h> 10#include <linux/slab.h>
9#include <net/caif/caif_layer.h> 11#include <net/caif/caif_layer.h>
@@ -17,7 +19,7 @@ struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
17{ 19{
18 struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 20 struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
19 if (!dbg) { 21 if (!dbg) {
20 pr_warning("CAIF: %s(): Out of memory\n", __func__); 22 pr_warn("Out of memory\n");
21 return NULL; 23 return NULL;
22 } 24 }
23 caif_assert(offsetof(struct cfsrvl, layer) == 0); 25 caif_assert(offsetof(struct cfsrvl, layer) == 0);
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
index ed9d53aff280..d3ed264ad6c4 100644
--- a/net/caif/cfdgml.c
+++ b/net/caif/cfdgml.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -26,7 +28,7 @@ struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
26{ 28{
27 struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 29 struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
28 if (!dgm) { 30 if (!dgm) {
29 pr_warning("CAIF: %s(): Out of memory\n", __func__); 31 pr_warn("Out of memory\n");
30 return NULL; 32 return NULL;
31 } 33 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0); 34 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -49,14 +51,14 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
49 caif_assert(layr->ctrlcmd != NULL); 51 caif_assert(layr->ctrlcmd != NULL);
50 52
51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 53 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
52 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 54 pr_err("Packet is erroneous!\n");
53 cfpkt_destroy(pkt); 55 cfpkt_destroy(pkt);
54 return -EPROTO; 56 return -EPROTO;
55 } 57 }
56 58
57 if ((cmd & DGM_CMD_BIT) == 0) { 59 if ((cmd & DGM_CMD_BIT) == 0) {
58 if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) { 60 if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) {
59 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 61 pr_err("Packet is erroneous!\n");
60 cfpkt_destroy(pkt); 62 cfpkt_destroy(pkt);
61 return -EPROTO; 63 return -EPROTO;
62 } 64 }
@@ -75,8 +77,7 @@ static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
75 return 0; 77 return 0;
76 default: 78 default:
77 cfpkt_destroy(pkt); 79 cfpkt_destroy(pkt);
78 pr_info("CAIF: %s(): Unknown datagram control %d (0x%x)\n", 80 pr_info("Unknown datagram control %d (0x%x)\n", cmd, cmd);
79 __func__, cmd, cmd);
80 return -EPROTO; 81 return -EPROTO;
81 } 82 }
82} 83}
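
[Annotation] For context on the messages touched above: cfdgml_receive() keys its dispatch off the first byte of each packet, where a command bit separates payload frames (which carry a further 3-byte datagram header) from control commands. A standalone sketch of that dispatch; the DGM_CMD_BIT value (0x80) is an assumption, not quoted from this diff:

    #include <stdint.h>
    #include <stdio.h>

    /* Sketch of the cfdgml_receive() dispatch: the first byte is either
     * payload framing or a control command, split on a command bit.
     * DGM_CMD_BIT = 0x80 is assumed from this era's cfdgml.c.
     */
    #define DGM_CMD_BIT 0x80

    static int dgm_dispatch(uint8_t cmd)
    {
        if ((cmd & DGM_CMD_BIT) == 0) {
            /* data frame: a 3-byte datagram header follows, then payload */
            puts("data frame: strip dgm header, hand payload upwards");
            return 0;
        }
        /* command frame: known flow commands would be matched here */
        printf("Unknown datagram control %d (0x%x)\n", cmd, cmd);
        return -71;    /* -EPROTO */
    }

    int main(void)
    {
        dgm_dispatch(0x00);
        dgm_dispatch(0xff);
        return 0;
    }
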
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
index e86a4ca3b217..a445043931ae 100644
--- a/net/caif/cffrml.c
+++ b/net/caif/cffrml.c
@@ -6,6 +6,8 @@
6 * License terms: GNU General Public License (GPL) version 2 6 * License terms: GNU General Public License (GPL) version 2
7 */ 7 */
8 8
9#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
10
9#include <linux/stddef.h> 11#include <linux/stddef.h>
10#include <linux/spinlock.h> 12#include <linux/spinlock.h>
11#include <linux/slab.h> 13#include <linux/slab.h>
@@ -32,7 +34,7 @@ struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
32{ 34{
33 struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC); 35 struct cffrml *this = kmalloc(sizeof(struct cffrml), GFP_ATOMIC);
34 if (!this) { 36 if (!this) {
35 pr_warning("CAIF: %s(): Out of memory\n", __func__); 37 pr_warn("Out of memory\n");
36 return NULL; 38 return NULL;
37 } 39 }
38 caif_assert(offsetof(struct cffrml, layer) == 0); 40 caif_assert(offsetof(struct cffrml, layer) == 0);
@@ -83,7 +85,7 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
83 85
84 if (cfpkt_setlen(pkt, len) < 0) { 86 if (cfpkt_setlen(pkt, len) < 0) {
85 ++cffrml_rcv_error; 87 ++cffrml_rcv_error;
86 pr_err("CAIF: %s():Framing length error (%d)\n", __func__, len); 88 pr_err("Framing length error (%d)\n", len);
87 cfpkt_destroy(pkt); 89 cfpkt_destroy(pkt);
88 return -EPROTO; 90 return -EPROTO;
89 } 91 }
@@ -99,14 +101,14 @@ static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
99 cfpkt_add_trail(pkt, &tmp, 2); 101 cfpkt_add_trail(pkt, &tmp, 2);
100 ++cffrml_rcv_error; 102 ++cffrml_rcv_error;
101 ++cffrml_rcv_checsum_error; 103 ++cffrml_rcv_checsum_error;
102 pr_info("CAIF: %s(): Frame checksum error " 104 pr_info("Frame checksum error (0x%x != 0x%x)\n",
103 "(0x%x != 0x%x)\n", __func__, hdrchks, pktchks); 105 hdrchks, pktchks);
104 return -EILSEQ; 106 return -EILSEQ;
105 } 107 }
106 } 108 }
107 if (cfpkt_erroneous(pkt)) { 109 if (cfpkt_erroneous(pkt)) {
108 ++cffrml_rcv_error; 110 ++cffrml_rcv_error;
109 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 111 pr_err("Packet is erroneous!\n");
110 cfpkt_destroy(pkt); 112 cfpkt_destroy(pkt);
111 return -EPROTO; 113 return -EPROTO;
112 } 114 }
@@ -132,7 +134,7 @@ static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
132 cfpkt_add_head(pkt, &tmp, 2); 134 cfpkt_add_head(pkt, &tmp, 2);
133 cfpkt_info(pkt)->hdr_len += 2; 135 cfpkt_info(pkt)->hdr_len += 2;
134 if (cfpkt_erroneous(pkt)) { 136 if (cfpkt_erroneous(pkt)) {
135 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 137 pr_err("Packet is erroneous!\n");
136 return -EPROTO; 138 return -EPROTO;
137 } 139 }
138 ret = layr->dn->transmit(layr->dn, pkt); 140 ret = layr->dn->transmit(layr->dn, pkt);
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 80c8d332b258..46f34b2e0478 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -3,6 +3,9 @@
3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com 3 * Author: Sjur Brendeland/sjur.brandeland@stericsson.com
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
6#include <linux/stddef.h> 9#include <linux/stddef.h>
7#include <linux/spinlock.h> 10#include <linux/spinlock.h>
8#include <linux/slab.h> 11#include <linux/slab.h>
@@ -190,7 +193,7 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
190 u8 id; 193 u8 id;
191 struct cflayer *up; 194 struct cflayer *up;
192 if (cfpkt_extr_head(pkt, &id, 1) < 0) { 195 if (cfpkt_extr_head(pkt, &id, 1) < 0) {
193 pr_err("CAIF: %s(): erroneous Caif Packet\n", __func__); 196 pr_err("erroneous Caif Packet\n");
194 cfpkt_destroy(pkt); 197 cfpkt_destroy(pkt);
195 return -EPROTO; 198 return -EPROTO;
196 } 199 }
@@ -199,8 +202,8 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
199 up = get_up(muxl, id); 202 up = get_up(muxl, id);
200 spin_unlock(&muxl->receive_lock); 203 spin_unlock(&muxl->receive_lock);
201 if (up == NULL) { 204 if (up == NULL) {
202 pr_info("CAIF: %s():Received data on unknown link ID = %d " 205 pr_info("Received data on unknown link ID = %d (0x%x) up == NULL",
203 "(0x%x) up == NULL", __func__, id, id); 206 id, id);
204 cfpkt_destroy(pkt); 207 cfpkt_destroy(pkt);
205 /* 208 /*
206 * Don't return ERROR, since modem misbehaves and sends out 209 * Don't return ERROR, since modem misbehaves and sends out
@@ -223,9 +226,8 @@ static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
223 struct caif_payload_info *info = cfpkt_info(pkt); 226 struct caif_payload_info *info = cfpkt_info(pkt);
224 dn = get_dn(muxl, cfpkt_info(pkt)->dev_info); 227 dn = get_dn(muxl, cfpkt_info(pkt)->dev_info);
225 if (dn == NULL) { 228 if (dn == NULL) {
226 pr_warning("CAIF: %s(): Send data on unknown phy " 229 pr_warn("Send data on unknown phy ID = %d (0x%x)\n",
227 "ID = %d (0x%x)\n", 230 info->dev_info->id, info->dev_info->id);
228 __func__, info->dev_info->id, info->dev_info->id);
229 return -ENOTCONN; 231 return -ENOTCONN;
230 } 232 }
231 info->hdr_len += 1; 233 info->hdr_len += 1;
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
index c49a6695793a..d7e865e2ff65 100644
--- a/net/caif/cfpkt_skbuff.c
+++ b/net/caif/cfpkt_skbuff.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/string.h> 9#include <linux/string.h>
8#include <linux/skbuff.h> 10#include <linux/skbuff.h>
9#include <linux/hardirq.h> 11#include <linux/hardirq.h>
@@ -12,11 +14,12 @@
12#define PKT_PREFIX 48 14#define PKT_PREFIX 48
13#define PKT_POSTFIX 2 15#define PKT_POSTFIX 2
14#define PKT_LEN_WHEN_EXTENDING 128 16#define PKT_LEN_WHEN_EXTENDING 128
15#define PKT_ERROR(pkt, errmsg) do { \ 17#define PKT_ERROR(pkt, errmsg) \
16 cfpkt_priv(pkt)->erronous = true; \ 18do { \
17 skb_reset_tail_pointer(&pkt->skb); \ 19 cfpkt_priv(pkt)->erronous = true; \
18 pr_warning("CAIF: " errmsg);\ 20 skb_reset_tail_pointer(&pkt->skb); \
19 } while (0) 21 pr_warn(errmsg); \
22} while (0)
20 23
21struct cfpktq { 24struct cfpktq {
22 struct sk_buff_head head; 25 struct sk_buff_head head;
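
[Annotation] Besides dropping the "CAIF: " prefix now supplied by pr_fmt, the PKT_ERROR() rewrite above reflows the macro body into the canonical do { ... } while (0) form. That shape makes a multi-statement macro behave as a single statement, which matters under an unbraced if/else. A small self-contained illustration, not the CAIF code itself:

    #include <stdio.h>

    /* Why multi-statement macros get wrapped in do { ... } while (0):
     * the do/while form is one statement, so it composes with unbraced
     * if/else; a bare { ... } block followed by ';' would not.
     */
    #define PKT_ERROR(msg) do { \
        puts(msg);              \
    } while (0)

    static void check_len(int len, int max)
    {
        if (len > max)
            PKT_ERROR("read beyond end of packet");
        else    /* with a plain { ... } macro this 'else' would not compile */
            puts("ok");
    }

    int main(void)
    {
        check_len(5, 4);
        check_len(3, 4);
        return 0;
    }
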
@@ -130,13 +133,13 @@ int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len)
130 return -EPROTO; 133 return -EPROTO;
131 134
132 if (unlikely(len > skb->len)) { 135 if (unlikely(len > skb->len)) {
133 PKT_ERROR(pkt, "cfpkt_extr_head read beyond end of packet\n"); 136 PKT_ERROR(pkt, "read beyond end of packet\n");
134 return -EPROTO; 137 return -EPROTO;
135 } 138 }
136 139
137 if (unlikely(len > skb_headlen(skb))) { 140 if (unlikely(len > skb_headlen(skb))) {
138 if (unlikely(skb_linearize(skb) != 0)) { 141 if (unlikely(skb_linearize(skb) != 0)) {
139 PKT_ERROR(pkt, "cfpkt_extr_head linearize failed\n"); 142 PKT_ERROR(pkt, "linearize failed\n");
140 return -EPROTO; 143 return -EPROTO;
141 } 144 }
142 } 145 }
@@ -156,11 +159,11 @@ int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
156 return -EPROTO; 159 return -EPROTO;
157 160
158 if (unlikely(skb_linearize(skb) != 0)) { 161 if (unlikely(skb_linearize(skb) != 0)) {
159 PKT_ERROR(pkt, "cfpkt_extr_trail linearize failed\n"); 162 PKT_ERROR(pkt, "linearize failed\n");
160 return -EPROTO; 163 return -EPROTO;
161 } 164 }
162 if (unlikely(skb->data + len > skb_tail_pointer(skb))) { 165 if (unlikely(skb->data + len > skb_tail_pointer(skb))) {
163 PKT_ERROR(pkt, "cfpkt_extr_trail read beyond end of packet\n"); 166 PKT_ERROR(pkt, "read beyond end of packet\n");
164 return -EPROTO; 167 return -EPROTO;
165 } 168 }
166 from = skb_tail_pointer(skb) - len; 169 from = skb_tail_pointer(skb) - len;
@@ -202,7 +205,7 @@ int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
202 205
203 /* Make sure data is writable */ 206 /* Make sure data is writable */
204 if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) { 207 if (unlikely(skb_cow_data(skb, addlen, &lastskb) < 0)) {
205 PKT_ERROR(pkt, "cfpkt_add_body: cow failed\n"); 208 PKT_ERROR(pkt, "cow failed\n");
206 return -EPROTO; 209 return -EPROTO;
207 } 210 }
208 /* 211 /*
@@ -211,8 +214,7 @@ int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
211 * lengths of the top SKB. 214 * lengths of the top SKB.
212 */ 215 */
213 if (lastskb != skb) { 216 if (lastskb != skb) {
214 pr_warning("CAIF: %s(): Packet is non-linear\n", 217 pr_warn("Packet is non-linear\n");
215 __func__);
216 skb->len += len; 218 skb->len += len;
217 skb->data_len += len; 219 skb->data_len += len;
218 } 220 }
@@ -242,14 +244,14 @@ int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
242 if (unlikely(is_erronous(pkt))) 244 if (unlikely(is_erronous(pkt)))
243 return -EPROTO; 245 return -EPROTO;
244 if (unlikely(skb_headroom(skb) < len)) { 246 if (unlikely(skb_headroom(skb) < len)) {
245 PKT_ERROR(pkt, "cfpkt_add_head: no headroom\n"); 247 PKT_ERROR(pkt, "no headroom\n");
246 return -EPROTO; 248 return -EPROTO;
247 } 249 }
248 250
249 /* Make sure data is writable */ 251 /* Make sure data is writable */
250 ret = skb_cow_data(skb, 0, &lastskb); 252 ret = skb_cow_data(skb, 0, &lastskb);
251 if (unlikely(ret < 0)) { 253 if (unlikely(ret < 0)) {
252 PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n"); 254 PKT_ERROR(pkt, "cow failed\n");
253 return ret; 255 return ret;
254 } 256 }
255 257
@@ -283,7 +285,7 @@ inline u16 cfpkt_iterate(struct cfpkt *pkt,
283 if (unlikely(is_erronous(pkt))) 285 if (unlikely(is_erronous(pkt)))
284 return -EPROTO; 286 return -EPROTO;
285 if (unlikely(skb_linearize(&pkt->skb) != 0)) { 287 if (unlikely(skb_linearize(&pkt->skb) != 0)) {
286 PKT_ERROR(pkt, "cfpkt_iterate: linearize failed\n"); 288 PKT_ERROR(pkt, "linearize failed\n");
287 return -EPROTO; 289 return -EPROTO;
288 } 290 }
289 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt)); 291 return iter_func(data, pkt->skb.data, cfpkt_getlen(pkt));
@@ -309,7 +311,7 @@ int cfpkt_setlen(struct cfpkt *pkt, u16 len)
309 311
310 /* Need to expand SKB */ 312 /* Need to expand SKB */
311 if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len))) 313 if (unlikely(!cfpkt_pad_trail(pkt, len - skb->len)))
312 PKT_ERROR(pkt, "cfpkt_setlen: skb_pad_trail failed\n"); 314 PKT_ERROR(pkt, "skb_pad_trail failed\n");
313 315
314 return cfpkt_getlen(pkt); 316 return cfpkt_getlen(pkt);
315} 317}
@@ -380,8 +382,7 @@ struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
380 return NULL; 382 return NULL;
381 383
382 if (skb->data + pos > skb_tail_pointer(skb)) { 384 if (skb->data + pos > skb_tail_pointer(skb)) {
383 PKT_ERROR(pkt, 385 PKT_ERROR(pkt, "trying to split beyond end of packet\n");
384 "cfpkt_split: trying to split beyond end of packet");
385 return NULL; 386 return NULL;
386 } 387 }
387 388
@@ -455,17 +456,17 @@ int cfpkt_raw_append(struct cfpkt *pkt, void **buf, unsigned int buflen)
455 return -EPROTO; 456 return -EPROTO;
456 /* Make sure SKB is writable */ 457 /* Make sure SKB is writable */
457 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) { 458 if (unlikely(skb_cow_data(skb, 0, &lastskb) < 0)) {
458 PKT_ERROR(pkt, "cfpkt_raw_append: skb_cow_data failed\n"); 459 PKT_ERROR(pkt, "skb_cow_data failed\n");
459 return -EPROTO; 460 return -EPROTO;
460 } 461 }
461 462
462 if (unlikely(skb_linearize(skb) != 0)) { 463 if (unlikely(skb_linearize(skb) != 0)) {
463 PKT_ERROR(pkt, "cfpkt_raw_append: linearize failed\n"); 464 PKT_ERROR(pkt, "linearize failed\n");
464 return -EPROTO; 465 return -EPROTO;
465 } 466 }
466 467
467 if (unlikely(skb_tailroom(skb) < buflen)) { 468 if (unlikely(skb_tailroom(skb) < buflen)) {
468 PKT_ERROR(pkt, "cfpkt_raw_append: buffer too short - failed\n"); 469 PKT_ERROR(pkt, "buffer too short - failed\n");
469 return -EPROTO; 470 return -EPROTO;
470 } 471 }
471 472
@@ -483,14 +484,13 @@ int cfpkt_raw_extract(struct cfpkt *pkt, void **buf, unsigned int buflen)
483 return -EPROTO; 484 return -EPROTO;
484 485
485 if (unlikely(buflen > skb->len)) { 486 if (unlikely(buflen > skb->len)) {
486 PKT_ERROR(pkt, "cfpkt_raw_extract: buflen too large " 487 PKT_ERROR(pkt, "buflen too large - failed\n");
487 "- failed\n");
488 return -EPROTO; 488 return -EPROTO;
489 } 489 }
490 490
491 if (unlikely(buflen > skb_headlen(skb))) { 491 if (unlikely(buflen > skb_headlen(skb))) {
492 if (unlikely(skb_linearize(skb) != 0)) { 492 if (unlikely(skb_linearize(skb) != 0)) {
493 PKT_ERROR(pkt, "cfpkt_raw_extract: linearize failed\n"); 493 PKT_ERROR(pkt, "linearize failed\n");
494 return -EPROTO; 494 return -EPROTO;
495 } 495 }
496 } 496 }
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
index 9a699242d104..bde8481e8d25 100644
--- a/net/caif/cfrfml.c
+++ b/net/caif/cfrfml.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -48,7 +50,7 @@ struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info,
48 kzalloc(sizeof(struct cfrfml), GFP_ATOMIC); 50 kzalloc(sizeof(struct cfrfml), GFP_ATOMIC);
49 51
50 if (!this) { 52 if (!this) {
51 pr_warning("CAIF: %s(): Out of memory\n", __func__); 53 pr_warn("Out of memory\n");
52 return NULL; 54 return NULL;
53 } 55 }
54 56
@@ -178,9 +180,7 @@ out:
178 cfpkt_destroy(rfml->incomplete_frm); 180 cfpkt_destroy(rfml->incomplete_frm);
179 rfml->incomplete_frm = NULL; 181 rfml->incomplete_frm = NULL;
180 182
181 pr_info("CAIF: %s(): " 183 pr_info("Connection error %d triggered on RFM link\n", err);
182 "Connection error %d triggered on RFM link\n",
183 __func__, err);
184 184
185 /* Trigger connection error upon failure.*/ 185 /* Trigger connection error upon failure.*/
186 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 186 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
@@ -280,9 +280,7 @@ static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
280out: 280out:
281 281
282 if (err != 0) { 282 if (err != 0) {
283 pr_info("CAIF: %s(): " 283 pr_info("Connection error %d triggered on RFM link\n", err);
284 "Connection error %d triggered on RFM link\n",
285 __func__, err);
286 /* Trigger connection error upon failure.*/ 284 /* Trigger connection error upon failure.*/
287 285
288 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 286 layr->up->ctrlcmd(layr->up, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND,
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
index a11fbd68a13d..9297f7dea9d8 100644
--- a/net/caif/cfserl.c
+++ b/net/caif/cfserl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/spinlock.h> 10#include <linux/spinlock.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -34,7 +36,7 @@ struct cflayer *cfserl_create(int type, int instance, bool use_stx)
34{ 36{
35 struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC); 37 struct cfserl *this = kmalloc(sizeof(struct cfserl), GFP_ATOMIC);
36 if (!this) { 38 if (!this) {
37 pr_warning("CAIF: %s(): Out of memory\n", __func__); 39 pr_warn("Out of memory\n");
38 return NULL; 40 return NULL;
39 } 41 }
40 caif_assert(offsetof(struct cfserl, layer) == 0); 42 caif_assert(offsetof(struct cfserl, layer) == 0);
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index f40939a91211..ab5e542526bf 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/errno.h> 11#include <linux/errno.h>
@@ -79,8 +81,7 @@ static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
79 layr->up->ctrlcmd(layr->up, ctrl, phyid); 81 layr->up->ctrlcmd(layr->up, ctrl, phyid);
80 break; 82 break;
81 default: 83 default:
82 pr_warning("CAIF: %s(): " 84 pr_warn("Unexpected ctrl in cfsrvl (%d)\n", ctrl);
83 "Unexpected ctrl in cfsrvl (%d)\n", __func__, ctrl);
84 /* We have both modem and phy flow on, send flow on */ 85 /* We have both modem and phy flow on, send flow on */
85 layr->up->ctrlcmd(layr->up, ctrl, phyid); 86 layr->up->ctrlcmd(layr->up, ctrl, phyid);
86 service->phy_flow_on = true; 87 service->phy_flow_on = true;
@@ -107,14 +108,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
107 u8 flow_on = SRVL_FLOW_ON; 108 u8 flow_on = SRVL_FLOW_ON;
108 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); 109 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
109 if (!pkt) { 110 if (!pkt) {
110 pr_warning("CAIF: %s(): Out of memory\n", 111 pr_warn("Out of memory\n");
111 __func__);
112 return -ENOMEM; 112 return -ENOMEM;
113 } 113 }
114 114
115 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) { 115 if (cfpkt_add_head(pkt, &flow_on, 1) < 0) {
116 pr_err("CAIF: %s(): Packet is erroneous!\n", 116 pr_err("Packet is erroneous!\n");
117 __func__);
118 cfpkt_destroy(pkt); 117 cfpkt_destroy(pkt);
119 return -EPROTO; 118 return -EPROTO;
120 } 119 }
@@ -131,14 +130,12 @@ static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
131 u8 flow_off = SRVL_FLOW_OFF; 130 u8 flow_off = SRVL_FLOW_OFF;
132 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE); 131 pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
133 if (!pkt) { 132 if (!pkt) {
134 pr_warning("CAIF: %s(): Out of memory\n", 133 pr_warn("Out of memory\n");
135 __func__);
136 return -ENOMEM; 134 return -ENOMEM;
137 } 135 }
138 136
139 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) { 137 if (cfpkt_add_head(pkt, &flow_off, 1) < 0) {
140 pr_err("CAIF: %s(): Packet is erroneous!\n", 138 pr_err("Packet is erroneous!\n");
141 __func__);
142 cfpkt_destroy(pkt); 139 cfpkt_destroy(pkt);
143 return -EPROTO; 140 return -EPROTO;
144 } 141 }
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
index 02795aff57a4..efad410e4c82 100644
--- a/net/caif/cfutill.c
+++ b/net/caif/cfutill.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -26,7 +28,7 @@ struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
26{ 28{
27 struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 29 struct cfsrvl *util = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
28 if (!util) { 30 if (!util) {
29 pr_warning("CAIF: %s(): Out of memory\n", __func__); 31 pr_warn("Out of memory\n");
30 return NULL; 32 return NULL;
31 } 33 }
32 caif_assert(offsetof(struct cfsrvl, layer) == 0); 34 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -47,7 +49,7 @@ static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
47 caif_assert(layr->up->receive != NULL); 49 caif_assert(layr->up->receive != NULL);
48 caif_assert(layr->up->ctrlcmd != NULL); 50 caif_assert(layr->up->ctrlcmd != NULL);
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 52 pr_err("Packet is erroneous!\n");
51 cfpkt_destroy(pkt); 53 cfpkt_destroy(pkt);
52 return -EPROTO; 54 return -EPROTO;
53 } 55 }
@@ -64,16 +66,14 @@ static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
64 cfpkt_destroy(pkt); 66 cfpkt_destroy(pkt);
65 return 0; 67 return 0;
66 case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */ 68 case UTIL_REMOTE_SHUTDOWN: /* Remote Shutdown Request */
67 pr_err("CAIF: %s(): REMOTE SHUTDOWN REQUEST RECEIVED\n", 69 pr_err("REMOTE SHUTDOWN REQUEST RECEIVED\n");
68 __func__);
69 layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0); 70 layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0);
70 service->open = false; 71 service->open = false;
71 cfpkt_destroy(pkt); 72 cfpkt_destroy(pkt);
72 return 0; 73 return 0;
73 default: 74 default:
74 cfpkt_destroy(pkt); 75 cfpkt_destroy(pkt);
75 pr_warning("CAIF: %s(): Unknown service control %d (0x%x)\n", 76 pr_warn("Unknown service control %d (0x%x)\n", cmd, cmd);
76 __func__, cmd, cmd);
77 return -EPROTO; 77 return -EPROTO;
78 } 78 }
79} 79}
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
index 77cc09faac9a..3b425b189a99 100644
--- a/net/caif/cfveil.c
+++ b/net/caif/cfveil.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/stddef.h> 9#include <linux/stddef.h>
8#include <linux/slab.h> 10#include <linux/slab.h>
9#include <net/caif/caif_layer.h> 11#include <net/caif/caif_layer.h>
@@ -25,7 +27,7 @@ struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
25{ 27{
26 struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 28 struct cfsrvl *vei = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
27 if (!vei) { 29 if (!vei) {
28 pr_warning("CAIF: %s(): Out of memory\n", __func__); 30 pr_warn("Out of memory\n");
29 return NULL; 31 return NULL;
30 } 32 }
31 caif_assert(offsetof(struct cfsrvl, layer) == 0); 33 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -47,7 +49,7 @@ static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
47 49
48 50
49 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) { 51 if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
50 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 52 pr_err("Packet is erroneous!\n");
51 cfpkt_destroy(pkt); 53 cfpkt_destroy(pkt);
52 return -EPROTO; 54 return -EPROTO;
53 } 55 }
@@ -67,8 +69,7 @@ static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
67 cfpkt_destroy(pkt); 69 cfpkt_destroy(pkt);
68 return 0; 70 return 0;
69 default: /* SET RS232 PIN */ 71 default: /* SET RS232 PIN */
70 pr_warning("CAIF: %s():Unknown VEI control packet %d (0x%x)!\n", 72 pr_warn("Unknown VEI control packet %d (0x%x)!\n", cmd, cmd);
71 __func__, cmd, cmd);
72 cfpkt_destroy(pkt); 73 cfpkt_destroy(pkt);
73 return -EPROTO; 74 return -EPROTO;
74 } 75 }
@@ -86,7 +87,7 @@ static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
86 caif_assert(layr->dn->transmit != NULL); 87 caif_assert(layr->dn->transmit != NULL);
87 88
88 if (cfpkt_add_head(pkt, &tmp, 1) < 0) { 89 if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
89 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 90 pr_err("Packet is erroneous!\n");
90 return -EPROTO; 91 return -EPROTO;
91 } 92 }
92 93
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
index ada6ee2d48f5..bf6fef2a0eff 100644
--- a/net/caif/cfvidl.c
+++ b/net/caif/cfvidl.c
@@ -4,6 +4,8 @@
4 * License terms: GNU General Public License (GPL) version 2 4 * License terms: GNU General Public License (GPL) version 2
5 */ 5 */
6 6
7#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
8
7#include <linux/kernel.h> 9#include <linux/kernel.h>
8#include <linux/types.h> 10#include <linux/types.h>
9#include <linux/slab.h> 11#include <linux/slab.h>
@@ -21,7 +23,7 @@ struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
21{ 23{
22 struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC); 24 struct cfsrvl *vid = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
23 if (!vid) { 25 if (!vid) {
24 pr_warning("CAIF: %s(): Out of memory\n", __func__); 26 pr_warn("Out of memory\n");
25 return NULL; 27 return NULL;
26 } 28 }
27 caif_assert(offsetof(struct cfsrvl, layer) == 0); 29 caif_assert(offsetof(struct cfsrvl, layer) == 0);
@@ -38,7 +40,7 @@ static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt)
38{ 40{
39 u32 videoheader; 41 u32 videoheader;
40 if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) { 42 if (cfpkt_extr_head(pkt, &videoheader, 4) < 0) {
41 pr_err("CAIF: %s(): Packet is erroneous!\n", __func__); 43 pr_err("Packet is erroneous!\n");
42 cfpkt_destroy(pkt); 44 cfpkt_destroy(pkt);
43 return -EPROTO; 45 return -EPROTO;
44 } 46 }
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 4293e190ec53..84a422c98941 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -5,6 +5,8 @@
5 * License terms: GNU General Public License (GPL) version 2 5 * License terms: GNU General Public License (GPL) version 2
6 */ 6 */
7 7
8#define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__
9
8#include <linux/version.h> 10#include <linux/version.h>
9#include <linux/fs.h> 11#include <linux/fs.h>
10#include <linux/init.h> 12#include <linux/init.h>
@@ -28,9 +30,6 @@
28#define CONNECT_TIMEOUT (5 * HZ) 30#define CONNECT_TIMEOUT (5 * HZ)
29#define CAIF_NET_DEFAULT_QUEUE_LEN 500 31#define CAIF_NET_DEFAULT_QUEUE_LEN 500
30 32
31#undef pr_debug
32#define pr_debug pr_warning
33
34/*This list is protected by the rtnl lock. */ 33/*This list is protected by the rtnl lock. */
35static LIST_HEAD(chnl_net_list); 34static LIST_HEAD(chnl_net_list);
36 35
@@ -142,8 +141,7 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
142 int phyid) 141 int phyid)
143{ 142{
144 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl); 143 struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
145 pr_debug("CAIF: %s(): NET flowctrl func called flow: %s\n", 144 pr_debug("NET flowctrl func called flow: %s\n",
146 __func__,
147 flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" : 145 flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
148 flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" : 146 flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" :
149 flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" : 147 flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
@@ -196,12 +194,12 @@ static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
196 priv = netdev_priv(dev); 194 priv = netdev_priv(dev);
197 195
198 if (skb->len > priv->netdev->mtu) { 196 if (skb->len > priv->netdev->mtu) {
199 pr_warning("CAIF: %s(): Size of skb exceeded MTU\n", __func__); 197 pr_warn("Size of skb exceeded MTU\n");
200 return -ENOSPC; 198 return -ENOSPC;
201 } 199 }
202 200
203 if (!priv->flowenabled) { 201 if (!priv->flowenabled) {
204 pr_debug("CAIF: %s(): dropping packets flow off\n", __func__); 202 pr_debug("dropping packets flow off\n");
205 return NETDEV_TX_BUSY; 203 return NETDEV_TX_BUSY;
206 } 204 }
207 205
@@ -237,7 +235,7 @@ static int chnl_net_open(struct net_device *dev)
237 ASSERT_RTNL(); 235 ASSERT_RTNL();
238 priv = netdev_priv(dev); 236 priv = netdev_priv(dev);
239 if (!priv) { 237 if (!priv) {
240 pr_debug("CAIF: %s(): chnl_net_open: no priv\n", __func__); 238 pr_debug("chnl_net_open: no priv\n");
241 return -ENODEV; 239 return -ENODEV;
242 } 240 }
243 241
@@ -246,18 +244,17 @@ static int chnl_net_open(struct net_device *dev)
246 result = caif_connect_client(&priv->conn_req, &priv->chnl, 244 result = caif_connect_client(&priv->conn_req, &priv->chnl,
247 &llifindex, &headroom, &tailroom); 245 &llifindex, &headroom, &tailroom);
248 if (result != 0) { 246 if (result != 0) {
249 pr_debug("CAIF: %s(): err: " 247 pr_debug("err: "
250 "Unable to register and open device," 248 "Unable to register and open device,"
251 " Err:%d\n", 249 " Err:%d\n",
252 __func__, 250 result);
253 result);
254 goto error; 251 goto error;
255 } 252 }
256 253
257 lldev = dev_get_by_index(dev_net(dev), llifindex); 254 lldev = dev_get_by_index(dev_net(dev), llifindex);
258 255
259 if (lldev == NULL) { 256 if (lldev == NULL) {
260 pr_debug("CAIF: %s(): no interface?\n", __func__); 257 pr_debug("no interface?\n");
261 result = -ENODEV; 258 result = -ENODEV;
262 goto error; 259 goto error;
263 } 260 }
@@ -279,9 +276,7 @@ static int chnl_net_open(struct net_device *dev)
279 dev_put(lldev); 276 dev_put(lldev);
280 277
281 if (mtu < 100) { 278 if (mtu < 100) {
282 pr_warning("CAIF: %s(): " 279 pr_warn("CAIF Interface MTU too small (%d)\n", mtu);
283 "CAIF Interface MTU too small (%d)\n",
284 __func__, mtu);
285 result = -ENODEV; 280 result = -ENODEV;
286 goto error; 281 goto error;
287 } 282 }
@@ -296,33 +291,32 @@ static int chnl_net_open(struct net_device *dev)
296 rtnl_lock(); 291 rtnl_lock();
297 292
298 if (result == -ERESTARTSYS) { 293 if (result == -ERESTARTSYS) {
299 pr_debug("CAIF: %s(): wait_event_interruptible" 294 pr_debug("wait_event_interruptible woken by a signal\n");
300 " woken by a signal\n", __func__);
301 result = -ERESTARTSYS; 295 result = -ERESTARTSYS;
302 goto error; 296 goto error;
303 } 297 }
304 298
305 if (result == 0) { 299 if (result == 0) {
306 pr_debug("CAIF: %s(): connect timeout\n", __func__); 300 pr_debug("connect timeout\n");
307 caif_disconnect_client(&priv->chnl); 301 caif_disconnect_client(&priv->chnl);
308 priv->state = CAIF_DISCONNECTED; 302 priv->state = CAIF_DISCONNECTED;
309 pr_debug("CAIF: %s(): state disconnected\n", __func__); 303 pr_debug("state disconnected\n");
310 result = -ETIMEDOUT; 304 result = -ETIMEDOUT;
311 goto error; 305 goto error;
312 } 306 }
313 307
314 if (priv->state != CAIF_CONNECTED) { 308 if (priv->state != CAIF_CONNECTED) {
315 pr_debug("CAIF: %s(): connect failed\n", __func__); 309 pr_debug("connect failed\n");
316 result = -ECONNREFUSED; 310 result = -ECONNREFUSED;
317 goto error; 311 goto error;
318 } 312 }
319 pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__); 313 pr_debug("CAIF Netdevice connected\n");
320 return 0; 314 return 0;
321 315
322error: 316error:
323 caif_disconnect_client(&priv->chnl); 317 caif_disconnect_client(&priv->chnl);
324 priv->state = CAIF_DISCONNECTED; 318 priv->state = CAIF_DISCONNECTED;
325 pr_debug("CAIF: %s(): state disconnected\n", __func__); 319 pr_debug("state disconnected\n");
326 return result; 320 return result;
327 321
328} 322}
@@ -413,7 +407,7 @@ static void caif_netlink_parms(struct nlattr *data[],
413 struct caif_connect_request *conn_req) 407 struct caif_connect_request *conn_req)
414{ 408{
415 if (!data) { 409 if (!data) {
416 pr_warning("CAIF: %s: no params data found\n", __func__); 410 pr_warn("no params data found\n");
417 return; 411 return;
418 } 412 }
419 if (data[IFLA_CAIF_IPV4_CONNID]) 413 if (data[IFLA_CAIF_IPV4_CONNID])
@@ -442,8 +436,7 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
442 436
443 ret = register_netdevice(dev); 437 ret = register_netdevice(dev);
444 if (ret) 438 if (ret)
445 pr_warning("CAIF: %s(): device rtml registration failed\n", 439 pr_warn("device rtml registration failed\n");
446 __func__);
447 return ret; 440 return ret;
448} 441}
449 442
diff --git a/net/can/raw.c b/net/can/raw.c
index a10e3338f084..e88f610fdb7b 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -90,23 +90,39 @@ struct raw_sock {
90 can_err_mask_t err_mask; 90 can_err_mask_t err_mask;
91}; 91};
92 92
93/*
94 * Return pointer to store the extra msg flags for raw_recvmsg().
95 * We use the space of one unsigned int beyond the 'struct sockaddr_can'
96 * in skb->cb.
97 */
98static inline unsigned int *raw_flags(struct sk_buff *skb)
99{
100 BUILD_BUG_ON(sizeof(skb->cb) <= (sizeof(struct sockaddr_can) +
101 sizeof(unsigned int)));
102
103 /* return pointer after struct sockaddr_can */
104 return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]);
105}
106
93static inline struct raw_sock *raw_sk(const struct sock *sk) 107static inline struct raw_sock *raw_sk(const struct sock *sk)
94{ 108{
95 return (struct raw_sock *)sk; 109 return (struct raw_sock *)sk;
96} 110}
97 111
98static void raw_rcv(struct sk_buff *skb, void *data) 112static void raw_rcv(struct sk_buff *oskb, void *data)
99{ 113{
100 struct sock *sk = (struct sock *)data; 114 struct sock *sk = (struct sock *)data;
101 struct raw_sock *ro = raw_sk(sk); 115 struct raw_sock *ro = raw_sk(sk);
102 struct sockaddr_can *addr; 116 struct sockaddr_can *addr;
117 struct sk_buff *skb;
118 unsigned int *pflags;
103 119
104 /* check the received tx sock reference */ 120 /* check the received tx sock reference */
105 if (!ro->recv_own_msgs && skb->sk == sk) 121 if (!ro->recv_own_msgs && oskb->sk == sk)
106 return; 122 return;
107 123
108 /* clone the given skb to be able to enqueue it into the rcv queue */ 124 /* clone the given skb to be able to enqueue it into the rcv queue */
109 skb = skb_clone(skb, GFP_ATOMIC); 125 skb = skb_clone(oskb, GFP_ATOMIC);
110 if (!skb) 126 if (!skb)
111 return; 127 return;
112 128
@@ -123,6 +139,14 @@ static void raw_rcv(struct sk_buff *skb, void *data)
123 addr->can_family = AF_CAN; 139 addr->can_family = AF_CAN;
124 addr->can_ifindex = skb->dev->ifindex; 140 addr->can_ifindex = skb->dev->ifindex;
125 141
142 /* add CAN specific message flags for raw_recvmsg() */
143 pflags = raw_flags(skb);
144 *pflags = 0;
145 if (oskb->sk)
146 *pflags |= MSG_DONTROUTE;
147 if (oskb->sk == sk)
148 *pflags |= MSG_CONFIRM;
149
126 if (sock_queue_rcv_skb(sk, skb) < 0) 150 if (sock_queue_rcv_skb(sk, skb) < 0)
127 kfree_skb(skb); 151 kfree_skb(skb);
128} 152}
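
[Annotation] The raw_flags() helper introduced above parks one unsigned int of per-message flags in the skb control buffer, immediately after the struct sockaddr_can already stored there, and the BUILD_BUG_ON turns overflow of the 48-byte cb[] into a compile-time error. raw_rcv() records MSG_DONTROUTE for locally generated frames and MSG_CONFIRM for the socket's own traffic, which raw_recvmsg() later folds into msg_flags. A userspace sketch of the layout check, with simplified stand-in types:

    #include <assert.h>
    #include <string.h>

    /* Layout sketch: flags stored in the scratch area right after the
     * address. Types are simplified stand-ins for the kernel's.
     */
    struct sockaddr_can_sketch { unsigned short family; int ifindex; };

    struct fake_skb {
        char cb[48];    /* kernel sk_buffs carry a 48-byte control buffer */
    };

    static unsigned int *raw_flags(struct fake_skb *skb)
    {
        /* compile-time version of the BUILD_BUG_ON in the hunk above */
        _Static_assert(sizeof(((struct fake_skb *)0)->cb) >
                       sizeof(struct sockaddr_can_sketch) + sizeof(unsigned int),
                       "cb too small for addr + flags");
        /* flags live immediately after the address */
        return (unsigned int *)(&((struct sockaddr_can_sketch *)skb->cb)[1]);
    }

    int main(void)
    {
        struct fake_skb skb;
        memset(&skb, 0, sizeof(skb));
        *raw_flags(&skb) = 0x4;        /* MSG_DONTROUTE */
        assert(*raw_flags(&skb) == 0x4);
        return 0;
    }
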
@@ -647,12 +671,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
647 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); 671 err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
648 if (err < 0) 672 if (err < 0)
649 goto free_skb; 673 goto free_skb;
650 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 674 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
651 if (err < 0) 675 if (err < 0)
652 goto free_skb; 676 goto free_skb;
653 677
654 /* to be able to check the received tx sock reference in raw_rcv() */ 678 /* to be able to check the received tx sock reference in raw_rcv() */
655 skb_tx(skb)->prevent_sk_orphan = 1; 679 skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF;
656 680
657 skb->dev = dev; 681 skb->dev = dev;
658 skb->sk = sk; 682 skb->sk = sk;
@@ -707,6 +731,9 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
707 memcpy(msg->msg_name, skb->cb, msg->msg_namelen); 731 memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
708 } 732 }
709 733
734 /* assign the flags that have been recorded in raw_rcv() */
735 msg->msg_flags |= *(raw_flags(skb));
736
710 skb_free_datagram(sk, skb); 737 skb_free_datagram(sk, skb);
711 738
712 return size; 739 return size;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 282806ba7a57..cd1e039c8755 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -747,13 +747,12 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
747 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 747 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
748 mask |= POLLERR; 748 mask |= POLLERR;
749 if (sk->sk_shutdown & RCV_SHUTDOWN) 749 if (sk->sk_shutdown & RCV_SHUTDOWN)
750 mask |= POLLRDHUP; 750 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
751 if (sk->sk_shutdown == SHUTDOWN_MASK) 751 if (sk->sk_shutdown == SHUTDOWN_MASK)
752 mask |= POLLHUP; 752 mask |= POLLHUP;
753 753
754 /* readable? */ 754 /* readable? */
755 if (!skb_queue_empty(&sk->sk_receive_queue) || 755 if (!skb_queue_empty(&sk->sk_receive_queue))
756 (sk->sk_shutdown & RCV_SHUTDOWN))
757 mask |= POLLIN | POLLRDNORM; 756 mask |= POLLIN | POLLRDNORM;
758 757
759 /* Connection-based need to check for termination and startup */ 758 /* Connection-based need to check for termination and startup */
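
[Annotation] The datagram_poll() change above makes RCV_SHUTDOWN report POLLIN | POLLRDNORM directly alongside POLLRDHUP, instead of folding the shutdown test into the receive-queue check below it; the contract is unchanged: a poller must wake and see the socket as readable so it can go on to read the EOF. A userspace demonstration of that contract (AF_UNIX stands in for any datagram socket and has its own poll hook with the same semantics; POLLRDHUP is Linux-specific):

    #define _GNU_SOURCE        /* for POLLRDHUP */
    #include <poll.h>
    #include <stdio.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
        int sv[2];

        if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv) < 0)
            return 1;

        shutdown(sv[0], SHUT_RD);    /* sets RCV_SHUTDOWN on sv[0] */

        struct pollfd pfd = { .fd = sv[0], .events = POLLIN | POLLRDHUP };
        poll(&pfd, 1, 0);

        printf("POLLIN:%d POLLRDHUP:%d\n",
               !!(pfd.revents & POLLIN), !!(pfd.revents & POLLRDHUP));

        close(sv[0]);
        close(sv[1]);
        return 0;
    }
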
diff --git a/net/core/dev.c b/net/core/dev.c
index 7ec85e27beed..78b5a89b0f40 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -131,6 +131,7 @@
131#include <trace/events/net.h> 131#include <trace/events/net.h>
132#include <trace/events/skb.h> 132#include <trace/events/skb.h>
133#include <linux/pci.h> 133#include <linux/pci.h>
134#include <linux/inetdevice.h>
134 135
135#include "net-sysfs.h" 136#include "net-sysfs.h"
136 137
@@ -373,6 +374,14 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
373 * --ANK (980803) 374 * --ANK (980803)
374 */ 375 */
375 376
377static inline struct list_head *ptype_head(const struct packet_type *pt)
378{
379 if (pt->type == htons(ETH_P_ALL))
380 return &ptype_all;
381 else
382 return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
383}
384
376/** 385/**
377 * dev_add_pack - add packet handler 386 * dev_add_pack - add packet handler
378 * @pt: packet type declaration 387 * @pt: packet type declaration
@@ -388,16 +397,11 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
388 397
389void dev_add_pack(struct packet_type *pt) 398void dev_add_pack(struct packet_type *pt)
390{ 399{
391 int hash; 400 struct list_head *head = ptype_head(pt);
392 401
393 spin_lock_bh(&ptype_lock); 402 spin_lock(&ptype_lock);
394 if (pt->type == htons(ETH_P_ALL)) 403 list_add_rcu(&pt->list, head);
395 list_add_rcu(&pt->list, &ptype_all); 404 spin_unlock(&ptype_lock);
396 else {
397 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
398 list_add_rcu(&pt->list, &ptype_base[hash]);
399 }
400 spin_unlock_bh(&ptype_lock);
401} 405}
402EXPORT_SYMBOL(dev_add_pack); 406EXPORT_SYMBOL(dev_add_pack);
403 407
@@ -416,15 +420,10 @@ EXPORT_SYMBOL(dev_add_pack);
416 */ 420 */
417void __dev_remove_pack(struct packet_type *pt) 421void __dev_remove_pack(struct packet_type *pt)
418{ 422{
419 struct list_head *head; 423 struct list_head *head = ptype_head(pt);
420 struct packet_type *pt1; 424 struct packet_type *pt1;
421 425
422 spin_lock_bh(&ptype_lock); 426 spin_lock(&ptype_lock);
423
424 if (pt->type == htons(ETH_P_ALL))
425 head = &ptype_all;
426 else
427 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
428 427
429 list_for_each_entry(pt1, head, list) { 428 list_for_each_entry(pt1, head, list) {
430 if (pt == pt1) { 429 if (pt == pt1) {
@@ -435,7 +434,7 @@ void __dev_remove_pack(struct packet_type *pt)
435 434
436 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); 435 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
437out: 436out:
438 spin_unlock_bh(&ptype_lock); 437 spin_unlock(&ptype_lock);
439} 438}
440EXPORT_SYMBOL(__dev_remove_pack); 439EXPORT_SYMBOL(__dev_remove_pack);
441 440
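
[Annotation] The ptype_head() helper above exists so dev_add_pack() and __dev_remove_pack() no longer duplicate the list-selection logic: ETH_P_ALL taps go on their own ptype_all list, everything else lands in a small hash of buckets indexed by the low bits of the host-order protocol number. A quick standalone check of the bucket arithmetic; the 16-entry table size is assumed from this era's net/core/dev.c:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PTYPE_HASH_SIZE 16
    #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
    #define ETH_P_ALL 0x0003
    #define ETH_P_IP  0x0800
    #define ETH_P_ARP 0x0806

    static int ptype_bucket(uint16_t type_be)
    {
        if (type_be == htons(ETH_P_ALL))
            return -1;        /* dedicated ptype_all list */
        return ntohs(type_be) & PTYPE_HASH_MASK;
    }

    int main(void)
    {
        printf("IP  -> bucket %d\n", ptype_bucket(htons(ETH_P_IP)));   /* 0 */
        printf("ARP -> bucket %d\n", ptype_bucket(htons(ETH_P_ARP)));  /* 6 */
        printf("ALL -> %d (own list)\n", ptype_bucket(htons(ETH_P_ALL)));
        return 0;
    }
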
@@ -1486,8 +1485,9 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
1486 skb_orphan(skb); 1485 skb_orphan(skb);
1487 nf_reset(skb); 1486 nf_reset(skb);
1488 1487
1489 if (!(dev->flags & IFF_UP) || 1488 if (unlikely(!(dev->flags & IFF_UP) ||
1490 (skb->len > (dev->mtu + dev->hard_header_len))) { 1489 (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
1490 atomic_long_inc(&dev->rx_dropped);
1491 kfree_skb(skb); 1491 kfree_skb(skb);
1492 return NET_RX_DROP; 1492 return NET_RX_DROP;
1493 } 1493 }
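
[Annotation] Two things change in dev_forward_skb() above: the length cap gains VLAN_HLEN (4 bytes), since a forwarded frame may still carry an 802.1Q tag on top of mtu + hard_header_len, and over-long or down-device drops are now counted in the new rx_dropped statistic rather than vanishing silently. The arithmetic, extracted as a sketch:

    #include <stdbool.h>
    #include <stdio.h>

    /* The forwarding length check above: a tagged frame adds 4 bytes
     * (VLAN_HLEN) beyond MTU plus the link-layer header.
     */
    #define VLAN_HLEN 4

    static bool forward_len_ok(unsigned int len, unsigned int mtu,
                               unsigned int hard_header_len)
    {
        return len <= mtu + hard_header_len + VLAN_HLEN;
    }

    int main(void)
    {
        /* 1500-byte MTU, 14-byte Ethernet header: tagged max is 1518 */
        printf("%d\n", forward_len_ok(1518, 1500, 14));    /* 1 */
        printf("%d\n", forward_len_ok(1519, 1500, 14));    /* 0 */
        return 0;
    }
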
@@ -1555,21 +1555,56 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1555 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues 1555 * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
1556 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed. 1556 * greater then real_num_tx_queues stale skbs on the qdisc must be flushed.
1557 */ 1557 */
1558void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) 1558int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
1559{ 1559{
1560 unsigned int real_num = dev->real_num_tx_queues; 1560 if (txq < 1 || txq > dev->num_tx_queues)
1561 return -EINVAL;
1561 1562
1562 if (unlikely(txq > dev->num_tx_queues)) 1563 if (dev->reg_state == NETREG_REGISTERED) {
1563 ; 1564 ASSERT_RTNL();
1564 else if (txq > real_num) 1565
1565 dev->real_num_tx_queues = txq; 1566 if (txq < dev->real_num_tx_queues)
1566 else if (txq < real_num) { 1567 qdisc_reset_all_tx_gt(dev, txq);
1567 dev->real_num_tx_queues = txq;
1568 qdisc_reset_all_tx_gt(dev, txq);
1569 } 1568 }
1569
1570 dev->real_num_tx_queues = txq;
1571 return 0;
1570} 1572}
1571EXPORT_SYMBOL(netif_set_real_num_tx_queues); 1573EXPORT_SYMBOL(netif_set_real_num_tx_queues);
1572 1574
1575#ifdef CONFIG_RPS
1576/**
1577 * netif_set_real_num_rx_queues - set actual number of RX queues used
1578 * @dev: Network device
1579 * @rxq: Actual number of RX queues
1580 *
1581 * This must be called either with the rtnl_lock held or before
1582 * registration of the net device. Returns 0 on success, or a
1583 * negative error code. If called before registration, it always
1584 * succeeds.
1585 */
1586int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
1587{
1588 int rc;
1589
1590 if (rxq < 1 || rxq > dev->num_rx_queues)
1591 return -EINVAL;
1592
1593 if (dev->reg_state == NETREG_REGISTERED) {
1594 ASSERT_RTNL();
1595
1596 rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
1597 rxq);
1598 if (rc)
1599 return rc;
1600 }
1601
1602 dev->real_num_rx_queues = rxq;
1603 return 0;
1604}
1605EXPORT_SYMBOL(netif_set_real_num_rx_queues);
1606#endif
1607
1573static inline void __netif_reschedule(struct Qdisc *q) 1608static inline void __netif_reschedule(struct Qdisc *q)
1574{ 1609{
1575 struct softnet_data *sd; 1610 struct softnet_data *sd;
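
[Annotation] netif_set_real_num_tx_queues() above turns a silent clamp into a validated setter: out-of-range counts now return -EINVAL, RTNL is asserted once the device is registered, shrinking still flushes stale qdiscs beyond the new count, and the new RX-side twin netif_set_real_num_rx_queues() gives RPS the same treatment (its kobject update adjusts the sysfs queue directories). The shape of the refactor as a plain C stand-in:

    #include <errno.h>
    #include <stdio.h>

    /* Sketch only; the kernel version additionally asserts RTNL and
     * updates qdisc/sysfs state.
     */
    struct fake_dev { unsigned int num_txq, real_num_txq; int registered; };

    static int set_real_num_tx_queues(struct fake_dev *d, unsigned int txq)
    {
        if (txq < 1 || txq > d->num_txq)
            return -EINVAL;
        if (d->registered && txq < d->real_num_txq)
            puts("flush qdiscs on queues beyond the new count");
        d->real_num_txq = txq;
        return 0;
    }

    int main(void)
    {
        struct fake_dev d = { .num_txq = 8, .real_num_txq = 8, .registered = 1 };
        printf("%d\n", set_real_num_tx_queues(&d, 4));    /* 0 */
        printf("%d\n", set_real_num_tx_queues(&d, 9));    /* -22 (EINVAL) */
        return 0;
    }
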
@@ -1661,7 +1696,12 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1661 1696
1662static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) 1697static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1663{ 1698{
1664 if (can_checksum_protocol(dev->features, skb->protocol)) 1699 int features = dev->features;
1700
1701 if (vlan_tx_tag_present(skb))
1702 features &= dev->vlan_features;
1703
1704 if (can_checksum_protocol(features, skb->protocol))
1665 return true; 1705 return true;
1666 1706
1667 if (skb->protocol == htons(ETH_P_8021Q)) { 1707 if (skb->protocol == htons(ETH_P_8021Q)) {
@@ -1760,6 +1800,16 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1760 __be16 type = skb->protocol; 1800 __be16 type = skb->protocol;
1761 int err; 1801 int err;
1762 1802
1803 if (type == htons(ETH_P_8021Q)) {
1804 struct vlan_ethhdr *veh;
1805
1806 if (unlikely(!pskb_may_pull(skb, VLAN_ETH_HLEN)))
1807 return ERR_PTR(-EINVAL);
1808
1809 veh = (struct vlan_ethhdr *)skb->data;
1810 type = veh->h_vlan_encapsulated_proto;
1811 }
1812
1763 skb_reset_mac_header(skb); 1813 skb_reset_mac_header(skb);
1764 skb->mac_len = skb->network_header - skb->mac_header; 1814 skb->mac_len = skb->network_header - skb->mac_header;
1765 __skb_pull(skb, skb->mac_len); 1815 __skb_pull(skb, skb->mac_len);
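
[Annotation] skb_gso_segment() above learns to look through an 802.1Q header: if the outer protocol is ETH_P_8021Q, the real type used to pick a segmentation handler is the encapsulated-proto field 16 bytes into the frame, after a pskb_may_pull() guarantees those bytes are present. The header peek in isolation, with a struct layout mirroring the kernel's vlan_ethhdr:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    struct vlan_ethhdr {
        uint8_t  h_dest[6];
        uint8_t  h_source[6];
        uint16_t h_vlan_proto;                /* 0x8100, big-endian */
        uint16_t h_vlan_TCI;
        uint16_t h_vlan_encapsulated_proto;
    } __attribute__((packed));

    int main(void)
    {
        uint8_t frame[18] = { 0 };
        struct vlan_ethhdr *veh = (struct vlan_ethhdr *)frame;

        veh->h_vlan_proto = htons(0x8100);              /* ETH_P_8021Q */
        veh->h_vlan_encapsulated_proto = htons(0x0800); /* ETH_P_IP inside */

        uint16_t type = ntohs(veh->h_vlan_proto) == 0x8100
            ? ntohs(veh->h_vlan_encapsulated_proto)
            : ntohs(veh->h_vlan_proto);
        printf("inner type: 0x%04x\n", type);           /* 0x0800 */
        return 0;
    }
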
@@ -1904,14 +1954,14 @@ static int dev_gso_segment(struct sk_buff *skb)
1904 1954
1905/* 1955/*
1906 * Try to orphan skb early, right before transmission by the device. 1956 * Try to orphan skb early, right before transmission by the device.
1907 * We cannot orphan skb if tx timestamp is requested, since 1957 * We cannot orphan skb if tx timestamp is requested or the sk-reference
1908 * drivers need to call skb_tstamp_tx() to send the timestamp. 1958 * is needed on driver level for other reasons, e.g. see net/can/raw.c
1909 */ 1959 */
1910static inline void skb_orphan_try(struct sk_buff *skb) 1960static inline void skb_orphan_try(struct sk_buff *skb)
1911{ 1961{
1912 struct sock *sk = skb->sk; 1962 struct sock *sk = skb->sk;
1913 1963
1914 if (sk && !skb_tx(skb)->flags) { 1964 if (sk && !skb_shinfo(skb)->tx_flags) {
1915 /* skb_tx_hash() wont be able to get sk. 1965 /* skb_tx_hash() wont be able to get sk.
1916 * We copy sk_hash into skb->rxhash 1966 * We copy sk_hash into skb->rxhash
1917 */ 1967 */
@@ -1931,9 +1981,14 @@ static inline void skb_orphan_try(struct sk_buff *skb)
1931static inline int skb_needs_linearize(struct sk_buff *skb, 1981static inline int skb_needs_linearize(struct sk_buff *skb,
1932 struct net_device *dev) 1982 struct net_device *dev)
1933{ 1983{
1984 int features = dev->features;
1985
1986 if (skb->protocol == htons(ETH_P_8021Q) || vlan_tx_tag_present(skb))
1987 features &= dev->vlan_features;
1988
1934 return skb_is_nonlinear(skb) && 1989 return skb_is_nonlinear(skb) &&
1935 ((skb_has_frags(skb) && !(dev->features & NETIF_F_FRAGLIST)) || 1990 ((skb_has_frag_list(skb) && !(features & NETIF_F_FRAGLIST)) ||
1936 (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || 1991 (skb_shinfo(skb)->nr_frags && (!(features & NETIF_F_SG) ||
1937 illegal_highdma(dev, skb)))); 1992 illegal_highdma(dev, skb))));
1938} 1993}
1939 1994
@@ -1956,6 +2011,15 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1956 2011
1957 skb_orphan_try(skb); 2012 skb_orphan_try(skb);
1958 2013
2014 if (vlan_tx_tag_present(skb) &&
2015 !(dev->features & NETIF_F_HW_VLAN_TX)) {
2016 skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
2017 if (unlikely(!skb))
2018 goto out;
2019
2020 skb->vlan_tci = 0;
2021 }
2022
1959 if (netif_needs_gso(dev, skb)) { 2023 if (netif_needs_gso(dev, skb)) {
1960 if (unlikely(dev_gso_segment(skb))) 2024 if (unlikely(dev_gso_segment(skb)))
1961 goto out_kfree_skb; 2025 goto out_kfree_skb;
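When the device lacks NETIF_F_HW_VLAN_TX, __vlan_put_tag() falls back to rewriting the frame in software. Outside the skb machinery, that rewrite amounts to opening a 4-byte gap after the two MAC addresses and writing the 802.1Q TPID and TCI there. A user-space sketch of the layout change only (vlan_insert() is a hypothetical stand-in; buffer capacity is the caller's problem):

#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

#define ETH_ALEN    6
#define ETH_P_8021Q 0x8100

/* frame must have at least len + 4 bytes of capacity */
static size_t vlan_insert(uint8_t *frame, size_t len, uint16_t tci)
{
	uint16_t tpid = htons(ETH_P_8021Q);
	uint16_t tci_be = htons(tci);

	/* shift EtherType + payload right by 4 to make room for the tag */
	memmove(frame + 2 * ETH_ALEN + 4, frame + 2 * ETH_ALEN,
		len - 2 * ETH_ALEN);
	memcpy(frame + 2 * ETH_ALEN, &tpid, 2);
	memcpy(frame + 2 * ETH_ALEN + 2, &tci_be, 2);
	return len + 4;
}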
@@ -2019,6 +2083,7 @@ out_kfree_gso_skb:
 	skb->destructor = DEV_GSO_CB(skb)->destructor;
 out_kfree_skb:
 	kfree_skb(skb);
+out:
 	return rc;
 }
 
@@ -2147,6 +2212,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	return rc;
 }
 
+static DEFINE_PER_CPU(int, xmit_recursion);
+#define RECURSION_LIMIT 3
+
 /**
  *	dev_queue_xmit - transmit a buffer
  *	@skb: buffer to transmit
@@ -2213,10 +2281,15 @@ int dev_queue_xmit(struct sk_buff *skb)
 
 	if (txq->xmit_lock_owner != cpu) {
 
+		if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+			goto recursion_alert;
+
 		HARD_TX_LOCK(dev, txq, cpu);
 
 		if (!netif_tx_queue_stopped(txq)) {
+			__this_cpu_inc(xmit_recursion);
 			rc = dev_hard_start_xmit(skb, dev, txq);
+			__this_cpu_dec(xmit_recursion);
 			if (dev_xmit_complete(rc)) {
 				HARD_TX_UNLOCK(dev, txq);
 				goto out;
@@ -2228,7 +2301,9 @@ int dev_queue_xmit(struct sk_buff *skb)
 			       "queue packet!\n", dev->name);
 	} else {
 		/* Recursion is detected! It is possible,
-		 * unfortunately */
+		 * unfortunately
+		 */
+recursion_alert:
 		if (net_ratelimit())
 			printk(KERN_CRIT "Dead loop on virtual device "
 			       "%s, fix it urgently!\n", dev->name);
@@ -2264,69 +2339,44 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }
 
-#ifdef CONFIG_RPS
-
-/* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
-EXPORT_SYMBOL(rps_sock_flow_table);
-
 /*
- * get_rps_cpu is called from netif_receive_skb and returns the target
- * CPU from the RPS map of the receiving queue for a given skb.
- * rcu_read_lock must be held on entry.
+ * __skb_get_rxhash: calculate a flow hash based on src/dst addresses
+ * and src/dst port numbers. Returns a non-zero hash number on success
+ * and 0 on failure.
  */
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
-		       struct rps_dev_flow **rflowp)
+__u32 __skb_get_rxhash(struct sk_buff *skb)
 {
+	int nhoff, hash = 0, poff;
 	struct ipv6hdr *ip6;
 	struct iphdr *ip;
-	struct netdev_rx_queue *rxqueue;
-	struct rps_map *map;
-	struct rps_dev_flow_table *flow_table;
-	struct rps_sock_flow_table *sock_flow_table;
-	int cpu = -1;
 	u8 ip_proto;
-	u16 tcpu;
 	u32 addr1, addr2, ihl;
 	union {
 		u32 v32;
 		u16 v16[2];
 	} ports;
 
-	if (skb_rx_queue_recorded(skb)) {
-		u16 index = skb_get_rx_queue(skb);
-		if (unlikely(index >= dev->num_rx_queues)) {
-			WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
-				"on queue %u, but number of RX queues is %u\n",
-				dev->name, index, dev->num_rx_queues);
-			goto done;
-		}
-		rxqueue = dev->_rx + index;
-	} else
-		rxqueue = dev->_rx;
-
-	if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
-		goto done;
-
-	if (skb->rxhash)
-		goto got_hash; /* Skip hash computation on packet header */
+	nhoff = skb_network_offset(skb);
 
 	switch (skb->protocol) {
 	case __constant_htons(ETH_P_IP):
-		if (!pskb_may_pull(skb, sizeof(*ip)))
+		if (!pskb_may_pull(skb, sizeof(*ip) + nhoff))
 			goto done;
 
-		ip = (struct iphdr *) skb->data;
-		ip_proto = ip->protocol;
+		ip = (struct iphdr *) (skb->data + nhoff);
+		if (ip->frag_off & htons(IP_MF | IP_OFFSET))
+			ip_proto = 0;
+		else
+			ip_proto = ip->protocol;
 		addr1 = (__force u32) ip->saddr;
 		addr2 = (__force u32) ip->daddr;
 		ihl = ip->ihl;
 		break;
 	case __constant_htons(ETH_P_IPV6):
-		if (!pskb_may_pull(skb, sizeof(*ip6)))
+		if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff))
 			goto done;
 
-		ip6 = (struct ipv6hdr *) skb->data;
+		ip6 = (struct ipv6hdr *) (skb->data + nhoff);
 		ip_proto = ip6->nexthdr;
 		addr1 = (__force u32) ip6->saddr.s6_addr32[3];
 		addr2 = (__force u32) ip6->daddr.s6_addr32[3];
@@ -2335,33 +2385,81 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	default:
 		goto done;
 	}
-	switch (ip_proto) {
-	case IPPROTO_TCP:
-	case IPPROTO_UDP:
-	case IPPROTO_DCCP:
-	case IPPROTO_ESP:
-	case IPPROTO_AH:
-	case IPPROTO_SCTP:
-	case IPPROTO_UDPLITE:
-		if (pskb_may_pull(skb, (ihl * 4) + 4)) {
-			ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
+
+	ports.v32 = 0;
+	poff = proto_ports_offset(ip_proto);
+	if (poff >= 0) {
+		nhoff += ihl * 4 + poff;
+		if (pskb_may_pull(skb, nhoff + 4)) {
+			ports.v32 = * (__force u32 *) (skb->data + nhoff);
 			if (ports.v16[1] < ports.v16[0])
 				swap(ports.v16[0], ports.v16[1]);
-			break;
 		}
-	default:
-		ports.v32 = 0;
-		break;
 	}
 
 	/* get a consistent hash (same value on both flow directions) */
 	if (addr2 < addr1)
 		swap(addr1, addr2);
-	skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
-	if (!skb->rxhash)
-		skb->rxhash = 1;
 
-got_hash:
+	hash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
+	if (!hash)
+		hash = 1;
+
+done:
+	return hash;
+}
+EXPORT_SYMBOL(__skb_get_rxhash);
+
+#ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+		       struct rps_dev_flow **rflowp)
+{
+	struct netdev_rx_queue *rxqueue;
+	struct rps_map *map = NULL;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_sock_flow_table *sock_flow_table;
+	int cpu = -1;
+	u16 tcpu;
+
+	if (skb_rx_queue_recorded(skb)) {
+		u16 index = skb_get_rx_queue(skb);
+		if (unlikely(index >= dev->real_num_rx_queues)) {
+			WARN_ONCE(dev->real_num_rx_queues > 1,
+				  "%s received packet on queue %u, but number "
+				  "of RX queues is %u\n",
+				  dev->name, index, dev->real_num_rx_queues);
+			goto done;
+		}
+		rxqueue = dev->_rx + index;
+	} else
+		rxqueue = dev->_rx;
+
+	if (rxqueue->rps_map) {
+		map = rcu_dereference(rxqueue->rps_map);
+		if (map && map->len == 1) {
+			tcpu = map->cpus[0];
+			if (cpu_online(tcpu))
+				cpu = tcpu;
+			goto done;
+		}
+	} else if (!rxqueue->rps_flow_table) {
+		goto done;
+	}
+
+	skb_reset_network_header(skb);
+	if (!skb_get_rxhash(skb))
+		goto done;
+
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
 	sock_flow_table = rcu_dereference(rps_sock_flow_table);
 	if (flow_table && sock_flow_table) {
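The hash is made direction-independent by sorting the address pair and the port pair before mixing, so A->B and B->A fall into the same flow bucket. A self-contained user-space sketch of that idea (mix32() is a toy stand-in for jhash_3words(), not the kernel function):

#include <stdint.h>
#include <stdio.h>

static uint32_t mix32(uint32_t a, uint32_t b, uint32_t c)
{
	a ^= b * 0x9e3779b9u;
	a = (a << 13) | (a >> 19);
	a ^= c * 0x85ebca6bu;
	return a ? a : 1;	/* 0 is reserved to mean "no hash" */
}

static uint32_t flow_hash(uint32_t saddr, uint32_t daddr,
			  uint16_t sport, uint16_t dport)
{
	uint32_t ta;
	uint16_t tp;

	if (daddr < saddr) { ta = saddr; saddr = daddr; daddr = ta; }
	if (dport < sport) { tp = sport; sport = dport; dport = tp; }
	return mix32(saddr, daddr, ((uint32_t)sport << 16) | dport);
}

int main(void)
{
	/* both directions of the same flow hash identically */
	printf("%u %u\n", flow_hash(1, 2, 80, 1234),
			  flow_hash(2, 1, 1234, 80));
	return 0;
}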
@@ -2401,7 +2499,6 @@ got_hash:
 		}
 	}
 
-	map = rcu_dereference(rxqueue->rps_map);
 	if (map) {
 		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
 
@@ -2487,6 +2584,7 @@ enqueue:
 
 	local_irq_restore(flags);
 
+	atomic_long_inc(&skb->dev->rx_dropped);
 	kfree_skb(skb);
 	return NET_RX_DROP;
 }
@@ -2643,11 +2741,10 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
  * the ingress scheduler, you just cant add policies on ingress.
  *
  */
-static int ing_filter(struct sk_buff *skb)
+static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
 {
 	struct net_device *dev = skb->dev;
 	u32 ttl = G_TC_RTTL(skb->tc_verd);
-	struct netdev_queue *rxq;
 	int result = TC_ACT_OK;
 	struct Qdisc *q;
 
@@ -2661,8 +2758,6 @@ static int ing_filter(struct sk_buff *skb)
 	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
 
-	rxq = &dev->rx_queue;
-
 	q = rxq->qdisc;
 	if (q != &noop_qdisc) {
 		spin_lock(qdisc_lock(q));
@@ -2678,7 +2773,9 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 					 struct packet_type **pt_prev,
 					 int *ret, struct net_device *orig_dev)
 {
-	if (skb->dev->rx_queue.qdisc == &noop_qdisc)
+	struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+
+	if (!rxq || rxq->qdisc == &noop_qdisc)
 		goto out;
 
 	if (*pt_prev) {
@@ -2686,7 +2783,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 		*pt_prev = NULL;
 	}
 
-	switch (ing_filter(skb)) {
+	switch (ing_filter(skb, rxq)) {
 	case TC_ACT_SHOT:
 	case TC_ACT_STOLEN:
 		kfree_skb(skb);
@@ -2699,33 +2796,6 @@ out:
 }
 #endif
 
-/*
- * netif_nit_deliver - deliver received packets to network taps
- * @skb: buffer
- *
- * This function is used to deliver incoming packets to network
- * taps. It should be used when the normal netif_receive_skb path
- * is bypassed, for example because of VLAN acceleration.
- */
-void netif_nit_deliver(struct sk_buff *skb)
-{
-	struct packet_type *ptype;
-
-	if (list_empty(&ptype_all))
-		return;
-
-	skb_reset_network_header(skb);
-	skb_reset_transport_header(skb);
-	skb->mac_len = skb->network_header - skb->mac_header;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(ptype, &ptype_all, list) {
-		if (!ptype->dev || ptype->dev == skb->dev)
-			deliver_skb(skb, ptype, skb->dev);
-	}
-	rcu_read_unlock();
-}
-
 /**
  *	netdev_rx_handler_register - register receive handler
  *	@dev: device to register a handler for
@@ -2836,8 +2906,6 @@ static int __netif_receive_skb(struct sk_buff *skb)
 		net_timestamp_check(skb);
 
 	trace_netif_receive_skb(skb);
-	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
-		return NET_RX_SUCCESS;
 
 	/* if we've gotten here through NAPI, check netpoll */
 	if (netpoll_receive_skb(skb))
@@ -2851,8 +2919,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	 * be delivered to pkt handlers that are exact matches. Also
 	 * the deliver_no_wcard flag will be set. If packet handlers
 	 * are sensitive to duplicate packets these skbs will need to
-	 * be dropped at the handler. The vlan accel path may have
-	 * already set the deliver_no_wcard flag.
+	 * be dropped at the handler.
 	 */
 	null_or_orig = NULL;
 	orig_dev = skb->dev;
@@ -2911,6 +2978,18 @@ ncls:
 		goto out;
 	}
 
+	if (vlan_tx_tag_present(skb)) {
+		if (pt_prev) {
+			ret = deliver_skb(skb, pt_prev, orig_dev);
+			pt_prev = NULL;
+		}
+		if (vlan_hwaccel_do_receive(&skb)) {
+			ret = __netif_receive_skb(skb);
+			goto out;
+		} else if (unlikely(!skb))
+			goto out;
+	}
+
 	/*
 	 * Make sure frames received on VLAN interfaces stacked on
 	 * bonding interfaces still make their way to any base bonding
@@ -2938,6 +3017,7 @@ ncls:
 	if (pt_prev) {
 		ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 	} else {
+		atomic_long_inc(&skb->dev->rx_dropped);
 		kfree_skb(skb);
 		/* Jamal, now you will not able to escape explaining
 		 * me how you were going to use this. :-)
@@ -3058,7 +3138,7 @@ out:
 	return netif_receive_skb(skb);
 }
 
-static void napi_gro_flush(struct napi_struct *napi)
+inline void napi_gro_flush(struct napi_struct *napi)
 {
 	struct sk_buff *skb, *next;
 
@@ -3071,6 +3151,7 @@ static void napi_gro_flush(struct napi_struct *napi)
 	napi->gro_count = 0;
 	napi->gro_list = NULL;
 }
+EXPORT_SYMBOL(napi_gro_flush);
 
 enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
@@ -3085,7 +3166,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb))
 		goto normal;
 
-	if (skb_is_gso(skb) || skb_has_frags(skb))
+	if (skb_is_gso(skb) || skb_has_frag_list(skb))
 		goto normal;
 
 	rcu_read_lock();
@@ -3164,16 +3245,19 @@ normal:
 }
 EXPORT_SYMBOL(dev_gro_receive);
 
-static gro_result_t
+static inline gro_result_t
 __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 {
 	struct sk_buff *p;
 
 	for (p = napi->gro_list; p; p = p->next) {
-		NAPI_GRO_CB(p)->same_flow =
-			(p->dev == skb->dev) &&
-			!compare_ether_header(skb_mac_header(p),
+		unsigned long diffs;
+
+		diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+		diffs |= p->vlan_tci ^ skb->vlan_tci;
+		diffs |= compare_ether_header(skb_mac_header(p),
 					      skb_gro_mac_header(skb));
+		NAPI_GRO_CB(p)->same_flow = !diffs;
 		NAPI_GRO_CB(p)->flush = 0;
 	}
 
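The rewritten comparison folds every field difference into one accumulator and tests once at the end, avoiding branchy short-circuit chains in this hot loop. The same trick as a standalone predicate (struct flow_key and same_flow() are illustrative, not kernel types):

#include <stdbool.h>
#include <string.h>

struct flow_key {
	const void *dev;
	unsigned short vlan_tci;
	unsigned char mac[12];	/* dst + src, as at the head of a frame */
};

static bool same_flow(const struct flow_key *a, const struct flow_key *b)
{
	unsigned long diffs;

	diffs  = (unsigned long)a->dev ^ (unsigned long)b->dev;
	diffs |= a->vlan_tci ^ b->vlan_tci;
	diffs |= memcmp(a->mac, b->mac, sizeof(a->mac)) ? 1 : 0;
	return diffs == 0;	/* single test after unconditional folds */
}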
@@ -3226,14 +3310,14 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3226} 3310}
3227EXPORT_SYMBOL(napi_gro_receive); 3311EXPORT_SYMBOL(napi_gro_receive);
3228 3312
3229void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) 3313static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3230{ 3314{
3231 __skb_pull(skb, skb_headlen(skb)); 3315 __skb_pull(skb, skb_headlen(skb));
3232 skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb)); 3316 skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
3317 skb->vlan_tci = 0;
3233 3318
3234 napi->skb = skb; 3319 napi->skb = skb;
3235} 3320}
3236EXPORT_SYMBOL(napi_reuse_skb);
3237 3321
3238struct sk_buff *napi_get_frags(struct napi_struct *napi) 3322struct sk_buff *napi_get_frags(struct napi_struct *napi)
3239{ 3323{
@@ -4867,21 +4951,6 @@ static void rollback_registered(struct net_device *dev)
4867 rollback_registered_many(&single); 4951 rollback_registered_many(&single);
4868} 4952}
4869 4953
4870static void __netdev_init_queue_locks_one(struct net_device *dev,
4871 struct netdev_queue *dev_queue,
4872 void *_unused)
4873{
4874 spin_lock_init(&dev_queue->_xmit_lock);
4875 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
4876 dev_queue->xmit_lock_owner = -1;
4877}
4878
4879static void netdev_init_queue_locks(struct net_device *dev)
4880{
4881 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
4882 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
4883}
4884
4885unsigned long netdev_fix_features(unsigned long features, const char *name) 4954unsigned long netdev_fix_features(unsigned long features, const char *name)
4886{ 4955{
4887 /* Fix illegal SG+CSUM combinations. */ 4956 /* Fix illegal SG+CSUM combinations. */
@@ -4949,6 +5018,66 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
4949} 5018}
4950EXPORT_SYMBOL(netif_stacked_transfer_operstate); 5019EXPORT_SYMBOL(netif_stacked_transfer_operstate);
4951 5020
5021static int netif_alloc_rx_queues(struct net_device *dev)
5022{
5023#ifdef CONFIG_RPS
5024 unsigned int i, count = dev->num_rx_queues;
5025 struct netdev_rx_queue *rx;
5026
5027 BUG_ON(count < 1);
5028
5029 rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
5030 if (!rx) {
5031 pr_err("netdev: Unable to allocate %u rx queues.\n", count);
5032 return -ENOMEM;
5033 }
5034 dev->_rx = rx;
5035
5036 /*
5037 * Set a pointer to first element in the array which holds the
5038 * reference count.
5039 */
5040 for (i = 0; i < count; i++)
5041 rx[i].first = rx;
5042#endif
5043 return 0;
5044}
5045
5046static int netif_alloc_netdev_queues(struct net_device *dev)
5047{
5048 unsigned int count = dev->num_tx_queues;
5049 struct netdev_queue *tx;
5050
5051 BUG_ON(count < 1);
5052
5053 tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
5054 if (!tx) {
5055 pr_err("netdev: Unable to allocate %u tx queues.\n",
5056 count);
5057 return -ENOMEM;
5058 }
5059 dev->_tx = tx;
5060 return 0;
5061}
5062
5063static void netdev_init_one_queue(struct net_device *dev,
5064 struct netdev_queue *queue,
5065 void *_unused)
5066{
5067 queue->dev = dev;
5068
5069 /* Initialize queue lock */
5070 spin_lock_init(&queue->_xmit_lock);
5071 netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
5072 queue->xmit_lock_owner = -1;
5073}
5074
5075static void netdev_init_queues(struct net_device *dev)
5076{
5077 netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5078 spin_lock_init(&dev->tx_global_lock);
5079}
5080
4952/** 5081/**
4953 * register_netdevice - register a network device 5082 * register_netdevice - register a network device
4954 * @dev: device to register 5083 * @dev: device to register
@@ -4982,28 +5111,19 @@ int register_netdevice(struct net_device *dev)
4982 5111
4983 spin_lock_init(&dev->addr_list_lock); 5112 spin_lock_init(&dev->addr_list_lock);
4984 netdev_set_addr_lockdep_class(dev); 5113 netdev_set_addr_lockdep_class(dev);
4985 netdev_init_queue_locks(dev);
4986 5114
4987 dev->iflink = -1; 5115 dev->iflink = -1;
4988 5116
4989#ifdef CONFIG_RPS 5117 ret = netif_alloc_rx_queues(dev);
4990 if (!dev->num_rx_queues) { 5118 if (ret)
4991 /* 5119 goto out;
4992 * Allocate a single RX queue if driver never called
4993 * alloc_netdev_mq
4994 */
4995 5120
4996 dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL); 5121 ret = netif_alloc_netdev_queues(dev);
4997 if (!dev->_rx) { 5122 if (ret)
4998 ret = -ENOMEM; 5123 goto out;
4999 goto out; 5124
5000 } 5125 netdev_init_queues(dev);
5001 5126
5002 dev->_rx->first = dev->_rx;
5003 atomic_set(&dev->_rx->count, 1);
5004 dev->num_rx_queues = 1;
5005 }
5006#endif
5007 /* Init, if this function is available */ 5127 /* Init, if this function is available */
5008 if (dev->netdev_ops->ndo_init) { 5128 if (dev->netdev_ops->ndo_init) {
5009 ret = dev->netdev_ops->ndo_init(dev); 5129 ret = dev->netdev_ops->ndo_init(dev);
@@ -5043,6 +5163,12 @@ int register_netdevice(struct net_device *dev)
5043 if (dev->features & NETIF_F_SG) 5163 if (dev->features & NETIF_F_SG)
5044 dev->features |= NETIF_F_GSO; 5164 dev->features |= NETIF_F_GSO;
5045 5165
5166 /* Enable GRO and NETIF_F_HIGHDMA for vlans by default,
5167 * vlan_dev_init() will do the dev->features check, so these features
5168 * are enabled only if supported by underlying device.
5169 */
5170 dev->vlan_features |= (NETIF_F_GRO | NETIF_F_HIGHDMA);
5171
5046 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev); 5172 ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
5047 ret = notifier_to_errno(ret); 5173 ret = notifier_to_errno(ret);
5048 if (ret) 5174 if (ret)
@@ -5113,9 +5239,6 @@ int init_dummy_netdev(struct net_device *dev)
5113 */ 5239 */
5114 dev->reg_state = NETREG_DUMMY; 5240 dev->reg_state = NETREG_DUMMY;
5115 5241
5116 /* initialize the ref count */
5117 atomic_set(&dev->refcnt, 1);
5118
5119 /* NAPI wants this */ 5242 /* NAPI wants this */
5120 INIT_LIST_HEAD(&dev->napi_list); 5243 INIT_LIST_HEAD(&dev->napi_list);
5121 5244
@@ -5123,6 +5246,11 @@ int init_dummy_netdev(struct net_device *dev)
5123 set_bit(__LINK_STATE_PRESENT, &dev->state); 5246 set_bit(__LINK_STATE_PRESENT, &dev->state);
5124 set_bit(__LINK_STATE_START, &dev->state); 5247 set_bit(__LINK_STATE_START, &dev->state);
5125 5248
5249 /* Note : We dont allocate pcpu_refcnt for dummy devices,
5250 * because users of this 'device' dont need to change
5251 * its refcount.
5252 */
5253
5126 return 0; 5254 return 0;
5127} 5255}
5128EXPORT_SYMBOL_GPL(init_dummy_netdev); 5256EXPORT_SYMBOL_GPL(init_dummy_netdev);
@@ -5164,6 +5292,16 @@ out:
5164} 5292}
5165EXPORT_SYMBOL(register_netdev); 5293EXPORT_SYMBOL(register_netdev);
5166 5294
5295int netdev_refcnt_read(const struct net_device *dev)
5296{
5297 int i, refcnt = 0;
5298
5299 for_each_possible_cpu(i)
5300 refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
5301 return refcnt;
5302}
5303EXPORT_SYMBOL(netdev_refcnt_read);
5304
5167/* 5305/*
5168 * netdev_wait_allrefs - wait until all references are gone. 5306 * netdev_wait_allrefs - wait until all references are gone.
5169 * 5307 *
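netdev_refcnt_read() sums one counter slot per possible CPU; dev_hold()/dev_put() now touch only the local slot, so an individual slot may go negative while the sum stays correct. A user-space sketch of the scheme, with a fixed array standing in for alloc_percpu(int):

#include <stdatomic.h>

#define NR_CPUS 4

static _Atomic int pcpu_refcnt[NR_CPUS];

/* e.g. hold on CPU 0 and put on CPU 3: slot 3 reads -1, the sum is 0 */
static void hold(int cpu) { atomic_fetch_add(&pcpu_refcnt[cpu], 1); }
static void put(int cpu)  { atomic_fetch_sub(&pcpu_refcnt[cpu], 1); }

static int refcnt_read(void)
{
	int i, refcnt = 0;

	for (i = 0; i < NR_CPUS; i++)
		refcnt += atomic_load(&pcpu_refcnt[i]);
	return refcnt;
}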
@@ -5178,11 +5316,14 @@ EXPORT_SYMBOL(register_netdev);
 static void netdev_wait_allrefs(struct net_device *dev)
 {
 	unsigned long rebroadcast_time, warning_time;
+	int refcnt;
 
 	linkwatch_forget_dev(dev);
 
 	rebroadcast_time = warning_time = jiffies;
-	while (atomic_read(&dev->refcnt) != 0) {
+	refcnt = netdev_refcnt_read(dev);
+
+	while (refcnt != 0) {
 		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
 			rtnl_lock();
 
@@ -5209,11 +5350,13 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
 		msleep(250);
 
+		refcnt = netdev_refcnt_read(dev);
+
 		if (time_after(jiffies, warning_time + 10 * HZ)) {
 			printk(KERN_EMERG "unregister_netdevice: "
 			       "waiting for %s to become free. Usage "
 			       "count = %d\n",
-			       dev->name, atomic_read(&dev->refcnt));
+			       dev->name, refcnt);
 			warning_time = jiffies;
 		}
 	}
@@ -5271,8 +5414,8 @@ void netdev_run_todo(void)
 		netdev_wait_allrefs(dev);
 
 		/* paranoia */
-		BUG_ON(atomic_read(&dev->refcnt));
-		WARN_ON(dev->ip_ptr);
+		BUG_ON(netdev_refcnt_read(dev));
+		WARN_ON(rcu_dereference_raw(dev->ip_ptr));
 		WARN_ON(dev->ip6_ptr);
 		WARN_ON(dev->dn_ptr);
 
@@ -5350,30 +5493,34 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 
 	if (ops->ndo_get_stats64) {
 		memset(storage, 0, sizeof(*storage));
-		return ops->ndo_get_stats64(dev, storage);
-	}
-	if (ops->ndo_get_stats) {
+		ops->ndo_get_stats64(dev, storage);
+	} else if (ops->ndo_get_stats) {
 		netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
-		return storage;
+	} else {
+		netdev_stats_to_stats64(storage, &dev->stats);
+		dev_txq_stats_fold(dev, storage);
 	}
-	netdev_stats_to_stats64(storage, &dev->stats);
-	dev_txq_stats_fold(dev, storage);
+	storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
 	return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
 
-static void netdev_init_one_queue(struct net_device *dev,
-				  struct netdev_queue *queue,
-				  void *_unused)
+struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 {
-	queue->dev = dev;
-}
+	struct netdev_queue *queue = dev_ingress_queue(dev);
 
-static void netdev_init_queues(struct net_device *dev)
-{
-	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
-	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
-	spin_lock_init(&dev->tx_global_lock);
+#ifdef CONFIG_NET_CLS_ACT
+	if (queue)
+		return queue;
+	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+	if (!queue)
+		return NULL;
+	netdev_init_one_queue(dev, queue, NULL);
+	queue->qdisc = &noop_qdisc;
+	queue->qdisc_sleeping = &noop_qdisc;
+	rcu_assign_pointer(dev->ingress_queue, queue);
+#endif
+	return queue;
 }
 
 /**
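A caller sees one unified behaviour regardless of which source filled the counters (ndo_get_stats64, legacy ndo_get_stats, or the core fallback), with the core-level rx_dropped folded in at the end. A hypothetical caller as a sketch (my_read_rx_dropped() is not from this patch):

static unsigned long long my_read_rx_dropped(struct net_device *dev)
{
	struct rtnl_link_stats64 temp;
	const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp);

	/* includes drops accounted by the core, not only the driver */
	return stats->rx_dropped;
}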
@@ -5390,17 +5537,18 @@ static void netdev_init_queues(struct net_device *dev)
 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		void (*setup)(struct net_device *), unsigned int queue_count)
 {
-	struct netdev_queue *tx;
 	struct net_device *dev;
 	size_t alloc_size;
 	struct net_device *p;
-#ifdef CONFIG_RPS
-	struct netdev_rx_queue *rx;
-	int i;
-#endif
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
+	if (queue_count < 1) {
+		pr_err("alloc_netdev: Unable to allocate device "
+		       "with zero queues.\n");
+		return NULL;
+	}
+
 	alloc_size = sizeof(struct net_device);
 	if (sizeof_priv) {
 		/* ensure 32-byte alignment of private area */
@@ -5416,55 +5564,31 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		return NULL;
 	}
 
-	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
-	if (!tx) {
-		printk(KERN_ERR "alloc_netdev: Unable to allocate "
-		       "tx qdiscs.\n");
-		goto free_p;
-	}
-
-#ifdef CONFIG_RPS
-	rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
-	if (!rx) {
-		printk(KERN_ERR "alloc_netdev: Unable to allocate "
-		       "rx queues.\n");
-		goto free_tx;
-	}
-
-	atomic_set(&rx->count, queue_count);
-
-	/*
-	 * Set a pointer to first element in the array which holds the
-	 * reference count.
-	 */
-	for (i = 0; i < queue_count; i++)
-		rx[i].first = rx;
-#endif
-
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
+	dev->pcpu_refcnt = alloc_percpu(int);
+	if (!dev->pcpu_refcnt)
+		goto free_p;
+
 	if (dev_addr_init(dev))
-		goto free_rx;
+		goto free_pcpu;
 
 	dev_mc_init(dev);
 	dev_uc_init(dev);
 
 	dev_net_set(dev, &init_net);
 
-	dev->_tx = tx;
 	dev->num_tx_queues = queue_count;
 	dev->real_num_tx_queues = queue_count;
 
 #ifdef CONFIG_RPS
-	dev->_rx = rx;
 	dev->num_rx_queues = queue_count;
+	dev->real_num_rx_queues = queue_count;
 #endif
 
 	dev->gso_max_size = GSO_MAX_SIZE;
 
-	netdev_init_queues(dev);
-
 	INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
 	dev->ethtool_ntuple_list.count = 0;
 	INIT_LIST_HEAD(&dev->napi_list);
@@ -5475,12 +5599,8 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	strcpy(dev->name, name);
 	return dev;
 
-free_rx:
-#ifdef CONFIG_RPS
-	kfree(rx);
-free_tx:
-#endif
-	kfree(tx);
+free_pcpu:
+	free_percpu(dev->pcpu_refcnt);
 free_p:
 	kfree(p);
 	return NULL;
@@ -5503,6 +5623,8 @@ void free_netdev(struct net_device *dev)
 
 	kfree(dev->_tx);
 
+	kfree(rcu_dereference_raw(dev->ingress_queue));
+
 	/* Flush device addresses */
 	dev_addr_flush(dev);
 
@@ -5512,6 +5634,9 @@ void free_netdev(struct net_device *dev)
 	list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
 		netif_napi_del(p);
 
+	free_percpu(dev->pcpu_refcnt);
+	dev->pcpu_refcnt = NULL;
+
 	/* Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
 		kfree((char *)dev - dev->padded);
@@ -5666,6 +5791,10 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 
 	/* Notify protocols, that we are about to destroy
 	   this device. They should clean all the things.
+
+	   Note that dev->reg_state stays at NETREG_REGISTERED.
+	   This is wanted because this way 8021q and macvlan know
+	   the device is just moving and can keep their slaves up.
 	*/
 	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
 	call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev);
diff --git a/net/core/dst.c b/net/core/dst.c
index 6c41b1fac3db..8abe628b79f1 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -168,7 +168,7 @@ void *dst_alloc(struct dst_ops *ops)
 {
 	struct dst_entry *dst;
 
-	if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
+	if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
 		if (ops->gc(ops))
 			return NULL;
 	}
@@ -183,7 +183,7 @@ void *dst_alloc(struct dst_ops *ops)
 #if RT_CACHE_DEBUG >= 2
 	atomic_inc(&dst_total);
 #endif
-	atomic_inc(&ops->entries);
+	dst_entries_add(ops, 1);
 	return dst;
 }
 EXPORT_SYMBOL(dst_alloc);
@@ -228,15 +228,15 @@ again:
 	child = dst->child;
 
 	dst->hh = NULL;
-	if (hh && atomic_dec_and_test(&hh->hh_refcnt))
-		kfree(hh);
+	if (hh)
+		hh_cache_put(hh);
 
 	if (neigh) {
 		dst->neighbour = NULL;
 		neigh_release(neigh);
 	}
 
-	atomic_dec(&dst->ops->entries);
+	dst_entries_add(dst->ops, -1);
 
 	if (dst->ops->destroy)
 		dst->ops->destroy(dst);
@@ -271,13 +271,40 @@ void dst_release(struct dst_entry *dst)
 	if (dst) {
 		int newrefcnt;
 
-		smp_mb__before_atomic_dec();
 		newrefcnt = atomic_dec_return(&dst->__refcnt);
 		WARN_ON(newrefcnt < 0);
+		if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) {
+			dst = dst_destroy(dst);
+			if (dst)
+				__dst_free(dst);
+		}
 	}
 }
 EXPORT_SYMBOL(dst_release);
 
+/**
+ * skb_dst_set_noref - sets skb dst, without a reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst
+ * skb_dst_drop() should not dst_release() this dst
+ */
+void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+{
+	WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+	/* If dst not in cache, we must take a reference, because
+	 * dst_release() will destroy dst as soon as its refcount becomes zero
+	 */
+	if (unlikely(dst->flags & DST_NOCACHE)) {
+		dst_hold(dst);
+		skb_dst_set(skb, dst);
+	} else {
+		skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
+	}
+}
+EXPORT_SYMBOL(skb_dst_set_noref);
+
 /* Dirty hack. We did it in 2.2 (in __dst_free),
  * we have _very_ good reasons not to repeat
  * this mistake in 2.3, but we have no choice
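The WARN_ON spells out the contract: the route lookup and the transmit must both sit inside one RCU read-side section, since the skb borrows the dst without taking a reference. A sketch of that calling pattern (my_lookup() and my_xmit() are hypothetical stand-ins, not kernel functions):

static void my_send(struct sk_buff *skb)
{
	struct dst_entry *dst;

	rcu_read_lock();
	dst = my_lookup(skb);			/* RCU-protected route */
	if (dst) {
		skb_dst_set_noref(skb, dst);	/* no dst_hold() taken */
		my_xmit(skb);		/* skb must not outlive the lock */
	}
	rcu_read_unlock();
}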
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 8451ab481095..956a9f4971cb 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -19,6 +19,7 @@
 #include <linux/netdevice.h>
 #include <linux/bitops.h>
 #include <linux/uaccess.h>
+#include <linux/vmalloc.h>
 #include <linux/slab.h>
 
 /*
@@ -131,7 +132,8 @@ EXPORT_SYMBOL(ethtool_op_set_ufo);
  * NETIF_F_xxx values in include/linux/netdevice.h
  */
 static const u32 flags_dup_features =
-	(ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
+	(ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
+	 ETH_FLAG_RXHASH);
 
 u32 ethtool_op_get_flags(struct net_device *dev)
 {
@@ -205,18 +207,24 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
 	struct ethtool_drvinfo info;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
 
-	if (!ops->get_drvinfo)
-		return -EOPNOTSUPP;
-
 	memset(&info, 0, sizeof(info));
 	info.cmd = ETHTOOL_GDRVINFO;
-	ops->get_drvinfo(dev, &info);
+	if (ops && ops->get_drvinfo) {
+		ops->get_drvinfo(dev, &info);
+	} else if (dev->dev.parent && dev->dev.parent->driver) {
+		strlcpy(info.bus_info, dev_name(dev->dev.parent),
+			sizeof(info.bus_info));
+		strlcpy(info.driver, dev->dev.parent->driver->name,
+			sizeof(info.driver));
+	} else {
+		return -EOPNOTSUPP;
+	}
 
 	/*
 	 * this method of obtaining string set info is deprecated;
 	 * Use ETHTOOL_GSSET_INFO instead.
 	 */
-	if (ops->get_sset_count) {
+	if (ops && ops->get_sset_count) {
 		int rc;
 
 		rc = ops->get_sset_count(dev, ETH_SS_TEST);
@@ -229,9 +237,9 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
 		if (rc >= 0)
 			info.n_priv_flags = rc;
 	}
-	if (ops->get_regs_len)
+	if (ops && ops->get_regs_len)
 		info.regdump_len = ops->get_regs_len(dev);
-	if (ops->get_eeprom_len)
+	if (ops && ops->get_eeprom_len)
 		info.eedump_len = ops->get_eeprom_len(dev);
 
 	if (copy_to_user(useraddr, &info, sizeof(info)))
@@ -479,6 +487,38 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
 	list->count++;
 }
 
+/*
+ * ethtool does not (or did not) set masks for flow parameters that are
+ * not specified, so if both value and mask are 0 then this must be
+ * treated as equivalent to a mask with all bits set.  Implement that
+ * here rather than in drivers.
+ */
+static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs)
+{
+	struct ethtool_tcpip4_spec *entry = &fs->h_u.tcp_ip4_spec;
+	struct ethtool_tcpip4_spec *mask = &fs->m_u.tcp_ip4_spec;
+
+	if (fs->flow_type != TCP_V4_FLOW &&
+	    fs->flow_type != UDP_V4_FLOW &&
+	    fs->flow_type != SCTP_V4_FLOW)
+		return;
+
+	if (!(entry->ip4src | mask->ip4src))
+		mask->ip4src = htonl(0xffffffff);
+	if (!(entry->ip4dst | mask->ip4dst))
+		mask->ip4dst = htonl(0xffffffff);
+	if (!(entry->psrc | mask->psrc))
+		mask->psrc = htons(0xffff);
+	if (!(entry->pdst | mask->pdst))
+		mask->pdst = htons(0xffff);
+	if (!(entry->tos | mask->tos))
+		mask->tos = 0xff;
+	if (!(fs->vlan_tag | fs->vlan_tag_mask))
+		fs->vlan_tag_mask = 0xffff;
+	if (!(fs->data | fs->data_mask))
+		fs->data_mask = 0xffffffffffffffffULL;
+}
+
 static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
 					    void __user *useraddr)
 {
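A worked example of the defaulting rule: a field the user left at value 0 with mask 0 is rewritten to an all-ones mask, which in this interface marks the field as unspecified, while an explicitly given field keeps its exact-match mask of 0. Reduced to two ports in plain C (struct portspec is illustrative):

#include <assert.h>
#include <stdint.h>

struct portspec { uint16_t psrc, pdst; };

static void fix_masks(const struct portspec *val, struct portspec *mask)
{
	if (!(val->psrc | mask->psrc))
		mask->psrc = 0xffff;	/* unspecified: ignore the field */
	if (!(val->pdst | mask->pdst))
		mask->pdst = 0xffff;
}

int main(void)
{
	struct portspec val  = { .psrc = 0, .pdst = 80 };
	struct portspec mask = { 0, 0 };

	fix_masks(&val, &mask);
	assert(mask.psrc == 0xffff);	/* source port: don't care */
	assert(mask.pdst == 0x0000);	/* dest port 80: match exactly */
	return 0;
}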
@@ -493,6 +533,8 @@ static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
 	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
 		return -EFAULT;
 
+	rx_ntuple_fix_masks(&cmd.fs);
+
 	/*
 	 * Cache filter in dev struct for GET operation only if
 	 * the underlying driver doesn't have its own GET operation, and
@@ -667,19 +709,19 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
 		break;
 	case IP_USER_FLOW:
 		sprintf(p, "\tSrc IP addr: 0x%x\n",
-			fsc->fs.h_u.raw_ip4_spec.ip4src);
+			fsc->fs.h_u.usr_ip4_spec.ip4src);
 		p += ETH_GSTRING_LEN;
 		num_strings++;
 		sprintf(p, "\tSrc IP mask: 0x%x\n",
-			fsc->fs.m_u.raw_ip4_spec.ip4src);
+			fsc->fs.m_u.usr_ip4_spec.ip4src);
 		p += ETH_GSTRING_LEN;
 		num_strings++;
 		sprintf(p, "\tDest IP addr: 0x%x\n",
-			fsc->fs.h_u.raw_ip4_spec.ip4dst);
+			fsc->fs.h_u.usr_ip4_spec.ip4dst);
 		p += ETH_GSTRING_LEN;
 		num_strings++;
 		sprintf(p, "\tDest IP mask: 0x%x\n",
-			fsc->fs.m_u.raw_ip4_spec.ip4dst);
+			fsc->fs.m_u.usr_ip4_spec.ip4dst);
 		p += ETH_GSTRING_LEN;
 		num_strings++;
 		break;
@@ -775,7 +817,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 	if (regs.len > reglen)
 		regs.len = reglen;
 
-	regbuf = kzalloc(reglen, GFP_USER);
+	regbuf = vmalloc(reglen);
 	if (!regbuf)
 		return -ENOMEM;
 
@@ -790,7 +832,7 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 	ret = 0;
 
  out:
-	kfree(regbuf);
+	vfree(regbuf);
 	return ret;
 }
 
@@ -1175,8 +1217,11 @@ static int ethtool_set_gro(struct net_device *dev, char __user *useraddr)
 		return -EFAULT;
 
 	if (edata.data) {
-		if (!dev->ethtool_ops->get_rx_csum ||
-		    !dev->ethtool_ops->get_rx_csum(dev))
+		u32 rxcsum = dev->ethtool_ops->get_rx_csum ?
+				dev->ethtool_ops->get_rx_csum(dev) :
+				ethtool_op_get_rx_csum(dev);
+
+		if (!rxcsum)
 			return -EINVAL;
 		dev->features |= NETIF_F_GRO;
 	} else
@@ -1402,14 +1447,22 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	if (!dev || !netif_device_present(dev))
 		return -ENODEV;
 
-	if (!dev->ethtool_ops)
-		return -EOPNOTSUPP;
-
 	if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
 		return -EFAULT;
 
+	if (!dev->ethtool_ops) {
+		/* ETHTOOL_GDRVINFO does not require any driver support.
+		 * It is also unprivileged and does not change anything,
+		 * so we can take a shortcut to it. */
+		if (ethcmd == ETHTOOL_GDRVINFO)
+			return ethtool_get_drvinfo(dev, useraddr);
+		else
+			return -EOPNOTSUPP;
+	}
+
 	/* Allow some commands to be done by anyone */
 	switch (ethcmd) {
+	case ETHTOOL_GSET:
 	case ETHTOOL_GDRVINFO:
 	case ETHTOOL_GMSGLVL:
 	case ETHTOOL_GCOALESCE:
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 42e84e08a1be..1bc3f253ba6c 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -144,7 +144,7 @@ fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
 }
 EXPORT_SYMBOL_GPL(fib_rules_register);
 
-void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
+static void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 {
 	struct fib_rule *rule, *tmp;
 
@@ -153,7 +153,6 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
 		fib_rule_put(rule);
 	}
 }
-EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops);
 
 static void fib_rules_put_rcu(struct rcu_head *head)
 {
@@ -182,7 +181,8 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
 {
 	int ret = 0;
 
-	if (rule->iifindex && (rule->iifindex != fl->iif))
+	if (rule->iifindex && (rule->iifindex != fl->iif) &&
+	    !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF))
 		goto out;
 
 	if (rule->oifindex && (rule->oifindex != fl->oif))
@@ -225,9 +225,12 @@ jumped:
 			err = ops->action(rule, fl, flags, arg);
 
 		if (err != -EAGAIN) {
-			fib_rule_get(rule);
-			arg->rule = rule;
-			goto out;
+			if ((arg->flags & FIB_LOOKUP_NOREF) ||
+			    likely(atomic_inc_not_zero(&rule->refcnt))) {
+				arg->rule = rule;
+				goto out;
+			}
+			break;
 		}
 	}
 
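atomic_inc_not_zero() takes a reference only if the rule is still live; a dying rule whose refcount has already hit zero is skipped, which is what lets the delete path drop its synchronize_rcu() in the next hunk. The primitive's semantics as a standalone CAS loop in C11:

#include <stdatomic.h>
#include <stdbool.h>

/* take a reference unless the count has already reached zero */
static bool get_if_live(_Atomic int *refcnt)
{
	int old = atomic_load(refcnt);

	while (old != 0) {
		if (atomic_compare_exchange_weak(refcnt, &old, old + 1))
			return true;	/* reference taken */
	}
	return false;			/* object is on its way out */
}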
@@ -491,7 +494,6 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		}
 	}
 
-	synchronize_rcu();
 	notify_rule_change(RTM_DELRULE, rule, ops, nlh,
 			   NETLINK_CB(skb).pid);
 	fib_rule_put(rule);
diff --git a/net/core/filter.c b/net/core/filter.c
index 52b051f82a01..7adf50352918 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -638,10 +638,9 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 		return err;
 	}
 
-	rcu_read_lock_bh();
-	old_fp = rcu_dereference_bh(sk->sk_filter);
+	old_fp = rcu_dereference_protected(sk->sk_filter,
+					   sock_owned_by_user(sk));
 	rcu_assign_pointer(sk->sk_filter, fp);
-	rcu_read_unlock_bh();
 
 	if (old_fp)
 		sk_filter_delayed_uncharge(sk, old_fp);
@@ -654,14 +653,13 @@ int sk_detach_filter(struct sock *sk)
 	int ret = -ENOENT;
 	struct sk_filter *filter;
 
-	rcu_read_lock_bh();
-	filter = rcu_dereference_bh(sk->sk_filter);
+	filter = rcu_dereference_protected(sk->sk_filter,
+					   sock_owned_by_user(sk));
 	if (filter) {
 		rcu_assign_pointer(sk->sk_filter, NULL);
 		sk_filter_delayed_uncharge(sk, filter);
 		ret = 0;
 	}
-	rcu_read_unlock_bh();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(sk_detach_filter);
diff --git a/net/core/flow.c b/net/core/flow.c
index f67dcbfe54ef..127c8a7ffd61 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -53,8 +53,7 @@ struct flow_flush_info {
 
 struct flow_cache {
 	u32			hash_shift;
-	unsigned long		order;
-	struct flow_cache_percpu *percpu;
+	struct flow_cache_percpu __percpu *percpu;
 	struct notifier_block	hotcpu_notifier;
 	int			low_watermark;
 	int			high_watermark;
@@ -64,7 +63,7 @@ struct flow_cache {
 atomic_t flow_cache_genid = ATOMIC_INIT(0);
 EXPORT_SYMBOL(flow_cache_genid);
 static struct flow_cache flow_cache_global;
-static struct kmem_cache *flow_cachep;
+static struct kmem_cache *flow_cachep __read_mostly;
 
 static DEFINE_SPINLOCK(flow_cache_gc_lock);
 static LIST_HEAD(flow_cache_gc_list);
@@ -177,15 +176,11 @@ static u32 flow_hash_code(struct flow_cache *fc,
 {
 	u32 *k = (u32 *) key;
 
-	return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
-		& (flow_cache_hash_size(fc) - 1));
+	return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
+		& (flow_cache_hash_size(fc) - 1);
 }
 
-#if (BITS_PER_LONG == 64)
-typedef u64 flow_compare_t;
-#else
-typedef u32 flow_compare_t;
-#endif
+typedef unsigned long flow_compare_t;
 
 /* I hear what you're saying, use memcmp.  But memcmp cannot make
  * important assumptions that we can here, such as alignment and
@@ -357,62 +352,73 @@ void flow_cache_flush(void)
 	put_online_cpus();
 }
 
-static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
-					  struct flow_cache_percpu *fcp)
+static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
 {
-	fcp->hash_table = (struct hlist_head *)
-		__get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
-	if (!fcp->hash_table)
-		panic("NET: failed to allocate flow cache order %lu\n", fc->order);
+	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
+	size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc);
 
-	fcp->hash_rnd_recalc = 1;
-	fcp->hash_count = 0;
-	tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
+	if (!fcp->hash_table) {
+		fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
+		if (!fcp->hash_table) {
+			pr_err("NET: failed to allocate flow cache sz %zu\n", sz);
+			return -ENOMEM;
+		}
+		fcp->hash_rnd_recalc = 1;
+		fcp->hash_count = 0;
+		tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
	}
+	return 0;
 }
 
-static int flow_cache_cpu(struct notifier_block *nfb,
-			  unsigned long action,
-			  void *hcpu)
+static int __cpuinit flow_cache_cpu(struct notifier_block *nfb,
+				    unsigned long action,
+				    void *hcpu)
 {
 	struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
-	int cpu = (unsigned long) hcpu;
+	int res, cpu = (unsigned long) hcpu;
 	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
 
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
+	switch (action) {
+	case CPU_UP_PREPARE:
+	case CPU_UP_PREPARE_FROZEN:
+		res = flow_cache_cpu_prepare(fc, cpu);
+		if (res)
+			return notifier_from_errno(res);
+		break;
+	case CPU_DEAD:
+	case CPU_DEAD_FROZEN:
 		__flow_cache_shrink(fc, fcp, 0);
+		break;
+	}
 	return NOTIFY_OK;
 }
 
-static int flow_cache_init(struct flow_cache *fc)
+static int __init flow_cache_init(struct flow_cache *fc)
 {
-	unsigned long order;
 	int i;
 
 	fc->hash_shift = 10;
 	fc->low_watermark = 2 * flow_cache_hash_size(fc);
 	fc->high_watermark = 4 * flow_cache_hash_size(fc);
 
-	for (order = 0;
-	     (PAGE_SIZE << order) <
-	     (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
-	     order++)
-		/* NOTHING */;
-	fc->order = order;
 	fc->percpu = alloc_percpu(struct flow_cache_percpu);
+	if (!fc->percpu)
+		return -ENOMEM;
 
-	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
-		    (unsigned long) fc);
-	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&fc->rnd_timer);
-
-	for_each_possible_cpu(i)
-		flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
-
+	for_each_online_cpu(i) {
+		if (flow_cache_cpu_prepare(fc, i))
+			return -ENOMEM;
+	}
+
 	fc->hotcpu_notifier = (struct notifier_block){
 		.notifier_call = flow_cache_cpu,
 	};
 	register_hotcpu_notifier(&fc->hotcpu_notifier);
 
+	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
+		    (unsigned long) fc);
+	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&fc->rnd_timer);
+
 	return 0;
 }
 
diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c
index 6743146e4d6b..7c2373321b74 100644
--- a/net/core/gen_estimator.c
+++ b/net/core/gen_estimator.c
@@ -274,9 +274,9 @@ void gen_kill_estimator(struct gnet_stats_basic_packed *bstats,
 	while ((e = gen_find_node(bstats, rate_est))) {
 		rb_erase(&e->node, &est_root);
 
-		write_lock_bh(&est_lock);
+		write_lock(&est_lock);
 		e->bstats = NULL;
-		write_unlock_bh(&est_lock);
+		write_unlock(&est_lock);
 
 		list_del_rcu(&e->list);
 		call_rcu(&e->e_rcu, __gen_kill_estimator);
diff --git a/net/core/iovec.c b/net/core/iovec.c
index e6b133b77ccb..72aceb1fe4fa 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -42,7 +42,9 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
 
 	if (m->msg_namelen) {
 		if (mode == VERIFY_READ) {
-			err = move_addr_to_kernel(m->msg_name, m->msg_namelen,
+			void __user *namep;
+			namep = (void __user __force *) m->msg_name;
+			err = move_addr_to_kernel(namep, m->msg_namelen,
 						  address);
 			if (err < 0)
 				return err;
@@ -53,7 +55,7 @@ long verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr *address,
 	}
 
 	size = m->msg_iovlen * sizeof(struct iovec);
-	if (copy_from_user(iov, m->msg_iov, size))
+	if (copy_from_user(iov, (void __user __force *) m->msg_iov, size))
 		return -EFAULT;
 
 	m->msg_iov = iov;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index a4e0a7482c2b..8cc8f9a79db9 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -122,7 +122,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh)
122 122
123unsigned long neigh_rand_reach_time(unsigned long base) 123unsigned long neigh_rand_reach_time(unsigned long base)
124{ 124{
125 return (base ? (net_random() % base) + (base >> 1) : 0); 125 return base ? (net_random() % base) + (base >> 1) : 0;
126} 126}
127EXPORT_SYMBOL(neigh_rand_reach_time); 127EXPORT_SYMBOL(neigh_rand_reach_time);
128 128
@@ -131,15 +131,20 @@ static int neigh_forced_gc(struct neigh_table *tbl)
131{ 131{
132 int shrunk = 0; 132 int shrunk = 0;
133 int i; 133 int i;
134 struct neigh_hash_table *nht;
134 135
135 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); 136 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
136 137
137 write_lock_bh(&tbl->lock); 138 write_lock_bh(&tbl->lock);
138 for (i = 0; i <= tbl->hash_mask; i++) { 139 nht = rcu_dereference_protected(tbl->nht,
139 struct neighbour *n, **np; 140 lockdep_is_held(&tbl->lock));
141 for (i = 0; i <= nht->hash_mask; i++) {
142 struct neighbour *n;
143 struct neighbour __rcu **np;
140 144
141 np = &tbl->hash_buckets[i]; 145 np = &nht->hash_buckets[i];
142 while ((n = *np) != NULL) { 146 while ((n = rcu_dereference_protected(*np,
147 lockdep_is_held(&tbl->lock))) != NULL) {
143 /* Neighbour record may be discarded if: 148 /* Neighbour record may be discarded if:
144 * - nobody refers to it. 149 * - nobody refers to it.
145 * - it is not permanent 150 * - it is not permanent
@@ -147,7 +152,9 @@ static int neigh_forced_gc(struct neigh_table *tbl)
147 write_lock(&n->lock); 152 write_lock(&n->lock);
148 if (atomic_read(&n->refcnt) == 1 && 153 if (atomic_read(&n->refcnt) == 1 &&
149 !(n->nud_state & NUD_PERMANENT)) { 154 !(n->nud_state & NUD_PERMANENT)) {
150 *np = n->next; 155 rcu_assign_pointer(*np,
156 rcu_dereference_protected(n->next,
157 lockdep_is_held(&tbl->lock)));
151 n->dead = 1; 158 n->dead = 1;
152 shrunk = 1; 159 shrunk = 1;
153 write_unlock(&n->lock); 160 write_unlock(&n->lock);
@@ -199,16 +206,24 @@ static void pneigh_queue_purge(struct sk_buff_head *list)
199static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) 206static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
200{ 207{
201 int i; 208 int i;
209 struct neigh_hash_table *nht;
202 210
203 for (i = 0; i <= tbl->hash_mask; i++) { 211 nht = rcu_dereference_protected(tbl->nht,
204 struct neighbour *n, **np = &tbl->hash_buckets[i]; 212 lockdep_is_held(&tbl->lock));
205 213
206 while ((n = *np) != NULL) { 214 for (i = 0; i <= nht->hash_mask; i++) {
215 struct neighbour *n;
216 struct neighbour __rcu **np = &nht->hash_buckets[i];
217
218 while ((n = rcu_dereference_protected(*np,
219 lockdep_is_held(&tbl->lock))) != NULL) {
207 if (dev && n->dev != dev) { 220 if (dev && n->dev != dev) {
208 np = &n->next; 221 np = &n->next;
209 continue; 222 continue;
210 } 223 }
211 *np = n->next; 224 rcu_assign_pointer(*np,
225 rcu_dereference_protected(n->next,
226 lockdep_is_held(&tbl->lock)));
212 write_lock(&n->lock); 227 write_lock(&n->lock);
213 neigh_del_timer(n); 228 neigh_del_timer(n);
214 n->dead = 1; 229 n->dead = 1;
@@ -279,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
279 294
280 skb_queue_head_init(&n->arp_queue); 295 skb_queue_head_init(&n->arp_queue);
281 rwlock_init(&n->lock); 296 rwlock_init(&n->lock);
297 seqlock_init(&n->ha_lock);
282 n->updated = n->used = now; 298 n->updated = n->used = now;
283 n->nud_state = NUD_NONE; 299 n->nud_state = NUD_NONE;
284 n->output = neigh_blackhole; 300 n->output = neigh_blackhole;
@@ -297,64 +313,86 @@ out_entries:
297 goto out; 313 goto out;
298} 314}
299 315
300static struct neighbour **neigh_hash_alloc(unsigned int entries) 316static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries)
301{ 317{
302 unsigned long size = entries * sizeof(struct neighbour *); 318 size_t size = entries * sizeof(struct neighbour *);
303 struct neighbour **ret; 319 struct neigh_hash_table *ret;
320 struct neighbour **buckets;
304 321
305 if (size <= PAGE_SIZE) { 322 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
306 ret = kzalloc(size, GFP_ATOMIC); 323 if (!ret)
307 } else { 324 return NULL;
308 ret = (struct neighbour **) 325 if (size <= PAGE_SIZE)
309 __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size)); 326 buckets = kzalloc(size, GFP_ATOMIC);
327 else
328 buckets = (struct neighbour **)
329 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
330 get_order(size));
331 if (!buckets) {
332 kfree(ret);
333 return NULL;
310 } 334 }
335 rcu_assign_pointer(ret->hash_buckets, buckets);
336 ret->hash_mask = entries - 1;
337 get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd));
311 return ret; 338 return ret;
312} 339}
313 340
314static void neigh_hash_free(struct neighbour **hash, unsigned int entries) 341static void neigh_hash_free_rcu(struct rcu_head *head)
315{ 342{
316 unsigned long size = entries * sizeof(struct neighbour *); 343 struct neigh_hash_table *nht = container_of(head,
344 struct neigh_hash_table,
345 rcu);
346 size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *);
347 struct neighbour **buckets = nht->hash_buckets;
317 348
318 if (size <= PAGE_SIZE) 349 if (size <= PAGE_SIZE)
319 kfree(hash); 350 kfree(buckets);
320 else 351 else
321 free_pages((unsigned long)hash, get_order(size)); 352 free_pages((unsigned long)buckets, get_order(size));
353 kfree(nht);
322} 354}
323 355
324static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries) 356static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
357 unsigned long new_entries)
325{ 358{
326 struct neighbour **new_hash, **old_hash; 359 unsigned int i, hash;
327 unsigned int i, new_hash_mask, old_entries; 360 struct neigh_hash_table *new_nht, *old_nht;
328 361
329 NEIGH_CACHE_STAT_INC(tbl, hash_grows); 362 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
330 363
331 BUG_ON(!is_power_of_2(new_entries)); 364 BUG_ON(!is_power_of_2(new_entries));
332 new_hash = neigh_hash_alloc(new_entries); 365 old_nht = rcu_dereference_protected(tbl->nht,
333 if (!new_hash) 366 lockdep_is_held(&tbl->lock));
334 return; 367 new_nht = neigh_hash_alloc(new_entries);
335 368 if (!new_nht)
336 old_entries = tbl->hash_mask + 1; 369 return old_nht;
337 new_hash_mask = new_entries - 1;
338 old_hash = tbl->hash_buckets;
339 370
340 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); 371 for (i = 0; i <= old_nht->hash_mask; i++) {
341 for (i = 0; i < old_entries; i++) {
342 struct neighbour *n, *next; 372 struct neighbour *n, *next;
343 373
344 for (n = old_hash[i]; n; n = next) { 374 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
345 unsigned int hash_val = tbl->hash(n->primary_key, n->dev); 375 lockdep_is_held(&tbl->lock));
346 376 n != NULL;
347 hash_val &= new_hash_mask; 377 n = next) {
348 next = n->next; 378 hash = tbl->hash(n->primary_key, n->dev,
349 379 new_nht->hash_rnd);
350 n->next = new_hash[hash_val]; 380
351 new_hash[hash_val] = n; 381 hash &= new_nht->hash_mask;
382 next = rcu_dereference_protected(n->next,
383 lockdep_is_held(&tbl->lock));
384
385 rcu_assign_pointer(n->next,
386 rcu_dereference_protected(
387 new_nht->hash_buckets[hash],
388 lockdep_is_held(&tbl->lock)));
389 rcu_assign_pointer(new_nht->hash_buckets[hash], n);
352 } 390 }
353 } 391 }
354 tbl->hash_buckets = new_hash;
355 tbl->hash_mask = new_hash_mask;
356 392
357 neigh_hash_free(old_hash, old_entries); 393 rcu_assign_pointer(tbl->nht, new_nht);
394 call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
395 return new_nht;
358} 396}
359 397
360struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, 398struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
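
neigh_hash_grow() now resizes by replacement: it builds a fresh neigh_hash_table privately, rethreads every chain with rcu_assign_pointer(), publishes the new table with a single pointer store, and retires the old one via call_rcu(). Readers under rcu_read_lock() therefore never touch freed memory; at worst a lookup racing with a resize misses an entry, which a cache can tolerate. The shape of the operation, with hypothetical table_alloc()/move_all_chains()/table_free_rcu() helpers:

/* tbl->lock held; readers run concurrently under RCU */
new = table_alloc(2 * (old->hash_mask + 1));
if (!new)
	return old;			/* keep the old table on failure */

move_all_chains(old, new);		/* rcu_assign_pointer() per node */
rcu_assign_pointer(tbl->nht, new);	/* single publication point */
call_rcu(&old->rcu, table_free_rcu);	/* free after a grace period */
return new;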
@@ -363,19 +401,26 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
363 struct neighbour *n; 401 struct neighbour *n;
364 int key_len = tbl->key_len; 402 int key_len = tbl->key_len;
365 u32 hash_val; 403 u32 hash_val;
404 struct neigh_hash_table *nht;
366 405
367 NEIGH_CACHE_STAT_INC(tbl, lookups); 406 NEIGH_CACHE_STAT_INC(tbl, lookups);
368 407
369 read_lock_bh(&tbl->lock); 408 rcu_read_lock_bh();
370 hash_val = tbl->hash(pkey, dev); 409 nht = rcu_dereference_bh(tbl->nht);
371 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { 410 hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
411
412 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
413 n != NULL;
414 n = rcu_dereference_bh(n->next)) {
372 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { 415 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
373 neigh_hold(n); 416 if (!atomic_inc_not_zero(&n->refcnt))
417 n = NULL;
374 NEIGH_CACHE_STAT_INC(tbl, hits); 418 NEIGH_CACHE_STAT_INC(tbl, hits);
375 break; 419 break;
376 } 420 }
377 } 421 }
378 read_unlock_bh(&tbl->lock); 422
423 rcu_read_unlock_bh();
379 return n; 424 return n;
380} 425}
381EXPORT_SYMBOL(neigh_lookup); 426EXPORT_SYMBOL(neigh_lookup);
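
With the bucket walked under RCU instead of the table rwlock, a matching entry may already be on its way to destruction, so neigh_hold() is replaced by atomic_inc_not_zero(): the lookup acquires a reference only if the refcount has not yet reached zero. The generic form (find_in_bucket_rcu() is hypothetical):

rcu_read_lock_bh();
n = find_in_bucket_rcu(key);
if (n && !atomic_inc_not_zero(&n->refcnt))
	n = NULL;	/* entry is being freed: treat as a miss */
rcu_read_unlock_bh();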
@@ -386,20 +431,27 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
386 struct neighbour *n; 431 struct neighbour *n;
387 int key_len = tbl->key_len; 432 int key_len = tbl->key_len;
388 u32 hash_val; 433 u32 hash_val;
434 struct neigh_hash_table *nht;
389 435
390 NEIGH_CACHE_STAT_INC(tbl, lookups); 436 NEIGH_CACHE_STAT_INC(tbl, lookups);
391 437
392 read_lock_bh(&tbl->lock); 438 rcu_read_lock_bh();
393 hash_val = tbl->hash(pkey, NULL); 439 nht = rcu_dereference_bh(tbl->nht);
394 for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { 440 hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask;
441
442 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
443 n != NULL;
444 n = rcu_dereference_bh(n->next)) {
395 if (!memcmp(n->primary_key, pkey, key_len) && 445 if (!memcmp(n->primary_key, pkey, key_len) &&
396 net_eq(dev_net(n->dev), net)) { 446 net_eq(dev_net(n->dev), net)) {
397 neigh_hold(n); 447 if (!atomic_inc_not_zero(&n->refcnt))
448 n = NULL;
398 NEIGH_CACHE_STAT_INC(tbl, hits); 449 NEIGH_CACHE_STAT_INC(tbl, hits);
399 break; 450 break;
400 } 451 }
401 } 452 }
402 read_unlock_bh(&tbl->lock); 453
454 rcu_read_unlock_bh();
403 return n; 455 return n;
404} 456}
405EXPORT_SYMBOL(neigh_lookup_nodev); 457EXPORT_SYMBOL(neigh_lookup_nodev);
@@ -411,6 +463,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
411 int key_len = tbl->key_len; 463 int key_len = tbl->key_len;
412 int error; 464 int error;
413 struct neighbour *n1, *rc, *n = neigh_alloc(tbl); 465 struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
466 struct neigh_hash_table *nht;
414 467
415 if (!n) { 468 if (!n) {
416 rc = ERR_PTR(-ENOBUFS); 469 rc = ERR_PTR(-ENOBUFS);
@@ -437,18 +490,24 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
437 n->confirmed = jiffies - (n->parms->base_reachable_time << 1); 490 n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
438 491
439 write_lock_bh(&tbl->lock); 492 write_lock_bh(&tbl->lock);
493 nht = rcu_dereference_protected(tbl->nht,
494 lockdep_is_held(&tbl->lock));
440 495
441 if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1)) 496 if (atomic_read(&tbl->entries) > (nht->hash_mask + 1))
442 neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1); 497 nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1);
443 498
444 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; 499 hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask;
445 500
446 if (n->parms->dead) { 501 if (n->parms->dead) {
447 rc = ERR_PTR(-EINVAL); 502 rc = ERR_PTR(-EINVAL);
448 goto out_tbl_unlock; 503 goto out_tbl_unlock;
449 } 504 }
450 505
451 for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) { 506 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
507 lockdep_is_held(&tbl->lock));
508 n1 != NULL;
509 n1 = rcu_dereference_protected(n1->next,
510 lockdep_is_held(&tbl->lock))) {
452 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { 511 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
453 neigh_hold(n1); 512 neigh_hold(n1);
454 rc = n1; 513 rc = n1;
@@ -456,10 +515,12 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
456 } 515 }
457 } 516 }
458 517
459 n->next = tbl->hash_buckets[hash_val];
460 tbl->hash_buckets[hash_val] = n;
461 n->dead = 0; 518 n->dead = 0;
462 neigh_hold(n); 519 neigh_hold(n);
520 rcu_assign_pointer(n->next,
521 rcu_dereference_protected(nht->hash_buckets[hash_val],
522 lockdep_is_held(&tbl->lock)));
523 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
463 write_unlock_bh(&tbl->lock); 524 write_unlock_bh(&tbl->lock);
464 NEIGH_PRINTK2("neigh %p is created.\n", n); 525 NEIGH_PRINTK2("neigh %p is created.\n", n);
465 rc = n; 526 rc = n;
@@ -616,6 +677,12 @@ static inline void neigh_parms_put(struct neigh_parms *parms)
616 neigh_parms_destroy(parms); 677 neigh_parms_destroy(parms);
617} 678}
618 679
680static void neigh_destroy_rcu(struct rcu_head *head)
681{
682 struct neighbour *neigh = container_of(head, struct neighbour, rcu);
683
684 kmem_cache_free(neigh->tbl->kmem_cachep, neigh);
685}
619/* 686/*
620 * neighbour must already be out of the table; 687 * neighbour must already be out of the table;
621 * 688 *
@@ -643,8 +710,7 @@ void neigh_destroy(struct neighbour *neigh)
643 write_seqlock_bh(&hh->hh_lock); 710 write_seqlock_bh(&hh->hh_lock);
644 hh->hh_output = neigh_blackhole; 711 hh->hh_output = neigh_blackhole;
645 write_sequnlock_bh(&hh->hh_lock); 712 write_sequnlock_bh(&hh->hh_lock);
646 if (atomic_dec_and_test(&hh->hh_refcnt)) 713 hh_cache_put(hh);
647 kfree(hh);
648 } 714 }
649 715
650 skb_queue_purge(&neigh->arp_queue); 716 skb_queue_purge(&neigh->arp_queue);
@@ -655,7 +721,7 @@ void neigh_destroy(struct neighbour *neigh)
655 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); 721 NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
656 722
657 atomic_dec(&neigh->tbl->entries); 723 atomic_dec(&neigh->tbl->entries);
658 kmem_cache_free(neigh->tbl->kmem_cachep, neigh); 724 call_rcu(&neigh->rcu, neigh_destroy_rcu);
659} 725}
660EXPORT_SYMBOL(neigh_destroy); 726EXPORT_SYMBOL(neigh_destroy);
661 727
@@ -696,12 +762,16 @@ static void neigh_connect(struct neighbour *neigh)
696static void neigh_periodic_work(struct work_struct *work) 762static void neigh_periodic_work(struct work_struct *work)
697{ 763{
698 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); 764 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
699 struct neighbour *n, **np; 765 struct neighbour *n;
766 struct neighbour __rcu **np;
700 unsigned int i; 767 unsigned int i;
768 struct neigh_hash_table *nht;
701 769
702 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); 770 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
703 771
704 write_lock_bh(&tbl->lock); 772 write_lock_bh(&tbl->lock);
773 nht = rcu_dereference_protected(tbl->nht,
774 lockdep_is_held(&tbl->lock));
705 775
706 /* 776 /*
707 * periodically recompute ReachableTime from random function 777 * periodically recompute ReachableTime from random function
@@ -715,10 +785,11 @@ static void neigh_periodic_work(struct work_struct *work)
715 neigh_rand_reach_time(p->base_reachable_time); 785 neigh_rand_reach_time(p->base_reachable_time);
716 } 786 }
717 787
718 for (i = 0 ; i <= tbl->hash_mask; i++) { 788 for (i = 0 ; i <= nht->hash_mask; i++) {
719 np = &tbl->hash_buckets[i]; 789 np = &nht->hash_buckets[i];
720 790
721 while ((n = *np) != NULL) { 791 while ((n = rcu_dereference_protected(*np,
792 lockdep_is_held(&tbl->lock))) != NULL) {
722 unsigned int state; 793 unsigned int state;
723 794
724 write_lock(&n->lock); 795 write_lock(&n->lock);
@@ -766,9 +837,9 @@ next_elt:
766static __inline__ int neigh_max_probes(struct neighbour *n) 837static __inline__ int neigh_max_probes(struct neighbour *n)
767{ 838{
768 struct neigh_parms *p = n->parms; 839 struct neigh_parms *p = n->parms;
769 return (n->nud_state & NUD_PROBE ? 840 return (n->nud_state & NUD_PROBE) ?
770 p->ucast_probes : 841 p->ucast_probes :
771 p->ucast_probes + p->app_probes + p->mcast_probes); 842 p->ucast_probes + p->app_probes + p->mcast_probes;
772} 843}
773 844
774static void neigh_invalidate(struct neighbour *neigh) 845static void neigh_invalidate(struct neighbour *neigh)
@@ -945,7 +1016,7 @@ out_unlock_bh:
945} 1016}
946EXPORT_SYMBOL(__neigh_event_send); 1017EXPORT_SYMBOL(__neigh_event_send);
947 1018
948static void neigh_update_hhs(struct neighbour *neigh) 1019static void neigh_update_hhs(const struct neighbour *neigh)
949{ 1020{
950 struct hh_cache *hh; 1021 struct hh_cache *hh;
951 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) 1022 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
@@ -1081,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1081 } 1152 }
1082 1153
1083 if (lladdr != neigh->ha) { 1154 if (lladdr != neigh->ha) {
1155 write_seqlock(&neigh->ha_lock);
1084 memcpy(&neigh->ha, lladdr, dev->addr_len); 1156 memcpy(&neigh->ha, lladdr, dev->addr_len);
1157 write_sequnlock(&neigh->ha_lock);
1085 neigh_update_hhs(neigh); 1158 neigh_update_hhs(neigh);
1086 if (!(new & NUD_CONNECTED)) 1159 if (!(new & NUD_CONNECTED))
1087 neigh->confirmed = jiffies - 1160 neigh->confirmed = jiffies -
@@ -1139,44 +1212,73 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1139} 1212}
1140EXPORT_SYMBOL(neigh_event_ns); 1213EXPORT_SYMBOL(neigh_event_ns);
1141 1214
1215static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst,
1216 __be16 protocol)
1217{
1218 struct hh_cache *hh;
1219
1220 smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */
1221 for (hh = n->hh; hh; hh = hh->hh_next) {
1222 if (hh->hh_type == protocol) {
1223 atomic_inc(&hh->hh_refcnt);
1224 if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
1225 hh_cache_put(hh);
1226 return true;
1227 }
1228 }
1229 return false;
1230}
1231
1232/* called with read_lock_bh(&n->lock); */
1142static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, 1233static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,
1143 __be16 protocol) 1234 __be16 protocol)
1144{ 1235{
1145 struct hh_cache *hh; 1236 struct hh_cache *hh;
1146 struct net_device *dev = dst->dev; 1237 struct net_device *dev = dst->dev;
1147 1238
1148 for (hh = n->hh; hh; hh = hh->hh_next) 1239 if (likely(neigh_hh_lookup(n, dst, protocol)))
1149 if (hh->hh_type == protocol) 1240 return;
1150 break;
1151 1241
1152 if (!hh && (hh = kzalloc(sizeof(*hh), GFP_ATOMIC)) != NULL) { 1242 /* slow path */
1153 seqlock_init(&hh->hh_lock); 1243 hh = kzalloc(sizeof(*hh), GFP_ATOMIC);
1154 hh->hh_type = protocol; 1244 if (!hh)
1155 atomic_set(&hh->hh_refcnt, 0); 1245 return;
1156 hh->hh_next = NULL;
1157 1246
1158 if (dev->header_ops->cache(n, hh)) { 1247 seqlock_init(&hh->hh_lock);
1159 kfree(hh); 1248 hh->hh_type = protocol;
1160 hh = NULL; 1249 atomic_set(&hh->hh_refcnt, 2);
1161 } else { 1250
1162 atomic_inc(&hh->hh_refcnt); 1251 if (dev->header_ops->cache(n, hh)) {
1163 hh->hh_next = n->hh; 1252 kfree(hh);
1164 n->hh = hh; 1253 return;
1165 if (n->nud_state & NUD_CONNECTED)
1166 hh->hh_output = n->ops->hh_output;
1167 else
1168 hh->hh_output = n->ops->output;
1169 }
1170 } 1254 }
1171 if (hh) { 1255
1172 atomic_inc(&hh->hh_refcnt); 1256 write_lock_bh(&n->lock);
1173 dst->hh = hh; 1257
1258 /* must check if another thread already did the insert */
1259 if (neigh_hh_lookup(n, dst, protocol)) {
1260 kfree(hh);
1261 goto end;
1174 } 1262 }
1263
1264 if (n->nud_state & NUD_CONNECTED)
1265 hh->hh_output = n->ops->hh_output;
1266 else
1267 hh->hh_output = n->ops->output;
1268
1269 hh->hh_next = n->hh;
1270 smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */
1271 n->hh = hh;
1272
1273 if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL))
1274 hh_cache_put(hh);
1275end:
1276 write_unlock_bh(&n->lock);
1175} 1277}
1176 1278
1177/* This function can be used in contexts, where only old dev_queue_xmit 1279/* This function can be used in contexts, where only old dev_queue_xmit
1178 worked, f.e. if you want to override normal output path (eql, shaper), 1280 * worked, f.e. if you want to override normal output path (eql, shaper),
1179 but resolution is not made yet. 1281 * but resolution is not made yet.
1180 */ 1282 */
1181 1283
1182int neigh_compat_output(struct sk_buff *skb) 1284int neigh_compat_output(struct sk_buff *skb)
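
neigh_hh_init() is split into a lockless fast path (neigh_hh_lookup()) and a locked slow path that allocates and inserts a new hh_cache entry. Two details carry the scheme: a fresh entry's refcount starts at 2 (one reference for the neighbour's list, one for the dst about to cache it), and insertion publishes the entry with an smp_wmb()/smp_rmb() pair so a lockless reader that sees the new n->hh also sees its initialised fields. The barrier pairing, reduced to its core using the field names above:

/* writer, n->lock held */
hh->hh_type = protocol;	/* initialise the entry...               */
hh->hh_next = n->hh;	/* ...and link it behind the list head   */
smp_wmb();		/* order the stores above before publish */
n->hh = hh;		/* entry becomes reachable               */

/* lockless reader */
smp_rmb();		/* pairs with the smp_wmb() above */
for (hh = n->hh; hh; hh = hh->hh_next)
	if (hh->hh_type == protocol)
		break;

cmpxchg(&dst->hh, NULL, hh) then guarantees that only one thread caches the entry in a given dst; the loser simply drops its extra reference with hh_cache_put().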
@@ -1210,19 +1312,19 @@ int neigh_resolve_output(struct sk_buff *skb)
1210 if (!neigh_event_send(neigh, skb)) { 1312 if (!neigh_event_send(neigh, skb)) {
1211 int err; 1313 int err;
1212 struct net_device *dev = neigh->dev; 1314 struct net_device *dev = neigh->dev;
1213 if (dev->header_ops->cache && !dst->hh) { 1315 unsigned int seq;
1214 write_lock_bh(&neigh->lock); 1316
1215 if (!dst->hh) 1317 if (dev->header_ops->cache &&
1216 neigh_hh_init(neigh, dst, dst->ops->protocol); 1318 !dst->hh &&
1217 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 1319 !(dst->flags & DST_NOCACHE))
1218 neigh->ha, NULL, skb->len); 1320 neigh_hh_init(neigh, dst, dst->ops->protocol);
1219 write_unlock_bh(&neigh->lock); 1321
1220 } else { 1322 do {
1221 read_lock_bh(&neigh->lock); 1323 seq = read_seqbegin(&neigh->ha_lock);
1222 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 1324 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1223 neigh->ha, NULL, skb->len); 1325 neigh->ha, NULL, skb->len);
1224 read_unlock_bh(&neigh->lock); 1326 } while (read_seqretry(&neigh->ha_lock, seq));
1225 } 1327
1226 if (err >= 0) 1328 if (err >= 0)
1227 rc = neigh->ops->queue_xmit(skb); 1329 rc = neigh->ops->queue_xmit(skb);
1228 else 1330 else
@@ -1248,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb)
1248 struct dst_entry *dst = skb_dst(skb); 1350 struct dst_entry *dst = skb_dst(skb);
1249 struct neighbour *neigh = dst->neighbour; 1351 struct neighbour *neigh = dst->neighbour;
1250 struct net_device *dev = neigh->dev; 1352 struct net_device *dev = neigh->dev;
1353 unsigned int seq;
1251 1354
1252 __skb_pull(skb, skb_network_offset(skb)); 1355 __skb_pull(skb, skb_network_offset(skb));
1253 1356
1254 read_lock_bh(&neigh->lock); 1357 do {
1255 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 1358 seq = read_seqbegin(&neigh->ha_lock);
1256 neigh->ha, NULL, skb->len); 1359 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1257 read_unlock_bh(&neigh->lock); 1360 neigh->ha, NULL, skb->len);
1361 } while (read_seqretry(&neigh->ha_lock, seq));
1362
1258 if (err >= 0) 1363 if (err >= 0)
1259 err = neigh->ops->queue_xmit(skb); 1364 err = neigh->ops->queue_xmit(skb);
1260 else { 1365 else {
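
Both transmit paths now snapshot neigh->ha under the new ha_lock seqlock instead of taking the neighbour rwlock: readers retry the copy if a writer raced with them, and the writer side (in neigh_update() above) stays cheap. The full pattern:

/* writer (neigh_update()) */
write_seqlock(&neigh->ha_lock);
memcpy(&neigh->ha, lladdr, dev->addr_len);
write_sequnlock(&neigh->ha_lock);

/* reader (the transmit paths) */
do {
	seq = read_seqbegin(&neigh->ha_lock);
	err = dev_hard_header(skb, dev, ntohs(skb->protocol),
			      neigh->ha, NULL, skb->len);
} while (read_seqretry(&neigh->ha_lock, seq));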
@@ -1436,17 +1541,14 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
1436 panic("cannot create neighbour proc dir entry"); 1541 panic("cannot create neighbour proc dir entry");
1437#endif 1542#endif
1438 1543
1439 tbl->hash_mask = 1; 1544 tbl->nht = neigh_hash_alloc(8);
1440 tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1);
1441 1545
1442 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); 1546 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1443 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); 1547 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1444 1548
1445 if (!tbl->hash_buckets || !tbl->phash_buckets) 1549 if (!tbl->nht || !tbl->phash_buckets)
1446 panic("cannot allocate neighbour cache hashes"); 1550 panic("cannot allocate neighbour cache hashes");
1447 1551
1448 get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd));
1449
1450 rwlock_init(&tbl->lock); 1552 rwlock_init(&tbl->lock);
1451 INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work); 1553 INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work);
1452 schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); 1554 schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
@@ -1486,8 +1588,7 @@ int neigh_table_clear(struct neigh_table *tbl)
1486 struct neigh_table **tp; 1588 struct neigh_table **tp;
1487 1589
1488 /* It is not clean... Fix it to unload IPv6 module safely */ 1590 /* It is not clean... Fix it to unload IPv6 module safely */
1489 cancel_delayed_work(&tbl->gc_work); 1591 cancel_delayed_work_sync(&tbl->gc_work);
1490 flush_scheduled_work();
1491 del_timer_sync(&tbl->proxy_timer); 1592 del_timer_sync(&tbl->proxy_timer);
1492 pneigh_queue_purge(&tbl->proxy_queue); 1593 pneigh_queue_purge(&tbl->proxy_queue);
1493 neigh_ifdown(tbl, NULL); 1594 neigh_ifdown(tbl, NULL);
@@ -1502,8 +1603,8 @@ int neigh_table_clear(struct neigh_table *tbl)
1502 } 1603 }
1503 write_unlock(&neigh_tbl_lock); 1604 write_unlock(&neigh_tbl_lock);
1504 1605
1505 neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1); 1606 call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu);
1506 tbl->hash_buckets = NULL; 1607 tbl->nht = NULL;
1507 1608
1508 kfree(tbl->phash_buckets); 1609 kfree(tbl->phash_buckets);
1509 tbl->phash_buckets = NULL; 1610 tbl->phash_buckets = NULL;
@@ -1529,6 +1630,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1529 struct net_device *dev = NULL; 1630 struct net_device *dev = NULL;
1530 int err = -EINVAL; 1631 int err = -EINVAL;
1531 1632
1633 ASSERT_RTNL();
1532 if (nlmsg_len(nlh) < sizeof(*ndm)) 1634 if (nlmsg_len(nlh) < sizeof(*ndm))
1533 goto out; 1635 goto out;
1534 1636
@@ -1538,7 +1640,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1538 1640
1539 ndm = nlmsg_data(nlh); 1641 ndm = nlmsg_data(nlh);
1540 if (ndm->ndm_ifindex) { 1642 if (ndm->ndm_ifindex) {
1541 dev = dev_get_by_index(net, ndm->ndm_ifindex); 1643 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1542 if (dev == NULL) { 1644 if (dev == NULL) {
1543 err = -ENODEV; 1645 err = -ENODEV;
1544 goto out; 1646 goto out;
@@ -1554,34 +1656,31 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1554 read_unlock(&neigh_tbl_lock); 1656 read_unlock(&neigh_tbl_lock);
1555 1657
1556 if (nla_len(dst_attr) < tbl->key_len) 1658 if (nla_len(dst_attr) < tbl->key_len)
1557 goto out_dev_put; 1659 goto out;
1558 1660
1559 if (ndm->ndm_flags & NTF_PROXY) { 1661 if (ndm->ndm_flags & NTF_PROXY) {
1560 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev); 1662 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1561 goto out_dev_put; 1663 goto out;
1562 } 1664 }
1563 1665
1564 if (dev == NULL) 1666 if (dev == NULL)
1565 goto out_dev_put; 1667 goto out;
1566 1668
1567 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); 1669 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1568 if (neigh == NULL) { 1670 if (neigh == NULL) {
1569 err = -ENOENT; 1671 err = -ENOENT;
1570 goto out_dev_put; 1672 goto out;
1571 } 1673 }
1572 1674
1573 err = neigh_update(neigh, NULL, NUD_FAILED, 1675 err = neigh_update(neigh, NULL, NUD_FAILED,
1574 NEIGH_UPDATE_F_OVERRIDE | 1676 NEIGH_UPDATE_F_OVERRIDE |
1575 NEIGH_UPDATE_F_ADMIN); 1677 NEIGH_UPDATE_F_ADMIN);
1576 neigh_release(neigh); 1678 neigh_release(neigh);
1577 goto out_dev_put; 1679 goto out;
1578 } 1680 }
1579 read_unlock(&neigh_tbl_lock); 1681 read_unlock(&neigh_tbl_lock);
1580 err = -EAFNOSUPPORT; 1682 err = -EAFNOSUPPORT;
1581 1683
1582out_dev_put:
1583 if (dev)
1584 dev_put(dev);
1585out: 1684out:
1586 return err; 1685 return err;
1587} 1686}
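
The out_dev_put unwinding disappears because the device lookup no longer takes a reference: under RTNL (asserted at the top of the function) the device table cannot change, so __dev_get_by_index() is safe without dev_hold()/dev_put(). The idiom:

ASSERT_RTNL();				/* we hold the RTNL mutex    */
dev = __dev_get_by_index(net, ifindex);	/* no reference taken...     */
if (dev == NULL)
	return -ENODEV;			/* ...so no dev_put() needed */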
@@ -1595,6 +1694,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1595 struct net_device *dev = NULL; 1694 struct net_device *dev = NULL;
1596 int err; 1695 int err;
1597 1696
1697 ASSERT_RTNL();
1598 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); 1698 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1599 if (err < 0) 1699 if (err < 0)
1600 goto out; 1700 goto out;
@@ -1605,14 +1705,14 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1605 1705
1606 ndm = nlmsg_data(nlh); 1706 ndm = nlmsg_data(nlh);
1607 if (ndm->ndm_ifindex) { 1707 if (ndm->ndm_ifindex) {
1608 dev = dev_get_by_index(net, ndm->ndm_ifindex); 1708 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1609 if (dev == NULL) { 1709 if (dev == NULL) {
1610 err = -ENODEV; 1710 err = -ENODEV;
1611 goto out; 1711 goto out;
1612 } 1712 }
1613 1713
1614 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) 1714 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1615 goto out_dev_put; 1715 goto out;
1616 } 1716 }
1617 1717
1618 read_lock(&neigh_tbl_lock); 1718 read_lock(&neigh_tbl_lock);
@@ -1626,7 +1726,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1626 read_unlock(&neigh_tbl_lock); 1726 read_unlock(&neigh_tbl_lock);
1627 1727
1628 if (nla_len(tb[NDA_DST]) < tbl->key_len) 1728 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1629 goto out_dev_put; 1729 goto out;
1630 dst = nla_data(tb[NDA_DST]); 1730 dst = nla_data(tb[NDA_DST]);
1631 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; 1731 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1632 1732
@@ -1639,29 +1739,29 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1639 pn->flags = ndm->ndm_flags; 1739 pn->flags = ndm->ndm_flags;
1640 err = 0; 1740 err = 0;
1641 } 1741 }
1642 goto out_dev_put; 1742 goto out;
1643 } 1743 }
1644 1744
1645 if (dev == NULL) 1745 if (dev == NULL)
1646 goto out_dev_put; 1746 goto out;
1647 1747
1648 neigh = neigh_lookup(tbl, dst, dev); 1748 neigh = neigh_lookup(tbl, dst, dev);
1649 if (neigh == NULL) { 1749 if (neigh == NULL) {
1650 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { 1750 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1651 err = -ENOENT; 1751 err = -ENOENT;
1652 goto out_dev_put; 1752 goto out;
1653 } 1753 }
1654 1754
1655 neigh = __neigh_lookup_errno(tbl, dst, dev); 1755 neigh = __neigh_lookup_errno(tbl, dst, dev);
1656 if (IS_ERR(neigh)) { 1756 if (IS_ERR(neigh)) {
1657 err = PTR_ERR(neigh); 1757 err = PTR_ERR(neigh);
1658 goto out_dev_put; 1758 goto out;
1659 } 1759 }
1660 } else { 1760 } else {
1661 if (nlh->nlmsg_flags & NLM_F_EXCL) { 1761 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1662 err = -EEXIST; 1762 err = -EEXIST;
1663 neigh_release(neigh); 1763 neigh_release(neigh);
1664 goto out_dev_put; 1764 goto out;
1665 } 1765 }
1666 1766
1667 if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) 1767 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
@@ -1674,15 +1774,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
1674 } else 1774 } else
1675 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); 1775 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1676 neigh_release(neigh); 1776 neigh_release(neigh);
1677 goto out_dev_put; 1777 goto out;
1678 } 1778 }
1679 1779
1680 read_unlock(&neigh_tbl_lock); 1780 read_unlock(&neigh_tbl_lock);
1681 err = -EAFNOSUPPORT; 1781 err = -EAFNOSUPPORT;
1682
1683out_dev_put:
1684 if (dev)
1685 dev_put(dev);
1686out: 1782out:
1687 return err; 1783 return err;
1688} 1784}
@@ -1748,18 +1844,22 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1748 unsigned long now = jiffies; 1844 unsigned long now = jiffies;
1749 unsigned int flush_delta = now - tbl->last_flush; 1845 unsigned int flush_delta = now - tbl->last_flush;
1750 unsigned int rand_delta = now - tbl->last_rand; 1846 unsigned int rand_delta = now - tbl->last_rand;
1751 1847 struct neigh_hash_table *nht;
1752 struct ndt_config ndc = { 1848 struct ndt_config ndc = {
1753 .ndtc_key_len = tbl->key_len, 1849 .ndtc_key_len = tbl->key_len,
1754 .ndtc_entry_size = tbl->entry_size, 1850 .ndtc_entry_size = tbl->entry_size,
1755 .ndtc_entries = atomic_read(&tbl->entries), 1851 .ndtc_entries = atomic_read(&tbl->entries),
1756 .ndtc_last_flush = jiffies_to_msecs(flush_delta), 1852 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
1757 .ndtc_last_rand = jiffies_to_msecs(rand_delta), 1853 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
1758 .ndtc_hash_rnd = tbl->hash_rnd,
1759 .ndtc_hash_mask = tbl->hash_mask,
1760 .ndtc_proxy_qlen = tbl->proxy_queue.qlen, 1854 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
1761 }; 1855 };
1762 1856
1857 rcu_read_lock_bh();
1858 nht = rcu_dereference_bh(tbl->nht);
1859 ndc.ndtc_hash_rnd = nht->hash_rnd;
1860 ndc.ndtc_hash_mask = nht->hash_mask;
1861 rcu_read_unlock_bh();
1862
1763 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); 1863 NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc);
1764 } 1864 }
1765 1865
@@ -2056,10 +2156,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2056 2156
2057 read_lock_bh(&neigh->lock); 2157 read_lock_bh(&neigh->lock);
2058 ndm->ndm_state = neigh->nud_state; 2158 ndm->ndm_state = neigh->nud_state;
2059 if ((neigh->nud_state & NUD_VALID) && 2159 if (neigh->nud_state & NUD_VALID) {
2060 nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { 2160 char haddr[MAX_ADDR_LEN];
2061 read_unlock_bh(&neigh->lock); 2161
2062 goto nla_put_failure; 2162 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2163 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2164 read_unlock_bh(&neigh->lock);
2165 goto nla_put_failure;
2166 }
2063 } 2167 }
2064 2168
2065 ci.ndm_used = jiffies_to_clock_t(now - neigh->used); 2169 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
@@ -2087,18 +2191,23 @@ static void neigh_update_notify(struct neighbour *neigh)
2087static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, 2191static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2088 struct netlink_callback *cb) 2192 struct netlink_callback *cb)
2089{ 2193{
2090 struct net * net = sock_net(skb->sk); 2194 struct net *net = sock_net(skb->sk);
2091 struct neighbour *n; 2195 struct neighbour *n;
2092 int rc, h, s_h = cb->args[1]; 2196 int rc, h, s_h = cb->args[1];
2093 int idx, s_idx = idx = cb->args[2]; 2197 int idx, s_idx = idx = cb->args[2];
2198 struct neigh_hash_table *nht;
2094 2199
2095 read_lock_bh(&tbl->lock); 2200 rcu_read_lock_bh();
2096 for (h = 0; h <= tbl->hash_mask; h++) { 2201 nht = rcu_dereference_bh(tbl->nht);
2202
2203 for (h = 0; h <= nht->hash_mask; h++) {
2097 if (h < s_h) 2204 if (h < s_h)
2098 continue; 2205 continue;
2099 if (h > s_h) 2206 if (h > s_h)
2100 s_idx = 0; 2207 s_idx = 0;
2101 for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { 2208 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2209 n != NULL;
2210 n = rcu_dereference_bh(n->next)) {
2102 if (!net_eq(dev_net(n->dev), net)) 2211 if (!net_eq(dev_net(n->dev), net))
2103 continue; 2212 continue;
2104 if (idx < s_idx) 2213 if (idx < s_idx)
@@ -2107,17 +2216,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2107 cb->nlh->nlmsg_seq, 2216 cb->nlh->nlmsg_seq,
2108 RTM_NEWNEIGH, 2217 RTM_NEWNEIGH,
2109 NLM_F_MULTI) <= 0) { 2218 NLM_F_MULTI) <= 0) {
2110 read_unlock_bh(&tbl->lock);
2111 rc = -1; 2219 rc = -1;
2112 goto out; 2220 goto out;
2113 } 2221 }
2114 next: 2222next:
2115 idx++; 2223 idx++;
2116 } 2224 }
2117 } 2225 }
2118 read_unlock_bh(&tbl->lock);
2119 rc = skb->len; 2226 rc = skb->len;
2120out: 2227out:
2228 rcu_read_unlock_bh();
2121 cb->args[1] = h; 2229 cb->args[1] = h;
2122 cb->args[2] = idx; 2230 cb->args[2] = idx;
2123 return rc; 2231 return rc;
@@ -2150,15 +2258,22 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2150void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) 2258void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2151{ 2259{
2152 int chain; 2260 int chain;
2261 struct neigh_hash_table *nht;
2153 2262
2154 read_lock_bh(&tbl->lock); 2263 rcu_read_lock_bh();
2155 for (chain = 0; chain <= tbl->hash_mask; chain++) { 2264 nht = rcu_dereference_bh(tbl->nht);
2265
2266 read_lock(&tbl->lock); /* avoid resizes */
2267 for (chain = 0; chain <= nht->hash_mask; chain++) {
2156 struct neighbour *n; 2268 struct neighbour *n;
2157 2269
2158 for (n = tbl->hash_buckets[chain]; n; n = n->next) 2270 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2271 n != NULL;
2272 n = rcu_dereference_bh(n->next))
2159 cb(n, cookie); 2273 cb(n, cookie);
2160 } 2274 }
2161 read_unlock_bh(&tbl->lock); 2275 read_unlock(&tbl->lock);
2276 rcu_read_unlock_bh();
2162} 2277}
2163EXPORT_SYMBOL(neigh_for_each); 2278EXPORT_SYMBOL(neigh_for_each);
2164 2279
@@ -2167,18 +2282,25 @@ void __neigh_for_each_release(struct neigh_table *tbl,
2167 int (*cb)(struct neighbour *)) 2282 int (*cb)(struct neighbour *))
2168{ 2283{
2169 int chain; 2284 int chain;
2285 struct neigh_hash_table *nht;
2170 2286
2171 for (chain = 0; chain <= tbl->hash_mask; chain++) { 2287 nht = rcu_dereference_protected(tbl->nht,
2172 struct neighbour *n, **np; 2288 lockdep_is_held(&tbl->lock));
2289 for (chain = 0; chain <= nht->hash_mask; chain++) {
2290 struct neighbour *n;
2291 struct neighbour __rcu **np;
2173 2292
2174 np = &tbl->hash_buckets[chain]; 2293 np = &nht->hash_buckets[chain];
2175 while ((n = *np) != NULL) { 2294 while ((n = rcu_dereference_protected(*np,
2295 lockdep_is_held(&tbl->lock))) != NULL) {
2176 int release; 2296 int release;
2177 2297
2178 write_lock(&n->lock); 2298 write_lock(&n->lock);
2179 release = cb(n); 2299 release = cb(n);
2180 if (release) { 2300 if (release) {
2181 *np = n->next; 2301 rcu_assign_pointer(*np,
2302 rcu_dereference_protected(n->next,
2303 lockdep_is_held(&tbl->lock)));
2182 n->dead = 1; 2304 n->dead = 1;
2183 } else 2305 } else
2184 np = &n->next; 2306 np = &n->next;
@@ -2196,13 +2318,13 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
2196{ 2318{
2197 struct neigh_seq_state *state = seq->private; 2319 struct neigh_seq_state *state = seq->private;
2198 struct net *net = seq_file_net(seq); 2320 struct net *net = seq_file_net(seq);
2199 struct neigh_table *tbl = state->tbl; 2321 struct neigh_hash_table *nht = state->nht;
2200 struct neighbour *n = NULL; 2322 struct neighbour *n = NULL;
2201 int bucket = state->bucket; 2323 int bucket = state->bucket;
2202 2324
2203 state->flags &= ~NEIGH_SEQ_IS_PNEIGH; 2325 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2204 for (bucket = 0; bucket <= tbl->hash_mask; bucket++) { 2326 for (bucket = 0; bucket <= nht->hash_mask; bucket++) {
2205 n = tbl->hash_buckets[bucket]; 2327 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2206 2328
2207 while (n) { 2329 while (n) {
2208 if (!net_eq(dev_net(n->dev), net)) 2330 if (!net_eq(dev_net(n->dev), net))
@@ -2219,8 +2341,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq)
2219 break; 2341 break;
2220 if (n->nud_state & ~NUD_NOARP) 2342 if (n->nud_state & ~NUD_NOARP)
2221 break; 2343 break;
2222 next: 2344next:
2223 n = n->next; 2345 n = rcu_dereference_bh(n->next);
2224 } 2346 }
2225 2347
2226 if (n) 2348 if (n)
@@ -2237,14 +2359,14 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
2237{ 2359{
2238 struct neigh_seq_state *state = seq->private; 2360 struct neigh_seq_state *state = seq->private;
2239 struct net *net = seq_file_net(seq); 2361 struct net *net = seq_file_net(seq);
2240 struct neigh_table *tbl = state->tbl; 2362 struct neigh_hash_table *nht = state->nht;
2241 2363
2242 if (state->neigh_sub_iter) { 2364 if (state->neigh_sub_iter) {
2243 void *v = state->neigh_sub_iter(state, n, pos); 2365 void *v = state->neigh_sub_iter(state, n, pos);
2244 if (v) 2366 if (v)
2245 return n; 2367 return n;
2246 } 2368 }
2247 n = n->next; 2369 n = rcu_dereference_bh(n->next);
2248 2370
2249 while (1) { 2371 while (1) {
2250 while (n) { 2372 while (n) {
@@ -2261,17 +2383,17 @@ static struct neighbour *neigh_get_next(struct seq_file *seq,
2261 2383
2262 if (n->nud_state & ~NUD_NOARP) 2384 if (n->nud_state & ~NUD_NOARP)
2263 break; 2385 break;
2264 next: 2386next:
2265 n = n->next; 2387 n = rcu_dereference_bh(n->next);
2266 } 2388 }
2267 2389
2268 if (n) 2390 if (n)
2269 break; 2391 break;
2270 2392
2271 if (++state->bucket > tbl->hash_mask) 2393 if (++state->bucket > nht->hash_mask)
2272 break; 2394 break;
2273 2395
2274 n = tbl->hash_buckets[state->bucket]; 2396 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
2275 } 2397 }
2276 2398
2277 if (n && pos) 2399 if (n && pos)
@@ -2369,7 +2491,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2369} 2491}
2370 2492
2371void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) 2493void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2372 __acquires(tbl->lock) 2494 __acquires(rcu_bh)
2373{ 2495{
2374 struct neigh_seq_state *state = seq->private; 2496 struct neigh_seq_state *state = seq->private;
2375 2497
@@ -2377,7 +2499,8 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl
2377 state->bucket = 0; 2499 state->bucket = 0;
2378 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); 2500 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2379 2501
2380 read_lock_bh(&tbl->lock); 2502 rcu_read_lock_bh();
2503 state->nht = rcu_dereference_bh(tbl->nht);
2381 2504
2382 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; 2505 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2383} 2506}
@@ -2411,12 +2534,9 @@ out:
2411EXPORT_SYMBOL(neigh_seq_next); 2534EXPORT_SYMBOL(neigh_seq_next);
2412 2535
2413void neigh_seq_stop(struct seq_file *seq, void *v) 2536void neigh_seq_stop(struct seq_file *seq, void *v)
2414 __releases(tbl->lock) 2537 __releases(rcu_bh)
2415{ 2538{
2416 struct neigh_seq_state *state = seq->private; 2539 rcu_read_unlock_bh();
2417 struct neigh_table *tbl = state->tbl;
2418
2419 read_unlock_bh(&tbl->lock);
2420} 2540}
2421EXPORT_SYMBOL(neigh_seq_stop); 2541EXPORT_SYMBOL(neigh_seq_stop);
2422 2542
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index af4dfbadf2a0..b143173e3eb2 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -515,7 +515,7 @@ static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
515 return attribute->store(queue, attribute, buf, count); 515 return attribute->store(queue, attribute, buf, count);
516} 516}
517 517
518static struct sysfs_ops rx_queue_sysfs_ops = { 518static const struct sysfs_ops rx_queue_sysfs_ops = {
519 .show = rx_queue_attr_show, 519 .show = rx_queue_attr_show,
520 .store = rx_queue_attr_store, 520 .store = rx_queue_attr_store,
521}; 521};
@@ -726,6 +726,7 @@ static struct kobj_type rx_queue_ktype = {
726static int rx_queue_add_kobject(struct net_device *net, int index) 726static int rx_queue_add_kobject(struct net_device *net, int index)
727{ 727{
728 struct netdev_rx_queue *queue = net->_rx + index; 728 struct netdev_rx_queue *queue = net->_rx + index;
729 struct netdev_rx_queue *first = queue->first;
729 struct kobject *kobj = &queue->kobj; 730 struct kobject *kobj = &queue->kobj;
730 int error = 0; 731 int error = 0;
731 732
@@ -738,38 +739,43 @@ static int rx_queue_add_kobject(struct net_device *net, int index)
738 } 739 }
739 740
740 kobject_uevent(kobj, KOBJ_ADD); 741 kobject_uevent(kobj, KOBJ_ADD);
742 atomic_inc(&first->count);
741 743
742 return error; 744 return error;
743} 745}
744 746
745static int rx_queue_register_kobjects(struct net_device *net) 747int
748net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
746{ 749{
747 int i; 750 int i;
748 int error = 0; 751 int error = 0;
749 752
750 net->queues_kset = kset_create_and_add("queues", 753 for (i = old_num; i < new_num; i++) {
751 NULL, &net->dev.kobj);
752 if (!net->queues_kset)
753 return -ENOMEM;
754 for (i = 0; i < net->num_rx_queues; i++) {
755 error = rx_queue_add_kobject(net, i); 754 error = rx_queue_add_kobject(net, i);
756 if (error) 755 if (error) {
756 new_num = old_num;
757 break; 757 break;
758 }
758 } 759 }
759 760
760 if (error) 761 while (--i >= new_num)
761 while (--i >= 0) 762 kobject_put(&net->_rx[i].kobj);
762 kobject_put(&net->_rx[i].kobj);
763 763
764 return error; 764 return error;
765} 765}
766 766
767static void rx_queue_remove_kobjects(struct net_device *net) 767static int rx_queue_register_kobjects(struct net_device *net)
768{ 768{
769 int i; 769 net->queues_kset = kset_create_and_add("queues",
770 NULL, &net->dev.kobj);
771 if (!net->queues_kset)
772 return -ENOMEM;
773 return net_rx_queue_update_kobjects(net, 0, net->real_num_rx_queues);
774}
770 775
771 for (i = 0; i < net->num_rx_queues; i++) 776static void rx_queue_remove_kobjects(struct net_device *net)
772 kobject_put(&net->_rx[i].kobj); 777{
778 net_rx_queue_update_kobjects(net, net->real_num_rx_queues, 0);
773 kset_unregister(net->queues_kset); 779 kset_unregister(net->queues_kset);
774} 780}
775#endif /* CONFIG_RPS */ 781#endif /* CONFIG_RPS */
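
Registration and removal of the rx-queue kobjects collapse into one helper that walks the half-open range [old_num, new_num): growing registers the new indices, while shrinking (or a mid-loop failure) releases everything at or above the target count. The control flow, with hypothetical add_one()/put_one() helpers:

for (i = old_num; i < new_num; i++) {
	error = add_one(i);
	if (error) {
		new_num = old_num;	/* roll back what this call added */
		break;
	}
}
while (--i >= new_num)			/* no-op on a clean grow */
	put_one(i);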
@@ -789,12 +795,13 @@ static const void *net_netlink_ns(struct sock *sk)
789 return sock_net(sk); 795 return sock_net(sk);
790} 796}
791 797
792static struct kobj_ns_type_operations net_ns_type_operations = { 798struct kobj_ns_type_operations net_ns_type_operations = {
793 .type = KOBJ_NS_TYPE_NET, 799 .type = KOBJ_NS_TYPE_NET,
794 .current_ns = net_current_ns, 800 .current_ns = net_current_ns,
795 .netlink_ns = net_netlink_ns, 801 .netlink_ns = net_netlink_ns,
796 .initial_ns = net_initial_ns, 802 .initial_ns = net_initial_ns,
797}; 803};
804EXPORT_SYMBOL_GPL(net_ns_type_operations);
798 805
799static void net_kobj_ns_exit(struct net *net) 806static void net_kobj_ns_exit(struct net *net)
800{ 807{
diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h
index 805555e8b187..778e1571548d 100644
--- a/net/core/net-sysfs.h
+++ b/net/core/net-sysfs.h
@@ -4,4 +4,8 @@
4int netdev_kobject_init(void); 4int netdev_kobject_init(void);
5int netdev_register_kobject(struct net_device *); 5int netdev_register_kobject(struct net_device *);
6void netdev_unregister_kobject(struct net_device *); 6void netdev_unregister_kobject(struct net_device *);
7#ifdef CONFIG_RPS
8int net_rx_queue_update_kobjects(struct net_device *, int old_num, int new_num);
9#endif
10
7#endif 11#endif
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 537e01afd81b..4e98ffac3af0 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -288,11 +288,11 @@ static int netpoll_owner_active(struct net_device *dev)
288 return 0; 288 return 0;
289} 289}
290 290
291void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) 291void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
292 struct net_device *dev)
292{ 293{
293 int status = NETDEV_TX_BUSY; 294 int status = NETDEV_TX_BUSY;
294 unsigned long tries; 295 unsigned long tries;
295 struct net_device *dev = np->dev;
296 const struct net_device_ops *ops = dev->netdev_ops; 296 const struct net_device_ops *ops = dev->netdev_ops;
297 /* It is up to the caller to keep npinfo alive. */ 297 /* It is up to the caller to keep npinfo alive. */
298 struct netpoll_info *npinfo = np->dev->npinfo; 298 struct netpoll_info *npinfo = np->dev->npinfo;
@@ -346,7 +346,7 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
346 schedule_delayed_work(&npinfo->tx_work,0); 346 schedule_delayed_work(&npinfo->tx_work,0);
347 } 347 }
348} 348}
349EXPORT_SYMBOL(netpoll_send_skb); 349EXPORT_SYMBOL(netpoll_send_skb_on_dev);
350 350
351void netpoll_send_udp(struct netpoll *np, const char *msg, int len) 351void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
352{ 352{
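
netpoll_send_skb() becomes netpoll_send_skb_on_dev() with the device passed explicitly, so callers that stack netpoll over multiple devices (such as bonding) can target one directly. The old name presumably survives as a thin wrapper along these lines; the header change is not part of this hunk, so the exact form is assumed:

static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
{
	netpoll_send_skb_on_dev(np, skb, np->dev);
}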
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 10a1ea72010d..2c0df0f95b3d 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -729,16 +729,14 @@ static int hex32_arg(const char __user *user_buffer, unsigned long maxlen,
729 *num = 0; 729 *num = 0;
730 730
731 for (; i < maxlen; i++) { 731 for (; i < maxlen; i++) {
732 int value;
732 char c; 733 char c;
733 *num <<= 4; 734 *num <<= 4;
734 if (get_user(c, &user_buffer[i])) 735 if (get_user(c, &user_buffer[i]))
735 return -EFAULT; 736 return -EFAULT;
736 if ((c >= '0') && (c <= '9')) 737 value = hex_to_bin(c);
737 *num |= c - '0'; 738 if (value >= 0)
738 else if ((c >= 'a') && (c <= 'f')) 739 *num |= value;
739 *num |= c - 'a' + 10;
740 else if ((c >= 'A') && (c <= 'F'))
741 *num |= c - 'A' + 10;
742 else 740 else
743 break; 741 break;
744 } 742 }
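
The three hand-written character-range tests are replaced by hex_to_bin(), which returns 0..15 for a hex digit and a negative value otherwise. Stripped of the get_user() plumbing, the loop reduces to the following (a sketch over an in-kernel buffer, not the user copy the real code performs):

unsigned long num = 0;

while (len--) {
	int v = hex_to_bin(*p++);

	if (v < 0)
		break;		/* first non-hex char ends the number */
	num = (num << 4) | v;
}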
@@ -3907,8 +3905,6 @@ static void __exit pg_cleanup(void)
3907{ 3905{
3908 struct pktgen_thread *t; 3906 struct pktgen_thread *t;
3909 struct list_head *q, *n; 3907 struct list_head *q, *n;
3910 wait_queue_head_t queue;
3911 init_waitqueue_head(&queue);
3912 3908
3913 /* Stop all interfaces & threads */ 3909 /* Stop all interfaces & threads */
3914 3910
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f78d821bd935..8121268ddbdd 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -299,14 +299,6 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
299 unregister_netdevice_many(&list_kill); 299 unregister_netdevice_many(&list_kill);
300} 300}
301 301
302void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops)
303{
304 rtnl_lock();
305 __rtnl_kill_links(net, ops);
306 rtnl_unlock();
307}
308EXPORT_SYMBOL_GPL(rtnl_kill_links);
309
310/** 302/**
311 * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. 303 * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink.
312 * @ops: struct rtnl_link_ops * to unregister 304 * @ops: struct rtnl_link_ops * to unregister
@@ -612,36 +604,7 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
612 604
613static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b) 605static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
614{ 606{
615 struct rtnl_link_stats64 a; 607 memcpy(v, b, sizeof(*b));
616
617 a.rx_packets = b->rx_packets;
618 a.tx_packets = b->tx_packets;
619 a.rx_bytes = b->rx_bytes;
620 a.tx_bytes = b->tx_bytes;
621 a.rx_errors = b->rx_errors;
622 a.tx_errors = b->tx_errors;
623 a.rx_dropped = b->rx_dropped;
624 a.tx_dropped = b->tx_dropped;
625
626 a.multicast = b->multicast;
627 a.collisions = b->collisions;
628
629 a.rx_length_errors = b->rx_length_errors;
630 a.rx_over_errors = b->rx_over_errors;
631 a.rx_crc_errors = b->rx_crc_errors;
632 a.rx_frame_errors = b->rx_frame_errors;
633 a.rx_fifo_errors = b->rx_fifo_errors;
634 a.rx_missed_errors = b->rx_missed_errors;
635
636 a.tx_aborted_errors = b->tx_aborted_errors;
637 a.tx_carrier_errors = b->tx_carrier_errors;
638 a.tx_fifo_errors = b->tx_fifo_errors;
639 a.tx_heartbeat_errors = b->tx_heartbeat_errors;
640 a.tx_window_errors = b->tx_window_errors;
641
642 a.rx_compressed = b->rx_compressed;
643 a.tx_compressed = b->tx_compressed;
644 memcpy(v, &a, sizeof(a));
645} 608}
646 609
647/* All VF info */ 610/* All VF info */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 56ba3c4e4761..104f8444754a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -202,8 +202,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
202 skb->data = data; 202 skb->data = data;
203 skb_reset_tail_pointer(skb); 203 skb_reset_tail_pointer(skb);
204 skb->end = skb->tail + size; 204 skb->end = skb->tail + size;
205 kmemcheck_annotate_bitfield(skb, flags1);
206 kmemcheck_annotate_bitfield(skb, flags2);
207#ifdef NET_SKBUFF_DATA_USES_OFFSET 205#ifdef NET_SKBUFF_DATA_USES_OFFSET
208 skb->mac_header = ~0U; 206 skb->mac_header = ~0U;
209#endif 207#endif
@@ -249,10 +247,9 @@ EXPORT_SYMBOL(__alloc_skb);
249struct sk_buff *__netdev_alloc_skb(struct net_device *dev, 247struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
250 unsigned int length, gfp_t gfp_mask) 248 unsigned int length, gfp_t gfp_mask)
251{ 249{
252 int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
253 struct sk_buff *skb; 250 struct sk_buff *skb;
254 251
255 skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node); 252 skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, NUMA_NO_NODE);
256 if (likely(skb)) { 253 if (likely(skb)) {
257 skb_reserve(skb, NET_SKB_PAD); 254 skb_reserve(skb, NET_SKB_PAD);
258 skb->dev = dev; 255 skb->dev = dev;
@@ -261,16 +258,6 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
261} 258}
262EXPORT_SYMBOL(__netdev_alloc_skb); 259EXPORT_SYMBOL(__netdev_alloc_skb);
263 260
264struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
265{
266 int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
267 struct page *page;
268
269 page = alloc_pages_node(node, gfp_mask, 0);
270 return page;
271}
272EXPORT_SYMBOL(__netdev_alloc_page);
273
274void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, 261void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
275 int size) 262 int size)
276{ 263{
@@ -340,7 +327,7 @@ static void skb_release_data(struct sk_buff *skb)
340 put_page(skb_shinfo(skb)->frags[i].page); 327 put_page(skb_shinfo(skb)->frags[i].page);
341 } 328 }
342 329
343 if (skb_has_frags(skb)) 330 if (skb_has_frag_list(skb))
344 skb_drop_fraglist(skb); 331 skb_drop_fraglist(skb);
345 332
346 kfree(skb->head); 333 kfree(skb->head);
@@ -686,16 +673,10 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
686 673
687struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) 674struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
688{ 675{
689 int headerlen = skb->data - skb->head; 676 int headerlen = skb_headroom(skb);
690 /* 677 unsigned int size = (skb_end_pointer(skb) - skb->head) + skb->data_len;
691 * Allocate the copy buffer 678 struct sk_buff *n = alloc_skb(size, gfp_mask);
692 */ 679
693 struct sk_buff *n;
694#ifdef NET_SKBUFF_DATA_USES_OFFSET
695 n = alloc_skb(skb->end + skb->data_len, gfp_mask);
696#else
697 n = alloc_skb(skb->end - skb->head + skb->data_len, gfp_mask);
698#endif
699 if (!n) 680 if (!n)
700 return NULL; 681 return NULL;
701 682
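
skb_copy() and pskb_copy() shed their NET_SKBUFF_DATA_USES_OFFSET ifdefs by going through skb_end_pointer(), which already hides whether skb->end holds an offset or a pointer. For reference, its two definitions look like this (as in 2.6.36-era skbuff.h):

#ifdef NET_SKBUFF_DATA_USES_OFFSET
static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
{
	return skb->head + skb->end;	/* end stored as an offset */
}
#else
static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
{
	return skb->end;		/* end stored as a pointer */
}
#endif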
@@ -727,20 +708,14 @@ EXPORT_SYMBOL(skb_copy);
727 708
728struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) 709struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
729{ 710{
730 /* 711 unsigned int size = skb_end_pointer(skb) - skb->head;
731 * Allocate the copy buffer 712 struct sk_buff *n = alloc_skb(size, gfp_mask);
732 */ 713
733 struct sk_buff *n;
734#ifdef NET_SKBUFF_DATA_USES_OFFSET
735 n = alloc_skb(skb->end, gfp_mask);
736#else
737 n = alloc_skb(skb->end - skb->head, gfp_mask);
738#endif
739 if (!n) 714 if (!n)
740 goto out; 715 goto out;
741 716
742 /* Set the data pointer */ 717 /* Set the data pointer */
743 skb_reserve(n, skb->data - skb->head); 718 skb_reserve(n, skb_headroom(skb));
744 /* Set the tail pointer and length */ 719 /* Set the tail pointer and length */
745 skb_put(n, skb_headlen(skb)); 720 skb_put(n, skb_headlen(skb));
746 /* Copy the bytes */ 721 /* Copy the bytes */
@@ -760,7 +735,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
760 skb_shinfo(n)->nr_frags = i; 735 skb_shinfo(n)->nr_frags = i;
761 } 736 }
762 737
763 if (skb_has_frags(skb)) { 738 if (skb_has_frag_list(skb)) {
764 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; 739 skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list;
765 skb_clone_fraglist(n); 740 skb_clone_fraglist(n);
766 } 741 }
@@ -792,12 +767,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
792{ 767{
793 int i; 768 int i;
794 u8 *data; 769 u8 *data;
795#ifdef NET_SKBUFF_DATA_USES_OFFSET 770 int size = nhead + (skb_end_pointer(skb) - skb->head) + ntail;
796 int size = nhead + skb->end + ntail;
797#else
798 int size = nhead + (skb->end - skb->head) + ntail;
799#endif
800 long off; 771 long off;
772 bool fastpath;
801 773
802 BUG_ON(nhead < 0); 774 BUG_ON(nhead < 0);
803 775
@@ -811,23 +783,36 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
811 goto nodata; 783 goto nodata;
812 784
813 /* Copy only real data... and, alas, header. This should be 785 /* Copy only real data... and, alas, header. This should be
814 * optimized for the cases when header is void. */ 786 * optimized for the cases when header is void.
815#ifdef NET_SKBUFF_DATA_USES_OFFSET 787 */
816 memcpy(data + nhead, skb->head, skb->tail); 788 memcpy(data + nhead, skb->head, skb_tail_pointer(skb) - skb->head);
817#else 789
818 memcpy(data + nhead, skb->head, skb->tail - skb->head); 790 memcpy((struct skb_shared_info *)(data + size),
819#endif 791 skb_shinfo(skb),
820 memcpy(data + size, skb_end_pointer(skb),
821 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags])); 792 offsetof(struct skb_shared_info, frags[skb_shinfo(skb)->nr_frags]));
822 793
823 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) 794 /* Check if we can avoid taking references on fragments if we own
824 get_page(skb_shinfo(skb)->frags[i].page); 795 * the last reference on skb->head. (see skb_release_data())
796 */
797 if (!skb->cloned)
798 fastpath = true;
799 else {
800 int delta = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
825 801
826 if (skb_has_frags(skb)) 802 fastpath = atomic_read(&skb_shinfo(skb)->dataref) == delta;
827 skb_clone_fraglist(skb); 803 }
828 804
829 skb_release_data(skb); 805 if (fastpath) {
806 kfree(skb->head);
807 } else {
808 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
809 get_page(skb_shinfo(skb)->frags[i].page);
830 810
811 if (skb_has_frag_list(skb))
812 skb_clone_fraglist(skb);
813
814 skb_release_data(skb);
815 }
831 off = (data + nhead) - skb->head; 816 off = (data + nhead) - skb->head;
832 817
833 skb->head = data; 818 skb->head = data;
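
The new branch avoids bumping every fragment page's refcount and calling skb_release_data() when the caller already owns the last reference on skb->head. A standalone sketch of the dataref test, with invented toy types but the same SKB_DATAREF_SHIFT packing idea (payload-only references live in the upper bits):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define SKB_DATAREF_SHIFT 16

    struct toy_skb {
            bool cloned;
            bool nohdr;             /* header part already given away */
            atomic_int dataref;     /* low bits: full refs; high: payload-only */
    };

    /* True when we hold the last reference on the head buffer, so it can
     * simply be freed instead of walking and releasing fragments. */
    static bool can_take_fastpath(struct toy_skb *skb)
    {
            int expected;

            if (!skb->cloned)
                    return true;
            /* With nohdr set, one payload-only ref plus ours remain. */
            expected = skb->nohdr ? (1 << SKB_DATAREF_SHIFT) + 1 : 1;
            return atomic_load(&skb->dataref) == expected;
    }

    int main(void)
    {
            struct toy_skb a = { .cloned = false };
            struct toy_skb b = { .cloned = true, .nohdr = false };

            atomic_init(&a.dataref, 1);
            atomic_init(&b.dataref, 2);  /* a clone still holds a ref */
            printf("a: %d, b: %d\n", can_take_fastpath(&a),
                   can_take_fastpath(&b));  /* a: 1, b: 0 */
            return 0;
    }
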
@@ -1100,7 +1085,7 @@ drop_pages:
1100 for (; i < nfrags; i++) 1085 for (; i < nfrags; i++)
1101 put_page(skb_shinfo(skb)->frags[i].page); 1086 put_page(skb_shinfo(skb)->frags[i].page);
1102 1087
1103 if (skb_has_frags(skb)) 1088 if (skb_has_frag_list(skb))
1104 skb_drop_fraglist(skb); 1089 skb_drop_fraglist(skb);
1105 goto done; 1090 goto done;
1106 } 1091 }
@@ -1195,7 +1180,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
1195 /* Optimization: no fragments, no reasons to preestimate 1180 /* Optimization: no fragments, no reasons to preestimate
1196 * size of pulled pages. Superb. 1181 * size of pulled pages. Superb.
1197 */ 1182 */
1198 if (!skb_has_frags(skb)) 1183 if (!skb_has_frag_list(skb))
1199 goto pull_pages; 1184 goto pull_pages;
1200 1185
1201 /* Estimate size of pulled pages. */ 1186 /* Estimate size of pulled pages. */
@@ -2324,7 +2309,7 @@ next_skb:
2324 st->frag_data = NULL; 2309 st->frag_data = NULL;
2325 } 2310 }
2326 2311
2327 if (st->root_skb == st->cur_skb && skb_has_frags(st->root_skb)) { 2312 if (st->root_skb == st->cur_skb && skb_has_frag_list(st->root_skb)) {
2328 st->cur_skb = skb_shinfo(st->root_skb)->frag_list; 2313 st->cur_skb = skb_shinfo(st->root_skb)->frag_list;
2329 st->frag_idx = 0; 2314 st->frag_idx = 0;
2330 goto next_skb; 2315 goto next_skb;
@@ -2894,7 +2879,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2894 return -ENOMEM; 2879 return -ENOMEM;
2895 2880
2896 /* Easy case. Most of packets will go this way. */ 2881 /* Easy case. Most of packets will go this way. */
2897 if (!skb_has_frags(skb)) { 2882 if (!skb_has_frag_list(skb)) {
2898 /* A little of trouble, not enough of space for trailer. 2883 /* A little of trouble, not enough of space for trailer.
2899 * This should not happen, when stack is tuned to generate 2884 * This should not happen, when stack is tuned to generate
2900 * good frames. OK, on miss we reallocate and reserve even more 2885 * good frames. OK, on miss we reallocate and reserve even more
@@ -2929,7 +2914,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2929 2914
2930 if (skb1->next == NULL && tailbits) { 2915 if (skb1->next == NULL && tailbits) {
2931 if (skb_shinfo(skb1)->nr_frags || 2916 if (skb_shinfo(skb1)->nr_frags ||
2932 skb_has_frags(skb1) || 2917 skb_has_frag_list(skb1) ||
2933 skb_tailroom(skb1) < tailbits) 2918 skb_tailroom(skb1) < tailbits)
2934 ntail = tailbits + 128; 2919 ntail = tailbits + 128;
2935 } 2920 }
@@ -2938,7 +2923,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
2938 skb_cloned(skb1) || 2923 skb_cloned(skb1) ||
2939 ntail || 2924 ntail ||
2940 skb_shinfo(skb1)->nr_frags || 2925 skb_shinfo(skb1)->nr_frags ||
2941 skb_has_frags(skb1)) { 2926 skb_has_frag_list(skb1)) {
2942 struct sk_buff *skb2; 2927 struct sk_buff *skb2;
2943 2928
2944 /* Fuck, we are miserable poor guys... */ 2929 /* Fuck, we are miserable poor guys... */
@@ -3021,7 +3006,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
3021 } else { 3006 } else {
3022 /* 3007 /*
3023 * no hardware time stamps available, 3008 * no hardware time stamps available,
3024 * so keep the skb_shared_tx and only 3009 * so keep the shared tx_flags and only
3025 * store software time stamp 3010 * store software time stamp
3026 */ 3011 */
3027 skb->tstamp = ktime_get_real(); 3012 skb->tstamp = ktime_get_real();
diff --git a/net/core/sock.c b/net/core/sock.c
index 7d99e13148e6..11db43632df8 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1560,6 +1560,8 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
1560EXPORT_SYMBOL(sock_alloc_send_skb); 1560EXPORT_SYMBOL(sock_alloc_send_skb);
1561 1561
1562static void __lock_sock(struct sock *sk) 1562static void __lock_sock(struct sock *sk)
1563 __releases(&sk->sk_lock.slock)
1564 __acquires(&sk->sk_lock.slock)
1563{ 1565{
1564 DEFINE_WAIT(wait); 1566 DEFINE_WAIT(wait);
1565 1567
@@ -1576,6 +1578,8 @@ static void __lock_sock(struct sock *sk)
1576} 1578}
1577 1579
1578static void __release_sock(struct sock *sk) 1580static void __release_sock(struct sock *sk)
1581 __releases(&sk->sk_lock.slock)
1582 __acquires(&sk->sk_lock.slock)
1579{ 1583{
1580 struct sk_buff *skb = sk->sk_backlog.head; 1584 struct sk_buff *skb = sk->sk_backlog.head;
1581 1585
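
__releases()/__acquires() are sparse context annotations and compile away in a normal build. A rough approximation of how they are defined (the kernel's real versions live in its compiler headers), applied to a pthread analogue of __lock_sock(), which is entered with the lock held, drops it to wait, and re-takes it before returning:

    #include <pthread.h>

    #ifdef __CHECKER__                       /* set when sparse is running */
    # define __acquires(x) __attribute__((context(x, 0, 1)))
    # define __releases(x) __attribute__((context(x, 1, 0)))
    #else
    # define __acquires(x)
    # define __releases(x)
    #endif

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* Caller holds `lock`; temporarily released inside, like __lock_sock(). */
    static void wait_with_lock_dropped(void)
            __releases(&lock)
            __acquires(&lock)
    {
            pthread_mutex_unlock(&lock);
            /* ... sleep or wait for a condition here ... */
            pthread_mutex_lock(&lock);
    }

    int main(void)
    {
            pthread_mutex_lock(&lock);
            wait_with_lock_dropped();
            pthread_mutex_unlock(&lock);
            return 0;
    }
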
diff --git a/net/core/utils.c b/net/core/utils.c
index f41854470539..5fea0ab21902 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -75,7 +75,7 @@ __be32 in_aton(const char *str)
75 str++; 75 str++;
76 } 76 }
77 } 77 }
78 return(htonl(l)); 78 return htonl(l);
79} 79}
80EXPORT_SYMBOL(in_aton); 80EXPORT_SYMBOL(in_aton);
81 81
@@ -92,18 +92,19 @@ EXPORT_SYMBOL(in_aton);
92 92
93static inline int xdigit2bin(char c, int delim) 93static inline int xdigit2bin(char c, int delim)
94{ 94{
95 int val;
96
95 if (c == delim || c == '\0') 97 if (c == delim || c == '\0')
96 return IN6PTON_DELIM; 98 return IN6PTON_DELIM;
97 if (c == ':') 99 if (c == ':')
98 return IN6PTON_COLON_MASK; 100 return IN6PTON_COLON_MASK;
99 if (c == '.') 101 if (c == '.')
100 return IN6PTON_DOT; 102 return IN6PTON_DOT;
101 if (c >= '0' && c <= '9') 103
102 return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0')); 104 val = hex_to_bin(c);
103 if (c >= 'a' && c <= 'f') 105 if (val >= 0)
104 return (IN6PTON_XDIGIT | (c - 'a' + 10)); 106 return val | IN6PTON_XDIGIT | (val < 10 ? IN6PTON_DIGIT : 0);
105 if (c >= 'A' && c <= 'F') 107
106 return (IN6PTON_XDIGIT | (c - 'A' + 10));
107 if (delim == -1) 108 if (delim == -1)
108 return IN6PTON_DELIM; 109 return IN6PTON_DELIM;
109 return IN6PTON_UNKNOWN; 110 return IN6PTON_UNKNOWN;
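
hex_to_bin() (from the kernel's lib code) returns the nibble value or -1 for non-hex input, which is what lets the three range checks above collapse into one call. A userspace re-creation of the new path; the IN6PTON_* flag values here are assumptions for illustration, not necessarily the exact kernel constants:

    #include <stdio.h>

    #define IN6PTON_XDIGIT  0x00010000   /* assumed flag values */
    #define IN6PTON_DIGIT   0x00020000

    static int hex_to_bin(char ch)
    {
            if (ch >= '0' && ch <= '9')
                    return ch - '0';
            if (ch >= 'a' && ch <= 'f')
                    return ch - 'a' + 10;
            if (ch >= 'A' && ch <= 'F')
                    return ch - 'A' + 10;
            return -1;
    }

    int main(void)
    {
            /* '7' is both a digit and a hex digit; 'c' is hex only. */
            int v7 = hex_to_bin('7'), vc = hex_to_bin('c');

            printf("%#x %#x\n",
                   v7 | IN6PTON_XDIGIT | (v7 < 10 ? IN6PTON_DIGIT : 0),
                   vc | IN6PTON_XDIGIT | (vc < 10 ? IN6PTON_DIGIT : 0));
            return 0;
    }
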
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index 6df6f8ac9636..117fb093dcaf 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -62,22 +62,18 @@ struct ccid_operations {
62 void (*ccid_hc_tx_exit)(struct sock *sk); 62 void (*ccid_hc_tx_exit)(struct sock *sk);
63 void (*ccid_hc_rx_packet_recv)(struct sock *sk, 63 void (*ccid_hc_rx_packet_recv)(struct sock *sk,
64 struct sk_buff *skb); 64 struct sk_buff *skb);
65 int (*ccid_hc_rx_parse_options)(struct sock *sk, 65 int (*ccid_hc_rx_parse_options)(struct sock *sk, u8 pkt,
66 unsigned char option, 66 u8 opt, u8 *val, u8 len);
67 unsigned char len, u16 idx,
68 unsigned char* value);
69 int (*ccid_hc_rx_insert_options)(struct sock *sk, 67 int (*ccid_hc_rx_insert_options)(struct sock *sk,
70 struct sk_buff *skb); 68 struct sk_buff *skb);
71 void (*ccid_hc_tx_packet_recv)(struct sock *sk, 69 void (*ccid_hc_tx_packet_recv)(struct sock *sk,
72 struct sk_buff *skb); 70 struct sk_buff *skb);
73 int (*ccid_hc_tx_parse_options)(struct sock *sk, 71 int (*ccid_hc_tx_parse_options)(struct sock *sk, u8 pkt,
74 unsigned char option, 72 u8 opt, u8 *val, u8 len);
75 unsigned char len, u16 idx,
76 unsigned char* value);
77 int (*ccid_hc_tx_send_packet)(struct sock *sk, 73 int (*ccid_hc_tx_send_packet)(struct sock *sk,
78 struct sk_buff *skb); 74 struct sk_buff *skb);
79 void (*ccid_hc_tx_packet_sent)(struct sock *sk, 75 void (*ccid_hc_tx_packet_sent)(struct sock *sk,
80 int more, unsigned int len); 76 unsigned int len);
81 void (*ccid_hc_rx_get_info)(struct sock *sk, 77 void (*ccid_hc_rx_get_info)(struct sock *sk,
82 struct tcp_info *info); 78 struct tcp_info *info);
83 void (*ccid_hc_tx_get_info)(struct sock *sk, 79 void (*ccid_hc_tx_get_info)(struct sock *sk,
@@ -148,10 +144,10 @@ static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
148} 144}
149 145
150static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk, 146static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
151 int more, unsigned int len) 147 unsigned int len)
152{ 148{
153 if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL) 149 if (ccid->ccid_ops->ccid_hc_tx_packet_sent != NULL)
154 ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, more, len); 150 ccid->ccid_ops->ccid_hc_tx_packet_sent(sk, len);
155} 151}
156 152
157static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk, 153static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
@@ -168,27 +164,31 @@ static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
168 ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb); 164 ccid->ccid_ops->ccid_hc_tx_packet_recv(sk, skb);
169} 165}
170 166
167/**
168 * ccid_hc_tx_parse_options - Parse CCID-specific options sent by the receiver
169 * @pkt: type of packet that @opt appears on (RFC 4340, 5.1)
170 * @opt: the CCID-specific option type (RFC 4340, 5.8 and 10.3)
171 * @val: value of @opt
172 * @len: length of @val in bytes
173 */
171static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk, 174static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
172 unsigned char option, 175 u8 pkt, u8 opt, u8 *val, u8 len)
173 unsigned char len, u16 idx,
174 unsigned char* value)
175{ 176{
176 int rc = 0; 177 if (ccid->ccid_ops->ccid_hc_tx_parse_options == NULL)
177 if (ccid->ccid_ops->ccid_hc_tx_parse_options != NULL) 178 return 0;
178 rc = ccid->ccid_ops->ccid_hc_tx_parse_options(sk, option, len, idx, 179 return ccid->ccid_ops->ccid_hc_tx_parse_options(sk, pkt, opt, val, len);
179 value);
180 return rc;
181} 180}
182 181
182/**
183 * ccid_hc_rx_parse_options - Parse CCID-specific options sent by the sender
184 * Arguments are analogous to ccid_hc_tx_parse_options()
185 */
183static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk, 186static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
184 unsigned char option, 187 u8 pkt, u8 opt, u8 *val, u8 len)
185 unsigned char len, u16 idx,
186 unsigned char* value)
187{ 188{
188 int rc = 0; 189 if (ccid->ccid_ops->ccid_hc_rx_parse_options == NULL)
189 if (ccid->ccid_ops->ccid_hc_rx_parse_options != NULL) 190 return 0;
190 rc = ccid->ccid_ops->ccid_hc_rx_parse_options(sk, option, len, idx, value); 191 return ccid->ccid_ops->ccid_hc_rx_parse_options(sk, pkt, opt, val, len);
191 return rc;
192} 192}
193 193
194static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk, 194static inline int ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
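
The rewritten wrappers follow the usual optional-callback convention: a NULL hook means the CCID defines no private options, which is reported as success (0) rather than an error. A toy version of the pattern with invented types:

    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>

    struct toy_ccid_ops {
            int (*parse_option)(uint8_t pkt, uint8_t opt,
                                uint8_t *val, uint8_t len);
    };

    static int parse_option(const struct toy_ccid_ops *ops, uint8_t pkt,
                            uint8_t opt, uint8_t *val, uint8_t len)
    {
            if (ops->parse_option == NULL)
                    return 0;       /* no CCID-specific options: not an error */
            return ops->parse_option(pkt, opt, val, len);
    }

    static int noisy(uint8_t pkt, uint8_t opt, uint8_t *val, uint8_t len)
    {
            (void)pkt; (void)val;
            printf("option %u, %u byte(s)\n", opt, len);
            return 0;
    }

    int main(void)
    {
            struct toy_ccid_ops silent = { NULL }, loud = { noisy };
            uint8_t v[4] = { 0 };

            parse_option(&silent, 0, 192, v, 4);    /* no-op, returns 0 */
            parse_option(&loud, 0, 192, v, 4);
            return 0;
    }
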
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
index 8408398cd44e..0581143cb800 100644
--- a/net/dccp/ccids/Kconfig
+++ b/net/dccp/ccids/Kconfig
@@ -47,37 +47,6 @@ config IP_DCCP_CCID3_DEBUG
47 47
48 If in doubt, say N. 48 If in doubt, say N.
49 49
50config IP_DCCP_CCID3_RTO
51 int "Use higher bound for nofeedback timer"
52 default 100
53 depends on IP_DCCP_CCID3 && EXPERIMENTAL
54 ---help---
55 Use higher lower bound for nofeedback timer expiration.
56
57 The TFRC nofeedback timer normally expires after the maximum of 4
58 RTTs and twice the current send interval (RFC 3448, 4.3). On LANs
59 with a small RTT this can mean a high processing load and reduced
60 performance, since then the nofeedback timer is triggered very
61 frequently.
62
63 This option enables to set a higher lower bound for the nofeedback
64 value. Values in units of milliseconds can be set here.
65
66 A value of 0 disables this feature by enforcing the value specified
67 in RFC 3448. The following values have been suggested as bounds for
68 experimental use:
69 * 16-20ms to match the typical multimedia inter-frame interval
70 * 100ms as a reasonable compromise [default]
71 * 1000ms corresponds to the lower TCP RTO bound (RFC 2988, 2.4)
72
73 The default of 100ms is a compromise between a large value for
74 efficient DCCP implementations, and a small value to avoid disrupting
75 the network in times of congestion.
76
77 The purpose of the nofeedback timer is to slow DCCP down when there
78 is serious network congestion: experimenting with larger values should
79 therefore not be performed on WANs.
80
81config IP_DCCP_TFRC_LIB 50config IP_DCCP_TFRC_LIB
82 def_bool y if IP_DCCP_CCID3 51 def_bool y if IP_DCCP_CCID3
83 52
diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c
index 9b3ae9922be1..d850e291f87c 100644
--- a/net/dccp/ccids/ccid2.c
+++ b/net/dccp/ccids/ccid2.c
@@ -25,59 +25,14 @@
25 */ 25 */
26#include <linux/slab.h> 26#include <linux/slab.h>
27#include "../feat.h" 27#include "../feat.h"
28#include "../ccid.h"
29#include "../dccp.h"
30#include "ccid2.h" 28#include "ccid2.h"
31 29
32 30
33#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 31#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
34static int ccid2_debug; 32static int ccid2_debug;
35#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a) 33#define ccid2_pr_debug(format, a...) DCCP_PR_DEBUG(ccid2_debug, format, ##a)
36
37static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hc)
38{
39 int len = 0;
40 int pipe = 0;
41 struct ccid2_seq *seqp = hc->tx_seqh;
42
43 /* there is data in the chain */
44 if (seqp != hc->tx_seqt) {
45 seqp = seqp->ccid2s_prev;
46 len++;
47 if (!seqp->ccid2s_acked)
48 pipe++;
49
50 while (seqp != hc->tx_seqt) {
51 struct ccid2_seq *prev = seqp->ccid2s_prev;
52
53 len++;
54 if (!prev->ccid2s_acked)
55 pipe++;
56
57 /* packets are sent sequentially */
58 BUG_ON(dccp_delta_seqno(seqp->ccid2s_seq,
59 prev->ccid2s_seq ) >= 0);
60 BUG_ON(time_before(seqp->ccid2s_sent,
61 prev->ccid2s_sent));
62
63 seqp = prev;
64 }
65 }
66
67 BUG_ON(pipe != hc->tx_pipe);
68 ccid2_pr_debug("len of chain=%d\n", len);
69
70 do {
71 seqp = seqp->ccid2s_prev;
72 len++;
73 } while (seqp != hc->tx_seqh);
74
75 ccid2_pr_debug("total len=%d\n", len);
76 BUG_ON(len != hc->tx_seqbufc * CCID2_SEQBUF_LEN);
77}
78#else 34#else
79#define ccid2_pr_debug(format, a...) 35#define ccid2_pr_debug(format, a...)
80#define ccid2_hc_tx_check_sanity(hc)
81#endif 36#endif
82 37
83static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc) 38static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hc)
@@ -156,19 +111,10 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, u32 val)
156 dp->dccps_l_ack_ratio = val; 111 dp->dccps_l_ack_ratio = val;
157} 112}
158 113
159static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hc, long val)
160{
161 ccid2_pr_debug("change SRTT to %ld\n", val);
162 hc->tx_srtt = val;
163}
164
165static void ccid2_start_rto_timer(struct sock *sk);
166
167static void ccid2_hc_tx_rto_expire(unsigned long data) 114static void ccid2_hc_tx_rto_expire(unsigned long data)
168{ 115{
169 struct sock *sk = (struct sock *)data; 116 struct sock *sk = (struct sock *)data;
170 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 117 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
171 long s;
172 118
173 bh_lock_sock(sk); 119 bh_lock_sock(sk);
174 if (sock_owned_by_user(sk)) { 120 if (sock_owned_by_user(sk)) {
@@ -178,23 +124,19 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
178 124
179 ccid2_pr_debug("RTO_EXPIRE\n"); 125 ccid2_pr_debug("RTO_EXPIRE\n");
180 126
181 ccid2_hc_tx_check_sanity(hc);
182
183 /* back-off timer */ 127 /* back-off timer */
184 hc->tx_rto <<= 1; 128 hc->tx_rto <<= 1;
129 if (hc->tx_rto > DCCP_RTO_MAX)
130 hc->tx_rto = DCCP_RTO_MAX;
185 131
186 s = hc->tx_rto / HZ; 132 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
187 if (s > 60)
188 hc->tx_rto = 60 * HZ;
189
190 ccid2_start_rto_timer(sk);
191 133
192 /* adjust pipe, cwnd etc */ 134 /* adjust pipe, cwnd etc */
193 hc->tx_ssthresh = hc->tx_cwnd / 2; 135 hc->tx_ssthresh = hc->tx_cwnd / 2;
194 if (hc->tx_ssthresh < 2) 136 if (hc->tx_ssthresh < 2)
195 hc->tx_ssthresh = 2; 137 hc->tx_ssthresh = 2;
196 hc->tx_cwnd = 1; 138 hc->tx_cwnd = 1;
197 hc->tx_pipe = 0; 139 hc->tx_pipe = 0;
198 140
199 /* clear state about stuff we sent */ 141 /* clear state about stuff we sent */
200 hc->tx_seqt = hc->tx_seqh; 142 hc->tx_seqt = hc->tx_seqh;
@@ -204,23 +146,12 @@ static void ccid2_hc_tx_rto_expire(unsigned long data)
204 hc->tx_rpseq = 0; 146 hc->tx_rpseq = 0;
205 hc->tx_rpdupack = -1; 147 hc->tx_rpdupack = -1;
206 ccid2_change_l_ack_ratio(sk, 1); 148 ccid2_change_l_ack_ratio(sk, 1);
207 ccid2_hc_tx_check_sanity(hc);
208out: 149out:
209 bh_unlock_sock(sk); 150 bh_unlock_sock(sk);
210 sock_put(sk); 151 sock_put(sk);
211} 152}
212 153
213static void ccid2_start_rto_timer(struct sock *sk) 154static void ccid2_hc_tx_packet_sent(struct sock *sk, unsigned int len)
214{
215 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
216
217 ccid2_pr_debug("setting RTO timeout=%ld\n", hc->tx_rto);
218
219 BUG_ON(timer_pending(&hc->tx_rtotimer));
220 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
221}
222
223static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
224{ 155{
225 struct dccp_sock *dp = dccp_sk(sk); 156 struct dccp_sock *dp = dccp_sk(sk);
226 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 157 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
@@ -230,7 +161,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
230 161
231 hc->tx_seqh->ccid2s_seq = dp->dccps_gss; 162 hc->tx_seqh->ccid2s_seq = dp->dccps_gss;
232 hc->tx_seqh->ccid2s_acked = 0; 163 hc->tx_seqh->ccid2s_acked = 0;
233 hc->tx_seqh->ccid2s_sent = jiffies; 164 hc->tx_seqh->ccid2s_sent = ccid2_time_stamp;
234 165
235 next = hc->tx_seqh->ccid2s_next; 166 next = hc->tx_seqh->ccid2s_next;
236 /* check if we need to alloc more space */ 167 /* check if we need to alloc more space */
@@ -296,23 +227,20 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, unsigned int len)
296 } 227 }
297#endif 228#endif
298 229
299 /* setup RTO timer */ 230 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
300 if (!timer_pending(&hc->tx_rtotimer))
301 ccid2_start_rto_timer(sk);
302 231
303#ifdef CONFIG_IP_DCCP_CCID2_DEBUG 232#ifdef CONFIG_IP_DCCP_CCID2_DEBUG
304 do { 233 do {
305 struct ccid2_seq *seqp = hc->tx_seqt; 234 struct ccid2_seq *seqp = hc->tx_seqt;
306 235
307 while (seqp != hc->tx_seqh) { 236 while (seqp != hc->tx_seqh) {
308 ccid2_pr_debug("out seq=%llu acked=%d time=%lu\n", 237 ccid2_pr_debug("out seq=%llu acked=%d time=%u\n",
309 (unsigned long long)seqp->ccid2s_seq, 238 (unsigned long long)seqp->ccid2s_seq,
310 seqp->ccid2s_acked, seqp->ccid2s_sent); 239 seqp->ccid2s_acked, seqp->ccid2s_sent);
311 seqp = seqp->ccid2s_next; 240 seqp = seqp->ccid2s_next;
312 } 241 }
313 } while (0); 242 } while (0);
314 ccid2_pr_debug("=========\n"); 243 ccid2_pr_debug("=========\n");
315 ccid2_hc_tx_check_sanity(hc);
316#endif 244#endif
317} 245}
318 246
@@ -378,17 +306,87 @@ out_invalid_option:
378 return -1; 306 return -1;
379} 307}
380 308
381static void ccid2_hc_tx_kill_rto_timer(struct sock *sk) 309/**
310 * ccid2_rtt_estimator - Sample RTT and compute RTO using RFC2988 algorithm
311 * This code is almost identical with TCP's tcp_rtt_estimator(), since
312 * - it has a higher sampling frequency (recommended by RFC 1323),
313 * - the RTO does not collapse into RTT due to RTTVAR going towards zero,
314 * - it is simple (cf. more complex proposals such as Eifel timer or research
315 * which suggests that the gain should be set according to window size),
316 * - in tests it was found to work well with CCID2 [gerrit].
317 */
318static void ccid2_rtt_estimator(struct sock *sk, const long mrtt)
382{ 319{
383 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 320 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
321 long m = mrtt ? : 1;
384 322
385 sk_stop_timer(sk, &hc->tx_rtotimer); 323 if (hc->tx_srtt == 0) {
386 ccid2_pr_debug("deleted RTO timer\n"); 324 /* First measurement m */
325 hc->tx_srtt = m << 3;
326 hc->tx_mdev = m << 1;
327
328 hc->tx_mdev_max = max(hc->tx_mdev, tcp_rto_min(sk));
329 hc->tx_rttvar = hc->tx_mdev_max;
330
331 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
332 } else {
333 /* Update scaled SRTT as SRTT += 1/8 * (m - SRTT) */
334 m -= (hc->tx_srtt >> 3);
335 hc->tx_srtt += m;
336
337 /* Similarly, update scaled mdev with regard to |m| */
338 if (m < 0) {
339 m = -m;
340 m -= (hc->tx_mdev >> 2);
341 /*
342 * This neutralises RTO increase when RTT < SRTT - mdev
343 * (see P. Sarolahti, A. Kuznetsov,"Congestion Control
344 * in Linux TCP", USENIX 2002, pp. 49-62).
345 */
346 if (m > 0)
347 m >>= 3;
348 } else {
349 m -= (hc->tx_mdev >> 2);
350 }
351 hc->tx_mdev += m;
352
353 if (hc->tx_mdev > hc->tx_mdev_max) {
354 hc->tx_mdev_max = hc->tx_mdev;
355 if (hc->tx_mdev_max > hc->tx_rttvar)
356 hc->tx_rttvar = hc->tx_mdev_max;
357 }
358
359 /*
360 * Decay RTTVAR at most once per flight, exploiting that
361 * 1) pipe <= cwnd <= Sequence_Window = W (RFC 4340, 7.5.2)
362 * 2) AWL = GSS-W+1 <= GAR <= GSS (RFC 4340, 7.5.1)
363 * GAR is a useful bound for FlightSize = pipe.
364 * AWL is probably too low here, as it over-estimates pipe.
365 */
366 if (after48(dccp_sk(sk)->dccps_gar, hc->tx_rtt_seq)) {
367 if (hc->tx_mdev_max < hc->tx_rttvar)
368 hc->tx_rttvar -= (hc->tx_rttvar -
369 hc->tx_mdev_max) >> 2;
370 hc->tx_rtt_seq = dccp_sk(sk)->dccps_gss;
371 hc->tx_mdev_max = tcp_rto_min(sk);
372 }
373 }
374
375 /*
376 * Set RTO from SRTT and RTTVAR
377 * As in TCP, 4 * RTTVAR >= TCP_RTO_MIN, giving a minimum RTO of 200 ms.
378 * This agrees with RFC 4341, 5:
379 * "Because DCCP does not retransmit data, DCCP does not require
380 * TCP's recommended minimum timeout of one second".
381 */
382 hc->tx_rto = (hc->tx_srtt >> 3) + hc->tx_rttvar;
383
384 if (hc->tx_rto > DCCP_RTO_MAX)
385 hc->tx_rto = DCCP_RTO_MAX;
387} 386}
388 387
389static inline void ccid2_new_ack(struct sock *sk, 388static void ccid2_new_ack(struct sock *sk, struct ccid2_seq *seqp,
390 struct ccid2_seq *seqp, 389 unsigned int *maxincr)
391 unsigned int *maxincr)
392{ 390{
393 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 391 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
394 392
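
For reference, a compact userspace version of the scaled RFC 2988 update adopted here (SRTT kept shifted by 3, mdev by 2, as in tcp_rtt_estimator()). It deliberately drops the per-flight RTTVAR decay, the rto_min clamp and the Sarolahti/Kuznetsov adjustment, so it is a sketch of the core recurrence, not a substitute for the kernel function; units are arbitrary ticks:

    #include <stdio.h>
    #include <stdint.h>

    #define RTO_MAX 12000           /* assumed cap, in ticks */

    struct rtt_state {
            uint32_t srtt;          /* smoothed RTT << 3 */
            uint32_t mdev;          /* mean deviation << 2 */
            uint32_t rto;
    };

    static void rtt_sample(struct rtt_state *s, long m)
    {
            if (m <= 0)
                    m = 1;
            if (s->srtt == 0) {             /* first measurement */
                    s->srtt = m << 3;
                    s->mdev = m << 1;
            } else {
                    m -= (s->srtt >> 3);    /* m is now the error term */
                    s->srtt += m;           /* SRTT += 1/8 * error */
                    if (m < 0)
                            m = -m;
                    m -= (s->mdev >> 2);
                    s->mdev += m;           /* mdev += 1/4 * (|err| - mdev) */
            }
            /* RTO = SRTT + 4 * RTTVAR, in the scaled representation: */
            s->rto = (s->srtt >> 3) + s->mdev;
            if (s->rto > RTO_MAX)
                    s->rto = RTO_MAX;
    }

    int main(void)
    {
            struct rtt_state s = { 0, 0, 0 };
            long samples[] = { 100, 110, 90, 300, 100 };

            for (unsigned int i = 0; i < 5; i++) {
                    rtt_sample(&s, samples[i]);
                    printf("m=%3ld srtt=%3u rto=%u\n",
                           samples[i], s.srtt >> 3, s.rto);
            }
            return 0;
    }
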
@@ -402,93 +400,27 @@ static inline void ccid2_new_ack(struct sock *sk,
402 hc->tx_cwnd += 1; 400 hc->tx_cwnd += 1;
403 hc->tx_packets_acked = 0; 401 hc->tx_packets_acked = 0;
404 } 402 }
405 403 /*
406 /* update RTO */ 404 * FIXME: RTT is sampled several times per acknowledgment (for each
407 if (hc->tx_srtt == -1 || 405 * entry in the Ack Vector), instead of once per Ack (as in TCP SACK).
408 time_after(jiffies, hc->tx_lastrtt + hc->tx_srtt)) { 406 * This causes the RTT to be over-estimated, since the older entries
409 unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; 407 * in the Ack Vector have earlier sending times.
410 int s; 408 * The cleanest solution is to not use the ccid2s_sent field at all
411 409 * and instead use DCCP timestamps: requires changes in other places.
412 /* first measurement */ 410 */
413 if (hc->tx_srtt == -1) { 411 ccid2_rtt_estimator(sk, ccid2_time_stamp - seqp->ccid2s_sent);
414 ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n",
415 r, jiffies,
416 (unsigned long long)seqp->ccid2s_seq);
417 ccid2_change_srtt(hc, r);
418 hc->tx_rttvar = r >> 1;
419 } else {
420 /* RTTVAR */
421 long tmp = hc->tx_srtt - r;
422 long srtt;
423
424 if (tmp < 0)
425 tmp *= -1;
426
427 tmp >>= 2;
428 hc->tx_rttvar *= 3;
429 hc->tx_rttvar >>= 2;
430 hc->tx_rttvar += tmp;
431
432 /* SRTT */
433 srtt = hc->tx_srtt;
434 srtt *= 7;
435 srtt >>= 3;
436 tmp = r >> 3;
437 srtt += tmp;
438 ccid2_change_srtt(hc, srtt);
439 }
440 s = hc->tx_rttvar << 2;
441 /* clock granularity is 1 when based on jiffies */
442 if (!s)
443 s = 1;
444 hc->tx_rto = hc->tx_srtt + s;
445
446 /* must be at least a second */
447 s = hc->tx_rto / HZ;
448 /* DCCP doesn't require this [but I like it cuz my code sux] */
449#if 1
450 if (s < 1)
451 hc->tx_rto = HZ;
452#endif
453 /* max 60 seconds */
454 if (s > 60)
455 hc->tx_rto = HZ * 60;
456
457 hc->tx_lastrtt = jiffies;
458
459 ccid2_pr_debug("srtt: %ld rttvar: %ld rto: %ld (HZ=%d) R=%lu\n",
460 hc->tx_srtt, hc->tx_rttvar,
461 hc->tx_rto, HZ, r);
462 }
463
464 /* we got a new ack, so re-start RTO timer */
465 ccid2_hc_tx_kill_rto_timer(sk);
466 ccid2_start_rto_timer(sk);
467}
468
469static void ccid2_hc_tx_dec_pipe(struct sock *sk)
470{
471 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
472
473 if (hc->tx_pipe == 0)
474 DCCP_BUG("pipe == 0");
475 else
476 hc->tx_pipe--;
477
478 if (hc->tx_pipe == 0)
479 ccid2_hc_tx_kill_rto_timer(sk);
480} 412}
481 413
482static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp) 414static void ccid2_congestion_event(struct sock *sk, struct ccid2_seq *seqp)
483{ 415{
484 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 416 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
485 417
486 if (time_before(seqp->ccid2s_sent, hc->tx_last_cong)) { 418 if ((s32)(seqp->ccid2s_sent - hc->tx_last_cong) < 0) {
487 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); 419 ccid2_pr_debug("Multiple losses in an RTT---treating as one\n");
488 return; 420 return;
489 } 421 }
490 422
491 hc->tx_last_cong = jiffies; 423 hc->tx_last_cong = ccid2_time_stamp;
492 424
493 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U; 425 hc->tx_cwnd = hc->tx_cwnd / 2 ? : 1U;
494 hc->tx_ssthresh = max(hc->tx_cwnd, 2U); 426 hc->tx_ssthresh = max(hc->tx_cwnd, 2U);
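
Switching ccid2s_sent from jiffies to the 32-bit ccid2_time_stamp clock is why time_before() gives way to the cast above: (s32)(a - b) < 0 reads as "a is before b" and stays correct across u32 wraparound, as long as the two stamps are less than 2^31 ticks apart. A quick demonstration:

    #include <stdio.h>
    #include <stdint.h>

    static int before(uint32_t a, uint32_t b)
    {
            return (int32_t)(a - b) < 0;
    }

    int main(void)
    {
            uint32_t near_wrap = 0xfffffff0u;

            /* near_wrap + 0x20 wraps around to 0x10, yet ordering holds: */
            printf("%d\n", before(near_wrap, near_wrap + 0x20));  /* 1 */
            printf("%d\n", before(near_wrap + 0x20, near_wrap));  /* 0 */
            return 0;
    }
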
@@ -510,7 +442,6 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
510 int done = 0; 442 int done = 0;
511 unsigned int maxincr = 0; 443 unsigned int maxincr = 0;
512 444
513 ccid2_hc_tx_check_sanity(hc);
514 /* check reverse path congestion */ 445 /* check reverse path congestion */
515 seqno = DCCP_SKB_CB(skb)->dccpd_seq; 446 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
516 447
@@ -620,7 +551,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
620 seqp->ccid2s_acked = 1; 551 seqp->ccid2s_acked = 1;
621 ccid2_pr_debug("Got ack for %llu\n", 552 ccid2_pr_debug("Got ack for %llu\n",
622 (unsigned long long)seqp->ccid2s_seq); 553 (unsigned long long)seqp->ccid2s_seq);
623 ccid2_hc_tx_dec_pipe(sk); 554 hc->tx_pipe--;
624 } 555 }
625 if (seqp == hc->tx_seqt) { 556 if (seqp == hc->tx_seqt) {
626 done = 1; 557 done = 1;
@@ -677,7 +608,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
677 * one ack vector. 608 * one ack vector.
678 */ 609 */
679 ccid2_congestion_event(sk, seqp); 610 ccid2_congestion_event(sk, seqp);
680 ccid2_hc_tx_dec_pipe(sk); 611 hc->tx_pipe--;
681 } 612 }
682 if (seqp == hc->tx_seqt) 613 if (seqp == hc->tx_seqt)
683 break; 614 break;
@@ -695,7 +626,11 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
695 hc->tx_seqt = hc->tx_seqt->ccid2s_next; 626 hc->tx_seqt = hc->tx_seqt->ccid2s_next;
696 } 627 }
697 628
698 ccid2_hc_tx_check_sanity(hc); 629 /* restart RTO timer if not all outstanding data has been acked */
630 if (hc->tx_pipe == 0)
631 sk_stop_timer(sk, &hc->tx_rtotimer);
632 else
633 sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto);
699} 634}
700 635
701static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) 636static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -707,12 +642,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
707 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ 642 /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */
708 hc->tx_ssthresh = ~0U; 643 hc->tx_ssthresh = ~0U;
709 644
710 /* 645 /* Use larger initial windows (RFC 4341, section 5). */
711 * RFC 4341, 5: "The cwnd parameter is initialized to at most four 646 hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache);
712 * packets for new connections, following the rules from [RFC3390]".
713 * We need to convert the bytes of RFC3390 into the packets of RFC 4341.
714 */
715 hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U);
716 647
717 /* Make sure that Ack Ratio is enabled and within bounds. */ 648 /* Make sure that Ack Ratio is enabled and within bounds. */
718 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); 649 max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2);
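
rfc3390_bytes_to_packets() converts RFC 3390's byte-based initial window, min(4*MSS, max(2*MSS, 4380)), into whole packets, replacing the open-coded clamp(). A reconstruction of what such a helper computes (the kernel body may differ in detail):

    #include <stdio.h>
    #include <stdint.h>

    static uint32_t rfc3390_bytes_to_packets(uint32_t mss)
    {
            if (mss <= 1095)
                    return 4;       /* 4 * MSS stays within 4380 bytes */
            if (mss <= 2190)
                    return 3;       /* 3 segments stay within the byte cap */
            return 2;               /* floor of 2 * MSS */
    }

    int main(void)
    {
            uint32_t sizes[] = { 536, 1095, 1460, 2190, 4096 };

            for (unsigned int i = 0; i < 5; i++)
                    printf("mss=%4u -> iw=%u packets\n",
                           sizes[i], rfc3390_bytes_to_packets(sizes[i]));
            return 0;
    }
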
@@ -723,15 +654,11 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk)
723 if (ccid2_hc_tx_alloc_seq(hc)) 654 if (ccid2_hc_tx_alloc_seq(hc))
724 return -ENOMEM; 655 return -ENOMEM;
725 656
726 hc->tx_rto = 3 * HZ; 657 hc->tx_rto = DCCP_TIMEOUT_INIT;
727 ccid2_change_srtt(hc, -1);
728 hc->tx_rttvar = -1;
729 hc->tx_rpdupack = -1; 658 hc->tx_rpdupack = -1;
730 hc->tx_last_cong = jiffies; 659 hc->tx_last_cong = ccid2_time_stamp;
731 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 660 setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire,
732 (unsigned long)sk); 661 (unsigned long)sk);
733
734 ccid2_hc_tx_check_sanity(hc);
735 return 0; 662 return 0;
736} 663}
737 664
@@ -740,7 +667,7 @@ static void ccid2_hc_tx_exit(struct sock *sk)
740 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); 667 struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk);
741 int i; 668 int i;
742 669
743 ccid2_hc_tx_kill_rto_timer(sk); 670 sk_stop_timer(sk, &hc->tx_rtotimer);
744 671
745 for (i = 0; i < hc->tx_seqbufc; i++) 672 for (i = 0; i < hc->tx_seqbufc; i++)
746 kfree(hc->tx_seqbuf[i]); 673 kfree(hc->tx_seqbuf[i]);
diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h
index 1ec6a30103bb..9731c2dc1487 100644
--- a/net/dccp/ccids/ccid2.h
+++ b/net/dccp/ccids/ccid2.h
@@ -18,18 +18,23 @@
18#ifndef _DCCP_CCID2_H_ 18#ifndef _DCCP_CCID2_H_
19#define _DCCP_CCID2_H_ 19#define _DCCP_CCID2_H_
20 20
21#include <linux/dccp.h>
22#include <linux/timer.h> 21#include <linux/timer.h>
23#include <linux/types.h> 22#include <linux/types.h>
24#include "../ccid.h" 23#include "../ccid.h"
24#include "../dccp.h"
25
26/*
27 * CCID-2 timestamping faces the same issues as TCP timestamping.
28 * Hence we reuse/share as much of the code as possible.
29 */
30#define ccid2_time_stamp tcp_time_stamp
31
25/* NUMDUPACK parameter from RFC 4341, p. 6 */ 32/* NUMDUPACK parameter from RFC 4341, p. 6 */
26#define NUMDUPACK 3 33#define NUMDUPACK 3
27 34
28struct sock;
29
30struct ccid2_seq { 35struct ccid2_seq {
31 u64 ccid2s_seq; 36 u64 ccid2s_seq;
32 unsigned long ccid2s_sent; 37 u32 ccid2s_sent;
33 int ccid2s_acked; 38 int ccid2s_acked;
34 struct ccid2_seq *ccid2s_prev; 39 struct ccid2_seq *ccid2s_prev;
35 struct ccid2_seq *ccid2s_next; 40 struct ccid2_seq *ccid2s_next;
@@ -42,7 +47,12 @@ struct ccid2_seq {
42 * struct ccid2_hc_tx_sock - CCID2 TX half connection 47 * struct ccid2_hc_tx_sock - CCID2 TX half connection
43 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5 48 * @tx_{cwnd,ssthresh,pipe}: as per RFC 4341, section 5
44 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465) 49 * @tx_packets_acked: Ack counter for deriving cwnd growth (RFC 3465)
45 * @tx_lastrtt: time RTT was last measured 50 * @tx_srtt: smoothed RTT estimate, scaled by 2^3
51 * @tx_mdev: smoothed RTT variation, scaled by 2^2
52 * @tx_mdev_max: maximum of @mdev during one flight
53 * @tx_rttvar: moving average/maximum of @mdev_max
54 * @tx_rto: RTO value deriving from SRTT and RTTVAR (RFC 2988)
55 * @tx_rtt_seq: to decay RTTVAR at most once per flight
46 * @tx_rpseq: last consecutive seqno 56 * @tx_rpseq: last consecutive seqno
47 * @tx_rpdupack: dupacks since rpseq 57 * @tx_rpdupack: dupacks since rpseq
48 */ 58 */
@@ -55,14 +65,19 @@ struct ccid2_hc_tx_sock {
55 int tx_seqbufc; 65 int tx_seqbufc;
56 struct ccid2_seq *tx_seqh; 66 struct ccid2_seq *tx_seqh;
57 struct ccid2_seq *tx_seqt; 67 struct ccid2_seq *tx_seqt;
58 long tx_rto; 68
59 long tx_srtt; 69 /* RTT measurement: variables/principles are the same as in TCP */
60 long tx_rttvar; 70 u32 tx_srtt,
61 unsigned long tx_lastrtt; 71 tx_mdev,
72 tx_mdev_max,
73 tx_rttvar,
74 tx_rto;
75 u64 tx_rtt_seq:48;
62 struct timer_list tx_rtotimer; 76 struct timer_list tx_rtotimer;
77
63 u64 tx_rpseq; 78 u64 tx_rpseq;
64 int tx_rpdupack; 79 int tx_rpdupack;
65 unsigned long tx_last_cong; 80 u32 tx_last_cong;
66 u64 tx_high_ack; 81 u64 tx_high_ack;
67}; 82};
68 83
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 95f752986497..3060a60ed5ab 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -54,7 +54,6 @@ static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
54 [TFRC_SSTATE_NO_SENT] = "NO_SENT", 54 [TFRC_SSTATE_NO_SENT] = "NO_SENT",
55 [TFRC_SSTATE_NO_FBACK] = "NO_FBACK", 55 [TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
56 [TFRC_SSTATE_FBACK] = "FBACK", 56 [TFRC_SSTATE_FBACK] = "FBACK",
57 [TFRC_SSTATE_TERM] = "TERM",
58 }; 57 };
59 58
60 return ccid3_state_names[state]; 59 return ccid3_state_names[state];
@@ -91,19 +90,16 @@ static inline u64 rfc3390_initial_rate(struct sock *sk)
91 return scaled_div(w_init << 6, hc->tx_rtt); 90 return scaled_div(w_init << 6, hc->tx_rtt);
92} 91}
93 92
94/* 93/**
95 * Recalculate t_ipi and delta (should be called whenever X changes) 94 * ccid3_update_send_interval - Calculate new t_ipi = s / X_inst
95 * This respects the granularity of X_inst (64 * bytes/second).
96 */ 96 */
97static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc) 97static void ccid3_update_send_interval(struct ccid3_hc_tx_sock *hc)
98{ 98{
99 /* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
100 hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x); 99 hc->tx_t_ipi = scaled_div32(((u64)hc->tx_s) << 6, hc->tx_x);
101 100
102 /* Calculate new delta by delta = min(t_ipi / 2, t_gran / 2) */ 101 ccid3_pr_debug("t_ipi=%u, s=%u, X=%u\n", hc->tx_t_ipi,
103 hc->tx_delta = min_t(u32, hc->tx_t_ipi / 2, TFRC_OPSYS_HALF_TIME_GRAN); 102 hc->tx_s, (unsigned)(hc->tx_x >> 6));
104
105 ccid3_pr_debug("t_ipi=%u, delta=%u, s=%u, X=%u\n", hc->tx_t_ipi,
106 hc->tx_delta, hc->tx_s, (unsigned)(hc->tx_x >> 6));
107} 103}
108 104
109static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now) 105static u32 ccid3_hc_tx_idle_rtt(struct ccid3_hc_tx_sock *hc, ktime_t now)
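
With X carried in fixed point as 64 * bytes/second, t_ipi = s/X becomes (s << 6) scaled into microseconds; scaled_div32() folds in the seconds-to-microseconds factor. A small standalone check of the arithmetic:

    #include <stdio.h>
    #include <stdint.h>

    #define USEC_PER_SEC 1000000ULL

    /* x_fixed is in units of 64 * bytes/second, as in the CCID3 code. */
    static uint32_t t_ipi_usec(uint16_t s, uint64_t x_fixed)
    {
            return (uint32_t)((((uint64_t)s << 6) * USEC_PER_SEC) / x_fixed);
    }

    int main(void)
    {
            /* 1460-byte segments at 1 Mbyte/s: one packet every ~1460 us. */
            uint64_t x = (uint64_t)1000000 << 6;

            printf("t_ipi = %u us\n", t_ipi_usec(1460, x));
            return 0;
    }
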
@@ -211,16 +207,19 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
211 ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk, 207 ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk,
212 ccid3_tx_state_name(hc->tx_state)); 208 ccid3_tx_state_name(hc->tx_state));
213 209
210 /* Ignore and do not restart after leaving the established state */
211 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
212 goto out;
213
214 /* Reset feedback state to "no feedback received" */
214 if (hc->tx_state == TFRC_SSTATE_FBACK) 215 if (hc->tx_state == TFRC_SSTATE_FBACK)
215 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); 216 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
216 else if (hc->tx_state != TFRC_SSTATE_NO_FBACK)
217 goto out;
218 217
219 /* 218 /*
220 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4 219 * Determine new allowed sending rate X as per draft rfc3448bis-00, 4.4
220 * RTO is 0 if and only if no feedback has been received yet.
221 */ 221 */
222 if (hc->tx_t_rto == 0 || /* no feedback received yet */ 222 if (hc->tx_t_rto == 0 || hc->tx_p == 0) {
223 hc->tx_p == 0) {
224 223
225 /* halve send rate directly */ 224 /* halve send rate directly */
226 hc->tx_x = max(hc->tx_x / 2, 225 hc->tx_x = max(hc->tx_x / 2,
@@ -256,7 +255,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
256 * Set new timeout for the nofeedback timer. 255 * Set new timeout for the nofeedback timer.
257 * See comments in packet_recv() regarding the value of t_RTO. 256 * See comments in packet_recv() regarding the value of t_RTO.
258 */ 257 */
259 if (unlikely(hc->tx_t_rto == 0)) /* no feedback yet */ 258 if (unlikely(hc->tx_t_rto == 0)) /* no feedback received yet */
260 t_nfb = TFRC_INITIAL_TIMEOUT; 259 t_nfb = TFRC_INITIAL_TIMEOUT;
261 else 260 else
262 t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi); 261 t_nfb = max(hc->tx_t_rto, 2 * hc->tx_t_ipi);
@@ -290,8 +289,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
290 if (unlikely(skb->len == 0)) 289 if (unlikely(skb->len == 0))
291 return -EBADMSG; 290 return -EBADMSG;
292 291
293 switch (hc->tx_state) { 292 if (hc->tx_state == TFRC_SSTATE_NO_SENT) {
294 case TFRC_SSTATE_NO_SENT:
295 sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies + 293 sk_reset_timer(sk, &hc->tx_no_feedback_timer, (jiffies +
296 usecs_to_jiffies(TFRC_INITIAL_TIMEOUT))); 294 usecs_to_jiffies(TFRC_INITIAL_TIMEOUT)));
297 hc->tx_last_win_count = 0; 295 hc->tx_last_win_count = 0;
@@ -326,27 +324,22 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
326 ccid3_update_send_interval(hc); 324 ccid3_update_send_interval(hc);
327 325
328 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK); 326 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
329 break; 327
330 case TFRC_SSTATE_NO_FBACK: 328 } else {
331 case TFRC_SSTATE_FBACK:
332 delay = ktime_us_delta(hc->tx_t_nom, now); 329 delay = ktime_us_delta(hc->tx_t_nom, now);
333 ccid3_pr_debug("delay=%ld\n", (long)delay); 330 ccid3_pr_debug("delay=%ld\n", (long)delay);
334 /* 331 /*
335 * Scheduling of packet transmissions [RFC 3448, 4.6] 332 * Scheduling of packet transmissions (RFC 5348, 8.3)
336 * 333 *
337 * if (t_now > t_nom - delta) 334 * if (t_now > t_nom - delta)
338 * // send the packet now 335 * // send the packet now
339 * else 336 * else
340 * // send the packet in (t_nom - t_now) milliseconds. 337 * // send the packet in (t_nom - t_now) milliseconds.
341 */ 338 */
342 if (delay - (s64)hc->tx_delta >= 1000) 339 if (delay >= TFRC_T_DELTA)
343 return (u32)delay / 1000L; 340 return (u32)delay / USEC_PER_MSEC;
344 341
345 ccid3_hc_tx_update_win_count(hc, now); 342 ccid3_hc_tx_update_win_count(hc, now);
346 break;
347 case TFRC_SSTATE_TERM:
348 DCCP_BUG("%s(%p) - Illegal state TERM", dccp_role(sk), sk);
349 return -EINVAL;
350 } 343 }
351 344
352 /* prepare to send now (add options etc.) */ 345 /* prepare to send now (add options etc.) */
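
The collapsed switch implements the RFC 5348, 8.3 rule quoted in the comment: transmit when t_now is within t_delta of t_nom, otherwise report the remaining delay at millisecond granularity (what sk_reset_timer() can honour). A sketch with plain microsecond integers:

    #include <stdio.h>
    #include <stdint.h>

    #define T_DELTA_USEC 1000       /* TFRC_T_DELTA when HZ >= 500 */

    /* Returns 0 to send immediately, else the delay in milliseconds. */
    static uint32_t tx_delay_ms(int64_t t_nom_us, int64_t now_us)
    {
            int64_t delay = t_nom_us - now_us;

            if (delay < T_DELTA_USEC)
                    return 0;       /* inside the delta window: send now */
            return (uint32_t)(delay / 1000);
    }

    int main(void)
    {
            printf("%u\n", tx_delay_ms(1000500, 1000000)); /* 0 */
            printf("%u\n", tx_delay_ms(1005000, 1000000)); /* 5 ms */
            return 0;
    }
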
@@ -358,8 +351,7 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
358 return 0; 351 return 0;
359} 352}
360 353
361static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, 354static void ccid3_hc_tx_packet_sent(struct sock *sk, unsigned int len)
362 unsigned int len)
363{ 355{
364 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 356 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
365 357
@@ -372,48 +364,34 @@ static void ccid3_hc_tx_packet_sent(struct sock *sk, int more,
372static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) 364static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
373{ 365{
374 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 366 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
375 struct ccid3_options_received *opt_recv; 367 struct tfrc_tx_hist_entry *acked;
376 ktime_t now; 368 ktime_t now;
377 unsigned long t_nfb; 369 unsigned long t_nfb;
378 u32 pinv, r_sample; 370 u32 r_sample;
379 371
380 /* we are only interested in ACKs */ 372 /* we are only interested in ACKs */
381 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK || 373 if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
382 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK)) 374 DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
383 return; 375 return;
384 /* ... and only in the established state */
385 if (hc->tx_state != TFRC_SSTATE_FBACK &&
386 hc->tx_state != TFRC_SSTATE_NO_FBACK)
387 return;
388
389 opt_recv = &hc->tx_options_received;
390 now = ktime_get_real();
391
392 /* Estimate RTT from history if ACK number is valid */
393 r_sample = tfrc_tx_hist_rtt(hc->tx_hist,
394 DCCP_SKB_CB(skb)->dccpd_ack_seq, now);
395 if (r_sample == 0) {
396 DCCP_WARN("%s(%p): %s with bogus ACK-%llu\n", dccp_role(sk), sk,
397 dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type),
398 (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq);
399 return;
400 }
401
402 /* Update receive rate in units of 64 * bytes/second */
403 hc->tx_x_recv = opt_recv->ccid3or_receive_rate;
404 hc->tx_x_recv <<= 6;
405
406 /* Update loss event rate (which is scaled by 1e6) */
407 pinv = opt_recv->ccid3or_loss_event_rate;
408 if (pinv == ~0U || pinv == 0) /* see RFC 4342, 8.5 */
409 hc->tx_p = 0;
410 else /* can not exceed 100% */
411 hc->tx_p = scaled_div(1, pinv);
412 /* 376 /*
413 * Validate new RTT sample and update moving average 377 * Locate the acknowledged packet in the TX history.
378 *
379 * Returning "entry not found" here can for instance happen when
380 * - the host has not sent out anything (e.g. a passive server),
381 * - the Ack is outdated (packet with higher Ack number was received),
382 * - it is a bogus Ack (for a packet not sent on this connection).
414 */ 383 */
415 r_sample = dccp_sample_rtt(sk, r_sample); 384 acked = tfrc_tx_hist_find_entry(hc->tx_hist, dccp_hdr_ack_seq(skb));
385 if (acked == NULL)
386 return;
387 /* For the sake of RTT sampling, ignore/remove all older entries */
388 tfrc_tx_hist_purge(&acked->next);
389
390 /* Update the moving average for the RTT estimate (RFC 3448, 4.3) */
391 now = ktime_get_real();
392 r_sample = dccp_sample_rtt(sk, ktime_us_delta(now, acked->stamp));
416 hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9); 393 hc->tx_rtt = tfrc_ewma(hc->tx_rtt, r_sample, 9);
394
417 /* 395 /*
418 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3 396 * Update allowed sending rate X as per draft rfc3448bis-00, 4.2/3
419 */ 397 */
@@ -461,13 +439,12 @@ done_computing_x:
461 sk->sk_write_space(sk); 439 sk->sk_write_space(sk);
462 440
463 /* 441 /*
464 * Update timeout interval for the nofeedback timer. 442 * Update timeout interval for the nofeedback timer. In order to control
465 * We use a configuration option to increase the lower bound. 443 * rate halving on networks with very low RTTs (<= 1 ms), use per-route
466 * This can help avoid triggering the nofeedback timer too 444 * tunable RTAX_RTO_MIN value as the lower bound.
467 * often ('spinning') on LANs with small RTTs.
468 */ 445 */
469 hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt, (CONFIG_IP_DCCP_CCID3_RTO * 446 hc->tx_t_rto = max_t(u32, 4 * hc->tx_rtt,
470 (USEC_PER_SEC / 1000))); 447 USEC_PER_SEC/HZ * tcp_rto_min(sk));
471 /* 448 /*
472 * Schedule no feedback timer to expire in 449 * Schedule no feedback timer to expire in
473 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi) 450 * max(t_RTO, 2 * s/X) = max(t_RTO, 2 * t_ipi)
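
The new bound reads t_RTO = max(4 * RTT, rto_min), with rto_min taken from the per-route tcp_rto_min() instead of the removed compile-time option. A sketch assuming the usual 200 ms default (the per-route value can differ):

    #include <stdio.h>
    #include <stdint.h>

    #define RTO_MIN_USEC 200000u    /* assumed default lower bound */

    static uint32_t t_rto_usec(uint32_t rtt_usec)
    {
            uint32_t four_rtt = 4 * rtt_usec;

            return four_rtt > RTO_MIN_USEC ? four_rtt : RTO_MIN_USEC;
    }

    int main(void)
    {
            printf("%u\n", t_rto_usec(100));     /* LAN RTT: clamped to 200000 */
            printf("%u\n", t_rto_usec(100000));  /* 100 ms RTT: 400000 */
            return 0;
    }
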
@@ -482,66 +459,41 @@ done_computing_x:
482 jiffies + usecs_to_jiffies(t_nfb)); 459 jiffies + usecs_to_jiffies(t_nfb));
483} 460}
484 461
485static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option, 462static int ccid3_hc_tx_parse_options(struct sock *sk, u8 packet_type,
486 unsigned char len, u16 idx, 463 u8 option, u8 *optval, u8 optlen)
487 unsigned char *value)
488{ 464{
489 int rc = 0;
490 const struct dccp_sock *dp = dccp_sk(sk);
491 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 465 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
492 struct ccid3_options_received *opt_recv;
493 __be32 opt_val; 466 __be32 opt_val;
494 467
495 opt_recv = &hc->tx_options_received;
496
497 if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
498 opt_recv->ccid3or_seqno = dp->dccps_gsr;
499 opt_recv->ccid3or_loss_event_rate = ~0;
500 opt_recv->ccid3or_loss_intervals_idx = 0;
501 opt_recv->ccid3or_loss_intervals_len = 0;
502 opt_recv->ccid3or_receive_rate = 0;
503 }
504
505 switch (option) { 468 switch (option) {
469 case TFRC_OPT_RECEIVE_RATE:
506 case TFRC_OPT_LOSS_EVENT_RATE: 470 case TFRC_OPT_LOSS_EVENT_RATE:
507 if (unlikely(len != 4)) { 471 /* Must be ignored on Data packets, cf. RFC 4342 8.3 and 8.5 */
508 DCCP_WARN("%s(%p), invalid len %d " 472 if (packet_type == DCCP_PKT_DATA)
509 "for TFRC_OPT_LOSS_EVENT_RATE\n", 473 break;
510 dccp_role(sk), sk, len); 474 if (unlikely(optlen != 4)) {
511 rc = -EINVAL; 475 DCCP_WARN("%s(%p), invalid len %d for %u\n",
512 } else { 476 dccp_role(sk), sk, optlen, option);
513 opt_val = get_unaligned((__be32 *)value); 477 return -EINVAL;
514 opt_recv->ccid3or_loss_event_rate = ntohl(opt_val);
515 ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
516 dccp_role(sk), sk,
517 opt_recv->ccid3or_loss_event_rate);
518 } 478 }
519 break; 479 opt_val = ntohl(get_unaligned((__be32 *)optval));
520 case TFRC_OPT_LOSS_INTERVALS: 480
521 opt_recv->ccid3or_loss_intervals_idx = idx; 481 if (option == TFRC_OPT_RECEIVE_RATE) {
522 opt_recv->ccid3or_loss_intervals_len = len; 482 /* Receive Rate is kept in units of 64 bytes/second */
523 ccid3_pr_debug("%s(%p), LOSS_INTERVALS=(%u, %u)\n", 483 hc->tx_x_recv = opt_val;
524 dccp_role(sk), sk, 484 hc->tx_x_recv <<= 6;
525 opt_recv->ccid3or_loss_intervals_idx, 485
526 opt_recv->ccid3or_loss_intervals_len);
527 break;
528 case TFRC_OPT_RECEIVE_RATE:
529 if (unlikely(len != 4)) {
530 DCCP_WARN("%s(%p), invalid len %d "
531 "for TFRC_OPT_RECEIVE_RATE\n",
532 dccp_role(sk), sk, len);
533 rc = -EINVAL;
534 } else {
535 opt_val = get_unaligned((__be32 *)value);
536 opt_recv->ccid3or_receive_rate = ntohl(opt_val);
537 ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n", 486 ccid3_pr_debug("%s(%p), RECEIVE_RATE=%u\n",
538 dccp_role(sk), sk, 487 dccp_role(sk), sk, opt_val);
539 opt_recv->ccid3or_receive_rate); 488 } else {
489 /* Update the fixpoint Loss Event Rate fraction */
490 hc->tx_p = tfrc_invert_loss_event_rate(opt_val);
491
492 ccid3_pr_debug("%s(%p), LOSS_EVENT_RATE=%u\n",
493 dccp_role(sk), sk, opt_val);
540 } 494 }
541 break;
542 } 495 }
543 496 return 0;
544 return rc;
545} 497}
546 498
547static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) 499static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk)
@@ -559,42 +511,36 @@ static void ccid3_hc_tx_exit(struct sock *sk)
559{ 511{
560 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); 512 struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
561 513
562 ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
563 sk_stop_timer(sk, &hc->tx_no_feedback_timer); 514 sk_stop_timer(sk, &hc->tx_no_feedback_timer);
564
565 tfrc_tx_hist_purge(&hc->tx_hist); 515 tfrc_tx_hist_purge(&hc->tx_hist);
566} 516}
567 517
568static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) 518static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
569{ 519{
570 struct ccid3_hc_tx_sock *hc; 520 info->tcpi_rto = ccid3_hc_tx_sk(sk)->tx_t_rto;
571 521 info->tcpi_rtt = ccid3_hc_tx_sk(sk)->tx_rtt;
572 /* Listen socks doesn't have a private CCID block */
573 if (sk->sk_state == DCCP_LISTEN)
574 return;
575
576 hc = ccid3_hc_tx_sk(sk);
577 info->tcpi_rto = hc->tx_t_rto;
578 info->tcpi_rtt = hc->tx_rtt;
579} 522}
580 523
581static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, 524static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len,
582 u32 __user *optval, int __user *optlen) 525 u32 __user *optval, int __user *optlen)
583{ 526{
584 const struct ccid3_hc_tx_sock *hc; 527 const struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk);
528 struct tfrc_tx_info tfrc;
585 const void *val; 529 const void *val;
586 530
587 /* Listen socks doesn't have a private CCID block */
588 if (sk->sk_state == DCCP_LISTEN)
589 return -EINVAL;
590
591 hc = ccid3_hc_tx_sk(sk);
592 switch (optname) { 531 switch (optname) {
593 case DCCP_SOCKOPT_CCID_TX_INFO: 532 case DCCP_SOCKOPT_CCID_TX_INFO:
594 if (len < sizeof(hc->tx_tfrc)) 533 if (len < sizeof(tfrc))
595 return -EINVAL; 534 return -EINVAL;
596 len = sizeof(hc->tx_tfrc); 535 tfrc.tfrctx_x = hc->tx_x;
597 val = &hc->tx_tfrc; 536 tfrc.tfrctx_x_recv = hc->tx_x_recv;
537 tfrc.tfrctx_x_calc = hc->tx_x_calc;
538 tfrc.tfrctx_rtt = hc->tx_rtt;
539 tfrc.tfrctx_p = hc->tx_p;
540 tfrc.tfrctx_rto = hc->tx_t_rto;
541 tfrc.tfrctx_ipi = hc->tx_t_ipi;
542 len = sizeof(tfrc);
543 val = &tfrc;
598 break; 544 break;
599 default: 545 default:
600 return -ENOPROTOOPT; 546 return -ENOPROTOOPT;
@@ -624,7 +570,6 @@ static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
624 static const char *const ccid3_rx_state_names[] = { 570 static const char *const ccid3_rx_state_names[] = {
625 [TFRC_RSTATE_NO_DATA] = "NO_DATA", 571 [TFRC_RSTATE_NO_DATA] = "NO_DATA",
626 [TFRC_RSTATE_DATA] = "DATA", 572 [TFRC_RSTATE_DATA] = "DATA",
627 [TFRC_RSTATE_TERM] = "TERM",
628 }; 573 };
629 574
630 return ccid3_rx_state_names[state]; 575 return ccid3_rx_state_names[state];
@@ -650,14 +595,9 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
650{ 595{
651 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); 596 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
652 struct dccp_sock *dp = dccp_sk(sk); 597 struct dccp_sock *dp = dccp_sk(sk);
653 ktime_t now; 598 ktime_t now = ktime_get_real();
654 s64 delta = 0; 599 s64 delta = 0;
655 600
656 if (unlikely(hc->rx_state == TFRC_RSTATE_TERM))
657 return;
658
659 now = ktime_get_real();
660
661 switch (fbtype) { 601 switch (fbtype) {
662 case CCID3_FBACK_INITIAL: 602 case CCID3_FBACK_INITIAL:
663 hc->rx_x_recv = 0; 603 hc->rx_x_recv = 0;
@@ -701,14 +641,12 @@ static void ccid3_hc_rx_send_feedback(struct sock *sk,
701 641
702static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb) 642static int ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
703{ 643{
704 const struct ccid3_hc_rx_sock *hc; 644 const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
705 __be32 x_recv, pinv; 645 __be32 x_recv, pinv;
706 646
707 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN)) 647 if (!(sk->sk_state == DCCP_OPEN || sk->sk_state == DCCP_PARTOPEN))
708 return 0; 648 return 0;
709 649
710 hc = ccid3_hc_rx_sk(sk);
711
712 if (dccp_packet_without_ack(skb)) 650 if (dccp_packet_without_ack(skb))
713 return 0; 651 return 0;
714 652
@@ -749,10 +687,11 @@ static u32 ccid3_first_li(struct sock *sk)
749 x_recv = scaled_div32(hc->rx_bytes_recv, delta); 687 x_recv = scaled_div32(hc->rx_bytes_recv, delta);
750 if (x_recv == 0) { /* would also trigger divide-by-zero */ 688 if (x_recv == 0) { /* would also trigger divide-by-zero */
751 DCCP_WARN("X_recv==0\n"); 689 DCCP_WARN("X_recv==0\n");
752 if ((x_recv = hc->rx_x_recv) == 0) { 690 if (hc->rx_x_recv == 0) {
753 DCCP_BUG("stored value of X_recv is zero"); 691 DCCP_BUG("stored value of X_recv is zero");
754 return ~0U; 692 return ~0U;
755 } 693 }
694 x_recv = hc->rx_x_recv;
756 } 695 }
757 696
758 fval = scaled_div(hc->rx_s, hc->rx_rtt); 697 fval = scaled_div(hc->rx_s, hc->rx_rtt);
@@ -862,46 +801,31 @@ static void ccid3_hc_rx_exit(struct sock *sk)
862{ 801{
863 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk); 802 struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
864 803
865 ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
866
867 tfrc_rx_hist_purge(&hc->rx_hist); 804 tfrc_rx_hist_purge(&hc->rx_hist);
868 tfrc_lh_cleanup(&hc->rx_li_hist); 805 tfrc_lh_cleanup(&hc->rx_li_hist);
869} 806}
870 807
871static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info) 808static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
872{ 809{
873 const struct ccid3_hc_rx_sock *hc; 810 info->tcpi_ca_state = ccid3_hc_rx_sk(sk)->rx_state;
874
875 /* Listen socks doesn't have a private CCID block */
876 if (sk->sk_state == DCCP_LISTEN)
877 return;
878
879 hc = ccid3_hc_rx_sk(sk);
880 info->tcpi_ca_state = hc->rx_state;
881 info->tcpi_options |= TCPI_OPT_TIMESTAMPS; 811 info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
882 info->tcpi_rcv_rtt = hc->rx_rtt; 812 info->tcpi_rcv_rtt = ccid3_hc_rx_sk(sk)->rx_rtt;
883} 813}
884 814
885static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, 815static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len,
886 u32 __user *optval, int __user *optlen) 816 u32 __user *optval, int __user *optlen)
887{ 817{
888 const struct ccid3_hc_rx_sock *hc; 818 const struct ccid3_hc_rx_sock *hc = ccid3_hc_rx_sk(sk);
889 struct tfrc_rx_info rx_info; 819 struct tfrc_rx_info rx_info;
890 const void *val; 820 const void *val;
891 821
892 /* Listen socks doesn't have a private CCID block */
893 if (sk->sk_state == DCCP_LISTEN)
894 return -EINVAL;
895
896 hc = ccid3_hc_rx_sk(sk);
897 switch (optname) { 822 switch (optname) {
898 case DCCP_SOCKOPT_CCID_RX_INFO: 823 case DCCP_SOCKOPT_CCID_RX_INFO:
899 if (len < sizeof(rx_info)) 824 if (len < sizeof(rx_info))
900 return -EINVAL; 825 return -EINVAL;
901 rx_info.tfrcrx_x_recv = hc->rx_x_recv; 826 rx_info.tfrcrx_x_recv = hc->rx_x_recv;
902 rx_info.tfrcrx_rtt = hc->rx_rtt; 827 rx_info.tfrcrx_rtt = hc->rx_rtt;
903 rx_info.tfrcrx_p = hc->rx_pinv == 0 ? ~0U : 828 rx_info.tfrcrx_p = tfrc_invert_loss_event_rate(hc->rx_pinv);
904 scaled_div(1, hc->rx_pinv);
905 len = sizeof(rx_info); 829 len = sizeof(rx_info);
906 val = &rx_info; 830 val = &rx_info;
907 break; 831 break;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index 032635776653..1a9933c29672 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -42,35 +42,36 @@
42#include "lib/tfrc.h" 42#include "lib/tfrc.h"
43#include "../ccid.h" 43#include "../ccid.h"
44 44
45/* Two seconds as per RFC 3448 4.2 */ 45/* Two seconds as per RFC 5348, 4.2 */
46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC) 46#define TFRC_INITIAL_TIMEOUT (2 * USEC_PER_SEC)
47 47
48/* In usecs - half the scheduling granularity as per RFC3448 4.6 */
49#define TFRC_OPSYS_HALF_TIME_GRAN (USEC_PER_SEC / (2 * HZ))
50
51/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */ 48/* Parameter t_mbi from [RFC 3448, 4.3]: backoff interval in seconds */
52#define TFRC_T_MBI 64 49#define TFRC_T_MBI 64
53 50
51/*
52 * The t_delta parameter (RFC 5348, 8.3): delays of less than %USEC_PER_MSEC are
53 * rounded down to 0, since sk_reset_timer() here uses millisecond granularity.
54 * Hence we can use a constant t_delta = %USEC_PER_MSEC when HZ >= 500. A coarse
55 * resolution of HZ < 500 means that the error is below one timer tick (t_gran)
56 * when using the constant t_delta = t_gran / 2 = %USEC_PER_SEC / (2 * HZ).
57 */
58#if (HZ >= 500)
59# define TFRC_T_DELTA USEC_PER_MSEC
60#else
61# define TFRC_T_DELTA (USEC_PER_SEC / (2 * HZ))
62#endif
63
54enum ccid3_options { 64enum ccid3_options {
55 TFRC_OPT_LOSS_EVENT_RATE = 192, 65 TFRC_OPT_LOSS_EVENT_RATE = 192,
56 TFRC_OPT_LOSS_INTERVALS = 193, 66 TFRC_OPT_LOSS_INTERVALS = 193,
57 TFRC_OPT_RECEIVE_RATE = 194, 67 TFRC_OPT_RECEIVE_RATE = 194,
58}; 68};
59 69
60struct ccid3_options_received {
61 u64 ccid3or_seqno:48,
62 ccid3or_loss_intervals_idx:16;
63 u16 ccid3or_loss_intervals_len;
64 u32 ccid3or_loss_event_rate;
65 u32 ccid3or_receive_rate;
66};
67
68/* TFRC sender states */ 70/* TFRC sender states */
69enum ccid3_hc_tx_states { 71enum ccid3_hc_tx_states {
70 TFRC_SSTATE_NO_SENT = 1, 72 TFRC_SSTATE_NO_SENT = 1,
71 TFRC_SSTATE_NO_FBACK, 73 TFRC_SSTATE_NO_FBACK,
72 TFRC_SSTATE_FBACK, 74 TFRC_SSTATE_FBACK,
73 TFRC_SSTATE_TERM,
74}; 75};
75 76
76/** 77/**
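
The TFRC_T_DELTA block earlier in this hunk selects between a flat 1 ms and half a timer tick. Tabulating both expressions for a few HZ values shows why 500 is the crossover point:

    #include <stdio.h>

    #define USEC_PER_SEC  1000000
    #define USEC_PER_MSEC 1000

    int main(void)
    {
            int hz_values[] = { 100, 250, 500, 1000 };

            for (int i = 0; i < 4; i++) {
                    int hz = hz_values[i];
                    int t_delta = (hz >= 500) ? USEC_PER_MSEC
                                              : USEC_PER_SEC / (2 * hz);
                    printf("HZ=%4d  t_delta=%5d us\n", hz, t_delta);
            }
            return 0;
    }
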
@@ -90,19 +91,16 @@ enum ccid3_hc_tx_states {
90 * @tx_no_feedback_timer: Handle to no feedback timer 91 * @tx_no_feedback_timer: Handle to no feedback timer
91 * @tx_t_ld: Time last doubled during slow start 92 * @tx_t_ld: Time last doubled during slow start
92 * @tx_t_nom: Nominal send time of next packet 93 * @tx_t_nom: Nominal send time of next packet
93 * @tx_delta: Send timer delta (RFC 3448, 4.6) in usecs
94 * @tx_hist: Packet history 94 * @tx_hist: Packet history
95 * @tx_options_received: Parsed set of retrieved options
96 */ 95 */
97struct ccid3_hc_tx_sock { 96struct ccid3_hc_tx_sock {
98 struct tfrc_tx_info tx_tfrc; 97 u64 tx_x;
99#define tx_x tx_tfrc.tfrctx_x 98 u64 tx_x_recv;
100#define tx_x_recv tx_tfrc.tfrctx_x_recv 99 u32 tx_x_calc;
101#define tx_x_calc tx_tfrc.tfrctx_x_calc 100 u32 tx_rtt;
102#define tx_rtt tx_tfrc.tfrctx_rtt 101 u32 tx_p;
103#define tx_p tx_tfrc.tfrctx_p 102 u32 tx_t_rto;
104#define tx_t_rto tx_tfrc.tfrctx_rto 103 u32 tx_t_ipi;
105#define tx_t_ipi tx_tfrc.tfrctx_ipi
106 u16 tx_s; 104 u16 tx_s;
107 enum ccid3_hc_tx_states tx_state:8; 105 enum ccid3_hc_tx_states tx_state:8;
108 u8 tx_last_win_count; 106 u8 tx_last_win_count;
@@ -110,9 +108,7 @@ struct ccid3_hc_tx_sock {
110 struct timer_list tx_no_feedback_timer; 108 struct timer_list tx_no_feedback_timer;
111 ktime_t tx_t_ld; 109 ktime_t tx_t_ld;
112 ktime_t tx_t_nom; 110 ktime_t tx_t_nom;
113 u32 tx_delta;
114 struct tfrc_tx_hist_entry *tx_hist; 111 struct tfrc_tx_hist_entry *tx_hist;
115 struct ccid3_options_received tx_options_received;
116}; 112};
117 113
118static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) 114static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
@@ -126,21 +122,16 @@ static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
126enum ccid3_hc_rx_states { 122enum ccid3_hc_rx_states {
127 TFRC_RSTATE_NO_DATA = 1, 123 TFRC_RSTATE_NO_DATA = 1,
128 TFRC_RSTATE_DATA, 124 TFRC_RSTATE_DATA,
129 TFRC_RSTATE_TERM = 127,
130}; 125};
131 126
132/** 127/**
133 * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket 128 * struct ccid3_hc_rx_sock - CCID3 receiver half-connection socket
134 * @rx_x_recv: Receiver estimate of send rate (RFC 3448 4.3)
135 * @rx_rtt: Receiver estimate of rtt (non-standard)
136 * @rx_p: Current loss event rate (RFC 3448 5.4)
137 * @rx_last_counter: Tracks window counter (RFC 4342, 8.1) 129 * @rx_last_counter: Tracks window counter (RFC 4342, 8.1)
138 * @rx_state: Receiver state, one of %ccid3_hc_rx_states 130 * @rx_state: Receiver state, one of %ccid3_hc_rx_states
139 * @rx_bytes_recv: Total sum of DCCP payload bytes 131 * @rx_bytes_recv: Total sum of DCCP payload bytes
140 * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3) 132 * @rx_x_recv: Receiver estimate of send rate (RFC 3448, sec. 4.3)
141 * @rx_rtt: Receiver estimate of RTT 133 * @rx_rtt: Receiver estimate of RTT
142 * @rx_tstamp_last_feedback: Time at which last feedback was sent 134 * @rx_tstamp_last_feedback: Time at which last feedback was sent
143 * @rx_tstamp_last_ack: Time at which last feedback was sent
144 * @rx_hist: Packet history (loss detection + RTT sampling) 135 * @rx_hist: Packet history (loss detection + RTT sampling)
145 * @rx_li_hist: Loss Interval database 136 * @rx_li_hist: Loss Interval database
146 * @rx_s: Received packet size in bytes 137 * @rx_s: Received packet size in bytes
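
A quick standalone illustration of the TFRC_T_DELTA rule introduced above (this is a userspace sketch, not kernel code; printf stands in for sk_reset_timer): t_delta is a fixed 1 ms when the timer frequency is fine enough (HZ >= 500), and half a timer tick otherwise, per RFC 5348, 8.3.

#include <stdio.h>

#define USEC_PER_SEC  1000000u
#define USEC_PER_MSEC 1000u

static unsigned int tfrc_t_delta(unsigned int hz)
{
	/* t_gran = one timer tick, in microseconds */
	unsigned int t_gran = USEC_PER_SEC / hz;

	/* fine-grained timers: fixed 1 ms; coarse timers: half a tick */
	return hz >= 500 ? USEC_PER_MSEC : t_gran / 2;
}

int main(void)
{
	unsigned int hz[] = { 100, 250, 500, 1000 };

	for (int i = 0; i < 4; i++)
		printf("HZ=%-5u t_delta=%u us\n", hz[i], tfrc_t_delta(hz[i]));
	return 0;
}
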
diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c
index 8fc3cbf79071..497723c4d4bb 100644
--- a/net/dccp/ccids/lib/loss_interval.c
+++ b/net/dccp/ccids/lib/loss_interval.c
@@ -116,7 +116,7 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb)
116 cur->li_length = len; 116 cur->li_length = len;
117 tfrc_lh_calc_i_mean(lh); 117 tfrc_lh_calc_i_mean(lh);
118 118
119 return (lh->i_mean < old_i_mean); 119 return lh->i_mean < old_i_mean;
120} 120}
121 121
122/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ 122/* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */
diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c
index 3a4f414e94a0..de8fe294bf0b 100644
--- a/net/dccp/ccids/lib/packet_history.c
+++ b/net/dccp/ccids/lib/packet_history.c
@@ -38,18 +38,6 @@
38#include "packet_history.h" 38#include "packet_history.h"
39#include "../../dccp.h" 39#include "../../dccp.h"
40 40
41/**
42 * tfrc_tx_hist_entry - Simple singly-linked TX history list
43 * @next: next oldest entry (LIFO order)
44 * @seqno: sequence number of this entry
45 * @stamp: send time of packet with sequence number @seqno
46 */
47struct tfrc_tx_hist_entry {
48 struct tfrc_tx_hist_entry *next;
49 u64 seqno;
50 ktime_t stamp;
51};
52
53/* 41/*
54 * Transmitter History Routines 42 * Transmitter History Routines
55 */ 43 */
@@ -71,15 +59,6 @@ void tfrc_tx_packet_history_exit(void)
71 } 59 }
72} 60}
73 61
74static struct tfrc_tx_hist_entry *
75 tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
76{
77 while (head != NULL && head->seqno != seqno)
78 head = head->next;
79
80 return head;
81}
82
83int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno) 62int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno)
84{ 63{
85 struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any()); 64 struct tfrc_tx_hist_entry *entry = kmem_cache_alloc(tfrc_tx_hist_slab, gfp_any());
@@ -107,24 +86,6 @@ void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp)
107 *headp = NULL; 86 *headp = NULL;
108} 87}
109 88
110u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head, const u64 seqno,
111 const ktime_t now)
112{
113 u32 rtt = 0;
114 struct tfrc_tx_hist_entry *packet = tfrc_tx_hist_find_entry(head, seqno);
115
116 if (packet != NULL) {
117 rtt = ktime_us_delta(now, packet->stamp);
118 /*
119 * Garbage-collect older (irrelevant) entries:
120 */
121 tfrc_tx_hist_purge(&packet->next);
122 }
123
124 return rtt;
125}
126
127
128/* 89/*
129 * Receiver History Routines 90 * Receiver History Routines
130 */ 91 */
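
For reference, a minimal userspace model of the TX history this file keeps (malloc() stands in for the kernel slab cache, and the tfrc_ prefixes are dropped to make clear this is a sketch rather than the kernel code): newest entry at the head, lookup by sequence number, purge frees the whole chain.

#include <stdint.h>
#include <stdlib.h>

struct tx_hist_entry {
	struct tx_hist_entry *next;	/* next oldest entry (LIFO order) */
	uint64_t seqno;
};

static int tx_hist_add(struct tx_hist_entry **headp, uint64_t seqno)
{
	struct tx_hist_entry *e = malloc(sizeof(*e));

	if (e == NULL)
		return -1;
	e->seqno = seqno;
	e->next  = *headp;		/* push in front of older entries */
	*headp   = e;
	return 0;
}

static struct tx_hist_entry *
tx_hist_find(struct tx_hist_entry *head, uint64_t seqno)
{
	while (head != NULL && head->seqno != seqno)
		head = head->next;
	return head;
}

static void tx_hist_purge(struct tx_hist_entry **headp)
{
	struct tx_hist_entry *e, *next;

	for (e = *headp; e != NULL; e = next) {
		next = e->next;
		free(e);
	}
	*headp = NULL;
}
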
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index 7df6c5299999..7ee4a9d9d335 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -40,12 +40,28 @@
40#include <linux/slab.h> 40#include <linux/slab.h>
41#include "tfrc.h" 41#include "tfrc.h"
42 42
43struct tfrc_tx_hist_entry; 43/**
44 * tfrc_tx_hist_entry - Simple singly-linked TX history list
45 * @next: next oldest entry (LIFO order)
46 * @seqno: sequence number of this entry
47 * @stamp: send time of packet with sequence number @seqno
48 */
49struct tfrc_tx_hist_entry {
50 struct tfrc_tx_hist_entry *next;
51 u64 seqno;
52 ktime_t stamp;
53};
54
55static inline struct tfrc_tx_hist_entry *
56 tfrc_tx_hist_find_entry(struct tfrc_tx_hist_entry *head, u64 seqno)
57{
58 while (head != NULL && head->seqno != seqno)
59 head = head->next;
60 return head;
61}
44 62
45extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno); 63extern int tfrc_tx_hist_add(struct tfrc_tx_hist_entry **headp, u64 seqno);
46extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp); 64extern void tfrc_tx_hist_purge(struct tfrc_tx_hist_entry **headp);
47extern u32 tfrc_tx_hist_rtt(struct tfrc_tx_hist_entry *head,
48 const u64 seqno, const ktime_t now);
49 65
50/* Subtraction a-b modulo-16, respects circular wrap-around */ 66/* Subtraction a-b modulo-16, respects circular wrap-around */
51#define SUB16(a, b) (((a) + 16 - (b)) & 0xF) 67#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)
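
The SUB16() macro above deserves a worked example: adding 16 before subtracting keeps the intermediate value non-negative, and masking with 0xF reduces it modulo 16, so the distance is correct across the 15 -> 0 wrap of the 4-bit window counter. A small self-checking sketch:

#include <assert.h>

#define SUB16(a, b) (((a) + 16 - (b)) & 0xF)

int main(void)
{
	assert(SUB16(5, 3) == 2);	/* no wrap */
	assert(SUB16(2, 14) == 4);	/* wrapped: 14, 15, 0, 1, 2 */
	assert(SUB16(0, 15) == 1);
	return 0;
}
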
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 01bb48e96c2e..f8ee3f549770 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -57,6 +57,7 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
57 57
58extern u32 tfrc_calc_x(u16 s, u32 R, u32 p); 58extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
59extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue); 59extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
60extern u32 tfrc_invert_loss_event_rate(u32 loss_event_rate);
60 61
61extern int tfrc_tx_packet_history_init(void); 62extern int tfrc_tx_packet_history_init(void);
62extern void tfrc_tx_packet_history_exit(void); 63extern void tfrc_tx_packet_history_exit(void);
diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index 22ca1cf0eb55..a052a4377e26 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -687,3 +687,17 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
687 index = tfrc_binsearch(fvalue, 0); 687 index = tfrc_binsearch(fvalue, 0);
688 return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE; 688 return (index + 1) * 1000000 / TFRC_CALC_X_ARRSIZE;
689} 689}
690
691/**
692 * tfrc_invert_loss_event_rate - Compute p so that 10^6 corresponds to 100%
693 * When @loss_event_rate is large, there is a chance that p is truncated to 0.
694 * To avoid re-entering slow-start in that case, we set p = TFRC_SMALLEST_P > 0.
695 */
696u32 tfrc_invert_loss_event_rate(u32 loss_event_rate)
697{
698 if (loss_event_rate == UINT_MAX) /* see RFC 4342, 8.5 */
699 return 0;
700 if (unlikely(loss_event_rate == 0)) /* map 1/0 into 100% */
701 return 1000000;
702 return max_t(u32, scaled_div(1, loss_event_rate), TFRC_SMALLEST_P);
703}
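
A userspace sketch of the inversion just added, with the two boundary cases spelled out. scaled_div() and TFRC_SMALLEST_P are kernel internals; here a plain 10^6-scaled division and an assumed smallest-p value of 40 stand in for them, so treat the constants as illustrative.

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

#define TFRC_SMALLEST_P 40	/* assumed value, for illustration only */

static uint32_t scaled_div(uint64_t a, uint64_t b)
{
	return (uint32_t)((a * 1000000u) / b);	/* a/b scaled by 10^6 */
}

static uint32_t invert_loss_event_rate(uint32_t rate)
{
	if (rate == UINT_MAX)		/* no loss seen (RFC 4342, 8.5) */
		return 0;
	if (rate == 0)			/* map 1/0 onto 100% */
		return 1000000;

	uint32_t p = scaled_div(1, rate);

	/* avoid p truncating to 0 for very large loss intervals */
	return p > TFRC_SMALLEST_P ? p : TFRC_SMALLEST_P;
}

int main(void)
{
	printf("p(rate=100)      = %u\n", invert_loss_event_rate(100));
	printf("p(rate=10^9)     = %u\n", invert_loss_event_rate(1000000000u));
	printf("p(rate=UINT_MAX) = %u\n", invert_loss_event_rate(UINT_MAX));
	return 0;
}
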
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 3ccef1b70fee..3eb264b60823 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -153,18 +153,27 @@ static inline u64 max48(const u64 seq1, const u64 seq2)
153} 153}
154 154
155/** 155/**
156 * dccp_loss_free - Evaluates condition for data loss from RFC 4340, 7.7.1 156 * dccp_loss_count - Approximate the number of lost data packets in a burst loss
157 * @s1: start sequence number 157 * @s1: last known sequence number before the loss ('hole')
158 * @s2: end sequence number 158 * @s2: first sequence number seen after the 'hole'
159 * @ndp: NDP count on packet with sequence number @s2 159 * @ndp: NDP count on packet with sequence number @s2
160 * Returns true if the sequence range s1...s2 has no data loss.
161 */ 160 */
162static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp) 161static inline u64 dccp_loss_count(const u64 s1, const u64 s2, const u64 ndp)
163{ 162{
164 s64 delta = dccp_delta_seqno(s1, s2); 163 s64 delta = dccp_delta_seqno(s1, s2);
165 164
166 WARN_ON(delta < 0); 165 WARN_ON(delta < 0);
167 return (u64)delta <= ndp + 1; 166 delta -= ndp + 1;
167
168 return delta > 0 ? delta : 0;
169}
170
171/**
172 * dccp_loss_free - Evaluate condition for data loss from RFC 4340, 7.7.1
173 */
174static inline bool dccp_loss_free(const u64 s1, const u64 s2, const u64 ndp)
175{
176 return dccp_loss_count(s1, s2, ndp) == 0;
168} 177}
169 178
170enum { 179enum {
@@ -246,7 +255,6 @@ static inline void dccp_clear_xmit_timers(struct sock *sk)
246extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu); 255extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
247 256
248extern const char *dccp_packet_name(const int type); 257extern const char *dccp_packet_name(const int type);
249extern const char *dccp_state_name(const int state);
250 258
251extern void dccp_set_state(struct sock *sk, const int state); 259extern void dccp_set_state(struct sock *sk, const int state);
252extern void dccp_done(struct sock *sk); 260extern void dccp_done(struct sock *sk);
@@ -415,6 +423,23 @@ static inline void dccp_update_gsr(struct sock *sk, u64 seq)
415 dp->dccps_gsr = seq; 423 dp->dccps_gsr = seq;
416 /* Sequence validity window depends on remote Sequence Window (7.5.1) */ 424 /* Sequence validity window depends on remote Sequence Window (7.5.1) */
417 dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4); 425 dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4);
426 /*
427 * Adjust SWL so that it is not below ISR. In contrast to RFC 4340,
428 * 7.5.1 we perform this check beyond the initial handshake: W/W' are
429 * always > 32, so for the first W/W' packets in the lifetime of a
430 * connection we always have to adjust SWL.
431 * A second reason why we are doing this is that the window depends on
432 * the feature-remote value of Sequence Window: nothing stops the peer
433 * from updating this value while we are busy adjusting SWL for the
434 * first W packets (we would have to count from scratch again then).
435 * Therefore it is safer to always make sure that the Sequence Window
436 * is not artificially extended by a peer who grows SWL downwards by
437 * continually updating the feature-remote Sequence-Window.
438 * If sequence numbers wrap it is bad luck. But that will take a while
439 * (48 bit), and this measure prevents Sequence-number attacks.
440 */
441 if (before48(dp->dccps_swl, dp->dccps_isr))
442 dp->dccps_swl = dp->dccps_isr;
418 dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4); 443 dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4);
419} 444}
420 445
@@ -425,14 +450,16 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq)
425 dp->dccps_gss = seq; 450 dp->dccps_gss = seq;
426 /* Ack validity window depends on local Sequence Window value (7.5.1) */ 451 /* Ack validity window depends on local Sequence Window value (7.5.1) */
427 dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win); 452 dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win);
453 /* Adjust AWL so that it is not below ISS - see comment above for SWL */
454 if (before48(dp->dccps_awl, dp->dccps_iss))
455 dp->dccps_awl = dp->dccps_iss;
428 dp->dccps_awh = dp->dccps_gss; 456 dp->dccps_awh = dp->dccps_gss;
429} 457}
430 458
431static inline int dccp_ack_pending(const struct sock *sk) 459static inline int dccp_ack_pending(const struct sock *sk)
432{ 460{
433 const struct dccp_sock *dp = dccp_sk(sk); 461 const struct dccp_sock *dp = dccp_sk(sk);
434 return dp->dccps_timestamp_echo != 0 || 462 return (dp->dccps_hc_rx_ackvec != NULL &&
435 (dp->dccps_hc_rx_ackvec != NULL &&
436 dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || 463 dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) ||
437 inet_csk_ack_scheduled(sk); 464 inet_csk_ack_scheduled(sk);
438} 465}
@@ -449,7 +476,6 @@ extern int dccp_insert_options_rsk(struct dccp_request_sock*, struct sk_buff*);
449extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed); 476extern int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed);
450extern u32 dccp_timestamp(void); 477extern u32 dccp_timestamp(void);
451extern void dccp_timestamping_init(void); 478extern void dccp_timestamping_init(void);
452extern int dccp_insert_option_timestamp(struct sk_buff *skb);
453extern int dccp_insert_option(struct sk_buff *skb, unsigned char option, 479extern int dccp_insert_option(struct sk_buff *skb, unsigned char option,
454 const void *value, unsigned char len); 480 const void *value, unsigned char len);
455 481
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index df7dd26cf07e..568def952722 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -730,16 +730,6 @@ int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
730 0, list, len); 730 0, list, len);
731} 731}
732 732
733/* Analogous to dccp_feat_register_sp(), but for non-negotiable values */
734int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val)
735{
736 /* any changes must be registered before establishing the connection */
737 if (sk->sk_state != DCCP_CLOSED)
738 return -EISCONN;
739 if (dccp_feat_type(feat) != FEAT_NN)
740 return -EINVAL;
741 return __feat_register_nn(&dccp_sk(sk)->dccps_featneg, feat, 0, val);
742}
743 733
744/* 734/*
745 * Tracking features whose value depend on the choice of CCID 735 * Tracking features whose value depend on the choice of CCID
diff --git a/net/dccp/feat.h b/net/dccp/feat.h
index f96721619def..e56a4e5e634e 100644
--- a/net/dccp/feat.h
+++ b/net/dccp/feat.h
@@ -111,7 +111,6 @@ extern int dccp_feat_init(struct sock *sk);
111extern void dccp_feat_initialise_sysctls(void); 111extern void dccp_feat_initialise_sysctls(void);
112extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, 112extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local,
113 u8 const *list, u8 len); 113 u8 const *list, u8 len);
114extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val);
115extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, 114extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *,
116 u8 mand, u8 opt, u8 feat, u8 *val, u8 len); 115 u8 mand, u8 opt, u8 feat, u8 *val, u8 len);
117extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); 116extern int dccp_feat_clone_list(struct list_head const *, struct list_head *);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 10c957a88f4f..265985370fa1 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -259,7 +259,7 @@ static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
259 sysctl_dccp_sync_ratelimit))) 259 sysctl_dccp_sync_ratelimit)))
260 return 0; 260 return 0;
261 261
262 DCCP_WARN("DCCP: Step 6 failed for %s packet, " 262 DCCP_WARN("Step 6 failed for %s packet, "
263 "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and " 263 "(LSWL(%llu) <= P.seqno(%llu) <= S.SWH(%llu)) and "
264 "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), " 264 "(P.ackno %s or LAWL(%llu) <= P.ackno(%llu) <= S.AWH(%llu), "
265 "sending SYNC...\n", dccp_packet_name(dh->dccph_type), 265 "sending SYNC...\n", dccp_packet_name(dh->dccph_type),
@@ -441,20 +441,14 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
441 kfree_skb(sk->sk_send_head); 441 kfree_skb(sk->sk_send_head);
442 sk->sk_send_head = NULL; 442 sk->sk_send_head = NULL;
443 443
444 dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
445 dccp_update_gsr(sk, dp->dccps_isr);
446 /* 444 /*
447 * SWL and AWL are initially adjusted so that they are not less than 445 * Set ISR, GSR from packet. ISS was set in dccp_v{4,6}_connect
448 * the initial Sequence Numbers received and sent, respectively: 446 * and GSS in dccp_transmit_skb(). Setting AWL/AWH and SWL/SWH
449 * SWL := max(GSR + 1 - floor(W/4), ISR), 447 * is done as part of activating the feature values below, since
450 * AWL := max(GSS - W' + 1, ISS). 448 * these settings depend on the local/remote Sequence Window
451 * These adjustments MUST be applied only at the beginning of the 449 * features, which were undefined or not confirmed until now.
452 * connection.
453 *
454 * AWL was adjusted in dccp_v4_connect -acme
455 */ 450 */
456 dccp_set_seqno(&dp->dccps_swl, 451 dp->dccps_gsr = dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
457 max48(dp->dccps_swl, dp->dccps_isr));
458 452
459 dccp_sync_mss(sk, icsk->icsk_pmtu_cookie); 453 dccp_sync_mss(sk, icsk->icsk_pmtu_cookie);
460 454
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index d4a166f0f391..3f69ea114829 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -392,7 +392,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
392 392
393 newsk = dccp_create_openreq_child(sk, req, skb); 393 newsk = dccp_create_openreq_child(sk, req, skb);
394 if (newsk == NULL) 394 if (newsk == NULL)
395 goto exit; 395 goto exit_nonewsk;
396 396
397 sk_setup_caps(newsk, dst); 397 sk_setup_caps(newsk, dst);
398 398
@@ -409,16 +409,20 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
409 409
410 dccp_sync_mss(newsk, dst_mtu(dst)); 410 dccp_sync_mss(newsk, dst_mtu(dst));
411 411
412 if (__inet_inherit_port(sk, newsk) < 0) {
413 sock_put(newsk);
414 goto exit;
415 }
412 __inet_hash_nolisten(newsk, NULL); 416 __inet_hash_nolisten(newsk, NULL);
413 __inet_inherit_port(sk, newsk);
414 417
415 return newsk; 418 return newsk;
416 419
417exit_overflow: 420exit_overflow:
418 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 421 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
422exit_nonewsk:
423 dst_release(dst);
419exit: 424exit:
420 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 425 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
421 dst_release(dst);
422 return NULL; 426 return NULL;
423} 427}
424 428
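
The shape of the reworked error path above, in miniature: labels release resources in reverse order of acquisition, and the new exit_nonewsk label covers the window where the route is held but no child socket exists yet. All names in this sketch are illustrative stand-ins, not the kernel API.

#include <stdio.h>

static int setup(int overflow, int no_child)
{
	int dst_held = 1;			/* route looked up first */

	if (overflow)
		goto exit_overflow;
	if (no_child)
		goto exit_nonewsk;		/* child alloc failed */

	printf("child created, dst handed to child\n");
	return 0;

exit_overflow:
	printf("overflow counted\n");		/* extra stat on this path */
exit_nonewsk:
	dst_held = 0;				/* dst_release(dst) */
	printf("drop counted, dst_held=%d\n", dst_held);
	return -1;
}

int main(void)
{
	setup(1, 0);
	setup(0, 1);
	setup(0, 0);
	return 0;
}
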
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6e3f32575df7..dca711df9b60 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -564,7 +564,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
564 564
565 newsk = dccp_create_openreq_child(sk, req, skb); 565 newsk = dccp_create_openreq_child(sk, req, skb);
566 if (newsk == NULL) 566 if (newsk == NULL)
567 goto out; 567 goto out_nonewsk;
568 568
569 /* 569 /*
570 * No need to charge this sock to the relevant IPv6 refcnt debug socks 570 * No need to charge this sock to the relevant IPv6 refcnt debug socks
@@ -632,18 +632,22 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
632 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 632 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
633 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 633 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
634 634
635 if (__inet_inherit_port(sk, newsk) < 0) {
636 sock_put(newsk);
637 goto out;
638 }
635 __inet6_hash(newsk, NULL); 639 __inet6_hash(newsk, NULL);
636 __inet_inherit_port(sk, newsk);
637 640
638 return newsk; 641 return newsk;
639 642
640out_overflow: 643out_overflow:
641 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 644 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
645out_nonewsk:
646 dst_release(dst);
642out: 647out:
643 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); 648 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
644 if (opt != NULL && opt != np->opt) 649 if (opt != NULL && opt != np->opt)
645 sock_kfree_s(sk, opt, opt->tot_len); 650 sock_kfree_s(sk, opt, opt->tot_len);
646 dst_release(dst);
647 return NULL; 651 return NULL;
648} 652}
649 653
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 128b089d3aef..d7041a0963af 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -121,30 +121,18 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
121 * 121 *
122 * Choose S.ISS (initial seqno) or set from Init Cookies 122 * Choose S.ISS (initial seqno) or set from Init Cookies
123 * Initialize S.GAR := S.ISS 123 * Initialize S.GAR := S.ISS
124 * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies 124 * Set S.ISR, S.GSR from packet (or Init Cookies)
125 */ 125 *
126 newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss; 126 * Setting AWL/AWH and SWL/SWH happens as part of the feature
127 dccp_update_gss(newsk, dreq->dreq_iss); 127 * activation below, as these windows all depend on the local
128 128 * and remote Sequence Window feature values (7.5.2).
129 newdp->dccps_isr = dreq->dreq_isr;
130 dccp_update_gsr(newsk, dreq->dreq_isr);
131
132 /*
133 * SWL and AWL are initially adjusted so that they are not less than
134 * the initial Sequence Numbers received and sent, respectively:
135 * SWL := max(GSR + 1 - floor(W/4), ISR),
136 * AWL := max(GSS - W' + 1, ISS).
137 * These adjustments MUST be applied only at the beginning of the
138 * connection.
139 */ 129 */
140 dccp_set_seqno(&newdp->dccps_swl, 130 newdp->dccps_gss = newdp->dccps_iss = dreq->dreq_iss;
141 max48(newdp->dccps_swl, newdp->dccps_isr)); 131 newdp->dccps_gar = newdp->dccps_iss;
142 dccp_set_seqno(&newdp->dccps_awl, 132 newdp->dccps_gsr = newdp->dccps_isr = dreq->dreq_isr;
143 max48(newdp->dccps_awl, newdp->dccps_iss));
144 133
145 /* 134 /*
146 * Activate features after initialising the sequence numbers, 135 * Activate features: initialise CCIDs, sequence windows etc.
147 * since CCID initialisation may depend on GSS, ISR, ISS etc.
148 */ 136 */
149 if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) { 137 if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) {
150 /* It is still raw copy of parent, so invalidate 138 /* It is still raw copy of parent, so invalidate
diff --git a/net/dccp/options.c b/net/dccp/options.c
index bfda087bd90d..cd3061813009 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -96,18 +96,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
96 } 96 }
97 97
98 /* 98 /*
99 * CCID-Specific Options (from RFC 4340, sec. 10.3):
100 *
101 * Option numbers 128 through 191 are for options sent from the
102 * HC-Sender to the HC-Receiver; option numbers 192 through 255
103 * are for options sent from the HC-Receiver to the HC-Sender.
104 *
105 * CCID-specific options are ignored during connection setup, as 99 * CCID-specific options are ignored during connection setup, as
106 * negotiation may still be in progress (see RFC 4340, 10.3). 100 * negotiation may still be in progress (see RFC 4340, 10.3).
107 * The same applies to Ack Vectors, as these depend on the CCID. 101 * The same applies to Ack Vectors, as these depend on the CCID.
108 *
109 */ 102 */
110 if (dreq != NULL && (opt >= 128 || 103 if (dreq != NULL && (opt >= DCCPO_MIN_RX_CCID_SPECIFIC ||
111 opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1)) 104 opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1))
112 goto ignore_option; 105 goto ignore_option;
113 106
@@ -170,6 +163,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
170 dccp_role(sk), ntohl(opt_val), 163 dccp_role(sk), ntohl(opt_val),
171 (unsigned long long) 164 (unsigned long long)
172 DCCP_SKB_CB(skb)->dccpd_ack_seq); 165 DCCP_SKB_CB(skb)->dccpd_ack_seq);
166 /* schedule an Ack in case this sender is quiescent */
167 inet_csk_schedule_ack(sk);
173 break; 168 break;
174 case DCCPO_TIMESTAMP_ECHO: 169 case DCCPO_TIMESTAMP_ECHO:
175 if (len != 4 && len != 6 && len != 8) 170 if (len != 4 && len != 6 && len != 8)
@@ -226,23 +221,15 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
226 dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n", 221 dccp_pr_debug("%s rx opt: ELAPSED_TIME=%d\n",
227 dccp_role(sk), elapsed_time); 222 dccp_role(sk), elapsed_time);
228 break; 223 break;
229 case 128 ... 191: { 224 case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC:
230 const u16 idx = value - options;
231
232 if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, 225 if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk,
233 opt, len, idx, 226 pkt_type, opt, value, len))
234 value) != 0)
235 goto out_invalid_option; 227 goto out_invalid_option;
236 }
237 break; 228 break;
238 case 192 ... 255: { 229 case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
239 const u16 idx = value - options;
240
241 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, 230 if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk,
242 opt, len, idx, 231 pkt_type, opt, value, len))
243 value) != 0)
244 goto out_invalid_option; 232 goto out_invalid_option;
245 }
246 break; 233 break;
247 default: 234 default:
248 DCCP_CRIT("DCCP(%p): option %d(len=%d) not " 235 DCCP_CRIT("DCCP(%p): option %d(len=%d) not "
@@ -384,7 +371,7 @@ int dccp_insert_option_elapsed_time(struct sk_buff *skb, u32 elapsed_time)
384 371
385EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); 372EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time);
386 373
387int dccp_insert_option_timestamp(struct sk_buff *skb) 374static int dccp_insert_option_timestamp(struct sk_buff *skb)
388{ 375{
389 __be32 now = htonl(dccp_timestamp()); 376 __be32 now = htonl(dccp_timestamp());
390 /* yes this will overflow but that is the point as we want a 377 /* yes this will overflow but that is the point as we want a
@@ -393,8 +380,6 @@ int dccp_insert_option_timestamp(struct sk_buff *skb)
393 return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now)); 380 return dccp_insert_option(skb, DCCPO_TIMESTAMP, &now, sizeof(now));
394} 381}
395 382
396EXPORT_SYMBOL_GPL(dccp_insert_option_timestamp);
397
398static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp, 383static int dccp_insert_option_timestamp_echo(struct dccp_sock *dp,
399 struct dccp_request_sock *dreq, 384 struct dccp_request_sock *dreq,
400 struct sk_buff *skb) 385 struct sk_buff *skb)
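
The option-number dispatch that the named constants replace can be shown in a few lines. RFC 4340, 10.3 reserves 128..191 for CCID options sent from the HC-Sender to the HC-Receiver and 192..255 for the reverse direction; the case ranges below are the gcc/clang extension the kernel source also uses.

#include <stdio.h>

#define DCCPO_MIN_RX_CCID_SPECIFIC 128
#define DCCPO_MAX_RX_CCID_SPECIFIC 191
#define DCCPO_MIN_TX_CCID_SPECIFIC 192
#define DCCPO_MAX_TX_CCID_SPECIFIC 255

static const char *classify_option(unsigned int opt)
{
	switch (opt) {
	case DCCPO_MIN_RX_CCID_SPECIFIC ... DCCPO_MAX_RX_CCID_SPECIFIC:
		return "CCID option, HC-Sender -> HC-Receiver";
	case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC:
		return "CCID option, HC-Receiver -> HC-Sender";
	default:
		return "generic DCCP option";
	}
}

int main(void)
{
	printf("%u: %s\n", 41,  classify_option(41));
	printf("%u: %s\n", 150, classify_option(150));
	printf("%u: %s\n", 200, classify_option(200));
	return 0;
}
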
diff --git a/net/dccp/output.c b/net/dccp/output.c
index aadbdb58758b..a988fe9ffcba 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -304,7 +304,7 @@ void dccp_write_xmit(struct sock *sk, int block)
304 dcb->dccpd_type = DCCP_PKT_DATA; 304 dcb->dccpd_type = DCCP_PKT_DATA;
305 305
306 err = dccp_transmit_skb(sk, skb); 306 err = dccp_transmit_skb(sk, skb);
307 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); 307 ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, len);
308 if (err) 308 if (err)
309 DCCP_BUG("err=%d after ccid_hc_tx_packet_sent", 309 DCCP_BUG("err=%d after ccid_hc_tx_packet_sent",
310 err); 310 err);
@@ -474,8 +474,9 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code)
474/* 474/*
475 * Do all connect socket setups that can be done AF independent. 475 * Do all connect socket setups that can be done AF independent.
476 */ 476 */
477static inline void dccp_connect_init(struct sock *sk) 477int dccp_connect(struct sock *sk)
478{ 478{
479 struct sk_buff *skb;
479 struct dccp_sock *dp = dccp_sk(sk); 480 struct dccp_sock *dp = dccp_sk(sk);
480 struct dst_entry *dst = __sk_dst_get(sk); 481 struct dst_entry *dst = __sk_dst_get(sk);
481 struct inet_connection_sock *icsk = inet_csk(sk); 482 struct inet_connection_sock *icsk = inet_csk(sk);
@@ -485,22 +486,12 @@ static inline void dccp_connect_init(struct sock *sk)
485 486
486 dccp_sync_mss(sk, dst_mtu(dst)); 487 dccp_sync_mss(sk, dst_mtu(dst));
487 488
488 /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
489 dp->dccps_gar = dp->dccps_iss;
490
491 icsk->icsk_retransmits = 0;
492}
493
494int dccp_connect(struct sock *sk)
495{
496 struct sk_buff *skb;
497 struct inet_connection_sock *icsk = inet_csk(sk);
498
499 /* do not connect if feature negotiation setup fails */ 489 /* do not connect if feature negotiation setup fails */
500 if (dccp_feat_finalise_settings(dccp_sk(sk))) 490 if (dccp_feat_finalise_settings(dccp_sk(sk)))
501 return -EPROTO; 491 return -EPROTO;
502 492
503 dccp_connect_init(sk); 493 /* Initialise GAR as per 8.5; AWL/AWH are set in dccp_transmit_skb() */
494 dp->dccps_gar = dp->dccps_iss;
504 495
505 skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation); 496 skb = alloc_skb(sk->sk_prot->max_header, sk->sk_allocation);
506 if (unlikely(skb == NULL)) 497 if (unlikely(skb == NULL))
@@ -516,6 +507,7 @@ int dccp_connect(struct sock *sk)
516 DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS); 507 DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
517 508
518 /* Timer for repeating the REQUEST until an answer. */ 509 /* Timer for repeating the REQUEST until an answer. */
510 icsk->icsk_retransmits = 0;
519 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 511 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
520 icsk->icsk_rto, DCCP_RTO_MAX); 512 icsk->icsk_rto, DCCP_RTO_MAX);
521 return 0; 513 return 0;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 096250d1323b..7e5fc04eb6d1 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -50,6 +50,30 @@ EXPORT_SYMBOL_GPL(dccp_hashinfo);
50/* the maximum queue length for tx in packets. 0 is no limit */ 50/* the maximum queue length for tx in packets. 0 is no limit */
51int sysctl_dccp_tx_qlen __read_mostly = 5; 51int sysctl_dccp_tx_qlen __read_mostly = 5;
52 52
53#ifdef CONFIG_IP_DCCP_DEBUG
54static const char *dccp_state_name(const int state)
55{
56 static const char *const dccp_state_names[] = {
57 [DCCP_OPEN] = "OPEN",
58 [DCCP_REQUESTING] = "REQUESTING",
59 [DCCP_PARTOPEN] = "PARTOPEN",
60 [DCCP_LISTEN] = "LISTEN",
61 [DCCP_RESPOND] = "RESPOND",
62 [DCCP_CLOSING] = "CLOSING",
63 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
64 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
65 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
66 [DCCP_TIME_WAIT] = "TIME_WAIT",
67 [DCCP_CLOSED] = "CLOSED",
68 };
69
70 if (state >= DCCP_MAX_STATES)
71 return "INVALID STATE!";
72 else
73 return dccp_state_names[state];
74}
75#endif
76
53void dccp_set_state(struct sock *sk, const int state) 77void dccp_set_state(struct sock *sk, const int state)
54{ 78{
55 const int oldstate = sk->sk_state; 79 const int oldstate = sk->sk_state;
@@ -146,30 +170,6 @@ const char *dccp_packet_name(const int type)
146 170
147EXPORT_SYMBOL_GPL(dccp_packet_name); 171EXPORT_SYMBOL_GPL(dccp_packet_name);
148 172
149const char *dccp_state_name(const int state)
150{
151 static const char *const dccp_state_names[] = {
152 [DCCP_OPEN] = "OPEN",
153 [DCCP_REQUESTING] = "REQUESTING",
154 [DCCP_PARTOPEN] = "PARTOPEN",
155 [DCCP_LISTEN] = "LISTEN",
156 [DCCP_RESPOND] = "RESPOND",
157 [DCCP_CLOSING] = "CLOSING",
158 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
159 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
160 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
161 [DCCP_TIME_WAIT] = "TIME_WAIT",
162 [DCCP_CLOSED] = "CLOSED",
163 };
164
165 if (state >= DCCP_MAX_STATES)
166 return "INVALID STATE!";
167 else
168 return dccp_state_names[state];
169}
170
171EXPORT_SYMBOL_GPL(dccp_state_name);
172
173int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) 173int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
174{ 174{
175 struct dccp_sock *dp = dccp_sk(sk); 175 struct dccp_sock *dp = dccp_sk(sk);
@@ -944,7 +944,7 @@ void dccp_close(struct sock *sk, long timeout)
944 944
945 if (data_was_unread) { 945 if (data_was_unread) {
946 /* Unread data was tossed, send an appropriate Reset Code */ 946 /* Unread data was tossed, send an appropriate Reset Code */
947 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread); 947 DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
948 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); 948 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
949 dccp_set_state(sk, DCCP_CLOSED); 949 dccp_set_state(sk, DCCP_CLOSED);
950 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { 950 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
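
The state-name helper moved under CONFIG_IP_DCCP_DEBUG above follows a common pattern worth showing in miniature: a designated-initializer string table plus a bounds check, so out-of-range values fall back to a marker instead of reading past the array. The enum values here are illustrative stand-ins for the DCCP_* states.

#include <stdio.h>

enum { ST_OPEN, ST_CLOSING, ST_CLOSED, ST_MAX_STATES };

static const char *state_name(int state)
{
	static const char *const names[] = {
		[ST_OPEN]    = "OPEN",
		[ST_CLOSING] = "CLOSING",
		[ST_CLOSED]  = "CLOSED",
	};

	if (state < 0 || state >= ST_MAX_STATES)
		return "INVALID STATE!";
	return names[state];
}

int main(void)
{
	printf("%s\n", state_name(ST_CLOSING));
	printf("%s\n", state_name(42));
	return 0;
}
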
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 0363bb95cc7d..a085dbcf5c7f 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -48,7 +48,6 @@
48#include <net/dn_neigh.h> 48#include <net/dn_neigh.h>
49#include <net/dn_route.h> 49#include <net/dn_route.h>
50 50
51static u32 dn_neigh_hash(const void *pkey, const struct net_device *dev);
52static int dn_neigh_construct(struct neighbour *); 51static int dn_neigh_construct(struct neighbour *);
53static void dn_long_error_report(struct neighbour *, struct sk_buff *); 52static void dn_long_error_report(struct neighbour *, struct sk_buff *);
54static void dn_short_error_report(struct neighbour *, struct sk_buff *); 53static void dn_short_error_report(struct neighbour *, struct sk_buff *);
@@ -93,6 +92,13 @@ static const struct neigh_ops dn_phase3_ops = {
93 .queue_xmit = dev_queue_xmit 92 .queue_xmit = dev_queue_xmit
94}; 93};
95 94
95static u32 dn_neigh_hash(const void *pkey,
96 const struct net_device *dev,
97 __u32 hash_rnd)
98{
99 return jhash_2words(*(__u16 *)pkey, 0, hash_rnd);
100}
101
96struct neigh_table dn_neigh_table = { 102struct neigh_table dn_neigh_table = {
97 .family = PF_DECnet, 103 .family = PF_DECnet,
98 .entry_size = sizeof(struct dn_neigh), 104 .entry_size = sizeof(struct dn_neigh),
@@ -122,11 +128,6 @@ struct neigh_table dn_neigh_table = {
122 .gc_thresh3 = 1024, 128 .gc_thresh3 = 1024,
123}; 129};
124 130
125static u32 dn_neigh_hash(const void *pkey, const struct net_device *dev)
126{
127 return jhash_2words(*(__u16 *)pkey, 0, dn_neigh_table.hash_rnd);
128}
129
130static int dn_neigh_construct(struct neighbour *neigh) 131static int dn_neigh_construct(struct neighbour *neigh)
131{ 132{
132 struct net_device *dev = neigh->dev; 133 struct net_device *dev = neigh->dev;
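
A sketch of the interface change above: the per-table random seed is now passed to the hash callback instead of being read back from the global table, which drops the forward declaration and lets the core hand in whichever seed it is (re)hashing with. The toy mixer below is an assumption standing in for jhash_2words().

#include <stdint.h>
#include <stdio.h>

static uint32_t mix(uint32_t a, uint32_t b, uint32_t seed)
{
	uint32_t h = seed ^ a;		/* stand-in for jhash_2words() */

	h = (h ^ (h >> 16)) * 0x45d9f3b ^ b;
	return h ^ (h >> 13);
}

/* new-style callback: the seed is an argument, not table state */
static uint32_t neigh_hash(const void *pkey, uint32_t hash_rnd)
{
	return mix(*(const uint16_t *)pkey, 0, hash_rnd);
}

int main(void)
{
	uint16_t key = 0x0401;

	printf("%08x\n", neigh_hash(&key, 0xdeadbeef));
	printf("%08x\n", neigh_hash(&key, 0x12345678));	/* rehash: new seed */
	return 0;
}
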
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index baeb1eaf011b..2ef115277bea 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -693,22 +693,22 @@ void dn_nsp_send_conninit(struct sock *sk, unsigned char msgflg)
693 aux = scp->accessdata.acc_userl; 693 aux = scp->accessdata.acc_userl;
694 *skb_put(skb, 1) = aux; 694 *skb_put(skb, 1) = aux;
695 if (aux > 0) 695 if (aux > 0)
696 memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux); 696 memcpy(skb_put(skb, aux), scp->accessdata.acc_user, aux);
697 697
698 aux = scp->accessdata.acc_passl; 698 aux = scp->accessdata.acc_passl;
699 *skb_put(skb, 1) = aux; 699 *skb_put(skb, 1) = aux;
700 if (aux > 0) 700 if (aux > 0)
701 memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux); 701 memcpy(skb_put(skb, aux), scp->accessdata.acc_pass, aux);
702 702
703 aux = scp->accessdata.acc_accl; 703 aux = scp->accessdata.acc_accl;
704 *skb_put(skb, 1) = aux; 704 *skb_put(skb, 1) = aux;
705 if (aux > 0) 705 if (aux > 0)
706 memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux); 706 memcpy(skb_put(skb, aux), scp->accessdata.acc_acc, aux);
707 707
708 aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl); 708 aux = (__u8)le16_to_cpu(scp->conndata_out.opt_optl);
709 *skb_put(skb, 1) = aux; 709 *skb_put(skb, 1) = aux;
710 if (aux > 0) 710 if (aux > 0)
711 memcpy(skb_put(skb,aux), scp->conndata_out.opt_data, aux); 711 memcpy(skb_put(skb, aux), scp->conndata_out.opt_data, aux);
712 712
713 scp->persist = dn_nsp_persist(sk); 713 scp->persist = dn_nsp_persist(sk);
714 scp->persist_fxn = dn_nsp_retrans_conninit; 714 scp->persist_fxn = dn_nsp_retrans_conninit;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 6585ea6d1182..df0f3e54ff8a 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -132,7 +132,6 @@ static struct dst_ops dn_dst_ops = {
132 .negative_advice = dn_dst_negative_advice, 132 .negative_advice = dn_dst_negative_advice,
133 .link_failure = dn_dst_link_failure, 133 .link_failure = dn_dst_link_failure,
134 .update_pmtu = dn_dst_update_pmtu, 134 .update_pmtu = dn_dst_update_pmtu,
135 .entries = ATOMIC_INIT(0),
136}; 135};
137 136
138static __inline__ unsigned dn_hash(__le16 src, __le16 dst) 137static __inline__ unsigned dn_hash(__le16 src, __le16 dst)
@@ -1758,6 +1757,7 @@ void __init dn_route_init(void)
1758 dn_dst_ops.kmem_cachep = 1757 dn_dst_ops.kmem_cachep =
1759 kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, 1758 kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0,
1760 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); 1759 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);
1760 dst_entries_init(&dn_dst_ops);
1761 setup_timer(&dn_route_timer, dn_dst_check_expire, 0); 1761 setup_timer(&dn_route_timer, dn_dst_check_expire, 0);
1762 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; 1762 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ;
1763 add_timer(&dn_route_timer); 1763 add_timer(&dn_route_timer);
@@ -1816,5 +1816,6 @@ void __exit dn_route_cleanup(void)
1816 dn_run_flush(0); 1816 dn_run_flush(0);
1817 1817
1818 proc_net_remove(&init_net, "decnet_cache"); 1818 proc_net_remove(&init_net, "decnet_cache");
1819 dst_entries_destroy(&dn_dst_ops);
1819} 1820}
1820 1821
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index dc54bd0d083b..f8c1ae4b41f0 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -392,7 +392,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock,
392 dev_queue_xmit(skb); 392 dev_queue_xmit(skb);
393 dev_put(dev); 393 dev_put(dev);
394 mutex_unlock(&econet_mutex); 394 mutex_unlock(&econet_mutex);
395 return(len); 395 return len;
396 396
397 out_free: 397 out_free:
398 kfree_skb(skb); 398 kfree_skb(skb);
@@ -637,7 +637,7 @@ static int econet_create(struct net *net, struct socket *sock, int protocol,
637 eo->num = protocol; 637 eo->num = protocol;
638 638
639 econet_insert_socket(&econet_sklist, sk); 639 econet_insert_socket(&econet_sklist, sk);
640 return(0); 640 return 0;
641out: 641out:
642 return err; 642 return err;
643} 643}
@@ -1009,7 +1009,6 @@ static int __init aun_udp_initialise(void)
1009 struct sockaddr_in sin; 1009 struct sockaddr_in sin;
1010 1010
1011 skb_queue_head_init(&aun_queue); 1011 skb_queue_head_init(&aun_queue);
1012 spin_lock_init(&aun_queue_lock);
1013 setup_timer(&ab_cleanup_timer, ab_cleanup, 0); 1012 setup_timer(&ab_cleanup_timer, ab_cleanup, 0);
1014 ab_cleanup_timer.expires = jiffies + (HZ*2); 1013 ab_cleanup_timer.expires = jiffies + (HZ*2);
1015 add_timer(&ab_cleanup_timer); 1014 add_timer(&ab_cleanup_timer);
@@ -1167,7 +1166,6 @@ static int __init econet_proto_init(void)
1167 goto out; 1166 goto out;
1168 sock_register(&econet_family_ops); 1167 sock_register(&econet_family_ops);
1169#ifdef CONFIG_ECONET_AUNUDP 1168#ifdef CONFIG_ECONET_AUNUDP
1170 spin_lock_init(&aun_queue_lock);
1171 aun_udp_initialise(); 1169 aun_udp_initialise();
1172#endif 1170#endif
1173#ifdef CONFIG_ECONET_NATIVE 1171#ifdef CONFIG_ECONET_NATIVE
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 215c83986a9d..f00ef2f1d814 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -367,7 +367,7 @@ struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
367EXPORT_SYMBOL(alloc_etherdev_mq); 367EXPORT_SYMBOL(alloc_etherdev_mq);
368 368
369static size_t _format_mac_addr(char *buf, int buflen, 369static size_t _format_mac_addr(char *buf, int buflen,
370 const unsigned char *addr, int len) 370 const unsigned char *addr, int len)
371{ 371{
372 int i; 372 int i;
373 char *cp = buf; 373 char *cp = buf;
@@ -376,7 +376,7 @@ static size_t _format_mac_addr(char *buf, int buflen,
376 cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]); 376 cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]);
377 if (i == len - 1) 377 if (i == len - 1)
378 break; 378 break;
379 cp += strlcpy(cp, ":", buflen - (cp - buf)); 379 cp += scnprintf(cp, buflen - (cp - buf), ":");
380 } 380 }
381 return cp - buf; 381 return cp - buf;
382} 382}
@@ -386,7 +386,7 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
386 size_t l; 386 size_t l;
387 387
388 l = _format_mac_addr(buf, PAGE_SIZE, addr, len); 388 l = _format_mac_addr(buf, PAGE_SIZE, addr, len);
389 l += strlcpy(buf + l, "\n", PAGE_SIZE - l); 389 l += scnprintf(buf + l, PAGE_SIZE - l, "\n");
390 return ((ssize_t) l); 390 return (ssize_t)l;
391} 391}
392EXPORT_SYMBOL(sysfs_format_mac); 392EXPORT_SYMBOL(sysfs_format_mac);
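
The strlcpy() -> scnprintf() switch above matters because strlcpy() returns the length of the *source* string, so on truncation the running offset can walk past the buffer, while scnprintf() returns only the bytes actually written. A userspace model (the scnprintf() wrapper here approximates the kernel helper on top of vsnprintf()):

#include <stdarg.h>
#include <stdio.h>

/* like snprintf(), but returns bytes actually written, never more */
static int scnprintf(char *buf, size_t size, const char *fmt, ...)
{
	va_list args;
	int n;

	va_start(args, fmt);
	n = vsnprintf(buf, size, fmt, args);
	va_end(args);

	if (n < 0 || size == 0)
		return 0;
	return (size_t)n >= size ? (int)size - 1 : n;
}

static size_t format_mac_addr(char *buf, size_t buflen,
			      const unsigned char *addr, int len)
{
	char *cp = buf;

	for (int i = 0; i < len; i++) {
		cp += scnprintf(cp, buflen - (cp - buf), "%02x", addr[i]);
		if (i == len - 1)
			break;
		cp += scnprintf(cp, buflen - (cp - buf), ":");
	}
	return cp - buf;
}

int main(void)
{
	const unsigned char mac[6] = { 0x00, 0x1a, 0x2b, 0x3c, 0x4d, 0x5e };
	char buf[8];	/* deliberately small: output truncates safely */

	format_mac_addr(buf, sizeof(buf), mac, 6);
	printf("%s\n", buf);	/* prints "00:1a:2" */
	return 0;
}
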
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 7cd7760144f7..e848e6c062cd 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -215,9 +215,15 @@ config NET_IPIP
215 be inserted in and removed from the running kernel whenever you 215 be inserted in and removed from the running kernel whenever you
216 want). Most people won't need this and can say N. 216 want). Most people won't need this and can say N.
217 217
218config NET_IPGRE_DEMUX
219 tristate "IP: GRE demultiplexer"
220 help
221	  This is a helper module that demultiplexes GRE packets based on the GRE version field.
222	  It is required by the ip_gre and pptp modules.
223
218config NET_IPGRE 224config NET_IPGRE
219 tristate "IP: GRE tunnels over IP" 225 tristate "IP: GRE tunnels over IP"
220 depends on IPV6 || IPV6=n 226 depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX
221 help 227 help
222 Tunneling means encapsulating data of one protocol type within 228 Tunneling means encapsulating data of one protocol type within
223 another protocol and sending it over a channel that understands the 229 another protocol and sending it over a channel that understands the
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index 80ff87ce43aa..4978d22f9a75 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_PROC_FS) += proc.o
20obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o 20obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
21obj-$(CONFIG_IP_MROUTE) += ipmr.o 21obj-$(CONFIG_IP_MROUTE) += ipmr.o
22obj-$(CONFIG_NET_IPIP) += ipip.o 22obj-$(CONFIG_NET_IPIP) += ipip.o
23obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
23obj-$(CONFIG_NET_IPGRE) += ip_gre.o 24obj-$(CONFIG_NET_IPGRE) += ip_gre.o
24obj-$(CONFIG_SYN_COOKIES) += syncookies.o 25obj-$(CONFIG_SYN_COOKIES) += syncookies.o
25obj-$(CONFIG_INET_AH) += ah4.o 26obj-$(CONFIG_INET_AH) += ah4.o
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6a1100c25a9f..f581f77d1097 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -227,18 +227,16 @@ EXPORT_SYMBOL(inet_ehash_secret);
227 227
228/* 228/*
229 * inet_ehash_secret must be set exactly once 229 * inet_ehash_secret must be set exactly once
230 * Instead of using a dedicated spinlock, we (ab)use inetsw_lock
231 */ 230 */
232void build_ehash_secret(void) 231void build_ehash_secret(void)
233{ 232{
234 u32 rnd; 233 u32 rnd;
234
235 do { 235 do {
236 get_random_bytes(&rnd, sizeof(rnd)); 236 get_random_bytes(&rnd, sizeof(rnd));
237 } while (rnd == 0); 237 } while (rnd == 0);
238 spin_lock_bh(&inetsw_lock); 238
239 if (!inet_ehash_secret) 239 cmpxchg(&inet_ehash_secret, 0, rnd);
240 inet_ehash_secret = rnd;
241 spin_unlock_bh(&inetsw_lock);
242} 240}
243EXPORT_SYMBOL(build_ehash_secret); 241EXPORT_SYMBOL(build_ehash_secret);
244 242
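
A userspace analogue of the lock-free initialisation above, with C11 compare-and-exchange playing the role of the kernel's cmpxchg(): whichever caller wins the race installs its random value, and every later call leaves the secret untouched. rand() is only a stand-in for get_random_bytes().

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static _Atomic unsigned int ehash_secret;

static void build_ehash_secret(void)
{
	unsigned int expected = 0, rnd;

	do {
		rnd = (unsigned int)rand();	/* stand-in for get_random_bytes() */
	} while (rnd == 0);

	/* install the secret only if it is still zero */
	atomic_compare_exchange_strong(&ehash_secret, &expected, rnd);
}

int main(void)
{
	build_ehash_secret();
	unsigned int first = ehash_secret;

	build_ehash_secret();		/* second call must be a no-op */
	printf("stable: %s\n", ehash_secret == first ? "yes" : "no");
	return 0;
}
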
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 96c1955b3e2f..d8e540c5b071 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -55,7 +55,7 @@
55 * Stuart Cheshire : Metricom and grat arp fixes 55 * Stuart Cheshire : Metricom and grat arp fixes
56 * *** FOR 2.1 clean this up *** 56 * *** FOR 2.1 clean this up ***
57 * Lawrence V. Stefani: (08/12/96) Added FDDI support. 57 * Lawrence V. Stefani: (08/12/96) Added FDDI support.
58 * Alan Cox : Took the AP1000 nasty FDDI hack and 58 * Alan Cox : Took the AP1000 nasty FDDI hack and
59 * folded into the mainstream FDDI code. 59 * folded into the mainstream FDDI code.
60 * Ack spit, Linus how did you allow that 60 * Ack spit, Linus how did you allow that
61 * one in... 61 * one in...
@@ -120,14 +120,14 @@ EXPORT_SYMBOL(clip_tbl_hook);
120#endif 120#endif
121 121
122#include <asm/system.h> 122#include <asm/system.h>
123#include <asm/uaccess.h> 123#include <linux/uaccess.h>
124 124
125#include <linux/netfilter_arp.h> 125#include <linux/netfilter_arp.h>
126 126
127/* 127/*
128 * Interface to generic neighbour cache. 128 * Interface to generic neighbour cache.
129 */ 129 */
130static u32 arp_hash(const void *pkey, const struct net_device *dev); 130static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 rnd);
131static int arp_constructor(struct neighbour *neigh); 131static int arp_constructor(struct neighbour *neigh);
132static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); 132static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb);
133static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); 133static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -161,7 +161,7 @@ static const struct neigh_ops arp_direct_ops = {
161 .queue_xmit = dev_queue_xmit, 161 .queue_xmit = dev_queue_xmit,
162}; 162};
163 163
164const struct neigh_ops arp_broken_ops = { 164static const struct neigh_ops arp_broken_ops = {
165 .family = AF_INET, 165 .family = AF_INET,
166 .solicit = arp_solicit, 166 .solicit = arp_solicit,
167 .error_report = arp_error_report, 167 .error_report = arp_error_report,
@@ -170,35 +170,34 @@ const struct neigh_ops arp_broken_ops = {
170 .hh_output = dev_queue_xmit, 170 .hh_output = dev_queue_xmit,
171 .queue_xmit = dev_queue_xmit, 171 .queue_xmit = dev_queue_xmit,
172}; 172};
173EXPORT_SYMBOL(arp_broken_ops);
174 173
175struct neigh_table arp_tbl = { 174struct neigh_table arp_tbl = {
176 .family = AF_INET, 175 .family = AF_INET,
177 .entry_size = sizeof(struct neighbour) + 4, 176 .entry_size = sizeof(struct neighbour) + 4,
178 .key_len = 4, 177 .key_len = 4,
179 .hash = arp_hash, 178 .hash = arp_hash,
180 .constructor = arp_constructor, 179 .constructor = arp_constructor,
181 .proxy_redo = parp_redo, 180 .proxy_redo = parp_redo,
182 .id = "arp_cache", 181 .id = "arp_cache",
183 .parms = { 182 .parms = {
184 .tbl = &arp_tbl, 183 .tbl = &arp_tbl,
185 .base_reachable_time = 30 * HZ, 184 .base_reachable_time = 30 * HZ,
186 .retrans_time = 1 * HZ, 185 .retrans_time = 1 * HZ,
187 .gc_staletime = 60 * HZ, 186 .gc_staletime = 60 * HZ,
188 .reachable_time = 30 * HZ, 187 .reachable_time = 30 * HZ,
189 .delay_probe_time = 5 * HZ, 188 .delay_probe_time = 5 * HZ,
190 .queue_len = 3, 189 .queue_len = 3,
191 .ucast_probes = 3, 190 .ucast_probes = 3,
192 .mcast_probes = 3, 191 .mcast_probes = 3,
193 .anycast_delay = 1 * HZ, 192 .anycast_delay = 1 * HZ,
194 .proxy_delay = (8 * HZ) / 10, 193 .proxy_delay = (8 * HZ) / 10,
195 .proxy_qlen = 64, 194 .proxy_qlen = 64,
196 .locktime = 1 * HZ, 195 .locktime = 1 * HZ,
197 }, 196 },
198 .gc_interval = 30 * HZ, 197 .gc_interval = 30 * HZ,
199 .gc_thresh1 = 128, 198 .gc_thresh1 = 128,
200 .gc_thresh2 = 512, 199 .gc_thresh2 = 512,
201 .gc_thresh3 = 1024, 200 .gc_thresh3 = 1024,
202}; 201};
203EXPORT_SYMBOL(arp_tbl); 202EXPORT_SYMBOL(arp_tbl);
204 203
@@ -226,14 +225,16 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir)
226} 225}
227 226
228 227
229static u32 arp_hash(const void *pkey, const struct net_device *dev) 228static u32 arp_hash(const void *pkey,
229 const struct net_device *dev,
230 __u32 hash_rnd)
230{ 231{
231 return jhash_2words(*(u32 *)pkey, dev->ifindex, arp_tbl.hash_rnd); 232 return jhash_2words(*(u32 *)pkey, dev->ifindex, hash_rnd);
232} 233}
233 234
234static int arp_constructor(struct neighbour *neigh) 235static int arp_constructor(struct neighbour *neigh)
235{ 236{
236 __be32 addr = *(__be32*)neigh->primary_key; 237 __be32 addr = *(__be32 *)neigh->primary_key;
237 struct net_device *dev = neigh->dev; 238 struct net_device *dev = neigh->dev;
238 struct in_device *in_dev; 239 struct in_device *in_dev;
239 struct neigh_parms *parms; 240 struct neigh_parms *parms;
@@ -296,16 +297,19 @@ static int arp_constructor(struct neighbour *neigh)
296 neigh->ops = &arp_broken_ops; 297 neigh->ops = &arp_broken_ops;
297 neigh->output = neigh->ops->output; 298 neigh->output = neigh->ops->output;
298 return 0; 299 return 0;
300#else
301 break;
299#endif 302#endif
300 ;} 303 }
301#endif 304#endif
302 if (neigh->type == RTN_MULTICAST) { 305 if (neigh->type == RTN_MULTICAST) {
303 neigh->nud_state = NUD_NOARP; 306 neigh->nud_state = NUD_NOARP;
304 arp_mc_map(addr, neigh->ha, dev, 1); 307 arp_mc_map(addr, neigh->ha, dev, 1);
305 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { 308 } else if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) {
306 neigh->nud_state = NUD_NOARP; 309 neigh->nud_state = NUD_NOARP;
307 memcpy(neigh->ha, dev->dev_addr, dev->addr_len); 310 memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
308 } else if (neigh->type == RTN_BROADCAST || dev->flags&IFF_POINTOPOINT) { 311 } else if (neigh->type == RTN_BROADCAST ||
312 (dev->flags & IFF_POINTOPOINT)) {
309 neigh->nud_state = NUD_NOARP; 313 neigh->nud_state = NUD_NOARP;
310 memcpy(neigh->ha, dev->broadcast, dev->addr_len); 314 memcpy(neigh->ha, dev->broadcast, dev->addr_len);
311 } 315 }
@@ -315,7 +319,7 @@ static int arp_constructor(struct neighbour *neigh)
315 else 319 else
316 neigh->ops = &arp_generic_ops; 320 neigh->ops = &arp_generic_ops;
317 321
318 if (neigh->nud_state&NUD_VALID) 322 if (neigh->nud_state & NUD_VALID)
319 neigh->output = neigh->ops->connected_output; 323 neigh->output = neigh->ops->connected_output;
320 else 324 else
321 neigh->output = neigh->ops->output; 325 neigh->output = neigh->ops->output;
@@ -334,7 +338,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
334 __be32 saddr = 0; 338 __be32 saddr = 0;
335 u8 *dst_ha = NULL; 339 u8 *dst_ha = NULL;
336 struct net_device *dev = neigh->dev; 340 struct net_device *dev = neigh->dev;
337 __be32 target = *(__be32*)neigh->primary_key; 341 __be32 target = *(__be32 *)neigh->primary_key;
338 int probes = atomic_read(&neigh->probes); 342 int probes = atomic_read(&neigh->probes);
339 struct in_device *in_dev; 343 struct in_device *in_dev;
340 344
@@ -347,7 +351,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
347 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) { 351 switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
348 default: 352 default:
349 case 0: /* By default announce any local IP */ 353 case 0: /* By default announce any local IP */
350 if (skb && inet_addr_type(dev_net(dev), ip_hdr(skb)->saddr) == RTN_LOCAL) 354 if (skb && inet_addr_type(dev_net(dev),
355 ip_hdr(skb)->saddr) == RTN_LOCAL)
351 saddr = ip_hdr(skb)->saddr; 356 saddr = ip_hdr(skb)->saddr;
352 break; 357 break;
353 case 1: /* Restrict announcements of saddr in same subnet */ 358 case 1: /* Restrict announcements of saddr in same subnet */
@@ -369,16 +374,21 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
369 if (!saddr) 374 if (!saddr)
370 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); 375 saddr = inet_select_addr(dev, target, RT_SCOPE_LINK);
371 376
372 if ((probes -= neigh->parms->ucast_probes) < 0) { 377 probes -= neigh->parms->ucast_probes;
373 if (!(neigh->nud_state&NUD_VALID)) 378 if (probes < 0) {
374 printk(KERN_DEBUG "trying to ucast probe in NUD_INVALID\n"); 379 if (!(neigh->nud_state & NUD_VALID))
380 printk(KERN_DEBUG
381 "trying to ucast probe in NUD_INVALID\n");
375 dst_ha = neigh->ha; 382 dst_ha = neigh->ha;
376 read_lock_bh(&neigh->lock); 383 read_lock_bh(&neigh->lock);
377 } else if ((probes -= neigh->parms->app_probes) < 0) { 384 } else {
385 probes -= neigh->parms->app_probes;
386 if (probes < 0) {
378#ifdef CONFIG_ARPD 387#ifdef CONFIG_ARPD
379 neigh_app_ns(neigh); 388 neigh_app_ns(neigh);
380#endif 389#endif
381 return; 390 return;
391 }
382 } 392 }
383 393
384 arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr, 394 arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
@@ -451,7 +461,8 @@ static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev)
451 * is allowed to use this function, it is scheduled to be removed. --ANK 461 * is allowed to use this function, it is scheduled to be removed. --ANK
452 */ 462 */
453 463
454static int arp_set_predefined(int addr_hint, unsigned char * haddr, __be32 paddr, struct net_device * dev) 464static int arp_set_predefined(int addr_hint, unsigned char *haddr,
465 __be32 paddr, struct net_device *dev)
455{ 466{
456 switch (addr_hint) { 467 switch (addr_hint) {
457 case RTN_LOCAL: 468 case RTN_LOCAL:
@@ -483,17 +494,16 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb)
483 494
484 paddr = skb_rtable(skb)->rt_gateway; 495 paddr = skb_rtable(skb)->rt_gateway;
485 496
486 if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr, paddr, dev)) 497 if (arp_set_predefined(inet_addr_type(dev_net(dev), paddr), haddr,
498 paddr, dev))
487 return 0; 499 return 0;
488 500
489 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1); 501 n = __neigh_lookup(&arp_tbl, &paddr, dev, 1);
490 502
491 if (n) { 503 if (n) {
492 n->used = jiffies; 504 n->used = jiffies;
493 if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) { 505 if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) {
494 read_lock_bh(&n->lock); 506 neigh_ha_snapshot(haddr, n, dev);
495 memcpy(haddr, n->ha, dev->addr_len);
496 read_unlock_bh(&n->lock);
497 neigh_release(n); 507 neigh_release(n);
498 return 0; 508 return 0;
499 } 509 }
@@ -515,13 +525,14 @@ int arp_bind_neighbour(struct dst_entry *dst)
515 return -EINVAL; 525 return -EINVAL;
516 if (n == NULL) { 526 if (n == NULL) {
517 __be32 nexthop = ((struct rtable *)dst)->rt_gateway; 527 __be32 nexthop = ((struct rtable *)dst)->rt_gateway;
518 if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT)) 528 if (dev->flags & (IFF_LOOPBACK | IFF_POINTOPOINT))
519 nexthop = 0; 529 nexthop = 0;
520 n = __neigh_lookup_errno( 530 n = __neigh_lookup_errno(
521#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE) 531#if defined(CONFIG_ATM_CLIP) || defined(CONFIG_ATM_CLIP_MODULE)
522 dev->type == ARPHRD_ATM ? clip_tbl_hook : 532 dev->type == ARPHRD_ATM ?
533 clip_tbl_hook :
523#endif 534#endif
524 &arp_tbl, &nexthop, dev); 535 &arp_tbl, &nexthop, dev);
525 if (IS_ERR(n)) 536 if (IS_ERR(n))
526 return PTR_ERR(n); 537 return PTR_ERR(n);
527 dst->neighbour = n; 538 dst->neighbour = n;
@@ -543,8 +554,8 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
543 554
544 if (!IN_DEV_PROXY_ARP(in_dev)) 555 if (!IN_DEV_PROXY_ARP(in_dev))
545 return 0; 556 return 0;
546 557 imi = IN_DEV_MEDIUM_ID(in_dev);
547 if ((imi = IN_DEV_MEDIUM_ID(in_dev)) == 0) 558 if (imi == 0)
548 return 1; 559 return 1;
549 if (imi == -1) 560 if (imi == -1)
550 return 0; 561 return 0;
@@ -555,7 +566,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev,
555 if (out_dev) 566 if (out_dev)
556 omi = IN_DEV_MEDIUM_ID(out_dev); 567 omi = IN_DEV_MEDIUM_ID(out_dev);
557 568
558 return (omi != imi && omi != -1); 569 return omi != imi && omi != -1;
559} 570}
560 571
561/* 572/*
@@ -685,7 +696,7 @@ struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip,
685 arp->ar_pln = 4; 696 arp->ar_pln = 4;
686 arp->ar_op = htons(type); 697 arp->ar_op = htons(type);
687 698
688 arp_ptr=(unsigned char *)(arp+1); 699 arp_ptr = (unsigned char *)(arp + 1);
689 700
690 memcpy(arp_ptr, src_hw, dev->addr_len); 701 memcpy(arp_ptr, src_hw, dev->addr_len);
691 arp_ptr += dev->addr_len; 702 arp_ptr += dev->addr_len;
@@ -735,9 +746,8 @@ void arp_send(int type, int ptype, __be32 dest_ip,
735 746
736 skb = arp_create(type, ptype, dest_ip, dev, src_ip, 747 skb = arp_create(type, ptype, dest_ip, dev, src_ip,
737 dest_hw, src_hw, target_hw); 748 dest_hw, src_hw, target_hw);
738 if (skb == NULL) { 749 if (skb == NULL)
739 return; 750 return;
740 }
741 751
742 arp_xmit(skb); 752 arp_xmit(skb);
743} 753}
@@ -815,7 +825,7 @@ static int arp_process(struct sk_buff *skb)
815/* 825/*
816 * Extract fields 826 * Extract fields
817 */ 827 */
818 arp_ptr= (unsigned char *)(arp+1); 828 arp_ptr = (unsigned char *)(arp + 1);
819 sha = arp_ptr; 829 sha = arp_ptr;
820 arp_ptr += dev->addr_len; 830 arp_ptr += dev->addr_len;
821 memcpy(&sip, arp_ptr, 4); 831 memcpy(&sip, arp_ptr, 4);
@@ -869,16 +879,17 @@ static int arp_process(struct sk_buff *skb)
869 addr_type = rt->rt_type; 879 addr_type = rt->rt_type;
870 880
871 if (addr_type == RTN_LOCAL) { 881 if (addr_type == RTN_LOCAL) {
872 int dont_send = 0; 882 int dont_send;
873 883
874 if (!dont_send) 884 dont_send = arp_ignore(in_dev, sip, tip);
875 dont_send |= arp_ignore(in_dev,sip,tip);
876 if (!dont_send && IN_DEV_ARPFILTER(in_dev)) 885 if (!dont_send && IN_DEV_ARPFILTER(in_dev))
877 dont_send |= arp_filter(sip,tip,dev); 886 dont_send |= arp_filter(sip, tip, dev);
878 if (!dont_send) { 887 if (!dont_send) {
879 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 888 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
880 if (n) { 889 if (n) {
881 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 890 arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
891 dev, tip, sha, dev->dev_addr,
892 sha);
882 neigh_release(n); 893 neigh_release(n);
883 } 894 }
884 } 895 }
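In the RTN_LOCAL branch, the old `if (!dont_send)` guard around the first assignment was dead weight: dont_send was always zero at that point, so arp_ignore() can seed it directly. The resulting gate is just two filters in sequence; equivalently (helper name ours):

static bool arp_should_reply(struct in_device *in_dev,
                             struct net_device *dev,
                             __be32 sip, __be32 tip)
{
        if (arp_ignore(in_dev, sip, tip))
                return false;           /* ignore policy says no */
        if (IN_DEV_ARPFILTER(in_dev) && arp_filter(sip, tip, dev))
                return false;           /* routing-based filter says no */
        return true;
}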
@@ -887,8 +898,7 @@ static int arp_process(struct sk_buff *skb)
887 if (addr_type == RTN_UNICAST && 898 if (addr_type == RTN_UNICAST &&
888 (arp_fwd_proxy(in_dev, dev, rt) || 899 (arp_fwd_proxy(in_dev, dev, rt) ||
889 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || 900 arp_fwd_pvlan(in_dev, dev, rt, sip, tip) ||
890 pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) 901 pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) {
891 {
892 n = neigh_event_ns(&arp_tbl, sha, &sip, dev); 902 n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
893 if (n) 903 if (n)
894 neigh_release(n); 904 neigh_release(n);
@@ -896,9 +906,12 @@ static int arp_process(struct sk_buff *skb)
896 if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED || 906 if (NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED ||
897 skb->pkt_type == PACKET_HOST || 907 skb->pkt_type == PACKET_HOST ||
898 in_dev->arp_parms->proxy_delay == 0) { 908 in_dev->arp_parms->proxy_delay == 0) {
899 arp_send(ARPOP_REPLY,ETH_P_ARP,sip,dev,tip,sha,dev->dev_addr,sha); 909 arp_send(ARPOP_REPLY, ETH_P_ARP, sip,
910 dev, tip, sha, dev->dev_addr,
911 sha);
900 } else { 912 } else {
901 pneigh_enqueue(&arp_tbl, in_dev->arp_parms, skb); 913 pneigh_enqueue(&arp_tbl,
914 in_dev->arp_parms, skb);
902 return 0; 915 return 0;
903 } 916 }
904 goto out; 917 goto out;
@@ -939,7 +952,8 @@ static int arp_process(struct sk_buff *skb)
939 if (arp->ar_op != htons(ARPOP_REPLY) || 952 if (arp->ar_op != htons(ARPOP_REPLY) ||
940 skb->pkt_type != PACKET_HOST) 953 skb->pkt_type != PACKET_HOST)
941 state = NUD_STALE; 954 state = NUD_STALE;
942 neigh_update(n, sha, state, override ? NEIGH_UPDATE_F_OVERRIDE : 0); 955 neigh_update(n, sha, state,
956 override ? NEIGH_UPDATE_F_OVERRIDE : 0);
943 neigh_release(n); 957 neigh_release(n);
944 } 958 }
945 959
@@ -975,7 +989,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
975 arp->ar_pln != 4) 989 arp->ar_pln != 4)
976 goto freeskb; 990 goto freeskb;
977 991
978 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 992 skb = skb_share_check(skb, GFP_ATOMIC);
993 if (skb == NULL)
979 goto out_of_mem; 994 goto out_of_mem;
980 995
981 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); 996 memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
@@ -1019,7 +1034,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
1019 return -EINVAL; 1034 return -EINVAL;
1020 if (!dev && (r->arp_flags & ATF_COM)) { 1035 if (!dev && (r->arp_flags & ATF_COM)) {
1021 dev = dev_getbyhwaddr(net, r->arp_ha.sa_family, 1036 dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
1022 r->arp_ha.sa_data); 1037 r->arp_ha.sa_data);
1023 if (!dev) 1038 if (!dev)
1024 return -ENODEV; 1039 return -ENODEV;
1025 } 1040 }
@@ -1033,7 +1048,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
1033} 1048}
1034 1049
1035static int arp_req_set(struct net *net, struct arpreq *r, 1050static int arp_req_set(struct net *net, struct arpreq *r,
1036 struct net_device * dev) 1051 struct net_device *dev)
1037{ 1052{
1038 __be32 ip; 1053 __be32 ip;
1039 struct neighbour *neigh; 1054 struct neighbour *neigh;
@@ -1046,10 +1061,11 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1046 if (r->arp_flags & ATF_PERM) 1061 if (r->arp_flags & ATF_PERM)
1047 r->arp_flags |= ATF_COM; 1062 r->arp_flags |= ATF_COM;
1048 if (dev == NULL) { 1063 if (dev == NULL) {
1049 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, 1064 struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
1050 .tos = RTO_ONLINK } } }; 1065 .tos = RTO_ONLINK } };
1051 struct rtable * rt; 1066 struct rtable *rt;
1052 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1067 err = ip_route_output_key(net, &rt, &fl);
1068 if (err != 0)
1053 return err; 1069 return err;
1054 dev = rt->dst.dev; 1070 dev = rt->dst.dev;
1055 ip_rt_put(rt); 1071 ip_rt_put(rt);
@@ -1083,9 +1099,9 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1083 unsigned state = NUD_STALE; 1099 unsigned state = NUD_STALE;
1084 if (r->arp_flags & ATF_PERM) 1100 if (r->arp_flags & ATF_PERM)
1085 state = NUD_PERMANENT; 1101 state = NUD_PERMANENT;
1086 err = neigh_update(neigh, (r->arp_flags&ATF_COM) ? 1102 err = neigh_update(neigh, (r->arp_flags & ATF_COM) ?
1087 r->arp_ha.sa_data : NULL, state, 1103 r->arp_ha.sa_data : NULL, state,
1088 NEIGH_UPDATE_F_OVERRIDE| 1104 NEIGH_UPDATE_F_OVERRIDE |
1089 NEIGH_UPDATE_F_ADMIN); 1105 NEIGH_UPDATE_F_ADMIN);
1090 neigh_release(neigh); 1106 neigh_release(neigh);
1091 } 1107 }
@@ -1094,12 +1110,12 @@ static int arp_req_set(struct net *net, struct arpreq *r,
1094 1110
1095static unsigned arp_state_to_flags(struct neighbour *neigh) 1111static unsigned arp_state_to_flags(struct neighbour *neigh)
1096{ 1112{
1097 unsigned flags = 0;
1098 if (neigh->nud_state&NUD_PERMANENT) 1113 if (neigh->nud_state&NUD_PERMANENT)
1099 flags = ATF_PERM|ATF_COM; 1114 return ATF_PERM | ATF_COM;
1100 else if (neigh->nud_state&NUD_VALID) 1115 else if (neigh->nud_state&NUD_VALID)
1101 flags = ATF_COM; 1116 return ATF_COM;
1102 return flags; 1117 else
1118 return 0;
1103} 1119}
1104 1120
1105/* 1121/*
@@ -1142,7 +1158,7 @@ static int arp_req_delete_public(struct net *net, struct arpreq *r,
1142} 1158}
1143 1159
1144static int arp_req_delete(struct net *net, struct arpreq *r, 1160static int arp_req_delete(struct net *net, struct arpreq *r,
1145 struct net_device * dev) 1161 struct net_device *dev)
1146{ 1162{
1147 int err; 1163 int err;
1148 __be32 ip; 1164 __be32 ip;
@@ -1153,10 +1169,11 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1153 1169
1154 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; 1170 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr;
1155 if (dev == NULL) { 1171 if (dev == NULL) {
1156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, 1172 struct flowi fl = { .nl_u.ip4_u = { .daddr = ip,
1157 .tos = RTO_ONLINK } } }; 1173 .tos = RTO_ONLINK } };
1158 struct rtable * rt; 1174 struct rtable *rt;
1159 if ((err = ip_route_output_key(net, &rt, &fl)) != 0) 1175 err = ip_route_output_key(net, &rt, &fl);
1176 if (err != 0)
1160 return err; 1177 return err;
1161 dev = rt->dst.dev; 1178 dev = rt->dst.dev;
1162 ip_rt_put(rt); 1179 ip_rt_put(rt);
@@ -1166,7 +1183,7 @@ static int arp_req_delete(struct net *net, struct arpreq *r,
1166 err = -ENXIO; 1183 err = -ENXIO;
1167 neigh = neigh_lookup(&arp_tbl, &ip, dev); 1184 neigh = neigh_lookup(&arp_tbl, &ip, dev);
1168 if (neigh) { 1185 if (neigh) {
1169 if (neigh->nud_state&~NUD_NOARP) 1186 if (neigh->nud_state & ~NUD_NOARP)
1170 err = neigh_update(neigh, NULL, NUD_FAILED, 1187 err = neigh_update(neigh, NULL, NUD_FAILED,
1171 NEIGH_UPDATE_F_OVERRIDE| 1188 NEIGH_UPDATE_F_OVERRIDE|
1172 NEIGH_UPDATE_F_ADMIN); 1189 NEIGH_UPDATE_F_ADMIN);
@@ -1186,24 +1203,24 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1186 struct net_device *dev = NULL; 1203 struct net_device *dev = NULL;
1187 1204
1188 switch (cmd) { 1205 switch (cmd) {
1189 case SIOCDARP: 1206 case SIOCDARP:
1190 case SIOCSARP: 1207 case SIOCSARP:
1191 if (!capable(CAP_NET_ADMIN)) 1208 if (!capable(CAP_NET_ADMIN))
1192 return -EPERM; 1209 return -EPERM;
1193 case SIOCGARP: 1210 case SIOCGARP:
1194 err = copy_from_user(&r, arg, sizeof(struct arpreq)); 1211 err = copy_from_user(&r, arg, sizeof(struct arpreq));
1195 if (err) 1212 if (err)
1196 return -EFAULT; 1213 return -EFAULT;
1197 break; 1214 break;
1198 default: 1215 default:
1199 return -EINVAL; 1216 return -EINVAL;
1200 } 1217 }
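This hunk only re-indents the switch in arp_ioctl(), but the flow it preserves is easy to misread: SIOCDARP and SIOCSARP deliberately fall through into SIOCGARP after the capability check, so all three commands share one copy_from_user(). Skeleton of the pattern:

switch (cmd) {
case SIOCDARP:
case SIOCSARP:
        if (!capable(CAP_NET_ADMIN))
                return -EPERM;
        /* fall through: write commands also need the request copied in */
case SIOCGARP:
        if (copy_from_user(&r, arg, sizeof(r)))
                return -EFAULT;
        break;
default:
        return -EINVAL;
}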
1201 1218
1202 if (r.arp_pa.sa_family != AF_INET) 1219 if (r.arp_pa.sa_family != AF_INET)
1203 return -EPFNOSUPPORT; 1220 return -EPFNOSUPPORT;
1204 1221
1205 if (!(r.arp_flags & ATF_PUBL) && 1222 if (!(r.arp_flags & ATF_PUBL) &&
1206 (r.arp_flags & (ATF_NETMASK|ATF_DONTPUB))) 1223 (r.arp_flags & (ATF_NETMASK | ATF_DONTPUB)))
1207 return -EINVAL; 1224 return -EINVAL;
1208 if (!(r.arp_flags & ATF_NETMASK)) 1225 if (!(r.arp_flags & ATF_NETMASK))
1209 ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr = 1226 ((struct sockaddr_in *)&r.arp_netmask)->sin_addr.s_addr =
@@ -1211,7 +1228,8 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1211 rtnl_lock(); 1228 rtnl_lock();
1212 if (r.arp_dev[0]) { 1229 if (r.arp_dev[0]) {
1213 err = -ENODEV; 1230 err = -ENODEV;
1214 if ((dev = __dev_get_by_name(net, r.arp_dev)) == NULL) 1231 dev = __dev_get_by_name(net, r.arp_dev);
1232 if (dev == NULL)
1215 goto out; 1233 goto out;
1216 1234
1217 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */ 1235 /* Mmmm... It is wrong... ARPHRD_NETROM==0 */
@@ -1243,7 +1261,8 @@ out:
1243 return err; 1261 return err;
1244} 1262}
1245 1263
1246static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 1264static int arp_netdev_event(struct notifier_block *this, unsigned long event,
1265 void *ptr)
1247{ 1266{
1248 struct net_device *dev = ptr; 1267 struct net_device *dev = ptr;
1249 1268
@@ -1311,12 +1330,13 @@ static char *ax2asc2(ax25_address *a, char *buf)
1311 for (n = 0, s = buf; n < 6; n++) { 1330 for (n = 0, s = buf; n < 6; n++) {
1312 c = (a->ax25_call[n] >> 1) & 0x7F; 1331 c = (a->ax25_call[n] >> 1) & 0x7F;
1313 1332
1314 if (c != ' ') *s++ = c; 1333 if (c != ' ')
1334 *s++ = c;
1315 } 1335 }
1316 1336
1317 *s++ = '-'; 1337 *s++ = '-';
1318 1338 n = (a->ax25_call[6] >> 1) & 0x0F;
1319 if ((n = ((a->ax25_call[6] >> 1) & 0x0F)) > 9) { 1339 if (n > 9) {
1320 *s++ = '1'; 1340 *s++ = '1';
1321 n -= 10; 1341 n -= 10;
1322 } 1342 }
@@ -1325,10 +1345,9 @@ static char *ax2asc2(ax25_address *a, char *buf)
1325 *s++ = '\0'; 1345 *s++ = '\0';
1326 1346
1327 if (*buf == '\0' || *buf == '-') 1347 if (*buf == '\0' || *buf == '-')
1328 return "*"; 1348 return "*";
1329 1349
1330 return buf; 1350 return buf;
1331
1332} 1351}
1333#endif /* CONFIG_AX25 */ 1352#endif /* CONFIG_AX25 */
1334 1353
@@ -1408,10 +1427,10 @@ static void *arp_seq_start(struct seq_file *seq, loff_t *pos)
1408/* ------------------------------------------------------------------------ */ 1427/* ------------------------------------------------------------------------ */
1409 1428
1410static const struct seq_operations arp_seq_ops = { 1429static const struct seq_operations arp_seq_ops = {
1411 .start = arp_seq_start, 1430 .start = arp_seq_start,
1412 .next = neigh_seq_next, 1431 .next = neigh_seq_next,
1413 .stop = neigh_seq_stop, 1432 .stop = neigh_seq_stop,
1414 .show = arp_seq_show, 1433 .show = arp_seq_show,
1415}; 1434};
1416 1435
1417static int arp_seq_open(struct inode *inode, struct file *file) 1436static int arp_seq_open(struct inode *inode, struct file *file)
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 721a8a37b45c..174be6caa5c8 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -73,6 +73,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
73 inet->inet_id = jiffies; 73 inet->inet_id = jiffies;
74 74
75 sk_dst_set(sk, &rt->dst); 75 sk_dst_set(sk, &rt->dst);
76 return(0); 76 return 0;
77} 77}
78EXPORT_SYMBOL(ip4_datagram_connect); 78EXPORT_SYMBOL(ip4_datagram_connect);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index da14c49284f4..dc94b0316b78 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -209,7 +209,7 @@ static void inetdev_destroy(struct in_device *in_dev)
209 inet_free_ifa(ifa); 209 inet_free_ifa(ifa);
210 } 210 }
211 211
212 dev->ip_ptr = NULL; 212 rcu_assign_pointer(dev->ip_ptr, NULL);
213 213
214 devinet_sysctl_unregister(in_dev); 214 devinet_sysctl_unregister(in_dev);
215 neigh_parms_release(&arp_tbl, in_dev->arp_parms); 215 neigh_parms_release(&arp_tbl, in_dev->arp_parms);
@@ -403,6 +403,9 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
403 return inet_insert_ifa(ifa); 403 return inet_insert_ifa(ifa);
404} 404}
405 405
406/* Caller must hold RCU or RTNL :
407 * We dont take a reference on found in_device
408 */
406struct in_device *inetdev_by_index(struct net *net, int ifindex) 409struct in_device *inetdev_by_index(struct net *net, int ifindex)
407{ 410{
408 struct net_device *dev; 411 struct net_device *dev;
@@ -411,7 +414,7 @@ struct in_device *inetdev_by_index(struct net *net, int ifindex)
411 rcu_read_lock(); 414 rcu_read_lock();
412 dev = dev_get_by_index_rcu(net, ifindex); 415 dev = dev_get_by_index_rcu(net, ifindex);
413 if (dev) 416 if (dev)
414 in_dev = in_dev_get(dev); 417 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
415 rcu_read_unlock(); 418 rcu_read_unlock();
416 return in_dev; 419 return in_dev;
417} 420}
@@ -453,8 +456,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg
453 goto errout; 456 goto errout;
454 } 457 }
455 458
456 __in_dev_put(in_dev);
457
458 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; 459 for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
459 ifap = &ifa->ifa_next) { 460 ifap = &ifa->ifa_next) {
460 if (tb[IFA_LOCAL] && 461 if (tb[IFA_LOCAL] &&
@@ -1059,7 +1060,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
1059 switch (event) { 1060 switch (event) {
1060 case NETDEV_REGISTER: 1061 case NETDEV_REGISTER:
1061 printk(KERN_DEBUG "inetdev_event: bug\n"); 1062 printk(KERN_DEBUG "inetdev_event: bug\n");
1062 dev->ip_ptr = NULL; 1063 rcu_assign_pointer(dev->ip_ptr, NULL);
1063 break; 1064 break;
1064 case NETDEV_UP: 1065 case NETDEV_UP:
1065 if (!inetdev_valid_mtu(dev->mtu)) 1066 if (!inetdev_valid_mtu(dev->mtu))
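The devinet.c hunks move dev->ip_ptr to RCU conventions: writers publish (including the NULL store on teardown) with rcu_assign_pointer(), and inetdev_by_index() reads via rcu_dereference_rtnl() instead of taking a reference with in_dev_get(). The shape of that pairing, as a generic sketch (the synchronize_rcu() before freeing is the usual rule, not shown in these hunks):

/* writer, under the update-side lock (RTNL here) */
rcu_assign_pointer(dev->ip_ptr, NULL);  /* unpublish */
synchronize_rcu();                      /* wait out in-flight readers */
/* ...now the old in_device may be torn down */

/* reader: no refcount, valid only inside the RCU section */
rcu_read_lock();
in_dev = rcu_dereference(dev->ip_ptr);
if (in_dev)
        use_in_device(in_dev);          /* hypothetical consumer */
rcu_read_unlock();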
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7d02a9f999fa..36e27c2107de 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -147,35 +147,43 @@ static void fib_flush(struct net *net)
147 rt_cache_flush(net, -1); 147 rt_cache_flush(net, -1);
148} 148}
149 149
150/* 150/**
151 * Find the first device with a given source address. 151 * __ip_dev_find - find the first device with a given source address.
152 * @net: the net namespace
153 * @addr: the source address
154 * @devref: if true, take a reference on the found device
155 *
156 * If a caller uses devref=false, it should be protected by RCU, or RTNL
152 */ 157 */
153 158struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
154struct net_device * ip_dev_find(struct net *net, __be32 addr)
155{ 159{
156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 160 struct flowi fl = {
157 struct fib_result res; 161 .nl_u = {
162 .ip4_u = {
163 .daddr = addr
164 }
165 },
166 .flags = FLOWI_FLAG_MATCH_ANY_IIF
167 };
168 struct fib_result res = { 0 };
158 struct net_device *dev = NULL; 169 struct net_device *dev = NULL;
159 struct fib_table *local_table;
160 170
161#ifdef CONFIG_IP_MULTIPLE_TABLES 171 rcu_read_lock();
162 res.r = NULL; 172 if (fib_lookup(net, &fl, &res)) {
163#endif 173 rcu_read_unlock();
164
165 local_table = fib_get_table(net, RT_TABLE_LOCAL);
166 if (!local_table || fib_table_lookup(local_table, &fl, &res))
167 return NULL; 174 return NULL;
175 }
168 if (res.type != RTN_LOCAL) 176 if (res.type != RTN_LOCAL)
169 goto out; 177 goto out;
170 dev = FIB_RES_DEV(res); 178 dev = FIB_RES_DEV(res);
171 179
172 if (dev) 180 if (dev && devref)
173 dev_hold(dev); 181 dev_hold(dev);
174out: 182out:
175 fib_res_put(&res); 183 rcu_read_unlock();
176 return dev; 184 return dev;
177} 185}
178EXPORT_SYMBOL(ip_dev_find); 186EXPORT_SYMBOL(__ip_dev_find);
179 187
180/* 188/*
181 * Find address type as if only "dev" was present in the system. If 189 * Find address type as if only "dev" was present in the system. If
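ip_dev_find() becomes __ip_dev_find() with a devref flag: the lookup now runs entirely under rcu_read_lock() through fib_lookup() with FLOWI_FLAG_MATCH_ANY_IIF, and dev_hold() happens only when the caller asks. Presumably ip_dev_find() survives as a wrapper passing devref = true; callers already under RCU or RTNL can skip the refcount:

/* refcounted: caller must dev_put() when done */
dev = __ip_dev_find(net, addr, true);

/* lockless: pointer is valid only while the RCU section lasts */
rcu_read_lock();
dev = __ip_dev_find(net, addr, false);
if (dev)
        consume(dev);           /* hypothetical use, no dev_put() */
rcu_read_unlock();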
@@ -202,11 +210,12 @@ static inline unsigned __inet_dev_addr_type(struct net *net,
202 local_table = fib_get_table(net, RT_TABLE_LOCAL); 210 local_table = fib_get_table(net, RT_TABLE_LOCAL);
203 if (local_table) { 211 if (local_table) {
204 ret = RTN_UNICAST; 212 ret = RTN_UNICAST;
205 if (!fib_table_lookup(local_table, &fl, &res)) { 213 rcu_read_lock();
214 if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) {
206 if (!dev || dev == res.fi->fib_dev) 215 if (!dev || dev == res.fi->fib_dev)
207 ret = res.type; 216 ret = res.type;
208 fib_res_put(&res);
209 } 217 }
218 rcu_read_unlock();
210 } 219 }
211 return ret; 220 return ret;
212} 221}
@@ -220,30 +229,34 @@ EXPORT_SYMBOL(inet_addr_type);
220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, 229unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
221 __be32 addr) 230 __be32 addr)
222{ 231{
223 return __inet_dev_addr_type(net, dev, addr); 232 return __inet_dev_addr_type(net, dev, addr);
224} 233}
225EXPORT_SYMBOL(inet_dev_addr_type); 234EXPORT_SYMBOL(inet_dev_addr_type);
226 235
227/* Given (packet source, input interface) and optional (dst, oif, tos): 236/* Given (packet source, input interface) and optional (dst, oif, tos):
228 - (main) check, that source is valid i.e. not broadcast or our local 237 * - (main) check, that source is valid i.e. not broadcast or our local
229 address. 238 * address.
230 - figure out what "logical" interface this packet arrived 239 * - figure out what "logical" interface this packet arrived
231 and calculate "specific destination" address. 240 * and calculate "specific destination" address.
232 - check, that packet arrived from expected physical interface. 241 * - check, that packet arrived from expected physical interface.
242 * called with rcu_read_lock()
233 */ 243 */
234
235int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 244int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
236 struct net_device *dev, __be32 *spec_dst, 245 struct net_device *dev, __be32 *spec_dst,
237 u32 *itag, u32 mark) 246 u32 *itag, u32 mark)
238{ 247{
239 struct in_device *in_dev; 248 struct in_device *in_dev;
240 struct flowi fl = { .nl_u = { .ip4_u = 249 struct flowi fl = {
241 { .daddr = src, 250 .nl_u = {
242 .saddr = dst, 251 .ip4_u = {
243 .tos = tos } }, 252 .daddr = src,
244 .mark = mark, 253 .saddr = dst,
245 .iif = oif }; 254 .tos = tos
246 255 }
256 },
257 .mark = mark,
258 .iif = oif
259 };
247 struct fib_result res; 260 struct fib_result res;
248 int no_addr, rpf, accept_local; 261 int no_addr, rpf, accept_local;
249 bool dev_match; 262 bool dev_match;
@@ -251,7 +264,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
251 struct net *net; 264 struct net *net;
252 265
253 no_addr = rpf = accept_local = 0; 266 no_addr = rpf = accept_local = 0;
254 rcu_read_lock();
255 in_dev = __in_dev_get_rcu(dev); 267 in_dev = __in_dev_get_rcu(dev);
256 if (in_dev) { 268 if (in_dev) {
257 no_addr = in_dev->ifa_list == NULL; 269 no_addr = in_dev->ifa_list == NULL;
@@ -260,7 +272,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
260 if (mark && !IN_DEV_SRC_VMARK(in_dev)) 272 if (mark && !IN_DEV_SRC_VMARK(in_dev))
261 fl.mark = 0; 273 fl.mark = 0;
262 } 274 }
263 rcu_read_unlock();
264 275
265 if (in_dev == NULL) 276 if (in_dev == NULL)
266 goto e_inval; 277 goto e_inval;
@@ -270,7 +281,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
270 goto last_resort; 281 goto last_resort;
271 if (res.type != RTN_UNICAST) { 282 if (res.type != RTN_UNICAST) {
272 if (res.type != RTN_LOCAL || !accept_local) 283 if (res.type != RTN_LOCAL || !accept_local)
273 goto e_inval_res; 284 goto e_inval;
274 } 285 }
275 *spec_dst = FIB_RES_PREFSRC(res); 286 *spec_dst = FIB_RES_PREFSRC(res);
276 fib_combine_itag(itag, &res); 287 fib_combine_itag(itag, &res);
@@ -291,10 +302,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
291#endif 302#endif
292 if (dev_match) { 303 if (dev_match) {
293 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 304 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
294 fib_res_put(&res);
295 return ret; 305 return ret;
296 } 306 }
297 fib_res_put(&res);
298 if (no_addr) 307 if (no_addr)
299 goto last_resort; 308 goto last_resort;
300 if (rpf == 1) 309 if (rpf == 1)
@@ -307,7 +316,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif,
307 *spec_dst = FIB_RES_PREFSRC(res); 316 *spec_dst = FIB_RES_PREFSRC(res);
308 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 317 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
309 } 318 }
310 fib_res_put(&res);
311 } 319 }
312 return ret; 320 return ret;
313 321
@@ -318,8 +326,6 @@ last_resort:
318 *itag = 0; 326 *itag = 0;
319 return 0; 327 return 0;
320 328
321e_inval_res:
322 fib_res_put(&res);
323e_inval: 329e_inval:
324 return -EINVAL; 330 return -EINVAL;
325e_rpf: 331e_rpf:
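Every fib_res_put() vanishes from fib_validate_source() because the result no longer pins a fib_info: the function is now documented as "called with rcu_read_lock()", and NOREF lookups make the result a borrow rather than a reference, which also lets e_inval_res collapse into e_inval. The calling-convention change in miniature (use() is a stand-in):

/* before: every exit path owed a fib_res_put() */
if (!fib_lookup(net, &fl, &res)) {
        use(&res);
        fib_res_put(&res);
}

/* after: the caller's RCU section covers the borrowed result */
rcu_read_lock();
if (!fib_lookup(net, &fl, &res))
        use(&res);
rcu_read_unlock();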
@@ -472,9 +478,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
472} 478}
473 479
474/* 480/*
475 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 481 * Handle IP routing ioctl calls.
482 * These are used to manipulate the routing tables
476 */ 483 */
477
478int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) 484int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
479{ 485{
480 struct fib_config cfg; 486 struct fib_config cfg;
@@ -518,7 +524,7 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg)
518 return -EINVAL; 524 return -EINVAL;
519} 525}
520 526
521const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { 527const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
522 [RTA_DST] = { .type = NLA_U32 }, 528 [RTA_DST] = { .type = NLA_U32 },
523 [RTA_SRC] = { .type = NLA_U32 }, 529 [RTA_SRC] = { .type = NLA_U32 },
524 [RTA_IIF] = { .type = NLA_U32 }, 530 [RTA_IIF] = { .type = NLA_U32 },
@@ -532,7 +538,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = {
532}; 538};
533 539
534static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, 540static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
535 struct nlmsghdr *nlh, struct fib_config *cfg) 541 struct nlmsghdr *nlh, struct fib_config *cfg)
536{ 542{
537 struct nlattr *attr; 543 struct nlattr *attr;
538 int err, remaining; 544 int err, remaining;
@@ -687,12 +693,11 @@ out:
687} 693}
688 694
689/* Prepare and feed intra-kernel routing request. 695/* Prepare and feed intra-kernel routing request.
690 Really, it should be netlink message, but :-( netlink 696 * Really, it should be netlink message, but :-( netlink
691 can be not configured, so that we feed it directly 697 * can be not configured, so that we feed it directly
692 to fib engine. It is legal, because all events occur 698 * to fib engine. It is legal, because all events occur
693 only when netlink is already locked. 699 * only when netlink is already locked.
694 */ 700 */
695
696static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 701static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
697{ 702{
698 struct net *net = dev_net(ifa->ifa_dev->dev); 703 struct net *net = dev_net(ifa->ifa_dev->dev);
@@ -738,9 +743,9 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
738 struct in_ifaddr *prim = ifa; 743 struct in_ifaddr *prim = ifa;
739 __be32 mask = ifa->ifa_mask; 744 __be32 mask = ifa->ifa_mask;
740 __be32 addr = ifa->ifa_local; 745 __be32 addr = ifa->ifa_local;
741 __be32 prefix = ifa->ifa_address&mask; 746 __be32 prefix = ifa->ifa_address & mask;
742 747
743 if (ifa->ifa_flags&IFA_F_SECONDARY) { 748 if (ifa->ifa_flags & IFA_F_SECONDARY) {
744 prim = inet_ifa_byprefix(in_dev, prefix, mask); 749 prim = inet_ifa_byprefix(in_dev, prefix, mask);
745 if (prim == NULL) { 750 if (prim == NULL) {
746 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n"); 751 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n");
@@ -750,22 +755,24 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
750 755
751 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); 756 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
752 757
753 if (!(dev->flags&IFF_UP)) 758 if (!(dev->flags & IFF_UP))
754 return; 759 return;
755 760
756 /* Add broadcast address, if it is explicitly assigned. */ 761 /* Add broadcast address, if it is explicitly assigned. */
757 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) 762 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
758 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 763 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
759 764
760 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && 765 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
761 (prefix != addr || ifa->ifa_prefixlen < 32)) { 766 (prefix != addr || ifa->ifa_prefixlen < 32)) {
762 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 767 fib_magic(RTM_NEWROUTE,
763 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); 768 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
769 prefix, ifa->ifa_prefixlen, prim);
764 770
765 /* Add network specific broadcasts, when it takes a sense */ 771 /* Add network specific broadcasts, when it takes a sense */
766 if (ifa->ifa_prefixlen < 31) { 772 if (ifa->ifa_prefixlen < 31) {
767 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); 773 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
768 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); 774 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
775 32, prim);
769 } 776 }
770 } 777 }
771} 778}
@@ -776,17 +783,18 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
776 struct net_device *dev = in_dev->dev; 783 struct net_device *dev = in_dev->dev;
777 struct in_ifaddr *ifa1; 784 struct in_ifaddr *ifa1;
778 struct in_ifaddr *prim = ifa; 785 struct in_ifaddr *prim = ifa;
779 __be32 brd = ifa->ifa_address|~ifa->ifa_mask; 786 __be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
780 __be32 any = ifa->ifa_address&ifa->ifa_mask; 787 __be32 any = ifa->ifa_address & ifa->ifa_mask;
781#define LOCAL_OK 1 788#define LOCAL_OK 1
782#define BRD_OK 2 789#define BRD_OK 2
783#define BRD0_OK 4 790#define BRD0_OK 4
784#define BRD1_OK 8 791#define BRD1_OK 8
785 unsigned ok = 0; 792 unsigned ok = 0;
786 793
787 if (!(ifa->ifa_flags&IFA_F_SECONDARY)) 794 if (!(ifa->ifa_flags & IFA_F_SECONDARY))
788 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 795 fib_magic(RTM_DELROUTE,
789 RTN_UNICAST, any, ifa->ifa_prefixlen, prim); 796 dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
797 any, ifa->ifa_prefixlen, prim);
790 else { 798 else {
791 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 799 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask);
792 if (prim == NULL) { 800 if (prim == NULL) {
@@ -796,9 +804,9 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
796 } 804 }
797 805
798 /* Deletion is more complicated than add. 806 /* Deletion is more complicated than add.
799 We should take care of not to delete too much :-) 807 * We should take care of not to delete too much :-)
800 808 *
801 Scan address list to be sure that addresses are really gone. 809 * Scan address list to be sure that addresses are really gone.
802 */ 810 */
803 811
804 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 812 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) {
@@ -812,23 +820,23 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
812 ok |= BRD0_OK; 820 ok |= BRD0_OK;
813 } 821 }
814 822
815 if (!(ok&BRD_OK)) 823 if (!(ok & BRD_OK))
816 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 824 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
817 if (!(ok&BRD1_OK)) 825 if (!(ok & BRD1_OK))
818 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 826 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
819 if (!(ok&BRD0_OK)) 827 if (!(ok & BRD0_OK))
820 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 828 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
821 if (!(ok&LOCAL_OK)) { 829 if (!(ok & LOCAL_OK)) {
822 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 830 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
823 831
824 /* Check, that this local address finally disappeared. */ 832 /* Check, that this local address finally disappeared. */
825 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { 833 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
826 /* And the last, but not the least thing. 834 /* And the last, but not the least thing.
827 We must flush stray FIB entries. 835 * We must flush stray FIB entries.
828 836 *
829 First of all, we scan fib_info list searching 837 * First of all, we scan fib_info list searching
830 for stray nexthop entries, then ignite fib_flush. 838 * for stray nexthop entries, then ignite fib_flush.
831 */ 839 */
832 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local)) 840 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local))
833 fib_flush(dev_net(dev)); 841 fib_flush(dev_net(dev));
834 } 842 }
@@ -839,14 +847,20 @@ static void fib_del_ifaddr(struct in_ifaddr *ifa)
839#undef BRD1_OK 847#undef BRD1_OK
840} 848}
841 849
842static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) 850static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb)
843{ 851{
844 852
845 struct fib_result res; 853 struct fib_result res;
846 struct flowi fl = { .mark = frn->fl_mark, 854 struct flowi fl = {
847 .nl_u = { .ip4_u = { .daddr = frn->fl_addr, 855 .mark = frn->fl_mark,
848 .tos = frn->fl_tos, 856 .nl_u = {
849 .scope = frn->fl_scope } } }; 857 .ip4_u = {
858 .daddr = frn->fl_addr,
859 .tos = frn->fl_tos,
860 .scope = frn->fl_scope
861 }
862 }
863 };
850 864
851#ifdef CONFIG_IP_MULTIPLE_TABLES 865#ifdef CONFIG_IP_MULTIPLE_TABLES
852 res.r = NULL; 866 res.r = NULL;
@@ -857,15 +871,16 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb )
857 local_bh_disable(); 871 local_bh_disable();
858 872
859 frn->tb_id = tb->tb_id; 873 frn->tb_id = tb->tb_id;
860 frn->err = fib_table_lookup(tb, &fl, &res); 874 rcu_read_lock();
875 frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF);
861 876
862 if (!frn->err) { 877 if (!frn->err) {
863 frn->prefixlen = res.prefixlen; 878 frn->prefixlen = res.prefixlen;
864 frn->nh_sel = res.nh_sel; 879 frn->nh_sel = res.nh_sel;
865 frn->type = res.type; 880 frn->type = res.type;
866 frn->scope = res.scope; 881 frn->scope = res.scope;
867 fib_res_put(&res);
868 } 882 }
883 rcu_read_unlock();
869 local_bh_enable(); 884 local_bh_enable();
870 } 885 }
871} 886}
@@ -894,8 +909,8 @@ static void nl_fib_input(struct sk_buff *skb)
894 909
895 nl_fib_lookup(frn, tb); 910 nl_fib_lookup(frn, tb);
896 911
897 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 912 pid = NETLINK_CB(skb).pid; /* pid of sending process */
898 NETLINK_CB(skb).pid = 0; /* from kernel */ 913 NETLINK_CB(skb).pid = 0; /* from kernel */
899 NETLINK_CB(skb).dst_group = 0; /* unicast */ 914 NETLINK_CB(skb).dst_group = 0; /* unicast */
900 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); 915 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT);
901} 916}
@@ -942,7 +957,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
942 fib_del_ifaddr(ifa); 957 fib_del_ifaddr(ifa);
943 if (ifa->ifa_dev->ifa_list == NULL) { 958 if (ifa->ifa_dev->ifa_list == NULL) {
944 /* Last address was deleted from this interface. 959 /* Last address was deleted from this interface.
945 Disable IP. 960 * Disable IP.
946 */ 961 */
947 fib_disable_ip(dev, 1, 0); 962 fib_disable_ip(dev, 1, 0);
948 } else { 963 } else {
@@ -1001,16 +1016,15 @@ static struct notifier_block fib_netdev_notifier = {
1001static int __net_init ip_fib_net_init(struct net *net) 1016static int __net_init ip_fib_net_init(struct net *net)
1002{ 1017{
1003 int err; 1018 int err;
1004 unsigned int i; 1019 size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;
1005 1020
1006 net->ipv4.fib_table_hash = kzalloc( 1021 /* Avoid false sharing : Use at least a full cache line */
1007 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL); 1022 size = max_t(size_t, size, L1_CACHE_BYTES);
1023
1024 net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL);
1008 if (net->ipv4.fib_table_hash == NULL) 1025 if (net->ipv4.fib_table_hash == NULL)
1009 return -ENOMEM; 1026 return -ENOMEM;
1010 1027
1011 for (i = 0; i < FIB_TABLE_HASHSZ; i++)
1012 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]);
1013
1014 err = fib4_rules_init(net); 1028 err = fib4_rules_init(net);
1015 if (err < 0) 1029 if (err < 0)
1016 goto fail; 1030 goto fail;
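ip_fib_net_init() can drop the INIT_HLIST_HEAD() loop because an empty hlist_head is a single NULL pointer, so kzalloc() already leaves every bucket initialized. The allocation is also rounded up so the per-namespace hash owns at least a full cache line rather than false-sharing one. The sizing in isolation:

size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ;

size = max_t(size_t, size, L1_CACHE_BYTES); /* own a full cache line */
hash = kzalloc(size, GFP_KERNEL);           /* zeroed == all buckets empty */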
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 4ed7e0dea1bc..43e1c594ce8f 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -54,36 +54,37 @@ struct fib_node {
54 struct fib_alias fn_embedded_alias; 54 struct fib_alias fn_embedded_alias;
55}; 55};
56 56
57struct fn_zone { 57#define EMBEDDED_HASH_SIZE (L1_CACHE_BYTES / sizeof(struct hlist_head))
58 struct fn_zone *fz_next; /* Next not empty zone */
59 struct hlist_head *fz_hash; /* Hash table pointer */
60 int fz_nent; /* Number of entries */
61 58
62 int fz_divisor; /* Hash divisor */ 59struct fn_zone {
60 struct fn_zone __rcu *fz_next; /* Next not empty zone */
61 struct hlist_head __rcu *fz_hash; /* Hash table pointer */
62 seqlock_t fz_lock;
63 u32 fz_hashmask; /* (fz_divisor - 1) */ 63 u32 fz_hashmask; /* (fz_divisor - 1) */
64#define FZ_HASHMASK(fz) ((fz)->fz_hashmask)
65 64
66 int fz_order; /* Zone order */ 65 u8 fz_order; /* Zone order (0..32) */
67 __be32 fz_mask; 66 u8 fz_revorder; /* 32 - fz_order */
67 __be32 fz_mask; /* inet_make_mask(order) */
68#define FZ_MASK(fz) ((fz)->fz_mask) 68#define FZ_MASK(fz) ((fz)->fz_mask)
69};
70 69
71/* NOTE. On fast computers evaluation of fz_hashmask and fz_mask 70 struct hlist_head fz_embedded_hash[EMBEDDED_HASH_SIZE];
72 * can be cheaper than memory lookup, so that FZ_* macros are used. 71
73 */ 72 int fz_nent; /* Number of entries */
73 int fz_divisor; /* Hash size (mask+1) */
74};
74 75
75struct fn_hash { 76struct fn_hash {
76 struct fn_zone *fn_zones[33]; 77 struct fn_zone *fn_zones[33];
77 struct fn_zone *fn_zone_list; 78 struct fn_zone __rcu *fn_zone_list;
78}; 79};
79 80
80static inline u32 fn_hash(__be32 key, struct fn_zone *fz) 81static inline u32 fn_hash(__be32 key, struct fn_zone *fz)
81{ 82{
82 u32 h = ntohl(key)>>(32 - fz->fz_order); 83 u32 h = ntohl(key) >> fz->fz_revorder;
83 h ^= (h>>20); 84 h ^= (h>>20);
84 h ^= (h>>10); 85 h ^= (h>>10);
85 h ^= (h>>5); 86 h ^= (h>>5);
86 h &= FZ_HASHMASK(fz); 87 h &= fz->fz_hashmask;
87 return h; 88 return h;
88} 89}
89 90
@@ -92,7 +93,6 @@ static inline __be32 fz_key(__be32 dst, struct fn_zone *fz)
92 return dst & FZ_MASK(fz); 93 return dst & FZ_MASK(fz);
93} 94}
94 95
95static DEFINE_RWLOCK(fib_hash_lock);
96static unsigned int fib_hash_genid; 96static unsigned int fib_hash_genid;
97 97
98#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head)) 98#define FZ_MAX_DIVISOR ((PAGE_SIZE<<MAX_ORDER) / sizeof(struct hlist_head))
@@ -101,12 +101,11 @@ static struct hlist_head *fz_hash_alloc(int divisor)
101{ 101{
102 unsigned long size = divisor * sizeof(struct hlist_head); 102 unsigned long size = divisor * sizeof(struct hlist_head);
103 103
104 if (size <= PAGE_SIZE) { 104 if (size <= PAGE_SIZE)
105 return kzalloc(size, GFP_KERNEL); 105 return kzalloc(size, GFP_KERNEL);
106 } else { 106
107 return (struct hlist_head *) 107 return (struct hlist_head *)
108 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size)); 108 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(size));
109 }
110} 109}
111 110
112/* The fib hash lock must be held when this is called. */ 111/* The fib hash lock must be held when this is called. */
@@ -121,12 +120,12 @@ static inline void fn_rebuild_zone(struct fn_zone *fz,
121 struct fib_node *f; 120 struct fib_node *f;
122 121
123 hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) { 122 hlist_for_each_entry_safe(f, node, n, &old_ht[i], fn_hash) {
124 struct hlist_head *new_head; 123 struct hlist_head __rcu *new_head;
125 124
126 hlist_del(&f->fn_hash); 125 hlist_del_rcu(&f->fn_hash);
127 126
128 new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)]; 127 new_head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
129 hlist_add_head(&f->fn_hash, new_head); 128 hlist_add_head_rcu(&f->fn_hash, new_head);
130 } 129 }
131 } 130 }
132} 131}
@@ -147,14 +146,14 @@ static void fn_rehash_zone(struct fn_zone *fz)
147 int old_divisor, new_divisor; 146 int old_divisor, new_divisor;
148 u32 new_hashmask; 147 u32 new_hashmask;
149 148
150 old_divisor = fz->fz_divisor; 149 new_divisor = old_divisor = fz->fz_divisor;
151 150
152 switch (old_divisor) { 151 switch (old_divisor) {
153 case 16: 152 case EMBEDDED_HASH_SIZE:
154 new_divisor = 256; 153 new_divisor *= EMBEDDED_HASH_SIZE;
155 break; 154 break;
156 case 256: 155 case EMBEDDED_HASH_SIZE*EMBEDDED_HASH_SIZE:
157 new_divisor = 1024; 156 new_divisor *= (EMBEDDED_HASH_SIZE/2);
158 break; 157 break;
159 default: 158 default:
160 if ((old_divisor << 1) > FZ_MAX_DIVISOR) { 159 if ((old_divisor << 1) > FZ_MAX_DIVISOR) {
@@ -175,31 +174,55 @@ static void fn_rehash_zone(struct fn_zone *fz)
175 ht = fz_hash_alloc(new_divisor); 174 ht = fz_hash_alloc(new_divisor);
176 175
177 if (ht) { 176 if (ht) {
178 write_lock_bh(&fib_hash_lock); 177 struct fn_zone nfz;
178
179 memcpy(&nfz, fz, sizeof(nfz));
180
181 write_seqlock_bh(&fz->fz_lock);
179 old_ht = fz->fz_hash; 182 old_ht = fz->fz_hash;
180 fz->fz_hash = ht; 183 nfz.fz_hash = ht;
184 nfz.fz_hashmask = new_hashmask;
185 nfz.fz_divisor = new_divisor;
186 fn_rebuild_zone(&nfz, old_ht, old_divisor);
187 fib_hash_genid++;
188 rcu_assign_pointer(fz->fz_hash, ht);
181 fz->fz_hashmask = new_hashmask; 189 fz->fz_hashmask = new_hashmask;
182 fz->fz_divisor = new_divisor; 190 fz->fz_divisor = new_divisor;
183 fn_rebuild_zone(fz, old_ht, old_divisor); 191 write_sequnlock_bh(&fz->fz_lock);
184 fib_hash_genid++;
185 write_unlock_bh(&fib_hash_lock);
186 192
187 fz_hash_free(old_ht, old_divisor); 193 if (old_ht != fz->fz_embedded_hash) {
194 synchronize_rcu();
195 fz_hash_free(old_ht, old_divisor);
196 }
188 } 197 }
189} 198}
190 199
191static inline void fn_free_node(struct fib_node * f) 200static void fn_free_node_rcu(struct rcu_head *head)
192{ 201{
202 struct fib_node *f = container_of(head, struct fib_node, fn_embedded_alias.rcu);
203
193 kmem_cache_free(fn_hash_kmem, f); 204 kmem_cache_free(fn_hash_kmem, f);
194} 205}
195 206
207static inline void fn_free_node(struct fib_node *f)
208{
209 call_rcu(&f->fn_embedded_alias.rcu, fn_free_node_rcu);
210}
211
212static void fn_free_alias_rcu(struct rcu_head *head)
213{
214 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
215
216 kmem_cache_free(fn_alias_kmem, fa);
217}
218
196static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f) 219static inline void fn_free_alias(struct fib_alias *fa, struct fib_node *f)
197{ 220{
198 fib_release_info(fa->fa_info); 221 fib_release_info(fa->fa_info);
199 if (fa == &f->fn_embedded_alias) 222 if (fa == &f->fn_embedded_alias)
200 fa->fa_info = NULL; 223 fa->fa_info = NULL;
201 else 224 else
202 kmem_cache_free(fn_alias_kmem, fa); 225 call_rcu(&fa->rcu, fn_free_alias_rcu);
203} 226}
204 227
205static struct fn_zone * 228static struct fn_zone *
@@ -210,68 +233,71 @@ fn_new_zone(struct fn_hash *table, int z)
210 if (!fz) 233 if (!fz)
211 return NULL; 234 return NULL;
212 235
213 if (z) { 236 seqlock_init(&fz->fz_lock);
214 fz->fz_divisor = 16; 237 fz->fz_divisor = z ? EMBEDDED_HASH_SIZE : 1;
215 } else { 238 fz->fz_hashmask = fz->fz_divisor - 1;
216 fz->fz_divisor = 1; 239 fz->fz_hash = fz->fz_embedded_hash;
217 }
218 fz->fz_hashmask = (fz->fz_divisor - 1);
219 fz->fz_hash = fz_hash_alloc(fz->fz_divisor);
220 if (!fz->fz_hash) {
221 kfree(fz);
222 return NULL;
223 }
224 fz->fz_order = z; 240 fz->fz_order = z;
241 fz->fz_revorder = 32 - z;
225 fz->fz_mask = inet_make_mask(z); 242 fz->fz_mask = inet_make_mask(z);
226 243
227 /* Find the first not empty zone with more specific mask */ 244 /* Find the first not empty zone with more specific mask */
228 for (i=z+1; i<=32; i++) 245 for (i = z + 1; i <= 32; i++)
229 if (table->fn_zones[i]) 246 if (table->fn_zones[i])
230 break; 247 break;
231 write_lock_bh(&fib_hash_lock); 248 if (i > 32) {
232 if (i>32) {
233 /* No more specific masks, we are the first. */ 249 /* No more specific masks, we are the first. */
234 fz->fz_next = table->fn_zone_list; 250 rcu_assign_pointer(fz->fz_next,
235 table->fn_zone_list = fz; 251 rtnl_dereference(table->fn_zone_list));
252 rcu_assign_pointer(table->fn_zone_list, fz);
236 } else { 253 } else {
237 fz->fz_next = table->fn_zones[i]->fz_next; 254 rcu_assign_pointer(fz->fz_next,
238 table->fn_zones[i]->fz_next = fz; 255 rtnl_dereference(table->fn_zones[i]->fz_next));
256 rcu_assign_pointer(table->fn_zones[i]->fz_next, fz);
239 } 257 }
240 table->fn_zones[z] = fz; 258 table->fn_zones[z] = fz;
241 fib_hash_genid++; 259 fib_hash_genid++;
242 write_unlock_bh(&fib_hash_lock);
243 return fz; 260 return fz;
244} 261}
245 262
246int fib_table_lookup(struct fib_table *tb, 263int fib_table_lookup(struct fib_table *tb,
247 const struct flowi *flp, struct fib_result *res) 264 const struct flowi *flp, struct fib_result *res,
265 int fib_flags)
248{ 266{
249 int err; 267 int err;
250 struct fn_zone *fz; 268 struct fn_zone *fz;
251 struct fn_hash *t = (struct fn_hash *)tb->tb_data; 269 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
252 270
253 read_lock(&fib_hash_lock); 271 rcu_read_lock();
254 for (fz = t->fn_zone_list; fz; fz = fz->fz_next) { 272 for (fz = rcu_dereference(t->fn_zone_list);
255 struct hlist_head *head; 273 fz != NULL;
274 fz = rcu_dereference(fz->fz_next)) {
275 struct hlist_head __rcu *head;
256 struct hlist_node *node; 276 struct hlist_node *node;
257 struct fib_node *f; 277 struct fib_node *f;
258 __be32 k = fz_key(flp->fl4_dst, fz); 278 __be32 k;
279 unsigned int seq;
259 280
260 head = &fz->fz_hash[fn_hash(k, fz)]; 281 do {
261 hlist_for_each_entry(f, node, head, fn_hash) { 282 seq = read_seqbegin(&fz->fz_lock);
262 if (f->fn_key != k) 283 k = fz_key(flp->fl4_dst, fz);
263 continue; 284
285 head = &fz->fz_hash[fn_hash(k, fz)];
286 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
287 if (f->fn_key != k)
288 continue;
264 289
265 err = fib_semantic_match(&f->fn_alias, 290 err = fib_semantic_match(&f->fn_alias,
266 flp, res, 291 flp, res,
267 fz->fz_order); 292 fz->fz_order, fib_flags);
268 if (err <= 0) 293 if (err <= 0)
269 goto out; 294 goto out;
270 } 295 }
296 } while (read_seqretry(&fz->fz_lock, seq));
271 } 297 }
272 err = 1; 298 err = 1;
273out: 299out:
274 read_unlock(&fib_hash_lock); 300 rcu_read_unlock();
275 return err; 301 return err;
276} 302}
277 303
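fib_table_lookup() is the matching read side: the zone list and buckets are walked under rcu_read_lock(), and each zone's probe sits in a read_seqbegin()/read_seqretry() loop so that racing with a rehash merely retries instead of missing an entry. Reader skeleton:

rcu_read_lock();
do {
        seq = read_seqbegin(&fz->fz_lock);      /* sample the generation */
        head = &fz->fz_hash[fn_hash(key, fz)];
        hlist_for_each_entry_rcu(f, node, head, fn_hash) {
                /* ... key compare + fib_semantic_match() ... */
        }
} while (read_seqretry(&fz->fz_lock, seq));     /* resize raced? redo */
rcu_read_unlock();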
@@ -293,11 +319,11 @@ void fib_table_select_default(struct fib_table *tb,
293 last_resort = NULL; 319 last_resort = NULL;
294 order = -1; 320 order = -1;
295 321
296 read_lock(&fib_hash_lock); 322 rcu_read_lock();
297 hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) { 323 hlist_for_each_entry_rcu(f, node, &fz->fz_hash[0], fn_hash) {
298 struct fib_alias *fa; 324 struct fib_alias *fa;
299 325
300 list_for_each_entry(fa, &f->fn_alias, fa_list) { 326 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
301 struct fib_info *next_fi = fa->fa_info; 327 struct fib_info *next_fi = fa->fa_info;
302 328
303 if (fa->fa_scope != res->scope || 329 if (fa->fa_scope != res->scope ||
@@ -309,7 +335,8 @@ void fib_table_select_default(struct fib_table *tb,
309 if (!next_fi->fib_nh[0].nh_gw || 335 if (!next_fi->fib_nh[0].nh_gw ||
310 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 336 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
311 continue; 337 continue;
312 fa->fa_state |= FA_S_ACCESSED; 338
339 fib_alias_accessed(fa);
313 340
314 if (fi == NULL) { 341 if (fi == NULL) {
315 if (next_fi != res->fi) 342 if (next_fi != res->fi)
@@ -341,7 +368,7 @@ void fib_table_select_default(struct fib_table *tb,
341 fib_result_assign(res, last_resort); 368 fib_result_assign(res, last_resort);
342 tb->tb_default = last_idx; 369 tb->tb_default = last_idx;
343out: 370out:
344 read_unlock(&fib_hash_lock); 371 rcu_read_unlock();
345} 372}
346 373
347/* Insert node F to FZ. */ 374/* Insert node F to FZ. */
@@ -349,7 +376,7 @@ static inline void fib_insert_node(struct fn_zone *fz, struct fib_node *f)
349{ 376{
350 struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)]; 377 struct hlist_head *head = &fz->fz_hash[fn_hash(f->fn_key, fz)];
351 378
352 hlist_add_head(&f->fn_hash, head); 379 hlist_add_head_rcu(&f->fn_hash, head);
353} 380}
354 381
355/* Return the node in FZ matching KEY. */ 382/* Return the node in FZ matching KEY. */
@@ -359,7 +386,7 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
359 struct hlist_node *node; 386 struct hlist_node *node;
360 struct fib_node *f; 387 struct fib_node *f;
361 388
362 hlist_for_each_entry(f, node, head, fn_hash) { 389 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
363 if (f->fn_key == key) 390 if (f->fn_key == key)
364 return f; 391 return f;
365 } 392 }
@@ -367,6 +394,17 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, __be32 key)
367 return NULL; 394 return NULL;
368} 395}
369 396
397
398static struct fib_alias *fib_fast_alloc(struct fib_node *f)
399{
400 struct fib_alias *fa = &f->fn_embedded_alias;
401
402 if (fa->fa_info != NULL)
403 fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
404 return fa;
405}
406
407/* Caller must hold RTNL. */
370int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) 408int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
371{ 409{
372 struct fn_hash *table = (struct fn_hash *) tb->tb_data; 410 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
@@ -451,7 +489,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
451 } 489 }
452 490
453 if (cfg->fc_nlflags & NLM_F_REPLACE) { 491 if (cfg->fc_nlflags & NLM_F_REPLACE) {
454 struct fib_info *fi_drop;
455 u8 state; 492 u8 state;
456 493
457 fa = fa_first; 494 fa = fa_first;
@@ -460,21 +497,25 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
460 err = 0; 497 err = 0;
461 goto out; 498 goto out;
462 } 499 }
463 write_lock_bh(&fib_hash_lock); 500 err = -ENOBUFS;
464 fi_drop = fa->fa_info; 501 new_fa = fib_fast_alloc(f);
465 fa->fa_info = fi; 502 if (new_fa == NULL)
466 fa->fa_type = cfg->fc_type; 503 goto out;
467 fa->fa_scope = cfg->fc_scope; 504
505 new_fa->fa_tos = fa->fa_tos;
506 new_fa->fa_info = fi;
507 new_fa->fa_type = cfg->fc_type;
508 new_fa->fa_scope = cfg->fc_scope;
468 state = fa->fa_state; 509 state = fa->fa_state;
469 fa->fa_state &= ~FA_S_ACCESSED; 510 new_fa->fa_state = state & ~FA_S_ACCESSED;
470 fib_hash_genid++; 511 fib_hash_genid++;
471 write_unlock_bh(&fib_hash_lock); 512 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
472 513
473 fib_release_info(fi_drop); 514 fn_free_alias(fa, f);
474 if (state & FA_S_ACCESSED) 515 if (state & FA_S_ACCESSED)
475 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); 516 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
476 rtmsg_fib(RTM_NEWROUTE, key, fa, cfg->fc_dst_len, tb->tb_id, 517 rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len,
477 &cfg->fc_nlinfo, NLM_F_REPLACE); 518 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
478 return 0; 519 return 0;
479 } 520 }
480 521
@@ -506,12 +547,10 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
506 f = new_f; 547 f = new_f;
507 } 548 }
508 549
509 new_fa = &f->fn_embedded_alias; 550 new_fa = fib_fast_alloc(f);
510 if (new_fa->fa_info != NULL) { 551 if (new_fa == NULL)
511 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); 552 goto out;
512 if (new_fa == NULL) 553
513 goto out;
514 }
515 new_fa->fa_info = fi; 554 new_fa->fa_info = fi;
516 new_fa->fa_tos = tos; 555 new_fa->fa_tos = tos;
517 new_fa->fa_type = cfg->fc_type; 556 new_fa->fa_type = cfg->fc_type;
@@ -522,13 +561,11 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
522 * Insert new entry to the list. 561 * Insert new entry to the list.
523 */ 562 */
524 563
525 write_lock_bh(&fib_hash_lock);
526 if (new_f) 564 if (new_f)
527 fib_insert_node(fz, new_f); 565 fib_insert_node(fz, new_f);
528 list_add_tail(&new_fa->fa_list, 566 list_add_tail_rcu(&new_fa->fa_list,
529 (fa ? &fa->fa_list : &f->fn_alias)); 567 (fa ? &fa->fa_list : &f->fn_alias));
530 fib_hash_genid++; 568 fib_hash_genid++;
531 write_unlock_bh(&fib_hash_lock);
532 569
533 if (new_f) 570 if (new_f)
534 fz->fz_nent++; 571 fz->fz_nent++;
@@ -603,14 +640,12 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
603 tb->tb_id, &cfg->fc_nlinfo, 0); 640 tb->tb_id, &cfg->fc_nlinfo, 0);
604 641
605 kill_fn = 0; 642 kill_fn = 0;
606 write_lock_bh(&fib_hash_lock); 643 list_del_rcu(&fa->fa_list);
607 list_del(&fa->fa_list);
608 if (list_empty(&f->fn_alias)) { 644 if (list_empty(&f->fn_alias)) {
609 hlist_del(&f->fn_hash); 645 hlist_del_rcu(&f->fn_hash);
610 kill_fn = 1; 646 kill_fn = 1;
611 } 647 }
612 fib_hash_genid++; 648 fib_hash_genid++;
613 write_unlock_bh(&fib_hash_lock);
614 649
615 if (fa->fa_state & FA_S_ACCESSED) 650 if (fa->fa_state & FA_S_ACCESSED)
616 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1); 651 rt_cache_flush(cfg->fc_nlinfo.nl_net, -1);
@@ -641,14 +676,12 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
641 struct fib_info *fi = fa->fa_info; 676 struct fib_info *fi = fa->fa_info;
642 677
643 if (fi && (fi->fib_flags&RTNH_F_DEAD)) { 678 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
644 write_lock_bh(&fib_hash_lock); 679 list_del_rcu(&fa->fa_list);
645 list_del(&fa->fa_list);
646 if (list_empty(&f->fn_alias)) { 680 if (list_empty(&f->fn_alias)) {
647 hlist_del(&f->fn_hash); 681 hlist_del_rcu(&f->fn_hash);
648 kill_f = 1; 682 kill_f = 1;
649 } 683 }
650 fib_hash_genid++; 684 fib_hash_genid++;
651 write_unlock_bh(&fib_hash_lock);
652 685
653 fn_free_alias(fa, f); 686 fn_free_alias(fa, f);
654 found++; 687 found++;
@@ -662,13 +695,16 @@ static int fn_flush_list(struct fn_zone *fz, int idx)
662 return found; 695 return found;
663} 696}
664 697
698/* caller must hold RTNL. */
665int fib_table_flush(struct fib_table *tb) 699int fib_table_flush(struct fib_table *tb)
666{ 700{
667 struct fn_hash *table = (struct fn_hash *) tb->tb_data; 701 struct fn_hash *table = (struct fn_hash *) tb->tb_data;
668 struct fn_zone *fz; 702 struct fn_zone *fz;
669 int found = 0; 703 int found = 0;
670 704
671 for (fz = table->fn_zone_list; fz; fz = fz->fz_next) { 705 for (fz = rtnl_dereference(table->fn_zone_list);
706 fz != NULL;
707 fz = rtnl_dereference(fz->fz_next)) {
672 int i; 708 int i;
673 709
674 for (i = fz->fz_divisor - 1; i >= 0; i--) 710 for (i = fz->fz_divisor - 1; i >= 0; i--)
@@ -690,10 +726,10 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
690 726
691 s_i = cb->args[4]; 727 s_i = cb->args[4];
692 i = 0; 728 i = 0;
693 hlist_for_each_entry(f, node, head, fn_hash) { 729 hlist_for_each_entry_rcu(f, node, head, fn_hash) {
694 struct fib_alias *fa; 730 struct fib_alias *fa;
695 731
696 list_for_each_entry(fa, &f->fn_alias, fa_list) { 732 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
697 if (i < s_i) 733 if (i < s_i)
698 goto next; 734 goto next;
699 735
@@ -711,7 +747,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb,
711 cb->args[4] = i; 747 cb->args[4] = i;
712 return -1; 748 return -1;
713 } 749 }
714 next: 750next:
715 i++; 751 i++;
716 } 752 }
717 } 753 }
@@ -746,23 +782,26 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb,
746int fib_table_dump(struct fib_table *tb, struct sk_buff *skb, 782int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
747 struct netlink_callback *cb) 783 struct netlink_callback *cb)
748{ 784{
749 int m, s_m; 785 int m = 0, s_m;
750 struct fn_zone *fz; 786 struct fn_zone *fz;
751 struct fn_hash *table = (struct fn_hash *)tb->tb_data; 787 struct fn_hash *table = (struct fn_hash *)tb->tb_data;
752 788
753 s_m = cb->args[2]; 789 s_m = cb->args[2];
754 read_lock(&fib_hash_lock); 790 rcu_read_lock();
755 for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { 791 for (fz = rcu_dereference(table->fn_zone_list);
756 if (m < s_m) continue; 792 fz != NULL;
793 fz = rcu_dereference(fz->fz_next), m++) {
794 if (m < s_m)
795 continue;
757 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { 796 if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) {
758 cb->args[2] = m; 797 cb->args[2] = m;
759 read_unlock(&fib_hash_lock); 798 rcu_read_unlock();
760 return -1; 799 return -1;
761 } 800 }
762 memset(&cb->args[3], 0, 801 memset(&cb->args[3], 0,
763 sizeof(cb->args) - 3*sizeof(cb->args[0])); 802 sizeof(cb->args) - 3*sizeof(cb->args[0]));
764 } 803 }
765 read_unlock(&fib_hash_lock); 804 rcu_read_unlock();
766 cb->args[2] = m; 805 cb->args[2] = m;
767 return skb->len; 806 return skb->len;
768} 807}
@@ -825,8 +864,9 @@ static struct fib_alias *fib_get_first(struct seq_file *seq)
825 iter->genid = fib_hash_genid; 864 iter->genid = fib_hash_genid;
826 iter->valid = 1; 865 iter->valid = 1;
827 866
828 for (iter->zone = table->fn_zone_list; iter->zone; 867 for (iter->zone = rcu_dereference(table->fn_zone_list);
829 iter->zone = iter->zone->fz_next) { 868 iter->zone != NULL;
869 iter->zone = rcu_dereference(iter->zone->fz_next)) {
830 int maxslot; 870 int maxslot;
831 871
832 if (!iter->zone->fz_nent) 872 if (!iter->zone->fz_nent)
@@ -911,7 +951,7 @@ static struct fib_alias *fib_get_next(struct seq_file *seq)
911 } 951 }
912 } 952 }
913 953
914 iter->zone = iter->zone->fz_next; 954 iter->zone = rcu_dereference(iter->zone->fz_next);
915 955
916 if (!iter->zone) 956 if (!iter->zone)
917 goto out; 957 goto out;
@@ -950,11 +990,11 @@ static struct fib_alias *fib_get_idx(struct seq_file *seq, loff_t pos)
950} 990}
951 991
952static void *fib_seq_start(struct seq_file *seq, loff_t *pos) 992static void *fib_seq_start(struct seq_file *seq, loff_t *pos)
953 __acquires(fib_hash_lock) 993 __acquires(RCU)
954{ 994{
955 void *v = NULL; 995 void *v = NULL;
956 996
957 read_lock(&fib_hash_lock); 997 rcu_read_lock();
958 if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN)) 998 if (fib_get_table(seq_file_net(seq), RT_TABLE_MAIN))
959 v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; 999 v = *pos ? fib_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
960 return v; 1000 return v;
@@ -967,15 +1007,16 @@ static void *fib_seq_next(struct seq_file *seq, void *v, loff_t *pos)
967} 1007}
968 1008
969static void fib_seq_stop(struct seq_file *seq, void *v) 1009static void fib_seq_stop(struct seq_file *seq, void *v)
970 __releases(fib_hash_lock) 1010 __releases(RCU)
971{ 1011{
972 read_unlock(&fib_hash_lock); 1012 rcu_read_unlock();
973} 1013}
974 1014
975static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi) 1015static unsigned fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
976{ 1016{
977 static const unsigned type2flags[RTN_MAX + 1] = { 1017 static const unsigned type2flags[RTN_MAX + 1] = {
978 [7] = RTF_REJECT, [8] = RTF_REJECT, 1018 [7] = RTF_REJECT,
1019 [8] = RTF_REJECT,
979 }; 1020 };
980 unsigned flags = type2flags[type]; 1021 unsigned flags = type2flags[type];
981 1022
diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h
index 637b133973bd..a29edf2219c8 100644
--- a/net/ipv4/fib_lookup.h
+++ b/net/ipv4/fib_lookup.h
@@ -12,17 +12,22 @@ struct fib_alias {
12 u8 fa_type; 12 u8 fa_type;
13 u8 fa_scope; 13 u8 fa_scope;
14 u8 fa_state; 14 u8 fa_state;
15#ifdef CONFIG_IP_FIB_TRIE
16 struct rcu_head rcu; 15 struct rcu_head rcu;
17#endif
18}; 16};
19 17
20#define FA_S_ACCESSED 0x01 18#define FA_S_ACCESSED 0x01
21 19
20/* Dont write on fa_state unless needed, to keep it shared on all cpus */
21static inline void fib_alias_accessed(struct fib_alias *fa)
22{
23 if (!(fa->fa_state & FA_S_ACCESSED))
24 fa->fa_state |= FA_S_ACCESSED;
25}
26
22/* Exported by fib_semantics.c */ 27/* Exported by fib_semantics.c */
23extern int fib_semantic_match(struct list_head *head, 28extern int fib_semantic_match(struct list_head *head,
24 const struct flowi *flp, 29 const struct flowi *flp,
25 struct fib_result *res, int prefixlen); 30 struct fib_result *res, int prefixlen, int fib_flags);
26extern void fib_release_info(struct fib_info *); 31extern void fib_release_info(struct fib_info *);
27extern struct fib_info *fib_create_info(struct fib_config *cfg); 32extern struct fib_info *fib_create_info(struct fib_config *cfg);
28extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); 33extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi);
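fib_alias now carries its rcu head unconditionally (it used to exist only under CONFIG_IP_FIB_TRIE), and fib_alias_accessed() encodes a deliberate micro-optimization: on lookup-heavy workloads every CPU reads the same fib_alias, so blindly or-ing in FA_S_ACCESSED would keep bouncing the cache line between CPUs; testing first leaves the line in shared state once the bit is set. The pattern in general form:

/* avoid dirtying a read-mostly cache line with a redundant store */
if (!(fa->fa_state & FA_S_ACCESSED))
        fa->fa_state |= FA_S_ACCESSED;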
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 76daeb5ff564..7981a24f5c7b 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -6,7 +6,7 @@
6 * IPv4 Forwarding Information Base: policy rules. 6 * IPv4 Forwarding Information Base: policy rules.
7 * 7 *
8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9 * Thomas Graf <tgraf@suug.ch> 9 * Thomas Graf <tgraf@suug.ch>
10 * 10 *
11 * This program is free software; you can redistribute it and/or 11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License 12 * modify it under the terms of the GNU General Public License
@@ -14,7 +14,7 @@
14 * 2 of the License, or (at your option) any later version. 14 * 2 of the License, or (at your option) any later version.
15 * 15 *
16 * Fixes: 16 * Fixes:
17 * Rani Assaf : local_rule cannot be deleted 17 * Rani Assaf : local_rule cannot be deleted
18 * Marc Boucher : routing by fwmark 18 * Marc Boucher : routing by fwmark
19 */ 19 */
20 20
@@ -32,8 +32,7 @@
32#include <net/ip_fib.h> 32#include <net/ip_fib.h>
33#include <net/fib_rules.h> 33#include <net/fib_rules.h>
34 34
35struct fib4_rule 35struct fib4_rule {
36{
37 struct fib_rule common; 36 struct fib_rule common;
38 u8 dst_len; 37 u8 dst_len;
39 u8 src_len; 38 u8 src_len;
@@ -58,6 +57,7 @@ int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
58{ 57{
59 struct fib_lookup_arg arg = { 58 struct fib_lookup_arg arg = {
60 .result = res, 59 .result = res,
60 .flags = FIB_LOOKUP_NOREF,
61 }; 61 };
62 int err; 62 int err;
63 63
@@ -91,10 +91,11 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp,
91 goto errout; 91 goto errout;
92 } 92 }
93 93
94 if ((tbl = fib_get_table(rule->fr_net, rule->table)) == NULL) 94 tbl = fib_get_table(rule->fr_net, rule->table);
95 if (!tbl)
95 goto errout; 96 goto errout;
96 97
97 err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result); 98 err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result, arg->flags);
98 if (err > 0) 99 if (err > 0)
99 err = -EAGAIN; 100 err = -EAGAIN;
100errout: 101errout:
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 20f09c5b31e8..3e0da3ef6116 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -60,21 +60,30 @@ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
60 60
61static DEFINE_SPINLOCK(fib_multipath_lock); 61static DEFINE_SPINLOCK(fib_multipath_lock);
62 62
63#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ 63#define for_nexthops(fi) { \
64for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) 64 int nhsel; const struct fib_nh *nh; \
65 65 for (nhsel = 0, nh = (fi)->fib_nh; \
66#define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \ 66 nhsel < (fi)->fib_nhs; \
67for (nhsel=0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++) 67 nh++, nhsel++)
68
69#define change_nexthops(fi) { \
70 int nhsel; struct fib_nh *nexthop_nh; \
71 for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
72 nhsel < (fi)->fib_nhs; \
73 nexthop_nh++, nhsel++)
68 74
69#else /* CONFIG_IP_ROUTE_MULTIPATH */ 75#else /* CONFIG_IP_ROUTE_MULTIPATH */
70 76
71/* Hope, that gcc will optimize it to get rid of dummy loop */ 77/* Hope, that gcc will optimize it to get rid of dummy loop */
72 78
73#define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ 79#define for_nexthops(fi) { \
74for (nhsel=0; nhsel < 1; nhsel++) 80 int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \
81 for (nhsel = 0; nhsel < 1; nhsel++)
75 82
76#define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ 83#define change_nexthops(fi) { \
77for (nhsel=0; nhsel < 1; nhsel++) 84 int nhsel; \
85 struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \
86 for (nhsel = 0; nhsel < 1; nhsel++)
78 87
79#endif /* CONFIG_IP_ROUTE_MULTIPATH */ 88#endif /* CONFIG_IP_ROUTE_MULTIPATH */
80 89
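The reflowed for_nexthops()/change_nexthops() macros above keep the long-standing kernel trick of opening a scope brace inside the macro so the loop variables (nhsel, nh) stay local to the iteration; the matching endfor_nexthops() elsewhere in this file is presumably just the closing brace. A standalone mock of the idiom, with hypothetical names:

#include <stdio.h>

struct hop { int weight; };
struct info { int nhops; struct hop hop[4]; };

#define for_hops(fi) { \
	int sel; const struct hop *h; \
	for (sel = 0, h = (fi)->hop; \
	     sel < (fi)->nhops; \
	     h++, sel++)

#define endfor_hops(fi) }	/* closes the brace for_hops() opened */

int main(void)
{
	struct info fi = { 3, { {1}, {2}, {3} } };
	int total = 0;

	for_hops(&fi) {
		total += h->weight;
	} endfor_hops(&fi);

	printf("total=%d\n", total);	/* prints total=6 */
	return 0;
}

The non-multipath variants compile to a single-iteration loop the optimizer removes, so callers are written once and work either way.
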
@@ -86,63 +95,70 @@ static const struct
86 int error; 95 int error;
87 u8 scope; 96 u8 scope;
88} fib_props[RTN_MAX + 1] = { 97} fib_props[RTN_MAX + 1] = {
89 { 98 [RTN_UNSPEC] = {
90 .error = 0, 99 .error = 0,
91 .scope = RT_SCOPE_NOWHERE, 100 .scope = RT_SCOPE_NOWHERE,
92 }, /* RTN_UNSPEC */ 101 },
93 { 102 [RTN_UNICAST] = {
94 .error = 0, 103 .error = 0,
95 .scope = RT_SCOPE_UNIVERSE, 104 .scope = RT_SCOPE_UNIVERSE,
96 }, /* RTN_UNICAST */ 105 },
97 { 106 [RTN_LOCAL] = {
98 .error = 0, 107 .error = 0,
99 .scope = RT_SCOPE_HOST, 108 .scope = RT_SCOPE_HOST,
100 }, /* RTN_LOCAL */ 109 },
101 { 110 [RTN_BROADCAST] = {
102 .error = 0, 111 .error = 0,
103 .scope = RT_SCOPE_LINK, 112 .scope = RT_SCOPE_LINK,
104 }, /* RTN_BROADCAST */ 113 },
105 { 114 [RTN_ANYCAST] = {
106 .error = 0, 115 .error = 0,
107 .scope = RT_SCOPE_LINK, 116 .scope = RT_SCOPE_LINK,
108 }, /* RTN_ANYCAST */ 117 },
109 { 118 [RTN_MULTICAST] = {
110 .error = 0, 119 .error = 0,
111 .scope = RT_SCOPE_UNIVERSE, 120 .scope = RT_SCOPE_UNIVERSE,
112 }, /* RTN_MULTICAST */ 121 },
113 { 122 [RTN_BLACKHOLE] = {
114 .error = -EINVAL, 123 .error = -EINVAL,
115 .scope = RT_SCOPE_UNIVERSE, 124 .scope = RT_SCOPE_UNIVERSE,
116 }, /* RTN_BLACKHOLE */ 125 },
117 { 126 [RTN_UNREACHABLE] = {
118 .error = -EHOSTUNREACH, 127 .error = -EHOSTUNREACH,
119 .scope = RT_SCOPE_UNIVERSE, 128 .scope = RT_SCOPE_UNIVERSE,
120 }, /* RTN_UNREACHABLE */ 129 },
121 { 130 [RTN_PROHIBIT] = {
122 .error = -EACCES, 131 .error = -EACCES,
123 .scope = RT_SCOPE_UNIVERSE, 132 .scope = RT_SCOPE_UNIVERSE,
124 }, /* RTN_PROHIBIT */ 133 },
125 { 134 [RTN_THROW] = {
126 .error = -EAGAIN, 135 .error = -EAGAIN,
127 .scope = RT_SCOPE_UNIVERSE, 136 .scope = RT_SCOPE_UNIVERSE,
128 }, /* RTN_THROW */ 137 },
129 { 138 [RTN_NAT] = {
130 .error = -EINVAL, 139 .error = -EINVAL,
131 .scope = RT_SCOPE_NOWHERE, 140 .scope = RT_SCOPE_NOWHERE,
132 }, /* RTN_NAT */ 141 },
133 { 142 [RTN_XRESOLVE] = {
134 .error = -EINVAL, 143 .error = -EINVAL,
135 .scope = RT_SCOPE_NOWHERE, 144 .scope = RT_SCOPE_NOWHERE,
136 }, /* RTN_XRESOLVE */ 145 },
137}; 146};
138 147
139 148
140/* Release a nexthop info record */ 149/* Release a nexthop info record */
141 150
151static void free_fib_info_rcu(struct rcu_head *head)
152{
153 struct fib_info *fi = container_of(head, struct fib_info, rcu);
154
155 kfree(fi);
156}
157
142void free_fib_info(struct fib_info *fi) 158void free_fib_info(struct fib_info *fi)
143{ 159{
144 if (fi->fib_dead == 0) { 160 if (fi->fib_dead == 0) {
145 printk(KERN_WARNING "Freeing alive fib_info %p\n", fi); 161 pr_warning("Freeing alive fib_info %p\n", fi);
146 return; 162 return;
147 } 163 }
148 change_nexthops(fi) { 164 change_nexthops(fi) {
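Re-keying fib_props[] with designated initializers ([RTN_UNICAST] = { ... }) in the hunk above replaces position-plus-trailing-comment bookkeeping: entries can no longer drift out of sync with the RTN_* enum, and any slot not mentioned is zero-filled by the compiler. The same C99 feature in miniature:

#include <stdio.h>

enum { T_UNSPEC, T_UNICAST, T_BLACKHOLE, T_MAX };

static const struct {
	int error;		/* -22 below stands in for -EINVAL */
	const char *name;
} props[T_MAX + 1] = {
	[T_UNSPEC]    = { .error = 0,   .name = "unspec" },
	[T_UNICAST]   = { .error = 0,   .name = "unicast" },
	[T_BLACKHOLE] = { .error = -22, .name = "blackhole" },
	/* anything not listed is implicitly zeroed */
};

int main(void)
{
	printf("%s -> %d\n", props[T_BLACKHOLE].name,
	       props[T_BLACKHOLE].error);
	return 0;
}
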
@@ -152,7 +168,7 @@ void free_fib_info(struct fib_info *fi)
152 } endfor_nexthops(fi); 168 } endfor_nexthops(fi);
153 fib_info_cnt--; 169 fib_info_cnt--;
154 release_net(fi->fib_net); 170 release_net(fi->fib_net);
155 kfree(fi); 171 call_rcu(&fi->rcu, free_fib_info_rcu);
156} 172}
157 173
158void fib_release_info(struct fib_info *fi) 174void fib_release_info(struct fib_info *fi)
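The two hunks above are the core of the RCU conversion on the release side: once lookups stop taking a reference (see FIB_LOOKUP_NOREF below), a fib_info may still be in use by a concurrent reader at the instant it is released, so the kfree() is deferred through call_rcu() until every pre-existing read-side section has finished. The shape of the pattern, for a hypothetical struct obj:

#include <linux/slab.h>
#include <linux/rcupdate.h>

struct obj {
	int payload;
	struct rcu_head rcu;	/* storage for the deferred callback */
};

static void obj_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct obj, rcu));
}

static void obj_release(struct obj *o)
{
	/* unlink o from all reader-visible structures first, then: */
	call_rcu(&o->rcu, obj_free_rcu);
}
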
@@ -173,7 +189,7 @@ void fib_release_info(struct fib_info *fi)
173 spin_unlock_bh(&fib_info_lock); 189 spin_unlock_bh(&fib_info_lock);
174} 190}
175 191
176static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) 192static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
177{ 193{
178 const struct fib_nh *onh = ofi->fib_nh; 194 const struct fib_nh *onh = ofi->fib_nh;
179 195
@@ -187,7 +203,7 @@ static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *
187#ifdef CONFIG_NET_CLS_ROUTE 203#ifdef CONFIG_NET_CLS_ROUTE
188 nh->nh_tclassid != onh->nh_tclassid || 204 nh->nh_tclassid != onh->nh_tclassid ||
189#endif 205#endif
190 ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) 206 ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
191 return -1; 207 return -1;
192 onh++; 208 onh++;
193 } endfor_nexthops(fi); 209 } endfor_nexthops(fi);
@@ -238,7 +254,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
238 nfi->fib_priority == fi->fib_priority && 254 nfi->fib_priority == fi->fib_priority &&
239 memcmp(nfi->fib_metrics, fi->fib_metrics, 255 memcmp(nfi->fib_metrics, fi->fib_metrics,
240 sizeof(fi->fib_metrics)) == 0 && 256 sizeof(fi->fib_metrics)) == 0 &&
241 ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && 257 ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
242 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) 258 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
243 return fi; 259 return fi;
244 } 260 }
@@ -247,9 +263,8 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
247} 263}
248 264
249/* Check, that the gateway is already configured. 265/* Check, that the gateway is already configured.
250 Used only by redirect accept routine. 266 * Used only by redirect accept routine.
251 */ 267 */
252
253int ip_fib_check_default(__be32 gw, struct net_device *dev) 268int ip_fib_check_default(__be32 gw, struct net_device *dev)
254{ 269{
255 struct hlist_head *head; 270 struct hlist_head *head;
@@ -264,7 +279,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev)
264 hlist_for_each_entry(nh, node, head, nh_hash) { 279 hlist_for_each_entry(nh, node, head, nh_hash) {
265 if (nh->nh_dev == dev && 280 if (nh->nh_dev == dev &&
266 nh->nh_gw == gw && 281 nh->nh_gw == gw &&
267 !(nh->nh_flags&RTNH_F_DEAD)) { 282 !(nh->nh_flags & RTNH_F_DEAD)) {
268 spin_unlock(&fib_info_lock); 283 spin_unlock(&fib_info_lock);
269 return 0; 284 return 0;
270 } 285 }
@@ -362,10 +377,10 @@ int fib_detect_death(struct fib_info *fi, int order,
362 } 377 }
363 if (state == NUD_REACHABLE) 378 if (state == NUD_REACHABLE)
364 return 0; 379 return 0;
365 if ((state&NUD_VALID) && order != dflt) 380 if ((state & NUD_VALID) && order != dflt)
366 return 0; 381 return 0;
367 if ((state&NUD_VALID) || 382 if ((state & NUD_VALID) ||
368 (*last_idx<0 && order > dflt)) { 383 (*last_idx < 0 && order > dflt)) {
369 *last_resort = fi; 384 *last_resort = fi;
370 *last_idx = order; 385 *last_idx = order;
371 } 386 }
@@ -476,75 +491,76 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
476 491
477 492
478/* 493/*
479 Picture 494 * Picture
480 ------- 495 * -------
481 496 *
482 Semantics of nexthop is very messy by historical reasons. 497 * Semantics of nexthop is very messy by historical reasons.
483 We have to take into account, that: 498 * We have to take into account, that:
484 a) gateway can be actually local interface address, 499 * a) gateway can be actually local interface address,
485 so that gatewayed route is direct. 500 * so that gatewayed route is direct.
486 b) gateway must be on-link address, possibly 501 * b) gateway must be on-link address, possibly
487 described not by an ifaddr, but also by a direct route. 502 * described not by an ifaddr, but also by a direct route.
488 c) If both gateway and interface are specified, they should not 503 * c) If both gateway and interface are specified, they should not
489 contradict. 504 * contradict.
490 d) If we use tunnel routes, gateway could be not on-link. 505 * d) If we use tunnel routes, gateway could be not on-link.
491 506 *
492 Attempt to reconcile all of these (alas, self-contradictory) conditions 507 * Attempt to reconcile all of these (alas, self-contradictory) conditions
493 results in pretty ugly and hairy code with obscure logic. 508 * results in pretty ugly and hairy code with obscure logic.
494 509 *
495 I chose to generalized it instead, so that the size 510 * I chose to generalized it instead, so that the size
496 of code does not increase practically, but it becomes 511 * of code does not increase practically, but it becomes
497 much more general. 512 * much more general.
498 Every prefix is assigned a "scope" value: "host" is local address, 513 * Every prefix is assigned a "scope" value: "host" is local address,
499 "link" is direct route, 514 * "link" is direct route,
500 [ ... "site" ... "interior" ... ] 515 * [ ... "site" ... "interior" ... ]
501 and "universe" is true gateway route with global meaning. 516 * and "universe" is true gateway route with global meaning.
502 517 *
503 Every prefix refers to a set of "nexthop"s (gw, oif), 518 * Every prefix refers to a set of "nexthop"s (gw, oif),
504 where gw must have narrower scope. This recursion stops 519 * where gw must have narrower scope. This recursion stops
505 when gw has LOCAL scope or if "nexthop" is declared ONLINK, 520 * when gw has LOCAL scope or if "nexthop" is declared ONLINK,
506 which means that gw is forced to be on link. 521 * which means that gw is forced to be on link.
507 522 *
508 Code is still hairy, but now it is apparently logically 523 * Code is still hairy, but now it is apparently logically
509 consistent and very flexible. F.e. as by-product it allows 524 * consistent and very flexible. F.e. as by-product it allows
510 to co-exists in peace independent exterior and interior 525 * to co-exists in peace independent exterior and interior
511 routing processes. 526 * routing processes.
512 527 *
513 Normally it looks as following. 528 * Normally it looks as following.
514 529 *
515 {universe prefix} -> (gw, oif) [scope link] 530 * {universe prefix} -> (gw, oif) [scope link]
516 | 531 * |
517 |-> {link prefix} -> (gw, oif) [scope local] 532 * |-> {link prefix} -> (gw, oif) [scope local]
518 | 533 * |
519 |-> {local prefix} (terminal node) 534 * |-> {local prefix} (terminal node)
520 */ 535 */
521
522static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, 536static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
523 struct fib_nh *nh) 537 struct fib_nh *nh)
524{ 538{
525 int err; 539 int err;
526 struct net *net; 540 struct net *net;
541 struct net_device *dev;
527 542
528 net = cfg->fc_nlinfo.nl_net; 543 net = cfg->fc_nlinfo.nl_net;
529 if (nh->nh_gw) { 544 if (nh->nh_gw) {
530 struct fib_result res; 545 struct fib_result res;
531 546
532 if (nh->nh_flags&RTNH_F_ONLINK) { 547 if (nh->nh_flags & RTNH_F_ONLINK) {
533 struct net_device *dev;
534 548
535 if (cfg->fc_scope >= RT_SCOPE_LINK) 549 if (cfg->fc_scope >= RT_SCOPE_LINK)
536 return -EINVAL; 550 return -EINVAL;
537 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) 551 if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
538 return -EINVAL; 552 return -EINVAL;
539 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL) 553 dev = __dev_get_by_index(net, nh->nh_oif);
554 if (!dev)
540 return -ENODEV; 555 return -ENODEV;
541 if (!(dev->flags&IFF_UP)) 556 if (!(dev->flags & IFF_UP))
542 return -ENETDOWN; 557 return -ENETDOWN;
543 nh->nh_dev = dev; 558 nh->nh_dev = dev;
544 dev_hold(dev); 559 dev_hold(dev);
545 nh->nh_scope = RT_SCOPE_LINK; 560 nh->nh_scope = RT_SCOPE_LINK;
546 return 0; 561 return 0;
547 } 562 }
563 rcu_read_lock();
548 { 564 {
549 struct flowi fl = { 565 struct flowi fl = {
550 .nl_u = { 566 .nl_u = {
@@ -559,50 +575,53 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
559 /* It is not necessary, but requires a bit of thinking */ 575 /* It is not necessary, but requires a bit of thinking */
560 if (fl.fl4_scope < RT_SCOPE_LINK) 576 if (fl.fl4_scope < RT_SCOPE_LINK)
561 fl.fl4_scope = RT_SCOPE_LINK; 577 fl.fl4_scope = RT_SCOPE_LINK;
562 if ((err = fib_lookup(net, &fl, &res)) != 0) 578 err = fib_lookup(net, &fl, &res);
579 if (err) {
580 rcu_read_unlock();
563 return err; 581 return err;
582 }
564 } 583 }
565 err = -EINVAL; 584 err = -EINVAL;
566 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) 585 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
567 goto out; 586 goto out;
568 nh->nh_scope = res.scope; 587 nh->nh_scope = res.scope;
569 nh->nh_oif = FIB_RES_OIF(res); 588 nh->nh_oif = FIB_RES_OIF(res);
570 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) 589 nh->nh_dev = dev = FIB_RES_DEV(res);
590 if (!dev)
571 goto out; 591 goto out;
572 dev_hold(nh->nh_dev); 592 dev_hold(dev);
573 err = -ENETDOWN; 593 err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
574 if (!(nh->nh_dev->flags & IFF_UP))
575 goto out;
576 err = 0;
577out:
578 fib_res_put(&res);
579 return err;
580 } else { 594 } else {
581 struct in_device *in_dev; 595 struct in_device *in_dev;
582 596
583 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) 597 if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK))
584 return -EINVAL; 598 return -EINVAL;
585 599
600 rcu_read_lock();
601 err = -ENODEV;
586 in_dev = inetdev_by_index(net, nh->nh_oif); 602 in_dev = inetdev_by_index(net, nh->nh_oif);
587 if (in_dev == NULL) 603 if (in_dev == NULL)
588 return -ENODEV; 604 goto out;
589 if (!(in_dev->dev->flags&IFF_UP)) { 605 err = -ENETDOWN;
590 in_dev_put(in_dev); 606 if (!(in_dev->dev->flags & IFF_UP))
591 return -ENETDOWN; 607 goto out;
592 }
593 nh->nh_dev = in_dev->dev; 608 nh->nh_dev = in_dev->dev;
594 dev_hold(nh->nh_dev); 609 dev_hold(nh->nh_dev);
595 nh->nh_scope = RT_SCOPE_HOST; 610 nh->nh_scope = RT_SCOPE_HOST;
596 in_dev_put(in_dev); 611 err = 0;
597 } 612 }
598 return 0; 613out:
614 rcu_read_unlock();
615 return err;
599} 616}
600 617
601static inline unsigned int fib_laddr_hashfn(__be32 val) 618static inline unsigned int fib_laddr_hashfn(__be32 val)
602{ 619{
603 unsigned int mask = (fib_hash_size - 1); 620 unsigned int mask = (fib_hash_size - 1);
604 621
605 return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; 622 return ((__force u32)val ^
623 ((__force u32)val >> 7) ^
624 ((__force u32)val >> 14)) & mask;
606} 625}
607 626
608static struct hlist_head *fib_hash_alloc(int bytes) 627static struct hlist_head *fib_hash_alloc(int bytes)
@@ -611,7 +630,8 @@ static struct hlist_head *fib_hash_alloc(int bytes)
611 return kzalloc(bytes, GFP_KERNEL); 630 return kzalloc(bytes, GFP_KERNEL);
612 else 631 else
613 return (struct hlist_head *) 632 return (struct hlist_head *)
614 __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes)); 633 __get_free_pages(GFP_KERNEL | __GFP_ZERO,
634 get_order(bytes));
615} 635}
616 636
617static void fib_hash_free(struct hlist_head *hash, int bytes) 637static void fib_hash_free(struct hlist_head *hash, int bytes)
@@ -806,7 +826,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
806 goto failure; 826 goto failure;
807 } else { 827 } else {
808 change_nexthops(fi) { 828 change_nexthops(fi) {
809 if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0) 829 err = fib_check_nh(cfg, fi, nexthop_nh);
830 if (err != 0)
810 goto failure; 831 goto failure;
811 } endfor_nexthops(fi) 832 } endfor_nexthops(fi)
812 } 833 }
@@ -819,7 +840,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
819 } 840 }
820 841
821link_it: 842link_it:
822 if ((ofi = fib_find_info(fi)) != NULL) { 843 ofi = fib_find_info(fi);
844 if (ofi) {
823 fi->fib_dead = 1; 845 fi->fib_dead = 1;
824 free_fib_info(fi); 846 free_fib_info(fi);
825 ofi->fib_treeref++; 847 ofi->fib_treeref++;
@@ -864,7 +886,7 @@ failure:
864 886
865/* Note! fib_semantic_match intentionally uses RCU list functions. */ 887/* Note! fib_semantic_match intentionally uses RCU list functions. */
866int fib_semantic_match(struct list_head *head, const struct flowi *flp, 888int fib_semantic_match(struct list_head *head, const struct flowi *flp,
867 struct fib_result *res, int prefixlen) 889 struct fib_result *res, int prefixlen, int fib_flags)
868{ 890{
869 struct fib_alias *fa; 891 struct fib_alias *fa;
870 int nh_sel = 0; 892 int nh_sel = 0;
@@ -879,7 +901,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
879 if (fa->fa_scope < flp->fl4_scope) 901 if (fa->fa_scope < flp->fl4_scope)
880 continue; 902 continue;
881 903
882 fa->fa_state |= FA_S_ACCESSED; 904 fib_alias_accessed(fa);
883 905
884 err = fib_props[fa->fa_type].error; 906 err = fib_props[fa->fa_type].error;
885 if (err == 0) { 907 if (err == 0) {
@@ -895,7 +917,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
895 case RTN_ANYCAST: 917 case RTN_ANYCAST:
896 case RTN_MULTICAST: 918 case RTN_MULTICAST:
897 for_nexthops(fi) { 919 for_nexthops(fi) {
898 if (nh->nh_flags&RTNH_F_DEAD) 920 if (nh->nh_flags & RTNH_F_DEAD)
899 continue; 921 continue;
900 if (!flp->oif || flp->oif == nh->nh_oif) 922 if (!flp->oif || flp->oif == nh->nh_oif)
901 break; 923 break;
@@ -906,16 +928,15 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp,
906 goto out_fill_res; 928 goto out_fill_res;
907 } 929 }
908#else 930#else
909 if (nhsel < 1) { 931 if (nhsel < 1)
910 goto out_fill_res; 932 goto out_fill_res;
911 }
912#endif 933#endif
913 endfor_nexthops(fi); 934 endfor_nexthops(fi);
914 continue; 935 continue;
915 936
916 default: 937 default:
917 printk(KERN_WARNING "fib_semantic_match bad type %#x\n", 938 pr_warning("fib_semantic_match bad type %#x\n",
918 fa->fa_type); 939 fa->fa_type);
919 return -EINVAL; 940 return -EINVAL;
920 } 941 }
921 } 942 }
@@ -929,7 +950,8 @@ out_fill_res:
929 res->type = fa->fa_type; 950 res->type = fa->fa_type;
930 res->scope = fa->fa_scope; 951 res->scope = fa->fa_scope;
931 res->fi = fa->fa_info; 952 res->fi = fa->fa_info;
932 atomic_inc(&res->fi->fib_clntref); 953 if (!(fib_flags & FIB_LOOKUP_NOREF))
954 atomic_inc(&res->fi->fib_clntref);
933 return 0; 955 return 0;
934} 956}
935 957
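FIB_LOOKUP_NOREF, applied in the hunk above, is the payoff of the conversion: a caller that promises to consume res->fi before leaving its rcu_read_lock() section skips the atomic_inc() here and the matching put later, removing an atomic pair from every route lookup. A sketch of the caller contract, assuming the enclosing function already has a struct net *net (use_route() is hypothetical; fib_rules.c above sets the flag inside fib_lookup()):

	struct fib_result res;
	struct flowi flp = { /* destination, tos, scope ... */ };
	int err;

	rcu_read_lock();
	err = fib_lookup(net, &flp, &res);
	if (err == 0)
		use_route(&res);	/* res.fi is not refcounted */
	rcu_read_unlock();
	/* res.fi must not be dereferenced past this point */
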
@@ -1028,10 +1050,10 @@ nla_put_failure:
1028} 1050}
1029 1051
1030/* 1052/*
1031 Update FIB if: 1053 * Update FIB if:
1032 - local address disappeared -> we must delete all the entries 1054 * - local address disappeared -> we must delete all the entries
1033 referring to it. 1055 * referring to it.
1034 - device went down -> we must shutdown all nexthops going via it. 1056 * - device went down -> we must shutdown all nexthops going via it.
1035 */ 1057 */
1036int fib_sync_down_addr(struct net *net, __be32 local) 1058int fib_sync_down_addr(struct net *net, __be32 local)
1037{ 1059{
@@ -1078,7 +1100,7 @@ int fib_sync_down_dev(struct net_device *dev, int force)
1078 prev_fi = fi; 1100 prev_fi = fi;
1079 dead = 0; 1101 dead = 0;
1080 change_nexthops(fi) { 1102 change_nexthops(fi) {
1081 if (nexthop_nh->nh_flags&RTNH_F_DEAD) 1103 if (nexthop_nh->nh_flags & RTNH_F_DEAD)
1082 dead++; 1104 dead++;
1083 else if (nexthop_nh->nh_dev == dev && 1105 else if (nexthop_nh->nh_dev == dev &&
1084 nexthop_nh->nh_scope != scope) { 1106 nexthop_nh->nh_scope != scope) {
@@ -1110,10 +1132,9 @@ int fib_sync_down_dev(struct net_device *dev, int force)
1110#ifdef CONFIG_IP_ROUTE_MULTIPATH 1132#ifdef CONFIG_IP_ROUTE_MULTIPATH
1111 1133
1112/* 1134/*
1113 Dead device goes up. We wake up dead nexthops. 1135 * Dead device goes up. We wake up dead nexthops.
1114 It takes sense only on multipath routes. 1136 * It takes sense only on multipath routes.
1115 */ 1137 */
1116
1117int fib_sync_up(struct net_device *dev) 1138int fib_sync_up(struct net_device *dev)
1118{ 1139{
1119 struct fib_info *prev_fi; 1140 struct fib_info *prev_fi;
@@ -1123,7 +1144,7 @@ int fib_sync_up(struct net_device *dev)
1123 struct fib_nh *nh; 1144 struct fib_nh *nh;
1124 int ret; 1145 int ret;
1125 1146
1126 if (!(dev->flags&IFF_UP)) 1147 if (!(dev->flags & IFF_UP))
1127 return 0; 1148 return 0;
1128 1149
1129 prev_fi = NULL; 1150 prev_fi = NULL;
@@ -1142,12 +1163,12 @@ int fib_sync_up(struct net_device *dev)
1142 prev_fi = fi; 1163 prev_fi = fi;
1143 alive = 0; 1164 alive = 0;
1144 change_nexthops(fi) { 1165 change_nexthops(fi) {
1145 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { 1166 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
1146 alive++; 1167 alive++;
1147 continue; 1168 continue;
1148 } 1169 }
1149 if (nexthop_nh->nh_dev == NULL || 1170 if (nexthop_nh->nh_dev == NULL ||
1150 !(nexthop_nh->nh_dev->flags&IFF_UP)) 1171 !(nexthop_nh->nh_dev->flags & IFF_UP))
1151 continue; 1172 continue;
1152 if (nexthop_nh->nh_dev != dev || 1173 if (nexthop_nh->nh_dev != dev ||
1153 !__in_dev_get_rtnl(dev)) 1174 !__in_dev_get_rtnl(dev))
@@ -1169,10 +1190,9 @@ int fib_sync_up(struct net_device *dev)
1169} 1190}
1170 1191
1171/* 1192/*
1172 The algorithm is suboptimal, but it provides really 1193 * The algorithm is suboptimal, but it provides really
1173 fair weighted route distribution. 1194 * fair weighted route distribution.
1174 */ 1195 */
1175
1176void fib_select_multipath(const struct flowi *flp, struct fib_result *res) 1196void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1177{ 1197{
1178 struct fib_info *fi = res->fi; 1198 struct fib_info *fi = res->fi;
@@ -1182,7 +1202,7 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1182 if (fi->fib_power <= 0) { 1202 if (fi->fib_power <= 0) {
1183 int power = 0; 1203 int power = 0;
1184 change_nexthops(fi) { 1204 change_nexthops(fi) {
1185 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { 1205 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
1186 power += nexthop_nh->nh_weight; 1206 power += nexthop_nh->nh_weight;
1187 nexthop_nh->nh_power = nexthop_nh->nh_weight; 1207 nexthop_nh->nh_power = nexthop_nh->nh_weight;
1188 } 1208 }
@@ -1198,15 +1218,16 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1198 1218
1199 1219
1200 /* w should be random number [0..fi->fib_power-1], 1220 /* w should be random number [0..fi->fib_power-1],
1201 it is pretty bad approximation. 1221 * it is pretty bad approximation.
1202 */ 1222 */
1203 1223
1204 w = jiffies % fi->fib_power; 1224 w = jiffies % fi->fib_power;
1205 1225
1206 change_nexthops(fi) { 1226 change_nexthops(fi) {
1207 if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) && 1227 if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) &&
1208 nexthop_nh->nh_power) { 1228 nexthop_nh->nh_power) {
1209 if ((w -= nexthop_nh->nh_power) <= 0) { 1229 w -= nexthop_nh->nh_power;
1230 if (w <= 0) {
1210 nexthop_nh->nh_power--; 1231 nexthop_nh->nh_power--;
1211 fi->fib_power--; 1232 fi->fib_power--;
1212 res->nh_sel = nhsel; 1233 res->nh_sel = nhsel;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 4a8e370862bc..cd5e13aee7d5 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -186,9 +186,7 @@ static inline struct tnode *node_parent_rcu(struct node *node)
186{ 186{
187 struct tnode *ret = node_parent(node); 187 struct tnode *ret = node_parent(node);
188 188
189 return rcu_dereference_check(ret, 189 return rcu_dereference_rtnl(ret);
190 rcu_read_lock_held() ||
191 lockdep_rtnl_is_held());
192} 190}
193 191
194/* Same as rcu_assign_pointer 192/* Same as rcu_assign_pointer
@@ -211,9 +209,7 @@ static inline struct node *tnode_get_child_rcu(struct tnode *tn, unsigned int i)
211{ 209{
212 struct node *ret = tnode_get_child(tn, i); 210 struct node *ret = tnode_get_child(tn, i);
213 211
214 return rcu_dereference_check(ret, 212 return rcu_dereference_rtnl(ret);
215 rcu_read_lock_held() ||
216 lockdep_rtnl_is_held());
217} 213}
218 214
219static inline int tnode_child_length(const struct tnode *tn) 215static inline int tnode_child_length(const struct tnode *tn)
@@ -459,8 +455,8 @@ static struct tnode *tnode_new(t_key key, int pos, int bits)
459 tn->empty_children = 1<<bits; 455 tn->empty_children = 1<<bits;
460 } 456 }
461 457
462 pr_debug("AT %p s=%u %lu\n", tn, (unsigned int) sizeof(struct tnode), 458 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
463 (unsigned long) (sizeof(struct node) << bits)); 459 sizeof(struct node) << bits);
464 return tn; 460 return tn;
465} 461}
466 462
@@ -609,11 +605,10 @@ static struct node *resize(struct trie *t, struct tnode *tn)
609 605
610 /* Keep root node larger */ 606 /* Keep root node larger */
611 607
612 if (!node_parent((struct node*) tn)) { 608 if (!node_parent((struct node *)tn)) {
613 inflate_threshold_use = inflate_threshold_root; 609 inflate_threshold_use = inflate_threshold_root;
614 halve_threshold_use = halve_threshold_root; 610 halve_threshold_use = halve_threshold_root;
615 } 611 } else {
616 else {
617 inflate_threshold_use = inflate_threshold; 612 inflate_threshold_use = inflate_threshold;
618 halve_threshold_use = halve_threshold; 613 halve_threshold_use = halve_threshold;
619 } 614 }
@@ -639,7 +634,7 @@ static struct node *resize(struct trie *t, struct tnode *tn)
639 check_tnode(tn); 634 check_tnode(tn);
640 635
641 /* Return if at least one inflate is run */ 636 /* Return if at least one inflate is run */
642 if( max_work != MAX_WORK) 637 if (max_work != MAX_WORK)
643 return (struct node *) tn; 638 return (struct node *) tn;
644 639
645 /* 640 /*
@@ -966,9 +961,7 @@ fib_find_node(struct trie *t, u32 key)
966 struct node *n; 961 struct node *n;
967 962
968 pos = 0; 963 pos = 0;
969 n = rcu_dereference_check(t->trie, 964 n = rcu_dereference_rtnl(t->trie);
970 rcu_read_lock_held() ||
971 lockdep_rtnl_is_held());
972 965
973 while (n != NULL && NODE_TYPE(n) == T_TNODE) { 966 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
974 tn = (struct tnode *) n; 967 tn = (struct tnode *) n;
@@ -1349,7 +1342,7 @@ err:
1349/* should be called with rcu_read_lock */ 1342/* should be called with rcu_read_lock */
1350static int check_leaf(struct trie *t, struct leaf *l, 1343static int check_leaf(struct trie *t, struct leaf *l,
1351 t_key key, const struct flowi *flp, 1344 t_key key, const struct flowi *flp,
1352 struct fib_result *res) 1345 struct fib_result *res, int fib_flags)
1353{ 1346{
1354 struct leaf_info *li; 1347 struct leaf_info *li;
1355 struct hlist_head *hhead = &l->list; 1348 struct hlist_head *hhead = &l->list;
@@ -1363,7 +1356,7 @@ static int check_leaf(struct trie *t, struct leaf *l,
1363 if (l->key != (key & ntohl(mask))) 1356 if (l->key != (key & ntohl(mask)))
1364 continue; 1357 continue;
1365 1358
1366 err = fib_semantic_match(&li->falh, flp, res, plen); 1359 err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags);
1367 1360
1368#ifdef CONFIG_IP_FIB_TRIE_STATS 1361#ifdef CONFIG_IP_FIB_TRIE_STATS
1369 if (err <= 0) 1362 if (err <= 0)
@@ -1379,7 +1372,7 @@ static int check_leaf(struct trie *t, struct leaf *l,
1379} 1372}
1380 1373
1381int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, 1374int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1382 struct fib_result *res) 1375 struct fib_result *res, int fib_flags)
1383{ 1376{
1384 struct trie *t = (struct trie *) tb->tb_data; 1377 struct trie *t = (struct trie *) tb->tb_data;
1385 int ret; 1378 int ret;
@@ -1391,8 +1384,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1391 t_key cindex = 0; 1384 t_key cindex = 0;
1392 int current_prefix_length = KEYLENGTH; 1385 int current_prefix_length = KEYLENGTH;
1393 struct tnode *cn; 1386 struct tnode *cn;
1394 t_key node_prefix, key_prefix, pref_mismatch; 1387 t_key pref_mismatch;
1395 int mp;
1396 1388
1397 rcu_read_lock(); 1389 rcu_read_lock();
1398 1390
@@ -1406,7 +1398,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1406 1398
1407 /* Just a leaf? */ 1399 /* Just a leaf? */
1408 if (IS_LEAF(n)) { 1400 if (IS_LEAF(n)) {
1409 ret = check_leaf(t, (struct leaf *)n, key, flp, res); 1401 ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags);
1410 goto found; 1402 goto found;
1411 } 1403 }
1412 1404
@@ -1431,7 +1423,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1431 } 1423 }
1432 1424
1433 if (IS_LEAF(n)) { 1425 if (IS_LEAF(n)) {
1434 ret = check_leaf(t, (struct leaf *)n, key, flp, res); 1426 ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags);
1435 if (ret > 0) 1427 if (ret > 0)
1436 goto backtrace; 1428 goto backtrace;
1437 goto found; 1429 goto found;
@@ -1507,10 +1499,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1507 * matching prefix. 1499 * matching prefix.
1508 */ 1500 */
1509 1501
1510 node_prefix = mask_pfx(cn->key, cn->pos); 1502 pref_mismatch = mask_pfx(cn->key ^ key, cn->pos);
1511 key_prefix = mask_pfx(key, cn->pos);
1512 pref_mismatch = key_prefix^node_prefix;
1513 mp = 0;
1514 1503
1515 /* 1504 /*
1516 * In short: If skipped bits in this node do not match 1505 * In short: If skipped bits in this node do not match
@@ -1518,13 +1507,9 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp,
1518 * state.directly. 1507 * state.directly.
1519 */ 1508 */
1520 if (pref_mismatch) { 1509 if (pref_mismatch) {
1521 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) { 1510 int mp = KEYLENGTH - fls(pref_mismatch);
1522 mp++;
1523 pref_mismatch = pref_mismatch << 1;
1524 }
1525 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1526 1511
1527 if (key_prefix != 0) 1512 if (tkey_extract_bits(cn->key, mp, cn->pos - mp) != 0)
1528 goto backtrace; 1513 goto backtrace;
1529 1514
1530 if (current_prefix_length >= cn->pos) 1515 if (current_prefix_length >= cn->pos)
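The open-coded loop removed above, which shifted pref_mismatch left until its top bit reached position KEYLENGTH-1, was just counting leading zeros; for a 32-bit key that count is exactly KEYLENGTH - fls(x), where fls() returns the 1-based index of the highest set bit. A standalone check of the identity (fls32() mirrors the kernel's fls(); both sides assume x != 0, which the `if (pref_mismatch)` guard ensures):

#include <stdio.h>
#include <assert.h>

#define KEYLENGTH 32

static int fls32(unsigned int x)	/* mirrors the kernel's fls() */
{
	return x ? 32 - __builtin_clz(x) : 0;
}

static int loop_version(unsigned int x)	/* the code being replaced */
{
	int mp = 0;

	while (!(x & (1u << (KEYLENGTH - 1)))) {
		mp++;
		x <<= 1;
	}
	return mp;
}

int main(void)
{
	unsigned int x;

	for (x = 1; x != 0; x <<= 1)
		assert(KEYLENGTH - fls32(x) == loop_version(x));
	printf("ok\n");
	return 0;
}
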
@@ -1748,16 +1733,14 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct node *c)
1748 1733
1749 /* Node empty, walk back up to parent */ 1734 /* Node empty, walk back up to parent */
1750 c = (struct node *) p; 1735 c = (struct node *) p;
1751 } while ( (p = node_parent_rcu(c)) != NULL); 1736 } while ((p = node_parent_rcu(c)) != NULL);
1752 1737
1753 return NULL; /* Root of trie */ 1738 return NULL; /* Root of trie */
1754} 1739}
1755 1740
1756static struct leaf *trie_firstleaf(struct trie *t) 1741static struct leaf *trie_firstleaf(struct trie *t)
1757{ 1742{
1758 struct tnode *n = (struct tnode *) rcu_dereference_check(t->trie, 1743 struct tnode *n = (struct tnode *)rcu_dereference_rtnl(t->trie);
1759 rcu_read_lock_held() ||
1760 lockdep_rtnl_is_held());
1761 1744
1762 if (!n) 1745 if (!n)
1763 return NULL; 1746 return NULL;
@@ -1855,7 +1838,8 @@ void fib_table_select_default(struct fib_table *tb,
1855 if (!next_fi->fib_nh[0].nh_gw || 1838 if (!next_fi->fib_nh[0].nh_gw ||
1856 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) 1839 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1857 continue; 1840 continue;
1858 fa->fa_state |= FA_S_ACCESSED; 1841
1842 fib_alias_accessed(fa);
1859 1843
1860 if (fi == NULL) { 1844 if (fi == NULL) {
1861 if (next_fi != res->fi) 1845 if (next_fi != res->fi)
@@ -2043,14 +2027,14 @@ struct fib_trie_iter {
2043 struct seq_net_private p; 2027 struct seq_net_private p;
2044 struct fib_table *tb; 2028 struct fib_table *tb;
2045 struct tnode *tnode; 2029 struct tnode *tnode;
2046 unsigned index; 2030 unsigned int index;
2047 unsigned depth; 2031 unsigned int depth;
2048}; 2032};
2049 2033
2050static struct node *fib_trie_get_next(struct fib_trie_iter *iter) 2034static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
2051{ 2035{
2052 struct tnode *tn = iter->tnode; 2036 struct tnode *tn = iter->tnode;
2053 unsigned cindex = iter->index; 2037 unsigned int cindex = iter->index;
2054 struct tnode *p; 2038 struct tnode *p;
2055 2039
2056 /* A single entry routing table */ 2040 /* A single entry routing table */
@@ -2159,7 +2143,7 @@ static void trie_collect_stats(struct trie *t, struct trie_stat *s)
2159 */ 2143 */
2160static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) 2144static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
2161{ 2145{
2162 unsigned i, max, pointers, bytes, avdepth; 2146 unsigned int i, max, pointers, bytes, avdepth;
2163 2147
2164 if (stat->leaves) 2148 if (stat->leaves)
2165 avdepth = stat->totdepth*100 / stat->leaves; 2149 avdepth = stat->totdepth*100 / stat->leaves;
@@ -2356,7 +2340,8 @@ static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2356 2340
2357static void seq_indent(struct seq_file *seq, int n) 2341static void seq_indent(struct seq_file *seq, int n)
2358{ 2342{
2359 while (n-- > 0) seq_puts(seq, " "); 2343 while (n-- > 0)
2344 seq_puts(seq, " ");
2360} 2345}
2361 2346
2362static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) 2347static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
@@ -2388,7 +2373,7 @@ static const char *const rtn_type_names[__RTN_MAX] = {
2388 [RTN_XRESOLVE] = "XRESOLVE", 2373 [RTN_XRESOLVE] = "XRESOLVE",
2389}; 2374};
2390 2375
2391static inline const char *rtn_type(char *buf, size_t len, unsigned t) 2376static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
2392{ 2377{
2393 if (t < __RTN_MAX && rtn_type_names[t]) 2378 if (t < __RTN_MAX && rtn_type_names[t])
2394 return rtn_type_names[t]; 2379 return rtn_type_names[t];
@@ -2544,13 +2529,12 @@ static void fib_route_seq_stop(struct seq_file *seq, void *v)
2544 rcu_read_unlock(); 2529 rcu_read_unlock();
2545} 2530}
2546 2531
2547static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi) 2532static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2548{ 2533{
2549 static unsigned type2flags[RTN_MAX + 1] = { 2534 unsigned int flags = 0;
2550 [7] = RTF_REJECT, [8] = RTF_REJECT,
2551 };
2552 unsigned flags = type2flags[type];
2553 2535
2536 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2537 flags = RTF_REJECT;
2554 if (fi && fi->fib_nh->nh_gw) 2538 if (fi && fi->fib_nh->nh_gw)
2555 flags |= RTF_GATEWAY; 2539 flags |= RTF_GATEWAY;
2556 if (mask == htonl(0xFFFFFFFF)) 2540 if (mask == htonl(0xFFFFFFFF))
@@ -2562,7 +2546,7 @@ static unsigned fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
2562/* 2546/*
2563 * This outputs /proc/net/route. 2547 * This outputs /proc/net/route.
2564 * The format of the file is not supposed to be changed 2548 * The format of the file is not supposed to be changed
2565 * and needs to be same as fib_hash output to avoid breaking 2549 * and needs to be same as fib_hash output to avoid breaking
2566 * legacy utilities 2550 * legacy utilities
2567 */ 2551 */
2568static int fib_route_seq_show(struct seq_file *seq, void *v) 2552static int fib_route_seq_show(struct seq_file *seq, void *v)
@@ -2587,7 +2571,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
2587 2571
2588 list_for_each_entry_rcu(fa, &li->falh, fa_list) { 2572 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2589 const struct fib_info *fi = fa->fa_info; 2573 const struct fib_info *fi = fa->fa_info;
2590 unsigned flags = fib_flag_trans(fa->fa_type, mask, fi); 2574 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
2591 int len; 2575 int len;
2592 2576
2593 if (fa->fa_type == RTN_BROADCAST 2577 if (fa->fa_type == RTN_BROADCAST
diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c
new file mode 100644
index 000000000000..caea6885fdbd
--- /dev/null
+++ b/net/ipv4/gre.c
@@ -0,0 +1,151 @@
1/*
2 * GRE over IPv4 demultiplexer driver
3 *
4 * Authors: Dmitry Kozlov (xeb@mail.ru)
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
13#include <linux/module.h>
14#include <linux/kernel.h>
15#include <linux/kmod.h>
16#include <linux/skbuff.h>
17#include <linux/in.h>
18#include <linux/netdevice.h>
19#include <linux/version.h>
20#include <linux/spinlock.h>
21#include <net/protocol.h>
22#include <net/gre.h>
23
24
25static const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly;
26static DEFINE_SPINLOCK(gre_proto_lock);
27
28int gre_add_protocol(const struct gre_protocol *proto, u8 version)
29{
30 if (version >= GREPROTO_MAX)
31 goto err_out;
32
33 spin_lock(&gre_proto_lock);
34 if (gre_proto[version])
35 goto err_out_unlock;
36
37 rcu_assign_pointer(gre_proto[version], proto);
38 spin_unlock(&gre_proto_lock);
39 return 0;
40
41err_out_unlock:
42 spin_unlock(&gre_proto_lock);
43err_out:
44 return -1;
45}
46EXPORT_SYMBOL_GPL(gre_add_protocol);
47
48int gre_del_protocol(const struct gre_protocol *proto, u8 version)
49{
50 if (version >= GREPROTO_MAX)
51 goto err_out;
52
53 spin_lock(&gre_proto_lock);
54 if (gre_proto[version] != proto)
55 goto err_out_unlock;
56 rcu_assign_pointer(gre_proto[version], NULL);
57 spin_unlock(&gre_proto_lock);
58 synchronize_rcu();
59 return 0;
60
61err_out_unlock:
62 spin_unlock(&gre_proto_lock);
63err_out:
64 return -1;
65}
66EXPORT_SYMBOL_GPL(gre_del_protocol);
67
68static int gre_rcv(struct sk_buff *skb)
69{
70 const struct gre_protocol *proto;
71 u8 ver;
72 int ret;
73
74 if (!pskb_may_pull(skb, 12))
75 goto drop;
76
77 ver = skb->data[1]&0x7f;
78 if (ver >= GREPROTO_MAX)
79 goto drop;
80
81 rcu_read_lock();
82 proto = rcu_dereference(gre_proto[ver]);
83 if (!proto || !proto->handler)
84 goto drop_unlock;
85 ret = proto->handler(skb);
86 rcu_read_unlock();
87 return ret;
88
89drop_unlock:
90 rcu_read_unlock();
91drop:
92 kfree_skb(skb);
93 return NET_RX_DROP;
94}
95
96static void gre_err(struct sk_buff *skb, u32 info)
97{
98 const struct gre_protocol *proto;
99 u8 ver;
100
101 if (!pskb_may_pull(skb, 12))
102 goto drop;
103
104 ver = skb->data[1]&0x7f;
105 if (ver >= GREPROTO_MAX)
106 goto drop;
107
108 rcu_read_lock();
109 proto = rcu_dereference(gre_proto[ver]);
110 if (!proto || !proto->err_handler)
111 goto drop_unlock;
112 proto->err_handler(skb, info);
113 rcu_read_unlock();
114 return;
115
116drop_unlock:
117 rcu_read_unlock();
118drop:
119 kfree_skb(skb);
120}
121
122static const struct net_protocol net_gre_protocol = {
123 .handler = gre_rcv,
124 .err_handler = gre_err,
125 .netns_ok = 1,
126};
127
128static int __init gre_init(void)
129{
130 pr_info("GRE over IPv4 demultiplexor driver");
131
132 if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
133 pr_err("gre: can't add protocol\n");
134 return -EAGAIN;
135 }
136
137 return 0;
138}
139
140static void __exit gre_exit(void)
141{
142 inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
143}
144
145module_init(gre_init);
146module_exit(gre_exit);
147
148MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver");
149MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
150MODULE_LICENSE("GPL");
151
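The new demultiplexer gives each GRE version field exactly one owner (PPTP's version 1 is the motivating user, for the pptp work elsewhere in this merge). A sketch of how a client module would plug in, assuming struct gre_protocol in <net/gre.h> carries the .handler/.err_handler pair that gre_rcv()/gre_err() above invoke, and that GREPROTO_PPTP is the version-1 constant:

#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/gre.h>

static int my_gre_rcv(struct sk_buff *skb)
{
	/* consume or free skb, return a NET_RX_* value */
	kfree_skb(skb);
	return NET_RX_DROP;
}

static void my_gre_err(struct sk_buff *skb, u32 info)
{
	/* react to ICMP errors for our GRE version */
}

static const struct gre_protocol my_proto = {
	.handler     = my_gre_rcv,
	.err_handler = my_gre_err,
};

static int __init my_init(void)
{
	/* note: the demux returns -1, not an errno, if the slot is taken */
	return gre_add_protocol(&my_proto, 1 /* GREPROTO_PPTP */);
}

static void __exit my_exit(void)
{
	gre_del_protocol(&my_proto, 1);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");

gre_del_protocol()'s synchronize_rcu() guarantees no CPU is still running the old handler once it returns, so the module can be unloaded safely afterwards.
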
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index a0d847c7cba5..96bc7f9475a3 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -379,7 +379,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
379 inet->tos = ip_hdr(skb)->tos; 379 inet->tos = ip_hdr(skb)->tos;
380 daddr = ipc.addr = rt->rt_src; 380 daddr = ipc.addr = rt->rt_src;
381 ipc.opt = NULL; 381 ipc.opt = NULL;
382 ipc.shtx.flags = 0; 382 ipc.tx_flags = 0;
383 if (icmp_param->replyopts.optlen) { 383 if (icmp_param->replyopts.optlen) {
384 ipc.opt = &icmp_param->replyopts; 384 ipc.opt = &icmp_param->replyopts;
385 if (ipc.opt->srr) 385 if (ipc.opt->srr)
@@ -538,7 +538,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
538 inet_sk(sk)->tos = tos; 538 inet_sk(sk)->tos = tos;
539 ipc.addr = iph->saddr; 539 ipc.addr = iph->saddr;
540 ipc.opt = &icmp_param.replyopts; 540 ipc.opt = &icmp_param.replyopts;
541 ipc.shtx.flags = 0; 541 ipc.tx_flags = 0;
542 542
543 { 543 {
544 struct flowi fl = { 544 struct flowi fl = {
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 2a4bb76f2132..c8877c6c7216 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1269,14 +1269,14 @@ void ip_mc_rejoin_group(struct ip_mc_list *im)
1269 if (im->multiaddr == IGMP_ALL_HOSTS) 1269 if (im->multiaddr == IGMP_ALL_HOSTS)
1270 return; 1270 return;
1271 1271
1272 if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) { 1272 /* a failover is happening and switches
1273 igmp_mod_timer(im, IGMP_Initial_Report_Delay); 1273 * must be notified immediately */
1274 return; 1274 if (IGMP_V1_SEEN(in_dev))
1275 } 1275 igmp_send_report(in_dev, im, IGMP_HOST_MEMBERSHIP_REPORT);
1276 /* else, v3 */ 1276 else if (IGMP_V2_SEEN(in_dev))
1277 im->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : 1277 igmp_send_report(in_dev, im, IGMPV2_HOST_MEMBERSHIP_REPORT);
1278 IGMP_Unsolicited_Report_Count; 1278 else
1279 igmp_ifc_event(in_dev); 1279 igmp_send_report(in_dev, im, IGMPV3_HOST_MEMBERSHIP_REPORT);
1280#endif 1280#endif
1281} 1281}
1282EXPORT_SYMBOL(ip_mc_rejoin_group); 1282EXPORT_SYMBOL(ip_mc_rejoin_group);
@@ -1418,6 +1418,7 @@ void ip_mc_destroy_dev(struct in_device *in_dev)
1418 write_unlock_bh(&in_dev->mc_list_lock); 1418 write_unlock_bh(&in_dev->mc_list_lock);
1419} 1419}
1420 1420
1421/* RTNL is locked */
1421static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) 1422static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1422{ 1423{
1423 struct flowi fl = { .nl_u = { .ip4_u = 1424 struct flowi fl = { .nl_u = { .ip4_u =
@@ -1428,15 +1429,12 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
1428 1429
1429 if (imr->imr_ifindex) { 1430 if (imr->imr_ifindex) {
1430 idev = inetdev_by_index(net, imr->imr_ifindex); 1431 idev = inetdev_by_index(net, imr->imr_ifindex);
1431 if (idev)
1432 __in_dev_put(idev);
1433 return idev; 1432 return idev;
1434 } 1433 }
1435 if (imr->imr_address.s_addr) { 1434 if (imr->imr_address.s_addr) {
1436 dev = ip_dev_find(net, imr->imr_address.s_addr); 1435 dev = __ip_dev_find(net, imr->imr_address.s_addr, false);
1437 if (!dev) 1436 if (!dev)
1438 return NULL; 1437 return NULL;
1439 dev_put(dev);
1440 } 1438 }
1441 1439
1442 if (!dev && !ip_route_output_key(net, &rt, &fl)) { 1440 if (!dev && !ip_route_output_key(net, &rt, &fl)) {
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index e5fa2ddce320..ba8042665849 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -425,7 +425,7 @@ static int inet_diag_bc_run(const void *bc, int len,
425 bc += op->no; 425 bc += op->no;
426 } 426 }
427 } 427 }
428 return (len == 0); 428 return len == 0;
429} 429}
430 430
431static int valid_cc(const void *bc, int len, int cc) 431static int valid_cc(const void *bc, int len, int cc)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index fb7ad5a21ff3..1b344f30b463 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -101,19 +101,43 @@ void inet_put_port(struct sock *sk)
101} 101}
102EXPORT_SYMBOL(inet_put_port); 102EXPORT_SYMBOL(inet_put_port);
103 103
104void __inet_inherit_port(struct sock *sk, struct sock *child) 104int __inet_inherit_port(struct sock *sk, struct sock *child)
105{ 105{
106 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; 106 struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
107 const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num, 107 unsigned short port = inet_sk(child)->inet_num;
108 const int bhash = inet_bhashfn(sock_net(sk), port,
108 table->bhash_size); 109 table->bhash_size);
109 struct inet_bind_hashbucket *head = &table->bhash[bhash]; 110 struct inet_bind_hashbucket *head = &table->bhash[bhash];
110 struct inet_bind_bucket *tb; 111 struct inet_bind_bucket *tb;
111 112
112 spin_lock(&head->lock); 113 spin_lock(&head->lock);
113 tb = inet_csk(sk)->icsk_bind_hash; 114 tb = inet_csk(sk)->icsk_bind_hash;
115 if (tb->port != port) {
116 /* NOTE: using tproxy and redirecting skbs to a proxy
117 * on a different listener port breaks the assumption
118 * that the listener socket's icsk_bind_hash is the same
119 * as that of the child socket. We have to look up or
120 * create a new bind bucket for the child here. */
121 struct hlist_node *node;
122 inet_bind_bucket_for_each(tb, node, &head->chain) {
123 if (net_eq(ib_net(tb), sock_net(sk)) &&
124 tb->port == port)
125 break;
126 }
127 if (!node) {
128 tb = inet_bind_bucket_create(table->bind_bucket_cachep,
129 sock_net(sk), head, port);
130 if (!tb) {
131 spin_unlock(&head->lock);
132 return -ENOMEM;
133 }
134 }
135 }
114 sk_add_bind_node(child, &tb->owners); 136 sk_add_bind_node(child, &tb->owners);
115 inet_csk(child)->icsk_bind_hash = tb; 137 inet_csk(child)->icsk_bind_hash = tb;
116 spin_unlock(&head->lock); 138 spin_unlock(&head->lock);
139
140 return 0;
117} 141}
118EXPORT_SYMBOL_GPL(__inet_inherit_port); 142EXPORT_SYMBOL_GPL(__inet_inherit_port);
119 143
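Because tproxy can steer a SYN to a listener bound on a different port than the child socket will use, __inet_inherit_port() may now have to allocate a bind bucket and can therefore fail; its return type changes from void to int and every caller has to unwind on -ENOMEM. Roughly what a call site (e.g. the TCP syn-recv path) turns into; the exit label is a stand-in for the caller's existing error path:

	if (__inet_inherit_port(sk, newsk) < 0) {
		sock_put(newsk);	/* drop the half-set-up child */
		goto exit;
	}
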
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b7c41654dde5..168440834ade 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -116,11 +116,11 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a)
116 struct ip4_create_arg *arg = a; 116 struct ip4_create_arg *arg = a;
117 117
118 qp = container_of(q, struct ipq, q); 118 qp = container_of(q, struct ipq, q);
119 return (qp->id == arg->iph->id && 119 return qp->id == arg->iph->id &&
120 qp->saddr == arg->iph->saddr && 120 qp->saddr == arg->iph->saddr &&
121 qp->daddr == arg->iph->daddr && 121 qp->daddr == arg->iph->daddr &&
122 qp->protocol == arg->iph->protocol && 122 qp->protocol == arg->iph->protocol &&
123 qp->user == arg->user); 123 qp->user == arg->user;
124} 124}
125 125
126/* Memory Tracking Functions. */ 126/* Memory Tracking Functions. */
@@ -542,7 +542,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
542 /* If the first fragment is fragmented itself, we split 542 /* If the first fragment is fragmented itself, we split
543 * it to two chunks: the first with data and paged part 543 * it to two chunks: the first with data and paged part
544 * and the second, holding only fragments. */ 544 * and the second, holding only fragments. */
545 if (skb_has_frags(head)) { 545 if (skb_has_frag_list(head)) {
546 struct sk_buff *clone; 546 struct sk_buff *clone;
547 int i, plen = 0; 547 int i, plen = 0;
548 548
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 35c93e8b6a46..d0ffcbe369b7 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -44,6 +44,7 @@
44#include <net/net_namespace.h> 44#include <net/net_namespace.h>
45#include <net/netns/generic.h> 45#include <net/netns/generic.h>
46#include <net/rtnetlink.h> 46#include <net/rtnetlink.h>
47#include <net/gre.h>
47 48
48#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) 49#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
49#include <net/ipv6.h> 50#include <net/ipv6.h>
@@ -63,13 +64,13 @@
63 We cannot track such dead loops during route installation, 64 We cannot track such dead loops during route installation,
64 it is infeasible task. The most general solutions would be 65 it is infeasible task. The most general solutions would be
65 to keep skb->encapsulation counter (sort of local ttl), 66 to keep skb->encapsulation counter (sort of local ttl),
66 and silently drop packet when it expires. It is the best 67 and silently drop packet when it expires. It is a good
67 solution, but it supposes maintaing new variable in ALL 68 solution, but it supposes maintaing new variable in ALL
68 skb, even if no tunneling is used. 69 skb, even if no tunneling is used.
69 70
70 Current solution: HARD_TX_LOCK lock breaks dead loops. 71 Current solution: xmit_recursion breaks dead loops. This is a percpu
71 72 counter, since when we enter the first ndo_xmit(), cpu migration is
72 73 forbidden. We force an exit if this counter reaches RECURSION_LIMIT
73 74
74 2. Networking dead loops would not kill routers, but would really 75 2. Networking dead loops would not kill routers, but would really
75 kill network. IP hop limit plays role of "t->recursion" in this case, 76 kill network. IP hop limit plays role of "t->recursion" in this case,
@@ -128,7 +129,7 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev);
128 129
129static int ipgre_net_id __read_mostly; 130static int ipgre_net_id __read_mostly;
130struct ipgre_net { 131struct ipgre_net {
131 struct ip_tunnel *tunnels[4][HASH_SIZE]; 132 struct ip_tunnel __rcu *tunnels[4][HASH_SIZE];
132 133
133 struct net_device *fb_tunnel_dev; 134 struct net_device *fb_tunnel_dev;
134}; 135};
@@ -158,13 +159,40 @@ struct ipgre_net {
158#define tunnels_l tunnels[1] 159#define tunnels_l tunnels[1]
159#define tunnels_wc tunnels[0] 160#define tunnels_wc tunnels[0]
160/* 161/*
161 * Locking : hash tables are protected by RCU and a spinlock 162 * Locking : hash tables are protected by RCU and RTNL
162 */ 163 */
163static DEFINE_SPINLOCK(ipgre_lock);
164 164
165#define for_each_ip_tunnel_rcu(start) \ 165#define for_each_ip_tunnel_rcu(start) \
166 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 166 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
167 167
168/* often modified stats are per cpu, other are shared (netdev->stats) */
169struct pcpu_tstats {
170 unsigned long rx_packets;
171 unsigned long rx_bytes;
172 unsigned long tx_packets;
173 unsigned long tx_bytes;
174};
175
176static struct net_device_stats *ipgre_get_stats(struct net_device *dev)
177{
178 struct pcpu_tstats sum = { 0 };
179 int i;
180
181 for_each_possible_cpu(i) {
182 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
183
184 sum.rx_packets += tstats->rx_packets;
185 sum.rx_bytes += tstats->rx_bytes;
186 sum.tx_packets += tstats->tx_packets;
187 sum.tx_bytes += tstats->tx_bytes;
188 }
189 dev->stats.rx_packets = sum.rx_packets;
190 dev->stats.rx_bytes = sum.rx_bytes;
191 dev->stats.tx_packets = sum.tx_packets;
192 dev->stats.tx_bytes = sum.tx_bytes;
193 return &dev->stats;
194}
195
168/* Given src, dst and key, find appropriate for input tunnel. */ 196/* Given src, dst and key, find appropriate for input tunnel. */
169 197
170static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, 198static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
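Moving the byte/packet counts into struct pcpu_tstats removes cache-line contention between CPUs forwarding over the same tunnel: each CPU bumps its own counters locklessly and ipgre_get_stats() folds them on demand, accepting slightly stale sums, which is fine for statistics. The other half of the pattern, with the allocation presumably done in the tunnel init path of this same patch:

	/* at device setup (sketch) */
	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	/* per packet, on the CPU that handled it */
	struct pcpu_tstats *tstats = this_cpu_ptr(dev->tstats);

	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
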
@@ -173,8 +201,8 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
173{ 201{
174 struct net *net = dev_net(dev); 202 struct net *net = dev_net(dev);
175 int link = dev->ifindex; 203 int link = dev->ifindex;
176 unsigned h0 = HASH(remote); 204 unsigned int h0 = HASH(remote);
177 unsigned h1 = HASH(key); 205 unsigned int h1 = HASH(key);
178 struct ip_tunnel *t, *cand = NULL; 206 struct ip_tunnel *t, *cand = NULL;
179 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 207 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
180 int dev_type = (gre_proto == htons(ETH_P_TEB)) ? 208 int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
@@ -289,13 +317,13 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
289 return NULL; 317 return NULL;
290} 318}
291 319
292static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign, 320static struct ip_tunnel __rcu **__ipgre_bucket(struct ipgre_net *ign,
293 struct ip_tunnel_parm *parms) 321 struct ip_tunnel_parm *parms)
294{ 322{
295 __be32 remote = parms->iph.daddr; 323 __be32 remote = parms->iph.daddr;
296 __be32 local = parms->iph.saddr; 324 __be32 local = parms->iph.saddr;
297 __be32 key = parms->i_key; 325 __be32 key = parms->i_key;
298 unsigned h = HASH(key); 326 unsigned int h = HASH(key);
299 int prio = 0; 327 int prio = 0;
300 328
301 if (local) 329 if (local)
@@ -308,7 +336,7 @@ static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
308 return &ign->tunnels[prio][h]; 336 return &ign->tunnels[prio][h];
309} 337}
310 338
311static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign, 339static inline struct ip_tunnel __rcu **ipgre_bucket(struct ipgre_net *ign,
312 struct ip_tunnel *t) 340 struct ip_tunnel *t)
313{ 341{
314 return __ipgre_bucket(ign, &t->parms); 342 return __ipgre_bucket(ign, &t->parms);
@@ -316,23 +344,22 @@ static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
316 344
317static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t) 345static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
318{ 346{
319 struct ip_tunnel **tp = ipgre_bucket(ign, t); 347 struct ip_tunnel __rcu **tp = ipgre_bucket(ign, t);
320 348
321 spin_lock_bh(&ipgre_lock); 349 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
322 t->next = *tp;
323 rcu_assign_pointer(*tp, t); 350 rcu_assign_pointer(*tp, t);
324 spin_unlock_bh(&ipgre_lock);
325} 351}
326 352
327static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) 353static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
328{ 354{
329 struct ip_tunnel **tp; 355 struct ip_tunnel __rcu **tp;
330 356 struct ip_tunnel *iter;
331 for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) { 357
332 if (t == *tp) { 358 for (tp = ipgre_bucket(ign, t);
333 spin_lock_bh(&ipgre_lock); 359 (iter = rtnl_dereference(*tp)) != NULL;
334 *tp = t->next; 360 tp = &iter->next) {
335 spin_unlock_bh(&ipgre_lock); 361 if (t == iter) {
362 rcu_assign_pointer(*tp, t->next);
336 break; 363 break;
337 } 364 }
338 } 365 }
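With all writers already serialized by RTNL, the ipgre_lock spinlock removed above bought nothing: publication to readers only needs rcu_assign_pointer(), and the update side can walk the chain with rtnl_dereference() (no RCU read lock needed, since holding RTNL pins it). The generic shape of such a singly-linked RCU list update, as a sketch:

#include <linux/rtnetlink.h>

struct node {
	int key;
	struct node __rcu *next;
};

static struct node __rcu *head;	/* writers serialized by RTNL */

static void node_link(struct node *n)
{
	rcu_assign_pointer(n->next, rtnl_dereference(head));
	rcu_assign_pointer(head, n);	/* publish last */
}

static void node_unlink(struct node *n)
{
	struct node __rcu **pp = &head;
	struct node *iter;

	for (; (iter = rtnl_dereference(*pp)) != NULL; pp = &iter->next) {
		if (iter == n) {
			rcu_assign_pointer(*pp, n->next);
			break;
		}
	}
	/* caller frees n only after a grace period */
}
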
@@ -346,10 +373,13 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
346 __be32 local = parms->iph.saddr; 373 __be32 local = parms->iph.saddr;
347 __be32 key = parms->i_key; 374 __be32 key = parms->i_key;
348 int link = parms->link; 375 int link = parms->link;
349 struct ip_tunnel *t, **tp; 376 struct ip_tunnel *t;
377 struct ip_tunnel __rcu **tp;
350 struct ipgre_net *ign = net_generic(net, ipgre_net_id); 378 struct ipgre_net *ign = net_generic(net, ipgre_net_id);
351 379
352 for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) 380 for (tp = __ipgre_bucket(ign, parms);
381 (t = rtnl_dereference(*tp)) != NULL;
382 tp = &t->next)
353 if (local == t->parms.iph.saddr && 383 if (local == t->parms.iph.saddr &&
354 remote == t->parms.iph.daddr && 384 remote == t->parms.iph.daddr &&
355 key == t->parms.i_key && 385 key == t->parms.i_key &&
@@ -360,7 +390,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
360 return t; 390 return t;
361} 391}
362 392
363static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, 393static struct ip_tunnel *ipgre_tunnel_locate(struct net *net,
364 struct ip_tunnel_parm *parms, int create) 394 struct ip_tunnel_parm *parms, int create)
365{ 395{
366 struct ip_tunnel *t, *nt; 396 struct ip_tunnel *t, *nt;
@@ -582,7 +612,7 @@ static int ipgre_rcv(struct sk_buff *skb)
582 if ((tunnel = ipgre_tunnel_lookup(skb->dev, 612 if ((tunnel = ipgre_tunnel_lookup(skb->dev,
583 iph->saddr, iph->daddr, key, 613 iph->saddr, iph->daddr, key,
584 gre_proto))) { 614 gre_proto))) {
585 struct net_device_stats *stats = &tunnel->dev->stats; 615 struct pcpu_tstats *tstats;
586 616
587 secpath_reset(skb); 617 secpath_reset(skb);
588 618
@@ -606,22 +636,22 @@ static int ipgre_rcv(struct sk_buff *skb)
606 /* Looped back packet, drop it! */ 636 /* Looped back packet, drop it! */
607 if (skb_rtable(skb)->fl.iif == 0) 637 if (skb_rtable(skb)->fl.iif == 0)
608 goto drop; 638 goto drop;
609 stats->multicast++; 639 tunnel->dev->stats.multicast++;
610 skb->pkt_type = PACKET_BROADCAST; 640 skb->pkt_type = PACKET_BROADCAST;
611 } 641 }
612#endif 642#endif
613 643
614 if (((flags&GRE_CSUM) && csum) || 644 if (((flags&GRE_CSUM) && csum) ||
615 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { 645 (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
616 stats->rx_crc_errors++; 646 tunnel->dev->stats.rx_crc_errors++;
617 stats->rx_errors++; 647 tunnel->dev->stats.rx_errors++;
618 goto drop; 648 goto drop;
619 } 649 }
620 if (tunnel->parms.i_flags&GRE_SEQ) { 650 if (tunnel->parms.i_flags&GRE_SEQ) {
621 if (!(flags&GRE_SEQ) || 651 if (!(flags&GRE_SEQ) ||
622 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { 652 (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
623 stats->rx_fifo_errors++; 653 tunnel->dev->stats.rx_fifo_errors++;
624 stats->rx_errors++; 654 tunnel->dev->stats.rx_errors++;
625 goto drop; 655 goto drop;
626 } 656 }
627 tunnel->i_seqno = seqno + 1; 657 tunnel->i_seqno = seqno + 1;
@@ -630,8 +660,8 @@ static int ipgre_rcv(struct sk_buff *skb)
630 /* Warning: All skb pointers will be invalidated! */ 660 /* Warning: All skb pointers will be invalidated! */
631 if (tunnel->dev->type == ARPHRD_ETHER) { 661 if (tunnel->dev->type == ARPHRD_ETHER) {
632 if (!pskb_may_pull(skb, ETH_HLEN)) { 662 if (!pskb_may_pull(skb, ETH_HLEN)) {
633 stats->rx_length_errors++; 663 tunnel->dev->stats.rx_length_errors++;
634 stats->rx_errors++; 664 tunnel->dev->stats.rx_errors++;
635 goto drop; 665 goto drop;
636 } 666 }
637 667
@@ -640,14 +670,19 @@ static int ipgre_rcv(struct sk_buff *skb)
640 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); 670 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
641 } 671 }
642 672
643 skb_tunnel_rx(skb, tunnel->dev); 673 tstats = this_cpu_ptr(tunnel->dev->tstats);
674 tstats->rx_packets++;
675 tstats->rx_bytes += skb->len;
676
677 __skb_tunnel_rx(skb, tunnel->dev);
644 678
645 skb_reset_network_header(skb); 679 skb_reset_network_header(skb);
646 ipgre_ecn_decapsulate(iph, skb); 680 ipgre_ecn_decapsulate(iph, skb);
647 681
648 netif_rx(skb); 682 netif_rx(skb);
683
649 rcu_read_unlock(); 684 rcu_read_unlock();
650 return(0); 685 return 0;
651 } 686 }
652 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 687 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
653 688
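
This hunk is the receive half of the per-CPU accounting conversion: __skb_tunnel_rx() is the variant of skb_tunnel_rx() that skips the shared netdev counters, so the caller can bump lockless per-CPU ones instead. A hedged sketch of the allocation and the fast-path update, with demo_* names of my own:

#include <linux/errno.h>
#include <linux/percpu.h>
#include <linux/skbuff.h>

/* mirrors the pcpu_tstats struct the patch adds per tunnel driver */
struct demo_tstats {
        unsigned long rx_packets;
        unsigned long rx_bytes;
};

static struct demo_tstats __percpu *demo_stats;

static int demo_init(void)
{
        demo_stats = alloc_percpu(struct demo_tstats);
        return demo_stats ? 0 : -ENOMEM;
}

/* runs in softirq context, so the per-CPU slot cannot migrate */
static void demo_rx_account(struct sk_buff *skb)
{
        struct demo_tstats *tstats = this_cpu_ptr(demo_stats);

        tstats->rx_packets++;
        tstats->rx_bytes += skb->len;
}

static void demo_exit(void)
{
        free_percpu(demo_stats);
}

No lock is needed because each CPU only writes its own slot; an ndo_get_stats handler later folds the slots together, as the ipip.c hunks further down show in full.
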
@@ -655,20 +690,19 @@ drop:
655 rcu_read_unlock(); 690 rcu_read_unlock();
656drop_nolock: 691drop_nolock:
657 kfree_skb(skb); 692 kfree_skb(skb);
658 return(0); 693 return 0;
659} 694}
660 695
661static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 696static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
662{ 697{
663 struct ip_tunnel *tunnel = netdev_priv(dev); 698 struct ip_tunnel *tunnel = netdev_priv(dev);
664 struct net_device_stats *stats = &dev->stats; 699 struct pcpu_tstats *tstats;
665 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
666 struct iphdr *old_iph = ip_hdr(skb); 700 struct iphdr *old_iph = ip_hdr(skb);
667 struct iphdr *tiph; 701 struct iphdr *tiph;
668 u8 tos; 702 u8 tos;
669 __be16 df; 703 __be16 df;
670 struct rtable *rt; /* Route to the other host */ 704 struct rtable *rt; /* Route to the other host */
671 struct net_device *tdev; /* Device to other host */ 705 struct net_device *tdev; /* Device to other host */
672 struct iphdr *iph; /* Our new IP header */ 706 struct iphdr *iph; /* Our new IP header */
673 unsigned int max_headroom; /* The extra header space needed */ 707 unsigned int max_headroom; /* The extra header space needed */
674 int gre_hlen; 708 int gre_hlen;
@@ -690,7 +724,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
690 /* NBMA tunnel */ 724 /* NBMA tunnel */
691 725
692 if (skb_dst(skb) == NULL) { 726 if (skb_dst(skb) == NULL) {
693 stats->tx_fifo_errors++; 727 dev->stats.tx_fifo_errors++;
694 goto tx_error; 728 goto tx_error;
695 } 729 }
696 730
@@ -736,14 +770,20 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
736 } 770 }
737 771
738 { 772 {
739 struct flowi fl = { .oif = tunnel->parms.link, 773 struct flowi fl = {
740 .nl_u = { .ip4_u = 774 .oif = tunnel->parms.link,
741 { .daddr = dst, 775 .nl_u = {
742 .saddr = tiph->saddr, 776 .ip4_u = {
743 .tos = RT_TOS(tos) } }, 777 .daddr = dst,
744 .proto = IPPROTO_GRE }; 778 .saddr = tiph->saddr,
779 .tos = RT_TOS(tos)
780 }
781 },
782 .proto = IPPROTO_GRE
783 }
 783 }
784;
745 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 785 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
746 stats->tx_carrier_errors++; 786 dev->stats.tx_carrier_errors++;
747 goto tx_error; 787 goto tx_error;
748 } 788 }
749 } 789 }
@@ -751,7 +791,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
751 791
752 if (tdev == dev) { 792 if (tdev == dev) {
753 ip_rt_put(rt); 793 ip_rt_put(rt);
754 stats->collisions++; 794 dev->stats.collisions++;
755 goto tx_error; 795 goto tx_error;
756 } 796 }
757 797
@@ -814,7 +854,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
814 dev->needed_headroom = max_headroom; 854 dev->needed_headroom = max_headroom;
815 if (!new_skb) { 855 if (!new_skb) {
816 ip_rt_put(rt); 856 ip_rt_put(rt);
817 txq->tx_dropped++; 857 dev->stats.tx_dropped++;
818 dev_kfree_skb(skb); 858 dev_kfree_skb(skb);
819 return NETDEV_TX_OK; 859 return NETDEV_TX_OK;
820 } 860 }
@@ -881,15 +921,15 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
881 } 921 }
882 922
883 nf_reset(skb); 923 nf_reset(skb);
884 924 tstats = this_cpu_ptr(dev->tstats);
885 IPTUNNEL_XMIT(); 925 __IPTUNNEL_XMIT(tstats, &dev->stats);
886 return NETDEV_TX_OK; 926 return NETDEV_TX_OK;
887 927
888tx_error_icmp: 928tx_error_icmp:
889 dst_link_failure(skb); 929 dst_link_failure(skb);
890 930
891tx_error: 931tx_error:
892 stats->tx_errors++; 932 dev->stats.tx_errors++;
893 dev_kfree_skb(skb); 933 dev_kfree_skb(skb);
894 return NETDEV_TX_OK; 934 return NETDEV_TX_OK;
895} 935}
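
The transmit side makes the same split: __IPTUNNEL_XMIT() takes two stats arguments so the hot tx_packets/tx_bytes counters land in the per-CPU struct while rarely-touched error counters stay in the shared dev->stats. Roughly, the accounting it performs looks like this sketch (not the literal macro body), reusing the pcpu_tstats struct the patch defines:

/* pkt_len is the payload length, err the ip_local_out() result */
static void demo_tx_account(struct net_device *dev,
                            struct pcpu_tstats *tstats,
                            int pkt_len, int err)
{
        if (likely(err == 0)) {
                /* hot path: private per-CPU slot, no lock, no cache bouncing */
                tstats->tx_packets++;
                tstats->tx_bytes += pkt_len;
        } else {
                /* cold path: shared counters are fine for rare errors */
                dev->stats.tx_errors++;
                dev->stats.tx_aborted_errors++;
        }
}
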
@@ -909,13 +949,19 @@ static int ipgre_tunnel_bind_dev(struct net_device *dev)
909 /* Guess output device to choose reasonable mtu and needed_headroom */ 949 /* Guess output device to choose reasonable mtu and needed_headroom */
910 950
911 if (iph->daddr) { 951 if (iph->daddr) {
912 struct flowi fl = { .oif = tunnel->parms.link, 952 struct flowi fl = {
913 .nl_u = { .ip4_u = 953 .oif = tunnel->parms.link,
914 { .daddr = iph->daddr, 954 .nl_u = {
915 .saddr = iph->saddr, 955 .ip4_u = {
916 .tos = RT_TOS(iph->tos) } }, 956 .daddr = iph->daddr,
917 .proto = IPPROTO_GRE }; 957 .saddr = iph->saddr,
958 .tos = RT_TOS(iph->tos)
959 }
960 },
961 .proto = IPPROTO_GRE
962 };
918 struct rtable *rt; 963 struct rtable *rt;
964
919 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 965 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
920 tdev = rt->dst.dev; 966 tdev = rt->dst.dev;
921 ip_rt_put(rt); 967 ip_rt_put(rt);
@@ -1012,7 +1058,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
1012 break; 1058 break;
1013 } 1059 }
1014 } else { 1060 } else {
1015 unsigned nflags = 0; 1061 unsigned int nflags = 0;
1016 1062
1017 t = netdev_priv(dev); 1063 t = netdev_priv(dev);
1018 1064
@@ -1125,7 +1171,7 @@ static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1125 1171
1126static int ipgre_header(struct sk_buff *skb, struct net_device *dev, 1172static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1127 unsigned short type, 1173 unsigned short type,
1128 const void *daddr, const void *saddr, unsigned len) 1174 const void *daddr, const void *saddr, unsigned int len)
1129{ 1175{
1130 struct ip_tunnel *t = netdev_priv(dev); 1176 struct ip_tunnel *t = netdev_priv(dev);
1131 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen); 1177 struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
@@ -1167,13 +1213,19 @@ static int ipgre_open(struct net_device *dev)
1167 struct ip_tunnel *t = netdev_priv(dev); 1213 struct ip_tunnel *t = netdev_priv(dev);
1168 1214
1169 if (ipv4_is_multicast(t->parms.iph.daddr)) { 1215 if (ipv4_is_multicast(t->parms.iph.daddr)) {
1170 struct flowi fl = { .oif = t->parms.link, 1216 struct flowi fl = {
1171 .nl_u = { .ip4_u = 1217 .oif = t->parms.link,
1172 { .daddr = t->parms.iph.daddr, 1218 .nl_u = {
1173 .saddr = t->parms.iph.saddr, 1219 .ip4_u = {
1174 .tos = RT_TOS(t->parms.iph.tos) } }, 1220 .daddr = t->parms.iph.daddr,
1175 .proto = IPPROTO_GRE }; 1221 .saddr = t->parms.iph.saddr,
1222 .tos = RT_TOS(t->parms.iph.tos)
1223 }
1224 },
1225 .proto = IPPROTO_GRE
1226 };
1176 struct rtable *rt; 1227 struct rtable *rt;
1228
1177 if (ip_route_output_key(dev_net(dev), &rt, &fl)) 1229 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1178 return -EADDRNOTAVAIL; 1230 return -EADDRNOTAVAIL;
1179 dev = rt->dst.dev; 1231 dev = rt->dst.dev;
@@ -1193,10 +1245,8 @@ static int ipgre_close(struct net_device *dev)
1193 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { 1245 if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1194 struct in_device *in_dev; 1246 struct in_device *in_dev;
1195 in_dev = inetdev_by_index(dev_net(dev), t->mlink); 1247 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1196 if (in_dev) { 1248 if (in_dev)
1197 ip_mc_dec_group(in_dev, t->parms.iph.daddr); 1249 ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1198 in_dev_put(in_dev);
1199 }
1200 } 1250 }
1201 return 0; 1251 return 0;
1202} 1252}
@@ -1213,12 +1263,19 @@ static const struct net_device_ops ipgre_netdev_ops = {
1213 .ndo_start_xmit = ipgre_tunnel_xmit, 1263 .ndo_start_xmit = ipgre_tunnel_xmit,
1214 .ndo_do_ioctl = ipgre_tunnel_ioctl, 1264 .ndo_do_ioctl = ipgre_tunnel_ioctl,
1215 .ndo_change_mtu = ipgre_tunnel_change_mtu, 1265 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1266 .ndo_get_stats = ipgre_get_stats,
1216}; 1267};
1217 1268
1269static void ipgre_dev_free(struct net_device *dev)
1270{
1271 free_percpu(dev->tstats);
1272 free_netdev(dev);
1273}
1274
1218static void ipgre_tunnel_setup(struct net_device *dev) 1275static void ipgre_tunnel_setup(struct net_device *dev)
1219{ 1276{
1220 dev->netdev_ops = &ipgre_netdev_ops; 1277 dev->netdev_ops = &ipgre_netdev_ops;
1221 dev->destructor = free_netdev; 1278 dev->destructor = ipgre_dev_free;
1222 1279
1223 dev->type = ARPHRD_IPGRE; 1280 dev->type = ARPHRD_IPGRE;
1224 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; 1281 dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
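
Because ndo_init now allocates per-CPU memory (see the alloc_percpu() hunk just below), leaving dev->destructor as plain free_netdev() would leak it on unregister; hence the new ipgre_dev_free(). The destructor runs from netdev_run_todo() once the device is fully quiesced, so freeing the per-CPU area there is safe. The pairing, reduced to its essentials:

static int demo_dev_init(struct net_device *dev)
{
        dev->tstats = alloc_percpu(struct pcpu_tstats);
        if (!dev->tstats)
                return -ENOMEM;
        return 0;
}

static void demo_dev_free(struct net_device *dev)
{
        free_percpu(dev->tstats);
        free_netdev(dev);       /* what dev->destructor pointed at before */
}
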
@@ -1256,6 +1313,10 @@ static int ipgre_tunnel_init(struct net_device *dev)
1256 } else 1313 } else
1257 dev->header_ops = &ipgre_header_ops; 1314 dev->header_ops = &ipgre_header_ops;
1258 1315
1316 dev->tstats = alloc_percpu(struct pcpu_tstats);
1317 if (!dev->tstats)
1318 return -ENOMEM;
1319
1259 return 0; 1320 return 0;
1260} 1321}
1261 1322
@@ -1274,14 +1335,13 @@ static void ipgre_fb_tunnel_init(struct net_device *dev)
1274 tunnel->hlen = sizeof(struct iphdr) + 4; 1335 tunnel->hlen = sizeof(struct iphdr) + 4;
1275 1336
1276 dev_hold(dev); 1337 dev_hold(dev);
1277 ign->tunnels_wc[0] = tunnel; 1338 rcu_assign_pointer(ign->tunnels_wc[0], tunnel);
1278} 1339}
1279 1340
1280 1341
1281static const struct net_protocol ipgre_protocol = { 1342static const struct gre_protocol ipgre_protocol = {
1282 .handler = ipgre_rcv, 1343 .handler = ipgre_rcv,
1283 .err_handler = ipgre_err, 1344 .err_handler = ipgre_err,
1284 .netns_ok = 1,
1285}; 1345};
1286 1346
1287static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) 1347static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
@@ -1291,11 +1351,13 @@ static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head)
1291 for (prio = 0; prio < 4; prio++) { 1351 for (prio = 0; prio < 4; prio++) {
1292 int h; 1352 int h;
1293 for (h = 0; h < HASH_SIZE; h++) { 1353 for (h = 0; h < HASH_SIZE; h++) {
1294 struct ip_tunnel *t = ign->tunnels[prio][h]; 1354 struct ip_tunnel *t;
1355
1356 t = rtnl_dereference(ign->tunnels[prio][h]);
1295 1357
1296 while (t != NULL) { 1358 while (t != NULL) {
1297 unregister_netdevice_queue(t->dev, head); 1359 unregister_netdevice_queue(t->dev, head);
1298 t = t->next; 1360 t = rtnl_dereference(t->next);
1299 } 1361 }
1300 } 1362 }
1301 } 1363 }
@@ -1441,6 +1503,10 @@ static int ipgre_tap_init(struct net_device *dev)
1441 1503
1442 ipgre_tunnel_bind_dev(dev); 1504 ipgre_tunnel_bind_dev(dev);
1443 1505
1506 dev->tstats = alloc_percpu(struct pcpu_tstats);
1507 if (!dev->tstats)
1508 return -ENOMEM;
1509
1444 return 0; 1510 return 0;
1445} 1511}
1446 1512
@@ -1451,6 +1517,7 @@ static const struct net_device_ops ipgre_tap_netdev_ops = {
1451 .ndo_set_mac_address = eth_mac_addr, 1517 .ndo_set_mac_address = eth_mac_addr,
1452 .ndo_validate_addr = eth_validate_addr, 1518 .ndo_validate_addr = eth_validate_addr,
1453 .ndo_change_mtu = ipgre_tunnel_change_mtu, 1519 .ndo_change_mtu = ipgre_tunnel_change_mtu,
1520 .ndo_get_stats = ipgre_get_stats,
1454}; 1521};
1455 1522
1456static void ipgre_tap_setup(struct net_device *dev) 1523static void ipgre_tap_setup(struct net_device *dev)
@@ -1459,7 +1526,7 @@ static void ipgre_tap_setup(struct net_device *dev)
1459 ether_setup(dev); 1526 ether_setup(dev);
1460 1527
1461 dev->netdev_ops = &ipgre_tap_netdev_ops; 1528 dev->netdev_ops = &ipgre_tap_netdev_ops;
1462 dev->destructor = free_netdev; 1529 dev->destructor = ipgre_dev_free;
1463 1530
1464 dev->iflink = 0; 1531 dev->iflink = 0;
1465 dev->features |= NETIF_F_NETNS_LOCAL; 1532 dev->features |= NETIF_F_NETNS_LOCAL;
@@ -1487,6 +1554,10 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev, struct nla
1487 if (!tb[IFLA_MTU]) 1554 if (!tb[IFLA_MTU])
1488 dev->mtu = mtu; 1555 dev->mtu = mtu;
1489 1556
1557 /* Can use a lockless transmit, unless we generate output sequences */
1558 if (!(nt->parms.o_flags & GRE_SEQ))
1559 dev->features |= NETIF_F_LLTX;
1560
1490 err = register_netdevice(dev); 1561 err = register_netdevice(dev);
1491 if (err) 1562 if (err)
1492 goto out; 1563 goto out;
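
A brief note on this feature bit: NETIF_F_LLTX tells the core not to take the device's xmit lock around ndo_start_xmit(), so two CPUs may be in the GRE transmit path at once. That is safe now that TX stats are per-CPU, but tunnel->o_seqno is still a single shared field, which is why the bit is only set when GRE_SEQ sequencing is off. If one did want both, the counter would need its own serialization, e.g. (purely hypothetical, not what the patch does):

#include <linux/types.h>
#include <asm/atomic.h>
#include <asm/byteorder.h>

/* hypothetical alternative: an atomic sequence counter would allow
 * NETIF_F_LLTX even with GRE_SEQ, at the cost of an atomic op per packet
 */
static atomic_t demo_o_seqno = ATOMIC_INIT(0);

static __be32 demo_next_seqno(void)
{
        return cpu_to_be32((u32)atomic_inc_return(&demo_o_seqno));
}
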
@@ -1522,7 +1593,7 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1522 t = nt; 1593 t = nt;
1523 1594
1524 if (dev->type != ARPHRD_ETHER) { 1595 if (dev->type != ARPHRD_ETHER) {
1525 unsigned nflags = 0; 1596 unsigned int nflags = 0;
1526 1597
1527 if (ipv4_is_multicast(p.iph.daddr)) 1598 if (ipv4_is_multicast(p.iph.daddr))
1528 nflags = IFF_BROADCAST; 1599 nflags = IFF_BROADCAST;
@@ -1663,7 +1734,7 @@ static int __init ipgre_init(void)
1663 if (err < 0) 1734 if (err < 0)
1664 return err; 1735 return err;
1665 1736
1666 err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); 1737 err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
1667 if (err < 0) { 1738 if (err < 0) {
1668 printk(KERN_INFO "ipgre init: can't add protocol\n"); 1739 printk(KERN_INFO "ipgre init: can't add protocol\n");
1669 goto add_proto_failed; 1740 goto add_proto_failed;
@@ -1683,7 +1754,7 @@ out:
1683tap_ops_failed: 1754tap_ops_failed:
1684 rtnl_link_unregister(&ipgre_link_ops); 1755 rtnl_link_unregister(&ipgre_link_ops);
1685rtnl_link_failed: 1756rtnl_link_failed:
1686 inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); 1757 gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
1687add_proto_failed: 1758add_proto_failed:
1688 unregister_pernet_device(&ipgre_net_ops); 1759 unregister_pernet_device(&ipgre_net_ops);
1689 goto out; 1760 goto out;
@@ -1693,7 +1764,7 @@ static void __exit ipgre_fini(void)
1693{ 1764{
1694 rtnl_link_unregister(&ipgre_tap_ops); 1765 rtnl_link_unregister(&ipgre_tap_ops);
1695 rtnl_link_unregister(&ipgre_link_ops); 1766 rtnl_link_unregister(&ipgre_link_ops);
1696 if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) 1767 if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0)
1697 printk(KERN_INFO "ipgre close: can't remove protocol\n"); 1768 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1698 unregister_pernet_device(&ipgre_net_ops); 1769 unregister_pernet_device(&ipgre_net_ops);
1699} 1770}
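
The registration change at the end of ip_gre.c reflects the new shared GRE demultiplexer (net/ipv4/gre.c, added elsewhere in this merge): instead of owning IPPROTO_GRE outright via inet_add_protocol(), ip_gre now registers for GRE version 0 (GREPROTO_CISCO), leaving version 1 free for PPTP. A sketch of how another module would claim the PPTP slot; demo_* names are illustrative:

#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/gre.h>

static int demo_gre_rcv(struct sk_buff *skb)
{
        kfree_skb(skb);         /* placeholder: a real handler decapsulates */
        return 0;
}

static const struct gre_protocol demo_proto __read_mostly = {
        .handler = demo_gre_rcv,
};

static int __init demo_init(void)
{
        return gre_add_protocol(&demo_proto, GREPROTO_PPTP);
}

static void __exit demo_exit(void)
{
        gre_del_protocol(&demo_proto, GREPROTO_PPTP);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
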
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ba9836c488ed..1906fa35860c 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -466,7 +466,7 @@ error:
466 } 466 }
467 return -EINVAL; 467 return -EINVAL;
468} 468}
469 469EXPORT_SYMBOL(ip_options_compile);
470 470
471/* 471/*
472 * Undo all the changes done by ip_options_compile(). 472 * Undo all the changes done by ip_options_compile().
@@ -646,3 +646,4 @@ int ip_options_rcv_srr(struct sk_buff *skb)
646 } 646 }
647 return 0; 647 return 0;
648} 648}
649EXPORT_SYMBOL(ip_options_rcv_srr);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 7649d7750075..439d2a34ee44 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -487,7 +487,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
487 * LATER: this step can be merged to real generation of fragments, 487 * LATER: this step can be merged to real generation of fragments,
488 * we can switch to copy when see the first bad fragment. 488 * we can switch to copy when see the first bad fragment.
489 */ 489 */
490 if (skb_has_frags(skb)) { 490 if (skb_has_frag_list(skb)) {
491 struct sk_buff *frag, *frag2; 491 struct sk_buff *frag, *frag2;
492 int first_len = skb_pagelen(skb); 492 int first_len = skb_pagelen(skb);
493 493
@@ -844,10 +844,9 @@ int ip_append_data(struct sock *sk,
844 inet->cork.length = 0; 844 inet->cork.length = 0;
845 sk->sk_sndmsg_page = NULL; 845 sk->sk_sndmsg_page = NULL;
846 sk->sk_sndmsg_off = 0; 846 sk->sk_sndmsg_off = 0;
847 if ((exthdrlen = rt->dst.header_len) != 0) { 847 exthdrlen = rt->dst.header_len;
848 length += exthdrlen; 848 length += exthdrlen;
849 transhdrlen += exthdrlen; 849 transhdrlen += exthdrlen;
850 }
851 } else { 850 } else {
852 rt = (struct rtable *)inet->cork.dst; 851 rt = (struct rtable *)inet->cork.dst;
853 if (inet->cork.flags & IPCORK_OPT) 852 if (inet->cork.flags & IPCORK_OPT)
@@ -934,16 +933,19 @@ alloc_new_skb:
934 !(rt->dst.dev->features&NETIF_F_SG)) 933 !(rt->dst.dev->features&NETIF_F_SG))
935 alloclen = mtu; 934 alloclen = mtu;
936 else 935 else
937 alloclen = datalen + fragheaderlen; 936 alloclen = fraglen;
938 937
939 /* The last fragment gets additional space at tail. 938 /* The last fragment gets additional space at tail.
940 * Note, with MSG_MORE we overallocate on fragments, 939 * Note, with MSG_MORE we overallocate on fragments,
941 * because we have no idea what fragment will be 940 * because we have no idea what fragment will be
942 * the last. 941 * the last.
943 */ 942 */
944 if (datalen == length + fraggap) 943 if (datalen == length + fraggap) {
945 alloclen += rt->dst.trailer_len; 944 alloclen += rt->dst.trailer_len;
946 945 /* make sure mtu is not reached */
946 if (datalen > mtu - fragheaderlen - rt->dst.trailer_len)
947 datalen -= ALIGN(rt->dst.trailer_len, 8);
948 }
947 if (transhdrlen) { 949 if (transhdrlen) {
948 skb = sock_alloc_send_skb(sk, 950 skb = sock_alloc_send_skb(sk,
949 alloclen + hh_len + 15, 951 alloclen + hh_len + 15,
@@ -960,7 +962,7 @@ alloc_new_skb:
960 else 962 else
961 /* only the initial fragment is 963 /* only the initial fragment is
962 time stamped */ 964 time stamped */
963 ipc->shtx.flags = 0; 965 ipc->tx_flags = 0;
964 } 966 }
965 if (skb == NULL) 967 if (skb == NULL)
966 goto error; 968 goto error;
@@ -971,7 +973,7 @@ alloc_new_skb:
971 skb->ip_summed = csummode; 973 skb->ip_summed = csummode;
972 skb->csum = 0; 974 skb->csum = 0;
973 skb_reserve(skb, hh_len); 975 skb_reserve(skb, hh_len);
974 *skb_tx(skb) = ipc->shtx; 976 skb_shinfo(skb)->tx_flags = ipc->tx_flags;
975 977
976 /* 978 /*
977 * Find where to start putting bytes. 979 * Find where to start putting bytes.
@@ -1391,7 +1393,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar
1391 1393
1392 daddr = ipc.addr = rt->rt_src; 1394 daddr = ipc.addr = rt->rt_src;
1393 ipc.opt = NULL; 1395 ipc.opt = NULL;
1394 ipc.shtx.flags = 0; 1396 ipc.tx_flags = 0;
1395 1397
1396 if (replyopts.opt.optlen) { 1398 if (replyopts.opt.optlen) {
1397 ipc.opt = &replyopts.opt; 1399 ipc.opt = &replyopts.opt;
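
The ip_output.c hunks above track an API consolidation from elsewhere in this merge window: the old struct skb_shared_tx (ipc->shtx, *skb_tx(skb)) collapsed into a single tx_flags byte in skb_shared_info. Under that scheme, requesting transmit timestamps is a matter of setting SKBTX_* bits; a small sketch, assuming the SKBTX_HW_TSTAMP/SKBTX_SW_TSTAMP flag names of this kernel generation:

#include <linux/skbuff.h>

/* request hardware or software TX timestamping for one skb */
static void demo_mark_for_tstamp(struct sk_buff *skb, bool hw)
{
        if (hw)
                skb_shinfo(skb)->tx_flags |= SKBTX_HW_TSTAMP;
        else
                skb_shinfo(skb)->tx_flags |= SKBTX_SW_TSTAMP;
}
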
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index ec036731a70b..e9b816e6cd73 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -122,31 +122,59 @@
122 122
123static int ipip_net_id __read_mostly; 123static int ipip_net_id __read_mostly;
124struct ipip_net { 124struct ipip_net {
125 struct ip_tunnel *tunnels_r_l[HASH_SIZE]; 125 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
126 struct ip_tunnel *tunnels_r[HASH_SIZE]; 126 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
127 struct ip_tunnel *tunnels_l[HASH_SIZE]; 127 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
128 struct ip_tunnel *tunnels_wc[1]; 128 struct ip_tunnel __rcu *tunnels_wc[1];
129 struct ip_tunnel **tunnels[4]; 129 struct ip_tunnel __rcu **tunnels[4];
130 130
131 struct net_device *fb_tunnel_dev; 131 struct net_device *fb_tunnel_dev;
132}; 132};
133 133
134static void ipip_tunnel_init(struct net_device *dev); 134static int ipip_tunnel_init(struct net_device *dev);
135static void ipip_tunnel_setup(struct net_device *dev); 135static void ipip_tunnel_setup(struct net_device *dev);
136static void ipip_dev_free(struct net_device *dev);
136 137
137/* 138/*
138 * Locking : hash tables are protected by RCU and a spinlock 139 * Locking : hash tables are protected by RCU and RTNL
139 */ 140 */
140static DEFINE_SPINLOCK(ipip_lock);
141 141
142#define for_each_ip_tunnel_rcu(start) \ 142#define for_each_ip_tunnel_rcu(start) \
143 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 143 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
144 144
 145/* often modified stats are per-cpu, others are shared (netdev->stats) */
146struct pcpu_tstats {
147 unsigned long rx_packets;
148 unsigned long rx_bytes;
149 unsigned long tx_packets;
150 unsigned long tx_bytes;
151};
152
153static struct net_device_stats *ipip_get_stats(struct net_device *dev)
154{
155 struct pcpu_tstats sum = { 0 };
156 int i;
157
158 for_each_possible_cpu(i) {
159 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
160
161 sum.rx_packets += tstats->rx_packets;
162 sum.rx_bytes += tstats->rx_bytes;
163 sum.tx_packets += tstats->tx_packets;
164 sum.tx_bytes += tstats->tx_bytes;
165 }
166 dev->stats.rx_packets = sum.rx_packets;
167 dev->stats.rx_bytes = sum.rx_bytes;
168 dev->stats.tx_packets = sum.tx_packets;
169 dev->stats.tx_bytes = sum.tx_bytes;
170 return &dev->stats;
171}
172
145static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, 173static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
146 __be32 remote, __be32 local) 174 __be32 remote, __be32 local)
147{ 175{
148 unsigned h0 = HASH(remote); 176 unsigned int h0 = HASH(remote);
149 unsigned h1 = HASH(local); 177 unsigned int h1 = HASH(local);
150 struct ip_tunnel *t; 178 struct ip_tunnel *t;
151 struct ipip_net *ipn = net_generic(net, ipip_net_id); 179 struct ipip_net *ipn = net_generic(net, ipip_net_id);
152 180
@@ -169,12 +197,12 @@ static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
169 return NULL; 197 return NULL;
170} 198}
171 199
172static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn, 200static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
173 struct ip_tunnel_parm *parms) 201 struct ip_tunnel_parm *parms)
174{ 202{
175 __be32 remote = parms->iph.daddr; 203 __be32 remote = parms->iph.daddr;
176 __be32 local = parms->iph.saddr; 204 __be32 local = parms->iph.saddr;
177 unsigned h = 0; 205 unsigned int h = 0;
178 int prio = 0; 206 int prio = 0;
179 207
180 if (remote) { 208 if (remote) {
@@ -188,7 +216,7 @@ static struct ip_tunnel **__ipip_bucket(struct ipip_net *ipn,
188 return &ipn->tunnels[prio][h]; 216 return &ipn->tunnels[prio][h];
189} 217}
190 218
191static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn, 219static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
192 struct ip_tunnel *t) 220 struct ip_tunnel *t)
193{ 221{
194 return __ipip_bucket(ipn, &t->parms); 222 return __ipip_bucket(ipn, &t->parms);
@@ -196,13 +224,14 @@ static inline struct ip_tunnel **ipip_bucket(struct ipip_net *ipn,
196 224
197static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) 225static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
198{ 226{
199 struct ip_tunnel **tp; 227 struct ip_tunnel __rcu **tp;
200 228 struct ip_tunnel *iter;
201 for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) { 229
202 if (t == *tp) { 230 for (tp = ipip_bucket(ipn, t);
203 spin_lock_bh(&ipip_lock); 231 (iter = rtnl_dereference(*tp)) != NULL;
204 *tp = t->next; 232 tp = &iter->next) {
205 spin_unlock_bh(&ipip_lock); 233 if (t == iter) {
234 rcu_assign_pointer(*tp, t->next);
206 break; 235 break;
207 } 236 }
208 } 237 }
@@ -210,12 +239,10 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
210 239
211static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) 240static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
212{ 241{
213 struct ip_tunnel **tp = ipip_bucket(ipn, t); 242 struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
214 243
215 spin_lock_bh(&ipip_lock); 244 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
216 t->next = *tp;
217 rcu_assign_pointer(*tp, t); 245 rcu_assign_pointer(*tp, t);
218 spin_unlock_bh(&ipip_lock);
219} 246}
220 247
221static struct ip_tunnel * ipip_tunnel_locate(struct net *net, 248static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
@@ -223,12 +250,15 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
223{ 250{
224 __be32 remote = parms->iph.daddr; 251 __be32 remote = parms->iph.daddr;
225 __be32 local = parms->iph.saddr; 252 __be32 local = parms->iph.saddr;
226 struct ip_tunnel *t, **tp, *nt; 253 struct ip_tunnel *t, *nt;
254 struct ip_tunnel __rcu **tp;
227 struct net_device *dev; 255 struct net_device *dev;
228 char name[IFNAMSIZ]; 256 char name[IFNAMSIZ];
229 struct ipip_net *ipn = net_generic(net, ipip_net_id); 257 struct ipip_net *ipn = net_generic(net, ipip_net_id);
230 258
231 for (tp = __ipip_bucket(ipn, parms); (t = *tp) != NULL; tp = &t->next) { 259 for (tp = __ipip_bucket(ipn, parms);
260 (t = rtnl_dereference(*tp)) != NULL;
261 tp = &t->next) {
232 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) 262 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
233 return t; 263 return t;
234 } 264 }
@@ -238,7 +268,7 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
238 if (parms->name[0]) 268 if (parms->name[0])
239 strlcpy(name, parms->name, IFNAMSIZ); 269 strlcpy(name, parms->name, IFNAMSIZ);
240 else 270 else
241 sprintf(name, "tunl%%d"); 271 strcpy(name, "tunl%d");
242 272
243 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); 273 dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
244 if (dev == NULL) 274 if (dev == NULL)
@@ -254,7 +284,8 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
254 nt = netdev_priv(dev); 284 nt = netdev_priv(dev);
255 nt->parms = *parms; 285 nt->parms = *parms;
256 286
257 ipip_tunnel_init(dev); 287 if (ipip_tunnel_init(dev) < 0)
288 goto failed_free;
258 289
259 if (register_netdevice(dev) < 0) 290 if (register_netdevice(dev) < 0)
260 goto failed_free; 291 goto failed_free;
@@ -264,20 +295,19 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
264 return nt; 295 return nt;
265 296
266failed_free: 297failed_free:
267 free_netdev(dev); 298 ipip_dev_free(dev);
268 return NULL; 299 return NULL;
269} 300}
270 301
302/* called with RTNL */
271static void ipip_tunnel_uninit(struct net_device *dev) 303static void ipip_tunnel_uninit(struct net_device *dev)
272{ 304{
273 struct net *net = dev_net(dev); 305 struct net *net = dev_net(dev);
274 struct ipip_net *ipn = net_generic(net, ipip_net_id); 306 struct ipip_net *ipn = net_generic(net, ipip_net_id);
275 307
276 if (dev == ipn->fb_tunnel_dev) { 308 if (dev == ipn->fb_tunnel_dev)
277 spin_lock_bh(&ipip_lock); 309 rcu_assign_pointer(ipn->tunnels_wc[0], NULL);
278 ipn->tunnels_wc[0] = NULL; 310 else
279 spin_unlock_bh(&ipip_lock);
280 } else
281 ipip_tunnel_unlink(ipn, netdev_priv(dev)); 311 ipip_tunnel_unlink(ipn, netdev_priv(dev));
282 dev_put(dev); 312 dev_put(dev);
283} 313}
@@ -359,8 +389,10 @@ static int ipip_rcv(struct sk_buff *skb)
359 const struct iphdr *iph = ip_hdr(skb); 389 const struct iphdr *iph = ip_hdr(skb);
360 390
361 rcu_read_lock(); 391 rcu_read_lock();
362 if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev), 392 tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
363 iph->saddr, iph->daddr)) != NULL) { 393 if (tunnel != NULL) {
394 struct pcpu_tstats *tstats;
395
364 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { 396 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
365 rcu_read_unlock(); 397 rcu_read_unlock();
366 kfree_skb(skb); 398 kfree_skb(skb);
@@ -374,10 +406,16 @@ static int ipip_rcv(struct sk_buff *skb)
374 skb->protocol = htons(ETH_P_IP); 406 skb->protocol = htons(ETH_P_IP);
375 skb->pkt_type = PACKET_HOST; 407 skb->pkt_type = PACKET_HOST;
376 408
377 skb_tunnel_rx(skb, tunnel->dev); 409 tstats = this_cpu_ptr(tunnel->dev->tstats);
410 tstats->rx_packets++;
411 tstats->rx_bytes += skb->len;
412
413 __skb_tunnel_rx(skb, tunnel->dev);
378 414
379 ipip_ecn_decapsulate(iph, skb); 415 ipip_ecn_decapsulate(iph, skb);
416
380 netif_rx(skb); 417 netif_rx(skb);
418
381 rcu_read_unlock(); 419 rcu_read_unlock();
382 return 0; 420 return 0;
383 } 421 }
@@ -394,13 +432,12 @@ static int ipip_rcv(struct sk_buff *skb)
394static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) 432static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
395{ 433{
396 struct ip_tunnel *tunnel = netdev_priv(dev); 434 struct ip_tunnel *tunnel = netdev_priv(dev);
397 struct net_device_stats *stats = &dev->stats; 435 struct pcpu_tstats *tstats;
398 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
399 struct iphdr *tiph = &tunnel->parms.iph; 436 struct iphdr *tiph = &tunnel->parms.iph;
400 u8 tos = tunnel->parms.iph.tos; 437 u8 tos = tunnel->parms.iph.tos;
401 __be16 df = tiph->frag_off; 438 __be16 df = tiph->frag_off;
402 struct rtable *rt; /* Route to the other host */ 439 struct rtable *rt; /* Route to the other host */
403 struct net_device *tdev; /* Device to other host */ 440 struct net_device *tdev; /* Device to other host */
404 struct iphdr *old_iph = ip_hdr(skb); 441 struct iphdr *old_iph = ip_hdr(skb);
405 struct iphdr *iph; /* Our new IP header */ 442 struct iphdr *iph; /* Our new IP header */
406 unsigned int max_headroom; /* The extra header space needed */ 443 unsigned int max_headroom; /* The extra header space needed */
@@ -410,13 +447,13 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
410 if (skb->protocol != htons(ETH_P_IP)) 447 if (skb->protocol != htons(ETH_P_IP))
411 goto tx_error; 448 goto tx_error;
412 449
413 if (tos&1) 450 if (tos & 1)
414 tos = old_iph->tos; 451 tos = old_iph->tos;
415 452
416 if (!dst) { 453 if (!dst) {
417 /* NBMA tunnel */ 454 /* NBMA tunnel */
418 if ((rt = skb_rtable(skb)) == NULL) { 455 if ((rt = skb_rtable(skb)) == NULL) {
419 stats->tx_fifo_errors++; 456 dev->stats.tx_fifo_errors++;
420 goto tx_error; 457 goto tx_error;
421 } 458 }
422 if ((dst = rt->rt_gateway) == 0) 459 if ((dst = rt->rt_gateway) == 0)
@@ -424,14 +461,20 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
424 } 461 }
425 462
426 { 463 {
427 struct flowi fl = { .oif = tunnel->parms.link, 464 struct flowi fl = {
428 .nl_u = { .ip4_u = 465 .oif = tunnel->parms.link,
429 { .daddr = dst, 466 .nl_u = {
430 .saddr = tiph->saddr, 467 .ip4_u = {
431 .tos = RT_TOS(tos) } }, 468 .daddr = dst,
432 .proto = IPPROTO_IPIP }; 469 .saddr = tiph->saddr,
470 .tos = RT_TOS(tos)
471 }
472 },
473 .proto = IPPROTO_IPIP
474 };
475
433 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 476 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
434 stats->tx_carrier_errors++; 477 dev->stats.tx_carrier_errors++;
435 goto tx_error_icmp; 478 goto tx_error_icmp;
436 } 479 }
437 } 480 }
@@ -439,7 +482,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
439 482
440 if (tdev == dev) { 483 if (tdev == dev) {
441 ip_rt_put(rt); 484 ip_rt_put(rt);
442 stats->collisions++; 485 dev->stats.collisions++;
443 goto tx_error; 486 goto tx_error;
444 } 487 }
445 488
@@ -449,7 +492,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
449 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 492 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
450 493
451 if (mtu < 68) { 494 if (mtu < 68) {
452 stats->collisions++; 495 dev->stats.collisions++;
453 ip_rt_put(rt); 496 ip_rt_put(rt);
454 goto tx_error; 497 goto tx_error;
455 } 498 }
@@ -485,7 +528,7 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
485 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 528 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
486 if (!new_skb) { 529 if (!new_skb) {
487 ip_rt_put(rt); 530 ip_rt_put(rt);
488 txq->tx_dropped++; 531 dev->stats.tx_dropped++;
489 dev_kfree_skb(skb); 532 dev_kfree_skb(skb);
490 return NETDEV_TX_OK; 533 return NETDEV_TX_OK;
491 } 534 }
@@ -522,14 +565,14 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
522 iph->ttl = old_iph->ttl; 565 iph->ttl = old_iph->ttl;
523 566
524 nf_reset(skb); 567 nf_reset(skb);
525 568 tstats = this_cpu_ptr(dev->tstats);
526 IPTUNNEL_XMIT(); 569 __IPTUNNEL_XMIT(tstats, &dev->stats);
527 return NETDEV_TX_OK; 570 return NETDEV_TX_OK;
528 571
529tx_error_icmp: 572tx_error_icmp:
530 dst_link_failure(skb); 573 dst_link_failure(skb);
531tx_error: 574tx_error:
532 stats->tx_errors++; 575 dev->stats.tx_errors++;
533 dev_kfree_skb(skb); 576 dev_kfree_skb(skb);
534 return NETDEV_TX_OK; 577 return NETDEV_TX_OK;
535} 578}
@@ -544,13 +587,19 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
544 iph = &tunnel->parms.iph; 587 iph = &tunnel->parms.iph;
545 588
546 if (iph->daddr) { 589 if (iph->daddr) {
547 struct flowi fl = { .oif = tunnel->parms.link, 590 struct flowi fl = {
548 .nl_u = { .ip4_u = 591 .oif = tunnel->parms.link,
549 { .daddr = iph->daddr, 592 .nl_u = {
550 .saddr = iph->saddr, 593 .ip4_u = {
551 .tos = RT_TOS(iph->tos) } }, 594 .daddr = iph->daddr,
552 .proto = IPPROTO_IPIP }; 595 .saddr = iph->saddr,
596 .tos = RT_TOS(iph->tos)
597 }
598 },
599 .proto = IPPROTO_IPIP
600 };
553 struct rtable *rt; 601 struct rtable *rt;
602
554 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { 603 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
555 tdev = rt->dst.dev; 604 tdev = rt->dst.dev;
556 ip_rt_put(rt); 605 ip_rt_put(rt);
@@ -696,13 +745,19 @@ static const struct net_device_ops ipip_netdev_ops = {
696 .ndo_start_xmit = ipip_tunnel_xmit, 745 .ndo_start_xmit = ipip_tunnel_xmit,
697 .ndo_do_ioctl = ipip_tunnel_ioctl, 746 .ndo_do_ioctl = ipip_tunnel_ioctl,
698 .ndo_change_mtu = ipip_tunnel_change_mtu, 747 .ndo_change_mtu = ipip_tunnel_change_mtu,
699 748 .ndo_get_stats = ipip_get_stats,
700}; 749};
701 750
751static void ipip_dev_free(struct net_device *dev)
752{
753 free_percpu(dev->tstats);
754 free_netdev(dev);
755}
756
702static void ipip_tunnel_setup(struct net_device *dev) 757static void ipip_tunnel_setup(struct net_device *dev)
703{ 758{
704 dev->netdev_ops = &ipip_netdev_ops; 759 dev->netdev_ops = &ipip_netdev_ops;
705 dev->destructor = free_netdev; 760 dev->destructor = ipip_dev_free;
706 761
707 dev->type = ARPHRD_TUNNEL; 762 dev->type = ARPHRD_TUNNEL;
708 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 763 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
@@ -711,10 +766,11 @@ static void ipip_tunnel_setup(struct net_device *dev)
711 dev->iflink = 0; 766 dev->iflink = 0;
712 dev->addr_len = 4; 767 dev->addr_len = 4;
713 dev->features |= NETIF_F_NETNS_LOCAL; 768 dev->features |= NETIF_F_NETNS_LOCAL;
769 dev->features |= NETIF_F_LLTX;
714 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; 770 dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
715} 771}
716 772
717static void ipip_tunnel_init(struct net_device *dev) 773static int ipip_tunnel_init(struct net_device *dev)
718{ 774{
719 struct ip_tunnel *tunnel = netdev_priv(dev); 775 struct ip_tunnel *tunnel = netdev_priv(dev);
720 776
@@ -725,9 +781,15 @@ static void ipip_tunnel_init(struct net_device *dev)
725 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 781 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
726 782
727 ipip_tunnel_bind_dev(dev); 783 ipip_tunnel_bind_dev(dev);
784
785 dev->tstats = alloc_percpu(struct pcpu_tstats);
786 if (!dev->tstats)
787 return -ENOMEM;
788
789 return 0;
728} 790}
729 791
730static void __net_init ipip_fb_tunnel_init(struct net_device *dev) 792static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
731{ 793{
732 struct ip_tunnel *tunnel = netdev_priv(dev); 794 struct ip_tunnel *tunnel = netdev_priv(dev);
733 struct iphdr *iph = &tunnel->parms.iph; 795 struct iphdr *iph = &tunnel->parms.iph;
@@ -740,11 +802,16 @@ static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
740 iph->protocol = IPPROTO_IPIP; 802 iph->protocol = IPPROTO_IPIP;
741 iph->ihl = 5; 803 iph->ihl = 5;
742 804
805 dev->tstats = alloc_percpu(struct pcpu_tstats);
806 if (!dev->tstats)
807 return -ENOMEM;
808
743 dev_hold(dev); 809 dev_hold(dev);
744 ipn->tunnels_wc[0] = tunnel; 810 rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
811 return 0;
745} 812}
746 813
747static struct xfrm_tunnel ipip_handler = { 814static struct xfrm_tunnel ipip_handler __read_mostly = {
748 .handler = ipip_rcv, 815 .handler = ipip_rcv,
749 .err_handler = ipip_err, 816 .err_handler = ipip_err,
750 .priority = 1, 817 .priority = 1,
@@ -760,11 +827,12 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
760 for (prio = 1; prio < 4; prio++) { 827 for (prio = 1; prio < 4; prio++) {
761 int h; 828 int h;
762 for (h = 0; h < HASH_SIZE; h++) { 829 for (h = 0; h < HASH_SIZE; h++) {
763 struct ip_tunnel *t = ipn->tunnels[prio][h]; 830 struct ip_tunnel *t;
764 831
832 t = rtnl_dereference(ipn->tunnels[prio][h]);
765 while (t != NULL) { 833 while (t != NULL) {
766 unregister_netdevice_queue(t->dev, head); 834 unregister_netdevice_queue(t->dev, head);
767 t = t->next; 835 t = rtnl_dereference(t->next);
768 } 836 }
769 } 837 }
770 } 838 }
@@ -789,7 +857,9 @@ static int __net_init ipip_init_net(struct net *net)
789 } 857 }
790 dev_net_set(ipn->fb_tunnel_dev, net); 858 dev_net_set(ipn->fb_tunnel_dev, net);
791 859
792 ipip_fb_tunnel_init(ipn->fb_tunnel_dev); 860 err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
861 if (err)
862 goto err_reg_dev;
793 863
794 if ((err = register_netdev(ipn->fb_tunnel_dev))) 864 if ((err = register_netdev(ipn->fb_tunnel_dev)))
795 goto err_reg_dev; 865 goto err_reg_dev;
@@ -797,7 +867,7 @@ static int __net_init ipip_init_net(struct net *net)
797 return 0; 867 return 0;
798 868
799err_reg_dev: 869err_reg_dev:
800 free_netdev(ipn->fb_tunnel_dev); 870 ipip_dev_free(ipn->fb_tunnel_dev);
801err_alloc_dev: 871err_alloc_dev:
802 /* nothing */ 872 /* nothing */
803 return err; 873 return err;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 179fcab866fc..86dd5691af46 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -75,7 +75,7 @@ struct mr_table {
75 struct net *net; 75 struct net *net;
76#endif 76#endif
77 u32 id; 77 u32 id;
78 struct sock *mroute_sk; 78 struct sock __rcu *mroute_sk;
79 struct timer_list ipmr_expire_timer; 79 struct timer_list ipmr_expire_timer;
80 struct list_head mfc_unres_queue; 80 struct list_head mfc_unres_queue;
81 struct list_head mfc_cache_array[MFC_LINES]; 81 struct list_head mfc_cache_array[MFC_LINES];
@@ -98,7 +98,7 @@ struct ipmr_result {
98}; 98};
99 99
100/* Big lock, protecting vif table, mrt cache and mroute socket state. 100/* Big lock, protecting vif table, mrt cache and mroute socket state.
101 Note that the changes are semaphored via rtnl_lock. 101 * Note that the changes are semaphored via rtnl_lock.
102 */ 102 */
103 103
104static DEFINE_RWLOCK(mrt_lock); 104static DEFINE_RWLOCK(mrt_lock);
@@ -113,11 +113,11 @@ static DEFINE_RWLOCK(mrt_lock);
113static DEFINE_SPINLOCK(mfc_unres_lock); 113static DEFINE_SPINLOCK(mfc_unres_lock);
114 114
115/* We return to original Alan's scheme. Hash table of resolved 115/* We return to original Alan's scheme. Hash table of resolved
116 entries is changed only in process context and protected 116 * entries is changed only in process context and protected
117 with weak lock mrt_lock. Queue of unresolved entries is protected 117 * with weak lock mrt_lock. Queue of unresolved entries is protected
118 with strong spinlock mfc_unres_lock. 118 * with strong spinlock mfc_unres_lock.
119 119 *
120 In this case data path is free of exclusive locks at all. 120 * In this case data path is free of exclusive locks at all.
121 */ 121 */
122 122
123static struct kmem_cache *mrt_cachep __read_mostly; 123static struct kmem_cache *mrt_cachep __read_mostly;
@@ -396,9 +396,9 @@ struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
396 set_fs(KERNEL_DS); 396 set_fs(KERNEL_DS);
397 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL); 397 err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
398 set_fs(oldfs); 398 set_fs(oldfs);
399 } else 399 } else {
400 err = -EOPNOTSUPP; 400 err = -EOPNOTSUPP;
401 401 }
402 dev = NULL; 402 dev = NULL;
403 403
404 if (err == 0 && 404 if (err == 0 &&
@@ -495,7 +495,8 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
495 dev->iflink = 0; 495 dev->iflink = 0;
496 496
497 rcu_read_lock(); 497 rcu_read_lock();
498 if ((in_dev = __in_dev_get_rcu(dev)) == NULL) { 498 in_dev = __in_dev_get_rcu(dev);
499 if (!in_dev) {
499 rcu_read_unlock(); 500 rcu_read_unlock();
500 goto failure; 501 goto failure;
501 } 502 }
@@ -552,9 +553,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
552 mrt->mroute_reg_vif_num = -1; 553 mrt->mroute_reg_vif_num = -1;
553#endif 554#endif
554 555
555 if (vifi+1 == mrt->maxvif) { 556 if (vifi + 1 == mrt->maxvif) {
556 int tmp; 557 int tmp;
557 for (tmp=vifi-1; tmp>=0; tmp--) { 558
559 for (tmp = vifi - 1; tmp >= 0; tmp--) {
558 if (VIF_EXISTS(mrt, tmp)) 560 if (VIF_EXISTS(mrt, tmp))
559 break; 561 break;
560 } 562 }
@@ -565,25 +567,33 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify,
565 567
566 dev_set_allmulti(dev, -1); 568 dev_set_allmulti(dev, -1);
567 569
568 if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) { 570 in_dev = __in_dev_get_rtnl(dev);
571 if (in_dev) {
569 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; 572 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
570 ip_rt_multicast_event(in_dev); 573 ip_rt_multicast_event(in_dev);
571 } 574 }
572 575
573 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER) && !notify) 576 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
574 unregister_netdevice_queue(dev, head); 577 unregister_netdevice_queue(dev, head);
575 578
576 dev_put(dev); 579 dev_put(dev);
577 return 0; 580 return 0;
578} 581}
579 582
580static inline void ipmr_cache_free(struct mfc_cache *c) 583static void ipmr_cache_free_rcu(struct rcu_head *head)
581{ 584{
585 struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);
586
582 kmem_cache_free(mrt_cachep, c); 587 kmem_cache_free(mrt_cachep, c);
583} 588}
584 589
590static inline void ipmr_cache_free(struct mfc_cache *c)
591{
592 call_rcu(&c->rcu, ipmr_cache_free_rcu);
593}
594
585/* Destroy an unresolved cache entry, killing queued skbs 595/* Destroy an unresolved cache entry, killing queued skbs
586 and reporting error to netlink readers. 596 * and reporting error to netlink readers.
587 */ 597 */
588 598
589static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) 599static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
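
The ipmr_cache_free() rework above is the standard call_rcu() deferral: an unlinked cache entry must survive until every RCU reader that might still hold it has finished. The rcu_head this relies on is added to struct mfc_cache in linux/mroute.h, outside this hunk. The generic shape of the pattern:

#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct demo_entry {
        struct list_head list;
        struct rcu_head rcu;    /* storage for the deferred callback */
};

static void demo_free_rcu(struct rcu_head *head)
{
        kfree(container_of(head, struct demo_entry, rcu));
}

/* update side, serialized by the caller's lock */
static void demo_del(struct demo_entry *e)
{
        list_del_rcu(&e->list);           /* readers may still see e ... */
        call_rcu(&e->rcu, demo_free_rcu); /* ... so defer the free */
}
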
@@ -605,8 +615,9 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
605 memset(&e->msg, 0, sizeof(e->msg)); 615 memset(&e->msg, 0, sizeof(e->msg));
606 616
607 rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 617 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
608 } else 618 } else {
609 kfree_skb(skb); 619 kfree_skb(skb);
620 }
610 } 621 }
611 622
612 ipmr_cache_free(c); 623 ipmr_cache_free(c);
@@ -724,13 +735,13 @@ static int vif_add(struct net *net, struct mr_table *mrt,
724 case 0: 735 case 0:
725 if (vifc->vifc_flags == VIFF_USE_IFINDEX) { 736 if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
726 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); 737 dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
727 if (dev && dev->ip_ptr == NULL) { 738 if (dev && __in_dev_get_rtnl(dev) == NULL) {
728 dev_put(dev); 739 dev_put(dev);
729 return -EADDRNOTAVAIL; 740 return -EADDRNOTAVAIL;
730 } 741 }
731 } else 742 } else {
732 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); 743 dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
733 744 }
734 if (!dev) 745 if (!dev)
735 return -EADDRNOTAVAIL; 746 return -EADDRNOTAVAIL;
736 err = dev_set_allmulti(dev, 1); 747 err = dev_set_allmulti(dev, 1);
@@ -743,16 +754,16 @@ static int vif_add(struct net *net, struct mr_table *mrt,
743 return -EINVAL; 754 return -EINVAL;
744 } 755 }
745 756
746 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { 757 in_dev = __in_dev_get_rtnl(dev);
758 if (!in_dev) {
747 dev_put(dev); 759 dev_put(dev);
748 return -EADDRNOTAVAIL; 760 return -EADDRNOTAVAIL;
749 } 761 }
750 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; 762 IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
751 ip_rt_multicast_event(in_dev); 763 ip_rt_multicast_event(in_dev);
752 764
753 /* 765 /* Fill in the VIF structures */
754 * Fill in the VIF structures 766
755 */
756 v->rate_limit = vifc->vifc_rate_limit; 767 v->rate_limit = vifc->vifc_rate_limit;
757 v->local = vifc->vifc_lcl_addr.s_addr; 768 v->local = vifc->vifc_lcl_addr.s_addr;
758 v->remote = vifc->vifc_rmt_addr.s_addr; 769 v->remote = vifc->vifc_rmt_addr.s_addr;
@@ -765,14 +776,14 @@ static int vif_add(struct net *net, struct mr_table *mrt,
765 v->pkt_in = 0; 776 v->pkt_in = 0;
766 v->pkt_out = 0; 777 v->pkt_out = 0;
767 v->link = dev->ifindex; 778 v->link = dev->ifindex;
768 if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) 779 if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
769 v->link = dev->iflink; 780 v->link = dev->iflink;
770 781
771 /* And finish update writing critical data */ 782 /* And finish update writing critical data */
772 write_lock_bh(&mrt_lock); 783 write_lock_bh(&mrt_lock);
773 v->dev = dev; 784 v->dev = dev;
774#ifdef CONFIG_IP_PIMSM 785#ifdef CONFIG_IP_PIMSM
775 if (v->flags&VIFF_REGISTER) 786 if (v->flags & VIFF_REGISTER)
776 mrt->mroute_reg_vif_num = vifi; 787 mrt->mroute_reg_vif_num = vifi;
777#endif 788#endif
778 if (vifi+1 > mrt->maxvif) 789 if (vifi+1 > mrt->maxvif)
@@ -781,6 +792,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
781 return 0; 792 return 0;
782} 793}
783 794
795/* called with rcu_read_lock() */
784static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, 796static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
785 __be32 origin, 797 __be32 origin,
786 __be32 mcastgrp) 798 __be32 mcastgrp)
@@ -788,7 +800,7 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
788 int line = MFC_HASH(mcastgrp, origin); 800 int line = MFC_HASH(mcastgrp, origin);
789 struct mfc_cache *c; 801 struct mfc_cache *c;
790 802
791 list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { 803 list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) {
792 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp) 804 if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
793 return c; 805 return c;
794 } 806 }
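
With entries added by list_add_rcu() and freed via call_rcu(), this lookup can now run without taking mrt_lock; the only contract is that callers hold rcu_read_lock() across any use of the returned entry. A hedged sketch of a caller, as if written alongside the function above:

static bool demo_route_exists(struct mr_table *mrt,
                              __be32 origin, __be32 mcastgrp)
{
        struct mfc_cache *c;
        bool found;

        rcu_read_lock();
        c = ipmr_cache_find(mrt, origin, mcastgrp);
        found = (c != NULL);
        /* c must not be used after the unlock; it may then be freed */
        rcu_read_unlock();

        return found;
}
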
@@ -801,19 +813,20 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
801static struct mfc_cache *ipmr_cache_alloc(void) 813static struct mfc_cache *ipmr_cache_alloc(void)
802{ 814{
803 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); 815 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
804 if (c == NULL) 816
805 return NULL; 817 if (c)
806 c->mfc_un.res.minvif = MAXVIFS; 818 c->mfc_un.res.minvif = MAXVIFS;
807 return c; 819 return c;
808} 820}
809 821
810static struct mfc_cache *ipmr_cache_alloc_unres(void) 822static struct mfc_cache *ipmr_cache_alloc_unres(void)
811{ 823{
812 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); 824 struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
813 if (c == NULL) 825
814 return NULL; 826 if (c) {
815 skb_queue_head_init(&c->mfc_un.unres.unresolved); 827 skb_queue_head_init(&c->mfc_un.unres.unresolved);
816 c->mfc_un.unres.expires = jiffies + 10*HZ; 828 c->mfc_un.unres.expires = jiffies + 10*HZ;
829 }
817 return c; 830 return c;
818} 831}
819 832
@@ -827,17 +840,15 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
827 struct sk_buff *skb; 840 struct sk_buff *skb;
828 struct nlmsgerr *e; 841 struct nlmsgerr *e;
829 842
830 /* 843 /* Play the pending entries through our router */
831 * Play the pending entries through our router
832 */
833 844
834 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) { 845 while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
835 if (ip_hdr(skb)->version == 0) { 846 if (ip_hdr(skb)->version == 0) {
836 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); 847 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
837 848
838 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) { 849 if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
839 nlh->nlmsg_len = (skb_tail_pointer(skb) - 850 nlh->nlmsg_len = skb_tail_pointer(skb) -
840 (u8 *)nlh); 851 (u8 *)nlh;
841 } else { 852 } else {
842 nlh->nlmsg_type = NLMSG_ERROR; 853 nlh->nlmsg_type = NLMSG_ERROR;
843 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); 854 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
@@ -848,8 +859,9 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
848 } 859 }
849 860
850 rtnl_unicast(skb, net, NETLINK_CB(skb).pid); 861 rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
851 } else 862 } else {
852 ip_mr_forward(net, mrt, skb, c, 0); 863 ip_mr_forward(net, mrt, skb, c, 0);
864 }
853 } 865 }
854} 866}
855 867
@@ -867,6 +879,7 @@ static int ipmr_cache_report(struct mr_table *mrt,
867 const int ihl = ip_hdrlen(pkt); 879 const int ihl = ip_hdrlen(pkt);
868 struct igmphdr *igmp; 880 struct igmphdr *igmp;
869 struct igmpmsg *msg; 881 struct igmpmsg *msg;
882 struct sock *mroute_sk;
870 int ret; 883 int ret;
871 884
872#ifdef CONFIG_IP_PIMSM 885#ifdef CONFIG_IP_PIMSM
@@ -882,9 +895,9 @@ static int ipmr_cache_report(struct mr_table *mrt,
882#ifdef CONFIG_IP_PIMSM 895#ifdef CONFIG_IP_PIMSM
883 if (assert == IGMPMSG_WHOLEPKT) { 896 if (assert == IGMPMSG_WHOLEPKT) {
884 /* Ugly, but we have no choice with this interface. 897 /* Ugly, but we have no choice with this interface.
885 Duplicate old header, fix ihl, length etc. 898 * Duplicate old header, fix ihl, length etc.
886 And all this only to mangle msg->im_msgtype and 899 * And all this only to mangle msg->im_msgtype and
887 to set msg->im_mbz to "mbz" :-) 900 * to set msg->im_mbz to "mbz" :-)
888 */ 901 */
889 skb_push(skb, sizeof(struct iphdr)); 902 skb_push(skb, sizeof(struct iphdr));
890 skb_reset_network_header(skb); 903 skb_reset_network_header(skb);
@@ -901,39 +914,38 @@ static int ipmr_cache_report(struct mr_table *mrt,
901#endif 914#endif
902 { 915 {
903 916
904 /* 917 /* Copy the IP header */
905 * Copy the IP header
906 */
907 918
908 skb->network_header = skb->tail; 919 skb->network_header = skb->tail;
909 skb_put(skb, ihl); 920 skb_put(skb, ihl);
910 skb_copy_to_linear_data(skb, pkt->data, ihl); 921 skb_copy_to_linear_data(skb, pkt->data, ihl);
911 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */ 922 ip_hdr(skb)->protocol = 0; /* Flag to the kernel this is a route add */
912 msg = (struct igmpmsg *)skb_network_header(skb); 923 msg = (struct igmpmsg *)skb_network_header(skb);
913 msg->im_vif = vifi; 924 msg->im_vif = vifi;
914 skb_dst_set(skb, dst_clone(skb_dst(pkt))); 925 skb_dst_set(skb, dst_clone(skb_dst(pkt)));
915 926
916 /* 927 /* Add our header */
917 * Add our header
918 */
919 928
920 igmp=(struct igmphdr *)skb_put(skb, sizeof(struct igmphdr)); 929 igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
921 igmp->type = 930 igmp->type =
922 msg->im_msgtype = assert; 931 msg->im_msgtype = assert;
923 igmp->code = 0; 932 igmp->code = 0;
924 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */ 933 ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
925 skb->transport_header = skb->network_header; 934 skb->transport_header = skb->network_header;
926 } 935 }
927 936
928 if (mrt->mroute_sk == NULL) { 937 rcu_read_lock();
938 mroute_sk = rcu_dereference(mrt->mroute_sk);
939 if (mroute_sk == NULL) {
940 rcu_read_unlock();
929 kfree_skb(skb); 941 kfree_skb(skb);
930 return -EINVAL; 942 return -EINVAL;
931 } 943 }
932 944
933 /* 945 /* Deliver to mrouted */
934 * Deliver to mrouted 946
935 */ 947 ret = sock_queue_rcv_skb(mroute_sk, skb);
936 ret = sock_queue_rcv_skb(mrt->mroute_sk, skb); 948 rcu_read_unlock();
937 if (ret < 0) { 949 if (ret < 0) {
938 if (net_ratelimit()) 950 if (net_ratelimit())
939 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); 951 printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
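
mroute_sk gets the same treatment as the tunnel chains: the pointer is now __rcu-annotated, control-plane writers publish it with rcu_assign_pointer() under RTNL, and this report path reads it under rcu_read_lock() instead of grabbing mrt_lock. Condensed to the delivery step:

/* datagram delivery to the userspace routing daemon, lockless */
static int demo_deliver(struct mr_table *mrt, struct sk_buff *skb)
{
        struct sock *sk;
        int ret;

        rcu_read_lock();
        sk = rcu_dereference(mrt->mroute_sk);
        if (!sk) {
                rcu_read_unlock();
                kfree_skb(skb);
                return -EINVAL;
        }
        ret = sock_queue_rcv_skb(sk, skb);
        rcu_read_unlock();

        return ret;
}
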
@@ -965,9 +977,7 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
965 } 977 }
966 978
967 if (!found) { 979 if (!found) {
968 /* 980 /* Create a new entry if allowable */
969 * Create a new entry if allowable
970 */
971 981
972 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || 982 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
973 (c = ipmr_cache_alloc_unres()) == NULL) { 983 (c = ipmr_cache_alloc_unres()) == NULL) {
@@ -977,16 +987,14 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
977 return -ENOBUFS; 987 return -ENOBUFS;
978 } 988 }
979 989
980 /* 990 /* Fill in the new cache entry */
981 * Fill in the new cache entry 991
982 */
983 c->mfc_parent = -1; 992 c->mfc_parent = -1;
984 c->mfc_origin = iph->saddr; 993 c->mfc_origin = iph->saddr;
985 c->mfc_mcastgrp = iph->daddr; 994 c->mfc_mcastgrp = iph->daddr;
986 995
987 /* 996 /* Reflect first query at mrouted. */
988 * Reflect first query at mrouted. 997
989 */
990 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE); 998 err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
991 if (err < 0) { 999 if (err < 0) {
992 /* If the report failed throw the cache entry 1000 /* If the report failed throw the cache entry
@@ -1006,10 +1014,9 @@ ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
1006 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires); 1014 mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
1007 } 1015 }
1008 1016
1009 /* 1017 /* See if we can append the packet */
1010 * See if we can append the packet 1018
1011 */ 1019 if (c->mfc_un.unres.unresolved.qlen > 3) {
1012 if (c->mfc_un.unres.unresolved.qlen>3) {
1013 kfree_skb(skb); 1020 kfree_skb(skb);
1014 err = -ENOBUFS; 1021 err = -ENOBUFS;
1015 } else { 1022 } else {
@@ -1035,9 +1042,7 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
1035 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { 1042 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
1036 if (c->mfc_origin == mfc->mfcc_origin.s_addr && 1043 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
1037 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { 1044 c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
1038 write_lock_bh(&mrt_lock); 1045 list_del_rcu(&c->list);
1039 list_del(&c->list);
1040 write_unlock_bh(&mrt_lock);
1041 1046
1042 ipmr_cache_free(c); 1047 ipmr_cache_free(c);
1043 return 0; 1048 return 0;
@@ -1090,9 +1095,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
1090 if (!mrtsock) 1095 if (!mrtsock)
1091 c->mfc_flags |= MFC_STATIC; 1096 c->mfc_flags |= MFC_STATIC;
1092 1097
1093 write_lock_bh(&mrt_lock); 1098 list_add_rcu(&c->list, &mrt->mfc_cache_array[line]);
1094 list_add(&c->list, &mrt->mfc_cache_array[line]);
1095 write_unlock_bh(&mrt_lock);
1096 1099
1097 /* 1100 /*
1098 * Check to see if we resolved a queued list. If so we 1101 * Check to see if we resolved a queued list. If so we
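
The matching insert side: list_add_rcu() contains the write barrier (via rcu_assign_pointer() on the link pointer) that orders the entry's initialization before the entry becomes reachable, so lockless readers can never observe a half-built cache entry. Sketch of the ordering the new code relies on:

    c->mfc_parent   = -1;               /* initialize every field ... */
    c->mfc_origin   = mfc->mfcc_origin.s_addr;
    c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
    /* ... and only then publish; the stores above cannot be
     * reordered past the pointer update that makes 'c' visible. */
    list_add_rcu(&c->list, &mrt->mfc_cache_array[line]);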
@@ -1130,26 +1133,21 @@ static void mroute_clean_tables(struct mr_table *mrt)
1130 LIST_HEAD(list); 1133 LIST_HEAD(list);
1131 struct mfc_cache *c, *next; 1134 struct mfc_cache *c, *next;
1132 1135
1133 /* 1136 /* Shut down all active vif entries */
1134 * Shut down all active vif entries 1137
1135 */
1136 for (i = 0; i < mrt->maxvif; i++) { 1138 for (i = 0; i < mrt->maxvif; i++) {
1137 if (!(mrt->vif_table[i].flags&VIFF_STATIC)) 1139 if (!(mrt->vif_table[i].flags & VIFF_STATIC))
1138 vif_delete(mrt, i, 0, &list); 1140 vif_delete(mrt, i, 0, &list);
1139 } 1141 }
1140 unregister_netdevice_many(&list); 1142 unregister_netdevice_many(&list);
1141 1143
1142 /* 1144 /* Wipe the cache */
1143 * Wipe the cache 1145
1144 */
1145 for (i = 0; i < MFC_LINES; i++) { 1146 for (i = 0; i < MFC_LINES; i++) {
1146 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { 1147 list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
1147 if (c->mfc_flags&MFC_STATIC) 1148 if (c->mfc_flags & MFC_STATIC)
1148 continue; 1149 continue;
1149 write_lock_bh(&mrt_lock); 1150 list_del_rcu(&c->list);
1150 list_del(&c->list);
1151 write_unlock_bh(&mrt_lock);
1152
1153 ipmr_cache_free(c); 1151 ipmr_cache_free(c);
1154 } 1152 }
1155 } 1153 }
@@ -1164,6 +1162,9 @@ static void mroute_clean_tables(struct mr_table *mrt)
1164 } 1162 }
1165} 1163}
1166 1164
1165/* called from ip_ra_control(), before an RCU grace period,
1166 * we don't need to call synchronize_rcu() here
1167 */
1167static void mrtsock_destruct(struct sock *sk) 1168static void mrtsock_destruct(struct sock *sk)
1168{ 1169{
1169 struct net *net = sock_net(sk); 1170 struct net *net = sock_net(sk);
@@ -1171,13 +1172,9 @@ static void mrtsock_destruct(struct sock *sk)
1171 1172
1172 rtnl_lock(); 1173 rtnl_lock();
1173 ipmr_for_each_table(mrt, net) { 1174 ipmr_for_each_table(mrt, net) {
1174 if (sk == mrt->mroute_sk) { 1175 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1175 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; 1176 IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
1176 1177 rcu_assign_pointer(mrt->mroute_sk, NULL);
1177 write_lock_bh(&mrt_lock);
1178 mrt->mroute_sk = NULL;
1179 write_unlock_bh(&mrt_lock);
1180
1181 mroute_clean_tables(mrt); 1178 mroute_clean_tables(mrt);
1182 } 1179 }
1183 } 1180 }
@@ -1204,7 +1201,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1204 return -ENOENT; 1201 return -ENOENT;
1205 1202
1206 if (optname != MRT_INIT) { 1203 if (optname != MRT_INIT) {
1207 if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN)) 1204 if (sk != rcu_dereference_raw(mrt->mroute_sk) &&
1205 !capable(CAP_NET_ADMIN))
1208 return -EACCES; 1206 return -EACCES;
1209 } 1207 }
1210 1208
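
Three accessors now appear for the same mroute_sk pointer, chosen by what protects the caller rather than by taste. In code form (all three are standard RCU helpers of this kernel generation):

    /* Fast path, inside rcu_read_lock()/rcu_read_unlock(): */
    mroute_sk = rcu_dereference(mrt->mroute_sk);

    /* Update side, RTNL held: no read-side section is needed, and
     * lockdep can check that RTNL really is held. */
    if (sk == rtnl_dereference(mrt->mroute_sk))
            rcu_assign_pointer(mrt->mroute_sk, NULL);

    /* Pointer compared but never dereferenced; _raw records that
     * skipping protection here is deliberate. */
    if (sk != rcu_dereference_raw(mrt->mroute_sk))
            return -EACCES;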
@@ -1217,23 +1215,20 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1217 return -ENOPROTOOPT; 1215 return -ENOPROTOOPT;
1218 1216
1219 rtnl_lock(); 1217 rtnl_lock();
1220 if (mrt->mroute_sk) { 1218 if (rtnl_dereference(mrt->mroute_sk)) {
1221 rtnl_unlock(); 1219 rtnl_unlock();
1222 return -EADDRINUSE; 1220 return -EADDRINUSE;
1223 } 1221 }
1224 1222
1225 ret = ip_ra_control(sk, 1, mrtsock_destruct); 1223 ret = ip_ra_control(sk, 1, mrtsock_destruct);
1226 if (ret == 0) { 1224 if (ret == 0) {
1227 write_lock_bh(&mrt_lock); 1225 rcu_assign_pointer(mrt->mroute_sk, sk);
1228 mrt->mroute_sk = sk;
1229 write_unlock_bh(&mrt_lock);
1230
1231 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; 1226 IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
1232 } 1227 }
1233 rtnl_unlock(); 1228 rtnl_unlock();
1234 return ret; 1229 return ret;
1235 case MRT_DONE: 1230 case MRT_DONE:
1236 if (sk != mrt->mroute_sk) 1231 if (sk != rcu_dereference_raw(mrt->mroute_sk))
1237 return -EACCES; 1232 return -EACCES;
1238 return ip_ra_control(sk, 0, NULL); 1233 return ip_ra_control(sk, 0, NULL);
1239 case MRT_ADD_VIF: 1234 case MRT_ADD_VIF:
@@ -1246,7 +1241,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1246 return -ENFILE; 1241 return -ENFILE;
1247 rtnl_lock(); 1242 rtnl_lock();
1248 if (optname == MRT_ADD_VIF) { 1243 if (optname == MRT_ADD_VIF) {
1249 ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk); 1244 ret = vif_add(net, mrt, &vif,
1245 sk == rtnl_dereference(mrt->mroute_sk));
1250 } else { 1246 } else {
1251 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL); 1247 ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
1252 } 1248 }
@@ -1267,7 +1263,8 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1267 if (optname == MRT_DEL_MFC) 1263 if (optname == MRT_DEL_MFC)
1268 ret = ipmr_mfc_delete(mrt, &mfc); 1264 ret = ipmr_mfc_delete(mrt, &mfc);
1269 else 1265 else
1270 ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk); 1266 ret = ipmr_mfc_add(net, mrt, &mfc,
1267 sk == rtnl_dereference(mrt->mroute_sk));
1271 rtnl_unlock(); 1268 rtnl_unlock();
1272 return ret; 1269 return ret;
1273 /* 1270 /*
@@ -1276,7 +1273,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1276 case MRT_ASSERT: 1273 case MRT_ASSERT:
1277 { 1274 {
1278 int v; 1275 int v;
1279 if (get_user(v,(int __user *)optval)) 1276 if (get_user(v, (int __user *)optval))
1280 return -EFAULT; 1277 return -EFAULT;
1281 mrt->mroute_do_assert = (v) ? 1 : 0; 1278 mrt->mroute_do_assert = (v) ? 1 : 0;
1282 return 0; 1279 return 0;
@@ -1286,7 +1283,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1286 { 1283 {
1287 int v; 1284 int v;
1288 1285
1289 if (get_user(v,(int __user *)optval)) 1286 if (get_user(v, (int __user *)optval))
1290 return -EFAULT; 1287 return -EFAULT;
1291 v = (v) ? 1 : 0; 1288 v = (v) ? 1 : 0;
1292 1289
@@ -1309,14 +1306,16 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
1309 return -EINVAL; 1306 return -EINVAL;
1310 if (get_user(v, (u32 __user *)optval)) 1307 if (get_user(v, (u32 __user *)optval))
1311 return -EFAULT; 1308 return -EFAULT;
1312 if (sk == mrt->mroute_sk)
1313 return -EBUSY;
1314 1309
1315 rtnl_lock(); 1310 rtnl_lock();
1316 ret = 0; 1311 ret = 0;
1317 if (!ipmr_new_table(net, v)) 1312 if (sk == rtnl_dereference(mrt->mroute_sk)) {
1318 ret = -ENOMEM; 1313 ret = -EBUSY;
1319 raw_sk(sk)->ipmr_table = v; 1314 } else {
1315 if (!ipmr_new_table(net, v))
1316 ret = -ENOMEM;
1317 raw_sk(sk)->ipmr_table = v;
1318 }
1320 rtnl_unlock(); 1319 rtnl_unlock();
1321 return ret; 1320 return ret;
1322 } 1321 }
@@ -1347,9 +1346,9 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
1347 1346
1348 if (optname != MRT_VERSION && 1347 if (optname != MRT_VERSION &&
1349#ifdef CONFIG_IP_PIMSM 1348#ifdef CONFIG_IP_PIMSM
1350 optname!=MRT_PIM && 1349 optname != MRT_PIM &&
1351#endif 1350#endif
1352 optname!=MRT_ASSERT) 1351 optname != MRT_ASSERT)
1353 return -ENOPROTOOPT; 1352 return -ENOPROTOOPT;
1354 1353
1355 if (get_user(olr, optlen)) 1354 if (get_user(olr, optlen))
@@ -1416,19 +1415,19 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
1416 if (copy_from_user(&sr, arg, sizeof(sr))) 1415 if (copy_from_user(&sr, arg, sizeof(sr)))
1417 return -EFAULT; 1416 return -EFAULT;
1418 1417
1419 read_lock(&mrt_lock); 1418 rcu_read_lock();
1420 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr); 1419 c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
1421 if (c) { 1420 if (c) {
1422 sr.pktcnt = c->mfc_un.res.pkt; 1421 sr.pktcnt = c->mfc_un.res.pkt;
1423 sr.bytecnt = c->mfc_un.res.bytes; 1422 sr.bytecnt = c->mfc_un.res.bytes;
1424 sr.wrong_if = c->mfc_un.res.wrong_if; 1423 sr.wrong_if = c->mfc_un.res.wrong_if;
1425 read_unlock(&mrt_lock); 1424 rcu_read_unlock();
1426 1425
1427 if (copy_to_user(arg, &sr, sizeof(sr))) 1426 if (copy_to_user(arg, &sr, sizeof(sr)))
1428 return -EFAULT; 1427 return -EFAULT;
1429 return 0; 1428 return 0;
1430 } 1429 }
1431 read_unlock(&mrt_lock); 1430 rcu_read_unlock();
1432 return -EADDRNOTAVAIL; 1431 return -EADDRNOTAVAIL;
1433 default: 1432 default:
1434 return -ENOIOCTLCMD; 1433 return -ENOIOCTLCMD;
@@ -1465,7 +1464,7 @@ static struct notifier_block ip_mr_notifier = {
1465}; 1464};
1466 1465
1467/* 1466/*
1468 * Encapsulate a packet by attaching a valid IPIP header to it. 1467 * Encapsulate a packet by attaching a valid IPIP header to it.
1469 * This avoids tunnel drivers and other mess and gives us the speed so 1468 * This avoids tunnel drivers and other mess and gives us the speed so
1470 * important for multicast video. 1469 * important for multicast video.
1471 */ 1470 */
@@ -1480,7 +1479,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1480 skb_reset_network_header(skb); 1479 skb_reset_network_header(skb);
1481 iph = ip_hdr(skb); 1480 iph = ip_hdr(skb);
1482 1481
1483 iph->version = 4; 1482 iph->version = 4;
1484 iph->tos = old_iph->tos; 1483 iph->tos = old_iph->tos;
1485 iph->ttl = old_iph->ttl; 1484 iph->ttl = old_iph->ttl;
1486 iph->frag_off = 0; 1485 iph->frag_off = 0;
@@ -1498,7 +1497,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
1498 1497
1499static inline int ipmr_forward_finish(struct sk_buff *skb) 1498static inline int ipmr_forward_finish(struct sk_buff *skb)
1500{ 1499{
1501 struct ip_options * opt = &(IPCB(skb)->opt); 1500 struct ip_options *opt = &(IPCB(skb)->opt);
1502 1501
1503 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); 1502 IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
1504 1503
@@ -1535,22 +1534,34 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1535 } 1534 }
1536#endif 1535#endif
1537 1536
1538 if (vif->flags&VIFF_TUNNEL) { 1537 if (vif->flags & VIFF_TUNNEL) {
1539 struct flowi fl = { .oif = vif->link, 1538 struct flowi fl = {
1540 .nl_u = { .ip4_u = 1539 .oif = vif->link,
1541 { .daddr = vif->remote, 1540 .nl_u = {
1542 .saddr = vif->local, 1541 .ip4_u = {
1543 .tos = RT_TOS(iph->tos) } }, 1542 .daddr = vif->remote,
1544 .proto = IPPROTO_IPIP }; 1543 .saddr = vif->local,
1544 .tos = RT_TOS(iph->tos)
1545 }
1546 },
1547 .proto = IPPROTO_IPIP
1548 };
1549
1545 if (ip_route_output_key(net, &rt, &fl)) 1550 if (ip_route_output_key(net, &rt, &fl))
1546 goto out_free; 1551 goto out_free;
1547 encap = sizeof(struct iphdr); 1552 encap = sizeof(struct iphdr);
1548 } else { 1553 } else {
1549 struct flowi fl = { .oif = vif->link, 1554 struct flowi fl = {
1550 .nl_u = { .ip4_u = 1555 .oif = vif->link,
1551 { .daddr = iph->daddr, 1556 .nl_u = {
1552 .tos = RT_TOS(iph->tos) } }, 1557 .ip4_u = {
1553 .proto = IPPROTO_IPIP }; 1558 .daddr = iph->daddr,
1559 .tos = RT_TOS(iph->tos)
1560 }
1561 },
1562 .proto = IPPROTO_IPIP
1563 };
1564
1554 if (ip_route_output_key(net, &rt, &fl)) 1565 if (ip_route_output_key(net, &rt, &fl))
1555 goto out_free; 1566 goto out_free;
1556 } 1567 }
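
The two flowi hunks above are pure reformatting: the nested C99 designated initializers are spread one member per line. One semantic property worth remembering when reading them: members that are not named (.saddr in the second initializer, for instance) are zero-initialized, not left indeterminate. A standalone userspace illustration of the construct, with made-up types:

    #include <stdio.h>

    struct ip4_sel { unsigned daddr, saddr, tos; };
    struct flow {
            int oif;
            union { struct ip4_sel ip4; } nl_u;
            int proto;
    };

    int main(void)
    {
            struct flow fl = {
                    .oif = 1,
                    .nl_u = {
                            .ip4 = {
                                    .daddr = 0x0a000001,
                                    /* .saddr omitted => 0 */
                                    .tos = 0x10,
                            }
                    },
                    .proto = 4,
            };
            printf("oif=%d daddr=%x saddr=%x\n",
                   fl.oif, fl.nl_u.ip4.daddr, fl.nl_u.ip4.saddr);
            return 0;
    }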
@@ -1559,8 +1570,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1559 1570
1560 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { 1571 if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
1561 /* Do not fragment multicasts. Alas, IPv4 does not 1572 /* Do not fragment multicasts. Alas, IPv4 does not
1562 allow to send ICMP, so that packets will disappear 1573 * allow to send ICMP, so that packets will disappear
1563 to blackhole. 1574 * to blackhole.
1564 */ 1575 */
1565 1576
1566 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); 1577 IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
@@ -1583,7 +1594,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
1583 ip_decrease_ttl(ip_hdr(skb)); 1594 ip_decrease_ttl(ip_hdr(skb));
1584 1595
1585 /* FIXME: forward and output firewalls used to be called here. 1596 /* FIXME: forward and output firewalls used to be called here.
1586 * What do we do with netfilter? -- RR */ 1597 * What do we do with netfilter? -- RR
1598 */
1587 if (vif->flags & VIFF_TUNNEL) { 1599 if (vif->flags & VIFF_TUNNEL) {
1588 ip_encap(skb, vif->local, vif->remote); 1600 ip_encap(skb, vif->local, vif->remote);
1589 /* FIXME: extra output firewall step used to be here. --RR */ 1601 /* FIXME: extra output firewall step used to be here. --RR */
@@ -1644,15 +1656,15 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1644 1656
1645 if (skb_rtable(skb)->fl.iif == 0) { 1657 if (skb_rtable(skb)->fl.iif == 0) {
1646 /* It is our own packet, looped back. 1658 /* It is our own packet, looped back.
1647 Very complicated situation... 1659 * Very complicated situation...
1648 1660 *
1649 The best workaround until routing daemons will be 1661 * The best workaround until routing daemons will be
1650 fixed is not to redistribute packet, if it was 1662 * fixed is not to redistribute packet, if it was
1651 send through wrong interface. It means, that 1663 * send through wrong interface. It means, that
1652 multicast applications WILL NOT work for 1664 * multicast applications WILL NOT work for
1653 (S,G), which have default multicast route pointing 1665 * (S,G), which have default multicast route pointing
1654 to wrong oif. In any case, it is not a good 1666 * to wrong oif. In any case, it is not a good
1655 idea to use multicasting applications on router. 1667 * idea to use multicasting applications on router.
1656 */ 1668 */
1657 goto dont_forward; 1669 goto dont_forward;
1658 } 1670 }
@@ -1662,9 +1674,9 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1662 1674
1663 if (true_vifi >= 0 && mrt->mroute_do_assert && 1675 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1664 /* pimsm uses asserts, when switching from RPT to SPT, 1676 /* pimsm uses asserts, when switching from RPT to SPT,
1665 so that we cannot check that packet arrived on an oif. 1677 * so that we cannot check that packet arrived on an oif.
1666 It is bad, but otherwise we would need to move pretty 1678 * It is bad, but otherwise we would need to move pretty
1667 large chunk of pimd to kernel. Ough... --ANK 1679 * large chunk of pimd to kernel. Ough... --ANK
1668 */ 1680 */
1669 (mrt->mroute_do_pim || 1681 (mrt->mroute_do_pim ||
1670 cache->mfc_un.res.ttls[true_vifi] < 255) && 1682 cache->mfc_un.res.ttls[true_vifi] < 255) &&
@@ -1682,10 +1694,12 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1682 /* 1694 /*
1683 * Forward the frame 1695 * Forward the frame
1684 */ 1696 */
1685 for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) { 1697 for (ct = cache->mfc_un.res.maxvif - 1;
1698 ct >= cache->mfc_un.res.minvif; ct--) {
1686 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) { 1699 if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
1687 if (psend != -1) { 1700 if (psend != -1) {
1688 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1701 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1702
1689 if (skb2) 1703 if (skb2)
1690 ipmr_queue_xmit(net, mrt, skb2, cache, 1704 ipmr_queue_xmit(net, mrt, skb2, cache,
1691 psend); 1705 psend);
@@ -1696,6 +1710,7 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1696 if (psend != -1) { 1710 if (psend != -1) {
1697 if (local) { 1711 if (local) {
1698 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1712 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1713
1699 if (skb2) 1714 if (skb2)
1700 ipmr_queue_xmit(net, mrt, skb2, cache, psend); 1715 ipmr_queue_xmit(net, mrt, skb2, cache, psend);
1701 } else { 1716 } else {
@@ -1713,6 +1728,7 @@ dont_forward:
1713 1728
1714/* 1729/*
1715 * Multicast packets for forwarding arrive here 1730 * Multicast packets for forwarding arrive here
1731 * Called with rcu_read_lock();
1716 */ 1732 */
1717 1733
1718int ip_mr_input(struct sk_buff *skb) 1734int ip_mr_input(struct sk_buff *skb)
@@ -1724,9 +1740,9 @@ int ip_mr_input(struct sk_buff *skb)
1724 int err; 1740 int err;
1725 1741
1726 /* Packet is looped back after forward, it should not be 1742 /* Packet is looped back after forward, it should not be
1727 forwarded second time, but still can be delivered locally. 1743 * forwarded second time, but still can be delivered locally.
1728 */ 1744 */
1729 if (IPCB(skb)->flags&IPSKB_FORWARDED) 1745 if (IPCB(skb)->flags & IPSKB_FORWARDED)
1730 goto dont_forward; 1746 goto dont_forward;
1731 1747
1732 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt); 1748 err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
@@ -1736,28 +1752,28 @@ int ip_mr_input(struct sk_buff *skb)
1736 } 1752 }
1737 1753
1738 if (!local) { 1754 if (!local) {
1739 if (IPCB(skb)->opt.router_alert) { 1755 if (IPCB(skb)->opt.router_alert) {
1740 if (ip_call_ra_chain(skb)) 1756 if (ip_call_ra_chain(skb))
1741 return 0; 1757 return 0;
1742 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP){ 1758 } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
1743 /* IGMPv1 (and broken IGMPv2 implementations sort of 1759 /* IGMPv1 (and broken IGMPv2 implementations sort of
1744 Cisco IOS <= 11.2(8)) do not put router alert 1760 * Cisco IOS <= 11.2(8)) do not put router alert
1745 option to IGMP packets destined to routable 1761 * option to IGMP packets destined to routable
1746 groups. It is very bad, because it means 1762 * groups. It is very bad, because it means
1747 that we can forward NO IGMP messages. 1763 * that we can forward NO IGMP messages.
1748 */ 1764 */
1749 read_lock(&mrt_lock); 1765 struct sock *mroute_sk;
1750 if (mrt->mroute_sk) { 1766
1751 nf_reset(skb); 1767 mroute_sk = rcu_dereference(mrt->mroute_sk);
1752 raw_rcv(mrt->mroute_sk, skb); 1768 if (mroute_sk) {
1753 read_unlock(&mrt_lock); 1769 nf_reset(skb);
1754 return 0; 1770 raw_rcv(mroute_sk, skb);
1755 } 1771 return 0;
1756 read_unlock(&mrt_lock); 1772 }
1757 } 1773 }
1758 } 1774 }
1759 1775
1760 read_lock(&mrt_lock); 1776 /* already under rcu_read_lock() */
1761 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); 1777 cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
1762 1778
1763 /* 1779 /*
@@ -1769,13 +1785,12 @@ int ip_mr_input(struct sk_buff *skb)
1769 if (local) { 1785 if (local) {
1770 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); 1786 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1771 ip_local_deliver(skb); 1787 ip_local_deliver(skb);
1772 if (skb2 == NULL) { 1788 if (skb2 == NULL)
1773 read_unlock(&mrt_lock);
1774 return -ENOBUFS; 1789 return -ENOBUFS;
1775 }
1776 skb = skb2; 1790 skb = skb2;
1777 } 1791 }
1778 1792
1793 read_lock(&mrt_lock);
1779 vif = ipmr_find_vif(mrt, skb->dev); 1794 vif = ipmr_find_vif(mrt, skb->dev);
1780 if (vif >= 0) { 1795 if (vif >= 0) {
1781 int err2 = ipmr_cache_unresolved(mrt, vif, skb); 1796 int err2 = ipmr_cache_unresolved(mrt, vif, skb);
@@ -1788,8 +1803,8 @@ int ip_mr_input(struct sk_buff *skb)
1788 return -ENODEV; 1803 return -ENODEV;
1789 } 1804 }
1790 1805
1806 read_lock(&mrt_lock);
1791 ip_mr_forward(net, mrt, skb, cache, local); 1807 ip_mr_forward(net, mrt, skb, cache, local);
1792
1793 read_unlock(&mrt_lock); 1808 read_unlock(&mrt_lock);
1794 1809
1795 if (local) 1810 if (local)
@@ -1805,6 +1820,7 @@ dont_forward:
1805} 1820}
1806 1821
1807#ifdef CONFIG_IP_PIMSM 1822#ifdef CONFIG_IP_PIMSM
1823/* called with rcu_read_lock() */
1808static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, 1824static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1809 unsigned int pimlen) 1825 unsigned int pimlen)
1810{ 1826{
@@ -1813,10 +1829,10 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1813 1829
1814 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); 1830 encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
1815 /* 1831 /*
1816 Check that: 1832 * Check that:
1817 a. packet is really destinted to a multicast group 1833 * a. packet is really sent to a multicast group
1818 b. packet is not a NULL-REGISTER 1834 * b. packet is not a NULL-REGISTER
1819 c. packet is not truncated 1835 * c. packet is not truncated
1820 */ 1836 */
1821 if (!ipv4_is_multicast(encap->daddr) || 1837 if (!ipv4_is_multicast(encap->daddr) ||
1822 encap->tot_len == 0 || 1838 encap->tot_len == 0 ||
@@ -1826,26 +1842,23 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1826 read_lock(&mrt_lock); 1842 read_lock(&mrt_lock);
1827 if (mrt->mroute_reg_vif_num >= 0) 1843 if (mrt->mroute_reg_vif_num >= 0)
1828 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; 1844 reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
1829 if (reg_dev)
1830 dev_hold(reg_dev);
1831 read_unlock(&mrt_lock); 1845 read_unlock(&mrt_lock);
1832 1846
1833 if (reg_dev == NULL) 1847 if (reg_dev == NULL)
1834 return 1; 1848 return 1;
1835 1849
1836 skb->mac_header = skb->network_header; 1850 skb->mac_header = skb->network_header;
1837 skb_pull(skb, (u8*)encap - skb->data); 1851 skb_pull(skb, (u8 *)encap - skb->data);
1838 skb_reset_network_header(skb); 1852 skb_reset_network_header(skb);
1839 skb->protocol = htons(ETH_P_IP); 1853 skb->protocol = htons(ETH_P_IP);
1840 skb->ip_summed = 0; 1854 skb->ip_summed = CHECKSUM_NONE;
1841 skb->pkt_type = PACKET_HOST; 1855 skb->pkt_type = PACKET_HOST;
1842 1856
1843 skb_tunnel_rx(skb, reg_dev); 1857 skb_tunnel_rx(skb, reg_dev);
1844 1858
1845 netif_rx(skb); 1859 netif_rx(skb);
1846 dev_put(reg_dev);
1847 1860
1848 return 0; 1861 return NET_RX_SUCCESS;
1849} 1862}
1850#endif 1863#endif
1851 1864
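The dev_hold()/dev_put() pair around reg_dev disappears because __pim_rcv() now runs entirely inside the caller's rcu_read_lock() section (per the comment added above it). The sketch below shows why that is sufficient, on the assumption that device teardown waits for readers, which unregister_netdevice() does via synchronize_net():

    /* Caller holds rcu_read_lock(). */
    read_lock(&mrt_lock);
    if (mrt->mroute_reg_vif_num >= 0)
            reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
    read_unlock(&mrt_lock);
    /* No dev_hold(): the device cannot be freed before we leave the
     * RCU read-side section, so reg_dev stays valid through netif_rx(). */

The switch from returning 0 to NET_RX_SUCCESS is cosmetic; NET_RX_SUCCESS is defined as 0.
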
@@ -1854,7 +1867,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
1854 * Handle IGMP messages of PIMv1 1867 * Handle IGMP messages of PIMv1
1855 */ 1868 */
1856 1869
1857int pim_rcv_v1(struct sk_buff * skb) 1870int pim_rcv_v1(struct sk_buff *skb)
1858{ 1871{
1859 struct igmphdr *pim; 1872 struct igmphdr *pim;
1860 struct net *net = dev_net(skb->dev); 1873 struct net *net = dev_net(skb->dev);
@@ -1881,7 +1894,7 @@ drop:
1881#endif 1894#endif
1882 1895
1883#ifdef CONFIG_IP_PIMSM_V2 1896#ifdef CONFIG_IP_PIMSM_V2
1884static int pim_rcv(struct sk_buff * skb) 1897static int pim_rcv(struct sk_buff *skb)
1885{ 1898{
1886 struct pimreghdr *pim; 1899 struct pimreghdr *pim;
1887 struct net *net = dev_net(skb->dev); 1900 struct net *net = dev_net(skb->dev);
@@ -1891,8 +1904,8 @@ static int pim_rcv(struct sk_buff * skb)
1891 goto drop; 1904 goto drop;
1892 1905
1893 pim = (struct pimreghdr *)skb_transport_header(skb); 1906 pim = (struct pimreghdr *)skb_transport_header(skb);
1894 if (pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) || 1907 if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
1895 (pim->flags&PIM_NULL_REGISTER) || 1908 (pim->flags & PIM_NULL_REGISTER) ||
1896 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 && 1909 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
1897 csum_fold(skb_checksum(skb, 0, skb->len, 0)))) 1910 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
1898 goto drop; 1911 goto drop;
@@ -1958,28 +1971,33 @@ int ipmr_get_route(struct net *net,
1958 if (mrt == NULL) 1971 if (mrt == NULL)
1959 return -ENOENT; 1972 return -ENOENT;
1960 1973
1961 read_lock(&mrt_lock); 1974 rcu_read_lock();
1962 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst); 1975 cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
1963 1976
1964 if (cache == NULL) { 1977 if (cache == NULL) {
1965 struct sk_buff *skb2; 1978 struct sk_buff *skb2;
1966 struct iphdr *iph; 1979 struct iphdr *iph;
1967 struct net_device *dev; 1980 struct net_device *dev;
1968 int vif; 1981 int vif = -1;
1969 1982
1970 if (nowait) { 1983 if (nowait) {
1971 read_unlock(&mrt_lock); 1984 rcu_read_unlock();
1972 return -EAGAIN; 1985 return -EAGAIN;
1973 } 1986 }
1974 1987
1975 dev = skb->dev; 1988 dev = skb->dev;
1976 if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) { 1989 read_lock(&mrt_lock);
1990 if (dev)
1991 vif = ipmr_find_vif(mrt, dev);
1992 if (vif < 0) {
1977 read_unlock(&mrt_lock); 1993 read_unlock(&mrt_lock);
1994 rcu_read_unlock();
1978 return -ENODEV; 1995 return -ENODEV;
1979 } 1996 }
1980 skb2 = skb_clone(skb, GFP_ATOMIC); 1997 skb2 = skb_clone(skb, GFP_ATOMIC);
1981 if (!skb2) { 1998 if (!skb2) {
1982 read_unlock(&mrt_lock); 1999 read_unlock(&mrt_lock);
2000 rcu_read_unlock();
1983 return -ENOMEM; 2001 return -ENOMEM;
1984 } 2002 }
1985 2003
@@ -1992,13 +2010,16 @@ int ipmr_get_route(struct net *net,
1992 iph->version = 0; 2010 iph->version = 0;
1993 err = ipmr_cache_unresolved(mrt, vif, skb2); 2011 err = ipmr_cache_unresolved(mrt, vif, skb2);
1994 read_unlock(&mrt_lock); 2012 read_unlock(&mrt_lock);
2013 rcu_read_unlock();
1995 return err; 2014 return err;
1996 } 2015 }
1997 2016
1998 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY)) 2017 read_lock(&mrt_lock);
2018 if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
1999 cache->mfc_flags |= MFC_NOTIFY; 2019 cache->mfc_flags |= MFC_NOTIFY;
2000 err = __ipmr_fill_mroute(mrt, skb, cache, rtm); 2020 err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
2001 read_unlock(&mrt_lock); 2021 read_unlock(&mrt_lock);
2022 rcu_read_unlock();
2002 return err; 2023 return err;
2003} 2024}
2004 2025
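Note the lock nesting the rewritten ipmr_get_route() settles on: rcu_read_lock() protects the mfc cache lookup (the hash chains are now RCU lists), while mrt_lock is still taken as a plain rwlock around the parts that touch vif_table[]. The unlock order mirrors the lock order:

    rcu_read_lock();                 /* mfc_cache_array: RCU-protected    */
    cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
    ...
    read_lock(&mrt_lock);            /* vif_table[]: still rwlock-guarded */
    ...
    read_unlock(&mrt_lock);          /* inner lock first,                 */
    rcu_read_unlock();               /* then leave the read-side section  */
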
@@ -2050,14 +2071,14 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2050 s_h = cb->args[1]; 2071 s_h = cb->args[1];
2051 s_e = cb->args[2]; 2072 s_e = cb->args[2];
2052 2073
2053 read_lock(&mrt_lock); 2074 rcu_read_lock();
2054 ipmr_for_each_table(mrt, net) { 2075 ipmr_for_each_table(mrt, net) {
2055 if (t < s_t) 2076 if (t < s_t)
2056 goto next_table; 2077 goto next_table;
2057 if (t > s_t) 2078 if (t > s_t)
2058 s_h = 0; 2079 s_h = 0;
2059 for (h = s_h; h < MFC_LINES; h++) { 2080 for (h = s_h; h < MFC_LINES; h++) {
2060 list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) { 2081 list_for_each_entry_rcu(mfc, &mrt->mfc_cache_array[h], list) {
2061 if (e < s_e) 2082 if (e < s_e)
2062 goto next_entry; 2083 goto next_entry;
2063 if (ipmr_fill_mroute(mrt, skb, 2084 if (ipmr_fill_mroute(mrt, skb,
@@ -2075,7 +2096,7 @@ next_table:
2075 t++; 2096 t++;
2076 } 2097 }
2077done: 2098done:
2078 read_unlock(&mrt_lock); 2099 rcu_read_unlock();
2079 2100
2080 cb->args[2] = e; 2101 cb->args[2] = e;
2081 cb->args[1] = h; 2102 cb->args[1] = h;
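
list_for_each_entry_rcu() is the companion to the list_del_rcu()/list_add_rcu() conversion earlier in the patch: it issues the dependency-ordered loads needed to walk the chain locklessly, and it must run inside rcu_read_lock(). A removed entry can still be visited mid-walk, but its forward pointer remains intact until a grace period passes, so the iteration itself never derails:

    rcu_read_lock();
    list_for_each_entry_rcu(mfc, &mrt->mfc_cache_array[h], list) {
            /* 'mfc' may be concurrently unlinked; it stays readable,
             * and mfc->list.next stays valid, until we unlock. */
            ...
    }
    rcu_read_unlock();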
@@ -2086,7 +2107,8 @@ done:
2086 2107
2087#ifdef CONFIG_PROC_FS 2108#ifdef CONFIG_PROC_FS
2088/* 2109/*
2089 * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif 2110 * The /proc interfaces to multicast routing :
2111 * /proc/net/ip_mr_cache & /proc/net/ip_mr_vif
2090 */ 2112 */
2091struct ipmr_vif_iter { 2113struct ipmr_vif_iter {
2092 struct seq_net_private p; 2114 struct seq_net_private p;
@@ -2208,14 +2230,14 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
2208 struct mr_table *mrt = it->mrt; 2230 struct mr_table *mrt = it->mrt;
2209 struct mfc_cache *mfc; 2231 struct mfc_cache *mfc;
2210 2232
2211 read_lock(&mrt_lock); 2233 rcu_read_lock();
2212 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) { 2234 for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
2213 it->cache = &mrt->mfc_cache_array[it->ct]; 2235 it->cache = &mrt->mfc_cache_array[it->ct];
2214 list_for_each_entry(mfc, it->cache, list) 2236 list_for_each_entry_rcu(mfc, it->cache, list)
2215 if (pos-- == 0) 2237 if (pos-- == 0)
2216 return mfc; 2238 return mfc;
2217 } 2239 }
2218 read_unlock(&mrt_lock); 2240 rcu_read_unlock();
2219 2241
2220 spin_lock_bh(&mfc_unres_lock); 2242 spin_lock_bh(&mfc_unres_lock);
2221 it->cache = &mrt->mfc_unres_queue; 2243 it->cache = &mrt->mfc_unres_queue;
@@ -2274,7 +2296,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2274 } 2296 }
2275 2297
2276 /* exhausted cache_array, show unresolved */ 2298 /* exhausted cache_array, show unresolved */
2277 read_unlock(&mrt_lock); 2299 rcu_read_unlock();
2278 it->cache = &mrt->mfc_unres_queue; 2300 it->cache = &mrt->mfc_unres_queue;
2279 it->ct = 0; 2301 it->ct = 0;
2280 2302
@@ -2282,7 +2304,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2282 if (!list_empty(it->cache)) 2304 if (!list_empty(it->cache))
2283 return list_first_entry(it->cache, struct mfc_cache, list); 2305 return list_first_entry(it->cache, struct mfc_cache, list);
2284 2306
2285 end_of_list: 2307end_of_list:
2286 spin_unlock_bh(&mfc_unres_lock); 2308 spin_unlock_bh(&mfc_unres_lock);
2287 it->cache = NULL; 2309 it->cache = NULL;
2288 2310
@@ -2297,7 +2319,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
2297 if (it->cache == &mrt->mfc_unres_queue) 2319 if (it->cache == &mrt->mfc_unres_queue)
2298 spin_unlock_bh(&mfc_unres_lock); 2320 spin_unlock_bh(&mfc_unres_lock);
2299 else if (it->cache == &mrt->mfc_cache_array[it->ct]) 2321 else if (it->cache == &mrt->mfc_cache_array[it->ct])
2300 read_unlock(&mrt_lock); 2322 rcu_read_unlock();
2301} 2323}
2302 2324
2303static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) 2325static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -2323,7 +2345,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
2323 mfc->mfc_un.res.bytes, 2345 mfc->mfc_un.res.bytes,
2324 mfc->mfc_un.res.wrong_if); 2346 mfc->mfc_un.res.wrong_if);
2325 for (n = mfc->mfc_un.res.minvif; 2347 for (n = mfc->mfc_un.res.minvif;
2326 n < mfc->mfc_un.res.maxvif; n++ ) { 2348 n < mfc->mfc_un.res.maxvif; n++) {
2327 if (VIF_EXISTS(mrt, n) && 2349 if (VIF_EXISTS(mrt, n) &&
2328 mfc->mfc_un.res.ttls[n] < 255) 2350 mfc->mfc_un.res.ttls[n] < 255)
2329 seq_printf(seq, 2351 seq_printf(seq,
@@ -2421,7 +2443,7 @@ int __init ip_mr_init(void)
2421 2443
2422 mrt_cachep = kmem_cache_create("ip_mrt_cache", 2444 mrt_cachep = kmem_cache_create("ip_mrt_cache",
2423 sizeof(struct mfc_cache), 2445 sizeof(struct mfc_cache),
2424 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, 2446 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
2425 NULL); 2447 NULL);
2426 if (!mrt_cachep) 2448 if (!mrt_cachep)
2427 return -ENOMEM; 2449 return -ENOMEM;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 1833bdbf9805..8e3350643b63 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -324,10 +324,10 @@ config IP_NF_TARGET_ECN
324 324
325config IP_NF_TARGET_TTL 325config IP_NF_TARGET_TTL
326 tristate '"TTL" target support' 326 tristate '"TTL" target support'
327 depends on NETFILTER_ADVANCED 327 depends on NETFILTER_ADVANCED && IP_NF_MANGLE
328 select NETFILTER_XT_TARGET_HL 328 select NETFILTER_XT_TARGET_HL
329 ---help--- 329 ---help---
330 This is a backwards-compat option for the user's convenience 330 This is a backwards-compatible option for the user's convenience
331 (e.g. when running oldconfig). It selects 331 (e.g. when running oldconfig). It selects
332 CONFIG_NETFILTER_XT_TARGET_HL. 332 CONFIG_NETFILTER_XT_TARGET_HL.
333 333
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e8f4f9a57f12..3cad2591ace0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -72,7 +72,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
72 for (i = 0; i < len; i++) 72 for (i = 0; i < len; i++)
73 ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i]; 73 ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i];
74 74
75 return (ret != 0); 75 return ret != 0;
76} 76}
77 77
78/* 78/*
@@ -228,7 +228,7 @@ arpt_error(struct sk_buff *skb, const struct xt_action_param *par)
228 return NF_DROP; 228 return NF_DROP;
229} 229}
230 230
231static inline const struct arpt_entry_target * 231static inline const struct xt_entry_target *
232arpt_get_target_c(const struct arpt_entry *e) 232arpt_get_target_c(const struct arpt_entry *e)
233{ 233{
234 return arpt_get_target((struct arpt_entry *)e); 234 return arpt_get_target((struct arpt_entry *)e);
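
The long run of arpt_*/ipt_* to xt_* renames in this and the following files is mechanical: the per-family names were, as far as the headers of this era are concerned, thin compatibility aliases for the shared xtables definitions, roughly of this (illustrative, assumed) shape:

    /* Illustrative sketch of the aliases being bypassed -- the diff
     * switches call sites to the xt_* originals on the right-hand
     * side of each definition. */
    #define arpt_entry_target       xt_entry_target
    #define arpt_standard_target    xt_standard_target
    #define ARPT_STANDARD_TARGET    XT_STANDARD_TARGET
    #define ARPT_RETURN             XT_RETURN
    #define IPT_CONTINUE            XT_CONTINUE
    #define IPT_TABLE_MAXNAMELEN    XT_TABLE_MAXNAMELEN

Behavior is unchanged; only the spelling is unified across arptables, iptables and ip6tables.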
@@ -282,7 +282,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
282 282
283 arp = arp_hdr(skb); 283 arp = arp_hdr(skb);
284 do { 284 do {
285 const struct arpt_entry_target *t; 285 const struct xt_entry_target *t;
286 286
287 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { 287 if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
288 e = arpt_next_entry(e); 288 e = arpt_next_entry(e);
@@ -297,10 +297,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
297 if (!t->u.kernel.target->target) { 297 if (!t->u.kernel.target->target) {
298 int v; 298 int v;
299 299
300 v = ((struct arpt_standard_target *)t)->verdict; 300 v = ((struct xt_standard_target *)t)->verdict;
301 if (v < 0) { 301 if (v < 0) {
302 /* Pop from stack? */ 302 /* Pop from stack? */
303 if (v != ARPT_RETURN) { 303 if (v != XT_RETURN) {
304 verdict = (unsigned)(-v) - 1; 304 verdict = (unsigned)(-v) - 1;
305 break; 305 break;
306 } 306 }
@@ -332,7 +332,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
332 /* Target might have changed stuff. */ 332 /* Target might have changed stuff. */
333 arp = arp_hdr(skb); 333 arp = arp_hdr(skb);
334 334
335 if (verdict == ARPT_CONTINUE) 335 if (verdict == XT_CONTINUE)
336 e = arpt_next_entry(e); 336 e = arpt_next_entry(e);
337 else 337 else
338 /* Verdict */ 338 /* Verdict */
@@ -377,7 +377,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
377 e->counters.pcnt = pos; 377 e->counters.pcnt = pos;
378 378
379 for (;;) { 379 for (;;) {
380 const struct arpt_standard_target *t 380 const struct xt_standard_target *t
381 = (void *)arpt_get_target_c(e); 381 = (void *)arpt_get_target_c(e);
382 int visited = e->comefrom & (1 << hook); 382 int visited = e->comefrom & (1 << hook);
383 383
@@ -392,13 +392,13 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
392 /* Unconditional return/END. */ 392 /* Unconditional return/END. */
393 if ((e->target_offset == sizeof(struct arpt_entry) && 393 if ((e->target_offset == sizeof(struct arpt_entry) &&
394 (strcmp(t->target.u.user.name, 394 (strcmp(t->target.u.user.name,
395 ARPT_STANDARD_TARGET) == 0) && 395 XT_STANDARD_TARGET) == 0) &&
396 t->verdict < 0 && unconditional(&e->arp)) || 396 t->verdict < 0 && unconditional(&e->arp)) ||
397 visited) { 397 visited) {
398 unsigned int oldpos, size; 398 unsigned int oldpos, size;
399 399
400 if ((strcmp(t->target.u.user.name, 400 if ((strcmp(t->target.u.user.name,
401 ARPT_STANDARD_TARGET) == 0) && 401 XT_STANDARD_TARGET) == 0) &&
402 t->verdict < -NF_MAX_VERDICT - 1) { 402 t->verdict < -NF_MAX_VERDICT - 1) {
403 duprintf("mark_source_chains: bad " 403 duprintf("mark_source_chains: bad "
404 "negative verdict (%i)\n", 404 "negative verdict (%i)\n",
@@ -433,7 +433,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
433 int newpos = t->verdict; 433 int newpos = t->verdict;
434 434
435 if (strcmp(t->target.u.user.name, 435 if (strcmp(t->target.u.user.name,
436 ARPT_STANDARD_TARGET) == 0 && 436 XT_STANDARD_TARGET) == 0 &&
437 newpos >= 0) { 437 newpos >= 0) {
438 if (newpos > newinfo->size - 438 if (newpos > newinfo->size -
439 sizeof(struct arpt_entry)) { 439 sizeof(struct arpt_entry)) {
@@ -464,14 +464,14 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
464 464
465static inline int check_entry(const struct arpt_entry *e, const char *name) 465static inline int check_entry(const struct arpt_entry *e, const char *name)
466{ 466{
467 const struct arpt_entry_target *t; 467 const struct xt_entry_target *t;
468 468
469 if (!arp_checkentry(&e->arp)) { 469 if (!arp_checkentry(&e->arp)) {
470 duprintf("arp_tables: arp check failed %p %s.\n", e, name); 470 duprintf("arp_tables: arp check failed %p %s.\n", e, name);
471 return -EINVAL; 471 return -EINVAL;
472 } 472 }
473 473
474 if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset) 474 if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset)
475 return -EINVAL; 475 return -EINVAL;
476 476
477 t = arpt_get_target_c(e); 477 t = arpt_get_target_c(e);
@@ -483,7 +483,7 @@ static inline int check_entry(const struct arpt_entry *e, const char *name)
483 483
484static inline int check_target(struct arpt_entry *e, const char *name) 484static inline int check_target(struct arpt_entry *e, const char *name)
485{ 485{
486 struct arpt_entry_target *t = arpt_get_target(e); 486 struct xt_entry_target *t = arpt_get_target(e);
487 int ret; 487 int ret;
488 struct xt_tgchk_param par = { 488 struct xt_tgchk_param par = {
489 .table = name, 489 .table = name,
@@ -506,7 +506,7 @@ static inline int check_target(struct arpt_entry *e, const char *name)
506static inline int 506static inline int
507find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) 507find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
508{ 508{
509 struct arpt_entry_target *t; 509 struct xt_entry_target *t;
510 struct xt_target *target; 510 struct xt_target *target;
511 int ret; 511 int ret;
512 512
@@ -536,7 +536,7 @@ out:
536 536
537static bool check_underflow(const struct arpt_entry *e) 537static bool check_underflow(const struct arpt_entry *e)
538{ 538{
539 const struct arpt_entry_target *t; 539 const struct xt_entry_target *t;
540 unsigned int verdict; 540 unsigned int verdict;
541 541
542 if (!unconditional(&e->arp)) 542 if (!unconditional(&e->arp))
@@ -544,7 +544,7 @@ static bool check_underflow(const struct arpt_entry *e)
544 t = arpt_get_target_c(e); 544 t = arpt_get_target_c(e);
545 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) 545 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
546 return false; 546 return false;
547 verdict = ((struct arpt_standard_target *)t)->verdict; 547 verdict = ((struct xt_standard_target *)t)->verdict;
548 verdict = -verdict - 1; 548 verdict = -verdict - 1;
549 return verdict == NF_DROP || verdict == NF_ACCEPT; 549 return verdict == NF_DROP || verdict == NF_ACCEPT;
550} 550}
@@ -566,7 +566,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
566 } 566 }
567 567
568 if (e->next_offset 568 if (e->next_offset
569 < sizeof(struct arpt_entry) + sizeof(struct arpt_entry_target)) { 569 < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) {
570 duprintf("checking: element %p size %u\n", 570 duprintf("checking: element %p size %u\n",
571 e, e->next_offset); 571 e, e->next_offset);
572 return -EINVAL; 572 return -EINVAL;
@@ -598,7 +598,7 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
598static inline void cleanup_entry(struct arpt_entry *e) 598static inline void cleanup_entry(struct arpt_entry *e)
599{ 599{
600 struct xt_tgdtor_param par; 600 struct xt_tgdtor_param par;
601 struct arpt_entry_target *t; 601 struct xt_entry_target *t;
602 602
603 t = arpt_get_target(e); 603 t = arpt_get_target(e);
604 par.target = t->u.kernel.target; 604 par.target = t->u.kernel.target;
@@ -794,7 +794,7 @@ static int copy_entries_to_user(unsigned int total_size,
794 /* FIXME: use iterator macros --RR */ 794 /* FIXME: use iterator macros --RR */
795 /* ... then go back and fix counters and names */ 795 /* ... then go back and fix counters and names */
796 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ 796 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
797 const struct arpt_entry_target *t; 797 const struct xt_entry_target *t;
798 798
799 e = (struct arpt_entry *)(loc_cpu_entry + off); 799 e = (struct arpt_entry *)(loc_cpu_entry + off);
800 if (copy_to_user(userptr + off 800 if (copy_to_user(userptr + off
@@ -807,7 +807,7 @@ static int copy_entries_to_user(unsigned int total_size,
807 807
808 t = arpt_get_target_c(e); 808 t = arpt_get_target_c(e);
809 if (copy_to_user(userptr + off + e->target_offset 809 if (copy_to_user(userptr + off + e->target_offset
810 + offsetof(struct arpt_entry_target, 810 + offsetof(struct xt_entry_target,
811 u.user.name), 811 u.user.name),
812 t->u.kernel.target->name, 812 t->u.kernel.target->name,
813 strlen(t->u.kernel.target->name)+1) != 0) { 813 strlen(t->u.kernel.target->name)+1) != 0) {
@@ -844,7 +844,7 @@ static int compat_calc_entry(const struct arpt_entry *e,
844 const struct xt_table_info *info, 844 const struct xt_table_info *info,
845 const void *base, struct xt_table_info *newinfo) 845 const void *base, struct xt_table_info *newinfo)
846{ 846{
847 const struct arpt_entry_target *t; 847 const struct xt_entry_target *t;
848 unsigned int entry_offset; 848 unsigned int entry_offset;
849 int off, i, ret; 849 int off, i, ret;
850 850
@@ -895,7 +895,7 @@ static int compat_table_info(const struct xt_table_info *info,
895static int get_info(struct net *net, void __user *user, 895static int get_info(struct net *net, void __user *user,
896 const int *len, int compat) 896 const int *len, int compat)
897{ 897{
898 char name[ARPT_TABLE_MAXNAMELEN]; 898 char name[XT_TABLE_MAXNAMELEN];
899 struct xt_table *t; 899 struct xt_table *t;
900 int ret; 900 int ret;
901 901
@@ -908,7 +908,7 @@ static int get_info(struct net *net, void __user *user,
908 if (copy_from_user(name, user, sizeof(name)) != 0) 908 if (copy_from_user(name, user, sizeof(name)) != 0)
909 return -EFAULT; 909 return -EFAULT;
910 910
911 name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; 911 name[XT_TABLE_MAXNAMELEN-1] = '\0';
912#ifdef CONFIG_COMPAT 912#ifdef CONFIG_COMPAT
913 if (compat) 913 if (compat)
914 xt_compat_lock(NFPROTO_ARP); 914 xt_compat_lock(NFPROTO_ARP);
@@ -1204,7 +1204,7 @@ static int do_add_counters(struct net *net, const void __user *user,
1204#ifdef CONFIG_COMPAT 1204#ifdef CONFIG_COMPAT
1205static inline void compat_release_entry(struct compat_arpt_entry *e) 1205static inline void compat_release_entry(struct compat_arpt_entry *e)
1206{ 1206{
1207 struct arpt_entry_target *t; 1207 struct xt_entry_target *t;
1208 1208
1209 t = compat_arpt_get_target(e); 1209 t = compat_arpt_get_target(e);
1210 module_put(t->u.kernel.target->me); 1210 module_put(t->u.kernel.target->me);
@@ -1220,7 +1220,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
1220 const unsigned int *underflows, 1220 const unsigned int *underflows,
1221 const char *name) 1221 const char *name)
1222{ 1222{
1223 struct arpt_entry_target *t; 1223 struct xt_entry_target *t;
1224 struct xt_target *target; 1224 struct xt_target *target;
1225 unsigned int entry_offset; 1225 unsigned int entry_offset;
1226 int ret, off, h; 1226 int ret, off, h;
@@ -1288,7 +1288,7 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
1288 unsigned int *size, const char *name, 1288 unsigned int *size, const char *name,
1289 struct xt_table_info *newinfo, unsigned char *base) 1289 struct xt_table_info *newinfo, unsigned char *base)
1290{ 1290{
1291 struct arpt_entry_target *t; 1291 struct xt_entry_target *t;
1292 struct xt_target *target; 1292 struct xt_target *target;
1293 struct arpt_entry *de; 1293 struct arpt_entry *de;
1294 unsigned int origsize; 1294 unsigned int origsize;
@@ -1474,7 +1474,7 @@ out_unlock:
1474} 1474}
1475 1475
1476struct compat_arpt_replace { 1476struct compat_arpt_replace {
1477 char name[ARPT_TABLE_MAXNAMELEN]; 1477 char name[XT_TABLE_MAXNAMELEN];
1478 u32 valid_hooks; 1478 u32 valid_hooks;
1479 u32 num_entries; 1479 u32 num_entries;
1480 u32 size; 1480 u32 size;
@@ -1567,7 +1567,7 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
1567 struct xt_counters *counters, 1567 struct xt_counters *counters,
1568 unsigned int i) 1568 unsigned int i)
1569{ 1569{
1570 struct arpt_entry_target *t; 1570 struct xt_entry_target *t;
1571 struct compat_arpt_entry __user *ce; 1571 struct compat_arpt_entry __user *ce;
1572 u_int16_t target_offset, next_offset; 1572 u_int16_t target_offset, next_offset;
1573 compat_uint_t origsize; 1573 compat_uint_t origsize;
@@ -1628,7 +1628,7 @@ static int compat_copy_entries_to_user(unsigned int total_size,
1628} 1628}
1629 1629
1630struct compat_arpt_get_entries { 1630struct compat_arpt_get_entries {
1631 char name[ARPT_TABLE_MAXNAMELEN]; 1631 char name[XT_TABLE_MAXNAMELEN];
1632 compat_uint_t size; 1632 compat_uint_t size;
1633 struct compat_arpt_entry entrytable[0]; 1633 struct compat_arpt_entry entrytable[0];
1634}; 1634};
@@ -1828,7 +1828,7 @@ void arpt_unregister_table(struct xt_table *table)
1828/* The built-in targets: standard (NULL) and error. */ 1828/* The built-in targets: standard (NULL) and error. */
1829static struct xt_target arpt_builtin_tg[] __read_mostly = { 1829static struct xt_target arpt_builtin_tg[] __read_mostly = {
1830 { 1830 {
1831 .name = ARPT_STANDARD_TARGET, 1831 .name = XT_STANDARD_TARGET,
1832 .targetsize = sizeof(int), 1832 .targetsize = sizeof(int),
1833 .family = NFPROTO_ARP, 1833 .family = NFPROTO_ARP,
1834#ifdef CONFIG_COMPAT 1834#ifdef CONFIG_COMPAT
@@ -1838,9 +1838,9 @@ static struct xt_target arpt_builtin_tg[] __read_mostly = {
1838#endif 1838#endif
1839 }, 1839 },
1840 { 1840 {
1841 .name = ARPT_ERROR_TARGET, 1841 .name = XT_ERROR_TARGET,
1842 .target = arpt_error, 1842 .target = arpt_error,
1843 .targetsize = ARPT_FUNCTION_MAXNAMELEN, 1843 .targetsize = XT_FUNCTION_MAXNAMELEN,
1844 .family = NFPROTO_ARP, 1844 .family = NFPROTO_ARP,
1845 }, 1845 },
1846}; 1846};
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index e1be7dd1171b..b8ddcc480ed9 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -63,7 +63,7 @@ static int checkentry(const struct xt_tgchk_param *par)
63 return false; 63 return false;
64 64
65 if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && 65 if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT &&
66 mangle->target != ARPT_CONTINUE) 66 mangle->target != XT_CONTINUE)
67 return false; 67 return false;
68 return true; 68 return true;
69} 69}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index d163f2e3b2e9..d31b007a6d80 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -186,7 +186,7 @@ static inline bool unconditional(const struct ipt_ip *ip)
186} 186}
187 187
188/* for const-correctness */ 188/* for const-correctness */
189static inline const struct ipt_entry_target * 189static inline const struct xt_entry_target *
190ipt_get_target_c(const struct ipt_entry *e) 190ipt_get_target_c(const struct ipt_entry *e)
191{ 191{
192 return ipt_get_target((struct ipt_entry *)e); 192 return ipt_get_target((struct ipt_entry *)e);
@@ -230,9 +230,9 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
230 const char *hookname, const char **chainname, 230 const char *hookname, const char **chainname,
231 const char **comment, unsigned int *rulenum) 231 const char **comment, unsigned int *rulenum)
232{ 232{
233 const struct ipt_standard_target *t = (void *)ipt_get_target_c(s); 233 const struct xt_standard_target *t = (void *)ipt_get_target_c(s);
234 234
235 if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) { 235 if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
236 /* Head of user chain: ERROR target with chainname */ 236 /* Head of user chain: ERROR target with chainname */
237 *chainname = t->target.data; 237 *chainname = t->target.data;
238 (*rulenum) = 0; 238 (*rulenum) = 0;
@@ -241,7 +241,7 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
241 241
242 if (s->target_offset == sizeof(struct ipt_entry) && 242 if (s->target_offset == sizeof(struct ipt_entry) &&
243 strcmp(t->target.u.kernel.target->name, 243 strcmp(t->target.u.kernel.target->name,
244 IPT_STANDARD_TARGET) == 0 && 244 XT_STANDARD_TARGET) == 0 &&
245 t->verdict < 0 && 245 t->verdict < 0 &&
246 unconditional(&s->ip)) { 246 unconditional(&s->ip)) {
247 /* Tail of chains: STANDARD target (return/policy) */ 247 /* Tail of chains: STANDARD target (return/policy) */
@@ -346,7 +346,7 @@ ipt_do_table(struct sk_buff *skb,
346 get_entry(table_base, private->underflow[hook])); 346 get_entry(table_base, private->underflow[hook]));
347 347
348 do { 348 do {
349 const struct ipt_entry_target *t; 349 const struct xt_entry_target *t;
350 const struct xt_entry_match *ematch; 350 const struct xt_entry_match *ematch;
351 351
352 IP_NF_ASSERT(e); 352 IP_NF_ASSERT(e);
@@ -380,10 +380,10 @@ ipt_do_table(struct sk_buff *skb,
380 if (!t->u.kernel.target->target) { 380 if (!t->u.kernel.target->target) {
381 int v; 381 int v;
382 382
383 v = ((struct ipt_standard_target *)t)->verdict; 383 v = ((struct xt_standard_target *)t)->verdict;
384 if (v < 0) { 384 if (v < 0) {
385 /* Pop from stack? */ 385 /* Pop from stack? */
386 if (v != IPT_RETURN) { 386 if (v != XT_RETURN) {
387 verdict = (unsigned)(-v) - 1; 387 verdict = (unsigned)(-v) - 1;
388 break; 388 break;
389 } 389 }
@@ -421,7 +421,7 @@ ipt_do_table(struct sk_buff *skb,
421 verdict = t->u.kernel.target->target(skb, &acpar); 421 verdict = t->u.kernel.target->target(skb, &acpar);
422 /* Target might have changed stuff. */ 422 /* Target might have changed stuff. */
423 ip = ip_hdr(skb); 423 ip = ip_hdr(skb);
424 if (verdict == IPT_CONTINUE) 424 if (verdict == XT_CONTINUE)
425 e = ipt_next_entry(e); 425 e = ipt_next_entry(e);
426 else 426 else
427 /* Verdict */ 427 /* Verdict */
@@ -461,7 +461,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
461 e->counters.pcnt = pos; 461 e->counters.pcnt = pos;
462 462
463 for (;;) { 463 for (;;) {
464 const struct ipt_standard_target *t 464 const struct xt_standard_target *t
465 = (void *)ipt_get_target_c(e); 465 = (void *)ipt_get_target_c(e);
466 int visited = e->comefrom & (1 << hook); 466 int visited = e->comefrom & (1 << hook);
467 467
@@ -475,13 +475,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
475 /* Unconditional return/END. */ 475 /* Unconditional return/END. */
476 if ((e->target_offset == sizeof(struct ipt_entry) && 476 if ((e->target_offset == sizeof(struct ipt_entry) &&
477 (strcmp(t->target.u.user.name, 477 (strcmp(t->target.u.user.name,
478 IPT_STANDARD_TARGET) == 0) && 478 XT_STANDARD_TARGET) == 0) &&
479 t->verdict < 0 && unconditional(&e->ip)) || 479 t->verdict < 0 && unconditional(&e->ip)) ||
480 visited) { 480 visited) {
481 unsigned int oldpos, size; 481 unsigned int oldpos, size;
482 482
483 if ((strcmp(t->target.u.user.name, 483 if ((strcmp(t->target.u.user.name,
484 IPT_STANDARD_TARGET) == 0) && 484 XT_STANDARD_TARGET) == 0) &&
485 t->verdict < -NF_MAX_VERDICT - 1) { 485 t->verdict < -NF_MAX_VERDICT - 1) {
486 duprintf("mark_source_chains: bad " 486 duprintf("mark_source_chains: bad "
487 "negative verdict (%i)\n", 487 "negative verdict (%i)\n",
@@ -524,7 +524,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
524 int newpos = t->verdict; 524 int newpos = t->verdict;
525 525
526 if (strcmp(t->target.u.user.name, 526 if (strcmp(t->target.u.user.name,
527 IPT_STANDARD_TARGET) == 0 && 527 XT_STANDARD_TARGET) == 0 &&
528 newpos >= 0) { 528 newpos >= 0) {
529 if (newpos > newinfo->size - 529 if (newpos > newinfo->size -
530 sizeof(struct ipt_entry)) { 530 sizeof(struct ipt_entry)) {
@@ -552,7 +552,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
552 return 1; 552 return 1;
553} 553}
554 554
555static void cleanup_match(struct ipt_entry_match *m, struct net *net) 555static void cleanup_match(struct xt_entry_match *m, struct net *net)
556{ 556{
557 struct xt_mtdtor_param par; 557 struct xt_mtdtor_param par;
558 558
@@ -568,14 +568,14 @@ static void cleanup_match(struct ipt_entry_match *m, struct net *net)
568static int 568static int
569check_entry(const struct ipt_entry *e, const char *name) 569check_entry(const struct ipt_entry *e, const char *name)
570{ 570{
571 const struct ipt_entry_target *t; 571 const struct xt_entry_target *t;
572 572
573 if (!ip_checkentry(&e->ip)) { 573 if (!ip_checkentry(&e->ip)) {
574 duprintf("ip check failed %p %s.\n", e, par->match->name); 574 duprintf("ip check failed %p %s.\n", e, par->match->name);
575 return -EINVAL; 575 return -EINVAL;
576 } 576 }
577 577
578 if (e->target_offset + sizeof(struct ipt_entry_target) > 578 if (e->target_offset + sizeof(struct xt_entry_target) >
579 e->next_offset) 579 e->next_offset)
580 return -EINVAL; 580 return -EINVAL;
581 581
@@ -587,7 +587,7 @@ check_entry(const struct ipt_entry *e, const char *name)
587} 587}
588 588
589static int 589static int
590check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) 590check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
591{ 591{
592 const struct ipt_ip *ip = par->entryinfo; 592 const struct ipt_ip *ip = par->entryinfo;
593 int ret; 593 int ret;
@@ -605,7 +605,7 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
605} 605}
606 606
607static int 607static int
608find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par) 608find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
609{ 609{
610 struct xt_match *match; 610 struct xt_match *match;
611 int ret; 611 int ret;
@@ -630,7 +630,7 @@ err:
630 630
631static int check_target(struct ipt_entry *e, struct net *net, const char *name) 631static int check_target(struct ipt_entry *e, struct net *net, const char *name)
632{ 632{
633 struct ipt_entry_target *t = ipt_get_target(e); 633 struct xt_entry_target *t = ipt_get_target(e);
634 struct xt_tgchk_param par = { 634 struct xt_tgchk_param par = {
 	.net	= net,
 	.table	= name,
@@ -656,7 +656,7 @@ static int
 find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 		 unsigned int size)
 {
-	struct ipt_entry_target *t;
+	struct xt_entry_target *t;
 	struct xt_target *target;
 	int ret;
 	unsigned int j;
@@ -707,7 +707,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 
 static bool check_underflow(const struct ipt_entry *e)
 {
-	const struct ipt_entry_target *t;
+	const struct xt_entry_target *t;
 	unsigned int verdict;
 
 	if (!unconditional(&e->ip))
@@ -715,7 +715,7 @@ static bool check_underflow(const struct ipt_entry *e)
 	t = ipt_get_target_c(e);
 	if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
 		return false;
-	verdict = ((struct ipt_standard_target *)t)->verdict;
+	verdict = ((struct xt_standard_target *)t)->verdict;
 	verdict = -verdict - 1;
 	return verdict == NF_DROP || verdict == NF_ACCEPT;
 }
@@ -738,7 +738,7 @@ check_entry_size_and_hooks(struct ipt_entry *e,
 	}
 
 	if (e->next_offset
-	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
+	    < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) {
 		duprintf("checking: element %p size %u\n",
 			 e, e->next_offset);
 		return -EINVAL;
@@ -771,7 +771,7 @@ static void
 cleanup_entry(struct ipt_entry *e, struct net *net)
 {
 	struct xt_tgdtor_param par;
-	struct ipt_entry_target *t;
+	struct xt_entry_target *t;
 	struct xt_entry_match *ematch;
 
 	/* Cleanup all matches */
@@ -972,8 +972,8 @@ copy_entries_to_user(unsigned int total_size,
 	/* ... then go back and fix counters and names */
 	for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
 		unsigned int i;
-		const struct ipt_entry_match *m;
-		const struct ipt_entry_target *t;
+		const struct xt_entry_match *m;
+		const struct xt_entry_target *t;
 
 		e = (struct ipt_entry *)(loc_cpu_entry + off);
 		if (copy_to_user(userptr + off
@@ -990,7 +990,7 @@ copy_entries_to_user(unsigned int total_size,
 			m = (void *)e + i;
 
 			if (copy_to_user(userptr + off + i
-					 + offsetof(struct ipt_entry_match,
+					 + offsetof(struct xt_entry_match,
 						    u.user.name),
 					 m->u.kernel.match->name,
 					 strlen(m->u.kernel.match->name)+1)
@@ -1002,7 +1002,7 @@ copy_entries_to_user(unsigned int total_size,
 
 		t = ipt_get_target_c(e);
 		if (copy_to_user(userptr + off + e->target_offset
-				 + offsetof(struct ipt_entry_target,
+				 + offsetof(struct xt_entry_target,
 					    u.user.name),
 				 t->u.kernel.target->name,
 				 strlen(t->u.kernel.target->name)+1) != 0) {
@@ -1040,7 +1040,7 @@ static int compat_calc_entry(const struct ipt_entry *e,
 			     const void *base, struct xt_table_info *newinfo)
 {
 	const struct xt_entry_match *ematch;
-	const struct ipt_entry_target *t;
+	const struct xt_entry_target *t;
 	unsigned int entry_offset;
 	int off, i, ret;
 
@@ -1092,7 +1092,7 @@ static int compat_table_info(const struct xt_table_info *info,
 static int get_info(struct net *net, void __user *user,
 		    const int *len, int compat)
 {
-	char name[IPT_TABLE_MAXNAMELEN];
+	char name[XT_TABLE_MAXNAMELEN];
 	struct xt_table *t;
 	int ret;
 
@@ -1105,7 +1105,7 @@ static int get_info(struct net *net, void __user *user,
 	if (copy_from_user(name, user, sizeof(name)) != 0)
 		return -EFAULT;
 
-	name[IPT_TABLE_MAXNAMELEN-1] = '\0';
+	name[XT_TABLE_MAXNAMELEN-1] = '\0';
 #ifdef CONFIG_COMPAT
 	if (compat)
 		xt_compat_lock(AF_INET);
@@ -1400,14 +1400,14 @@ do_add_counters(struct net *net, const void __user *user,
 
 #ifdef CONFIG_COMPAT
 struct compat_ipt_replace {
-	char			name[IPT_TABLE_MAXNAMELEN];
+	char			name[XT_TABLE_MAXNAMELEN];
 	u32			valid_hooks;
 	u32			num_entries;
 	u32			size;
 	u32			hook_entry[NF_INET_NUMHOOKS];
 	u32			underflow[NF_INET_NUMHOOKS];
 	u32			num_counters;
-	compat_uptr_t		counters;	/* struct ipt_counters * */
+	compat_uptr_t		counters;	/* struct xt_counters * */
 	struct compat_ipt_entry	entries[0];
 };
 
@@ -1416,7 +1416,7 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
 			  unsigned int *size, struct xt_counters *counters,
 			  unsigned int i)
 {
-	struct ipt_entry_target *t;
+	struct xt_entry_target *t;
 	struct compat_ipt_entry __user *ce;
 	u_int16_t target_offset, next_offset;
 	compat_uint_t origsize;
@@ -1451,7 +1451,7 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
 }
 
 static int
-compat_find_calc_match(struct ipt_entry_match *m,
+compat_find_calc_match(struct xt_entry_match *m,
 		       const char *name,
 		       const struct ipt_ip *ip,
 		       unsigned int hookmask,
@@ -1473,7 +1473,7 @@ compat_find_calc_match(struct ipt_entry_match *m,
 
 static void compat_release_entry(struct compat_ipt_entry *e)
 {
-	struct ipt_entry_target *t;
+	struct xt_entry_target *t;
 	struct xt_entry_match *ematch;
 
 	/* Cleanup all matches */
@@ -1494,7 +1494,7 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
 				  const char *name)
 {
 	struct xt_entry_match *ematch;
-	struct ipt_entry_target *t;
+	struct xt_entry_target *t;
 	struct xt_target *target;
 	unsigned int entry_offset;
 	unsigned int j;
@@ -1576,7 +1576,7 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
 			    unsigned int *size, const char *name,
 			    struct xt_table_info *newinfo, unsigned char *base)
 {
-	struct ipt_entry_target *t;
+	struct xt_entry_target *t;
 	struct xt_target *target;
 	struct ipt_entry *de;
 	unsigned int origsize;
@@ -1884,7 +1884,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
 }
 
 struct compat_ipt_get_entries {
-	char name[IPT_TABLE_MAXNAMELEN];
+	char name[XT_TABLE_MAXNAMELEN];
 	compat_uint_t size;
 	struct compat_ipt_entry entrytable[0];
 };
@@ -2039,7 +2039,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
 
 	case IPT_SO_GET_REVISION_MATCH:
 	case IPT_SO_GET_REVISION_TARGET: {
-		struct ipt_get_revision rev;
+		struct xt_get_revision rev;
 		int target;
 
 		if (*len != sizeof(rev)) {
@@ -2176,7 +2176,7 @@ static int icmp_checkentry(const struct xt_mtchk_param *par)
 
 static struct xt_target ipt_builtin_tg[] __read_mostly = {
 	{
-		.name             = IPT_STANDARD_TARGET,
+		.name             = XT_STANDARD_TARGET,
 		.targetsize       = sizeof(int),
 		.family           = NFPROTO_IPV4,
 #ifdef CONFIG_COMPAT
@@ -2186,9 +2186,9 @@ static struct xt_target ipt_builtin_tg[] __read_mostly = {
 #endif
 	},
 	{
-		.name             = IPT_ERROR_TARGET,
+		.name             = XT_ERROR_TARGET,
 		.target           = ipt_error,
-		.targetsize       = IPT_FUNCTION_MAXNAMELEN,
+		.targetsize       = XT_FUNCTION_MAXNAMELEN,
 		.family           = NFPROTO_IPV4,
 	},
 };
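The ip_tables.c hunks above are a mechanical rename: structures and constants that were duplicated per address family (ipt_entry_target, ipt_entry_match, IPT_TABLE_MAXNAMELEN, IPT_STANDARD_TARGET, ...) become the generic x_tables names. Old userspace-visible identifiers can keep building through header aliases; a hedged sketch of what such an aliasing layer looks like (an illustrative reconstruction, not copied from the kernel headers):

/* Hypothetical compat shims for the ipt_ -> xt_ rename; the real
 * definitions, if any, live in the netfilter_ipv4 headers. */
#define ipt_entry_target	xt_entry_target
#define ipt_entry_match		xt_entry_match
#define ipt_standard_target	xt_standard_target
#define ipt_get_revision	xt_get_revision
#define IPT_TABLE_MAXNAMELEN	XT_TABLE_MAXNAMELEN
#define IPT_FUNCTION_MAXNAMELEN	XT_FUNCTION_MAXNAMELEN
#define IPT_STANDARD_TARGET	XT_STANDARD_TARGET
#define IPT_ERROR_TARGET	XT_ERROR_TARGET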
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 3a43cf36db87..1e26a4897655 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -29,6 +29,7 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/net_namespace.h>
 #include <net/checksum.h>
+#include <net/ip.h>
 
 #define CLUSTERIP_VERSION "0.8"
 
@@ -231,24 +232,22 @@ clusterip_hashfn(const struct sk_buff *skb,
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	unsigned long hashval;
-	u_int16_t sport, dport;
-	const u_int16_t *ports;
+	u_int16_t sport = 0, dport = 0;
+	int poff;
 
-	switch (iph->protocol) {
-	case IPPROTO_TCP:
-	case IPPROTO_UDP:
-	case IPPROTO_UDPLITE:
-	case IPPROTO_SCTP:
-	case IPPROTO_DCCP:
-	case IPPROTO_ICMP:
-		ports = (const void *)iph+iph->ihl*4;
-		sport = ports[0];
-		dport = ports[1];
-		break;
-	default:
+	poff = proto_ports_offset(iph->protocol);
+	if (poff >= 0) {
+		const u_int16_t *ports;
+		u16 _ports[2];
+
+		ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
+		if (ports) {
+			sport = ports[0];
+			dport = ports[1];
+		}
+	} else {
 		if (net_ratelimit())
 			pr_info("unknown protocol %u\n", iph->protocol);
-		sport = dport = 0;
 	}
 
 	switch (config->hash_mode) {
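The CLUSTERIP hunk swaps an open-coded protocol switch for proto_ports_offset() (hence the new <net/ip.h> include), which yields the byte offset of the 16-bit port pair inside the transport header, or a negative value for protocols without ports, and it fetches the ports safely with skb_header_pointer() instead of dereferencing the packet directly. A hedged sketch of the same consumption pattern on a plain linear buffer, with ports_offset() as a hypothetical stand-in for the kernel helper:

#include <stdint.h>
#include <string.h>

/* Hypothetical stand-in for proto_ports_offset(); covers only
 * TCP (6) and UDP (17) here. */
static int ports_offset(int proto)
{
	return (proto == 6 || proto == 17) ? 0 : -1;
}

/* buf points at an IPv4 header; len is the total packet length.
 * Portless or truncated packets hash with zero ports, mirroring the
 * fallback in the hunk above. */
static uint32_t flow_ports(const uint8_t *buf, size_t len)
{
	size_t ihl = (buf[0] & 0x0f) * 4;	/* IHL in bytes */
	int poff = ports_offset(buf[9]);	/* protocol field */
	uint16_t sport = 0, dport = 0;

	if (poff >= 0 && ihl + poff + 4 <= len) {
		memcpy(&sport, buf + ihl + poff, 2);
		memcpy(&dport, buf + ihl + poff + 2, 2);
	}
	return ((uint32_t)sport << 16) | dport;
}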
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 915fc17d7ce2..72ffc8fda2e9 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -24,16 +24,15 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ipt_LOG.h>
 #include <net/netfilter/nf_log.h>
+#include <net/netfilter/xt_log.h>
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog");
 
-/* Use lock to serialize, so printks don't overlap */
-static DEFINE_SPINLOCK(log_lock);
-
 /* One level of recursion won't kill us */
-static void dump_packet(const struct nf_loginfo *info,
+static void dump_packet(struct sbuff *m,
+			const struct nf_loginfo *info,
 			const struct sk_buff *skb,
 			unsigned int iphoff)
 {
@@ -48,32 +47,32 @@ static void dump_packet(const struct nf_loginfo *info,
 
 	ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
 	if (ih == NULL) {
-		printk("TRUNCATED");
+		sb_add(m, "TRUNCATED");
 		return;
 	}
 
 	/* Important fields:
	 * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
 	/* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
-	printk("SRC=%pI4 DST=%pI4 ",
+	sb_add(m, "SRC=%pI4 DST=%pI4 ",
 	       &ih->saddr, &ih->daddr);
 
 	/* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
-	printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
+	sb_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
 	       ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
 	       ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
 
 	/* Max length: 6 "CE DF MF " */
 	if (ntohs(ih->frag_off) & IP_CE)
-		printk("CE ");
+		sb_add(m, "CE ");
 	if (ntohs(ih->frag_off) & IP_DF)
-		printk("DF ");
+		sb_add(m, "DF ");
 	if (ntohs(ih->frag_off) & IP_MF)
-		printk("MF ");
+		sb_add(m, "MF ");
 
 	/* Max length: 11 "FRAG:65535 " */
 	if (ntohs(ih->frag_off) & IP_OFFSET)
-		printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
+		sb_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
 
 	if ((logflags & IPT_LOG_IPOPT) &&
 	    ih->ihl * 4 > sizeof(struct iphdr)) {
@@ -85,15 +84,15 @@ static void dump_packet(const struct nf_loginfo *info,
 		op = skb_header_pointer(skb, iphoff+sizeof(_iph),
 					optsize, _opt);
 		if (op == NULL) {
-			printk("TRUNCATED");
+			sb_add(m, "TRUNCATED");
 			return;
 		}
 
 		/* Max length: 127 "OPT (" 15*4*2chars ") " */
-		printk("OPT (");
+		sb_add(m, "OPT (");
 		for (i = 0; i < optsize; i++)
-			printk("%02X", op[i]);
-		printk(") ");
+			sb_add(m, "%02X", op[i]);
+		sb_add(m, ") ");
 	}
 
 	switch (ih->protocol) {
@@ -102,7 +101,7 @@ static void dump_packet(const struct nf_loginfo *info,
 		const struct tcphdr *th;
 
 		/* Max length: 10 "PROTO=TCP " */
-		printk("PROTO=TCP ");
+		sb_add(m, "PROTO=TCP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -111,41 +110,41 @@ static void dump_packet(const struct nf_loginfo *info,
 		th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
 					sizeof(_tcph), &_tcph);
 		if (th == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			sb_add(m, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u ",
+		sb_add(m, "SPT=%u DPT=%u ",
 		       ntohs(th->source), ntohs(th->dest));
 		/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
 		if (logflags & IPT_LOG_TCPSEQ)
-			printk("SEQ=%u ACK=%u ",
+			sb_add(m, "SEQ=%u ACK=%u ",
 			       ntohl(th->seq), ntohl(th->ack_seq));
 		/* Max length: 13 "WINDOW=65535 " */
-		printk("WINDOW=%u ", ntohs(th->window));
+		sb_add(m, "WINDOW=%u ", ntohs(th->window));
 		/* Max length: 9 "RES=0x3F " */
-		printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
+		sb_add(m, "RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
 		/* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
 		if (th->cwr)
-			printk("CWR ");
+			sb_add(m, "CWR ");
 		if (th->ece)
-			printk("ECE ");
+			sb_add(m, "ECE ");
 		if (th->urg)
-			printk("URG ");
+			sb_add(m, "URG ");
 		if (th->ack)
-			printk("ACK ");
+			sb_add(m, "ACK ");
 		if (th->psh)
-			printk("PSH ");
+			sb_add(m, "PSH ");
 		if (th->rst)
-			printk("RST ");
+			sb_add(m, "RST ");
 		if (th->syn)
-			printk("SYN ");
+			sb_add(m, "SYN ");
 		if (th->fin)
-			printk("FIN ");
+			sb_add(m, "FIN ");
 		/* Max length: 11 "URGP=65535 " */
-		printk("URGP=%u ", ntohs(th->urg_ptr));
+		sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
 
 		if ((logflags & IPT_LOG_TCPOPT) &&
 		    th->doff * 4 > sizeof(struct tcphdr)) {
@@ -158,15 +157,15 @@ static void dump_packet(const struct nf_loginfo *info,
 					iphoff+ih->ihl*4+sizeof(_tcph),
 					optsize, _opt);
 			if (op == NULL) {
-				printk("TRUNCATED");
+				sb_add(m, "TRUNCATED");
 				return;
 			}
 
 			/* Max length: 127 "OPT (" 15*4*2chars ") " */
-			printk("OPT (");
+			sb_add(m, "OPT (");
 			for (i = 0; i < optsize; i++)
-				printk("%02X", op[i]);
-			printk(") ");
+				sb_add(m, "%02X", op[i]);
+			sb_add(m, ") ");
 		}
 		break;
 	}
@@ -177,9 +176,9 @@ static void dump_packet(const struct nf_loginfo *info,
 
 		if (ih->protocol == IPPROTO_UDP)
 			/* Max length: 10 "PROTO=UDP " */
-			printk("PROTO=UDP " );
+			sb_add(m, "PROTO=UDP " );
 		else	/* Max length: 14 "PROTO=UDPLITE " */
-			printk("PROTO=UDPLITE ");
+			sb_add(m, "PROTO=UDPLITE ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -188,13 +187,13 @@ static void dump_packet(const struct nf_loginfo *info,
 		uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_udph), &_udph);
 		if (uh == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			sb_add(m, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Max length: 20 "SPT=65535 DPT=65535 " */
-		printk("SPT=%u DPT=%u LEN=%u ",
+		sb_add(m, "SPT=%u DPT=%u LEN=%u ",
 		       ntohs(uh->source), ntohs(uh->dest),
 		       ntohs(uh->len));
 		break;
@@ -221,7 +220,7 @@ static void dump_packet(const struct nf_loginfo *info,
 			  [ICMP_ADDRESSREPLY] = 12 };
 
 		/* Max length: 11 "PROTO=ICMP " */
-		printk("PROTO=ICMP ");
+		sb_add(m, "PROTO=ICMP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -230,19 +229,19 @@ static void dump_packet(const struct nf_loginfo *info,
 		ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
 					 sizeof(_icmph), &_icmph);
 		if (ich == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			sb_add(m, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Max length: 18 "TYPE=255 CODE=255 " */
-		printk("TYPE=%u CODE=%u ", ich->type, ich->code);
+		sb_add(m, "TYPE=%u CODE=%u ", ich->type, ich->code);
 
 		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
 		if (ich->type <= NR_ICMP_TYPES &&
 		    required_len[ich->type] &&
 		    skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
-			printk("INCOMPLETE [%u bytes] ",
+			sb_add(m, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
@@ -251,35 +250,35 @@ static void dump_packet(const struct nf_loginfo *info,
 		case ICMP_ECHOREPLY:
 		case ICMP_ECHO:
 			/* Max length: 19 "ID=65535 SEQ=65535 " */
-			printk("ID=%u SEQ=%u ",
+			sb_add(m, "ID=%u SEQ=%u ",
 			       ntohs(ich->un.echo.id),
 			       ntohs(ich->un.echo.sequence));
 			break;
 
 		case ICMP_PARAMETERPROB:
 			/* Max length: 14 "PARAMETER=255 " */
-			printk("PARAMETER=%u ",
+			sb_add(m, "PARAMETER=%u ",
 			       ntohl(ich->un.gateway) >> 24);
 			break;
 		case ICMP_REDIRECT:
 			/* Max length: 24 "GATEWAY=255.255.255.255 " */
-			printk("GATEWAY=%pI4 ", &ich->un.gateway);
+			sb_add(m, "GATEWAY=%pI4 ", &ich->un.gateway);
 			/* Fall through */
 		case ICMP_DEST_UNREACH:
 		case ICMP_SOURCE_QUENCH:
 		case ICMP_TIME_EXCEEDED:
 			/* Max length: 3+maxlen */
 			if (!iphoff) { /* Only recurse once. */
-				printk("[");
-				dump_packet(info, skb,
+				sb_add(m, "[");
+				dump_packet(m, info, skb,
 					    iphoff + ih->ihl*4+sizeof(_icmph));
-				printk("] ");
+				sb_add(m, "] ");
 			}
 
 			/* Max length: 10 "MTU=65535 " */
 			if (ich->type == ICMP_DEST_UNREACH &&
 			    ich->code == ICMP_FRAG_NEEDED)
-				printk("MTU=%u ", ntohs(ich->un.frag.mtu));
+				sb_add(m, "MTU=%u ", ntohs(ich->un.frag.mtu));
 		}
 		break;
 	}
@@ -292,19 +291,19 @@ static void dump_packet(const struct nf_loginfo *info,
 			break;
 
 		/* Max length: 9 "PROTO=AH " */
-		printk("PROTO=AH ");
+		sb_add(m, "PROTO=AH ");
 
 		/* Max length: 25 "INCOMPLETE [65535 bytes] " */
 		ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_ahdr), &_ahdr);
 		if (ah == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			sb_add(m, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Length: 15 "SPI=0xF1234567 " */
-		printk("SPI=0x%x ", ntohl(ah->spi));
+		sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
 		break;
 	}
 	case IPPROTO_ESP: {
@@ -312,7 +311,7 @@ static void dump_packet(const struct nf_loginfo *info,
 		const struct ip_esp_hdr *eh;
 
 		/* Max length: 10 "PROTO=ESP " */
-		printk("PROTO=ESP ");
+		sb_add(m, "PROTO=ESP ");
 
 		if (ntohs(ih->frag_off) & IP_OFFSET)
 			break;
@@ -321,25 +320,25 @@ static void dump_packet(const struct nf_loginfo *info,
 		eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
 					sizeof(_esph), &_esph);
 		if (eh == NULL) {
-			printk("INCOMPLETE [%u bytes] ",
+			sb_add(m, "INCOMPLETE [%u bytes] ",
 			       skb->len - iphoff - ih->ihl*4);
 			break;
 		}
 
 		/* Length: 15 "SPI=0xF1234567 " */
-		printk("SPI=0x%x ", ntohl(eh->spi));
+		sb_add(m, "SPI=0x%x ", ntohl(eh->spi));
 		break;
 	}
 	/* Max length: 10 "PROTO 255 " */
 	default:
-		printk("PROTO=%u ", ih->protocol);
+		sb_add(m, "PROTO=%u ", ih->protocol);
 	}
 
 	/* Max length: 15 "UID=4294967295 " */
 	if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
 		read_lock_bh(&skb->sk->sk_callback_lock);
 		if (skb->sk->sk_socket && skb->sk->sk_socket->file)
-			printk("UID=%u GID=%u ",
+			sb_add(m, "UID=%u GID=%u ",
 			       skb->sk->sk_socket->file->f_cred->fsuid,
 			       skb->sk->sk_socket->file->f_cred->fsgid);
 		read_unlock_bh(&skb->sk->sk_callback_lock);
@@ -347,7 +346,7 @@ static void dump_packet(const struct nf_loginfo *info,
 
 	/* Max length: 16 "MARK=0xFFFFFFFF " */
 	if (!iphoff && skb->mark)
-		printk("MARK=0x%x ", skb->mark);
+		sb_add(m, "MARK=0x%x ", skb->mark);
 
 	/* Proto  Max log string length */
 	/* IP:    40+46+6+11+127 = 230 */
@@ -364,7 +363,8 @@ static void dump_packet(const struct nf_loginfo *info,
 	/* maxlen = 230+ 91 + 230 + 252 = 803 */
 }
 
-static void dump_mac_header(const struct nf_loginfo *info,
+static void dump_mac_header(struct sbuff *m,
+			    const struct nf_loginfo *info,
 			    const struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
@@ -378,7 +378,7 @@ static void dump_mac_header(const struct nf_loginfo *info,
 
 	switch (dev->type) {
 	case ARPHRD_ETHER:
-		printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
+		sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
 		       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
 		       ntohs(eth_hdr(skb)->h_proto));
 		return;
@@ -387,17 +387,17 @@ static void dump_mac_header(const struct nf_loginfo *info,
 	}
 
 fallback:
-	printk("MAC=");
+	sb_add(m, "MAC=");
 	if (dev->hard_header_len &&
 	    skb->mac_header != skb->network_header) {
 		const unsigned char *p = skb_mac_header(skb);
 		unsigned int i;
 
-		printk("%02x", *p++);
+		sb_add(m, "%02x", *p++);
 		for (i = 1; i < dev->hard_header_len; i++, p++)
-			printk(":%02x", *p);
+			sb_add(m, ":%02x", *p);
 	}
-	printk(" ");
+	sb_add(m, " ");
 }
 
 static struct nf_loginfo default_loginfo = {
@@ -419,11 +419,12 @@ ipt_log_packet(u_int8_t pf,
 	       const struct nf_loginfo *loginfo,
 	       const char *prefix)
 {
+	struct sbuff *m = sb_open();
+
 	if (!loginfo)
 		loginfo = &default_loginfo;
 
-	spin_lock_bh(&log_lock);
-	printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
+	sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
 	       prefix,
 	       in ? in->name : "",
 	       out ? out->name : "");
@@ -434,20 +435,20 @@ ipt_log_packet(u_int8_t pf,
 
 		physindev = skb->nf_bridge->physindev;
 		if (physindev && in != physindev)
-			printk("PHYSIN=%s ", physindev->name);
+			sb_add(m, "PHYSIN=%s ", physindev->name);
 		physoutdev = skb->nf_bridge->physoutdev;
 		if (physoutdev && out != physoutdev)
-			printk("PHYSOUT=%s ", physoutdev->name);
+			sb_add(m, "PHYSOUT=%s ", physoutdev->name);
 	}
 #endif
 
 	/* MAC logging for input path only. */
 	if (in && !out)
-		dump_mac_header(loginfo, skb);
+		dump_mac_header(m, loginfo, skb);
+
+	dump_packet(m, loginfo, skb, 0);
 
-	dump_packet(loginfo, skb, 0);
-	printk("\n");
-	spin_unlock_bh(&log_lock);
+	sb_close(m);
 }
 
 static unsigned int
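The ipt_LOG conversion drops the global log_lock and the dozens of per-field printk() calls: each packet's log line is now accumulated in an sbuff via sb_add() and emitted once by sb_close(), so concurrent loggers cannot interleave their output. A rough userspace sketch of the shape of that helper trio; buffer size, allocation strategy, and error handling here are assumptions, not the choices made by the kernel's xt_log.h:

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

/* Sketch only: accumulate one line, emit it in a single write. */
struct sbuff {
	size_t	count;
	char	buf[1024];
};

static struct sbuff *sb_open(void)
{
	return calloc(1, sizeof(struct sbuff));
}

static void sb_add(struct sbuff *m, const char *fmt, ...)
{
	va_list args;
	int n;

	if (!m || m->count >= sizeof(m->buf))
		return;
	va_start(args, fmt);
	n = vsnprintf(m->buf + m->count, sizeof(m->buf) - m->count,
		      fmt, args);
	va_end(args);
	if (n > 0)
		m->count += (size_t)n;	/* truncation leaves buf terminated */
}

static void sb_close(struct sbuff *m)
{
	if (!m)
		return;
	printf("%s\n", m->buf);		/* one atomic emission per packet */
	free(m);
}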
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
index c31b87668250..0f23b3f06df0 100644
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ b/net/ipv4/netfilter/nf_nat_amanda.c
@@ -44,9 +44,16 @@ static unsigned int help(struct sk_buff *skb,
 
 	/* Try to get same port: if not, try to change it. */
 	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			port = 0;
 			break;
+		}
 	}
 
 	if (port == 0)
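This hunk, and the matching ones in the FTP, IRC, SIP, and H.323 NAT helpers below, teach the port-search loop to distinguish nf_ct_expect_related() failures: -EBUSY means this particular port is taken and the next candidate should be tried, while any other error makes further probing pointless, so the loop sets port = 0 and falls through to the existing "no port available" path. The idiom in isolation, as a sketch in which reserve_port() is a hypothetical stand-in for nf_ct_expect_related():

#include <errno.h>

extern int reserve_port(unsigned short port);	/* hypothetical */

unsigned short pick_port(unsigned short wanted)
{
	unsigned short port;

	for (port = wanted; port != 0; port++) {
		int ret = reserve_port(port);

		if (ret == 0)
			break;		/* reserved */
		if (ret != -EBUSY) {
			port = 0;	/* hard failure: stop probing */
			break;
		}
		/* -EBUSY: that port is taken, try the next candidate */
	}
	return port;			/* 0 signals failure */
}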
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 957c9241fb0c..295c97431e43 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -47,7 +47,7 @@ __nf_nat_proto_find(u_int8_t protonum)
 	return rcu_dereference(nf_nat_protos[protonum]);
 }
 
-const struct nf_nat_protocol *
+static const struct nf_nat_protocol *
 nf_nat_proto_find_get(u_int8_t protonum)
 {
 	const struct nf_nat_protocol *p;
@@ -60,14 +60,12 @@ nf_nat_proto_find_get(u_int8_t protonum)
 
 	return p;
 }
-EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);
 
-void
+static void
 nf_nat_proto_put(const struct nf_nat_protocol *p)
 {
 	module_put(p->me);
 }
-EXPORT_SYMBOL_GPL(nf_nat_proto_put);
 
 /* We keep an extra hash for each conntrack, for fast searching. */
 static inline unsigned int
@@ -262,11 +260,17 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
 
 	/* Only bother mapping if it's not already in range and unique */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM) &&
-	    (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
-	     proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
-	    !nf_nat_used_tuple(tuple, ct))
-		goto out;
+	if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
+		if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) {
+			if (proto->in_range(tuple, maniptype, &range->min,
+					    &range->max) &&
+			    (range->min.all == range->max.all ||
+			     !nf_nat_used_tuple(tuple, ct)))
+				goto out;
+		} else if (!nf_nat_used_tuple(tuple, ct)) {
+			goto out;
+		}
+	}
 
 	/* Last change: get protocol to try to obtain unique tuple. */
 	proto->unique_tuple(tuple, range, maniptype, ct);
@@ -458,6 +462,18 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 		return 0;
 	}
 
+	if (manip == IP_NAT_MANIP_SRC)
+		statusbit = IPS_SRC_NAT;
+	else
+		statusbit = IPS_DST_NAT;
+
+	/* Invert if this is reply dir. */
+	if (dir == IP_CT_DIR_REPLY)
+		statusbit ^= IPS_NAT_MASK;
+
+	if (!(ct->status & statusbit))
+		return 1;
+
 	pr_debug("icmp_reply_translation: translating error %p manip %u "
 		 "dir %s\n", skb, manip,
 		 dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
@@ -492,20 +508,9 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 
 	/* Change outer to look the reply to an incoming packet
	 * (proto 0 means don't invert per-proto part). */
-	if (manip == IP_NAT_MANIP_SRC)
-		statusbit = IPS_SRC_NAT;
-	else
-		statusbit = IPS_DST_NAT;
-
-	/* Invert if this is reply dir. */
-	if (dir == IP_CT_DIR_REPLY)
-		statusbit ^= IPS_NAT_MASK;
-
-	if (ct->status & statusbit) {
-		nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-		if (!manip_pkt(0, skb, 0, &target, manip))
-			return 0;
-	}
+	nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+	if (!manip_pkt(0, skb, 0, &target, manip))
+		return 0;
 
 	return 1;
 }
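Three things change in nf_nat_core.c: nf_nat_proto_find_get()/nf_nat_proto_put() lose their exports and become static; icmp_reply_translation() hoists its status-bit test above all the mangling so it can return early when no NAT of the relevant direction was applied; and get_unique_tuple() gains a case where a range pinning the per-proto part to a single value (range->min.all == range->max.all) is accepted even if the tuple is already in use, since there is no alternative to probe. The new acceptance test, restated in condensed form with booleans standing in for the flag and range machinery:

#include <stdbool.h>

/* Condensed restatement of get_unique_tuple()'s new in-range check;
 * every parameter is a stand-in, not a kernel type. */
static bool tuple_acceptable(bool proto_random, bool proto_specified,
			     bool in_range, bool single_value,
			     bool already_used)
{
	if (proto_random)
		return false;	/* always rehash the per-proto part */
	if (proto_specified)
		return in_range && (single_value || !already_used);
	return !already_used;
}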
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
index 86e0e84ff0a0..dc73abb3fe27 100644
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ b/net/ipv4/netfilter/nf_nat_ftp.c
@@ -79,9 +79,16 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb,
 
 	/* Try to get same port: if not, try to change it. */
 	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			port = 0;
 			break;
+		}
 	}
 
 	if (port == 0)
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 5045196d853c..790f3160e012 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -222,13 +222,24 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 	/* Try to get a pair of ports. */
 	for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
 	     nated_port != 0; nated_port += 2) {
+		int ret;
+
 		rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
-		if (nf_ct_expect_related(rtp_exp) == 0) {
+		ret = nf_ct_expect_related(rtp_exp);
+		if (ret == 0) {
 			rtcp_exp->tuple.dst.u.udp.port =
 				htons(nated_port + 1);
-			if (nf_ct_expect_related(rtcp_exp) == 0)
+			ret = nf_ct_expect_related(rtcp_exp);
+			if (ret == 0)
+				break;
+			else if (ret != -EBUSY) {
+				nf_ct_unexpect_related(rtp_exp);
+				nated_port = 0;
 				break;
-			nf_ct_unexpect_related(rtp_exp);
+			}
+		} else if (ret != -EBUSY) {
+			nated_port = 0;
+			break;
 		}
 	}
 
@@ -284,9 +295,16 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
 
 	/* Try to get same port: if not, try to change it. */
 	for (; nated_port != 0; nated_port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			nated_port = 0;
 			break;
+		}
 	}
 
 	if (nated_port == 0) {	/* No port available */
@@ -334,9 +352,16 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 
 	/* Try to get same port: if not, try to change it. */
 	for (; nated_port != 0; nated_port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
 			break;
+		else if (ret != -EBUSY) {
+			nated_port = 0;
+			break;
+		}
 	}
 
 	if (nated_port == 0) {	/* No port available */
@@ -418,9 +443,16 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 
 	/* Try to get same port: if not, try to change it. */
 	for (; nated_port != 0; nated_port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			nated_port = 0;
 			break;
+		}
 	}
 
 	if (nated_port == 0) {	/* No port available */
@@ -500,9 +532,16 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
 
 	/* Try to get same port: if not, try to change it. */
 	for (nated_port = ntohs(port); nated_port != 0; nated_port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(nated_port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
 			break;
+		else if (ret != -EBUSY) {
+			nated_port = 0;
+			break;
+		}
 	}
 
 	if (nated_port == 0) {	/* No port available */
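nat_rtp_rtcp() needs two consecutive ports (RTP on the base port, RTCP on the next one), so the error handling carries an extra wrinkle: if the RTP reservation succeeds but the RTCP one fails hard, the RTP expectation must be unwound before giving up. A hedged standalone sketch of that shape; reserve()/unreserve() are hypothetical stand-ins for nf_ct_expect_related()/nf_ct_unexpect_related(), and unlike the kernel code this version also releases the first port before every retry so it cannot leak a reservation:

#include <errno.h>

extern int reserve(unsigned short port);	/* hypothetical */
extern void unreserve(unsigned short port);	/* hypothetical */

unsigned short pick_port_pair(unsigned short wanted)
{
	unsigned short port;

	for (port = wanted; port != 0; port += 2) {
		int ret = reserve(port);

		if (ret == -EBUSY)
			continue;		/* base taken, try next pair */
		if (ret < 0)
			return 0;		/* hard error */
		ret = reserve(port + 1);
		if (ret == 0)
			return port;		/* got both halves */
		unreserve(port);		/* roll back the first half */
		if (ret != -EBUSY)
			return 0;		/* hard error */
	}
	return 0;				/* candidates exhausted */
}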
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 4a0c6b548eee..31427fb57aa8 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -153,6 +153,35 @@ void nf_nat_set_seq_adjust(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
 }
 EXPORT_SYMBOL_GPL(nf_nat_set_seq_adjust);
 
+static void nf_nat_csum(struct sk_buff *skb, struct iphdr *iph, void *data,
+			int datalen, __sum16 *check, int oldlen)
+{
+	struct rtable *rt = skb_rtable(skb);
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL) {
+		if (!(rt->rt_flags & RTCF_LOCAL) &&
+		    skb->dev->features & NETIF_F_V4_CSUM) {
+			skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->csum_start = skb_headroom(skb) +
+					  skb_network_offset(skb) +
+					  iph->ihl * 4;
+			skb->csum_offset = (void *)check - data;
+			*check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+						    datalen, iph->protocol, 0);
+		} else {
+			*check = 0;
+			*check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+						   datalen, iph->protocol,
+						   csum_partial(data, datalen,
+								0));
+			if (iph->protocol == IPPROTO_UDP && !*check)
+				*check = CSUM_MANGLED_0;
+		}
+	} else
+		inet_proto_csum_replace2(check, skb,
+					 htons(oldlen), htons(datalen), 1);
+}
+
 /* Generic function for mangling variable-length address changes inside
  * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
  * command in FTP).
@@ -169,7 +198,6 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 			       const char *rep_buffer,
 			       unsigned int rep_len, bool adjust)
 {
-	struct rtable *rt = skb_rtable(skb);
 	struct iphdr *iph;
 	struct tcphdr *tcph;
 	int oldlen, datalen;
@@ -192,26 +220,7 @@ int __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 		    match_offset, match_len, rep_buffer, rep_len);
 
 	datalen = skb->len - iph->ihl*4;
-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		if (!(rt->rt_flags & RTCF_LOCAL) &&
-		    skb->dev->features & NETIF_F_V4_CSUM) {
-			skb->ip_summed = CHECKSUM_PARTIAL;
-			skb->csum_start = skb_headroom(skb) +
-					  skb_network_offset(skb) +
-					  iph->ihl * 4;
-			skb->csum_offset = offsetof(struct tcphdr, check);
-			tcph->check = ~tcp_v4_check(datalen,
-						    iph->saddr, iph->daddr, 0);
-		} else {
-			tcph->check = 0;
-			tcph->check = tcp_v4_check(datalen,
-						   iph->saddr, iph->daddr,
-						   csum_partial(tcph,
-								datalen, 0));
-		}
-	} else
-		inet_proto_csum_replace2(&tcph->check, skb,
-					 htons(oldlen), htons(datalen), 1);
+	nf_nat_csum(skb, iph, tcph, datalen, &tcph->check, oldlen);
 
 	if (adjust && rep_len != match_len)
 		nf_nat_set_seq_adjust(ct, ctinfo, tcph->seq,
@@ -240,7 +249,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
 			 const char *rep_buffer,
 			 unsigned int rep_len)
 {
-	struct rtable *rt = skb_rtable(skb);
 	struct iphdr *iph;
 	struct udphdr *udph;
 	int datalen, oldlen;
@@ -274,29 +282,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
 	if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
 		return 1;
 
-	if (skb->ip_summed != CHECKSUM_PARTIAL) {
-		if (!(rt->rt_flags & RTCF_LOCAL) &&
-		    skb->dev->features & NETIF_F_V4_CSUM) {
-			skb->ip_summed = CHECKSUM_PARTIAL;
-			skb->csum_start = skb_headroom(skb) +
-					  skb_network_offset(skb) +
-					  iph->ihl * 4;
-			skb->csum_offset = offsetof(struct udphdr, check);
-			udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
-							 datalen, IPPROTO_UDP,
-							 0);
-		} else {
-			udph->check = 0;
-			udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
-							datalen, IPPROTO_UDP,
-							csum_partial(udph,
-								     datalen, 0));
-			if (!udph->check)
-				udph->check = CSUM_MANGLED_0;
-		}
-	} else
-		inet_proto_csum_replace2(&udph->check, skb,
-					 htons(oldlen), htons(datalen), 1);
+	nf_nat_csum(skb, iph, udph, datalen, &udph->check, oldlen);
 
 	return 1;
 }
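The two near-identical checksum blocks in the TCP and UDP manglers fold into one nf_nat_csum() helper; the protocol differences travel through its parameters, including the generic offset expression "(void *)check - data", which replaces the two hard-coded offsetof() constants. A minimal standalone illustration of why that expression is equivalent; the struct is a stand-in, not the kernel's tcphdr:

#include <assert.h>
#include <stddef.h>

struct fake_tcphdr {
	unsigned short	source;
	unsigned short	dest;
	unsigned int	seq;
	unsigned short	check;
};

int main(void)
{
	struct fake_tcphdr th;
	char *data = (char *)&th;		/* start of the header */
	char *check = (char *)&th.check;	/* address of its check field */

	/* pointer difference == offsetof(), for any header layout */
	assert((size_t)(check - data) == offsetof(struct fake_tcphdr, check));
	return 0;
}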
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
index ea83a886b03e..535e1a802356 100644
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ b/net/ipv4/netfilter/nf_nat_irc.c
@@ -45,9 +45,16 @@ static unsigned int help(struct sk_buff *skb,
 
 	/* Try to get same port: if not, try to change it. */
 	for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+		int ret;
+
 		exp->tuple.dst.u.tcp.port = htons(port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			port = 0;
 			break;
+		}
 	}
 
 	if (port == 0)
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index ebbd319f62f5..21c30426480b 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -106,16 +106,15 @@ alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 {
 	/* Force range to this IP; let proto decide mapping for
	   per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
-	   Use reply in case it's already been mangled (eg local packet).
 	*/
-	__be32 ip
-		= (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
-		   ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip
-		   : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
-	struct nf_nat_range range
-		= { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
-
-	pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, &ip);
+	struct nf_nat_range range;
+
+	range.flags = 0;
+	pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
+		 HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ?
+		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
+		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
+
 	return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 11b538deaaec..e40cf7816fdb 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -307,9 +307,16 @@ static unsigned int ip_nat_sip_expect(struct sk_buff *skb, unsigned int dataoff,
 	exp->expectfn = ip_nat_sip_expected;
 
 	for (; port != 0; port++) {
+		int ret;
+
 		exp->tuple.dst.u.udp.port = htons(port);
-		if (nf_ct_expect_related(exp) == 0)
+		ret = nf_ct_expect_related(exp);
+		if (ret == 0)
+			break;
+		else if (ret != -EBUSY) {
+			port = 0;
 			break;
+		}
 	}
 
 	if (port == 0)
@@ -480,13 +487,25 @@ static unsigned int ip_nat_sdp_media(struct sk_buff *skb, unsigned int dataoff,
 	/* Try to get same pair of ports: if not, try to change them. */
 	for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
 	     port != 0; port += 2) {
+		int ret;
+
 		rtp_exp->tuple.dst.u.udp.port = htons(port);
-		if (nf_ct_expect_related(rtp_exp) != 0)
+		ret = nf_ct_expect_related(rtp_exp);
+		if (ret == -EBUSY)
 			continue;
+		else if (ret < 0) {
+			port = 0;
+			break;
+		}
 		rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
-		if (nf_ct_expect_related(rtcp_exp) == 0)
+		ret = nf_ct_expect_related(rtcp_exp);
+		if (ret == 0)
 			break;
-		nf_ct_unexpect_related(rtp_exp);
+		else if (ret != -EBUSY) {
+			nf_ct_unexpect_related(rtp_exp);
+			port = 0;
+			break;
+		}
 	}
 
 	if (port == 0)
diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c
index f2d297351405..65699c24411c 100644
--- a/net/ipv4/protocol.c
+++ b/net/ipv4/protocol.c
@@ -28,8 +28,7 @@
 #include <linux/spinlock.h>
 #include <net/protocol.h>
 
-const struct net_protocol *inet_protos[MAX_INET_PROTOS] ____cacheline_aligned_in_smp;
-static DEFINE_SPINLOCK(inet_proto_lock);
+const struct net_protocol *inet_protos[MAX_INET_PROTOS] __read_mostly;
 
 /*
  *	Add a protocol handler to the hash tables
@@ -37,20 +36,9 @@ static DEFINE_SPINLOCK(inet_proto_lock);
 
 int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol)
 {
-	int hash, ret;
+	int hash = protocol & (MAX_INET_PROTOS - 1);
 
-	hash = protocol & (MAX_INET_PROTOS - 1);
-
-	spin_lock_bh(&inet_proto_lock);
-	if (inet_protos[hash]) {
-		ret = -1;
-	} else {
-		inet_protos[hash] = prot;
-		ret = 0;
-	}
-	spin_unlock_bh(&inet_proto_lock);
-
-	return ret;
+	return !cmpxchg(&inet_protos[hash], NULL, prot) ? 0 : -1;
 }
 EXPORT_SYMBOL(inet_add_protocol);
 
@@ -60,18 +48,9 @@ EXPORT_SYMBOL(inet_add_protocol);
 
 int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol)
 {
-	int hash, ret;
-
-	hash = protocol & (MAX_INET_PROTOS - 1);
+	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
 
-	spin_lock_bh(&inet_proto_lock);
-	if (inet_protos[hash] == prot) {
-		inet_protos[hash] = NULL;
-		ret = 0;
-	} else {
-		ret = -1;
-	}
-	spin_unlock_bh(&inet_proto_lock);
+	ret = (cmpxchg(&inet_protos[hash], prot, NULL) == prot) ? 0 : -1;
 
 	synchronize_net();
 
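The protocol.c hunks drop inet_proto_lock entirely: registration becomes a single cmpxchg() that claims the slot only if it is still NULL, and unregistration a cmpxchg() that clears it only if it still holds the expected handler, with synchronize_net() draining in-flight readers on the removal side. The same lock-free claim/release idiom in miniature, using C11 atomics in place of the kernel's cmpxchg():

#include <stdatomic.h>
#include <stddef.h>

/* Miniature slot registry mirroring inet_add_protocol() /
 * inet_del_protocol(): claim succeeds only if the slot was empty,
 * release only if the caller still owns it. Names are illustrative. */
#define NSLOTS 256

static _Atomic(const void *) slots[NSLOTS];

int slot_add(const void *handler, unsigned char key)
{
	const void *expected = NULL;

	return atomic_compare_exchange_strong(&slots[key & (NSLOTS - 1)],
					      &expected, handler) ? 0 : -1;
}

int slot_del(const void *handler, unsigned char key)
{
	const void *expected = handler;

	/* The kernel version follows this with synchronize_net() so
	 * readers drain before the handler can be freed. */
	return atomic_compare_exchange_strong(&slots[key & (NSLOTS - 1)],
					      &expected, NULL) ? 0 : -1;
}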
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 009a7b2aa1ef..1f85ef289895 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -505,7 +505,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
 	ipc.addr = inet->inet_saddr;
 	ipc.opt = NULL;
-	ipc.shtx.flags = 0;
+	ipc.tx_flags = 0;
 	ipc.oif = sk->sk_bound_dev_if;
 
 	if (msg->msg_controllen) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ac6559cb54f9..d6cb2bfcd8e1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -159,7 +159,6 @@ static struct dst_ops ipv4_dst_ops = {
159 .link_failure = ipv4_link_failure, 159 .link_failure = ipv4_link_failure,
160 .update_pmtu = ip_rt_update_pmtu, 160 .update_pmtu = ip_rt_update_pmtu,
161 .local_out = __ip_local_out, 161 .local_out = __ip_local_out,
162 .entries = ATOMIC_INIT(0),
163}; 162};
164 163
165#define ECN_OR_COST(class) TC_PRIO_##class 164#define ECN_OR_COST(class) TC_PRIO_##class
@@ -466,7 +465,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v)
466 465
467 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " 466 seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x "
468 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", 467 " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
469 atomic_read(&ipv4_dst_ops.entries), 468 dst_entries_get_slow(&ipv4_dst_ops),
470 st->in_hit, 469 st->in_hit,
471 st->in_slow_tot, 470 st->in_slow_tot,
472 st->in_slow_mc, 471 st->in_slow_mc,
@@ -945,6 +944,7 @@ static int rt_garbage_collect(struct dst_ops *ops)
945 struct rtable *rth, **rthp; 944 struct rtable *rth, **rthp;
946 unsigned long now = jiffies; 945 unsigned long now = jiffies;
947 int goal; 946 int goal;
947 int entries = dst_entries_get_fast(&ipv4_dst_ops);
948 948
949 /* 949 /*
950 * Garbage collection is pretty expensive, 950 * Garbage collection is pretty expensive,
@@ -954,28 +954,28 @@ static int rt_garbage_collect(struct dst_ops *ops)
954 RT_CACHE_STAT_INC(gc_total); 954 RT_CACHE_STAT_INC(gc_total);
955 955
956 if (now - last_gc < ip_rt_gc_min_interval && 956 if (now - last_gc < ip_rt_gc_min_interval &&
957 atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) { 957 entries < ip_rt_max_size) {
958 RT_CACHE_STAT_INC(gc_ignored); 958 RT_CACHE_STAT_INC(gc_ignored);
959 goto out; 959 goto out;
960 } 960 }
961 961
962 entries = dst_entries_get_slow(&ipv4_dst_ops);
962 /* Calculate number of entries, which we want to expire now. */ 963 /* Calculate number of entries, which we want to expire now. */
963 goal = atomic_read(&ipv4_dst_ops.entries) - 964 goal = entries - (ip_rt_gc_elasticity << rt_hash_log);
964 (ip_rt_gc_elasticity << rt_hash_log);
965 if (goal <= 0) { 965 if (goal <= 0) {
966 if (equilibrium < ipv4_dst_ops.gc_thresh) 966 if (equilibrium < ipv4_dst_ops.gc_thresh)
967 equilibrium = ipv4_dst_ops.gc_thresh; 967 equilibrium = ipv4_dst_ops.gc_thresh;
968 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; 968 goal = entries - equilibrium;
969 if (goal > 0) { 969 if (goal > 0) {
970 equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1); 970 equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1);
971 goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; 971 goal = entries - equilibrium;
972 } 972 }
973 } else { 973 } else {
974 /* We are in dangerous area. Try to reduce cache really 974 /* We are in dangerous area. Try to reduce cache really
975 * aggressively. 975 * aggressively.
976 */ 976 */
977 goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1); 977 goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1);
978 equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal; 978 equilibrium = entries - goal;
979 } 979 }
980 980
981 if (now - last_gc >= ip_rt_gc_min_interval) 981 if (now - last_gc >= ip_rt_gc_min_interval)
@@ -1032,14 +1032,16 @@ static int rt_garbage_collect(struct dst_ops *ops)
1032 expire >>= 1; 1032 expire >>= 1;
1033#if RT_CACHE_DEBUG >= 2 1033#if RT_CACHE_DEBUG >= 2
1034 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire, 1034 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire,
1035 atomic_read(&ipv4_dst_ops.entries), goal, i); 1035 dst_entries_get_fast(&ipv4_dst_ops), goal, i);
1036#endif 1036#endif
1037 1037
1038 if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) 1038 if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
1039 goto out; 1039 goto out;
1040 } while (!in_softirq() && time_before_eq(jiffies, now)); 1040 } while (!in_softirq() && time_before_eq(jiffies, now));
1041 1041
1042 if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) 1042 if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size)
1043 goto out;
1044 if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size)
1043 goto out; 1045 goto out;
1044 if (net_ratelimit()) 1046 if (net_ratelimit())
1045 printk(KERN_WARNING "dst cache overflow\n"); 1047 printk(KERN_WARNING "dst cache overflow\n");
@@ -1049,11 +1051,12 @@ static int rt_garbage_collect(struct dst_ops *ops)
1049work_done: 1051work_done:
1050 expire += ip_rt_gc_min_interval; 1052 expire += ip_rt_gc_min_interval;
1051 if (expire > ip_rt_gc_timeout || 1053 if (expire > ip_rt_gc_timeout ||
1052 atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh) 1054 dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh ||
1055 dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh)
1053 expire = ip_rt_gc_timeout; 1056 expire = ip_rt_gc_timeout;
1054#if RT_CACHE_DEBUG >= 2 1057#if RT_CACHE_DEBUG >= 2
1055 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire, 1058 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire,
1056 atomic_read(&ipv4_dst_ops.entries), goal, rover); 1059 dst_entries_get_fast(&ipv4_dst_ops), goal, rover);
1057#endif 1060#endif
1058out: return 0; 1061out: return 0;
1059} 1062}
@@ -1102,23 +1105,23 @@ restart:
1102 * Note that we do rt_free on this new route entry, so that 1105 * Note that we do rt_free on this new route entry, so that
1103 * once its refcount hits zero, we are still able to reap it 1106 * once its refcount hits zero, we are still able to reap it
1104 * (Thanks Alexey) 1107 * (Thanks Alexey)
1105 * Note also the rt_free uses call_rcu. We don't actually 1108 * Note: To avoid expensive rcu stuff for this uncached dst,
1106 * need rcu protection here, this is just our path to get 1109 * we set DST_NOCACHE so that dst_release() can free dst without
1107 * on the route gc list. 1110 * waiting a grace period.
1108 */ 1111 */
1109 1112
1113 rt->dst.flags |= DST_NOCACHE;
1110 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { 1114 if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
1111 int err = arp_bind_neighbour(&rt->dst); 1115 int err = arp_bind_neighbour(&rt->dst);
1112 if (err) { 1116 if (err) {
1113 if (net_ratelimit()) 1117 if (net_ratelimit())
1114 printk(KERN_WARNING 1118 printk(KERN_WARNING
1115 "Neighbour table failure & not caching routes.\n"); 1119 "Neighbour table failure & not caching routes.\n");
1116 rt_drop(rt); 1120 ip_rt_put(rt);
1117 return err; 1121 return err;
1118 } 1122 }
1119 } 1123 }
1120 1124
1121 rt_free(rt);
1122 goto skip_hashing; 1125 goto skip_hashing;
1123 } 1126 }
1124 1127
@@ -1268,18 +1271,11 @@ skip_hashing:
1268 1271
1269void rt_bind_peer(struct rtable *rt, int create) 1272void rt_bind_peer(struct rtable *rt, int create)
1270{ 1273{
1271 static DEFINE_SPINLOCK(rt_peer_lock);
1272 struct inet_peer *peer; 1274 struct inet_peer *peer;
1273 1275
1274 peer = inet_getpeer(rt->rt_dst, create); 1276 peer = inet_getpeer(rt->rt_dst, create);
1275 1277
1276 spin_lock_bh(&rt_peer_lock); 1278 if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
1277 if (rt->peer == NULL) {
1278 rt->peer = peer;
1279 peer = NULL;
1280 }
1281 spin_unlock_bh(&rt_peer_lock);
1282 if (peer)
1283 inet_putpeer(peer); 1279 inet_putpeer(peer);
1284} 1280}
1285 1281
@@ -1779,12 +1775,15 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt)

 	if (rt->fl.iif == 0)
 		src = rt->rt_src;
-	else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) {
-		src = FIB_RES_PREFSRC(res);
-		fib_res_put(&res);
-	} else
-		src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
-					RT_SCOPE_UNIVERSE);
+	else {
+		rcu_read_lock();
+		if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0)
+			src = FIB_RES_PREFSRC(res);
+		else
+			src = inet_select_addr(rt->dst.dev, rt->rt_gateway,
+					RT_SCOPE_UNIVERSE);
+		rcu_read_unlock();
+	}
 	memcpy(addr, &src, 4);
 }

@@ -2087,6 +2086,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
  *	Such approach solves two big problems:
  *	1. Not simplex devices are handled properly.
  *	2. IP spoofing attempts are filtered with 100% of guarantee.
+ *	called with rcu_read_lock()
  */

 static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2108,7 +2108,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	unsigned	hash;
 	__be32		spec_dst;
 	int		err = -EINVAL;
-	int		free_res = 0;
 	struct net    * net = dev_net(dev);

 	/* IP on this device is disabled. */
@@ -2124,7 +2123,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	    ipv4_is_loopback(saddr))
 		goto martian_source;

-	if (daddr == htonl(0xFFFFFFFF) || (saddr == 0 && daddr == 0))
+	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
 		goto brd_input;

 	/* Accept zero addresses only to limited broadcast;
@@ -2133,19 +2132,18 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	if (ipv4_is_zeronet(saddr))
 		goto martian_source;

-	if (ipv4_is_lbcast(daddr) || ipv4_is_zeronet(daddr) ||
-	    ipv4_is_loopback(daddr))
+	if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr))
 		goto martian_destination;

 	/*
 	 *	Now we are ready to route packet.
 	 */
-	if ((err = fib_lookup(net, &fl, &res)) != 0) {
+	err = fib_lookup(net, &fl, &res);
+	if (err != 0) {
 		if (!IN_DEV_FORWARD(in_dev))
 			goto e_hostunreach;
 		goto no_route;
 	}
-	free_res = 1;

 	RT_CACHE_STAT_INC(in_slow_tot);

@@ -2154,8 +2152,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,

 	if (res.type == RTN_LOCAL) {
 		err = fib_validate_source(saddr, daddr, tos,
-					     net->loopback_dev->ifindex,
-					     dev, &spec_dst, &itag, skb->mark);
+					  net->loopback_dev->ifindex,
+					  dev, &spec_dst, &itag, skb->mark);
 		if (err < 0)
 			goto martian_source_keep_err;
 		if (err)
@@ -2170,9 +2168,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		goto martian_destination;

 	err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
-done:
-	if (free_res)
-		fib_res_put(&res);
 out:	return err;

 brd_input:
@@ -2232,7 +2227,7 @@ local_input:
 	rth->rt_type	= res.type;
 	hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net));
 	err = rt_intern_hash(hash, rth, NULL, skb, fl.iif);
-	goto done;
+	goto out;

 no_route:
 	RT_CACHE_STAT_INC(in_no_route);
@@ -2255,21 +2250,21 @@ martian_destination:

 e_hostunreach:
 	err = -EHOSTUNREACH;
-	goto done;
+	goto out;

 e_inval:
 	err = -EINVAL;
-	goto done;
+	goto out;

 e_nobufs:
 	err = -ENOBUFS;
-	goto done;
+	goto out;

 martian_source:
 	err = -EINVAL;
 martian_source_keep_err:
 	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
-	goto done;
+	goto out;
 }

 int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
@@ -2355,6 +2350,7 @@ skip_cache:
 }
 EXPORT_SYMBOL(ip_route_input_common);

+/* called with rcu_read_lock() */
 static int __mkroute_output(struct rtable **result,
 			    struct fib_result *res,
 			    const struct flowi *fl,
@@ -2365,53 +2361,47 @@ static int __mkroute_output(struct rtable **result,
 	struct rtable *rth;
 	struct in_device *in_dev;
 	u32 tos = RT_FL_TOS(oldflp);
-	int err = 0;

-	if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
+	if (ipv4_is_loopback(fl->fl4_src) && !(dev_out->flags & IFF_LOOPBACK))
 		return -EINVAL;

-	if (fl->fl4_dst == htonl(0xFFFFFFFF))
+	if (ipv4_is_lbcast(fl->fl4_dst))
 		res->type = RTN_BROADCAST;
 	else if (ipv4_is_multicast(fl->fl4_dst))
 		res->type = RTN_MULTICAST;
-	else if (ipv4_is_lbcast(fl->fl4_dst) || ipv4_is_zeronet(fl->fl4_dst))
+	else if (ipv4_is_zeronet(fl->fl4_dst))
 		return -EINVAL;

 	if (dev_out->flags & IFF_LOOPBACK)
 		flags |= RTCF_LOCAL;

-	/* get work reference to inet device */
-	in_dev = in_dev_get(dev_out);
+	in_dev = __in_dev_get_rcu(dev_out);
 	if (!in_dev)
 		return -EINVAL;

 	if (res->type == RTN_BROADCAST) {
 		flags |= RTCF_BROADCAST | RTCF_LOCAL;
-		if (res->fi) {
-			fib_info_put(res->fi);
-			res->fi = NULL;
-		}
+		res->fi = NULL;
 	} else if (res->type == RTN_MULTICAST) {
-		flags |= RTCF_MULTICAST|RTCF_LOCAL;
+		flags |= RTCF_MULTICAST | RTCF_LOCAL;
 		if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src,
 				 oldflp->proto))
 			flags &= ~RTCF_LOCAL;
 		/* If multicast route do not exist use
-		   default one, but do not gateway in this case.
-		   Yes, it is hack.
+		 * default one, but do not gateway in this case.
+		 * Yes, it is hack.
 		 */
-		if (res->fi && res->prefixlen < 4) {
-			fib_info_put(res->fi);
+		if (res->fi && res->prefixlen < 4)
 			res->fi = NULL;
-		}
 	}


 	rth = dst_alloc(&ipv4_dst_ops);
-	if (!rth) {
-		err = -ENOBUFS;
-		goto cleanup;
-	}
+	if (!rth)
+		return -ENOBUFS;
+
+	in_dev_hold(in_dev);
+	rth->idev = in_dev;

 	atomic_set(&rth->dst.__refcnt, 1);
 	rth->dst.flags= DST_HOST;
@@ -2432,7 +2422,6 @@ static int __mkroute_output(struct rtable **result,
 	   cache entry */
 	rth->dst.dev	= dev_out;
 	dev_hold(dev_out);
-	rth->idev	= in_dev_get(dev_out);
 	rth->rt_gateway = fl->fl4_dst;
 	rth->rt_spec_dst= fl->fl4_src;

@@ -2467,15 +2456,11 @@ static int __mkroute_output(struct rtable **result,
 	rt_set_nexthop(rth, res, 0);

 	rth->rt_flags = flags;
-
 	*result = rth;
- cleanup:
-	/* release work reference to inet device */
-	in_dev_put(in_dev);
-
-	return err;
+	return 0;
 }

+/* called with rcu_read_lock() */
 static int ip_mkroute_output(struct rtable **rp,
 			     struct fib_result *res,
 			     const struct flowi *fl,
@@ -2497,6 +2482,7 @@ static int ip_mkroute_output(struct rtable **rp,

 /*
  * Major route resolver routine.
+ * called with rcu_read_lock();
  */

 static int ip_route_output_slow(struct net *net, struct rtable **rp,
@@ -2515,9 +2501,8 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 			    .iif = net->loopback_dev->ifindex,
 			    .oif = oldflp->oif };
 	struct fib_result res;
-	unsigned flags = 0;
+	unsigned int flags = 0;
 	struct net_device *dev_out = NULL;
-	int free_res = 0;
 	int err;


@@ -2543,9 +2528,9 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,

 		if (oldflp->oif == 0 &&
 		    (ipv4_is_multicast(oldflp->fl4_dst) ||
-		     oldflp->fl4_dst == htonl(0xFFFFFFFF))) {
+		     ipv4_is_lbcast(oldflp->fl4_dst))) {
 			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-			dev_out = ip_dev_find(net, oldflp->fl4_src);
+			dev_out = __ip_dev_find(net, oldflp->fl4_src, false);
 			if (dev_out == NULL)
 				goto out;

@@ -2570,29 +2555,24 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,

 		if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) {
 			/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
-			dev_out = ip_dev_find(net, oldflp->fl4_src);
-			if (dev_out == NULL)
+			if (!__ip_dev_find(net, oldflp->fl4_src, false))
 				goto out;
-			dev_put(dev_out);
-			dev_out = NULL;
 		}
 	}


 	if (oldflp->oif) {
-		dev_out = dev_get_by_index(net, oldflp->oif);
+		dev_out = dev_get_by_index_rcu(net, oldflp->oif);
 		err = -ENODEV;
 		if (dev_out == NULL)
 			goto out;

 		/* RACE: Check return value of inet_select_addr instead. */
-		if (__in_dev_get_rtnl(dev_out) == NULL) {
-			dev_put(dev_out);
+		if (rcu_dereference(dev_out->ip_ptr) == NULL)
 			goto out;	/* Wrong error code */
-		}

 		if (ipv4_is_local_multicast(oldflp->fl4_dst) ||
-		    oldflp->fl4_dst == htonl(0xFFFFFFFF)) {
+		    ipv4_is_lbcast(oldflp->fl4_dst)) {
 			if (!fl.fl4_src)
 				fl.fl4_src = inet_select_addr(dev_out, 0,
 							      RT_SCOPE_LINK);
@@ -2612,10 +2592,7 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 		fl.fl4_dst = fl.fl4_src;
 		if (!fl.fl4_dst)
 			fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
-		if (dev_out)
-			dev_put(dev_out);
 		dev_out = net->loopback_dev;
-		dev_hold(dev_out);
 		fl.oif = net->loopback_dev->ifindex;
 		res.type = RTN_LOCAL;
 		flags |= RTCF_LOCAL;
@@ -2649,23 +2626,15 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 			res.type = RTN_UNICAST;
 			goto make_route;
 		}
-		if (dev_out)
-			dev_put(dev_out);
 		err = -ENETUNREACH;
 		goto out;
 	}
-	free_res = 1;

 	if (res.type == RTN_LOCAL) {
 		if (!fl.fl4_src)
 			fl.fl4_src = fl.fl4_dst;
-		if (dev_out)
-			dev_put(dev_out);
 		dev_out = net->loopback_dev;
-		dev_hold(dev_out);
 		fl.oif = dev_out->ifindex;
-		if (res.fi)
-			fib_info_put(res.fi);
 		res.fi = NULL;
 		flags |= RTCF_LOCAL;
 		goto make_route;
@@ -2682,28 +2651,21 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 	if (!fl.fl4_src)
 		fl.fl4_src = FIB_RES_PREFSRC(res);

-	if (dev_out)
-		dev_put(dev_out);
 	dev_out = FIB_RES_DEV(res);
-	dev_hold(dev_out);
 	fl.oif = dev_out->ifindex;


 make_route:
 	err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags);

-
-	if (free_res)
-		fib_res_put(&res);
-	if (dev_out)
-		dev_put(dev_out);
 out:	return err;
 }

 int __ip_route_output_key(struct net *net, struct rtable **rp,
 			  const struct flowi *flp)
 {
-	unsigned hash;
+	unsigned int hash;
+	int res;
 	struct rtable *rth;

 	if (!rt_caching(net))
@@ -2734,7 +2696,10 @@ int __ip_route_output_key(struct net *net, struct rtable **rp,
 	rcu_read_unlock_bh();

 slow_output:
-	return ip_route_output_slow(net, rp, flp);
+	rcu_read_lock();
+	res = ip_route_output_slow(net, rp, flp);
+	rcu_read_unlock();
+	return res;
 }
 EXPORT_SYMBOL_GPL(__ip_route_output_key);

@@ -2753,7 +2718,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = {
 	.destroy		=	ipv4_dst_destroy,
 	.check			=	ipv4_blackhole_dst_check,
 	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
-	.entries		=	ATOMIC_INIT(0),
 };


@@ -2798,7 +2762,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi

 	dst_release(&(*rp)->dst);
 	*rp = rt;
-	return (rt ? 0 : -ENOMEM);
+	return rt ? 0 : -ENOMEM;
 }

 int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp,
@@ -3323,6 +3287,12 @@ int __init ip_rt_init(void)

 	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

+	if (dst_entries_init(&ipv4_dst_ops) < 0)
+		panic("IP: failed to allocate ipv4_dst_ops counter\n");
+
+	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
+		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");
+
 	rt_hash_table = (struct rt_hash_bucket *)
 		alloc_large_system_hash("IP route cache",
 					sizeof(struct rt_hash_bucket),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f115ea68a4ef..1664a0590bb8 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2392,7 +2392,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		err = tp->af_specific->md5_parse(sk, optval, optlen);
 		break;
 #endif
-
+	case TCP_USER_TIMEOUT:
+		/* Cap the max timeout in ms TCP will retry/retrans
+		 * before giving up and aborting (ETIMEDOUT) a connection.
+		 */
+		icsk->icsk_user_timeout = msecs_to_jiffies(val);
+		break;
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -2611,6 +2616,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 	case TCP_THIN_DUPACK:
 		val = tp->thin_dupack;
 		break;
+
+	case TCP_USER_TIMEOUT:
+		val = jiffies_to_msecs(icsk->icsk_user_timeout);
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
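
From userspace, the new option is driven like any other TCP-level socket option. A hedged example (TCP_USER_TIMEOUT is 18 in the headers that accompany this series, so older libc headers may need the fallback define; the helper name is illustrative):

	#include <netinet/in.h>
	#include <netinet/tcp.h>
	#include <sys/socket.h>

	#ifndef TCP_USER_TIMEOUT
	#define TCP_USER_TIMEOUT 18	/* value used by this series */
	#endif

	/* Abort the connection (ETIMEDOUT) once transmitted data has
	 * stayed unacknowledged for 30 seconds, instead of relying on
	 * the retry-count based defaults.
	 */
	static int set_user_timeout(int fd)
	{
		unsigned int timeout_ms = 30000;

		return setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT,
				  &timeout_ms, sizeof(timeout_ms));
	}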
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index b55f60f6fcbe..ee0df4817498 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -182,7 +182,7 @@ static void tcp_incr_quickack(struct sock *sk)
 	icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
 }

-void tcp_enter_quickack_mode(struct sock *sk)
+static void tcp_enter_quickack_mode(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	tcp_incr_quickack(sk);
@@ -805,25 +805,12 @@ void tcp_update_metrics(struct sock *sk)
 	}
 }

-/* Numbers are taken from RFC3390.
- *
- * John Heffner states:
- *
- *	The RFC specifies a window of no more than 4380 bytes
- *	unless 2*MSS > 4380.  Reading the pseudocode in the RFC
- *	is a bit misleading because they use a clamp at 4380 bytes
- *	rather than use a multiplier in the relevant range.
- */
 __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst)
 {
 	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);

-	if (!cwnd) {
-		if (tp->mss_cache > 1460)
-			cwnd = 2;
-		else
-			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
-	}
+	if (!cwnd)
+		cwnd = rfc3390_bytes_to_packets(tp->mss_cache);
 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
 }

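Both this hunk and the tcp_output.c one further below replace open-coded MSS thresholds with rfc3390_bytes_to_packets(). A sketch of what that helper computes, following RFC 3390's min(4*MSS, max(2*MSS, 4380 bytes)) initial window (hedged: this matches the helper as added to include/net/tcp.h by this series, but the definition here is reproduced from memory):

	/* 4 segments for an MSS up to 1095 bytes, 3 up to 2190 bytes,
	 * and 2 above that.
	 */
	static inline u32 rfc3390_bytes_to_packets(const u32 bytes)
	{
		return bytes <= 1095 ? 4 : (bytes > 2190 ? 2 : 3);
	}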
@@ -2314,7 +2301,7 @@ static inline int tcp_dupack_heuristics(struct tcp_sock *tp)

 static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
 {
-	return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto);
+	return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto;
 }

 static inline int tcp_head_timedout(struct sock *sk)
@@ -2508,7 +2495,7 @@ static void tcp_timeout_skbs(struct sock *sk)
 /* Mark head of queue up as lost. With RFC3517 SACK, the packets is
  * is against sacked "cnt", otherwise it's against facked "cnt"
  */
-static void tcp_mark_head_lost(struct sock *sk, int packets)
+static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -2516,13 +2503,13 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
 	int err;
 	unsigned int mss;

-	if (packets == 0)
-		return;
-
 	WARN_ON(packets > tp->packets_out);
 	if (tp->lost_skb_hint) {
 		skb = tp->lost_skb_hint;
 		cnt = tp->lost_cnt_hint;
+		/* Head already handled? */
+		if (mark_head && skb != tcp_write_queue_head(sk))
+			return;
 	} else {
 		skb = tcp_write_queue_head(sk);
 		cnt = 0;
@@ -2557,6 +2544,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets)
 		}

 		tcp_skb_mark_lost(tp, skb);
+
+		if (mark_head)
+			break;
 	}
 	tcp_verify_left_out(tp);
 }
@@ -2568,17 +2558,18 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
 	struct tcp_sock *tp = tcp_sk(sk);

 	if (tcp_is_reno(tp)) {
-		tcp_mark_head_lost(sk, 1);
+		tcp_mark_head_lost(sk, 1, 1);
 	} else if (tcp_is_fack(tp)) {
 		int lost = tp->fackets_out - tp->reordering;
 		if (lost <= 0)
 			lost = 1;
-		tcp_mark_head_lost(sk, lost);
+		tcp_mark_head_lost(sk, lost, 0);
 	} else {
 		int sacked_upto = tp->sacked_out - tp->reordering;
-		if (sacked_upto < fast_rexmit)
-			sacked_upto = fast_rexmit;
-		tcp_mark_head_lost(sk, sacked_upto);
+		if (sacked_upto >= 0)
+			tcp_mark_head_lost(sk, sacked_upto, 0);
+		else if (fast_rexmit)
+			tcp_mark_head_lost(sk, 1, 1);
 	}

 	tcp_timeout_skbs(sk);
@@ -2887,7 +2878,7 @@ static void tcp_mtup_probe_success(struct sock *sk)
 		       icsk->icsk_mtup.probe_size;
 	tp->snd_cwnd_cnt = 0;
 	tp->snd_cwnd_stamp = tcp_time_stamp;
-	tp->rcv_ssthresh = tcp_current_ssthresh(sk);
+	tp->snd_ssthresh = tcp_current_ssthresh(sk);

 	icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
 	icsk->icsk_mtup.probe_size = 0;
@@ -2984,7 +2975,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
 	    before(tp->snd_una, tp->high_seq) &&
 	    icsk->icsk_ca_state != TCP_CA_Open &&
 	    tp->fackets_out > tp->reordering) {
-		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering);
+		tcp_mark_head_lost(sk, tp->fackets_out - tp->reordering, 0);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSS);
 	}

@@ -3412,8 +3403,8 @@ static void tcp_ack_probe(struct sock *sk)

 static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag)
 {
-	return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
-		inet_csk(sk)->icsk_ca_state != TCP_CA_Open);
+	return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) ||
+		inet_csk(sk)->icsk_ca_state != TCP_CA_Open;
 }

 static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag)
@@ -3430,9 +3421,9 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp,
 					const u32 ack, const u32 ack_seq,
 					const u32 nwin)
 {
-	return (after(ack, tp->snd_una) ||
+	return	after(ack, tp->snd_una) ||
 		after(ack_seq, tp->snd_wl1) ||
-		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd));
+		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
 }

 /* Update our send window.
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 020766292bb0..8f8527d41682 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1422,7 +1422,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,

 	newsk = tcp_create_openreq_child(sk, req, skb);
 	if (!newsk)
-		goto exit;
+		goto exit_nonewsk;

 	newsk->sk_gso_type = SKB_GSO_TCPV4;
 	sk_setup_caps(newsk, dst);
@@ -1469,16 +1469,20 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	}
 #endif

+	if (__inet_inherit_port(sk, newsk) < 0) {
+		sock_put(newsk);
+		goto exit;
+	}
 	__inet_hash_nolisten(newsk, NULL);
-	__inet_inherit_port(sk, newsk);

 	return newsk;

 exit_overflow:
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
+exit_nonewsk:
+	dst_release(dst);
 exit:
 	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
-	dst_release(dst);
 	return NULL;
 }
 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
@@ -2571,7 +2575,6 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb)

 	return tcp_gro_receive(head, skb);
 }
-EXPORT_SYMBOL(tcp4_gro_receive);

 int tcp4_gro_complete(struct sk_buff *skb)
 {
@@ -2584,7 +2587,6 @@ int tcp4_gro_complete(struct sk_buff *skb)

 	return tcp_gro_complete(skb);
 }
-EXPORT_SYMBOL(tcp4_gro_complete);

 struct proto tcp_prot = {
 	.name			= "TCP",
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f25b56cb85cb..43cf901d7659 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -55,7 +55,7 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win)
 		return 1;
 	if (after(end_seq, s_win) && before(seq, e_win))
 		return 1;
-	return (seq == e_win && seq == end_seq);
+	return seq == e_win && seq == end_seq;
 }

 /*
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index de3bd8458588..05b1ecf36763 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -224,16 +224,10 @@ void tcp_select_initial_window(int __space, __u32 mss,
 		}
 	}

-	/* Set initial window to value enough for senders,
-	 * following RFC2414. Senders, not following this RFC,
-	 * will be satisfied with 2.
-	 */
+	/* Set initial window to value enough for senders, following RFC5681. */
 	if (mss > (1 << *rcv_wscale)) {
-		int init_cwnd = 4;
-		if (mss > 1460 * 3)
-			init_cwnd = 2;
-		else if (mss > 1460)
-			init_cwnd = 3;
+		int init_cwnd = rfc3390_bytes_to_packets(mss);
+
 		/* when initializing use the value from init_rcv_wnd
 		 * rather than the default from above
 		 */
@@ -1376,9 +1370,9 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp,
 				  const struct sk_buff *skb,
 				  unsigned mss_now, int nonagle)
 {
-	return (skb->len < mss_now &&
+	return skb->len < mss_now &&
 		((nonagle & TCP_NAGLE_CORK) ||
-		 (!nonagle && tp->packets_out && tcp_minshall_check(tp))));
+		 (!nonagle && tp->packets_out && tcp_minshall_check(tp)));
 }

 /* Return non-zero if the Nagle test allows this packet to be
@@ -1449,10 +1443,10 @@ int tcp_may_send_now(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = tcp_send_head(sk);

-	return (skb &&
+	return skb &&
 		tcp_snd_test(sk, skb, tcp_current_mss(sk),
 			     (tcp_skb_is_last(sk, skb) ?
-			      tp->nonagle : TCP_NAGLE_PUSH)));
+			      tp->nonagle : TCP_NAGLE_PUSH));
 }

 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
@@ -2429,6 +2423,12 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 		__u8 rcv_wscale;
 		/* Set this up on the first call only */
 		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+
+		/* limit the window selection if the user enforce a smaller rx buffer */
+		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+		    (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
+			req->window_clamp = tcp_full_space(sk);
+
 		/* tcp_full_space because it is guaranteed to be the first packet */
 		tcp_select_initial_window(tcp_full_space(sk),
 			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
@@ -2555,6 +2555,11 @@ static void tcp_connect_init(struct sock *sk)

 	tcp_initialize_rcv_mss(sk);

+	/* limit the window selection if the user enforce a smaller rx buffer */
+	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+	    (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
+		tp->window_clamp = tcp_full_space(sk);
+
 	tcp_select_initial_window(tcp_full_space(sk),
 		tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
 		&tp->rcv_wnd,
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 74c54b30600f..74a6aa003657 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -140,10 +140,10 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
  */
 static bool retransmits_timed_out(struct sock *sk,
 				  unsigned int boundary,
+				  unsigned int timeout,
 				  bool syn_set)
 {
-	unsigned int timeout, linear_backoff_thresh;
-	unsigned int start_ts;
+	unsigned int linear_backoff_thresh, start_ts;
 	unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN;

 	if (!inet_csk(sk)->icsk_retransmits)
@@ -154,14 +154,15 @@ static bool retransmits_timed_out(struct sock *sk,
 	else
 		start_ts = tcp_sk(sk)->retrans_stamp;

-	linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);
+	if (likely(timeout == 0)) {
+		linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base);

-	if (boundary <= linear_backoff_thresh)
-		timeout = ((2 << boundary) - 1) * rto_base;
-	else
-		timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
-			  (boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+		if (boundary <= linear_backoff_thresh)
+			timeout = ((2 << boundary) - 1) * rto_base;
+		else
+			timeout = ((2 << linear_backoff_thresh) - 1) * rto_base +
+				(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+	}
 	return (tcp_time_stamp - start_ts) >= timeout;
 }

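To make the bypass concrete: with the usual defaults, rto_base = TCP_RTO_MIN = 200 ms and TCP_RTO_MAX = 120 s, so linear_backoff_thresh = ilog2(600) = 9. A boundary of sysctl_tcp_retries2 = 15 then yields ((2 << 9) - 1) * 200 ms + (15 - 9) * 120 s, roughly 924.6 s, the familiar ~15.5 minute write timeout. A nonzero TCP_USER_TIMEOUT skips this computation entirely and substitutes the caller's cap.
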
@@ -178,7 +179,7 @@ static int tcp_write_timeout(struct sock *sk)
 		retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
 		syn_set = 1;
 	} else {
-		if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) {
+		if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
 			/* Black hole detection */
 			tcp_mtu_probing(icsk, sk);

@@ -191,14 +192,15 @@ static int tcp_write_timeout(struct sock *sk)

 			retry_until = tcp_orphan_retries(sk, alive);
 			do_reset = alive ||
-				   !retransmits_timed_out(sk, retry_until, 0);
+				!retransmits_timed_out(sk, retry_until, 0, 0);

 			if (tcp_out_of_resources(sk, do_reset))
 				return 1;
 		}
 	}

-	if (retransmits_timed_out(sk, retry_until, syn_set)) {
+	if (retransmits_timed_out(sk, retry_until,
+				  syn_set ? 0 : icsk->icsk_user_timeout, syn_set)) {
 		/* Has it gone just too far? */
 		tcp_write_err(sk);
 		return 1;
@@ -365,18 +367,19 @@ void tcp_retransmit_timer(struct sock *sk)
 	if (icsk->icsk_retransmits == 0) {
 		int mib_idx;

-		if (icsk->icsk_ca_state == TCP_CA_Disorder) {
-			if (tcp_is_sack(tp))
-				mib_idx = LINUX_MIB_TCPSACKFAILURES;
-			else
-				mib_idx = LINUX_MIB_TCPRENOFAILURES;
-		} else if (icsk->icsk_ca_state == TCP_CA_Recovery) {
+		if (icsk->icsk_ca_state == TCP_CA_Recovery) {
 			if (tcp_is_sack(tp))
 				mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL;
 			else
 				mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL;
 		} else if (icsk->icsk_ca_state == TCP_CA_Loss) {
 			mib_idx = LINUX_MIB_TCPLOSSFAILURES;
+		} else if ((icsk->icsk_ca_state == TCP_CA_Disorder) ||
+			   tp->sacked_out) {
+			if (tcp_is_sack(tp))
+				mib_idx = LINUX_MIB_TCPSACKFAILURES;
+			else
+				mib_idx = LINUX_MIB_TCPRENOFAILURES;
 		} else {
 			mib_idx = LINUX_MIB_TCPTIMEOUTS;
 		}
@@ -440,7 +443,7 @@ out_reset_timer:
 		icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
 	}
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
-	if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0))
+	if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0, 0))
 		__sk_dst_reset(sk);

 out:;
@@ -560,7 +563,14 @@ static void tcp_keepalive_timer (unsigned long data)
 	elapsed = keepalive_time_elapsed(tp);

 	if (elapsed >= keepalive_time_when(tp)) {
-		if (icsk->icsk_probes_out >= keepalive_probes(tp)) {
+		/* If the TCP_USER_TIMEOUT option is enabled, use that
+		 * to determine when to timeout instead.
+		 */
+		if ((icsk->icsk_user_timeout != 0 &&
+		    elapsed >= icsk->icsk_user_timeout &&
+		    icsk->icsk_probes_out > 0) ||
+		    (icsk->icsk_user_timeout == 0 &&
+		    icsk->icsk_probes_out >= keepalive_probes(tp))) {
 			tcp_send_active_reset(sk, GFP_ATOMIC);
 			tcp_write_err(sk);
 			goto out;
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index 20151d6a6241..a534dda5456e 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -80,7 +80,7 @@ static void tcp_westwood_init(struct sock *sk)
  */
 static inline u32 westwood_do_filter(u32 a, u32 b)
 {
-	return (((7 * a) + b) >> 3);
+	return ((7 * a) + b) >> 3;
 }

 static void westwood_filter(struct westwood *w, u32 delta)
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index 59186ca7808a..9a17bd2a0a37 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -14,8 +14,8 @@
 #include <net/protocol.h>
 #include <net/xfrm.h>

-static struct xfrm_tunnel *tunnel4_handlers;
-static struct xfrm_tunnel *tunnel64_handlers;
+static struct xfrm_tunnel *tunnel4_handlers __read_mostly;
+static struct xfrm_tunnel *tunnel64_handlers __read_mostly;
 static DEFINE_MUTEX(tunnel4_mutex);

 static inline struct xfrm_tunnel **fam_handlers(unsigned short family)
@@ -39,7 +39,7 @@ int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family)
 	}

 	handler->next = *pprev;
-	*pprev = handler;
+	rcu_assign_pointer(*pprev, handler);

 	ret = 0;

@@ -73,6 +73,11 @@ int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family)
 }
 EXPORT_SYMBOL(xfrm4_tunnel_deregister);

+#define for_each_tunnel_rcu(head, handler)		\
+	for (handler = rcu_dereference(head);		\
+	     handler != NULL;				\
+	     handler = rcu_dereference(handler->next))	\
+
 static int tunnel4_rcv(struct sk_buff *skb)
 {
 	struct xfrm_tunnel *handler;
@@ -80,7 +85,7 @@ static int tunnel4_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
 		goto drop;

-	for (handler = tunnel4_handlers; handler; handler = handler->next)
+	for_each_tunnel_rcu(tunnel4_handlers, handler)
 		if (!handler->handler(skb))
 			return 0;

@@ -99,7 +104,7 @@ static int tunnel64_rcv(struct sk_buff *skb)
 	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
 		goto drop;

-	for (handler = tunnel64_handlers; handler; handler = handler->next)
+	for_each_tunnel_rcu(tunnel64_handlers, handler)
 		if (!handler->handler(skb))
 			return 0;

@@ -115,7 +120,7 @@ static void tunnel4_err(struct sk_buff *skb, u32 info)
 {
 	struct xfrm_tunnel *handler;

-	for (handler = tunnel4_handlers; handler; handler = handler->next)
+	for_each_tunnel_rcu(tunnel4_handlers, handler)
 		if (!handler->err_handler(skb, info))
 			break;
 }
@@ -125,7 +130,7 @@ static void tunnel64_err(struct sk_buff *skb, u32 info)
 {
 	struct xfrm_tunnel *handler;

-	for (handler = tunnel64_handlers; handler; handler = handler->next)
+	for_each_tunnel_rcu(tunnel64_handlers, handler)
 		if (!handler->err_handler(skb, info))
 			break;
 }
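
Taken together, the two sides of this file now form the canonical RCU singly-linked-list pattern: the single writer, serialized by tunnel4_mutex, links a fully initialized node in front and publishes it with rcu_assign_pointer(); readers load every pointer through rcu_dereference(), which is exactly what for_each_tunnel_rcu() expands to. A consolidated sketch of the two halves as they appear above (the RX path here already runs inside an RCU read-side critical section, so no explicit rcu_read_lock() is shown):

	/* Writer, under tunnel4_mutex: init first, publish last. */
	handler->next = *pprev;
	rcu_assign_pointer(*pprev, handler);

	/* Reader: each hop is an rcu_dereference()'d load, so a
	 * concurrently inserted node is either seen whole or not at all.
	 */
	for (handler = rcu_dereference(tunnel4_handlers);
	     handler != NULL;
	     handler = rcu_dereference(handler->next))
		if (!handler->handler(skb))
			return 0;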
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fb23c2e63b52..b3f7e8cf18ac 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -797,7 +797,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 		return -EOPNOTSUPP;

 	ipc.opt = NULL;
-	ipc.shtx.flags = 0;
+	ipc.tx_flags = 0;

 	if (up->pending) {
 		/*
@@ -845,7 +845,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	ipc.addr = inet->inet_saddr;

 	ipc.oif = sk->sk_bound_dev_if;
-	err = sock_tx_timestamp(msg, sk, &ipc.shtx);
+	err = sock_tx_timestamp(sk, &ipc.tx_flags);
 	if (err)
 		return err;
 	if (msg->msg_controllen) {
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index a580349f0b8a..4464f3bff6a7 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -174,7 +174,7 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops)
 	struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops);

 	xfrm4_policy_afinfo.garbage_collect(net);
-	return (atomic_read(&ops->entries) > ops->gc_thresh * 2);
+	return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);
 }

 static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -232,7 +232,6 @@ static struct dst_ops xfrm4_dst_ops = {
 	.ifdown =		xfrm4_dst_ifdown,
 	.local_out =		__ip_local_out,
 	.gc_thresh =		1024,
-	.entries =		ATOMIC_INIT(0),
 };

 static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
@@ -288,6 +287,7 @@ void __init xfrm4_init(int rt_max_size)
 	 * and start cleaning when were 1/2 full
 	 */
 	xfrm4_dst_ops.gc_thresh = rt_max_size/2;
+	dst_entries_init(&xfrm4_dst_ops);

 	xfrm4_state_init();
 	xfrm4_policy_init();
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 41f5982d2087..82806455e859 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -58,14 +58,14 @@ static int xfrm_tunnel_err(struct sk_buff *skb, u32 info)
 	return -ENOENT;
 }

-static struct xfrm_tunnel xfrm_tunnel_handler = {
+static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = {
 	.handler	=	xfrm_tunnel_rcv,
 	.err_handler	=	xfrm_tunnel_err,
 	.priority	=	2,
 };

 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-static struct xfrm_tunnel xfrm64_tunnel_handler = {
+static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = {
 	.handler	=	xfrm_tunnel_rcv,
 	.err_handler	=	xfrm_tunnel_err,
 	.priority	=	2,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 324fac3b6c16..ec7a91d9e865 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -243,7 +243,7 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev)
 /* Check if a route is valid prefix route */
 static inline int addrconf_is_prefix_route(const struct rt6_info *rt)
 {
-	return ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0);
+	return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0;
 }

 static void addrconf_del_timer(struct inet6_ifaddr *ifp)
@@ -1544,7 +1544,7 @@ static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev)
 	return 0;
 }

-int __ipv6_isatap_ifid(u8 *eui, __be32 addr)
+static int __ipv6_isatap_ifid(u8 *eui, __be32 addr)
 {
 	if (addr == 0)
 		return -1;
@@ -1560,7 +1560,6 @@ int __ipv6_isatap_ifid(u8 *eui, __be32 addr)
 	memcpy(eui + 4, &addr, 4);
 	return 0;
 }
-EXPORT_SYMBOL(__ipv6_isatap_ifid);

 static int addrconf_ifid_sit(u8 *eui, struct net_device *dev)
 {
@@ -2964,7 +2963,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 	   start sending router solicitations.
 	 */

-	if (ifp->idev->cnf.forwarding == 0 &&
+	if ((ifp->idev->cnf.forwarding == 0 ||
+	     ifp->idev->cnf.forwarding == 2) &&
 	    ifp->idev->cnf.rtr_solicits > 0 &&
 	    (dev->flags&IFF_LOOPBACK) == 0 &&
 	    (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 8175f802651b..c8993e5a337c 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -518,10 +518,9 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)

 static inline int ip6addrlbl_msgsize(void)
 {
-	return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
+	return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
 		+ nla_total_size(16)	/* IFAL_ADDRESS */
-		+ nla_total_size(4)	/* IFAL_LABEL */
-		);
+		+ nla_total_size(4);	/* IFAL_LABEL */
 }

 static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 56b9bf2516f4..54e8e42f7a88 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -343,7 +343,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 			 */
 			v4addr = LOOPBACK4_IPV6;
 			if (!(addr_type & IPV6_ADDR_MULTICAST))	{
-				if (!ipv6_chk_addr(net, &addr->sin6_addr,
+				if (!inet->transparent &&
+				    !ipv6_chk_addr(net, &addr->sin6_addr,
 						   dev, 0)) {
 					err = -EADDRNOTAVAIL;
 					goto out_unlock;
@@ -467,7 +468,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 			sin->sin6_scope_id = sk->sk_bound_dev_if;
 	*uaddr_len = sizeof(*sin);
-	return(0);
+	return 0;
 }

 EXPORT_SYMBOL(inet6_getname);
@@ -488,7 +489,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	case SIOCADDRT:
 	case SIOCDELRT:

-		return(ipv6_route_ioctl(net, cmd, (void __user *)arg));
+		return ipv6_route_ioctl(net, cmd, (void __user *)arg);

 	case SIOCSIFADDR:
 		return addrconf_add_ifaddr(net, (void __user *) arg);
@@ -502,7 +503,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 		return sk->sk_prot->ioctl(sk, cmd, arg);
 	}
 	/*NOTREACHED*/
-	return(0);
+	return 0;
 }

 EXPORT_SYMBOL(inet6_ioctl);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index ef371aa01ac5..320bdb877eed 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -577,6 +577,25 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		u8 *ptr = nh + opt->dst1;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292DSTOPTS, (ptr[1]+1)<<3, ptr);
 	}
+	if (np->rxopt.bits.rxorigdstaddr) {
+		struct sockaddr_in6 sin6;
+		u16 *ports = (u16 *) skb_transport_header(skb);
+
+		if (skb_transport_offset(skb) + 4 <= skb->len) {
+			/* All current transport protocols have the port numbers in the
+			 * first four bytes of the transport header and this function is
+			 * written with this assumption in mind.
+			 */
+
+			sin6.sin6_family = AF_INET6;
+			ipv6_addr_copy(&sin6.sin6_addr, &ipv6_hdr(skb)->daddr);
+			sin6.sin6_port = ports[1];
+			sin6.sin6_flowinfo = 0;
+			sin6.sin6_scope_id = 0;
+
+			put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);
+		}
+	}
 	return 0;
 }

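The new IPV6_ORIGDSTADDR ancillary message is the IPv6 counterpart of IP_ORIGDSTADDR, added for transparent proxies that need the pre-TPROXY destination of a datagram. A hedged userspace sketch (the option value 74 matches the headers accompanying this series; the helper names are illustrative, and the option must be enabled before the recvmsg() whose ancillary data is parsed):

	#include <netinet/in.h>
	#include <string.h>
	#include <sys/socket.h>

	#ifndef IPV6_RECVORIGDSTADDR
	#define IPV6_ORIGDSTADDR	74	/* value in this series */
	#define IPV6_RECVORIGDSTADDR	IPV6_ORIGDSTADDR
	#endif

	/* Ask the kernel to attach the original destination to every
	 * received datagram.
	 */
	static int enable_origdst(int fd)
	{
		int on = 1;

		return setsockopt(fd, IPPROTO_IPV6, IPV6_RECVORIGDSTADDR,
				  &on, sizeof(on));
	}

	/* Pull the original destination out of the control messages of
	 * a recvmsg() result.
	 */
	static int parse_origdst(struct msghdr *msg, struct sockaddr_in6 *dst)
	{
		struct cmsghdr *cmsg;

		for (cmsg = CMSG_FIRSTHDR(msg); cmsg;
		     cmsg = CMSG_NXTHDR(msg, cmsg)) {
			if (cmsg->cmsg_level == SOL_IPV6 &&
			    cmsg->cmsg_type == IPV6_ORIGDSTADDR) {
				memcpy(dst, CMSG_DATA(cmsg), sizeof(*dst));
				return 0;
			}
		}
		return -1;
	}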
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index e1caa5d526c2..14ed0a955b56 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -13,12 +13,12 @@ int ipv6_ext_hdr(u8 nexthdr)
 	/*
 	 * find out if nexthdr is an extension header or a protocol
 	 */
-	return ( (nexthdr == NEXTHDR_HOP)	||
+	return   (nexthdr == NEXTHDR_HOP)	||
 		 (nexthdr == NEXTHDR_ROUTING)	||
 		 (nexthdr == NEXTHDR_FRAGMENT)	||
 		 (nexthdr == NEXTHDR_AUTH)	||
 		 (nexthdr == NEXTHDR_NONE)	||
-		 (nexthdr == NEXTHDR_DEST) );
+		 (nexthdr == NEXTHDR_DEST);
 }

 /*
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b1108ede18e1..d829874d8946 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -34,11 +34,10 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi *fl,
 {
 	struct fib_lookup_arg arg = {
 		.lookup_ptr = lookup,
+		.flags = FIB_LOOKUP_NOREF,
 	};

 	fib_rules_lookup(net->ipv6.fib6_rules_ops, fl, flags, &arg);
-	if (arg.rule)
-		fib_rule_put(arg.rule);

 	if (arg.result)
 		return arg.result;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index b6a585909d35..de382114609b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1500,15 +1500,18 @@ static void fib6_gc_timer_cb(unsigned long arg)

 static int __net_init fib6_net_init(struct net *net)
 {
+	size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;
+
 	setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net);

 	net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
 	if (!net->ipv6.rt6_stats)
 		goto out_timer;

-	net->ipv6.fib_table_hash = kcalloc(FIB6_TABLE_HASHSZ,
-					   sizeof(*net->ipv6.fib_table_hash),
-					   GFP_KERNEL);
+	/* Avoid false sharing : Use at least a full cache line */
+	size = max_t(size_t, size, L1_CACHE_BYTES);
+
+	net->ipv6.fib_table_hash = kzalloc(size, GFP_KERNEL);
 	if (!net->ipv6.fib_table_hash)
 		goto out_rt6_stats;

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 980912ed7a38..99157b4cd56e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -637,7 +637,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 	}
 	mtu -= hlen + sizeof(struct frag_hdr);

-	if (skb_has_frags(skb)) {
+	if (skb_has_frag_list(skb)) {
 		int first_len = skb_pagelen(skb);
 		struct sk_buff *frag2;

@@ -878,8 +878,8 @@ static inline int ip6_rt_check(struct rt6key *rt_key,
 				struct in6_addr *fl_addr,
 				struct in6_addr *addr_cache)
 {
-	return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
-		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
+	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
+		(addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
 }

 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 0fd027f3f47e..c2c0f89397b1 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -75,7 +75,7 @@ MODULE_LICENSE("GPL");
75 (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \ 75 (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
76 (HASH_SIZE - 1)) 76 (HASH_SIZE - 1))
77 77
78static void ip6_tnl_dev_init(struct net_device *dev); 78static int ip6_tnl_dev_init(struct net_device *dev);
79static void ip6_tnl_dev_setup(struct net_device *dev); 79static void ip6_tnl_dev_setup(struct net_device *dev);
80 80
81static int ip6_tnl_net_id __read_mostly; 81static int ip6_tnl_net_id __read_mostly;
@@ -83,15 +83,42 @@ struct ip6_tnl_net {
83 /* the IPv6 tunnel fallback device */ 83 /* the IPv6 tunnel fallback device */
84 struct net_device *fb_tnl_dev; 84 struct net_device *fb_tnl_dev;
85 /* lists for storing tunnels in use */ 85 /* lists for storing tunnels in use */
86 struct ip6_tnl *tnls_r_l[HASH_SIZE]; 86 struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
87 struct ip6_tnl *tnls_wc[1]; 87 struct ip6_tnl __rcu *tnls_wc[1];
88 struct ip6_tnl **tnls[2]; 88 struct ip6_tnl __rcu **tnls[2];
89}; 89};
90 90
91/* often modified stats are per cpu, other are shared (netdev->stats) */
92struct pcpu_tstats {
93 unsigned long rx_packets;
94 unsigned long rx_bytes;
95 unsigned long tx_packets;
96 unsigned long tx_bytes;
97};
98
99static struct net_device_stats *ip6_get_stats(struct net_device *dev)
100{
101 struct pcpu_tstats sum = { 0 };
102 int i;
103
104 for_each_possible_cpu(i) {
105 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
106
107 sum.rx_packets += tstats->rx_packets;
108 sum.rx_bytes += tstats->rx_bytes;
109 sum.tx_packets += tstats->tx_packets;
110 sum.tx_bytes += tstats->tx_bytes;
111 }
112 dev->stats.rx_packets = sum.rx_packets;
113 dev->stats.rx_bytes = sum.rx_bytes;
114 dev->stats.tx_packets = sum.tx_packets;
115 dev->stats.tx_bytes = sum.tx_bytes;
116 return &dev->stats;
117}
118
91/* 119/*
92 * Locking : hash tables are protected by RCU and a spinlock 120 * Locking : hash tables are protected by RCU and RTNL
93 */ 121 */
94static DEFINE_SPINLOCK(ip6_tnl_lock);
95 122
96static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) 123static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
97{ 124{
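
The hunk above moves the hot rx/tx counters into per-CPU storage: the data path increments a private pcpu_tstats copy with no lock and no cache-line bouncing, and ip6_get_stats() folds all copies only when statistics are read. A minimal userspace sketch of that split, with a fixed array standing in for alloc_percpu()/this_cpu_ptr():

#include <stdio.h>

#define NR_CPUS 4	/* assumption: fixed CPU count for the sketch */

struct tstats {
	unsigned long rx_packets;
	unsigned long rx_bytes;
};

static struct tstats percpu[NR_CPUS];	/* stands in for alloc_percpu() */

/* Fast path: each CPU bumps only its own slot, so no lock is taken and
 * no cache line ping-pongs between CPUs. */
static void rx_account(int cpu, unsigned long bytes)
{
	percpu[cpu].rx_packets++;
	percpu[cpu].rx_bytes += bytes;
}

/* Slow path: fold every slot on demand, as ip6_get_stats() does above. */
static struct tstats fold_stats(void)
{
	struct tstats sum = { 0, 0 };
	int i;

	for (i = 0; i < NR_CPUS; i++) {
		sum.rx_packets += percpu[i].rx_packets;
		sum.rx_bytes += percpu[i].rx_bytes;
	}
	return sum;
}

int main(void)
{
	struct tstats s;

	rx_account(0, 1500);
	rx_account(1, 60);
	s = fold_stats();
	printf("rx_packets=%lu rx_bytes=%lu\n", s.rx_packets, s.rx_bytes);
	return 0;
}
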
@@ -138,8 +165,8 @@ static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
138static struct ip6_tnl * 165static struct ip6_tnl *
139ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local) 166ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
140{ 167{
141 unsigned h0 = HASH(remote); 168 unsigned int h0 = HASH(remote);
142 unsigned h1 = HASH(local); 169 unsigned int h1 = HASH(local);
143 struct ip6_tnl *t; 170 struct ip6_tnl *t;
144 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 171 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
145 172
@@ -167,7 +194,7 @@ ip6_tnl_lookup(struct net *net, struct in6_addr *remote, struct in6_addr *local)
167 * Return: head of IPv6 tunnel list 194 * Return: head of IPv6 tunnel list
168 **/ 195 **/
169 196
170static struct ip6_tnl ** 197static struct ip6_tnl __rcu **
171ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p) 198ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
172{ 199{
173 struct in6_addr *remote = &p->raddr; 200 struct in6_addr *remote = &p->raddr;
@@ -190,12 +217,10 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, struct ip6_tnl_parm *p)
190static void 217static void
191ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) 218ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
192{ 219{
193 struct ip6_tnl **tp = ip6_tnl_bucket(ip6n, &t->parms); 220 struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms);
194 221
195 spin_lock_bh(&ip6_tnl_lock); 222 rcu_assign_pointer(t->next , rtnl_dereference(*tp));
196 t->next = *tp;
197 rcu_assign_pointer(*tp, t); 223 rcu_assign_pointer(*tp, t);
198 spin_unlock_bh(&ip6_tnl_lock);
199} 224}
200 225
201/** 226/**
@@ -206,18 +231,25 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
206static void 231static void
207ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) 232ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
208{ 233{
209 struct ip6_tnl **tp; 234 struct ip6_tnl __rcu **tp;
210 235 struct ip6_tnl *iter;
211 for (tp = ip6_tnl_bucket(ip6n, &t->parms); *tp; tp = &(*tp)->next) { 236
212 if (t == *tp) { 237 for (tp = ip6_tnl_bucket(ip6n, &t->parms);
213 spin_lock_bh(&ip6_tnl_lock); 238 (iter = rtnl_dereference(*tp)) != NULL;
214 *tp = t->next; 239 tp = &iter->next) {
215 spin_unlock_bh(&ip6_tnl_lock); 240 if (t == iter) {
241 rcu_assign_pointer(*tp, t->next);
216 break; 242 break;
217 } 243 }
218 } 244 }
219} 245}
220 246
247static void ip6_dev_free(struct net_device *dev)
248{
249 free_percpu(dev->tstats);
250 free_netdev(dev);
251}
252
221/** 253/**
222 * ip6_tnl_create() - create a new tunnel 254 * ip6_tnl_create() - create a new tunnel
223 * @p: tunnel parameters 255 * @p: tunnel parameters
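
ip6_tnl_link()/ip6_tnl_unlink() above drop the ip6_tnl_lock spinlock in favour of the RCU publish idiom: writers, already serialized by RTNL, initialize a node fully and only then publish it with rcu_assign_pointer(), so lockless readers always observe a consistent chain. A rough userspace analogue of the publish side using C11 release/acquire ordering (the mapping to rcu_assign_pointer()/rcu_dereference() is an approximation, and writer-vs-writer serialization is assumed to come from an external lock, as RTNL provides here):

#include <stdatomic.h>
#include <stdio.h>

struct tnl {
	int id;
	struct tnl *_Atomic next;
};

static struct tnl *_Atomic head;

/* Mirrors ip6_tnl_link(): fill in the node, then make it reachable with
 * a release store so readers never see a half-initialized entry. */
static void tnl_link(struct tnl *t)
{
	atomic_store_explicit(&t->next,
			      atomic_load_explicit(&head, memory_order_relaxed),
			      memory_order_relaxed);
	atomic_store_explicit(&head, t, memory_order_release);	/* publish */
}

int main(void)
{
	struct tnl a = { .id = 1, .next = NULL };
	struct tnl *r;

	tnl_link(&a);
	r = atomic_load_explicit(&head, memory_order_acquire);	/* "reader" */
	printf("head id=%d\n", r ? r->id : -1);
	return 0;
}
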
@@ -256,7 +288,9 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
256 288
257 t = netdev_priv(dev); 289 t = netdev_priv(dev);
258 t->parms = *p; 290 t->parms = *p;
259 ip6_tnl_dev_init(dev); 291 err = ip6_tnl_dev_init(dev);
292 if (err < 0)
293 goto failed_free;
260 294
261 if ((err = register_netdevice(dev)) < 0) 295 if ((err = register_netdevice(dev)) < 0)
262 goto failed_free; 296 goto failed_free;
@@ -266,7 +300,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p)
266 return t; 300 return t;
267 301
268failed_free: 302failed_free:
269 free_netdev(dev); 303 ip6_dev_free(dev);
270failed: 304failed:
271 return NULL; 305 return NULL;
272} 306}
@@ -290,10 +324,13 @@ static struct ip6_tnl *ip6_tnl_locate(struct net *net,
290{ 324{
291 struct in6_addr *remote = &p->raddr; 325 struct in6_addr *remote = &p->raddr;
292 struct in6_addr *local = &p->laddr; 326 struct in6_addr *local = &p->laddr;
327 struct ip6_tnl __rcu **tp;
293 struct ip6_tnl *t; 328 struct ip6_tnl *t;
294 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 329 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
295 330
296 for (t = *ip6_tnl_bucket(ip6n, p); t; t = t->next) { 331 for (tp = ip6_tnl_bucket(ip6n, p);
332 (t = rtnl_dereference(*tp)) != NULL;
333 tp = &t->next) {
297 if (ipv6_addr_equal(local, &t->parms.laddr) && 334 if (ipv6_addr_equal(local, &t->parms.laddr) &&
298 ipv6_addr_equal(remote, &t->parms.raddr)) 335 ipv6_addr_equal(remote, &t->parms.raddr))
299 return t; 336 return t;
@@ -318,13 +355,10 @@ ip6_tnl_dev_uninit(struct net_device *dev)
318 struct net *net = dev_net(dev); 355 struct net *net = dev_net(dev);
319 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 356 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
320 357
321 if (dev == ip6n->fb_tnl_dev) { 358 if (dev == ip6n->fb_tnl_dev)
322 spin_lock_bh(&ip6_tnl_lock); 359 rcu_assign_pointer(ip6n->tnls_wc[0], NULL);
323 ip6n->tnls_wc[0] = NULL; 360 else
324 spin_unlock_bh(&ip6_tnl_lock);
325 } else {
326 ip6_tnl_unlink(ip6n, t); 361 ip6_tnl_unlink(ip6n, t);
327 }
328 ip6_tnl_dst_reset(t); 362 ip6_tnl_dst_reset(t);
329 dev_put(dev); 363 dev_put(dev);
330} 364}
@@ -702,6 +736,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
702 736
703 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, 737 if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,
704 &ipv6h->daddr)) != NULL) { 738 &ipv6h->daddr)) != NULL) {
739 struct pcpu_tstats *tstats;
740
705 if (t->parms.proto != ipproto && t->parms.proto != 0) { 741 if (t->parms.proto != ipproto && t->parms.proto != 0) {
706 rcu_read_unlock(); 742 rcu_read_unlock();
707 goto discard; 743 goto discard;
@@ -724,10 +760,16 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
724 skb->pkt_type = PACKET_HOST; 760 skb->pkt_type = PACKET_HOST;
725 memset(skb->cb, 0, sizeof(struct inet6_skb_parm)); 761 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
726 762
727 skb_tunnel_rx(skb, t->dev); 763 tstats = this_cpu_ptr(t->dev->tstats);
764 tstats->rx_packets++;
765 tstats->rx_bytes += skb->len;
766
767 __skb_tunnel_rx(skb, t->dev);
728 768
729 dscp_ecn_decapsulate(t, ipv6h, skb); 769 dscp_ecn_decapsulate(t, ipv6h, skb);
770
730 netif_rx(skb); 771 netif_rx(skb);
772
731 rcu_read_unlock(); 773 rcu_read_unlock();
732 return 0; 774 return 0;
733 } 775 }
@@ -934,8 +976,10 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
934 err = ip6_local_out(skb); 976 err = ip6_local_out(skb);
935 977
936 if (net_xmit_eval(err) == 0) { 978 if (net_xmit_eval(err) == 0) {
937 stats->tx_bytes += pkt_len; 979 struct pcpu_tstats *tstats = this_cpu_ptr(t->dev->tstats);
938 stats->tx_packets++; 980
981 tstats->tx_bytes += pkt_len;
982 tstats->tx_packets++;
939 } else { 983 } else {
940 stats->tx_errors++; 984 stats->tx_errors++;
941 stats->tx_aborted_errors++; 985 stats->tx_aborted_errors++;
@@ -1300,12 +1344,14 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
1300 1344
1301 1345
1302static const struct net_device_ops ip6_tnl_netdev_ops = { 1346static const struct net_device_ops ip6_tnl_netdev_ops = {
1303 .ndo_uninit = ip6_tnl_dev_uninit, 1347 .ndo_uninit = ip6_tnl_dev_uninit,
1304 .ndo_start_xmit = ip6_tnl_xmit, 1348 .ndo_start_xmit = ip6_tnl_xmit,
1305 .ndo_do_ioctl = ip6_tnl_ioctl, 1349 .ndo_do_ioctl = ip6_tnl_ioctl,
1306 .ndo_change_mtu = ip6_tnl_change_mtu, 1350 .ndo_change_mtu = ip6_tnl_change_mtu,
1351 .ndo_get_stats = ip6_get_stats,
1307}; 1352};
1308 1353
1354
1309/** 1355/**
1310 * ip6_tnl_dev_setup - setup virtual tunnel device 1356 * ip6_tnl_dev_setup - setup virtual tunnel device
1311 * @dev: virtual device associated with tunnel 1357 * @dev: virtual device associated with tunnel
@@ -1317,7 +1363,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
1317static void ip6_tnl_dev_setup(struct net_device *dev) 1363static void ip6_tnl_dev_setup(struct net_device *dev)
1318{ 1364{
1319 dev->netdev_ops = &ip6_tnl_netdev_ops; 1365 dev->netdev_ops = &ip6_tnl_netdev_ops;
1320 dev->destructor = free_netdev; 1366 dev->destructor = ip6_dev_free;
1321 1367
1322 dev->type = ARPHRD_TUNNEL6; 1368 dev->type = ARPHRD_TUNNEL6;
1323 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr); 1369 dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
@@ -1333,12 +1379,17 @@ static void ip6_tnl_dev_setup(struct net_device *dev)
1333 * @dev: virtual device associated with tunnel 1379 * @dev: virtual device associated with tunnel
1334 **/ 1380 **/
1335 1381
1336static inline void 1382static inline int
1337ip6_tnl_dev_init_gen(struct net_device *dev) 1383ip6_tnl_dev_init_gen(struct net_device *dev)
1338{ 1384{
1339 struct ip6_tnl *t = netdev_priv(dev); 1385 struct ip6_tnl *t = netdev_priv(dev);
1386
1340 t->dev = dev; 1387 t->dev = dev;
1341 strcpy(t->parms.name, dev->name); 1388 strcpy(t->parms.name, dev->name);
1389 dev->tstats = alloc_percpu(struct pcpu_tstats);
1390 if (!dev->tstats)
1391 return -ENOMEM;
1392 return 0;
1342} 1393}
1343 1394
1344/** 1395/**
@@ -1346,11 +1397,15 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
1346 * @dev: virtual device associated with tunnel 1397 * @dev: virtual device associated with tunnel
1347 **/ 1398 **/
1348 1399
1349static void ip6_tnl_dev_init(struct net_device *dev) 1400static int ip6_tnl_dev_init(struct net_device *dev)
1350{ 1401{
1351 struct ip6_tnl *t = netdev_priv(dev); 1402 struct ip6_tnl *t = netdev_priv(dev);
1352 ip6_tnl_dev_init_gen(dev); 1403 int err = ip6_tnl_dev_init_gen(dev);
1404
1405 if (err)
1406 return err;
1353 ip6_tnl_link_config(t); 1407 ip6_tnl_link_config(t);
1408 return 0;
1354} 1409}
1355 1410
1356/** 1411/**
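
ip6_tnl_dev_init() and ip6_tnl_dev_init_gen() change from void to int so that an alloc_percpu() failure propagates to every caller instead of being silently ignored. A small sketch of this void-to-int conversion pattern (all names hypothetical):

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

struct mydev {
	void *stats;
};

/* Was "void init(...)": an allocation failure had nowhere to go.
 * Returning 0 / -errno lets every caller unwind, as in the hunks above. */
static int mydev_init(struct mydev *d)
{
	d->stats = calloc(1, 64);	/* stands in for alloc_percpu() */
	if (!d->stats)
		return -ENOMEM;
	return 0;
}

static int mydev_register(struct mydev *d)
{
	int err = mydev_init(d);

	if (err < 0)
		return err;	/* propagate instead of registering a broken dev */
	printf("registered\n");
	return 0;
}

int main(void)
{
	struct mydev d;
	int err = mydev_register(&d);

	if (!err)
		free(d.stats);
	return err < 0 ? 1 : 0;
}
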
@@ -1360,25 +1415,29 @@ static void ip6_tnl_dev_init(struct net_device *dev)
1360 * Return: 0 1415 * Return: 0
1361 **/ 1416 **/
1362 1417
1363static void __net_init ip6_fb_tnl_dev_init(struct net_device *dev) 1418static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev)
1364{ 1419{
1365 struct ip6_tnl *t = netdev_priv(dev); 1420 struct ip6_tnl *t = netdev_priv(dev);
1366 struct net *net = dev_net(dev); 1421 struct net *net = dev_net(dev);
1367 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); 1422 struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
1423 int err = ip6_tnl_dev_init_gen(dev);
1424
1425 if (err)
1426 return err;
1368 1427
1369 ip6_tnl_dev_init_gen(dev);
1370 t->parms.proto = IPPROTO_IPV6; 1428 t->parms.proto = IPPROTO_IPV6;
1371 dev_hold(dev); 1429 dev_hold(dev);
1372 ip6n->tnls_wc[0] = t; 1430 rcu_assign_pointer(ip6n->tnls_wc[0], t);
1431 return 0;
1373} 1432}
1374 1433
1375static struct xfrm6_tunnel ip4ip6_handler = { 1434static struct xfrm6_tunnel ip4ip6_handler __read_mostly = {
1376 .handler = ip4ip6_rcv, 1435 .handler = ip4ip6_rcv,
1377 .err_handler = ip4ip6_err, 1436 .err_handler = ip4ip6_err,
1378 .priority = 1, 1437 .priority = 1,
1379}; 1438};
1380 1439
1381static struct xfrm6_tunnel ip6ip6_handler = { 1440static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
1382 .handler = ip6ip6_rcv, 1441 .handler = ip6ip6_rcv,
1383 .err_handler = ip6ip6_err, 1442 .err_handler = ip6ip6_err,
1384 .priority = 1, 1443 .priority = 1,
@@ -1391,14 +1450,14 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n)
1391 LIST_HEAD(list); 1450 LIST_HEAD(list);
1392 1451
1393 for (h = 0; h < HASH_SIZE; h++) { 1452 for (h = 0; h < HASH_SIZE; h++) {
1394 t = ip6n->tnls_r_l[h]; 1453 t = rtnl_dereference(ip6n->tnls_r_l[h]);
1395 while (t != NULL) { 1454 while (t != NULL) {
1396 unregister_netdevice_queue(t->dev, &list); 1455 unregister_netdevice_queue(t->dev, &list);
1397 t = t->next; 1456 t = rtnl_dereference(t->next);
1398 } 1457 }
1399 } 1458 }
1400 1459
1401 t = ip6n->tnls_wc[0]; 1460 t = rtnl_dereference(ip6n->tnls_wc[0]);
1402 unregister_netdevice_queue(t->dev, &list); 1461 unregister_netdevice_queue(t->dev, &list);
1403 unregister_netdevice_many(&list); 1462 unregister_netdevice_many(&list);
1404} 1463}
@@ -1419,7 +1478,9 @@ static int __net_init ip6_tnl_init_net(struct net *net)
1419 goto err_alloc_dev; 1478 goto err_alloc_dev;
1420 dev_net_set(ip6n->fb_tnl_dev, net); 1479 dev_net_set(ip6n->fb_tnl_dev, net);
1421 1480
1422 ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); 1481 err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
1482 if (err < 0)
1483 goto err_register;
1423 1484
1424 err = register_netdev(ip6n->fb_tnl_dev); 1485 err = register_netdev(ip6n->fb_tnl_dev);
1425 if (err < 0) 1486 if (err < 0)
@@ -1427,7 +1488,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
1427 return 0; 1488 return 0;
1428 1489
1429err_register: 1490err_register:
1430 free_netdev(ip6n->fb_tnl_dev); 1491 ip6_dev_free(ip6n->fb_tnl_dev);
1431err_alloc_dev: 1492err_alloc_dev:
1432 return err; 1493 return err;
1433} 1494}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 66078dad7fe8..6f32ffce7022 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -667,6 +667,7 @@ static int pim6_rcv(struct sk_buff *skb)
667 skb_tunnel_rx(skb, reg_dev); 667 skb_tunnel_rx(skb, reg_dev);
668 668
669 netif_rx(skb); 669 netif_rx(skb);
670
670 dev_put(reg_dev); 671 dev_put(reg_dev);
671 return 0; 672 return 0;
672 drop: 673 drop:
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a7f66bc8f0b0..0553867a317f 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -342,6 +342,21 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
342 retv = 0; 342 retv = 0;
343 break; 343 break;
344 344
345 case IPV6_TRANSPARENT:
346 if (optlen < sizeof(int))
347 goto e_inval;
348 /* we don't have a separate transparent bit for IPV6 we use the one in the IPv4 socket */
349 inet_sk(sk)->transparent = valbool;
350 retv = 0;
351 break;
352
353 case IPV6_RECVORIGDSTADDR:
354 if (optlen < sizeof(int))
355 goto e_inval;
356 np->rxopt.bits.rxorigdstaddr = valbool;
357 retv = 0;
358 break;
359
345 case IPV6_HOPOPTS: 360 case IPV6_HOPOPTS:
346 case IPV6_RTHDRDSTOPTS: 361 case IPV6_RTHDRDSTOPTS:
347 case IPV6_RTHDR: 362 case IPV6_RTHDR:
@@ -1104,6 +1119,14 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
1104 break; 1119 break;
1105 } 1120 }
1106 1121
1122 case IPV6_TRANSPARENT:
1123 val = inet_sk(sk)->transparent;
1124 break;
1125
1126 case IPV6_RECVORIGDSTADDR:
1127 val = np->rxopt.bits.rxorigdstaddr;
1128 break;
1129
1107 case IPV6_UNICAST_HOPS: 1130 case IPV6_UNICAST_HOPS:
1108 case IPV6_MULTICAST_HOPS: 1131 case IPV6_MULTICAST_HOPS:
1109 { 1132 {
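
The new IPV6_TRANSPARENT option deliberately reuses inet_sk(sk)->transparent, so one flag enables tproxy-style non-local binds for both address families. A userspace sketch of turning it on (requires CAP_NET_ADMIN; the fallback value 75 matches the constant later exported in linux/in6.h, an assumption if your headers predate this patch):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IPV6_TRANSPARENT
#define IPV6_TRANSPARENT 75	/* assumption: headers older than this patch */
#endif

int main(void)
{
	int fd = socket(AF_INET6, SOCK_STREAM, 0);
	int one = 1;

	if (fd < 0) {
		perror("socket");
		return 1;
	}
	/* Needs CAP_NET_ADMIN. Once set, the socket may bind to non-local
	 * IPv6 addresses, which is what tproxy interception relies on. */
	if (setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) < 0)
		perror("setsockopt(IPV6_TRANSPARENT)");
	else
		printf("transparent mode enabled\n");
	return 0;
}
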
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 58841c4ae947..998d6d27e7cf 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -91,7 +91,9 @@
91#include <linux/netfilter.h> 91#include <linux/netfilter.h>
92#include <linux/netfilter_ipv6.h> 92#include <linux/netfilter_ipv6.h>
93 93
94static u32 ndisc_hash(const void *pkey, const struct net_device *dev); 94static u32 ndisc_hash(const void *pkey,
95 const struct net_device *dev,
96 __u32 rnd);
95static int ndisc_constructor(struct neighbour *neigh); 97static int ndisc_constructor(struct neighbour *neigh);
96static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); 98static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
97static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); 99static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
@@ -228,12 +230,12 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
228 do { 230 do {
229 cur = ((void *)cur) + (cur->nd_opt_len << 3); 231 cur = ((void *)cur) + (cur->nd_opt_len << 3);
230 } while(cur < end && cur->nd_opt_type != type); 232 } while(cur < end && cur->nd_opt_type != type);
231 return (cur <= end && cur->nd_opt_type == type ? cur : NULL); 233 return cur <= end && cur->nd_opt_type == type ? cur : NULL;
232} 234}
233 235
234static inline int ndisc_is_useropt(struct nd_opt_hdr *opt) 236static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
235{ 237{
236 return (opt->nd_opt_type == ND_OPT_RDNSS); 238 return opt->nd_opt_type == ND_OPT_RDNSS;
237} 239}
238 240
239static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, 241static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
@@ -244,7 +246,7 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
244 do { 246 do {
245 cur = ((void *)cur) + (cur->nd_opt_len << 3); 247 cur = ((void *)cur) + (cur->nd_opt_len << 3);
246 } while(cur < end && !ndisc_is_useropt(cur)); 248 } while(cur < end && !ndisc_is_useropt(cur));
247 return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL); 249 return cur <= end && ndisc_is_useropt(cur) ? cur : NULL;
248} 250}
249 251
250static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, 252static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
@@ -319,7 +321,7 @@ static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
319 int prepad = ndisc_addr_option_pad(dev->type); 321 int prepad = ndisc_addr_option_pad(dev->type);
320 if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad)) 322 if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
321 return NULL; 323 return NULL;
322 return (lladdr + prepad); 324 return lladdr + prepad;
323} 325}
324 326
325int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir) 327int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
@@ -350,7 +352,9 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d
350 352
351EXPORT_SYMBOL(ndisc_mc_map); 353EXPORT_SYMBOL(ndisc_mc_map);
352 354
353static u32 ndisc_hash(const void *pkey, const struct net_device *dev) 355static u32 ndisc_hash(const void *pkey,
356 const struct net_device *dev,
357 __u32 hash_rnd)
354{ 358{
355 const u32 *p32 = pkey; 359 const u32 *p32 = pkey;
356 u32 addr_hash, i; 360 u32 addr_hash, i;
@@ -359,7 +363,7 @@ static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
359 for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++) 363 for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
360 addr_hash ^= *p32++; 364 addr_hash ^= *p32++;
361 365
362 return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd); 366 return jhash_2words(addr_hash, dev->ifindex, hash_rnd);
363} 367}
364 368
365static int ndisc_constructor(struct neighbour *neigh) 369static int ndisc_constructor(struct neighbour *neigh)
@@ -1105,6 +1109,18 @@ errout:
1105 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err); 1109 rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1106} 1110}
1107 1111
1112static inline int accept_ra(struct inet6_dev *in6_dev)
1113{
1114 /*
1115 * If forwarding is enabled, RA are not accepted unless the special
1116 * hybrid mode (accept_ra=2) is enabled.
1117 */
1118 if (in6_dev->cnf.forwarding && in6_dev->cnf.accept_ra < 2)
1119 return 0;
1120
1121 return in6_dev->cnf.accept_ra;
1122}
1123
1108static void ndisc_router_discovery(struct sk_buff *skb) 1124static void ndisc_router_discovery(struct sk_buff *skb)
1109{ 1125{
1110 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); 1126 struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
@@ -1158,8 +1174,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
1158 return; 1174 return;
1159 } 1175 }
1160 1176
1161 /* skip route and link configuration on routers */ 1177 if (!accept_ra(in6_dev))
1162 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra)
1163 goto skip_linkparms; 1178 goto skip_linkparms;
1164 1179
1165#ifdef CONFIG_IPV6_NDISC_NODETYPE 1180#ifdef CONFIG_IPV6_NDISC_NODETYPE
@@ -1309,8 +1324,7 @@ skip_linkparms:
1309 NEIGH_UPDATE_F_ISROUTER); 1324 NEIGH_UPDATE_F_ISROUTER);
1310 } 1325 }
1311 1326
1312 /* skip route and link configuration on routers */ 1327 if (!accept_ra(in6_dev))
1313 if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra)
1314 goto out; 1328 goto out;
1315 1329
1316#ifdef CONFIG_IPV6_ROUTE_INFO 1330#ifdef CONFIG_IPV6_ROUTE_INFO
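
The accept_ra() helper above turns the old hard rule (a forwarding host never processes RAs) into a tri-state sysctl: 0 never accept, 1 accept only when not forwarding, 2 hybrid mode that accepts RAs even on a router. A tiny table check mirroring the helper's logic:

#include <stdio.h>

/* Mirrors the accept_ra() helper added above; nonzero means the router
 * advertisement is processed. accept_ra==2 is the hybrid mode. */
static int accept_ra(int forwarding, int accept_ra_sysctl)
{
	if (forwarding && accept_ra_sysctl < 2)
		return 0;
	return accept_ra_sysctl;
}

int main(void)
{
	int fwd, ra;

	for (fwd = 0; fwd <= 1; fwd++)
		for (ra = 0; ra <= 2; ra++)
			printf("forwarding=%d accept_ra=%d -> %s\n", fwd, ra,
			       accept_ra(fwd, ra) ? "accept" : "ignore");
	return 0;
}
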
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 29d643bcafa4..44d2eeac089b 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -132,10 +132,10 @@ config IP6_NF_MATCH_RT
132# The targets 132# The targets
133config IP6_NF_TARGET_HL 133config IP6_NF_TARGET_HL
134 tristate '"HL" hoplimit target support' 134 tristate '"HL" hoplimit target support'
135 depends on NETFILTER_ADVANCED 135 depends on NETFILTER_ADVANCED && IP6_NF_MANGLE
136 select NETFILTER_XT_TARGET_HL 136 select NETFILTER_XT_TARGET_HL
137 ---help--- 137 ---help---
138	  This is a backwards-compat option for the user's convenience 138	  This is a backwards-compat option for the user's convenience
139 (e.g. when running oldconfig). It selects 139 (e.g. when running oldconfig). It selects
140 CONFIG_NETFILTER_XT_TARGET_HL. 140 CONFIG_NETFILTER_XT_TARGET_HL.
141 141
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index aafbba30c899..3f8e4a3d83ce 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -11,10 +11,11 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
11obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o 11obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
12 12
13# objects for l3 independent conntrack 13# objects for l3 independent conntrack
14nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o 14nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
15nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
15 16
16# l3 independent conntrack 17# l3 independent conntrack
17obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o 18obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o
18 19
19# matches 20# matches
20obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o 21obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 8e754be92c24..51df035897e7 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -82,13 +82,13 @@ EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table);
82int 82int
83ip6t_ext_hdr(u8 nexthdr) 83ip6t_ext_hdr(u8 nexthdr)
84{ 84{
85 return ( (nexthdr == IPPROTO_HOPOPTS) || 85 return (nexthdr == IPPROTO_HOPOPTS) ||
86 (nexthdr == IPPROTO_ROUTING) || 86 (nexthdr == IPPROTO_ROUTING) ||
87 (nexthdr == IPPROTO_FRAGMENT) || 87 (nexthdr == IPPROTO_FRAGMENT) ||
88 (nexthdr == IPPROTO_ESP) || 88 (nexthdr == IPPROTO_ESP) ||
89 (nexthdr == IPPROTO_AH) || 89 (nexthdr == IPPROTO_AH) ||
90 (nexthdr == IPPROTO_NONE) || 90 (nexthdr == IPPROTO_NONE) ||
91 (nexthdr == IPPROTO_DSTOPTS) ); 91 (nexthdr == IPPROTO_DSTOPTS);
92} 92}
93 93
94/* Returns whether matches rule or not. */ 94/* Returns whether matches rule or not. */
@@ -215,7 +215,7 @@ static inline bool unconditional(const struct ip6t_ip6 *ipv6)
215 return memcmp(ipv6, &uncond, sizeof(uncond)) == 0; 215 return memcmp(ipv6, &uncond, sizeof(uncond)) == 0;
216} 216}
217 217
218static inline const struct ip6t_entry_target * 218static inline const struct xt_entry_target *
219ip6t_get_target_c(const struct ip6t_entry *e) 219ip6t_get_target_c(const struct ip6t_entry *e)
220{ 220{
221 return ip6t_get_target((struct ip6t_entry *)e); 221 return ip6t_get_target((struct ip6t_entry *)e);
@@ -260,9 +260,9 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
260 const char *hookname, const char **chainname, 260 const char *hookname, const char **chainname,
261 const char **comment, unsigned int *rulenum) 261 const char **comment, unsigned int *rulenum)
262{ 262{
263 const struct ip6t_standard_target *t = (void *)ip6t_get_target_c(s); 263 const struct xt_standard_target *t = (void *)ip6t_get_target_c(s);
264 264
265 if (strcmp(t->target.u.kernel.target->name, IP6T_ERROR_TARGET) == 0) { 265 if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
266 /* Head of user chain: ERROR target with chainname */ 266 /* Head of user chain: ERROR target with chainname */
267 *chainname = t->target.data; 267 *chainname = t->target.data;
268 (*rulenum) = 0; 268 (*rulenum) = 0;
@@ -271,7 +271,7 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
271 271
272 if (s->target_offset == sizeof(struct ip6t_entry) && 272 if (s->target_offset == sizeof(struct ip6t_entry) &&
273 strcmp(t->target.u.kernel.target->name, 273 strcmp(t->target.u.kernel.target->name,
274 IP6T_STANDARD_TARGET) == 0 && 274 XT_STANDARD_TARGET) == 0 &&
275 t->verdict < 0 && 275 t->verdict < 0 &&
276 unconditional(&s->ipv6)) { 276 unconditional(&s->ipv6)) {
277 /* Tail of chains: STANDARD target (return/policy) */ 277 /* Tail of chains: STANDARD target (return/policy) */
@@ -369,7 +369,7 @@ ip6t_do_table(struct sk_buff *skb,
369 e = get_entry(table_base, private->hook_entry[hook]); 369 e = get_entry(table_base, private->hook_entry[hook]);
370 370
371 do { 371 do {
372 const struct ip6t_entry_target *t; 372 const struct xt_entry_target *t;
373 const struct xt_entry_match *ematch; 373 const struct xt_entry_match *ematch;
374 374
375 IP_NF_ASSERT(e); 375 IP_NF_ASSERT(e);
@@ -403,10 +403,10 @@ ip6t_do_table(struct sk_buff *skb,
403 if (!t->u.kernel.target->target) { 403 if (!t->u.kernel.target->target) {
404 int v; 404 int v;
405 405
406 v = ((struct ip6t_standard_target *)t)->verdict; 406 v = ((struct xt_standard_target *)t)->verdict;
407 if (v < 0) { 407 if (v < 0) {
408 /* Pop from stack? */ 408 /* Pop from stack? */
409 if (v != IP6T_RETURN) { 409 if (v != XT_RETURN) {
410 verdict = (unsigned)(-v) - 1; 410 verdict = (unsigned)(-v) - 1;
411 break; 411 break;
412 } 412 }
@@ -434,7 +434,7 @@ ip6t_do_table(struct sk_buff *skb,
434 acpar.targinfo = t->data; 434 acpar.targinfo = t->data;
435 435
436 verdict = t->u.kernel.target->target(skb, &acpar); 436 verdict = t->u.kernel.target->target(skb, &acpar);
437 if (verdict == IP6T_CONTINUE) 437 if (verdict == XT_CONTINUE)
438 e = ip6t_next_entry(e); 438 e = ip6t_next_entry(e);
439 else 439 else
440 /* Verdict */ 440 /* Verdict */
@@ -474,7 +474,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
474 e->counters.pcnt = pos; 474 e->counters.pcnt = pos;
475 475
476 for (;;) { 476 for (;;) {
477 const struct ip6t_standard_target *t 477 const struct xt_standard_target *t
478 = (void *)ip6t_get_target_c(e); 478 = (void *)ip6t_get_target_c(e);
479 int visited = e->comefrom & (1 << hook); 479 int visited = e->comefrom & (1 << hook);
480 480
@@ -488,13 +488,13 @@ mark_source_chains(const struct xt_table_info *newinfo,
488 /* Unconditional return/END. */ 488 /* Unconditional return/END. */
489 if ((e->target_offset == sizeof(struct ip6t_entry) && 489 if ((e->target_offset == sizeof(struct ip6t_entry) &&
490 (strcmp(t->target.u.user.name, 490 (strcmp(t->target.u.user.name,
491 IP6T_STANDARD_TARGET) == 0) && 491 XT_STANDARD_TARGET) == 0) &&
492 t->verdict < 0 && 492 t->verdict < 0 &&
493 unconditional(&e->ipv6)) || visited) { 493 unconditional(&e->ipv6)) || visited) {
494 unsigned int oldpos, size; 494 unsigned int oldpos, size;
495 495
496 if ((strcmp(t->target.u.user.name, 496 if ((strcmp(t->target.u.user.name,
497 IP6T_STANDARD_TARGET) == 0) && 497 XT_STANDARD_TARGET) == 0) &&
498 t->verdict < -NF_MAX_VERDICT - 1) { 498 t->verdict < -NF_MAX_VERDICT - 1) {
499 duprintf("mark_source_chains: bad " 499 duprintf("mark_source_chains: bad "
500 "negative verdict (%i)\n", 500 "negative verdict (%i)\n",
@@ -537,7 +537,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
537 int newpos = t->verdict; 537 int newpos = t->verdict;
538 538
539 if (strcmp(t->target.u.user.name, 539 if (strcmp(t->target.u.user.name,
540 IP6T_STANDARD_TARGET) == 0 && 540 XT_STANDARD_TARGET) == 0 &&
541 newpos >= 0) { 541 newpos >= 0) {
542 if (newpos > newinfo->size - 542 if (newpos > newinfo->size -
543 sizeof(struct ip6t_entry)) { 543 sizeof(struct ip6t_entry)) {
@@ -565,7 +565,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
565 return 1; 565 return 1;
566} 566}
567 567
568static void cleanup_match(struct ip6t_entry_match *m, struct net *net) 568static void cleanup_match(struct xt_entry_match *m, struct net *net)
569{ 569{
570 struct xt_mtdtor_param par; 570 struct xt_mtdtor_param par;
571 571
@@ -581,14 +581,14 @@ static void cleanup_match(struct ip6t_entry_match *m, struct net *net)
581static int 581static int
582check_entry(const struct ip6t_entry *e, const char *name) 582check_entry(const struct ip6t_entry *e, const char *name)
583{ 583{
584 const struct ip6t_entry_target *t; 584 const struct xt_entry_target *t;
585 585
586 if (!ip6_checkentry(&e->ipv6)) { 586 if (!ip6_checkentry(&e->ipv6)) {
587 duprintf("ip_tables: ip check failed %p %s.\n", e, name); 587 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
588 return -EINVAL; 588 return -EINVAL;
589 } 589 }
590 590
591 if (e->target_offset + sizeof(struct ip6t_entry_target) > 591 if (e->target_offset + sizeof(struct xt_entry_target) >
592 e->next_offset) 592 e->next_offset)
593 return -EINVAL; 593 return -EINVAL;
594 594
@@ -599,7 +599,7 @@ check_entry(const struct ip6t_entry *e, const char *name)
599 return 0; 599 return 0;
600} 600}
601 601
602static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par) 602static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
603{ 603{
604 const struct ip6t_ip6 *ipv6 = par->entryinfo; 604 const struct ip6t_ip6 *ipv6 = par->entryinfo;
605 int ret; 605 int ret;
@@ -618,7 +618,7 @@ static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par)
618} 618}
619 619
620static int 620static int
621find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par) 621find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
622{ 622{
623 struct xt_match *match; 623 struct xt_match *match;
624 int ret; 624 int ret;
@@ -643,7 +643,7 @@ err:
643 643
644static int check_target(struct ip6t_entry *e, struct net *net, const char *name) 644static int check_target(struct ip6t_entry *e, struct net *net, const char *name)
645{ 645{
646 struct ip6t_entry_target *t = ip6t_get_target(e); 646 struct xt_entry_target *t = ip6t_get_target(e);
647 struct xt_tgchk_param par = { 647 struct xt_tgchk_param par = {
648 .net = net, 648 .net = net,
649 .table = name, 649 .table = name,
@@ -670,7 +670,7 @@ static int
670find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, 670find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
671 unsigned int size) 671 unsigned int size)
672{ 672{
673 struct ip6t_entry_target *t; 673 struct xt_entry_target *t;
674 struct xt_target *target; 674 struct xt_target *target;
675 int ret; 675 int ret;
676 unsigned int j; 676 unsigned int j;
@@ -721,7 +721,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
721 721
722static bool check_underflow(const struct ip6t_entry *e) 722static bool check_underflow(const struct ip6t_entry *e)
723{ 723{
724 const struct ip6t_entry_target *t; 724 const struct xt_entry_target *t;
725 unsigned int verdict; 725 unsigned int verdict;
726 726
727 if (!unconditional(&e->ipv6)) 727 if (!unconditional(&e->ipv6))
@@ -729,7 +729,7 @@ static bool check_underflow(const struct ip6t_entry *e)
729 t = ip6t_get_target_c(e); 729 t = ip6t_get_target_c(e);
730 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) 730 if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
731 return false; 731 return false;
732 verdict = ((struct ip6t_standard_target *)t)->verdict; 732 verdict = ((struct xt_standard_target *)t)->verdict;
733 verdict = -verdict - 1; 733 verdict = -verdict - 1;
734 return verdict == NF_DROP || verdict == NF_ACCEPT; 734 return verdict == NF_DROP || verdict == NF_ACCEPT;
735} 735}
@@ -752,7 +752,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
752 } 752 }
753 753
754 if (e->next_offset 754 if (e->next_offset
755 < sizeof(struct ip6t_entry) + sizeof(struct ip6t_entry_target)) { 755 < sizeof(struct ip6t_entry) + sizeof(struct xt_entry_target)) {
756 duprintf("checking: element %p size %u\n", 756 duprintf("checking: element %p size %u\n",
757 e, e->next_offset); 757 e, e->next_offset);
758 return -EINVAL; 758 return -EINVAL;
@@ -784,7 +784,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
784static void cleanup_entry(struct ip6t_entry *e, struct net *net) 784static void cleanup_entry(struct ip6t_entry *e, struct net *net)
785{ 785{
786 struct xt_tgdtor_param par; 786 struct xt_tgdtor_param par;
787 struct ip6t_entry_target *t; 787 struct xt_entry_target *t;
788 struct xt_entry_match *ematch; 788 struct xt_entry_match *ematch;
789 789
790 /* Cleanup all matches */ 790 /* Cleanup all matches */
@@ -985,8 +985,8 @@ copy_entries_to_user(unsigned int total_size,
985 /* ... then go back and fix counters and names */ 985 /* ... then go back and fix counters and names */
986 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ 986 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
987 unsigned int i; 987 unsigned int i;
988 const struct ip6t_entry_match *m; 988 const struct xt_entry_match *m;
989 const struct ip6t_entry_target *t; 989 const struct xt_entry_target *t;
990 990
991 e = (struct ip6t_entry *)(loc_cpu_entry + off); 991 e = (struct ip6t_entry *)(loc_cpu_entry + off);
992 if (copy_to_user(userptr + off 992 if (copy_to_user(userptr + off
@@ -1003,7 +1003,7 @@ copy_entries_to_user(unsigned int total_size,
1003 m = (void *)e + i; 1003 m = (void *)e + i;
1004 1004
1005 if (copy_to_user(userptr + off + i 1005 if (copy_to_user(userptr + off + i
1006 + offsetof(struct ip6t_entry_match, 1006 + offsetof(struct xt_entry_match,
1007 u.user.name), 1007 u.user.name),
1008 m->u.kernel.match->name, 1008 m->u.kernel.match->name,
1009 strlen(m->u.kernel.match->name)+1) 1009 strlen(m->u.kernel.match->name)+1)
@@ -1015,7 +1015,7 @@ copy_entries_to_user(unsigned int total_size,
1015 1015
1016 t = ip6t_get_target_c(e); 1016 t = ip6t_get_target_c(e);
1017 if (copy_to_user(userptr + off + e->target_offset 1017 if (copy_to_user(userptr + off + e->target_offset
1018 + offsetof(struct ip6t_entry_target, 1018 + offsetof(struct xt_entry_target,
1019 u.user.name), 1019 u.user.name),
1020 t->u.kernel.target->name, 1020 t->u.kernel.target->name,
1021 strlen(t->u.kernel.target->name)+1) != 0) { 1021 strlen(t->u.kernel.target->name)+1) != 0) {
@@ -1053,7 +1053,7 @@ static int compat_calc_entry(const struct ip6t_entry *e,
1053 const void *base, struct xt_table_info *newinfo) 1053 const void *base, struct xt_table_info *newinfo)
1054{ 1054{
1055 const struct xt_entry_match *ematch; 1055 const struct xt_entry_match *ematch;
1056 const struct ip6t_entry_target *t; 1056 const struct xt_entry_target *t;
1057 unsigned int entry_offset; 1057 unsigned int entry_offset;
1058 int off, i, ret; 1058 int off, i, ret;
1059 1059
@@ -1105,7 +1105,7 @@ static int compat_table_info(const struct xt_table_info *info,
1105static int get_info(struct net *net, void __user *user, 1105static int get_info(struct net *net, void __user *user,
1106 const int *len, int compat) 1106 const int *len, int compat)
1107{ 1107{
1108 char name[IP6T_TABLE_MAXNAMELEN]; 1108 char name[XT_TABLE_MAXNAMELEN];
1109 struct xt_table *t; 1109 struct xt_table *t;
1110 int ret; 1110 int ret;
1111 1111
@@ -1118,7 +1118,7 @@ static int get_info(struct net *net, void __user *user,
1118 if (copy_from_user(name, user, sizeof(name)) != 0) 1118 if (copy_from_user(name, user, sizeof(name)) != 0)
1119 return -EFAULT; 1119 return -EFAULT;
1120 1120
1121 name[IP6T_TABLE_MAXNAMELEN-1] = '\0'; 1121 name[XT_TABLE_MAXNAMELEN-1] = '\0';
1122#ifdef CONFIG_COMPAT 1122#ifdef CONFIG_COMPAT
1123 if (compat) 1123 if (compat)
1124 xt_compat_lock(AF_INET6); 1124 xt_compat_lock(AF_INET6);
@@ -1415,14 +1415,14 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
1415 1415
1416#ifdef CONFIG_COMPAT 1416#ifdef CONFIG_COMPAT
1417struct compat_ip6t_replace { 1417struct compat_ip6t_replace {
1418 char name[IP6T_TABLE_MAXNAMELEN]; 1418 char name[XT_TABLE_MAXNAMELEN];
1419 u32 valid_hooks; 1419 u32 valid_hooks;
1420 u32 num_entries; 1420 u32 num_entries;
1421 u32 size; 1421 u32 size;
1422 u32 hook_entry[NF_INET_NUMHOOKS]; 1422 u32 hook_entry[NF_INET_NUMHOOKS];
1423 u32 underflow[NF_INET_NUMHOOKS]; 1423 u32 underflow[NF_INET_NUMHOOKS];
1424 u32 num_counters; 1424 u32 num_counters;
1425 compat_uptr_t counters; /* struct ip6t_counters * */ 1425 compat_uptr_t counters; /* struct xt_counters * */
1426 struct compat_ip6t_entry entries[0]; 1426 struct compat_ip6t_entry entries[0];
1427}; 1427};
1428 1428
@@ -1431,7 +1431,7 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
1431 unsigned int *size, struct xt_counters *counters, 1431 unsigned int *size, struct xt_counters *counters,
1432 unsigned int i) 1432 unsigned int i)
1433{ 1433{
1434 struct ip6t_entry_target *t; 1434 struct xt_entry_target *t;
1435 struct compat_ip6t_entry __user *ce; 1435 struct compat_ip6t_entry __user *ce;
1436 u_int16_t target_offset, next_offset; 1436 u_int16_t target_offset, next_offset;
1437 compat_uint_t origsize; 1437 compat_uint_t origsize;
@@ -1466,7 +1466,7 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
1466} 1466}
1467 1467
1468static int 1468static int
1469compat_find_calc_match(struct ip6t_entry_match *m, 1469compat_find_calc_match(struct xt_entry_match *m,
1470 const char *name, 1470 const char *name,
1471 const struct ip6t_ip6 *ipv6, 1471 const struct ip6t_ip6 *ipv6,
1472 unsigned int hookmask, 1472 unsigned int hookmask,
@@ -1488,7 +1488,7 @@ compat_find_calc_match(struct ip6t_entry_match *m,
1488 1488
1489static void compat_release_entry(struct compat_ip6t_entry *e) 1489static void compat_release_entry(struct compat_ip6t_entry *e)
1490{ 1490{
1491 struct ip6t_entry_target *t; 1491 struct xt_entry_target *t;
1492 struct xt_entry_match *ematch; 1492 struct xt_entry_match *ematch;
1493 1493
1494 /* Cleanup all matches */ 1494 /* Cleanup all matches */
@@ -1509,7 +1509,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
1509 const char *name) 1509 const char *name)
1510{ 1510{
1511 struct xt_entry_match *ematch; 1511 struct xt_entry_match *ematch;
1512 struct ip6t_entry_target *t; 1512 struct xt_entry_target *t;
1513 struct xt_target *target; 1513 struct xt_target *target;
1514 unsigned int entry_offset; 1514 unsigned int entry_offset;
1515 unsigned int j; 1515 unsigned int j;
@@ -1591,7 +1591,7 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
1591 unsigned int *size, const char *name, 1591 unsigned int *size, const char *name,
1592 struct xt_table_info *newinfo, unsigned char *base) 1592 struct xt_table_info *newinfo, unsigned char *base)
1593{ 1593{
1594 struct ip6t_entry_target *t; 1594 struct xt_entry_target *t;
1595 struct xt_target *target; 1595 struct xt_target *target;
1596 struct ip6t_entry *de; 1596 struct ip6t_entry *de;
1597 unsigned int origsize; 1597 unsigned int origsize;
@@ -1899,7 +1899,7 @@ compat_do_ip6t_set_ctl(struct sock *sk, int cmd, void __user *user,
1899} 1899}
1900 1900
1901struct compat_ip6t_get_entries { 1901struct compat_ip6t_get_entries {
1902 char name[IP6T_TABLE_MAXNAMELEN]; 1902 char name[XT_TABLE_MAXNAMELEN];
1903 compat_uint_t size; 1903 compat_uint_t size;
1904 struct compat_ip6t_entry entrytable[0]; 1904 struct compat_ip6t_entry entrytable[0];
1905}; 1905};
@@ -2054,7 +2054,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2054 2054
2055 case IP6T_SO_GET_REVISION_MATCH: 2055 case IP6T_SO_GET_REVISION_MATCH:
2056 case IP6T_SO_GET_REVISION_TARGET: { 2056 case IP6T_SO_GET_REVISION_TARGET: {
2057 struct ip6t_get_revision rev; 2057 struct xt_get_revision rev;
2058 int target; 2058 int target;
2059 2059
2060 if (*len != sizeof(rev)) { 2060 if (*len != sizeof(rev)) {
@@ -2191,7 +2191,7 @@ static int icmp6_checkentry(const struct xt_mtchk_param *par)
2191/* The built-in targets: standard (NULL) and error. */ 2191/* The built-in targets: standard (NULL) and error. */
2192static struct xt_target ip6t_builtin_tg[] __read_mostly = { 2192static struct xt_target ip6t_builtin_tg[] __read_mostly = {
2193 { 2193 {
2194 .name = IP6T_STANDARD_TARGET, 2194 .name = XT_STANDARD_TARGET,
2195 .targetsize = sizeof(int), 2195 .targetsize = sizeof(int),
2196 .family = NFPROTO_IPV6, 2196 .family = NFPROTO_IPV6,
2197#ifdef CONFIG_COMPAT 2197#ifdef CONFIG_COMPAT
@@ -2201,9 +2201,9 @@ static struct xt_target ip6t_builtin_tg[] __read_mostly = {
2201#endif 2201#endif
2202 }, 2202 },
2203 { 2203 {
2204 .name = IP6T_ERROR_TARGET, 2204 .name = XT_ERROR_TARGET,
2205 .target = ip6t_error, 2205 .target = ip6t_error,
2206 .targetsize = IP6T_FUNCTION_MAXNAMELEN, 2206 .targetsize = XT_FUNCTION_MAXNAMELEN,
2207 .family = NFPROTO_IPV6, 2207 .family = NFPROTO_IPV6,
2208 }, 2208 },
2209}; 2209};
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 0a07ae7b933f..09c88891a753 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -23,6 +23,7 @@
23#include <linux/netfilter/x_tables.h> 23#include <linux/netfilter/x_tables.h>
24#include <linux/netfilter_ipv6/ip6_tables.h> 24#include <linux/netfilter_ipv6/ip6_tables.h>
25#include <net/netfilter/nf_log.h> 25#include <net/netfilter/nf_log.h>
26#include <net/netfilter/xt_log.h>
26 27
27MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>"); 28MODULE_AUTHOR("Jan Rekorajski <baggins@pld.org.pl>");
28MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog"); 29MODULE_DESCRIPTION("Xtables: IPv6 packet logging to syslog");
@@ -32,11 +33,9 @@ struct in_device;
32#include <net/route.h> 33#include <net/route.h>
33#include <linux/netfilter_ipv6/ip6t_LOG.h> 34#include <linux/netfilter_ipv6/ip6t_LOG.h>
34 35
35/* Use lock to serialize, so printks don't overlap */
36static DEFINE_SPINLOCK(log_lock);
37
38/* One level of recursion won't kill us */ 36/* One level of recursion won't kill us */
39static void dump_packet(const struct nf_loginfo *info, 37static void dump_packet(struct sbuff *m,
38 const struct nf_loginfo *info,
40 const struct sk_buff *skb, unsigned int ip6hoff, 39 const struct sk_buff *skb, unsigned int ip6hoff,
41 int recurse) 40 int recurse)
42{ 41{
@@ -55,15 +54,15 @@ static void dump_packet(const struct nf_loginfo *info,
55 54
56 ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); 55 ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
57 if (ih == NULL) { 56 if (ih == NULL) {
58 printk("TRUNCATED"); 57 sb_add(m, "TRUNCATED");
59 return; 58 return;
60 } 59 }
61 60
62 /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */ 61 /* Max length: 88 "SRC=0000.0000.0000.0000.0000.0000.0000.0000 DST=0000.0000.0000.0000.0000.0000.0000.0000 " */
63 printk("SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr); 62 sb_add(m, "SRC=%pI6 DST=%pI6 ", &ih->saddr, &ih->daddr);
64 63
65 /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */ 64 /* Max length: 44 "LEN=65535 TC=255 HOPLIMIT=255 FLOWLBL=FFFFF " */
66 printk("LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ", 65 sb_add(m, "LEN=%Zu TC=%u HOPLIMIT=%u FLOWLBL=%u ",
67 ntohs(ih->payload_len) + sizeof(struct ipv6hdr), 66 ntohs(ih->payload_len) + sizeof(struct ipv6hdr),
68 (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20, 67 (ntohl(*(__be32 *)ih) & 0x0ff00000) >> 20,
69 ih->hop_limit, 68 ih->hop_limit,
@@ -78,35 +77,35 @@ static void dump_packet(const struct nf_loginfo *info,
78 77
79 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); 78 hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
80 if (hp == NULL) { 79 if (hp == NULL) {
81 printk("TRUNCATED"); 80 sb_add(m, "TRUNCATED");
82 return; 81 return;
83 } 82 }
84 83
85 /* Max length: 48 "OPT (...) " */ 84 /* Max length: 48 "OPT (...) " */
86 if (logflags & IP6T_LOG_IPOPT) 85 if (logflags & IP6T_LOG_IPOPT)
87 printk("OPT ( "); 86 sb_add(m, "OPT ( ");
88 87
89 switch (currenthdr) { 88 switch (currenthdr) {
90 case IPPROTO_FRAGMENT: { 89 case IPPROTO_FRAGMENT: {
91 struct frag_hdr _fhdr; 90 struct frag_hdr _fhdr;
92 const struct frag_hdr *fh; 91 const struct frag_hdr *fh;
93 92
94 printk("FRAG:"); 93 sb_add(m, "FRAG:");
95 fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), 94 fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
96 &_fhdr); 95 &_fhdr);
97 if (fh == NULL) { 96 if (fh == NULL) {
98 printk("TRUNCATED "); 97 sb_add(m, "TRUNCATED ");
99 return; 98 return;
100 } 99 }
101 100
102 /* Max length: 6 "65535 " */ 101 /* Max length: 6 "65535 " */
103 printk("%u ", ntohs(fh->frag_off) & 0xFFF8); 102 sb_add(m, "%u ", ntohs(fh->frag_off) & 0xFFF8);
104 103
105 /* Max length: 11 "INCOMPLETE " */ 104 /* Max length: 11 "INCOMPLETE " */
106 if (fh->frag_off & htons(0x0001)) 105 if (fh->frag_off & htons(0x0001))
107 printk("INCOMPLETE "); 106 sb_add(m, "INCOMPLETE ");
108 107
109 printk("ID:%08x ", ntohl(fh->identification)); 108 sb_add(m, "ID:%08x ", ntohl(fh->identification));
110 109
111 if (ntohs(fh->frag_off) & 0xFFF8) 110 if (ntohs(fh->frag_off) & 0xFFF8)
112 fragment = 1; 111 fragment = 1;
@@ -120,7 +119,7 @@ static void dump_packet(const struct nf_loginfo *info,
120 case IPPROTO_HOPOPTS: 119 case IPPROTO_HOPOPTS:
121 if (fragment) { 120 if (fragment) {
122 if (logflags & IP6T_LOG_IPOPT) 121 if (logflags & IP6T_LOG_IPOPT)
123 printk(")"); 122 sb_add(m, ")");
124 return; 123 return;
125 } 124 }
126 hdrlen = ipv6_optlen(hp); 125 hdrlen = ipv6_optlen(hp);
@@ -132,10 +131,10 @@ static void dump_packet(const struct nf_loginfo *info,
132 const struct ip_auth_hdr *ah; 131 const struct ip_auth_hdr *ah;
133 132
134 /* Max length: 3 "AH " */ 133 /* Max length: 3 "AH " */
135 printk("AH "); 134 sb_add(m, "AH ");
136 135
137 if (fragment) { 136 if (fragment) {
138 printk(")"); 137 sb_add(m, ")");
139 return; 138 return;
140 } 139 }
141 140
@@ -146,13 +145,13 @@ static void dump_packet(const struct nf_loginfo *info,
146 * Max length: 26 "INCOMPLETE [65535 145 * Max length: 26 "INCOMPLETE [65535
147 * bytes] )" 146 * bytes] )"
148 */ 147 */
149 printk("INCOMPLETE [%u bytes] )", 148 sb_add(m, "INCOMPLETE [%u bytes] )",
150 skb->len - ptr); 149 skb->len - ptr);
151 return; 150 return;
152 } 151 }
153 152
154 /* Length: 15 "SPI=0xF1234567 */ 153 /* Length: 15 "SPI=0xF1234567 */
155 printk("SPI=0x%x ", ntohl(ah->spi)); 154 sb_add(m, "SPI=0x%x ", ntohl(ah->spi));
156 155
157 } 156 }
158 157
@@ -164,10 +163,10 @@ static void dump_packet(const struct nf_loginfo *info,
164 const struct ip_esp_hdr *eh; 163 const struct ip_esp_hdr *eh;
165 164
166 /* Max length: 4 "ESP " */ 165 /* Max length: 4 "ESP " */
167 printk("ESP "); 166 sb_add(m, "ESP ");
168 167
169 if (fragment) { 168 if (fragment) {
170 printk(")"); 169 sb_add(m, ")");
171 return; 170 return;
172 } 171 }
173 172
@@ -177,23 +176,23 @@ static void dump_packet(const struct nf_loginfo *info,
177 eh = skb_header_pointer(skb, ptr, sizeof(_esph), 176 eh = skb_header_pointer(skb, ptr, sizeof(_esph),
178 &_esph); 177 &_esph);
179 if (eh == NULL) { 178 if (eh == NULL) {
180 printk("INCOMPLETE [%u bytes] )", 179 sb_add(m, "INCOMPLETE [%u bytes] )",
181 skb->len - ptr); 180 skb->len - ptr);
182 return; 181 return;
183 } 182 }
184 183
185 /* Length: 16 "SPI=0xF1234567 )" */ 184 /* Length: 16 "SPI=0xF1234567 )" */
186 printk("SPI=0x%x )", ntohl(eh->spi) ); 185 sb_add(m, "SPI=0x%x )", ntohl(eh->spi) );
187 186
188 } 187 }
189 return; 188 return;
190 default: 189 default:
191 /* Max length: 20 "Unknown Ext Hdr 255" */ 190 /* Max length: 20 "Unknown Ext Hdr 255" */
192 printk("Unknown Ext Hdr %u", currenthdr); 191 sb_add(m, "Unknown Ext Hdr %u", currenthdr);
193 return; 192 return;
194 } 193 }
195 if (logflags & IP6T_LOG_IPOPT) 194 if (logflags & IP6T_LOG_IPOPT)
196 printk(") "); 195 sb_add(m, ") ");
197 196
198 currenthdr = hp->nexthdr; 197 currenthdr = hp->nexthdr;
199 ptr += hdrlen; 198 ptr += hdrlen;
@@ -205,7 +204,7 @@ static void dump_packet(const struct nf_loginfo *info,
205 const struct tcphdr *th; 204 const struct tcphdr *th;
206 205
207 /* Max length: 10 "PROTO=TCP " */ 206 /* Max length: 10 "PROTO=TCP " */
208 printk("PROTO=TCP "); 207 sb_add(m, "PROTO=TCP ");
209 208
210 if (fragment) 209 if (fragment)
211 break; 210 break;
@@ -213,40 +212,40 @@ static void dump_packet(const struct nf_loginfo *info,
213 /* Max length: 25 "INCOMPLETE [65535 bytes] " */ 212 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
214 th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph); 213 th = skb_header_pointer(skb, ptr, sizeof(_tcph), &_tcph);
215 if (th == NULL) { 214 if (th == NULL) {
216 printk("INCOMPLETE [%u bytes] ", skb->len - ptr); 215 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
217 return; 216 return;
218 } 217 }
219 218
220 /* Max length: 20 "SPT=65535 DPT=65535 " */ 219 /* Max length: 20 "SPT=65535 DPT=65535 " */
221 printk("SPT=%u DPT=%u ", 220 sb_add(m, "SPT=%u DPT=%u ",
222 ntohs(th->source), ntohs(th->dest)); 221 ntohs(th->source), ntohs(th->dest));
223 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ 222 /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
224 if (logflags & IP6T_LOG_TCPSEQ) 223 if (logflags & IP6T_LOG_TCPSEQ)
225 printk("SEQ=%u ACK=%u ", 224 sb_add(m, "SEQ=%u ACK=%u ",
226 ntohl(th->seq), ntohl(th->ack_seq)); 225 ntohl(th->seq), ntohl(th->ack_seq));
227 /* Max length: 13 "WINDOW=65535 " */ 226 /* Max length: 13 "WINDOW=65535 " */
228 printk("WINDOW=%u ", ntohs(th->window)); 227 sb_add(m, "WINDOW=%u ", ntohs(th->window));
229 /* Max length: 9 "RES=0x3C " */ 228 /* Max length: 9 "RES=0x3C " */
230 printk("RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); 229 sb_add(m, "RES=0x%02x ", (u_int8_t)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
231 /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ 230 /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
232 if (th->cwr) 231 if (th->cwr)
233 printk("CWR "); 232 sb_add(m, "CWR ");
234 if (th->ece) 233 if (th->ece)
235 printk("ECE "); 234 sb_add(m, "ECE ");
236 if (th->urg) 235 if (th->urg)
237 printk("URG "); 236 sb_add(m, "URG ");
238 if (th->ack) 237 if (th->ack)
239 printk("ACK "); 238 sb_add(m, "ACK ");
240 if (th->psh) 239 if (th->psh)
241 printk("PSH "); 240 sb_add(m, "PSH ");
242 if (th->rst) 241 if (th->rst)
243 printk("RST "); 242 sb_add(m, "RST ");
244 if (th->syn) 243 if (th->syn)
245 printk("SYN "); 244 sb_add(m, "SYN ");
246 if (th->fin) 245 if (th->fin)
247 printk("FIN "); 246 sb_add(m, "FIN ");
248 /* Max length: 11 "URGP=65535 " */ 247 /* Max length: 11 "URGP=65535 " */
249 printk("URGP=%u ", ntohs(th->urg_ptr)); 248 sb_add(m, "URGP=%u ", ntohs(th->urg_ptr));
250 249
251 if ((logflags & IP6T_LOG_TCPOPT) && 250 if ((logflags & IP6T_LOG_TCPOPT) &&
252 th->doff * 4 > sizeof(struct tcphdr)) { 251 th->doff * 4 > sizeof(struct tcphdr)) {
@@ -260,15 +259,15 @@ static void dump_packet(const struct nf_loginfo *info,
260 ptr + sizeof(struct tcphdr), 259 ptr + sizeof(struct tcphdr),
261 optsize, _opt); 260 optsize, _opt);
262 if (op == NULL) { 261 if (op == NULL) {
263 printk("OPT (TRUNCATED)"); 262 sb_add(m, "OPT (TRUNCATED)");
264 return; 263 return;
265 } 264 }
266 265
267 /* Max length: 127 "OPT (" 15*4*2chars ") " */ 266 /* Max length: 127 "OPT (" 15*4*2chars ") " */
268 printk("OPT ("); 267 sb_add(m, "OPT (");
269 for (i =0; i < optsize; i++) 268 for (i =0; i < optsize; i++)
270 printk("%02X", op[i]); 269 sb_add(m, "%02X", op[i]);
271 printk(") "); 270 sb_add(m, ") ");
272 } 271 }
273 break; 272 break;
274 } 273 }
@@ -279,9 +278,9 @@ static void dump_packet(const struct nf_loginfo *info,
279 278
280 if (currenthdr == IPPROTO_UDP) 279 if (currenthdr == IPPROTO_UDP)
281 /* Max length: 10 "PROTO=UDP " */ 280 /* Max length: 10 "PROTO=UDP " */
282 printk("PROTO=UDP " ); 281 sb_add(m, "PROTO=UDP " );
283 else /* Max length: 14 "PROTO=UDPLITE " */ 282 else /* Max length: 14 "PROTO=UDPLITE " */
284 printk("PROTO=UDPLITE "); 283 sb_add(m, "PROTO=UDPLITE ");
285 284
286 if (fragment) 285 if (fragment)
287 break; 286 break;
@@ -289,12 +288,12 @@ static void dump_packet(const struct nf_loginfo *info,
289 /* Max length: 25 "INCOMPLETE [65535 bytes] " */ 288 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
290 uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph); 289 uh = skb_header_pointer(skb, ptr, sizeof(_udph), &_udph);
291 if (uh == NULL) { 290 if (uh == NULL) {
292 printk("INCOMPLETE [%u bytes] ", skb->len - ptr); 291 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
293 return; 292 return;
294 } 293 }
295 294
296 /* Max length: 20 "SPT=65535 DPT=65535 " */ 295 /* Max length: 20 "SPT=65535 DPT=65535 " */
297 printk("SPT=%u DPT=%u LEN=%u ", 296 sb_add(m, "SPT=%u DPT=%u LEN=%u ",
298 ntohs(uh->source), ntohs(uh->dest), 297 ntohs(uh->source), ntohs(uh->dest),
299 ntohs(uh->len)); 298 ntohs(uh->len));
300 break; 299 break;
@@ -304,7 +303,7 @@ static void dump_packet(const struct nf_loginfo *info,
304 const struct icmp6hdr *ic; 303 const struct icmp6hdr *ic;
305 304
306 /* Max length: 13 "PROTO=ICMPv6 " */ 305 /* Max length: 13 "PROTO=ICMPv6 " */
307 printk("PROTO=ICMPv6 "); 306 sb_add(m, "PROTO=ICMPv6 ");
308 307
309 if (fragment) 308 if (fragment)
310 break; 309 break;
@@ -312,18 +311,18 @@ static void dump_packet(const struct nf_loginfo *info,
312 /* Max length: 25 "INCOMPLETE [65535 bytes] " */ 311 /* Max length: 25 "INCOMPLETE [65535 bytes] " */
313 ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h); 312 ic = skb_header_pointer(skb, ptr, sizeof(_icmp6h), &_icmp6h);
314 if (ic == NULL) { 313 if (ic == NULL) {
315 printk("INCOMPLETE [%u bytes] ", skb->len - ptr); 314 sb_add(m, "INCOMPLETE [%u bytes] ", skb->len - ptr);
316 return; 315 return;
317 } 316 }
318 317
319 /* Max length: 18 "TYPE=255 CODE=255 " */ 318 /* Max length: 18 "TYPE=255 CODE=255 " */
320 printk("TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code); 319 sb_add(m, "TYPE=%u CODE=%u ", ic->icmp6_type, ic->icmp6_code);
321 320
322 switch (ic->icmp6_type) { 321 switch (ic->icmp6_type) {
323 case ICMPV6_ECHO_REQUEST: 322 case ICMPV6_ECHO_REQUEST:
324 case ICMPV6_ECHO_REPLY: 323 case ICMPV6_ECHO_REPLY:
325 /* Max length: 19 "ID=65535 SEQ=65535 " */ 324 /* Max length: 19 "ID=65535 SEQ=65535 " */
326 printk("ID=%u SEQ=%u ", 325 sb_add(m, "ID=%u SEQ=%u ",
327 ntohs(ic->icmp6_identifier), 326 ntohs(ic->icmp6_identifier),
328 ntohs(ic->icmp6_sequence)); 327 ntohs(ic->icmp6_sequence));
329 break; 328 break;
@@ -334,35 +333,35 @@ static void dump_packet(const struct nf_loginfo *info,
334 333
335 case ICMPV6_PARAMPROB: 334 case ICMPV6_PARAMPROB:
336 /* Max length: 17 "POINTER=ffffffff " */ 335 /* Max length: 17 "POINTER=ffffffff " */
337 printk("POINTER=%08x ", ntohl(ic->icmp6_pointer)); 336 sb_add(m, "POINTER=%08x ", ntohl(ic->icmp6_pointer));
338 /* Fall through */ 337 /* Fall through */
339 case ICMPV6_DEST_UNREACH: 338 case ICMPV6_DEST_UNREACH:
340 case ICMPV6_PKT_TOOBIG: 339 case ICMPV6_PKT_TOOBIG:
341 case ICMPV6_TIME_EXCEED: 340 case ICMPV6_TIME_EXCEED:
342 /* Max length: 3+maxlen */ 341 /* Max length: 3+maxlen */
343 if (recurse) { 342 if (recurse) {
344 printk("["); 343 sb_add(m, "[");
345 dump_packet(info, skb, ptr + sizeof(_icmp6h), 344 dump_packet(m, info, skb,
346 0); 345 ptr + sizeof(_icmp6h), 0);
347 printk("] "); 346 sb_add(m, "] ");
348 } 347 }
349 348
350 /* Max length: 10 "MTU=65535 " */ 349 /* Max length: 10 "MTU=65535 " */
351 if (ic->icmp6_type == ICMPV6_PKT_TOOBIG) 350 if (ic->icmp6_type == ICMPV6_PKT_TOOBIG)
352 printk("MTU=%u ", ntohl(ic->icmp6_mtu)); 351 sb_add(m, "MTU=%u ", ntohl(ic->icmp6_mtu));
353 } 352 }
354 break; 353 break;
355 } 354 }
356 /* Max length: 10 "PROTO=255 " */ 355 /* Max length: 10 "PROTO=255 " */
357 default: 356 default:
358 printk("PROTO=%u ", currenthdr); 357 sb_add(m, "PROTO=%u ", currenthdr);
359 } 358 }
360 359
361 /* Max length: 15 "UID=4294967295 " */ 360 /* Max length: 15 "UID=4294967295 " */
362 if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) { 361 if ((logflags & IP6T_LOG_UID) && recurse && skb->sk) {
363 read_lock_bh(&skb->sk->sk_callback_lock); 362 read_lock_bh(&skb->sk->sk_callback_lock);
364 if (skb->sk->sk_socket && skb->sk->sk_socket->file) 363 if (skb->sk->sk_socket && skb->sk->sk_socket->file)
365 printk("UID=%u GID=%u ", 364 sb_add(m, "UID=%u GID=%u ",
366 skb->sk->sk_socket->file->f_cred->fsuid, 365 skb->sk->sk_socket->file->f_cred->fsuid,
367 skb->sk->sk_socket->file->f_cred->fsgid); 366 skb->sk->sk_socket->file->f_cred->fsgid);
368 read_unlock_bh(&skb->sk->sk_callback_lock); 367 read_unlock_bh(&skb->sk->sk_callback_lock);
@@ -370,10 +369,11 @@ static void dump_packet(const struct nf_loginfo *info,
370 369
371 /* Max length: 16 "MARK=0xFFFFFFFF " */ 370 /* Max length: 16 "MARK=0xFFFFFFFF " */
372 if (!recurse && skb->mark) 371 if (!recurse && skb->mark)
373 printk("MARK=0x%x ", skb->mark); 372 sb_add(m, "MARK=0x%x ", skb->mark);
374} 373}
375 374
376static void dump_mac_header(const struct nf_loginfo *info, 375static void dump_mac_header(struct sbuff *m,
376 const struct nf_loginfo *info,
377 const struct sk_buff *skb) 377 const struct sk_buff *skb)
378{ 378{
379 struct net_device *dev = skb->dev; 379 struct net_device *dev = skb->dev;
@@ -387,7 +387,7 @@ static void dump_mac_header(const struct nf_loginfo *info,
387 387
388 switch (dev->type) { 388 switch (dev->type) {
389 case ARPHRD_ETHER: 389 case ARPHRD_ETHER:
390 printk("MACSRC=%pM MACDST=%pM MACPROTO=%04x ", 390 sb_add(m, "MACSRC=%pM MACDST=%pM MACPROTO=%04x ",
391 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, 391 eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
392 ntohs(eth_hdr(skb)->h_proto)); 392 ntohs(eth_hdr(skb)->h_proto));
393 return; 393 return;
@@ -396,7 +396,7 @@ static void dump_mac_header(const struct nf_loginfo *info,
396 } 396 }
397 397
398fallback: 398fallback:
399 printk("MAC="); 399 sb_add(m, "MAC=");
400 if (dev->hard_header_len && 400 if (dev->hard_header_len &&
401 skb->mac_header != skb->network_header) { 401 skb->mac_header != skb->network_header) {
402 const unsigned char *p = skb_mac_header(skb); 402 const unsigned char *p = skb_mac_header(skb);
@@ -408,19 +408,19 @@ fallback:
408 p = NULL; 408 p = NULL;
409 409
410 if (p != NULL) { 410 if (p != NULL) {
411 printk("%02x", *p++); 411 sb_add(m, "%02x", *p++);
412 for (i = 1; i < len; i++) 412 for (i = 1; i < len; i++)
413 printk(":%02x", p[i]); 413 sb_add(m, ":%02x", p[i]);
414 } 414 }
415 printk(" "); 415 sb_add(m, " ");
416 416
417 if (dev->type == ARPHRD_SIT) { 417 if (dev->type == ARPHRD_SIT) {
418 const struct iphdr *iph = 418 const struct iphdr *iph =
419 (struct iphdr *)skb_mac_header(skb); 419 (struct iphdr *)skb_mac_header(skb);
420 printk("TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr); 420 sb_add(m, "TUNNEL=%pI4->%pI4 ", &iph->saddr, &iph->daddr);
421 } 421 }
422 } else 422 } else
423 printk(" "); 423 sb_add(m, " ");
424} 424}
425 425
426static struct nf_loginfo default_loginfo = { 426static struct nf_loginfo default_loginfo = {
@@ -442,22 +442,23 @@ ip6t_log_packet(u_int8_t pf,
442 const struct nf_loginfo *loginfo, 442 const struct nf_loginfo *loginfo,
443 const char *prefix) 443 const char *prefix)
444{ 444{
445 struct sbuff *m = sb_open();
446
445 if (!loginfo) 447 if (!loginfo)
446 loginfo = &default_loginfo; 448 loginfo = &default_loginfo;
447 449
448 spin_lock_bh(&log_lock); 450 sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
449 printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, 451 prefix,
450 prefix, 452 in ? in->name : "",
451 in ? in->name : "", 453 out ? out->name : "");
452 out ? out->name : "");
453 454
454 /* MAC logging for input path only. */ 455 /* MAC logging for input path only. */
455 if (in && !out) 456 if (in && !out)
456 dump_mac_header(loginfo, skb); 457 dump_mac_header(m, loginfo, skb);
458
459 dump_packet(m, loginfo, skb, skb_network_offset(skb), 1);
457 460
458 dump_packet(loginfo, skb, skb_network_offset(skb), 1); 461 sb_close(m);
459 printk("\n");
460 spin_unlock_bh(&log_lock);
461} 462}
462 463
463static unsigned int 464static unsigned int
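
The hunks above convert ip6t_LOG from per-fragment printk() calls to an sbuff accumulator: the whole log line is built up with sb_add() and flushed once by sb_close(), so concurrent loggers can no longer interleave their output and the old log_lock spinlock becomes unnecessary. A minimal userspace sketch of the accumulate-then-flush pattern (the helper names follow the patch; the buffer size and the final fputs() are illustrative stand-ins):

#include <stdarg.h>
#include <stdio.h>

struct sbuff {
	char buf[1024];
	unsigned int count;
};

static void sb_add(struct sbuff *m, const char *fmt, ...)
{
	va_list args;
	int len;

	if (m->count >= sizeof(m->buf))
		return;				/* buffer full: drop the rest */

	va_start(args, fmt);
	len = vsnprintf(m->buf + m->count, sizeof(m->buf) - m->count, fmt, args);
	va_end(args);

	if (len > 0)
		m->count += (unsigned int)len;
	if (m->count > sizeof(m->buf))
		m->count = sizeof(m->buf);	/* vsnprintf truncated the output */
}

static void sb_close(struct sbuff *m)
{
	fputs(m->buf, stdout);			/* one emission per logged packet */
	putchar('\n');
}

int main(void)
{
	struct sbuff m = { .count = 0 };

	sb_add(&m, "IN=%s OUT=%s ", "eth0", "");
	sb_add(&m, "PROTO=%u ", 58u);
	sb_close(&m);
	return 0;
}
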
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index ff43461704be..c8af58b22562 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -16,7 +16,6 @@
16#include <linux/module.h> 16#include <linux/module.h>
17#include <linux/skbuff.h> 17#include <linux/skbuff.h>
18#include <linux/icmp.h> 18#include <linux/icmp.h>
19#include <linux/sysctl.h>
20#include <net/ipv6.h> 19#include <net/ipv6.h>
21#include <net/inet_frag.h> 20#include <net/inet_frag.h>
22 21
@@ -29,6 +28,7 @@
29#include <net/netfilter/nf_conntrack_core.h> 28#include <net/netfilter/nf_conntrack_core.h>
30#include <net/netfilter/nf_conntrack_zones.h> 29#include <net/netfilter/nf_conntrack_zones.h>
31#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 30#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
31#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
32#include <net/netfilter/nf_log.h> 32#include <net/netfilter/nf_log.h>
33 33
34static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, 34static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
@@ -189,53 +189,6 @@ out:
189 return nf_conntrack_confirm(skb); 189 return nf_conntrack_confirm(skb);
190} 190}
191 191
192static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
193 struct sk_buff *skb)
194{
195 u16 zone = NF_CT_DEFAULT_ZONE;
196
197 if (skb->nfct)
198 zone = nf_ct_zone((struct nf_conn *)skb->nfct);
199
200#ifdef CONFIG_BRIDGE_NETFILTER
201 if (skb->nf_bridge &&
202 skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
203 return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
204#endif
205 if (hooknum == NF_INET_PRE_ROUTING)
206 return IP6_DEFRAG_CONNTRACK_IN + zone;
207 else
208 return IP6_DEFRAG_CONNTRACK_OUT + zone;
209
210}
211
212static unsigned int ipv6_defrag(unsigned int hooknum,
213 struct sk_buff *skb,
214 const struct net_device *in,
215 const struct net_device *out,
216 int (*okfn)(struct sk_buff *))
217{
218 struct sk_buff *reasm;
219
220 /* Previously seen (loopback)? */
221 if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
222 return NF_ACCEPT;
223
224 reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
225 /* queued */
226 if (reasm == NULL)
227 return NF_STOLEN;
228
 229 /* error occurred or not fragmented */
230 if (reasm == skb)
231 return NF_ACCEPT;
232
233 nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
234 (struct net_device *)out, okfn);
235
236 return NF_STOLEN;
237}
238
239static unsigned int __ipv6_conntrack_in(struct net *net, 192static unsigned int __ipv6_conntrack_in(struct net *net,
240 unsigned int hooknum, 193 unsigned int hooknum,
241 struct sk_buff *skb, 194 struct sk_buff *skb,
@@ -288,13 +241,6 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
288 241
289static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { 242static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
290 { 243 {
291 .hook = ipv6_defrag,
292 .owner = THIS_MODULE,
293 .pf = NFPROTO_IPV6,
294 .hooknum = NF_INET_PRE_ROUTING,
295 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
296 },
297 {
298 .hook = ipv6_conntrack_in, 244 .hook = ipv6_conntrack_in,
299 .owner = THIS_MODULE, 245 .owner = THIS_MODULE,
300 .pf = NFPROTO_IPV6, 246 .pf = NFPROTO_IPV6,
@@ -309,13 +255,6 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
309 .priority = NF_IP6_PRI_CONNTRACK, 255 .priority = NF_IP6_PRI_CONNTRACK,
310 }, 256 },
311 { 257 {
312 .hook = ipv6_defrag,
313 .owner = THIS_MODULE,
314 .pf = NFPROTO_IPV6,
315 .hooknum = NF_INET_LOCAL_OUT,
316 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
317 },
318 {
319 .hook = ipv6_confirm, 258 .hook = ipv6_confirm,
320 .owner = THIS_MODULE, 259 .owner = THIS_MODULE,
321 .pf = NFPROTO_IPV6, 260 .pf = NFPROTO_IPV6,
@@ -387,10 +326,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = {
387 .nlattr_to_tuple = ipv6_nlattr_to_tuple, 326 .nlattr_to_tuple = ipv6_nlattr_to_tuple,
388 .nla_policy = ipv6_nla_policy, 327 .nla_policy = ipv6_nla_policy,
389#endif 328#endif
390#ifdef CONFIG_SYSCTL
391 .ctl_table_path = nf_net_netfilter_sysctl_path,
392 .ctl_table = nf_ct_ipv6_sysctl_table,
393#endif
394 .me = THIS_MODULE, 329 .me = THIS_MODULE,
395}; 330};
396 331
@@ -403,16 +338,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
403 int ret = 0; 338 int ret = 0;
404 339
405 need_conntrack(); 340 need_conntrack();
341 nf_defrag_ipv6_enable();
406 342
407 ret = nf_ct_frag6_init();
408 if (ret < 0) {
409 pr_err("nf_conntrack_ipv6: can't initialize frag6.\n");
410 return ret;
411 }
412 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6); 343 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6);
413 if (ret < 0) { 344 if (ret < 0) {
414 pr_err("nf_conntrack_ipv6: can't register tcp.\n"); 345 pr_err("nf_conntrack_ipv6: can't register tcp.\n");
415 goto cleanup_frag6; 346 return ret;
416 } 347 }
417 348
418 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6); 349 ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6);
@@ -450,8 +381,6 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
450 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); 381 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
451 cleanup_tcp: 382 cleanup_tcp:
452 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); 383 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
453 cleanup_frag6:
454 nf_ct_frag6_cleanup();
455 return ret; 384 return ret;
456} 385}
457 386
@@ -463,7 +392,6 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void)
463 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); 392 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6);
464 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); 393 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6);
465 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); 394 nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6);
466 nf_ct_frag6_cleanup();
467} 395}
468 396
469module_init(nf_conntrack_l3proto_ipv6_init); 397module_init(nf_conntrack_l3proto_ipv6_init);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 578f3c1a16db..489d71b844ac 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -73,7 +73,7 @@ static struct inet_frags nf_frags;
73static struct netns_frags nf_init_frags; 73static struct netns_frags nf_init_frags;
74 74
75#ifdef CONFIG_SYSCTL 75#ifdef CONFIG_SYSCTL
76struct ctl_table nf_ct_ipv6_sysctl_table[] = { 76struct ctl_table nf_ct_frag6_sysctl_table[] = {
77 { 77 {
78 .procname = "nf_conntrack_frag6_timeout", 78 .procname = "nf_conntrack_frag6_timeout",
79 .data = &nf_init_frags.timeout, 79 .data = &nf_init_frags.timeout,
@@ -97,6 +97,8 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = {
97 }, 97 },
98 { } 98 { }
99}; 99};
100
101static struct ctl_table_header *nf_ct_frag6_sysctl_header;
100#endif 102#endif
101 103
102static unsigned int nf_hashfn(struct inet_frag_queue *q) 104static unsigned int nf_hashfn(struct inet_frag_queue *q)
@@ -363,7 +365,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
363 /* If the first fragment is fragmented itself, we split 365 /* If the first fragment is fragmented itself, we split
364 * it to two chunks: the first with data and paged part 366 * it to two chunks: the first with data and paged part
365 * and the second, holding only fragments. */ 367 * and the second, holding only fragments. */
366 if (skb_has_frags(head)) { 368 if (skb_has_frag_list(head)) {
367 struct sk_buff *clone; 369 struct sk_buff *clone;
368 int i, plen = 0; 370 int i, plen = 0;
369 371
@@ -623,11 +625,21 @@ int nf_ct_frag6_init(void)
623 inet_frags_init_net(&nf_init_frags); 625 inet_frags_init_net(&nf_init_frags);
624 inet_frags_init(&nf_frags); 626 inet_frags_init(&nf_frags);
625 627
628 nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path,
629 nf_ct_frag6_sysctl_table);
630 if (!nf_ct_frag6_sysctl_header) {
631 inet_frags_fini(&nf_frags);
632 return -ENOMEM;
633 }
634
626 return 0; 635 return 0;
627} 636}
628 637
629void nf_ct_frag6_cleanup(void) 638void nf_ct_frag6_cleanup(void)
630{ 639{
640 unregister_sysctl_table(nf_ct_frag6_sysctl_header);
641 nf_ct_frag6_sysctl_header = NULL;
642
631 inet_frags_fini(&nf_frags); 643 inet_frags_fini(&nf_frags);
632 644
633 nf_init_frags.low_thresh = 0; 645 nf_init_frags.low_thresh = 0;
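
nf_ct_frag6_init() now registers the (renamed) frag6 sysctl table itself and unwinds the already-initialised fragment state if registration fails, with nf_ct_frag6_cleanup() undoing both in reverse order. A generic sketch of that init/rollback shape, with stub functions standing in for inet_frags_init() and register_sysctl_paths():

#include <stdio.h>

static int frags_ok, sysctl_ok;

static int  frags_init(void)        { frags_ok = 1; return 0; }
static void frags_fini(void)        { frags_ok = 0; }
static int  sysctl_register(void)   { sysctl_ok = 1; return 0; }
static void sysctl_unregister(void) { sysctl_ok = 0; }

static int frag6_init(void)
{
	int ret = frags_init();

	if (ret < 0)
		return ret;
	if (sysctl_register() < 0) {
		frags_fini();		/* roll back the earlier step */
		return -1;
	}
	return 0;
}

static void frag6_cleanup(void)
{
	sysctl_unregister();		/* reverse order of frag6_init() */
	frags_fini();
}

int main(void)
{
	if (frag6_init() == 0) {
		printf("init ok (frags=%d sysctl=%d)\n", frags_ok, sysctl_ok);
		frag6_cleanup();
	}
	return 0;
}
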
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
new file mode 100644
index 000000000000..99abfb53bab9
--- /dev/null
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -0,0 +1,131 @@
1/* (C) 1999-2001 Paul `Rusty' Russell
2 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License version 2 as
6 * published by the Free Software Foundation.
7 */
8
9#include <linux/types.h>
10#include <linux/ipv6.h>
11#include <linux/in6.h>
12#include <linux/netfilter.h>
13#include <linux/module.h>
14#include <linux/skbuff.h>
15#include <linux/icmp.h>
16#include <linux/sysctl.h>
17#include <net/ipv6.h>
18#include <net/inet_frag.h>
19
20#include <linux/netfilter_ipv6.h>
21#include <linux/netfilter_bridge.h>
22#include <net/netfilter/nf_conntrack.h>
23#include <net/netfilter/nf_conntrack_helper.h>
24#include <net/netfilter/nf_conntrack_l4proto.h>
25#include <net/netfilter/nf_conntrack_l3proto.h>
26#include <net/netfilter/nf_conntrack_core.h>
27#include <net/netfilter/nf_conntrack_zones.h>
28#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
29#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
30
31static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
32 struct sk_buff *skb)
33{
34 u16 zone = NF_CT_DEFAULT_ZONE;
35
36 if (skb->nfct)
37 zone = nf_ct_zone((struct nf_conn *)skb->nfct);
38
39#ifdef CONFIG_BRIDGE_NETFILTER
40 if (skb->nf_bridge &&
41 skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
42 return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
43#endif
44 if (hooknum == NF_INET_PRE_ROUTING)
45 return IP6_DEFRAG_CONNTRACK_IN + zone;
46 else
47 return IP6_DEFRAG_CONNTRACK_OUT + zone;
48
49}
50
51static unsigned int ipv6_defrag(unsigned int hooknum,
52 struct sk_buff *skb,
53 const struct net_device *in,
54 const struct net_device *out,
55 int (*okfn)(struct sk_buff *))
56{
57 struct sk_buff *reasm;
58
59 /* Previously seen (loopback)? */
60 if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
61 return NF_ACCEPT;
62
63 reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
64 /* queued */
65 if (reasm == NULL)
66 return NF_STOLEN;
67
 68 /* error occurred or not fragmented */
69 if (reasm == skb)
70 return NF_ACCEPT;
71
72 nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
73 (struct net_device *)out, okfn);
74
75 return NF_STOLEN;
76}
77
78static struct nf_hook_ops ipv6_defrag_ops[] = {
79 {
80 .hook = ipv6_defrag,
81 .owner = THIS_MODULE,
82 .pf = NFPROTO_IPV6,
83 .hooknum = NF_INET_PRE_ROUTING,
84 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
85 },
86 {
87 .hook = ipv6_defrag,
88 .owner = THIS_MODULE,
89 .pf = NFPROTO_IPV6,
90 .hooknum = NF_INET_LOCAL_OUT,
91 .priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
92 },
93};
94
95static int __init nf_defrag_init(void)
96{
97 int ret = 0;
98
99 ret = nf_ct_frag6_init();
100 if (ret < 0) {
101 pr_err("nf_defrag_ipv6: can't initialize frag6.\n");
102 return ret;
103 }
104 ret = nf_register_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
105 if (ret < 0) {
106 pr_err("nf_defrag_ipv6: can't register hooks\n");
107 goto cleanup_frag6;
108 }
109 return ret;
110
111cleanup_frag6:
112 nf_ct_frag6_cleanup();
113 return ret;
114
115}
116
117static void __exit nf_defrag_fini(void)
118{
119 nf_unregister_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops));
120 nf_ct_frag6_cleanup();
121}
122
123void nf_defrag_ipv6_enable(void)
124{
125}
126EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable);
127
128module_init(nf_defrag_init);
129module_exit(nf_defrag_fini);
130
131MODULE_LICENSE("GPL");
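
Note that nf_defrag_ipv6_enable() is deliberately empty: its value is being an exported symbol that nf_conntrack_ipv6 calls during init, which makes the module loader pull in and pin nf_defrag_ipv6 whenever conntrack is loaded. The same link-time dependency trick in plain C (two notional translation units, shown together; both compile as one file too):

/* defrag.c: the provider; the body is irrelevant, the symbol is the point */
void defrag_enable(void)
{
}

/* conntrack.c: the consumer; referencing the symbol forces the provider
 * (in the kernel, the nf_defrag_ipv6 module) to be linked in / loaded */
extern void defrag_enable(void);

int main(void)
{
	defrag_enable();
	return 0;
}
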
diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c
index 1fa3468f0f32..9bb936ae2452 100644
--- a/net/ipv6/protocol.c
+++ b/net/ipv6/protocol.c
@@ -25,28 +25,14 @@
25#include <linux/spinlock.h> 25#include <linux/spinlock.h>
26#include <net/protocol.h> 26#include <net/protocol.h>
27 27
28const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS]; 28const struct inet6_protocol *inet6_protos[MAX_INET_PROTOS] __read_mostly;
29static DEFINE_SPINLOCK(inet6_proto_lock);
30
31 29
32int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol) 30int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol)
33{ 31{
34 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 32 int hash = protocol & (MAX_INET_PROTOS - 1);
35
36 spin_lock_bh(&inet6_proto_lock);
37
38 if (inet6_protos[hash]) {
39 ret = -1;
40 } else {
41 inet6_protos[hash] = prot;
42 ret = 0;
43 }
44
45 spin_unlock_bh(&inet6_proto_lock);
46 33
47 return ret; 34 return !cmpxchg(&inet6_protos[hash], NULL, prot) ? 0 : -1;
48} 35}
49
50EXPORT_SYMBOL(inet6_add_protocol); 36EXPORT_SYMBOL(inet6_add_protocol);
51 37
52/* 38/*
@@ -57,20 +43,10 @@ int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol
57{ 43{
58 int ret, hash = protocol & (MAX_INET_PROTOS - 1); 44 int ret, hash = protocol & (MAX_INET_PROTOS - 1);
59 45
60 spin_lock_bh(&inet6_proto_lock); 46 ret = (cmpxchg(&inet6_protos[hash], prot, NULL) == prot) ? 0 : -1;
61
62 if (inet6_protos[hash] != prot) {
63 ret = -1;
64 } else {
65 inet6_protos[hash] = NULL;
66 ret = 0;
67 }
68
69 spin_unlock_bh(&inet6_proto_lock);
70 47
71 synchronize_net(); 48 synchronize_net();
72 49
73 return ret; 50 return ret;
74} 51}
75
76EXPORT_SYMBOL(inet6_del_protocol); 52EXPORT_SYMBOL(inet6_del_protocol);
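
The spinlock around inet6_protos[] is replaced by a single compare-and-swap: add succeeds only if the slot is still NULL, delete only if it still holds the expected handler, and the existing synchronize_net() lets lockless readers drain before the handler may be freed. A C11 sketch of the same slot protocol (table size and handler type are illustrative):

#include <stdatomic.h>
#include <stdio.h>

#define MAX_PROTOS 256

static _Atomic(const void *) protos[MAX_PROTOS];

/* mirrors: return !cmpxchg(&inet6_protos[hash], NULL, prot) ? 0 : -1; */
static int add_protocol(const void *prot, unsigned char num)
{
	const void *expected = NULL;

	return atomic_compare_exchange_strong(&protos[num], &expected, prot)
		? 0 : -1;
}

/* mirrors: (cmpxchg(&inet6_protos[hash], prot, NULL) == prot) ? 0 : -1; */
static int del_protocol(const void *prot, unsigned char num)
{
	const void *expected = prot;

	return atomic_compare_exchange_strong(&protos[num], &expected, NULL)
		? 0 : -1;
}

int main(void)
{
	static const int handler = 1;

	printf("add: %d\n", add_protocol(&handler, 58));  /* 0: slot was free */
	printf("add: %d\n", add_protocol(&handler, 58));  /* -1: already taken */
	printf("del: %d\n", del_protocol(&handler, 58));  /* 0: we owned it */
	return 0;
}
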
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index e677937a07fc..45e6efb7f171 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -764,7 +764,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
764 return -EINVAL; 764 return -EINVAL;
765 765
766 if (sin6->sin6_family && sin6->sin6_family != AF_INET6) 766 if (sin6->sin6_family && sin6->sin6_family != AF_INET6)
767 return(-EAFNOSUPPORT); 767 return -EAFNOSUPPORT;
768 768
769 /* port is the proto value [0..255] carried in nexthdr */ 769 /* port is the proto value [0..255] carried in nexthdr */
770 proto = ntohs(sin6->sin6_port); 770 proto = ntohs(sin6->sin6_port);
@@ -772,10 +772,10 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
772 if (!proto) 772 if (!proto)
773 proto = inet->inet_num; 773 proto = inet->inet_num;
774 else if (proto != inet->inet_num) 774 else if (proto != inet->inet_num)
775 return(-EINVAL); 775 return -EINVAL;
776 776
777 if (proto > 255) 777 if (proto > 255)
778 return(-EINVAL); 778 return -EINVAL;
779 779
780 daddr = &sin6->sin6_addr; 780 daddr = &sin6->sin6_addr;
781 if (np->sndflow) { 781 if (np->sndflow) {
@@ -985,7 +985,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
985 /* You may get strange result with a positive odd offset; 985 /* You may get strange result with a positive odd offset;
986 RFC2292bis agrees with me. */ 986 RFC2292bis agrees with me. */
987 if (val > 0 && (val&1)) 987 if (val > 0 && (val&1))
988 return(-EINVAL); 988 return -EINVAL;
989 if (val < 0) { 989 if (val < 0) {
990 rp->checksum = 0; 990 rp->checksum = 0;
991 } else { 991 } else {
@@ -997,7 +997,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname,
997 break; 997 break;
998 998
999 default: 999 default:
1000 return(-ENOPROTOOPT); 1000 return -ENOPROTOOPT;
1001 } 1001 }
1002} 1002}
1003 1003
@@ -1190,7 +1190,7 @@ static int rawv6_init_sk(struct sock *sk)
1190 default: 1190 default:
1191 break; 1191 break;
1192 } 1192 }
1193 return(0); 1193 return 0;
1194} 1194}
1195 1195
1196struct proto rawv6_prot = { 1196struct proto rawv6_prot = {
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 64cfef1b0a4c..c7ba3149633f 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -458,7 +458,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
458 /* If the first fragment is fragmented itself, we split 458 /* If the first fragment is fragmented itself, we split
459 * it to two chunks: the first with data and paged part 459 * it to two chunks: the first with data and paged part
460 * and the second, holding only fragments. */ 460 * and the second, holding only fragments. */
461 if (skb_has_frags(head)) { 461 if (skb_has_frag_list(head)) {
462 struct sk_buff *clone; 462 struct sk_buff *clone;
463 int i, plen = 0; 463 int i, plen = 0;
464 464
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a275c6e1e25c..25661f968f3f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -109,7 +109,6 @@ static struct dst_ops ip6_dst_ops_template = {
109 .link_failure = ip6_link_failure, 109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu, 110 .update_pmtu = ip6_rt_update_pmtu,
111 .local_out = __ip6_local_out, 111 .local_out = __ip6_local_out,
112 .entries = ATOMIC_INIT(0),
113}; 112};
114 113
115static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) 114static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = {
122 .destroy = ip6_dst_destroy, 121 .destroy = ip6_dst_destroy,
123 .check = ip6_dst_check, 122 .check = ip6_dst_check,
124 .update_pmtu = ip6_rt_blackhole_update_pmtu, 123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
125 .entries = ATOMIC_INIT(0),
126}; 124};
127 125
128static struct rt6_info ip6_null_entry_template = { 126static struct rt6_info ip6_null_entry_template = {
@@ -217,14 +215,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
217 215
218static __inline__ int rt6_check_expired(const struct rt6_info *rt) 216static __inline__ int rt6_check_expired(const struct rt6_info *rt)
219{ 217{
220 return (rt->rt6i_flags & RTF_EXPIRES && 218 return (rt->rt6i_flags & RTF_EXPIRES) &&
221 time_after(jiffies, rt->rt6i_expires)); 219 time_after(jiffies, rt->rt6i_expires);
222} 220}
223 221
224static inline int rt6_need_strict(struct in6_addr *daddr) 222static inline int rt6_need_strict(struct in6_addr *daddr)
225{ 223{
226 return (ipv6_addr_type(daddr) & 224 return ipv6_addr_type(daddr) &
227 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); 225 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
228} 226}
229 227
230/* 228/*
@@ -440,7 +438,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
440 __func__, match); 438 __func__, match);
441 439
442 net = dev_net(rt0->rt6i_dev); 440 net = dev_net(rt0->rt6i_dev);
443 return (match ? match : net->ipv6.ip6_null_entry); 441 return match ? match : net->ipv6.ip6_null_entry;
444} 442}
445 443
446#ifdef CONFIG_IPV6_ROUTE_INFO 444#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -859,7 +857,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
859 857
860 dst_release(*dstp); 858 dst_release(*dstp);
861 *dstp = new; 859 *dstp = new;
862 return (new ? 0 : -ENOMEM); 860 return new ? 0 : -ENOMEM;
863} 861}
864EXPORT_SYMBOL_GPL(ip6_dst_blackhole); 862EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
865 863
@@ -1058,19 +1056,22 @@ static int ip6_dst_gc(struct dst_ops *ops)
1058 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; 1056 int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
1059 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; 1057 int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
1060 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; 1058 unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
1059 int entries;
1061 1060
1061 entries = dst_entries_get_fast(ops);
1062 if (time_after(rt_last_gc + rt_min_interval, now) && 1062 if (time_after(rt_last_gc + rt_min_interval, now) &&
1063 atomic_read(&ops->entries) <= rt_max_size) 1063 entries <= rt_max_size)
1064 goto out; 1064 goto out;
1065 1065
1066 net->ipv6.ip6_rt_gc_expire++; 1066 net->ipv6.ip6_rt_gc_expire++;
1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); 1067 fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
1068 net->ipv6.ip6_rt_last_gc = now; 1068 net->ipv6.ip6_rt_last_gc = now;
1069 if (atomic_read(&ops->entries) < ops->gc_thresh) 1069 entries = dst_entries_get_slow(ops);
1070 if (entries < ops->gc_thresh)
1070 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; 1071 net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1;
1071out: 1072out:
1072 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; 1073 net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity;
1073 return (atomic_read(&ops->entries) > rt_max_size); 1074 return entries > rt_max_size;
1074} 1075}
1075 1076
1076/* Clean host part of a prefix. Not necessary in radix tree, 1077/* Clean host part of a prefix. Not necessary in radix tree,
@@ -1169,6 +1170,8 @@ int ip6_route_add(struct fib6_config *cfg)
1169 1170
1170 if (addr_type & IPV6_ADDR_MULTICAST) 1171 if (addr_type & IPV6_ADDR_MULTICAST)
1171 rt->dst.input = ip6_mc_input; 1172 rt->dst.input = ip6_mc_input;
1173 else if (cfg->fc_flags & RTF_LOCAL)
1174 rt->dst.input = ip6_input;
1172 else 1175 else
1173 rt->dst.input = ip6_forward; 1176 rt->dst.input = ip6_forward;
1174 1177
@@ -1190,7 +1193,8 @@ int ip6_route_add(struct fib6_config *cfg)
1190 they would result in kernel looping; promote them to reject routes 1193 they would result in kernel looping; promote them to reject routes
1191 */ 1194 */
1192 if ((cfg->fc_flags & RTF_REJECT) || 1195 if ((cfg->fc_flags & RTF_REJECT) ||
1193 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { 1196 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK)
1197 && !(cfg->fc_flags&RTF_LOCAL))) {
1194 /* hold loopback dev/idev if we haven't done so. */ 1198 /* hold loopback dev/idev if we haven't done so. */
1195 if (dev != net->loopback_dev) { 1199 if (dev != net->loopback_dev) {
1196 if (dev) { 1200 if (dev) {
@@ -2102,6 +2106,9 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
2102 if (rtm->rtm_type == RTN_UNREACHABLE) 2106 if (rtm->rtm_type == RTN_UNREACHABLE)
2103 cfg->fc_flags |= RTF_REJECT; 2107 cfg->fc_flags |= RTF_REJECT;
2104 2108
2109 if (rtm->rtm_type == RTN_LOCAL)
2110 cfg->fc_flags |= RTF_LOCAL;
2111
2105 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 2112 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
2106 cfg->fc_nlinfo.nlh = nlh; 2113 cfg->fc_nlinfo.nlh = nlh;
2107 cfg->fc_nlinfo.nl_net = sock_net(skb->sk); 2114 cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -2222,6 +2229,8 @@ static int rt6_fill_node(struct net *net,
2222 NLA_PUT_U32(skb, RTA_TABLE, table); 2229 NLA_PUT_U32(skb, RTA_TABLE, table);
2223 if (rt->rt6i_flags&RTF_REJECT) 2230 if (rt->rt6i_flags&RTF_REJECT)
2224 rtm->rtm_type = RTN_UNREACHABLE; 2231 rtm->rtm_type = RTN_UNREACHABLE;
2232 else if (rt->rt6i_flags&RTF_LOCAL)
2233 rtm->rtm_type = RTN_LOCAL;
2225 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) 2234 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2226 rtm->rtm_type = RTN_LOCAL; 2235 rtm->rtm_type = RTN_LOCAL;
2227 else 2236 else
@@ -2516,7 +2525,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2516 net->ipv6.rt6_stats->fib_rt_alloc, 2525 net->ipv6.rt6_stats->fib_rt_alloc,
2517 net->ipv6.rt6_stats->fib_rt_entries, 2526 net->ipv6.rt6_stats->fib_rt_entries,
2518 net->ipv6.rt6_stats->fib_rt_cache, 2527 net->ipv6.rt6_stats->fib_rt_cache,
2519 atomic_read(&net->ipv6.ip6_dst_ops.entries), 2528 dst_entries_get_slow(&net->ipv6.ip6_dst_ops),
2520 net->ipv6.rt6_stats->fib_discarded_routes); 2529 net->ipv6.rt6_stats->fib_discarded_routes);
2521 2530
2522 return 0; 2531 return 0;
@@ -2658,11 +2667,14 @@ static int __net_init ip6_route_net_init(struct net *net)
2658 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, 2667 memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template,
2659 sizeof(net->ipv6.ip6_dst_ops)); 2668 sizeof(net->ipv6.ip6_dst_ops));
2660 2669
2670 if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0)
2671 goto out_ip6_dst_ops;
2672
2661 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, 2673 net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template,
2662 sizeof(*net->ipv6.ip6_null_entry), 2674 sizeof(*net->ipv6.ip6_null_entry),
2663 GFP_KERNEL); 2675 GFP_KERNEL);
2664 if (!net->ipv6.ip6_null_entry) 2676 if (!net->ipv6.ip6_null_entry)
2665 goto out_ip6_dst_ops; 2677 goto out_ip6_dst_entries;
2666 net->ipv6.ip6_null_entry->dst.path = 2678 net->ipv6.ip6_null_entry->dst.path =
2667 (struct dst_entry *)net->ipv6.ip6_null_entry; 2679 (struct dst_entry *)net->ipv6.ip6_null_entry;
2668 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; 2680 net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
@@ -2712,6 +2724,8 @@ out_ip6_prohibit_entry:
2712out_ip6_null_entry: 2724out_ip6_null_entry:
2713 kfree(net->ipv6.ip6_null_entry); 2725 kfree(net->ipv6.ip6_null_entry);
2714#endif 2726#endif
2727out_ip6_dst_entries:
2728 dst_entries_destroy(&net->ipv6.ip6_dst_ops);
2715out_ip6_dst_ops: 2729out_ip6_dst_ops:
2716 goto out; 2730 goto out;
2717} 2731}
@@ -2750,10 +2764,14 @@ int __init ip6_route_init(void)
2750 if (!ip6_dst_ops_template.kmem_cachep) 2764 if (!ip6_dst_ops_template.kmem_cachep)
2751 goto out; 2765 goto out;
2752 2766
2753 ret = register_pernet_subsys(&ip6_route_net_ops); 2767 ret = dst_entries_init(&ip6_dst_blackhole_ops);
2754 if (ret) 2768 if (ret)
2755 goto out_kmem_cache; 2769 goto out_kmem_cache;
2756 2770
2771 ret = register_pernet_subsys(&ip6_route_net_ops);
2772 if (ret)
2773 goto out_dst_entries;
2774
2757 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; 2775 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep;
2758 2776
2759 /* Registering of the loopback is done before this portion of code, 2777 /* Registering of the loopback is done before this portion of code,
@@ -2800,6 +2818,8 @@ out_fib6_init:
2800 fib6_gc_cleanup(); 2818 fib6_gc_cleanup();
2801out_register_subsys: 2819out_register_subsys:
2802 unregister_pernet_subsys(&ip6_route_net_ops); 2820 unregister_pernet_subsys(&ip6_route_net_ops);
2821out_dst_entries:
2822 dst_entries_destroy(&ip6_dst_blackhole_ops);
2803out_kmem_cache: 2823out_kmem_cache:
2804 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); 2824 kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep);
2805 goto out; 2825 goto out;
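
With .entries gone from the dst_ops templates, route-cache accounting moves to a per-cpu counter: dst_entries_get_fast() gives a cheap, possibly stale reading for the hot path, while dst_entries_get_slow() sums every cpu's delta for the exact count used after garbage collection. A sketch of that fast/slow split (NCPUS and BATCH are illustrative; the kernel's percpu_counter folds deltas under a lock):

#include <stdio.h>

#define NCPUS 4
#define BATCH 8

struct pcounter {
	long total;		/* folded global value: cheap, approximate */
	long local[NCPUS];	/* per-cpu deltas, folded once they reach BATCH */
};

static void counter_add(struct pcounter *c, int cpu, long n)
{
	c->local[cpu] += n;
	if (c->local[cpu] >= BATCH || c->local[cpu] <= -BATCH) {
		c->total += c->local[cpu];	/* kernel takes a lock here */
		c->local[cpu] = 0;
	}
}

/* like dst_entries_get_fast(): one read, may lag by up to NCPUS*BATCH */
static long read_fast(const struct pcounter *c)
{
	return c->total;
}

/* like dst_entries_get_slow(): exact, walks every cpu's delta */
static long read_slow(const struct pcounter *c)
{
	long sum = c->total;
	int i;

	for (i = 0; i < NCPUS; i++)
		sum += c->local[i];
	return sum;
}

int main(void)
{
	struct pcounter c = { 0 };

	counter_add(&c, 0, 5);
	counter_add(&c, 1, 5);
	printf("fast=%ld slow=%ld\n", read_fast(&c), read_slow(&c));
	return 0;
}
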
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 4699cd3c3118..367a6cc584cc 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -63,36 +63,63 @@
63#define HASH_SIZE 16 63#define HASH_SIZE 16
64#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) 64#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
65 65
66static void ipip6_tunnel_init(struct net_device *dev); 66static int ipip6_tunnel_init(struct net_device *dev);
67static void ipip6_tunnel_setup(struct net_device *dev); 67static void ipip6_tunnel_setup(struct net_device *dev);
68static void ipip6_dev_free(struct net_device *dev);
68 69
69static int sit_net_id __read_mostly; 70static int sit_net_id __read_mostly;
70struct sit_net { 71struct sit_net {
71 struct ip_tunnel *tunnels_r_l[HASH_SIZE]; 72 struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
72 struct ip_tunnel *tunnels_r[HASH_SIZE]; 73 struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
73 struct ip_tunnel *tunnels_l[HASH_SIZE]; 74 struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
74 struct ip_tunnel *tunnels_wc[1]; 75 struct ip_tunnel __rcu *tunnels_wc[1];
75 struct ip_tunnel **tunnels[4]; 76 struct ip_tunnel __rcu **tunnels[4];
76 77
77 struct net_device *fb_tunnel_dev; 78 struct net_device *fb_tunnel_dev;
78}; 79};
79 80
80/* 81/*
81 * Locking : hash tables are protected by RCU and a spinlock 82 * Locking : hash tables are protected by RCU and RTNL
82 */ 83 */
83static DEFINE_SPINLOCK(ipip6_lock);
84 84
85#define for_each_ip_tunnel_rcu(start) \ 85#define for_each_ip_tunnel_rcu(start) \
86 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) 86 for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
87 87
88/* often modified stats are per cpu, other are shared (netdev->stats) */
89struct pcpu_tstats {
90 unsigned long rx_packets;
91 unsigned long rx_bytes;
92 unsigned long tx_packets;
93 unsigned long tx_bytes;
94};
95
96static struct net_device_stats *ipip6_get_stats(struct net_device *dev)
97{
98 struct pcpu_tstats sum = { 0 };
99 int i;
100
101 for_each_possible_cpu(i) {
102 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
103
104 sum.rx_packets += tstats->rx_packets;
105 sum.rx_bytes += tstats->rx_bytes;
106 sum.tx_packets += tstats->tx_packets;
107 sum.tx_bytes += tstats->tx_bytes;
108 }
109 dev->stats.rx_packets = sum.rx_packets;
110 dev->stats.rx_bytes = sum.rx_bytes;
111 dev->stats.tx_packets = sum.tx_packets;
112 dev->stats.tx_bytes = sum.tx_bytes;
113 return &dev->stats;
114}
88/* 115/*
89 * Must be invoked with rcu_read_lock 116 * Must be invoked with rcu_read_lock
90 */ 117 */
91static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net, 118static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
92 struct net_device *dev, __be32 remote, __be32 local) 119 struct net_device *dev, __be32 remote, __be32 local)
93{ 120{
94 unsigned h0 = HASH(remote); 121 unsigned int h0 = HASH(remote);
95 unsigned h1 = HASH(local); 122 unsigned int h1 = HASH(local);
96 struct ip_tunnel *t; 123 struct ip_tunnel *t;
97 struct sit_net *sitn = net_generic(net, sit_net_id); 124 struct sit_net *sitn = net_generic(net, sit_net_id);
98 125
@@ -121,12 +148,12 @@ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
121 return NULL; 148 return NULL;
122} 149}
123 150
124static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn, 151static struct ip_tunnel __rcu **__ipip6_bucket(struct sit_net *sitn,
125 struct ip_tunnel_parm *parms) 152 struct ip_tunnel_parm *parms)
126{ 153{
127 __be32 remote = parms->iph.daddr; 154 __be32 remote = parms->iph.daddr;
128 __be32 local = parms->iph.saddr; 155 __be32 local = parms->iph.saddr;
129 unsigned h = 0; 156 unsigned int h = 0;
130 int prio = 0; 157 int prio = 0;
131 158
132 if (remote) { 159 if (remote) {
@@ -140,7 +167,7 @@ static struct ip_tunnel **__ipip6_bucket(struct sit_net *sitn,
140 return &sitn->tunnels[prio][h]; 167 return &sitn->tunnels[prio][h];
141} 168}
142 169
143static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn, 170static inline struct ip_tunnel __rcu **ipip6_bucket(struct sit_net *sitn,
144 struct ip_tunnel *t) 171 struct ip_tunnel *t)
145{ 172{
146 return __ipip6_bucket(sitn, &t->parms); 173 return __ipip6_bucket(sitn, &t->parms);
@@ -148,13 +175,14 @@ static inline struct ip_tunnel **ipip6_bucket(struct sit_net *sitn,
148 175
149static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t) 176static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
150{ 177{
151 struct ip_tunnel **tp; 178 struct ip_tunnel __rcu **tp;
152 179 struct ip_tunnel *iter;
153 for (tp = ipip6_bucket(sitn, t); *tp; tp = &(*tp)->next) { 180
154 if (t == *tp) { 181 for (tp = ipip6_bucket(sitn, t);
155 spin_lock_bh(&ipip6_lock); 182 (iter = rtnl_dereference(*tp)) != NULL;
156 *tp = t->next; 183 tp = &iter->next) {
157 spin_unlock_bh(&ipip6_lock); 184 if (t == iter) {
185 rcu_assign_pointer(*tp, t->next);
158 break; 186 break;
159 } 187 }
160 } 188 }
@@ -162,12 +190,10 @@ static void ipip6_tunnel_unlink(struct sit_net *sitn, struct ip_tunnel *t)
162 190
163static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t) 191static void ipip6_tunnel_link(struct sit_net *sitn, struct ip_tunnel *t)
164{ 192{
165 struct ip_tunnel **tp = ipip6_bucket(sitn, t); 193 struct ip_tunnel __rcu **tp = ipip6_bucket(sitn, t);
166 194
167 spin_lock_bh(&ipip6_lock); 195 rcu_assign_pointer(t->next, rtnl_dereference(*tp));
168 t->next = *tp;
169 rcu_assign_pointer(*tp, t); 196 rcu_assign_pointer(*tp, t);
170 spin_unlock_bh(&ipip6_lock);
171} 197}
172 198
173static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) 199static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
@@ -187,17 +213,20 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
187#endif 213#endif
188} 214}
189 215
190static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, 216static struct ip_tunnel *ipip6_tunnel_locate(struct net *net,
191 struct ip_tunnel_parm *parms, int create) 217 struct ip_tunnel_parm *parms, int create)
192{ 218{
193 __be32 remote = parms->iph.daddr; 219 __be32 remote = parms->iph.daddr;
194 __be32 local = parms->iph.saddr; 220 __be32 local = parms->iph.saddr;
195 struct ip_tunnel *t, **tp, *nt; 221 struct ip_tunnel *t, *nt;
222 struct ip_tunnel __rcu **tp;
196 struct net_device *dev; 223 struct net_device *dev;
197 char name[IFNAMSIZ]; 224 char name[IFNAMSIZ];
198 struct sit_net *sitn = net_generic(net, sit_net_id); 225 struct sit_net *sitn = net_generic(net, sit_net_id);
199 226
200 for (tp = __ipip6_bucket(sitn, parms); (t = *tp) != NULL; tp = &t->next) { 227 for (tp = __ipip6_bucket(sitn, parms);
228 (t = rtnl_dereference(*tp)) != NULL;
229 tp = &t->next) {
201 if (local == t->parms.iph.saddr && 230 if (local == t->parms.iph.saddr &&
202 remote == t->parms.iph.daddr && 231 remote == t->parms.iph.daddr &&
203 parms->link == t->parms.link) { 232 parms->link == t->parms.link) {
@@ -213,7 +242,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
213 if (parms->name[0]) 242 if (parms->name[0])
214 strlcpy(name, parms->name, IFNAMSIZ); 243 strlcpy(name, parms->name, IFNAMSIZ);
215 else 244 else
216 sprintf(name, "sit%%d"); 245 strcpy(name, "sit%d");
217 246
218 dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup); 247 dev = alloc_netdev(sizeof(*t), name, ipip6_tunnel_setup);
219 if (dev == NULL) 248 if (dev == NULL)
@@ -229,7 +258,8 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
229 nt = netdev_priv(dev); 258 nt = netdev_priv(dev);
230 259
231 nt->parms = *parms; 260 nt->parms = *parms;
232 ipip6_tunnel_init(dev); 261 if (ipip6_tunnel_init(dev) < 0)
262 goto failed_free;
233 ipip6_tunnel_clone_6rd(dev, sitn); 263 ipip6_tunnel_clone_6rd(dev, sitn);
234 264
235 if (parms->i_flags & SIT_ISATAP) 265 if (parms->i_flags & SIT_ISATAP)
@@ -244,7 +274,7 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net,
244 return nt; 274 return nt;
245 275
246failed_free: 276failed_free:
247 free_netdev(dev); 277 ipip6_dev_free(dev);
248failed: 278failed:
249 return NULL; 279 return NULL;
250} 280}
@@ -340,7 +370,7 @@ ipip6_tunnel_add_prl(struct ip_tunnel *t, struct ip_tunnel_prl *a, int chg)
340 370
341 ASSERT_RTNL(); 371 ASSERT_RTNL();
342 372
343 for (p = t->prl; p; p = p->next) { 373 for (p = rtnl_dereference(t->prl); p; p = rtnl_dereference(p->next)) {
344 if (p->addr == a->addr) { 374 if (p->addr == a->addr) {
345 if (chg) { 375 if (chg) {
346 p->flags = a->flags; 376 p->flags = a->flags;
@@ -451,15 +481,12 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
451 struct sit_net *sitn = net_generic(net, sit_net_id); 481 struct sit_net *sitn = net_generic(net, sit_net_id);
452 482
453 if (dev == sitn->fb_tunnel_dev) { 483 if (dev == sitn->fb_tunnel_dev) {
454 spin_lock_bh(&ipip6_lock); 484 rcu_assign_pointer(sitn->tunnels_wc[0], NULL);
455 sitn->tunnels_wc[0] = NULL;
456 spin_unlock_bh(&ipip6_lock);
457 dev_put(dev);
458 } else { 485 } else {
459 ipip6_tunnel_unlink(sitn, netdev_priv(dev)); 486 ipip6_tunnel_unlink(sitn, netdev_priv(dev));
460 ipip6_tunnel_del_prl(netdev_priv(dev), NULL); 487 ipip6_tunnel_del_prl(netdev_priv(dev), NULL);
461 dev_put(dev);
462 } 488 }
489 dev_put(dev);
463} 490}
464 491
465 492
@@ -548,6 +575,8 @@ static int ipip6_rcv(struct sk_buff *skb)
548 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, 575 tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,
549 iph->saddr, iph->daddr); 576 iph->saddr, iph->daddr);
550 if (tunnel != NULL) { 577 if (tunnel != NULL) {
578 struct pcpu_tstats *tstats;
579
551 secpath_reset(skb); 580 secpath_reset(skb);
552 skb->mac_header = skb->network_header; 581 skb->mac_header = skb->network_header;
553 skb_reset_network_header(skb); 582 skb_reset_network_header(skb);
@@ -563,10 +592,16 @@ static int ipip6_rcv(struct sk_buff *skb)
563 return 0; 592 return 0;
564 } 593 }
565 594
566 skb_tunnel_rx(skb, tunnel->dev); 595 tstats = this_cpu_ptr(tunnel->dev->tstats);
596 tstats->rx_packets++;
597 tstats->rx_bytes += skb->len;
598
599 __skb_tunnel_rx(skb, tunnel->dev);
567 600
568 ipip6_ecn_decapsulate(iph, skb); 601 ipip6_ecn_decapsulate(iph, skb);
602
569 netif_rx(skb); 603 netif_rx(skb);
604
570 rcu_read_unlock(); 605 rcu_read_unlock();
571 return 0; 606 return 0;
572 } 607 }
@@ -590,7 +625,7 @@ __be32 try_6rd(struct in6_addr *v6dst, struct ip_tunnel *tunnel)
590#ifdef CONFIG_IPV6_SIT_6RD 625#ifdef CONFIG_IPV6_SIT_6RD
591 if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix, 626 if (ipv6_prefix_equal(v6dst, &tunnel->ip6rd.prefix,
592 tunnel->ip6rd.prefixlen)) { 627 tunnel->ip6rd.prefixlen)) {
593 unsigned pbw0, pbi0; 628 unsigned int pbw0, pbi0;
594 int pbi1; 629 int pbi1;
595 u32 d; 630 u32 d;
596 631
@@ -625,14 +660,13 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
625 struct net_device *dev) 660 struct net_device *dev)
626{ 661{
627 struct ip_tunnel *tunnel = netdev_priv(dev); 662 struct ip_tunnel *tunnel = netdev_priv(dev);
628 struct net_device_stats *stats = &dev->stats; 663 struct pcpu_tstats *tstats;
629 struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
630 struct iphdr *tiph = &tunnel->parms.iph; 664 struct iphdr *tiph = &tunnel->parms.iph;
631 struct ipv6hdr *iph6 = ipv6_hdr(skb); 665 struct ipv6hdr *iph6 = ipv6_hdr(skb);
632 u8 tos = tunnel->parms.iph.tos; 666 u8 tos = tunnel->parms.iph.tos;
633 __be16 df = tiph->frag_off; 667 __be16 df = tiph->frag_off;
634 struct rtable *rt; /* Route to the other host */ 668 struct rtable *rt; /* Route to the other host */
635 struct net_device *tdev; /* Device to other host */ 669 struct net_device *tdev; /* Device to other host */
636 struct iphdr *iph; /* Our new IP header */ 670 struct iphdr *iph; /* Our new IP header */
637 unsigned int max_headroom; /* The extra header space needed */ 671 unsigned int max_headroom; /* The extra header space needed */
638 __be32 dst = tiph->daddr; 672 __be32 dst = tiph->daddr;
@@ -703,20 +737,20 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
703 .oif = tunnel->parms.link, 737 .oif = tunnel->parms.link,
704 .proto = IPPROTO_IPV6 }; 738 .proto = IPPROTO_IPV6 };
705 if (ip_route_output_key(dev_net(dev), &rt, &fl)) { 739 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
706 stats->tx_carrier_errors++; 740 dev->stats.tx_carrier_errors++;
707 goto tx_error_icmp; 741 goto tx_error_icmp;
708 } 742 }
709 } 743 }
710 if (rt->rt_type != RTN_UNICAST) { 744 if (rt->rt_type != RTN_UNICAST) {
711 ip_rt_put(rt); 745 ip_rt_put(rt);
712 stats->tx_carrier_errors++; 746 dev->stats.tx_carrier_errors++;
713 goto tx_error_icmp; 747 goto tx_error_icmp;
714 } 748 }
715 tdev = rt->dst.dev; 749 tdev = rt->dst.dev;
716 750
717 if (tdev == dev) { 751 if (tdev == dev) {
718 ip_rt_put(rt); 752 ip_rt_put(rt);
719 stats->collisions++; 753 dev->stats.collisions++;
720 goto tx_error; 754 goto tx_error;
721 } 755 }
722 756
@@ -724,7 +758,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
724 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); 758 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
725 759
726 if (mtu < 68) { 760 if (mtu < 68) {
727 stats->collisions++; 761 dev->stats.collisions++;
728 ip_rt_put(rt); 762 ip_rt_put(rt);
729 goto tx_error; 763 goto tx_error;
730 } 764 }
@@ -763,7 +797,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
763 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); 797 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
764 if (!new_skb) { 798 if (!new_skb) {
765 ip_rt_put(rt); 799 ip_rt_put(rt);
766 txq->tx_dropped++; 800 dev->stats.tx_dropped++;
767 dev_kfree_skb(skb); 801 dev_kfree_skb(skb);
768 return NETDEV_TX_OK; 802 return NETDEV_TX_OK;
769 } 803 }
@@ -799,14 +833,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
799 iph->ttl = iph6->hop_limit; 833 iph->ttl = iph6->hop_limit;
800 834
801 nf_reset(skb); 835 nf_reset(skb);
802 836 tstats = this_cpu_ptr(dev->tstats);
803 IPTUNNEL_XMIT(); 837 __IPTUNNEL_XMIT(tstats, &dev->stats);
804 return NETDEV_TX_OK; 838 return NETDEV_TX_OK;
805 839
806tx_error_icmp: 840tx_error_icmp:
807 dst_link_failure(skb); 841 dst_link_failure(skb);
808tx_error: 842tx_error:
809 stats->tx_errors++; 843 dev->stats.tx_errors++;
810 dev_kfree_skb(skb); 844 dev_kfree_skb(skb);
811 return NETDEV_TX_OK; 845 return NETDEV_TX_OK;
812} 846}
@@ -1083,12 +1117,19 @@ static const struct net_device_ops ipip6_netdev_ops = {
1083 .ndo_start_xmit = ipip6_tunnel_xmit, 1117 .ndo_start_xmit = ipip6_tunnel_xmit,
1084 .ndo_do_ioctl = ipip6_tunnel_ioctl, 1118 .ndo_do_ioctl = ipip6_tunnel_ioctl,
1085 .ndo_change_mtu = ipip6_tunnel_change_mtu, 1119 .ndo_change_mtu = ipip6_tunnel_change_mtu,
1120 .ndo_get_stats = ipip6_get_stats,
1086}; 1121};
1087 1122
1123static void ipip6_dev_free(struct net_device *dev)
1124{
1125 free_percpu(dev->tstats);
1126 free_netdev(dev);
1127}
1128
1088static void ipip6_tunnel_setup(struct net_device *dev) 1129static void ipip6_tunnel_setup(struct net_device *dev)
1089{ 1130{
1090 dev->netdev_ops = &ipip6_netdev_ops; 1131 dev->netdev_ops = &ipip6_netdev_ops;
1091 dev->destructor = free_netdev; 1132 dev->destructor = ipip6_dev_free;
1092 1133
1093 dev->type = ARPHRD_SIT; 1134 dev->type = ARPHRD_SIT;
1094 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr); 1135 dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
@@ -1098,9 +1139,10 @@ static void ipip6_tunnel_setup(struct net_device *dev)
1098 dev->iflink = 0; 1139 dev->iflink = 0;
1099 dev->addr_len = 4; 1140 dev->addr_len = 4;
1100 dev->features |= NETIF_F_NETNS_LOCAL; 1141 dev->features |= NETIF_F_NETNS_LOCAL;
1142 dev->features |= NETIF_F_LLTX;
1101} 1143}
1102 1144
1103static void ipip6_tunnel_init(struct net_device *dev) 1145static int ipip6_tunnel_init(struct net_device *dev)
1104{ 1146{
1105 struct ip_tunnel *tunnel = netdev_priv(dev); 1147 struct ip_tunnel *tunnel = netdev_priv(dev);
1106 1148
@@ -1111,9 +1153,14 @@ static void ipip6_tunnel_init(struct net_device *dev)
1111 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); 1153 memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1112 1154
1113 ipip6_tunnel_bind_dev(dev); 1155 ipip6_tunnel_bind_dev(dev);
1156 dev->tstats = alloc_percpu(struct pcpu_tstats);
1157 if (!dev->tstats)
1158 return -ENOMEM;
1159
1160 return 0;
1114} 1161}
1115 1162
1116static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) 1163static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1117{ 1164{
1118 struct ip_tunnel *tunnel = netdev_priv(dev); 1165 struct ip_tunnel *tunnel = netdev_priv(dev);
1119 struct iphdr *iph = &tunnel->parms.iph; 1166 struct iphdr *iph = &tunnel->parms.iph;
@@ -1128,11 +1175,15 @@ static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
1128 iph->ihl = 5; 1175 iph->ihl = 5;
1129 iph->ttl = 64; 1176 iph->ttl = 64;
1130 1177
1178 dev->tstats = alloc_percpu(struct pcpu_tstats);
1179 if (!dev->tstats)
1180 return -ENOMEM;
1131 dev_hold(dev); 1181 dev_hold(dev);
1132 sitn->tunnels_wc[0] = tunnel; 1182 sitn->tunnels_wc[0] = tunnel;
1183 return 0;
1133} 1184}
1134 1185
1135static struct xfrm_tunnel sit_handler = { 1186static struct xfrm_tunnel sit_handler __read_mostly = {
1136 .handler = ipip6_rcv, 1187 .handler = ipip6_rcv,
1137 .err_handler = ipip6_err, 1188 .err_handler = ipip6_err,
1138 .priority = 1, 1189 .priority = 1,
@@ -1173,7 +1224,10 @@ static int __net_init sit_init_net(struct net *net)
1173 } 1224 }
1174 dev_net_set(sitn->fb_tunnel_dev, net); 1225 dev_net_set(sitn->fb_tunnel_dev, net);
1175 1226
1176 ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); 1227 err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
1228 if (err)
1229 goto err_dev_free;
1230
1177 ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); 1231 ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
1178 1232
1179 if ((err = register_netdev(sitn->fb_tunnel_dev))) 1233 if ((err = register_netdev(sitn->fb_tunnel_dev)))
@@ -1183,7 +1237,8 @@ static int __net_init sit_init_net(struct net *net)
1183 1237
1184err_reg_dev: 1238err_reg_dev:
1185 dev_put(sitn->fb_tunnel_dev); 1239 dev_put(sitn->fb_tunnel_dev);
1186 free_netdev(sitn->fb_tunnel_dev); 1240err_dev_free:
1241 ipip6_dev_free(sitn->fb_tunnel_dev);
1187err_alloc_dev: 1242err_alloc_dev:
1188 return err; 1243 return err;
1189} 1244}
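
The sit changes drop the ipip6_lock spinlock: tunnel hash chains are now modified only under RTNL with rcu_assign_pointer()/rtnl_dereference(), while the receive path walks them locklessly through for_each_ip_tunnel_rcu(). A userspace sketch of the same single-writer publish / lockless-reader traversal, with C11 atomics standing in for the RCU primitives:

#include <stdatomic.h>
#include <stdio.h>

struct tun {
	int id;
	_Atomic(struct tun *) next;
};

static _Atomic(struct tun *) head;

/* writer side, serialised externally (RTNL in the kernel); the release
 * store plays the role of rcu_assign_pointer(): readers that see the new
 * head also see a fully initialised node */
static void tun_link(struct tun *t)
{
	struct tun *first = atomic_load_explicit(&head, memory_order_relaxed);

	atomic_store_explicit(&t->next, first, memory_order_relaxed);
	atomic_store_explicit(&head, t, memory_order_release);
}

/* reader side, lockless; acquire loads stand in for rcu_dereference() */
static struct tun *tun_lookup(int id)
{
	struct tun *t;

	for (t = atomic_load_explicit(&head, memory_order_acquire);
	     t != NULL;
	     t = atomic_load_explicit(&t->next, memory_order_acquire))
		if (t->id == id)
			return t;
	return NULL;
}

int main(void)
{
	static struct tun a = { .id = 1 }, b = { .id = 2 };

	tun_link(&a);
	tun_link(&b);
	printf("found id=%d\n", tun_lookup(1)->id);
	return 0;
}

Unlinking (not shown) additionally needs a grace period before a node can be freed, which the kernel side gets from RCU.
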
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index fe6d40418c0b..7e41e2cbb85e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -139,7 +139,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
139 return -EINVAL; 139 return -EINVAL;
140 140
141 if (usin->sin6_family != AF_INET6) 141 if (usin->sin6_family != AF_INET6)
142 return(-EAFNOSUPPORT); 142 return -EAFNOSUPPORT;
143 143
144 memset(&fl, 0, sizeof(fl)); 144 memset(&fl, 0, sizeof(fl));
145 145
@@ -1409,7 +1409,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1409 1409
1410 newsk = tcp_create_openreq_child(sk, req, skb); 1410 newsk = tcp_create_openreq_child(sk, req, skb);
1411 if (newsk == NULL) 1411 if (newsk == NULL)
1412 goto out; 1412 goto out_nonewsk;
1413 1413
1414 /* 1414 /*
1415 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1415 * No need to charge this sock to the relevant IPv6 refcnt debug socks
@@ -1497,18 +1497,22 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1497 } 1497 }
1498#endif 1498#endif
1499 1499
1500 if (__inet_inherit_port(sk, newsk) < 0) {
1501 sock_put(newsk);
1502 goto out;
1503 }
1500 __inet6_hash(newsk, NULL); 1504 __inet6_hash(newsk, NULL);
1501 __inet_inherit_port(sk, newsk);
1502 1505
1503 return newsk; 1506 return newsk;
1504 1507
1505out_overflow: 1508out_overflow:
1506 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1509 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1507out: 1510out_nonewsk:
1508 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1509 if (opt && opt != np->opt) 1511 if (opt && opt != np->opt)
1510 sock_kfree_s(sk, opt, opt->tot_len); 1512 sock_kfree_s(sk, opt, opt->tot_len);
1511 dst_release(dst); 1513 dst_release(dst);
1514out:
1515 NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
1512 return NULL; 1516 return NULL;
1513} 1517}
1514 1518
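
The tcp_v6_syn_recv_sock() fix makes __inet_inherit_port() failure survivable: it is checked before the new socket is hashed, a failure drops the socket with sock_put(), and the LISTENDROPS bump moves to the final out: label so every failure path is counted exactly once without re-running the opt/dst cleanup. The underlying shape is the usual reverse-order goto unwind, sketched here with stub steps:

#include <stdio.h>

static int step_a(void) { return 0; }	/* e.g. create the new socket */
static int step_b(void) { return -1; }	/* e.g. inherit the port: fails */
static void undo_a(void) { puts("undo step a"); }

static int setup(void)
{
	if (step_a() < 0)
		goto out;
	if (step_b() < 0)
		goto out_a;	/* unwind only what step_a() built */
	return 0;

out_a:
	undo_a();
out:
	puts("count the drop once, on every failure path");
	return -1;
}

int main(void)
{
	return setup() == 0 ? 0 : 1;
}
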
diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index fc3c86a47452..d9864725d0c6 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -30,8 +30,8 @@
30#include <net/protocol.h> 30#include <net/protocol.h>
31#include <net/xfrm.h> 31#include <net/xfrm.h>
32 32
33static struct xfrm6_tunnel *tunnel6_handlers; 33static struct xfrm6_tunnel *tunnel6_handlers __read_mostly;
34static struct xfrm6_tunnel *tunnel46_handlers; 34static struct xfrm6_tunnel *tunnel46_handlers __read_mostly;
35static DEFINE_MUTEX(tunnel6_mutex); 35static DEFINE_MUTEX(tunnel6_mutex);
36 36
37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family) 37int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
@@ -51,7 +51,7 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
51 } 51 }
52 52
53 handler->next = *pprev; 53 handler->next = *pprev;
54 *pprev = handler; 54 rcu_assign_pointer(*pprev, handler);
55 55
56 ret = 0; 56 ret = 0;
57 57
@@ -88,6 +88,11 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
88 88
89EXPORT_SYMBOL(xfrm6_tunnel_deregister); 89EXPORT_SYMBOL(xfrm6_tunnel_deregister);
90 90
91#define for_each_tunnel_rcu(head, handler) \
92 for (handler = rcu_dereference(head); \
93 handler != NULL; \
94 handler = rcu_dereference(handler->next)) \
95
91static int tunnel6_rcv(struct sk_buff *skb) 96static int tunnel6_rcv(struct sk_buff *skb)
92{ 97{
93 struct xfrm6_tunnel *handler; 98 struct xfrm6_tunnel *handler;
@@ -95,7 +100,7 @@ static int tunnel6_rcv(struct sk_buff *skb)
95 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) 100 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
96 goto drop; 101 goto drop;
97 102
98 for (handler = tunnel6_handlers; handler; handler = handler->next) 103 for_each_tunnel_rcu(tunnel6_handlers, handler)
99 if (!handler->handler(skb)) 104 if (!handler->handler(skb))
100 return 0; 105 return 0;
101 106
@@ -113,7 +118,7 @@ static int tunnel46_rcv(struct sk_buff *skb)
113 if (!pskb_may_pull(skb, sizeof(struct iphdr))) 118 if (!pskb_may_pull(skb, sizeof(struct iphdr)))
114 goto drop; 119 goto drop;
115 120
116 for (handler = tunnel46_handlers; handler; handler = handler->next) 121 for_each_tunnel_rcu(tunnel46_handlers, handler)
117 if (!handler->handler(skb)) 122 if (!handler->handler(skb))
118 return 0; 123 return 0;
119 124
@@ -129,7 +134,7 @@ static void tunnel6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
129{ 134{
130 struct xfrm6_tunnel *handler; 135 struct xfrm6_tunnel *handler;
131 136
132 for (handler = tunnel6_handlers; handler; handler = handler->next) 137 for_each_tunnel_rcu(tunnel6_handlers, handler)
133 if (!handler->err_handler(skb, opt, type, code, offset, info)) 138 if (!handler->err_handler(skb, opt, type, code, offset, info))
134 break; 139 break;
135} 140}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5acb3560ff15..c84dad432114 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -122,8 +122,8 @@ static void udp_v6_rehash(struct sock *sk)
122 122
123static inline int compute_score(struct sock *sk, struct net *net, 123static inline int compute_score(struct sock *sk, struct net *net,
124 unsigned short hnum, 124 unsigned short hnum,
125 struct in6_addr *saddr, __be16 sport, 125 const struct in6_addr *saddr, __be16 sport,
126 struct in6_addr *daddr, __be16 dport, 126 const struct in6_addr *daddr, __be16 dport,
127 int dif) 127 int dif)
128{ 128{
129 int score = -1; 129 int score = -1;
@@ -239,8 +239,8 @@ exact_match:
239} 239}
240 240
241static struct sock *__udp6_lib_lookup(struct net *net, 241static struct sock *__udp6_lib_lookup(struct net *net,
242 struct in6_addr *saddr, __be16 sport, 242 const struct in6_addr *saddr, __be16 sport,
243 struct in6_addr *daddr, __be16 dport, 243 const struct in6_addr *daddr, __be16 dport,
244 int dif, struct udp_table *udptable) 244 int dif, struct udp_table *udptable)
245{ 245{
246 struct sock *sk, *result; 246 struct sock *sk, *result;
@@ -320,6 +320,14 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
320 udptable); 320 udptable);
321} 321}
322 322
323struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
324 const struct in6_addr *daddr, __be16 dport, int dif)
325{
326 return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table);
327}
328EXPORT_SYMBOL_GPL(udp6_lib_lookup);
329
330
323/* 331/*
324 * This should be easy, if there is something there we 332 * This should be easy, if there is something there we
325 * return it, otherwise we block. 333 * return it, otherwise we block.
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 6baeabbbca82..7e74023ea6e4 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -199,7 +199,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops)
     struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops);
 
     xfrm6_policy_afinfo.garbage_collect(net);
-    return (atomic_read(&ops->entries) > ops->gc_thresh * 2);
+    return dst_entries_get_fast(ops) > ops->gc_thresh * 2;
 }
 
 static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
@@ -255,7 +255,6 @@ static struct dst_ops xfrm6_dst_ops = {
     .ifdown =       xfrm6_dst_ifdown,
     .local_out =    __ip6_local_out,
     .gc_thresh =    1024,
-    .entries =      ATOMIC_INIT(0),
 };
 
 static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
@@ -312,11 +311,13 @@ int __init xfrm6_init(void)
      */
     gc_thresh = FIB6_TABLE_HASHSZ * 8;
     xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh;
+    dst_entries_init(&xfrm6_dst_ops);
 
     ret = xfrm6_policy_init();
-    if (ret)
+    if (ret) {
+        dst_entries_destroy(&xfrm6_dst_ops);
         goto out;
-
+    }
     ret = xfrm6_state_init();
     if (ret)
         goto out_policy;
@@ -341,4 +342,5 @@ void xfrm6_fini(void)
     //xfrm6_input_fini();
     xfrm6_policy_fini();
     xfrm6_state_fini();
+    dst_entries_destroy(&xfrm6_dst_ops);
 }
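The static atomic `entries` field of struct dst_ops is being replaced tree-wide by a percpu counter: hot paths read it with dst_entries_get_fast(), and the owner of the ops must now allocate and free the counter explicitly, including on every error path, as the hunk above does. A minimal sketch of the lifecycle, with the registration helper being an assumed placeholder:

    /* Hedged sketch of the dst_ops percpu-counter pattern. */
    #include <net/dst_ops.h>

    static struct dst_ops example_dst_ops = {
        .gc_thresh = 1024,
        /* note: no more .entries = ATOMIC_INIT(0) initializer */
    };

    static int __init example_init(void)
    {
        int err;

        err = dst_entries_init(&example_dst_ops); /* allocates the counter */
        if (err)
            return err;

        err = example_register_everything();      /* assumed helper */
        if (err)
            dst_entries_destroy(&example_dst_ops); /* undo on failure */
        return err;
    }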
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index 2ce3a8278f26..2969cad408de 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -199,7 +199,7 @@ static void x6spi_destroy_rcu(struct rcu_head *head)
             container_of(head, struct xfrm6_tunnel_spi, rcu_head));
 }
 
-void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr)
+static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr)
 {
     struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
     struct xfrm6_tunnel_spi *x6spi;
@@ -223,8 +223,6 @@ void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr)
     spin_unlock_bh(&xfrm6_tunnel_spi_lock);
 }
 
-EXPORT_SYMBOL(xfrm6_tunnel_free_spi);
-
 static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 {
     skb_push(skb, -skb_network_offset(skb));
@@ -317,13 +315,13 @@ static const struct xfrm_type xfrm6_tunnel_type = {
     .output     = xfrm6_tunnel_output,
 };
 
-static struct xfrm6_tunnel xfrm6_tunnel_handler = {
+static struct xfrm6_tunnel xfrm6_tunnel_handler __read_mostly = {
     .handler    = xfrm6_tunnel_rcv,
     .err_handler    = xfrm6_tunnel_err,
     .priority   = 2,
 };
 
-static struct xfrm6_tunnel xfrm46_tunnel_handler = {
+static struct xfrm6_tunnel xfrm46_tunnel_handler __read_mostly = {
     .handler    = xfrm6_tunnel_rcv,
     .err_handler    = xfrm6_tunnel_err,
     .priority   = 2,
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index fd55b5135de5..7f097989cde2 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -573,9 +573,9 @@ static int irda_find_lsap_sel(struct irda_sock *self, char *name)
         /* Requested object/attribute doesn't exist */
         if((self->errno == IAS_CLASS_UNKNOWN) ||
            (self->errno == IAS_ATTRIB_UNKNOWN))
-            return (-EADDRNOTAVAIL);
+            return -EADDRNOTAVAIL;
         else
-            return (-EHOSTUNREACH);
+            return -EHOSTUNREACH;
     }
 
     /* Get the remote TSAP selector */
@@ -663,7 +663,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
                    __func__, name);
             self->daddr = DEV_ADDR_ANY;
             kfree(discoveries);
-            return(-ENOTUNIQ);
+            return -ENOTUNIQ;
         }
         /* First time we found that one, save it ! */
         daddr = self->daddr;
@@ -677,7 +677,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
             IRDA_DEBUG(0, "%s(), unexpected IAS query failure\n", __func__);
             self->daddr = DEV_ADDR_ANY;
             kfree(discoveries);
-            return(-EHOSTUNREACH);
+            return -EHOSTUNREACH;
             break;
         }
     }
@@ -689,7 +689,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name)
         IRDA_DEBUG(1, "%s(), cannot discover service ''%s'' in any device !!!\n",
                __func__, name);
         self->daddr = DEV_ADDR_ANY;
-        return(-EADDRNOTAVAIL);
+        return -EADDRNOTAVAIL;
     }
 
     /* Revert back to discovered device & service */
@@ -715,14 +715,11 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
     struct sockaddr_irda saddr;
     struct sock *sk = sock->sk;
     struct irda_sock *self = irda_sk(sk);
-    int err;
 
-    lock_kernel();
     memset(&saddr, 0, sizeof(saddr));
     if (peer) {
-        err = -ENOTCONN;
         if (sk->sk_state != TCP_ESTABLISHED)
-            goto out;
+            return -ENOTCONN;
 
         saddr.sir_family = AF_IRDA;
         saddr.sir_lsap_sel = self->dtsap_sel;
@@ -739,10 +736,8 @@ static int irda_getname(struct socket *sock, struct sockaddr *uaddr,
     /* uaddr_len come to us uninitialised */
     *uaddr_len = sizeof (struct sockaddr_irda);
     memcpy(uaddr, &saddr, *uaddr_len);
-    err = 0;
-out:
-    unlock_kernel();
-    return err;
+
+    return 0;
 }
 
 /*
@@ -758,7 +753,8 @@ static int irda_listen(struct socket *sock, int backlog)
 
     IRDA_DEBUG(2, "%s()\n", __func__);
 
-    lock_kernel();
+    lock_sock(sk);
+
     if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) &&
         (sk->sk_type != SOCK_DGRAM))
         goto out;
@@ -770,7 +766,7 @@ static int irda_listen(struct socket *sock, int backlog)
         err = 0;
     }
 out:
-    unlock_kernel();
+    release_sock(sk);
 
     return err;
 }
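From here to the end of af_irda.c the patch applies one systematic conversion: every lock_kernel()/unlock_kernel() pair (the big kernel lock, being removed from the tree) becomes lock_sock(sk)/release_sock(sk), which serializes against other users of the same socket instead of the whole kernel. A hedged skeleton of the shape every converted handler takes; the function and the sanity check are illustrative, not from this patch:

    /* Illustrative sketch of the BKL-to-socket-lock conversion pattern. */
    static int example_sock_op(struct socket *sock)
    {
        struct sock *sk = sock->sk;
        int err = -EINVAL;

        lock_sock(sk);                  /* was: lock_kernel() */
        if (!example_args_are_sane(sock))   /* assumed check */
            goto out;
        err = 0;
    out:
        release_sock(sk);               /* was: unlock_kernel() */
        return err;
    }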
@@ -793,7 +789,7 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
     if (addr_len != sizeof(struct sockaddr_irda))
         return -EINVAL;
 
-    lock_kernel();
+    lock_sock(sk);
 #ifdef CONFIG_IRDA_ULTRA
     /* Special care for Ultra sockets */
     if ((sk->sk_type == SOCK_DGRAM) &&
@@ -836,7 +832,7 @@ static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 
     err = 0;
 out:
-    unlock_kernel();
+    release_sock(sk);
     return err;
 }
 
@@ -856,12 +852,13 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
 
     IRDA_DEBUG(2, "%s()\n", __func__);
 
-    lock_kernel();
     err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
     if (err)
-        goto out;
+        return err;
 
     err = -EINVAL;
+
+    lock_sock(sk);
     if (sock->state != SS_UNCONNECTED)
         goto out;
 
@@ -947,7 +944,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
     irda_connect_response(new);
     err = 0;
 out:
-    unlock_kernel();
+    release_sock(sk);
     return err;
 }
 
@@ -981,7 +978,7 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
 
     IRDA_DEBUG(2, "%s(%p)\n", __func__, self);
 
-    lock_kernel();
+    lock_sock(sk);
     /* Don't allow connect for Ultra sockets */
     err = -ESOCKTNOSUPPORT;
     if ((sk->sk_type == SOCK_DGRAM) && (sk->sk_protocol == IRDAPROTO_ULTRA))
@@ -1072,6 +1069,8 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
 
     if (sk->sk_state != TCP_ESTABLISHED) {
         sock->state = SS_UNCONNECTED;
+        if (sk->sk_prot->disconnect(sk, flags))
+            sock->state = SS_DISCONNECTING;
         err = sock_error(sk);
         if (!err)
             err = -ECONNRESET;
@@ -1084,7 +1083,7 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
     self->saddr = irttp_get_saddr(self->tsap);
     err = 0;
 out:
-    unlock_kernel();
+    release_sock(sk);
     return err;
 }
 
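The two added lines in irda_connect() matter beyond locking: when the wait for TCP_ESTABLISHED fails, the socket is now actively torn down via sk->sk_prot->disconnect() so a later connect() attempt starts from a clean state, matching what other protocol families do. Read straight through, the resulting failure path from the hunk above is:

    if (sk->sk_state != TCP_ESTABLISHED) {
        sock->state = SS_UNCONNECTED;
        if (sk->sk_prot->disconnect(sk, flags))
            sock->state = SS_DISCONNECTING;
        err = sock_error(sk);
        if (!err)
            err = -ECONNRESET;
    }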
@@ -1231,7 +1230,6 @@ static int irda_release(struct socket *sock)
     if (sk == NULL)
         return 0;
 
-    lock_kernel();
     lock_sock(sk);
     sk->sk_state = TCP_CLOSE;
     sk->sk_shutdown |= SEND_SHUTDOWN;
@@ -1250,7 +1248,6 @@ static int irda_release(struct socket *sock)
     /* Destroy networking socket if we are the last reference on it,
      * i.e. if(sk->sk_refcnt == 0) -> sk_free(sk) */
     sock_put(sk);
-    unlock_kernel();
 
     /* Notes on socket locking and deallocation... - Jean II
      * In theory we should put pairs of sock_hold() / sock_put() to
@@ -1298,7 +1295,6 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
 
     IRDA_DEBUG(4, "%s(), len=%zd\n", __func__, len);
 
-    lock_kernel();
     /* Note : socket.c set MSG_EOR on SEQPACKET sockets */
     if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT |
                    MSG_NOSIGNAL)) {
@@ -1306,6 +1302,8 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
         goto out;
     }
 
+    lock_sock(sk);
+
     if (sk->sk_shutdown & SEND_SHUTDOWN)
         goto out_err;
 
@@ -1361,14 +1359,14 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
         goto out_err;
     }
 
-    unlock_kernel();
+    release_sock(sk);
     /* Tell client how much data we actually sent */
     return len;
 
 out_err:
     err = sk_stream_error(sk, msg->msg_flags, err);
 out:
-    unlock_kernel();
+    release_sock(sk);
     return err;
 
 }
@@ -1390,14 +1388,10 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
 
     IRDA_DEBUG(4, "%s()\n", __func__);
 
-    lock_kernel();
-    if ((err = sock_error(sk)) < 0)
-        goto out;
-
     skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
                 flags & MSG_DONTWAIT, &err);
     if (!skb)
-        goto out;
+        return err;
 
     skb_reset_transport_header(skb);
     copied = skb->len;
@@ -1425,12 +1419,8 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
             irttp_flow_request(self->tsap, FLOW_START);
         }
     }
-    unlock_kernel();
-    return copied;
 
-out:
-    unlock_kernel();
-    return err;
+    return copied;
 }
 
 /*
@@ -1448,17 +1438,15 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 
     IRDA_DEBUG(3, "%s()\n", __func__);
 
-    lock_kernel();
     if ((err = sock_error(sk)) < 0)
-        goto out;
+        return err;
 
-    err = -EINVAL;
     if (sock->flags & __SO_ACCEPTCON)
-        goto out;
+        return -EINVAL;
 
     err =-EOPNOTSUPP;
     if (flags & MSG_OOB)
-        goto out;
+        return -EOPNOTSUPP;
 
     err = 0;
     target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
@@ -1500,7 +1488,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
             finish_wait(sk_sleep(sk), &wait);
 
             if (err)
-                goto out;
+                return err;
             if (sk->sk_shutdown & RCV_SHUTDOWN)
                 break;
 
@@ -1553,9 +1541,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
         }
     }
 
-out:
-    unlock_kernel();
-    return err ? : copied;
+    return copied;
 }
 
 /*
@@ -1573,13 +1559,12 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
     struct sk_buff *skb;
     int err;
 
-    lock_kernel();
-
     IRDA_DEBUG(4, "%s(), len=%zd\n", __func__, len);
 
-    err = -EINVAL;
     if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
-        goto out;
+        return -EINVAL;
+
+    lock_sock(sk);
 
     if (sk->sk_shutdown & SEND_SHUTDOWN) {
         send_sig(SIGPIPE, current, 0);
@@ -1630,10 +1615,12 @@ static int irda_sendmsg_dgram(struct kiocb *iocb, struct socket *sock,
         IRDA_DEBUG(0, "%s(), err=%d\n", __func__, err);
         goto out;
     }
-    unlock_kernel();
+
+    release_sock(sk);
     return len;
+
 out:
-    unlock_kernel();
+    release_sock(sk);
     return err;
 }
 
@@ -1656,10 +1643,11 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
 
     IRDA_DEBUG(4, "%s(), len=%zd\n", __func__, len);
 
-    lock_kernel();
     err = -EINVAL;
     if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT))
-        goto out;
+        return -EINVAL;
+
+    lock_sock(sk);
 
     err = -EPIPE;
     if (sk->sk_shutdown & SEND_SHUTDOWN) {
@@ -1732,7 +1720,7 @@ static int irda_sendmsg_ultra(struct kiocb *iocb, struct socket *sock,
     if (err)
         IRDA_DEBUG(0, "%s(), err=%d\n", __func__, err);
 out:
-    unlock_kernel();
+    release_sock(sk);
     return err ? : len;
 }
 #endif /* CONFIG_IRDA_ULTRA */
@@ -1747,7 +1735,7 @@ static int irda_shutdown(struct socket *sock, int how)
 
     IRDA_DEBUG(1, "%s(%p)\n", __func__, self);
 
-    lock_kernel();
+    lock_sock(sk);
 
     sk->sk_state = TCP_CLOSE;
     sk->sk_shutdown |= SEND_SHUTDOWN;
@@ -1769,7 +1757,7 @@ static int irda_shutdown(struct socket *sock, int how)
     self->daddr = DEV_ADDR_ANY; /* Until we get re-connected */
     self->saddr = 0x0;          /* so IrLMP assign us any link */
 
-    unlock_kernel();
+    release_sock(sk);
 
     return 0;
 }
@@ -1786,7 +1774,6 @@ static unsigned int irda_poll(struct file * file, struct socket *sock,
 
     IRDA_DEBUG(4, "%s()\n", __func__);
 
-    lock_kernel();
     poll_wait(file, sk_sleep(sk), wait);
     mask = 0;
 
@@ -1834,20 +1821,8 @@ static unsigned int irda_poll(struct file * file, struct socket *sock,
     default:
         break;
     }
-    unlock_kernel();
-    return mask;
-}
 
-static unsigned int irda_datagram_poll(struct file *file, struct socket *sock,
-                       poll_table *wait)
-{
-    int err;
-
-    lock_kernel();
-    err = datagram_poll(file, sock, wait);
-    unlock_kernel();
-
-    return err;
+    return mask;
 }
 
 /*
@@ -1860,7 +1835,6 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 
     IRDA_DEBUG(4, "%s(), cmd=%#x\n", __func__, cmd);
 
-    lock_kernel();
     err = -EINVAL;
     switch (cmd) {
     case TIOCOUTQ: {
@@ -1903,7 +1877,6 @@ static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
         IRDA_DEBUG(1, "%s(), doing device ioctl!\n", __func__);
         err = -ENOIOCTLCMD;
     }
-    unlock_kernel();
 
     return err;
 }
@@ -1927,7 +1900,7 @@ static int irda_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
  *    Set some options for the socket
  *
  */
-static int __irda_setsockopt(struct socket *sock, int level, int optname,
+static int irda_setsockopt(struct socket *sock, int level, int optname,
                char __user *optval, unsigned int optlen)
 {
     struct sock *sk = sock->sk;
@@ -1935,13 +1908,15 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
     struct irda_ias_set    *ias_opt;
     struct ias_object      *ias_obj;
     struct ias_attrib *    ias_attr;   /* Attribute in IAS object */
-    int opt, free_ias = 0;
+    int opt, free_ias = 0, err = 0;
 
     IRDA_DEBUG(2, "%s(%p)\n", __func__, self);
 
     if (level != SOL_IRLMP)
         return -ENOPROTOOPT;
 
+    lock_sock(sk);
+
     switch (optname) {
     case IRLMP_IAS_SET:
         /* The user want to add an attribute to an existing IAS object
@@ -1951,17 +1926,22 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
          * create the right attribute...
          */
 
-        if (optlen != sizeof(struct irda_ias_set))
-            return -EINVAL;
+        if (optlen != sizeof(struct irda_ias_set)) {
+            err = -EINVAL;
+            goto out;
+        }
 
         ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC);
-        if (ias_opt == NULL)
-            return -ENOMEM;
+        if (ias_opt == NULL) {
+            err = -ENOMEM;
+            goto out;
+        }
 
         /* Copy query to the driver. */
         if (copy_from_user(ias_opt, optval, optlen)) {
             kfree(ias_opt);
-            return -EFAULT;
+            err = -EFAULT;
+            goto out;
         }
 
         /* Find the object we target.
@@ -1971,7 +1951,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
         if(ias_opt->irda_class_name[0] == '\0') {
             if(self->ias_obj == NULL) {
                 kfree(ias_opt);
-                return -EINVAL;
+                err = -EINVAL;
+                goto out;
             }
             ias_obj = self->ias_obj;
         } else
@@ -1983,7 +1964,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
         if((!capable(CAP_NET_ADMIN)) &&
            ((ias_obj == NULL) || (ias_obj != self->ias_obj))) {
             kfree(ias_opt);
-            return -EPERM;
+            err = -EPERM;
+            goto out;
         }
 
         /* If the object doesn't exist, create it */
@@ -1993,7 +1975,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
                            jiffies);
             if (ias_obj == NULL) {
                 kfree(ias_opt);
-                return -ENOMEM;
+                err = -ENOMEM;
+                goto out;
             }
             free_ias = 1;
         }
@@ -2005,7 +1988,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
                 kfree(ias_obj->name);
                 kfree(ias_obj);
             }
-            return -EINVAL;
+            err = -EINVAL;
+            goto out;
         }
 
         /* Look at the type */
@@ -2028,7 +2012,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
                 kfree(ias_obj);
             }
 
-            return -EINVAL;
+            err = -EINVAL;
+            goto out;
         }
         /* Add an octet sequence attribute */
         irias_add_octseq_attrib(
@@ -2060,7 +2045,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
                 kfree(ias_obj->name);
                 kfree(ias_obj);
             }
-            return -EINVAL;
+            err = -EINVAL;
+            goto out;
         }
         irias_insert_object(ias_obj);
         kfree(ias_opt);
@@ -2071,17 +2057,22 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
          * object is not owned by the kernel and delete it.
          */
 
-        if (optlen != sizeof(struct irda_ias_set))
-            return -EINVAL;
+        if (optlen != sizeof(struct irda_ias_set)) {
+            err = -EINVAL;
+            goto out;
+        }
 
         ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC);
-        if (ias_opt == NULL)
-            return -ENOMEM;
+        if (ias_opt == NULL) {
+            err = -ENOMEM;
+            goto out;
+        }
 
         /* Copy query to the driver. */
         if (copy_from_user(ias_opt, optval, optlen)) {
             kfree(ias_opt);
-            return -EFAULT;
+            err = -EFAULT;
+            goto out;
         }
 
         /* Find the object we target.
@@ -2094,7 +2085,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
         ias_obj = irias_find_object(ias_opt->irda_class_name);
         if(ias_obj == (struct ias_object *) NULL) {
             kfree(ias_opt);
-            return -EINVAL;
+            err = -EINVAL;
+            goto out;
         }
 
         /* Only ROOT can mess with the global IAS database.
@@ -2103,7 +2095,8 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
         if((!capable(CAP_NET_ADMIN)) &&
            ((ias_obj == NULL) || (ias_obj != self->ias_obj))) {
             kfree(ias_opt);
-            return -EPERM;
+            err = -EPERM;
+            goto out;
         }
 
         /* Find the attribute (in the object) we target */
@@ -2111,14 +2104,16 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
                          ias_opt->irda_attrib_name);
         if(ias_attr == (struct ias_attrib *) NULL) {
             kfree(ias_opt);
-            return -EINVAL;
+            err = -EINVAL;
+            goto out;
         }
 
         /* Check is the user space own the object */
         if(ias_attr->value->owner != IAS_USER_ATTR) {
             IRDA_DEBUG(1, "%s(), attempting to delete a kernel attribute\n", __func__);
             kfree(ias_opt);
-            return -EPERM;
+            err = -EPERM;
+            goto out;
         }
 
         /* Remove the attribute (and maybe the object) */
@@ -2126,11 +2121,15 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
         kfree(ias_opt);
         break;
     case IRLMP_MAX_SDU_SIZE:
-        if (optlen < sizeof(int))
-            return -EINVAL;
+        if (optlen < sizeof(int)) {
+            err = -EINVAL;
+            goto out;
+        }
 
-        if (get_user(opt, (int __user *)optval))
-            return -EFAULT;
+        if (get_user(opt, (int __user *)optval)) {
+            err = -EFAULT;
+            goto out;
+        }
 
         /* Only possible for a seqpacket service (TTP with SAR) */
         if (sk->sk_type != SOCK_SEQPACKET) {
@@ -2140,16 +2139,21 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
         } else {
             IRDA_WARNING("%s: not allowed to set MAXSDUSIZE for this socket type!\n",
                      __func__);
-            return -ENOPROTOOPT;
+            err = -ENOPROTOOPT;
+            goto out;
         }
         break;
     case IRLMP_HINTS_SET:
-        if (optlen < sizeof(int))
-            return -EINVAL;
+        if (optlen < sizeof(int)) {
+            err = -EINVAL;
+            goto out;
+        }
 
         /* The input is really a (__u8 hints[2]), easier as an int */
-        if (get_user(opt, (int __user *)optval))
-            return -EFAULT;
+        if (get_user(opt, (int __user *)optval)) {
+            err = -EFAULT;
+            goto out;
+        }
 
         /* Unregister any old registration */
         if (self->skey)
@@ -2163,12 +2167,16 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
          * making a discovery (nodes which don't match any hint
          * bit in the mask are not reported).
          */
-        if (optlen < sizeof(int))
-            return -EINVAL;
+        if (optlen < sizeof(int)) {
+            err = -EINVAL;
+            goto out;
+        }
 
         /* The input is really a (__u8 hints[2]), easier as an int */
-        if (get_user(opt, (int __user *)optval))
-            return -EFAULT;
+        if (get_user(opt, (int __user *)optval)) {
+            err = -EFAULT;
+            goto out;
+        }
 
         /* Set the new hint mask */
         self->mask.word = (__u16) opt;
@@ -2180,19 +2188,12 @@ static int __irda_setsockopt(struct socket *sock, int level, int optname,
 
         break;
     default:
-        return -ENOPROTOOPT;
+        err = -ENOPROTOOPT;
+        break;
     }
-    return 0;
-}
 
-static int irda_setsockopt(struct socket *sock, int level, int optname,
-               char __user *optval, unsigned int optlen)
-{
-    int err;
-
-    lock_kernel();
-    err = __irda_setsockopt(sock, level, optname, optval, optlen);
-    unlock_kernel();
+out:
+    release_sock(sk);
 
     return err;
 }
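With the __irda_setsockopt() BKL wrapper gone, the function folds its many early returns into a single out: label so release_sock() runs on every path, carrying the result in an err variable initialized to 0. A hedged skeleton of the converted shape; the option constant is a placeholder, not a real IrDA option:

    /* Illustrative sketch of the single-exit locking pattern adopted above. */
    static int example_setsockopt(struct socket *sock, int level, int optname,
                                  char __user *optval, unsigned int optlen)
    {
        struct sock *sk = sock->sk;
        int err = 0;

        lock_sock(sk);
        switch (optname) {
        case 1: /* EXAMPLE_OPT, assumed */
            if (optlen < sizeof(int)) {
                err = -EINVAL;
                goto out;   /* never return with the lock held */
            }
            break;
        default:
            err = -ENOPROTOOPT;
            break;
        }
    out:
        release_sock(sk);
        return err;
    }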
@@ -2249,7 +2250,7 @@ static int irda_extract_ias_value(struct irda_ias_set *ias_opt,
 /*
  * Function irda_getsockopt (sock, level, optname, optval, optlen)
  */
-static int __irda_getsockopt(struct socket *sock, int level, int optname,
+static int irda_getsockopt(struct socket *sock, int level, int optname,
                char __user *optval, int __user *optlen)
 {
     struct sock *sk = sock->sk;
@@ -2262,7 +2263,7 @@ static int __irda_getsockopt(struct socket *sock, int level, int optname,
     int daddr = DEV_ADDR_ANY;   /* Dest address for IAS queries */
     int val = 0;
     int len = 0;
-    int err;
+    int err = 0;
     int offset, total;
 
     IRDA_DEBUG(2, "%s(%p)\n", __func__, self);
@@ -2276,15 +2277,18 @@ static int __irda_getsockopt(struct socket *sock, int level, int optname,
     if(len < 0)
         return -EINVAL;
 
+    lock_sock(sk);
+
     switch (optname) {
     case IRLMP_ENUMDEVICES:
         /* Ask lmp for the current discovery log */
         discoveries = irlmp_get_discoveries(&list.len, self->mask.word,
                             self->nslots);
         /* Check if the we got some results */
-        if (discoveries == NULL)
-            return -EAGAIN;     /* Didn't find any devices */
-        err = 0;
+        if (discoveries == NULL) {
+            err = -EAGAIN;
+            goto out;       /* Didn't find any devices */
+        }
 
         /* Write total list length back to client */
         if (copy_to_user(optval, &list,
@@ -2297,8 +2301,7 @@ static int __irda_getsockopt(struct socket *sock, int level, int optname,
             sizeof(struct irda_device_info);
 
         /* Copy the list itself - watch for overflow */
-        if(list.len > 2048)
-        {
+        if (list.len > 2048) {
             err = -EINVAL;
             goto bed;
         }
@@ -2314,17 +2317,20 @@ static int __irda_getsockopt(struct socket *sock, int level, int optname,
 bed:
         /* Free up our buffer */
         kfree(discoveries);
-        if (err)
-            return err;
         break;
     case IRLMP_MAX_SDU_SIZE:
         val = self->max_data_size;
         len = sizeof(int);
-        if (put_user(len, optlen))
-            return -EFAULT;
+        if (put_user(len, optlen)) {
+            err = -EFAULT;
+            goto out;
+        }
+
+        if (copy_to_user(optval, &val, len)) {
+            err = -EFAULT;
+            goto out;
+        }
 
-        if (copy_to_user(optval, &val, len))
-            return -EFAULT;
         break;
     case IRLMP_IAS_GET:
         /* The user want an object from our local IAS database.
@@ -2332,17 +2338,22 @@ bed:
          * that we found */
 
         /* Check that the user has allocated the right space for us */
-        if (len != sizeof(struct irda_ias_set))
-            return -EINVAL;
+        if (len != sizeof(struct irda_ias_set)) {
+            err = -EINVAL;
+            goto out;
+        }
 
         ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC);
-        if (ias_opt == NULL)
-            return -ENOMEM;
+        if (ias_opt == NULL) {
+            err = -ENOMEM;
+            goto out;
+        }
 
         /* Copy query to the driver. */
         if (copy_from_user(ias_opt, optval, len)) {
             kfree(ias_opt);
-            return -EFAULT;
+            err = -EFAULT;
+            goto out;
         }
 
         /* Find the object we target.
@@ -2355,7 +2366,8 @@ bed:
         ias_obj = irias_find_object(ias_opt->irda_class_name);
         if(ias_obj == (struct ias_object *) NULL) {
             kfree(ias_opt);
-            return -EINVAL;
+            err = -EINVAL;
+            goto out;
         }
 
         /* Find the attribute (in the object) we target */
@@ -2363,21 +2375,23 @@ bed:
                          ias_opt->irda_attrib_name);
         if(ias_attr == (struct ias_attrib *) NULL) {
             kfree(ias_opt);
-            return -EINVAL;
+            err = -EINVAL;
+            goto out;
         }
 
         /* Translate from internal to user structure */
         err = irda_extract_ias_value(ias_opt, ias_attr->value);
         if(err) {
             kfree(ias_opt);
-            return err;
+            goto out;
         }
 
         /* Copy reply to the user */
         if (copy_to_user(optval, ias_opt,
                  sizeof(struct irda_ias_set))) {
             kfree(ias_opt);
-            return -EFAULT;
+            err = -EFAULT;
+            goto out;
         }
         /* Note : don't need to put optlen, we checked it */
         kfree(ias_opt);
@@ -2388,17 +2402,22 @@ bed:
          * then wait for the answer to come back. */
 
         /* Check that the user has allocated the right space for us */
-        if (len != sizeof(struct irda_ias_set))
-            return -EINVAL;
+        if (len != sizeof(struct irda_ias_set)) {
+            err = -EINVAL;
+            goto out;
+        }
 
         ias_opt = kmalloc(sizeof(struct irda_ias_set), GFP_ATOMIC);
-        if (ias_opt == NULL)
-            return -ENOMEM;
+        if (ias_opt == NULL) {
+            err = -ENOMEM;
+            goto out;
+        }
 
         /* Copy query to the driver. */
         if (copy_from_user(ias_opt, optval, len)) {
             kfree(ias_opt);
-            return -EFAULT;
+            err = -EFAULT;
+            goto out;
         }
 
         /* At this point, there are two cases...
@@ -2419,7 +2438,8 @@ bed:
             daddr = ias_opt->daddr;
             if((!daddr) || (daddr == DEV_ADDR_ANY)) {
                 kfree(ias_opt);
-                return -EINVAL;
+                err = -EINVAL;
+                goto out;
             }
         }
 
@@ -2428,7 +2448,8 @@ bed:
             IRDA_WARNING("%s: busy with a previous query\n",
                      __func__);
             kfree(ias_opt);
-            return -EBUSY;
+            err = -EBUSY;
+            goto out;
         }
 
         self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self,
@@ -2436,7 +2457,8 @@ bed:
 
         if (self->iriap == NULL) {
             kfree(ias_opt);
-            return -ENOMEM;
+            err = -ENOMEM;
+            goto out;
         }
 
         /* Treat unexpected wakeup as disconnect */
@@ -2455,7 +2477,8 @@ bed:
              * we can free it regardless! */
             kfree(ias_opt);
             /* Treat signals as disconnect */
-            return -EHOSTUNREACH;
+            err = -EHOSTUNREACH;
+            goto out;
         }
 
         /* Check what happened */
@@ -2465,9 +2488,11 @@ bed:
             /* Requested object/attribute doesn't exist */
             if((self->errno == IAS_CLASS_UNKNOWN) ||
                (self->errno == IAS_ATTRIB_UNKNOWN))
-                return (-EADDRNOTAVAIL);
+                err = -EADDRNOTAVAIL;
             else
-                return (-EHOSTUNREACH);
+                err = -EHOSTUNREACH;
+
+            goto out;
         }
 
         /* Translate from internal to user structure */
@@ -2476,14 +2501,15 @@ bed:
         irias_delete_value(self->ias_result);
         if (err) {
             kfree(ias_opt);
-            return err;
+            goto out;
         }
 
         /* Copy reply to the user */
         if (copy_to_user(optval, ias_opt,
                  sizeof(struct irda_ias_set))) {
             kfree(ias_opt);
-            return -EFAULT;
+            err = -EFAULT;
+            goto out;
         }
         /* Note : don't need to put optlen, we checked it */
         kfree(ias_opt);
@@ -2504,11 +2530,15 @@ bed:
          */
 
         /* Check that the user is passing us an int */
-        if (len != sizeof(int))
-            return -EINVAL;
+        if (len != sizeof(int)) {
+            err = -EINVAL;
+            goto out;
+        }
         /* Get timeout in ms (max time we block the caller) */
-        if (get_user(val, (int __user *)optval))
-            return -EFAULT;
+        if (get_user(val, (int __user *)optval)) {
+            err = -EFAULT;
+            goto out;
+        }
 
         /* Tell IrLMP we want to be notified */
         irlmp_update_client(self->ckey, self->mask.word,
@@ -2520,8 +2550,6 @@ bed:
 
         /* Wait until a node is discovered */
         if (!self->cachedaddr) {
-            int ret = 0;
-
             IRDA_DEBUG(1, "%s(), nothing discovered yet, going to sleep...\n", __func__);
 
             /* Set watchdog timer to expire in <val> ms. */
@@ -2534,7 +2562,7 @@ bed:
             /* Wait for IR-LMP to call us back */
             __wait_event_interruptible(self->query_wait,
                   (self->cachedaddr != 0 || self->errno == -ETIME),
-                  ret);
+                  err);
 
             /* If watchdog is still activated, kill it! */
             if(timer_pending(&(self->watchdog)))
@@ -2542,8 +2570,8 @@ bed:
 
             IRDA_DEBUG(1, "%s(), ...waking up !\n", __func__);
 
-            if (ret != 0)
-                return ret;
+            if (err != 0)
+                goto out;
         }
         else
             IRDA_DEBUG(1, "%s(), found immediately !\n",
@@ -2566,25 +2594,19 @@ bed:
          * If the user want more details, he should query
          * the whole discovery log and pick one device...
          */
-        if (put_user(daddr, (int __user *)optval))
-            return -EFAULT;
+        if (put_user(daddr, (int __user *)optval)) {
+            err = -EFAULT;
+            goto out;
+        }
 
         break;
     default:
-        return -ENOPROTOOPT;
+        err = -ENOPROTOOPT;
     }
 
-    return 0;
-}
-
-static int irda_getsockopt(struct socket *sock, int level, int optname,
-               char __user *optval, int __user *optlen)
-{
-    int err;
+out:
 
-    lock_kernel();
-    err = __irda_getsockopt(sock, level, optname, optval, optlen);
-    unlock_kernel();
+    release_sock(sk);
 
     return err;
 }
@@ -2628,7 +2650,7 @@ static const struct proto_ops irda_seqpacket_ops = {
     .socketpair =   sock_no_socketpair,
     .accept =   irda_accept,
     .getname =  irda_getname,
-    .poll =     irda_datagram_poll,
+    .poll =     datagram_poll,
     .ioctl =    irda_ioctl,
 #ifdef CONFIG_COMPAT
     .compat_ioctl = irda_compat_ioctl,
@@ -2652,7 +2674,7 @@ static const struct proto_ops irda_dgram_ops = {
     .socketpair =   sock_no_socketpair,
     .accept =   irda_accept,
     .getname =  irda_getname,
-    .poll =     irda_datagram_poll,
+    .poll =     datagram_poll,
     .ioctl =    irda_ioctl,
 #ifdef CONFIG_COMPAT
     .compat_ioctl = irda_compat_ioctl,
@@ -2677,7 +2699,7 @@ static const struct proto_ops irda_ultra_ops = {
     .socketpair =   sock_no_socketpair,
     .accept =   sock_no_accept,
     .getname =  irda_getname,
-    .poll =     irda_datagram_poll,
+    .poll =     datagram_poll,
     .ioctl =    irda_ioctl,
 #ifdef CONFIG_COMPAT
     .compat_ioctl = irda_compat_ioctl,
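The irda_datagram_poll() shim deleted earlier existed only to wrap datagram_poll() in the BKL; the generic helper is safe without it, so the three proto_ops tables now point at it directly. An abridged, non-compilable sketch of the resulting wiring (only the relevant field is shown):

    /* Abridged sketch: the core helper is wired into proto_ops directly. */
    static const struct proto_ops example_dgram_ops = {
        .family = PF_IRDA,
        .poll   = datagram_poll,    /* was: a lock_kernel() wrapper */
        /* ... remaining ops unchanged ... */
    };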
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
index c1c8ae939126..36c3f037f172 100644
--- a/net/irda/discovery.c
+++ b/net/irda/discovery.c
@@ -315,7 +315,7 @@ struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
 
     /* Get the actual number of device in the buffer and return */
     *pn = i;
-    return(buffer);
+    return buffer;
 }
 
 #ifdef CONFIG_PROC_FS
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index faa82ca2dfdc..a39cca8331df 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -449,8 +449,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp)
     }
 
 #ifdef SERIAL_DO_RESTART
-    return ((self->flags & ASYNC_HUP_NOTIFY) ?
-        -EAGAIN : -ERESTARTSYS);
+    return (self->flags & ASYNC_HUP_NOTIFY) ?
+        -EAGAIN : -ERESTARTSYS;
 #else
     return -EAGAIN;
 #endif
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index fce364c6c71a..5b743bdd89ba 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -502,7 +502,8 @@ static void iriap_getvaluebyclass_confirm(struct iriap_cb *self,
         IRDA_DEBUG(4, "%s(), strlen=%d\n", __func__, value_len);
 
         /* Make sure the string is null-terminated */
-        fp[n+value_len] = 0x00;
+        if (n + value_len < skb->len)
+            fp[n + value_len] = 0x00;
         IRDA_DEBUG(4, "Got string %s\n", fp+n);
 
         /* Will truncate to IAS_MAX_STRING bytes */
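The iriap change is a hardening fix rather than a cleanup: value_len comes off the wire, so unconditionally writing the terminating NUL at fp[n + value_len] lets a malformed GetValueByClass reply store one byte past the end of the skb data. The guard keeps the store inside the received buffer. In spirit:

    /* Hedged sketch of the out-of-bounds write the check prevents;
     * the function wrapper is illustrative. */
    static void example_terminate_string(struct sk_buff *skb, __u8 *fp,
                                         int n, int value_len)
    {
        /* value_len is attacker-controlled: only NUL-terminate when
         * the target byte actually lies inside the skb payload. */
        if (n + value_len < skb->len)
            fp[n + value_len] = 0x00;
    }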
diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c
index 5bb8353105cc..8ee1ff6c742f 100644
--- a/net/irda/irlan/irlan_eth.c
+++ b/net/irda/irlan/irlan_eth.c
@@ -45,13 +45,11 @@ static int irlan_eth_close(struct net_device *dev);
 static netdev_tx_t  irlan_eth_xmit(struct sk_buff *skb,
                    struct net_device *dev);
 static void irlan_eth_set_multicast_list( struct net_device *dev);
-static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev);
 
 static const struct net_device_ops irlan_eth_netdev_ops = {
     .ndo_open               = irlan_eth_open,
     .ndo_stop               = irlan_eth_close,
     .ndo_start_xmit         = irlan_eth_xmit,
-    .ndo_get_stats          = irlan_eth_get_stats,
     .ndo_set_multicast_list = irlan_eth_set_multicast_list,
     .ndo_change_mtu         = eth_change_mtu,
     .ndo_validate_addr      = eth_validate_addr,
@@ -208,10 +206,10 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
      * tried :-) DB
      */
     /* irttp_data_request already free the packet */
-        self->stats.tx_dropped++;
+        dev->stats.tx_dropped++;
     } else {
-        self->stats.tx_packets++;
-        self->stats.tx_bytes += len;
+        dev->stats.tx_packets++;
+        dev->stats.tx_bytes += len;
     }
 
     return NETDEV_TX_OK;
@@ -226,15 +224,16 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb,
 int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
 {
     struct irlan_cb *self = instance;
+    struct net_device *dev = self->dev;
 
     if (skb == NULL) {
-        ++self->stats.rx_dropped;
+        dev->stats.rx_dropped++;
         return 0;
     }
     if (skb->len < ETH_HLEN) {
         IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n",
                __func__, skb->len);
-        ++self->stats.rx_dropped;
+        dev->stats.rx_dropped++;
         dev_kfree_skb(skb);
         return 0;
     }
@@ -244,10 +243,10 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb)
      * might have been previously set by the low level IrDA network
      * device driver
      */
-    skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */
+    skb->protocol = eth_type_trans(skb, dev); /* Remove eth header */
 
-    self->stats.rx_packets++;
-    self->stats.rx_bytes += skb->len;
+    dev->stats.rx_packets++;
+    dev->stats.rx_bytes += skb->len;
 
     netif_rx(skb);   /* Eat it! */
 
@@ -348,16 +347,3 @@ static void irlan_eth_set_multicast_list(struct net_device *dev)
     else
         irlan_set_broadcast_filter(self, FALSE);
 }
-
-/*
- * Function irlan_get_stats (dev)
- *
- *    Get the current statistics for this device
- *
- */
-static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev)
-{
-    struct irlan_cb *self = netdev_priv(dev);
-
-    return &self->stats;
-}
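irlan_eth can drop both its private struct net_device_stats and its .ndo_get_stats callback because every net_device already embeds a stats structure, and on kernels of this era the core falls back to returning &dev->stats when a driver supplies no getter. Counting directly into dev->stats is therefore exactly what the default path reports. A hedged sketch of the pattern:

    /* Illustrative sketch: rely on the core's default stats path. */
    static netdev_tx_t example_xmit(struct sk_buff *skb, struct net_device *dev)
    {
        dev->stats.tx_packets++;        /* counters embedded in net_device */
        dev->stats.tx_bytes += skb->len;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
        /* no .ndo_get_stats needed: dev_get_stats() returns &dev->stats */
    }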
diff --git a/net/irda/irlan/irlan_event.c b/net/irda/irlan/irlan_event.c
index cbcb4eb54037..43f16040a6fe 100644
--- a/net/irda/irlan/irlan_event.c
+++ b/net/irda/irlan/irlan_event.c
@@ -24,7 +24,7 @@
 
 #include <net/irda/irlan_event.h>
 
-char *irlan_state[] = {
+const char * const irlan_state[] = {
     "IRLAN_IDLE",
     "IRLAN_QUERY",
     "IRLAN_CONN",
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 0e7d8bde145d..6115a44c0a24 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -939,7 +939,7 @@ struct irda_device_info *irlmp_get_discoveries(int *pn, __u16 mask, int nslots)
     }
 
     /* Return current cached discovery log */
-    return(irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE));
+    return irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE);
 }
 EXPORT_SYMBOL(irlmp_get_discoveries);
 
diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c
index 3750884094da..062e63b1c5c4 100644
--- a/net/irda/irlmp_frame.c
+++ b/net/irda/irlmp_frame.c
@@ -448,7 +448,7 @@ static struct lsap_cb *irlmp_find_lsap(struct lap_cb *self, __u8 dlsap_sel,
         (self->cache.slsap_sel == slsap_sel) &&
         (self->cache.dlsap_sel == dlsap_sel))
     {
-        return (self->cache.lsap);
+        return self->cache.lsap;
     }
 #endif
 
diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h
index 4300df35d37d..0d82ff5aeff1 100644
--- a/net/irda/irnet/irnet.h
+++ b/net/irda/irnet/irnet.h
@@ -458,6 +458,8 @@ typedef struct irnet_socket
   int       disco_index;    /* Last read in the discovery log */
   int       disco_number;   /* Size of the discovery log */
 
+  struct mutex      lock;
+
 } irnet_socket;
 
 /*
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
index e98e40d76f4f..7f17a8020e8a 100644
--- a/net/irda/irnet/irnet_irda.c
+++ b/net/irda/irnet/irnet_irda.c
@@ -238,7 +238,7 @@ irnet_ias_to_tsap(irnet_socket * self,
   DEXIT(IRDA_SR_TRACE, "\n");
 
   /* Return the TSAP */
-  return(dtsap_sel);
+  return dtsap_sel;
 }
 
 /*------------------------------------------------------------------*/
@@ -301,7 +301,7 @@ irnet_connect_tsap(irnet_socket * self)
     {
       clear_bit(0, &self->ttp_connect);
       DERROR(IRDA_SR_ERROR, "connect aborted!\n");
-      return(err);
+      return err;
     }
 
   /* Connect to remote device */
@@ -312,7 +312,7 @@ irnet_connect_tsap(irnet_socket * self)
     {
       clear_bit(0, &self->ttp_connect);
       DERROR(IRDA_SR_ERROR, "connect aborted!\n");
-      return(err);
+      return err;
     }
 
   /* The above call is non-blocking.
@@ -321,7 +321,7 @@ irnet_connect_tsap(irnet_socket * self)
    * See you there ;-) */
 
   DEXIT(IRDA_SR_TRACE, "\n");
-  return(err);
+  return err;
 }
 
 /*------------------------------------------------------------------*/
@@ -362,10 +362,10 @@ irnet_discover_next_daddr(irnet_socket * self)
       /* The above request is non-blocking.
       * After a while, IrDA will call us back in irnet_discovervalue_confirm()
       * We will then call irnet_ias_to_tsap() and come back here again... */
-      return(0);
+      return 0;
     }
   else
-    return(1);
+    return 1;
 }
 
 /*------------------------------------------------------------------*/
@@ -436,7 +436,7 @@ irnet_discover_daddr_and_lsap_sel(irnet_socket * self)
   /* Follow me in irnet_discovervalue_confirm() */
 
   DEXIT(IRDA_SR_TRACE, "\n");
-  return(0);
+  return 0;
 }
 
 /*------------------------------------------------------------------*/
@@ -485,7 +485,7 @@ irnet_dname_to_daddr(irnet_socket * self)
   /* No luck ! */
   DEBUG(IRDA_SR_INFO, "cannot discover device ``%s'' !!!\n", self->rname);
   kfree(discoveries);
-  return(-EADDRNOTAVAIL);
+  return -EADDRNOTAVAIL;
 }
 
 
@@ -527,7 +527,7 @@ irda_irnet_create(irnet_socket * self)
   INIT_WORK(&self->disconnect_work, irnet_ppp_disconnect);
 
   DEXIT(IRDA_SOCK_TRACE, "\n");
-  return(0);
+  return 0;
 }
 
 /*------------------------------------------------------------------*/
@@ -601,7 +601,7 @@ irda_irnet_connect(irnet_socket * self)
    * We will finish the connection procedure in irnet_connect_tsap().
    */
   DEXIT(IRDA_SOCK_TRACE, "\n");
-  return(0);
+  return 0;
 }
 
 /*------------------------------------------------------------------*/
@@ -733,7 +733,7 @@ irnet_daddr_to_dname(irnet_socket * self)
   /* No luck ! */
   DEXIT(IRDA_SERV_INFO, ": cannot discover device 0x%08x !!!\n", self->daddr);
   kfree(discoveries);
-  return(-EADDRNOTAVAIL);
+  return -EADDRNOTAVAIL;
 }
 
 /*------------------------------------------------------------------*/
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index dfe7b38dd4af..0993bd454ea5 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -166,7 +166,7 @@ irnet_ctrl_write(irnet_socket * ap,
166 } 166 }
167 167
168 /* Success : we have parsed all commands successfully */ 168 /* Success : we have parsed all commands successfully */
169 return(count); 169 return count;
170} 170}
171 171
172#ifdef INITIAL_DISCOVERY 172#ifdef INITIAL_DISCOVERY
@@ -300,7 +300,7 @@ irnet_ctrl_read(irnet_socket * ap,
300 } 300 }
301 301
302 DEXIT(CTRL_TRACE, "\n"); 302 DEXIT(CTRL_TRACE, "\n");
303 return(strlen(event)); 303 return strlen(event);
304 } 304 }
305#endif /* INITIAL_DISCOVERY */ 305#endif /* INITIAL_DISCOVERY */
306 306
@@ -409,7 +409,7 @@ irnet_ctrl_read(irnet_socket * ap,
409 } 409 }
410 410
411 DEXIT(CTRL_TRACE, "\n"); 411 DEXIT(CTRL_TRACE, "\n");
412 return(strlen(event)); 412 return strlen(event);
413} 413}
414 414
415/*------------------------------------------------------------------*/ 415/*------------------------------------------------------------------*/
@@ -480,7 +480,6 @@ dev_irnet_open(struct inode * inode,
480 ap = kzalloc(sizeof(*ap), GFP_KERNEL); 480 ap = kzalloc(sizeof(*ap), GFP_KERNEL);
481 DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n"); 481 DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n");
482 482
483 lock_kernel();
484 /* initialize the irnet structure */ 483 /* initialize the irnet structure */
485 ap->file = file; 484 ap->file = file;
486 485
@@ -502,18 +501,20 @@ dev_irnet_open(struct inode * inode,
502 { 501 {
503 DERROR(FS_ERROR, "Can't setup IrDA link...\n"); 502 DERROR(FS_ERROR, "Can't setup IrDA link...\n");
504 kfree(ap); 503 kfree(ap);
505 unlock_kernel(); 504
506 return err; 505 return err;
507 } 506 }
508 507
509 /* For the control channel */ 508 /* For the control channel */
510 ap->event_index = irnet_events.index; /* Cancel all past events */ 509 ap->event_index = irnet_events.index; /* Cancel all past events */
511 510
511 mutex_init(&ap->lock);
512
512 /* Put our stuff where we will be able to find it later */ 513 /* Put our stuff where we will be able to find it later */
513 file->private_data = ap; 514 file->private_data = ap;
514 515
515 DEXIT(FS_TRACE, " - ap=0x%p\n", ap); 516 DEXIT(FS_TRACE, " - ap=0x%p\n", ap);
516 unlock_kernel(); 517
517 return 0; 518 return 0;
518} 519}
519 520
@@ -623,7 +624,7 @@ dev_irnet_poll(struct file * file,
623 mask |= irnet_ctrl_poll(ap, file, wait); 624 mask |= irnet_ctrl_poll(ap, file, wait);
624 625
625 DEXIT(FS_TRACE, " - mask=0x%X\n", mask); 626 DEXIT(FS_TRACE, " - mask=0x%X\n", mask);
626 return(mask); 627 return mask;
627} 628}
628 629
629/*------------------------------------------------------------------*/ 630/*------------------------------------------------------------------*/
@@ -664,7 +665,9 @@ dev_irnet_ioctl(
664 { 665 {
665 DEBUG(FS_INFO, "Entering PPP discipline.\n"); 666 DEBUG(FS_INFO, "Entering PPP discipline.\n");
 666 /* PPP channel setup (ap->chan is configured in dev_irnet_open())*/ 667 /* PPP channel setup (ap->chan is configured in dev_irnet_open())*/
667 lock_kernel(); 668 if (mutex_lock_interruptible(&ap->lock))
669 return -EINTR;
670
668 err = ppp_register_channel(&ap->chan); 671 err = ppp_register_channel(&ap->chan);
669 if(err == 0) 672 if(err == 0)
670 { 673 {
@@ -677,14 +680,17 @@ dev_irnet_ioctl(
677 } 680 }
678 else 681 else
679 DERROR(FS_ERROR, "Can't setup PPP channel...\n"); 682 DERROR(FS_ERROR, "Can't setup PPP channel...\n");
680 unlock_kernel(); 683
684 mutex_unlock(&ap->lock);
681 } 685 }
682 else 686 else
683 { 687 {
684 /* In theory, should be N_TTY */ 688 /* In theory, should be N_TTY */
685 DEBUG(FS_INFO, "Exiting PPP discipline.\n"); 689 DEBUG(FS_INFO, "Exiting PPP discipline.\n");
686 /* Disconnect from the generic PPP layer */ 690 /* Disconnect from the generic PPP layer */
687 lock_kernel(); 691 if (mutex_lock_interruptible(&ap->lock))
692 return -EINTR;
693
688 if(ap->ppp_open) 694 if(ap->ppp_open)
689 { 695 {
690 ap->ppp_open = 0; 696 ap->ppp_open = 0;
@@ -693,24 +699,31 @@ dev_irnet_ioctl(
693 else 699 else
694 DERROR(FS_ERROR, "Channel not registered !\n"); 700 DERROR(FS_ERROR, "Channel not registered !\n");
695 err = 0; 701 err = 0;
696 unlock_kernel(); 702
703 mutex_unlock(&ap->lock);
697 } 704 }
698 break; 705 break;
699 706
700 /* Query PPP channel and unit number */ 707 /* Query PPP channel and unit number */
701 case PPPIOCGCHAN: 708 case PPPIOCGCHAN:
702 lock_kernel(); 709 if (mutex_lock_interruptible(&ap->lock))
710 return -EINTR;
711
703 if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan), 712 if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan),
704 (int __user *)argp)) 713 (int __user *)argp))
705 err = 0; 714 err = 0;
706 unlock_kernel(); 715
716 mutex_unlock(&ap->lock);
707 break; 717 break;
708 case PPPIOCGUNIT: 718 case PPPIOCGUNIT:
709 lock_kernel(); 719 if (mutex_lock_interruptible(&ap->lock))
720 return -EINTR;
721
710 if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan), 722 if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan),
711 (int __user *)argp)) 723 (int __user *)argp))
712 err = 0; 724 err = 0;
713 unlock_kernel(); 725
726 mutex_unlock(&ap->lock);
714 break; 727 break;
715 728
716 /* All these ioctls can be passed both directly and from ppp_generic, 729 /* All these ioctls can be passed both directly and from ppp_generic,
@@ -730,9 +743,12 @@ dev_irnet_ioctl(
730 if(!capable(CAP_NET_ADMIN)) 743 if(!capable(CAP_NET_ADMIN))
731 err = -EPERM; 744 err = -EPERM;
732 else { 745 else {
733 lock_kernel(); 746 if (mutex_lock_interruptible(&ap->lock))
747 return -EINTR;
748
734 err = ppp_irnet_ioctl(&ap->chan, cmd, arg); 749 err = ppp_irnet_ioctl(&ap->chan, cmd, arg);
735 unlock_kernel(); 750
751 mutex_unlock(&ap->lock);
736 } 752 }
737 break; 753 break;
738 754
@@ -740,7 +756,9 @@ dev_irnet_ioctl(
740 /* Get termios */ 756 /* Get termios */
741 case TCGETS: 757 case TCGETS:
742 DEBUG(FS_INFO, "Get termios.\n"); 758 DEBUG(FS_INFO, "Get termios.\n");
743 lock_kernel(); 759 if (mutex_lock_interruptible(&ap->lock))
760 return -EINTR;
761
744#ifndef TCGETS2 762#ifndef TCGETS2
745 if(!kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios)) 763 if(!kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios))
746 err = 0; 764 err = 0;
@@ -748,12 +766,15 @@ dev_irnet_ioctl(
748 if(kernel_termios_to_user_termios_1((struct termios __user *)argp, &ap->termios)) 766 if(kernel_termios_to_user_termios_1((struct termios __user *)argp, &ap->termios))
749 err = 0; 767 err = 0;
750#endif 768#endif
751 unlock_kernel(); 769
770 mutex_unlock(&ap->lock);
752 break; 771 break;
753 /* Set termios */ 772 /* Set termios */
754 case TCSETSF: 773 case TCSETSF:
755 DEBUG(FS_INFO, "Set termios.\n"); 774 DEBUG(FS_INFO, "Set termios.\n");
756 lock_kernel(); 775 if (mutex_lock_interruptible(&ap->lock))
776 return -EINTR;
777
757#ifndef TCGETS2 778#ifndef TCGETS2
758 if(!user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp)) 779 if(!user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp))
759 err = 0; 780 err = 0;
@@ -761,7 +782,8 @@ dev_irnet_ioctl(
761 if(!user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp)) 782 if(!user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp))
762 err = 0; 783 err = 0;
763#endif 784#endif
764 unlock_kernel(); 785
786 mutex_unlock(&ap->lock);
765 break; 787 break;
766 788
767 /* Set DTR/RTS */ 789 /* Set DTR/RTS */
@@ -784,9 +806,10 @@ dev_irnet_ioctl(
784 * We should also worry that we don't accept junk here and that 806 * We should also worry that we don't accept junk here and that
785 * we get rid of our own buffers */ 807 * we get rid of our own buffers */
786#ifdef FLUSH_TO_PPP 808#ifdef FLUSH_TO_PPP
787 lock_kernel(); 809 if (mutex_lock_interruptible(&ap->lock))
810 return -EINTR;
788 ppp_output_wakeup(&ap->chan); 811 ppp_output_wakeup(&ap->chan);
789 unlock_kernel(); 812 mutex_unlock(&ap->lock);
790#endif /* FLUSH_TO_PPP */ 813#endif /* FLUSH_TO_PPP */
791 err = 0; 814 err = 0;
792 break; 815 break;
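
The irnet_ppp.c hunks above are one pattern applied repeatedly: each lock_kernel()/unlock_kernel() pair from the Big Kernel Lock era becomes a per-device mutex taken with mutex_lock_interruptible(), failing with -EINTR if a signal arrives while waiting for the lock. A minimal sketch of the conversion, using a hypothetical foo_dev in place of the real irnet_socket:

#include <linux/mutex.h>
#include <linux/errno.h>

struct foo_dev {
	struct mutex lock;	/* replaces the implicit Big Kernel Lock */
	int state;
};

/* Done once, typically in open(), as with mutex_init(&ap->lock) above. */
static void foo_dev_setup(struct foo_dev *dev)
{
	mutex_init(&dev->lock);
}

/* Each former lock_kernel()/unlock_kernel() section becomes: */
static long foo_do_ioctl(struct foo_dev *dev)
{
	if (mutex_lock_interruptible(&dev->lock))
		return -EINTR;		/* signal while sleeping on the lock */

	dev->state++;			/* serialized work goes here */

	mutex_unlock(&dev->lock);
	return 0;
}
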
diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h
index b5df2418f90c..940225866da0 100644
--- a/net/irda/irnet/irnet_ppp.h
+++ b/net/irda/irnet/irnet_ppp.h
@@ -103,7 +103,8 @@ static const struct file_operations irnet_device_fops =
103 .poll = dev_irnet_poll, 103 .poll = dev_irnet_poll,
104 .unlocked_ioctl = dev_irnet_ioctl, 104 .unlocked_ioctl = dev_irnet_ioctl,
105 .open = dev_irnet_open, 105 .open = dev_irnet_open,
106 .release = dev_irnet_close 106 .release = dev_irnet_close,
107 .llseek = noop_llseek,
107 /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */ 108 /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */
108}; 109};
109 110
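
The fops hunk fixes a missing comma after .release and wires .llseek to noop_llseek: with the BKL gone, a file_operations that leaves .llseek unset falls back to default_llseek, so drivers that never seek now say so explicitly. Sketched with a hypothetical handler:

#include <linux/fs.h>
#include <linux/module.h>

static int foo_open(struct inode *inode, struct file *file)
{
	return 0;	/* hypothetical; real setup elided */
}

static const struct file_operations foo_fops = {
	.owner	= THIS_MODULE,
	.open	= foo_open,
	.llseek	= noop_llseek,	/* accept llseek() but never move f_pos */
};
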
diff --git a/net/irda/parameters.c b/net/irda/parameters.c
index fc1a20565e2d..71cd38c1a67f 100644
--- a/net/irda/parameters.c
+++ b/net/irda/parameters.c
@@ -298,6 +298,8 @@ static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
298 298
299 p.pi = pi; /* In case handler needs to know */ 299 p.pi = pi; /* In case handler needs to know */
300 p.pl = buf[1]; /* Extract length of value */ 300 p.pl = buf[1]; /* Extract length of value */
301 if (p.pl > 32)
302 p.pl = 32;
301 303
302 IRDA_DEBUG(2, "%s(), pi=%#x, pl=%d\n", __func__, 304 IRDA_DEBUG(2, "%s(), pi=%#x, pl=%d\n", __func__,
303 p.pi, p.pl); 305 p.pi, p.pl);
@@ -318,7 +320,7 @@ static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi,
318 (__u8) str[0], (__u8) str[1]); 320 (__u8) str[0], (__u8) str[1]);
319 321
320 /* Null terminate string */ 322 /* Null terminate string */
321 str[p.pl+1] = '\0'; 323 str[p.pl] = '\0';
322 324
323 p.pv.c = str; /* Handler will need to take a copy */ 325 p.pv.c = str; /* Handler will need to take a copy */
324 326
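
The parameters.c hunk is a remote buffer-overflow fix: the length byte p.pl comes straight off the air, the destination is a fixed on-stack buffer, and the old code also wrote the terminating NUL at str[p.pl+1], one past the copied bytes. Clamping the length before use and terminating at str[p.pl] bounds both. A self-contained sketch of the pattern with hypothetical names (plain C):

#include <stdio.h>
#include <string.h>

/* Extract a length-prefixed string from an untrusted frame:
 * frame[0] is the claimed length, frame[1..] the payload. */
static int extract_string(const unsigned char *frame, size_t framelen,
			  char *out, size_t outsz)
{
	size_t pl;

	if (framelen < 1 || outsz < 1)
		return -1;
	pl = frame[0];
	if (pl > framelen - 1)		/* never read past the frame */
		pl = framelen - 1;
	if (pl > outsz - 1)		/* never write past the buffer */
		pl = outsz - 1;
	memcpy(out, frame + 1, pl);
	out[pl] = '\0';			/* NUL inside 'out', not one past it */
	return (int)pl;
}

int main(void)
{
	const unsigned char frame[] = { 200, 'h', 'e', 'l', 'l', 'o' };
	char name[4];

	extract_string(frame, sizeof(frame), name, sizeof(name));
	printf("%s\n", name);		/* "hel": clamped, still terminated */
	return 0;
}
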
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 43040e97c474..d87c22df6f1e 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -565,12 +565,12 @@ pfkey_proto2satype(uint16_t proto)
565 565
566static uint8_t pfkey_proto_to_xfrm(uint8_t proto) 566static uint8_t pfkey_proto_to_xfrm(uint8_t proto)
567{ 567{
568 return (proto == IPSEC_PROTO_ANY ? 0 : proto); 568 return proto == IPSEC_PROTO_ANY ? 0 : proto;
569} 569}
570 570
571static uint8_t pfkey_proto_from_xfrm(uint8_t proto) 571static uint8_t pfkey_proto_from_xfrm(uint8_t proto)
572{ 572{
573 return (proto ? proto : IPSEC_PROTO_ANY); 573 return proto ? proto : IPSEC_PROTO_ANY;
574} 574}
575 575
576static inline int pfkey_sockaddr_len(sa_family_t family) 576static inline int pfkey_sockaddr_len(sa_family_t family)
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 1ae697681bc7..8d9ce0accc98 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -144,7 +144,6 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
144 nf_reset(skb); 144 nf_reset(skb);
145 145
146 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { 146 if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
147 dev->last_rx = jiffies;
148 dev->stats.rx_packets++; 147 dev->stats.rx_packets++;
149 dev->stats.rx_bytes += data_len; 148 dev->stats.rx_bytes += data_len;
150 } else 149 } else
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 226a0ae3bcfd..1c770c0644d1 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -65,9 +65,7 @@ static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif
65 continue; 65 continue;
66 66
67 if ((l2tp->conn_id == tunnel_id) && 67 if ((l2tp->conn_id == tunnel_id) &&
68#ifdef CONFIG_NET_NS 68 net_eq(sock_net(sk), net) &&
69 (sk->sk_net == net) &&
70#endif
71 !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && 69 !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
72 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) 70 !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
73 goto found; 71 goto found;
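
The l2tp_ip.c lookup replaces the open-coded, CONFIG_NET_NS-guarded sk->sk_net comparison with net_eq(sock_net(sk), net). The helper hides the same #ifdef once, centrally: with namespaces compiled out it is constant-true and the whole test folds away. Paraphrasing its definition from include/net/net_namespace.h of this era:

struct net;	/* opaque for this sketch */

#ifdef CONFIG_NET_NS
static inline int net_eq(const struct net *net1, const struct net *net2)
{
	return net1 == net2;	/* namespaces on: compare the pointers */
}
#else
static inline int net_eq(const struct net *net1, const struct net *net2)
{
	return 1;		/* namespaces off: everything is init_net */
}
#endif
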
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index ff954b3e94b6..39a21d0c61c4 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1768,7 +1768,7 @@ static const struct proto_ops pppol2tp_ops = {
1768 .ioctl = pppox_ioctl, 1768 .ioctl = pppox_ioctl,
1769}; 1769};
1770 1770
1771static struct pppox_proto pppol2tp_proto = { 1771static const struct pppox_proto pppol2tp_proto = {
1772 .create = pppol2tp_create, 1772 .create = pppol2tp_create,
1773 .ioctl = pppol2tp_ioctl 1773 .ioctl = pppol2tp_ioctl
1774}; 1774};
diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aes_ccm.c
index a87cb3ba2df6..d2b03e0851ef 100644
--- a/net/mac80211/aes_ccm.c
+++ b/net/mac80211/aes_ccm.c
@@ -138,10 +138,8 @@ struct crypto_cipher *ieee80211_aes_key_setup_encrypt(const u8 key[])
138 struct crypto_cipher *tfm; 138 struct crypto_cipher *tfm;
139 139
140 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 140 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
141 if (IS_ERR(tfm)) 141 if (!IS_ERR(tfm))
142 return NULL; 142 crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN);
143
144 crypto_cipher_setkey(tfm, key, ALG_CCMP_KEY_LEN);
145 143
146 return tfm; 144 return tfm;
147} 145}
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
index 3d097b3d7b62..b4d66cca76d6 100644
--- a/net/mac80211/aes_cmac.c
+++ b/net/mac80211/aes_cmac.c
@@ -119,10 +119,8 @@ struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[])
119 struct crypto_cipher *tfm; 119 struct crypto_cipher *tfm;
120 120
121 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); 121 tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
122 if (IS_ERR(tfm)) 122 if (!IS_ERR(tfm))
123 return NULL; 123 crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN);
124
125 crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN);
126 124
127 return tfm; 125 return tfm;
128} 126}
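
Both AES setup helpers stop collapsing a failed crypto_alloc_cipher() into NULL and instead return the ERR_PTR unchanged, so callers can tell -ENOMEM from a missing algorithm; the matching callers in this merge test the result with IS_ERR() rather than against NULL. The pattern, as a sketch with a hypothetical wrapper name:

#include <linux/crypto.h>
#include <linux/err.h>

/* Allocate and key a raw AES transform. On failure the ERR_PTR from
 * the crypto layer is passed through rather than flattened to NULL,
 * so callers must check IS_ERR(tfm), not !tfm. */
static struct crypto_cipher *foo_aes_key_setup(const u8 *key,
					       unsigned int key_len)
{
	struct crypto_cipher *tfm;

	tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC);
	if (!IS_ERR(tfm))
		crypto_cipher_setkey(tfm, key, key_len);

	return tfm;	/* a valid tfm, or ERR_PTR(-ENOENT/-ENOMEM/...) */
}
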
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 965b272499fd..720b7a84af59 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -56,7 +56,7 @@ static void ieee80211_free_tid_rx(struct rcu_head *h)
56} 56}
57 57
58void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, 58void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
59 u16 initiator, u16 reason) 59 u16 initiator, u16 reason, bool tx)
60{ 60{
61 struct ieee80211_local *local = sta->local; 61 struct ieee80211_local *local = sta->local;
62 struct tid_ampdu_rx *tid_rx; 62 struct tid_ampdu_rx *tid_rx;
@@ -81,20 +81,21 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
81 "aggregation for tid %d\n", tid); 81 "aggregation for tid %d\n", tid);
82 82
83 /* check if this is a self generated aggregation halt */ 83 /* check if this is a self generated aggregation halt */
84 if (initiator == WLAN_BACK_RECIPIENT) 84 if (initiator == WLAN_BACK_RECIPIENT && tx)
85 ieee80211_send_delba(sta->sdata, sta->sta.addr, 85 ieee80211_send_delba(sta->sdata, sta->sta.addr,
86 tid, 0, reason); 86 tid, 0, reason);
87 87
88 del_timer_sync(&tid_rx->session_timer); 88 del_timer_sync(&tid_rx->session_timer);
89 del_timer_sync(&tid_rx->reorder_timer);
89 90
90 call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx); 91 call_rcu(&tid_rx->rcu_head, ieee80211_free_tid_rx);
91} 92}
92 93
93void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, 94void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
94 u16 initiator, u16 reason) 95 u16 initiator, u16 reason, bool tx)
95{ 96{
96 mutex_lock(&sta->ampdu_mlme.mtx); 97 mutex_lock(&sta->ampdu_mlme.mtx);
97 ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason); 98 ___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason, tx);
98 mutex_unlock(&sta->ampdu_mlme.mtx); 99 mutex_unlock(&sta->ampdu_mlme.mtx);
99} 100}
100 101
@@ -120,6 +121,20 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
120 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work); 121 ieee80211_queue_work(&sta->local->hw, &sta->ampdu_mlme.work);
121} 122}
122 123
124static void sta_rx_agg_reorder_timer_expired(unsigned long data)
125{
126 u8 *ptid = (u8 *)data;
127 u8 *timer_to_id = ptid - *ptid;
128 struct sta_info *sta = container_of(timer_to_id, struct sta_info,
129 timer_to_tid[0]);
130
131 rcu_read_lock();
132 spin_lock(&sta->lock);
133 ieee80211_release_reorder_timeout(sta, *ptid);
134 spin_unlock(&sta->lock);
135 rcu_read_unlock();
136}
137
123static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, 138static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
124 u8 dialog_token, u16 status, u16 policy, 139 u8 dialog_token, u16 status, u16 policy,
125 u16 buf_size, u16 timeout) 140 u16 buf_size, u16 timeout)
@@ -251,11 +266,18 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
251 goto end; 266 goto end;
252 } 267 }
253 268
269 spin_lock_init(&tid_agg_rx->reorder_lock);
270
254 /* rx timer */ 271 /* rx timer */
255 tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired; 272 tid_agg_rx->session_timer.function = sta_rx_agg_session_timer_expired;
256 tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid]; 273 tid_agg_rx->session_timer.data = (unsigned long)&sta->timer_to_tid[tid];
257 init_timer(&tid_agg_rx->session_timer); 274 init_timer(&tid_agg_rx->session_timer);
258 275
276 /* rx reorder timer */
277 tid_agg_rx->reorder_timer.function = sta_rx_agg_reorder_timer_expired;
278 tid_agg_rx->reorder_timer.data = (unsigned long)&sta->timer_to_tid[tid];
279 init_timer(&tid_agg_rx->reorder_timer);
280
259 /* prepare reordering buffer */ 281 /* prepare reordering buffer */
260 tid_agg_rx->reorder_buf = 282 tid_agg_rx->reorder_buf =
261 kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC); 283 kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC);
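
The new per-TID reorder timer uses the timer API of this era: fill in .function and .data, call init_timer(), arm it later with mod_timer(); ___ieee80211_stop_rx_ba_session() above pairs it with del_timer_sync() on teardown. A simplified sketch (the real code encodes the TID in the .data pointer; here a plain struct pointer is assumed):

#include <linux/timer.h>
#include <linux/jiffies.h>

struct foo_session {
	struct timer_list reorder_timer;
	unsigned long flushes;
};

static void foo_reorder_expired(unsigned long data)
{
	struct foo_session *s = (struct foo_session *)data;

	s->flushes++;	/* stand-in for releasing buffered frames */
}

static void foo_session_start(struct foo_session *s)
{
	s->reorder_timer.function = foo_reorder_expired;
	s->reorder_timer.data = (unsigned long)s;
	init_timer(&s->reorder_timer);

	mod_timer(&s->reorder_timer, jiffies + HZ / 10);	/* ~100 ms */
}

static void foo_session_stop(struct foo_session *s)
{
	del_timer_sync(&s->reorder_timer);	/* wait out a running handler */
}
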
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 8f23401832b7..d4679b265ba8 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -145,7 +145,8 @@ static void kfree_tid_tx(struct rcu_head *rcu_head)
145} 145}
146 146
147int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, 147int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
148 enum ieee80211_back_parties initiator) 148 enum ieee80211_back_parties initiator,
149 bool tx)
149{ 150{
150 struct ieee80211_local *local = sta->local; 151 struct ieee80211_local *local = sta->local;
151 struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid]; 152 struct tid_ampdu_tx *tid_tx = sta->ampdu_mlme.tid_tx[tid];
@@ -185,6 +186,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
185 clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state); 186 clear_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state);
186 187
187 tid_tx->stop_initiator = initiator; 188 tid_tx->stop_initiator = initiator;
189 tid_tx->tx_stop = tx;
188 190
189 ret = drv_ampdu_action(local, sta->sdata, 191 ret = drv_ampdu_action(local, sta->sdata,
190 IEEE80211_AMPDU_TX_STOP, 192 IEEE80211_AMPDU_TX_STOP,
@@ -577,13 +579,14 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_vif *vif,
577EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); 579EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe);
578 580
579int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, 581int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
580 enum ieee80211_back_parties initiator) 582 enum ieee80211_back_parties initiator,
583 bool tx)
581{ 584{
582 int ret; 585 int ret;
583 586
584 mutex_lock(&sta->ampdu_mlme.mtx); 587 mutex_lock(&sta->ampdu_mlme.mtx);
585 588
586 ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator); 589 ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator, tx);
587 590
588 mutex_unlock(&sta->ampdu_mlme.mtx); 591 mutex_unlock(&sta->ampdu_mlme.mtx);
589 592
@@ -672,7 +675,7 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
672 goto unlock_sta; 675 goto unlock_sta;
673 } 676 }
674 677
675 if (tid_tx->stop_initiator == WLAN_BACK_INITIATOR) 678 if (tid_tx->stop_initiator == WLAN_BACK_INITIATOR && tid_tx->tx_stop)
676 ieee80211_send_delba(sta->sdata, ra, tid, 679 ieee80211_send_delba(sta->sdata, ra, tid,
677 WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); 680 WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE);
678 681
@@ -772,7 +775,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
772 775
773 sta->ampdu_mlme.addba_req_num[tid] = 0; 776 sta->ampdu_mlme.addba_req_num[tid] = 0;
774 } else { 777 } else {
775 ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); 778 ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR,
779 true);
776 } 780 }
777 781
778 out: 782 out:
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 29ac8e1a509e..18bd0e550600 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -19,33 +19,6 @@
19#include "rate.h" 19#include "rate.h"
20#include "mesh.h" 20#include "mesh.h"
21 21
22static bool nl80211_type_check(enum nl80211_iftype type)
23{
24 switch (type) {
25 case NL80211_IFTYPE_ADHOC:
26 case NL80211_IFTYPE_STATION:
27 case NL80211_IFTYPE_MONITOR:
28#ifdef CONFIG_MAC80211_MESH
29 case NL80211_IFTYPE_MESH_POINT:
30#endif
31 case NL80211_IFTYPE_AP:
32 case NL80211_IFTYPE_AP_VLAN:
33 case NL80211_IFTYPE_WDS:
34 return true;
35 default:
36 return false;
37 }
38}
39
40static bool nl80211_params_check(enum nl80211_iftype type,
41 struct vif_params *params)
42{
43 if (!nl80211_type_check(type))
44 return false;
45
46 return true;
47}
48
49static int ieee80211_add_iface(struct wiphy *wiphy, char *name, 22static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
50 enum nl80211_iftype type, u32 *flags, 23 enum nl80211_iftype type, u32 *flags,
51 struct vif_params *params) 24 struct vif_params *params)
@@ -55,9 +28,6 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name,
55 struct ieee80211_sub_if_data *sdata; 28 struct ieee80211_sub_if_data *sdata;
56 int err; 29 int err;
57 30
58 if (!nl80211_params_check(type, params))
59 return -EINVAL;
60
61 err = ieee80211_if_add(local, name, &dev, type, params); 31 err = ieee80211_if_add(local, name, &dev, type, params);
62 if (err || type != NL80211_IFTYPE_MONITOR || !flags) 32 if (err || type != NL80211_IFTYPE_MONITOR || !flags)
63 return err; 33 return err;
@@ -82,12 +52,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
82 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 52 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
83 int ret; 53 int ret;
84 54
85 if (ieee80211_sdata_running(sdata))
86 return -EBUSY;
87
88 if (!nl80211_params_check(type, params))
89 return -EINVAL;
90
91 ret = ieee80211_if_change_type(sdata, type); 55 ret = ieee80211_if_change_type(sdata, type);
92 if (ret) 56 if (ret)
93 return ret; 57 return ret;
@@ -104,54 +68,71 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
104 params && params->use_4addr >= 0) 68 params && params->use_4addr >= 0)
105 sdata->u.mgd.use_4addr = params->use_4addr; 69 sdata->u.mgd.use_4addr = params->use_4addr;
106 70
107 if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags) 71 if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags) {
108 sdata->u.mntr_flags = *flags; 72 struct ieee80211_local *local = sdata->local;
73
74 if (ieee80211_sdata_running(sdata)) {
75 /*
76 * Prohibit MONITOR_FLAG_COOK_FRAMES to be
77 * changed while the interface is up.
78 * Else we would need to add a lot of cruft
79 * to update everything:
80 * cooked_mntrs, monitor and all fif_* counters
81 * reconfigure hardware
82 */
83 if ((*flags & MONITOR_FLAG_COOK_FRAMES) !=
84 (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES))
85 return -EBUSY;
86
87 ieee80211_adjust_monitor_flags(sdata, -1);
88 sdata->u.mntr_flags = *flags;
89 ieee80211_adjust_monitor_flags(sdata, 1);
90
91 ieee80211_configure_filter(local);
92 } else {
93 /*
94 * Because the interface is down, ieee80211_do_stop
95 * and ieee80211_do_open take care of "everything"
96 * mentioned in the comment above.
97 */
98 sdata->u.mntr_flags = *flags;
99 }
100 }
109 101
110 return 0; 102 return 0;
111} 103}
112 104
113static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, 105static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
114 u8 key_idx, const u8 *mac_addr, 106 u8 key_idx, bool pairwise, const u8 *mac_addr,
115 struct key_params *params) 107 struct key_params *params)
116{ 108{
117 struct ieee80211_sub_if_data *sdata; 109 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
118 struct sta_info *sta = NULL; 110 struct sta_info *sta = NULL;
119 enum ieee80211_key_alg alg;
120 struct ieee80211_key *key; 111 struct ieee80211_key *key;
121 int err; 112 int err;
122 113
123 if (!netif_running(dev)) 114 if (!ieee80211_sdata_running(sdata))
124 return -ENETDOWN; 115 return -ENETDOWN;
125 116
126 sdata = IEEE80211_DEV_TO_SUB_IF(dev); 117 /* reject WEP and TKIP keys if WEP failed to initialize */
127
128 switch (params->cipher) { 118 switch (params->cipher) {
129 case WLAN_CIPHER_SUITE_WEP40: 119 case WLAN_CIPHER_SUITE_WEP40:
130 case WLAN_CIPHER_SUITE_WEP104:
131 alg = ALG_WEP;
132 break;
133 case WLAN_CIPHER_SUITE_TKIP: 120 case WLAN_CIPHER_SUITE_TKIP:
134 alg = ALG_TKIP; 121 case WLAN_CIPHER_SUITE_WEP104:
135 break; 122 if (IS_ERR(sdata->local->wep_tx_tfm))
136 case WLAN_CIPHER_SUITE_CCMP: 123 return -EINVAL;
137 alg = ALG_CCMP;
138 break;
139 case WLAN_CIPHER_SUITE_AES_CMAC:
140 alg = ALG_AES_CMAC;
141 break; 124 break;
142 default: 125 default:
143 return -EINVAL; 126 break;
144 } 127 }
145 128
146 /* reject WEP and TKIP keys if WEP failed to initialize */ 129 key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len,
147 if ((alg == ALG_WEP || alg == ALG_TKIP) && 130 params->key, params->seq_len, params->seq);
148 IS_ERR(sdata->local->wep_tx_tfm)) 131 if (IS_ERR(key))
149 return -EINVAL; 132 return PTR_ERR(key);
150 133
151 key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key, 134 if (pairwise)
152 params->seq_len, params->seq); 135 key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE;
153 if (!key)
154 return -ENOMEM;
155 136
156 mutex_lock(&sdata->local->sta_mtx); 137 mutex_lock(&sdata->local->sta_mtx);
157 138
@@ -164,9 +145,10 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
164 } 145 }
165 } 146 }
166 147
167 ieee80211_key_link(key, sdata, sta); 148 err = ieee80211_key_link(key, sdata, sta);
149 if (err)
150 ieee80211_key_free(sdata->local, key);
168 151
169 err = 0;
170 out_unlock: 152 out_unlock:
171 mutex_unlock(&sdata->local->sta_mtx); 153 mutex_unlock(&sdata->local->sta_mtx);
172 154
@@ -174,7 +156,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev,
174} 156}
175 157
176static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, 158static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
177 u8 key_idx, const u8 *mac_addr) 159 u8 key_idx, bool pairwise, const u8 *mac_addr)
178{ 160{
179 struct ieee80211_sub_if_data *sdata; 161 struct ieee80211_sub_if_data *sdata;
180 struct sta_info *sta; 162 struct sta_info *sta;
@@ -191,10 +173,17 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
191 if (!sta) 173 if (!sta)
192 goto out_unlock; 174 goto out_unlock;
193 175
194 if (sta->key) { 176 if (pairwise) {
195 ieee80211_key_free(sdata->local, sta->key); 177 if (sta->ptk) {
196 WARN_ON(sta->key); 178 ieee80211_key_free(sdata->local, sta->ptk);
197 ret = 0; 179 ret = 0;
180 }
181 } else {
182 if (sta->gtk[key_idx]) {
183 ieee80211_key_free(sdata->local,
184 sta->gtk[key_idx]);
185 ret = 0;
186 }
198 } 187 }
199 188
200 goto out_unlock; 189 goto out_unlock;
@@ -216,7 +205,8 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev,
216} 205}
217 206
218static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, 207static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
219 u8 key_idx, const u8 *mac_addr, void *cookie, 208 u8 key_idx, bool pairwise, const u8 *mac_addr,
209 void *cookie,
220 void (*callback)(void *cookie, 210 void (*callback)(void *cookie,
221 struct key_params *params)) 211 struct key_params *params))
222{ 212{
@@ -224,7 +214,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
224 struct sta_info *sta = NULL; 214 struct sta_info *sta = NULL;
225 u8 seq[6] = {0}; 215 u8 seq[6] = {0};
226 struct key_params params; 216 struct key_params params;
227 struct ieee80211_key *key; 217 struct ieee80211_key *key = NULL;
228 u32 iv32; 218 u32 iv32;
229 u16 iv16; 219 u16 iv16;
230 int err = -ENOENT; 220 int err = -ENOENT;
@@ -238,7 +228,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
238 if (!sta) 228 if (!sta)
239 goto out; 229 goto out;
240 230
241 key = sta->key; 231 if (pairwise)
232 key = sta->ptk;
233 else if (key_idx < NUM_DEFAULT_KEYS)
234 key = sta->gtk[key_idx];
242 } else 235 } else
243 key = sdata->keys[key_idx]; 236 key = sdata->keys[key_idx];
244 237
@@ -247,10 +240,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
247 240
248 memset(&params, 0, sizeof(params)); 241 memset(&params, 0, sizeof(params));
249 242
250 switch (key->conf.alg) { 243 params.cipher = key->conf.cipher;
251 case ALG_TKIP:
252 params.cipher = WLAN_CIPHER_SUITE_TKIP;
253 244
245 switch (key->conf.cipher) {
246 case WLAN_CIPHER_SUITE_TKIP:
254 iv32 = key->u.tkip.tx.iv32; 247 iv32 = key->u.tkip.tx.iv32;
255 iv16 = key->u.tkip.tx.iv16; 248 iv16 = key->u.tkip.tx.iv16;
256 249
@@ -268,8 +261,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
268 params.seq = seq; 261 params.seq = seq;
269 params.seq_len = 6; 262 params.seq_len = 6;
270 break; 263 break;
271 case ALG_CCMP: 264 case WLAN_CIPHER_SUITE_CCMP:
272 params.cipher = WLAN_CIPHER_SUITE_CCMP;
273 seq[0] = key->u.ccmp.tx_pn[5]; 265 seq[0] = key->u.ccmp.tx_pn[5];
274 seq[1] = key->u.ccmp.tx_pn[4]; 266 seq[1] = key->u.ccmp.tx_pn[4];
275 seq[2] = key->u.ccmp.tx_pn[3]; 267 seq[2] = key->u.ccmp.tx_pn[3];
@@ -279,14 +271,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev,
279 params.seq = seq; 271 params.seq = seq;
280 params.seq_len = 6; 272 params.seq_len = 6;
281 break; 273 break;
282 case ALG_WEP: 274 case WLAN_CIPHER_SUITE_AES_CMAC:
283 if (key->conf.keylen == 5)
284 params.cipher = WLAN_CIPHER_SUITE_WEP40;
285 else
286 params.cipher = WLAN_CIPHER_SUITE_WEP104;
287 break;
288 case ALG_AES_CMAC:
289 params.cipher = WLAN_CIPHER_SUITE_AES_CMAC;
290 seq[0] = key->u.aes_cmac.tx_pn[5]; 275 seq[0] = key->u.aes_cmac.tx_pn[5];
291 seq[1] = key->u.aes_cmac.tx_pn[4]; 276 seq[1] = key->u.aes_cmac.tx_pn[4];
292 seq[2] = key->u.aes_cmac.tx_pn[3]; 277 seq[2] = key->u.aes_cmac.tx_pn[3];
@@ -342,13 +327,19 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
342 STATION_INFO_TX_BYTES | 327 STATION_INFO_TX_BYTES |
343 STATION_INFO_RX_PACKETS | 328 STATION_INFO_RX_PACKETS |
344 STATION_INFO_TX_PACKETS | 329 STATION_INFO_TX_PACKETS |
345 STATION_INFO_TX_BITRATE; 330 STATION_INFO_TX_RETRIES |
331 STATION_INFO_TX_FAILED |
332 STATION_INFO_TX_BITRATE |
333 STATION_INFO_RX_DROP_MISC;
346 334
347 sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); 335 sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
348 sinfo->rx_bytes = sta->rx_bytes; 336 sinfo->rx_bytes = sta->rx_bytes;
349 sinfo->tx_bytes = sta->tx_bytes; 337 sinfo->tx_bytes = sta->tx_bytes;
350 sinfo->rx_packets = sta->rx_packets; 338 sinfo->rx_packets = sta->rx_packets;
351 sinfo->tx_packets = sta->tx_packets; 339 sinfo->tx_packets = sta->tx_packets;
340 sinfo->tx_retries = sta->tx_retry_count;
341 sinfo->tx_failed = sta->tx_retry_failed;
342 sinfo->rx_dropped_misc = sta->rx_dropped;
352 343
353 if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || 344 if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
354 (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { 345 (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
@@ -634,6 +625,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
634 struct sta_info *sta, 625 struct sta_info *sta,
635 struct station_parameters *params) 626 struct station_parameters *params)
636{ 627{
628 unsigned long flags;
637 u32 rates; 629 u32 rates;
638 int i, j; 630 int i, j;
639 struct ieee80211_supported_band *sband; 631 struct ieee80211_supported_band *sband;
@@ -642,7 +634,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
642 634
643 sband = local->hw.wiphy->bands[local->oper_channel->band]; 635 sband = local->hw.wiphy->bands[local->oper_channel->band];
644 636
645 spin_lock_bh(&sta->lock); 637 spin_lock_irqsave(&sta->flaglock, flags);
646 mask = params->sta_flags_mask; 638 mask = params->sta_flags_mask;
647 set = params->sta_flags_set; 639 set = params->sta_flags_set;
648 640
@@ -669,7 +661,7 @@ static void sta_apply_parameters(struct ieee80211_local *local,
669 if (set & BIT(NL80211_STA_FLAG_MFP)) 661 if (set & BIT(NL80211_STA_FLAG_MFP))
670 sta->flags |= WLAN_STA_MFP; 662 sta->flags |= WLAN_STA_MFP;
671 } 663 }
672 spin_unlock_bh(&sta->lock); 664 spin_unlock_irqrestore(&sta->flaglock, flags);
673 665
674 /* 666 /*
675 * cfg80211 validates this (1-2007) and allows setting the AID 667 * cfg80211 validates this (1-2007) and allows setting the AID
@@ -1143,9 +1135,9 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
1143 p.uapsd = false; 1135 p.uapsd = false;
1144 1136
1145 if (drv_conf_tx(local, params->queue, &p)) { 1137 if (drv_conf_tx(local, params->queue, &p)) {
1146 printk(KERN_DEBUG "%s: failed to set TX queue " 1138 wiphy_debug(local->hw.wiphy,
1147 "parameters for queue %d\n", 1139 "failed to set TX queue parameters for queue %d\n",
1148 wiphy_name(local->hw.wiphy), params->queue); 1140 params->queue);
1149 return -EINVAL; 1141 return -EINVAL;
1150 } 1142 }
1151 1143
@@ -1207,15 +1199,26 @@ static int ieee80211_scan(struct wiphy *wiphy,
1207 struct net_device *dev, 1199 struct net_device *dev,
1208 struct cfg80211_scan_request *req) 1200 struct cfg80211_scan_request *req)
1209{ 1201{
1210 struct ieee80211_sub_if_data *sdata; 1202 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1211
1212 sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1213 1203
1214 if (sdata->vif.type != NL80211_IFTYPE_STATION && 1204 switch (ieee80211_vif_type_p2p(&sdata->vif)) {
1215 sdata->vif.type != NL80211_IFTYPE_ADHOC && 1205 case NL80211_IFTYPE_STATION:
1216 sdata->vif.type != NL80211_IFTYPE_MESH_POINT && 1206 case NL80211_IFTYPE_ADHOC:
1217 (sdata->vif.type != NL80211_IFTYPE_AP || sdata->u.ap.beacon)) 1207 case NL80211_IFTYPE_MESH_POINT:
1208 case NL80211_IFTYPE_P2P_CLIENT:
1209 break;
1210 case NL80211_IFTYPE_P2P_GO:
1211 if (sdata->local->ops->hw_scan)
1212 break;
1213 /* FIXME: implement NoA while scanning in software */
1214 return -EOPNOTSUPP;
1215 case NL80211_IFTYPE_AP:
1216 if (sdata->u.ap.beacon)
1217 return -EOPNOTSUPP;
1218 break;
1219 default:
1218 return -EOPNOTSUPP; 1220 return -EOPNOTSUPP;
1221 }
1219 1222
1220 return ieee80211_request_scan(sdata, req); 1223 return ieee80211_request_scan(sdata, req);
1221} 1224}
@@ -1362,7 +1365,7 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy, int *dbm)
1362} 1365}
1363 1366
1364static int ieee80211_set_wds_peer(struct wiphy *wiphy, struct net_device *dev, 1367static int ieee80211_set_wds_peer(struct wiphy *wiphy, struct net_device *dev,
1365 u8 *addr) 1368 const u8 *addr)
1366{ 1369{
1367 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1370 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1368 1371
@@ -1411,7 +1414,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
1411 if (!sdata->u.mgd.associated || 1414 if (!sdata->u.mgd.associated ||
1412 sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) { 1415 sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) {
1413 mutex_lock(&sdata->local->iflist_mtx); 1416 mutex_lock(&sdata->local->iflist_mtx);
1414 ieee80211_recalc_smps(sdata->local, sdata); 1417 ieee80211_recalc_smps(sdata->local);
1415 mutex_unlock(&sdata->local->iflist_mtx); 1418 mutex_unlock(&sdata->local->iflist_mtx);
1416 return 0; 1419 return 0;
1417 } 1420 }
@@ -1541,11 +1544,11 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy,
1541 return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); 1544 return ieee80211_wk_cancel_remain_on_channel(sdata, cookie);
1542} 1545}
1543 1546
1544static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, 1547static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
1545 struct ieee80211_channel *chan, 1548 struct ieee80211_channel *chan,
1546 enum nl80211_channel_type channel_type, 1549 enum nl80211_channel_type channel_type,
1547 bool channel_type_valid, 1550 bool channel_type_valid,
1548 const u8 *buf, size_t len, u64 *cookie) 1551 const u8 *buf, size_t len, u64 *cookie)
1549{ 1552{
1550 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 1553 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
1551 struct ieee80211_local *local = sdata->local; 1554 struct ieee80211_local *local = sdata->local;
@@ -1566,7 +1569,11 @@ static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev,
1566 1569
1567 switch (sdata->vif.type) { 1570 switch (sdata->vif.type) {
1568 case NL80211_IFTYPE_ADHOC: 1571 case NL80211_IFTYPE_ADHOC:
1569 if (mgmt->u.action.category == WLAN_CATEGORY_PUBLIC) 1572 case NL80211_IFTYPE_AP:
1573 case NL80211_IFTYPE_AP_VLAN:
1574 case NL80211_IFTYPE_P2P_GO:
1575 if (!ieee80211_is_action(mgmt->frame_control) ||
1576 mgmt->u.action.category == WLAN_CATEGORY_PUBLIC)
1570 break; 1577 break;
1571 rcu_read_lock(); 1578 rcu_read_lock();
1572 sta = sta_info_get(sdata, mgmt->da); 1579 sta = sta_info_get(sdata, mgmt->da);
@@ -1575,8 +1582,7 @@ static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev,
1575 return -ENOLINK; 1582 return -ENOLINK;
1576 break; 1583 break;
1577 case NL80211_IFTYPE_STATION: 1584 case NL80211_IFTYPE_STATION:
1578 if (!(sdata->u.mgd.flags & IEEE80211_STA_MFP_ENABLED)) 1585 case NL80211_IFTYPE_P2P_CLIENT:
1579 flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
1580 break; 1586 break;
1581 default: 1587 default:
1582 return -EOPNOTSUPP; 1588 return -EOPNOTSUPP;
@@ -1598,6 +1604,23 @@ static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev,
1598 return 0; 1604 return 0;
1599} 1605}
1600 1606
1607static void ieee80211_mgmt_frame_register(struct wiphy *wiphy,
1608 struct net_device *dev,
1609 u16 frame_type, bool reg)
1610{
1611 struct ieee80211_local *local = wiphy_priv(wiphy);
1612
1613 if (frame_type != (IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ))
1614 return;
1615
1616 if (reg)
1617 local->probe_req_reg++;
1618 else
1619 local->probe_req_reg--;
1620
1621 ieee80211_queue_work(&local->hw, &local->reconfig_filter);
1622}
1623
1601struct cfg80211_ops mac80211_config_ops = { 1624struct cfg80211_ops mac80211_config_ops = {
1602 .add_virtual_intf = ieee80211_add_iface, 1625 .add_virtual_intf = ieee80211_add_iface,
1603 .del_virtual_intf = ieee80211_del_iface, 1626 .del_virtual_intf = ieee80211_del_iface,
@@ -1647,6 +1670,7 @@ struct cfg80211_ops mac80211_config_ops = {
1647 .set_bitrate_mask = ieee80211_set_bitrate_mask, 1670 .set_bitrate_mask = ieee80211_set_bitrate_mask,
1648 .remain_on_channel = ieee80211_remain_on_channel, 1671 .remain_on_channel = ieee80211_remain_on_channel,
1649 .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, 1672 .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel,
1650 .action = ieee80211_action, 1673 .mgmt_tx = ieee80211_mgmt_tx,
1651 .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, 1674 .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
1675 .mgmt_frame_register = ieee80211_mgmt_frame_register,
1652}; 1676};
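
The cfg.c key hunks track two cfg80211 API changes at once: every key operation now carries an explicit pairwise flag, and a station stores one pairwise key (ptk) plus an array of group keys (gtk[]) instead of a single sta->key. Key lookup then reduces to the sketch below (hypothetical structs; NUM_DEFAULT_KEYS is 4 in mac80211):

#define NUM_DEFAULT_KEYS 4

struct foo_key;				/* opaque for this sketch */

struct foo_sta {
	struct foo_key *ptk;			/* the pairwise key */
	struct foo_key *gtk[NUM_DEFAULT_KEYS];	/* group keys, by index */
};

static struct foo_key *foo_sta_get_key(struct foo_sta *sta,
				       int pairwise, int key_idx)
{
	if (pairwise)
		return sta->ptk;	/* index does not apply */
	if (key_idx >= 0 && key_idx < NUM_DEFAULT_KEYS)
		return sta->gtk[key_idx];
	return NULL;
}
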
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 32be11e4c4d9..5b24740fc0b0 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -11,7 +11,7 @@ __ieee80211_get_channel_mode(struct ieee80211_local *local,
11{ 11{
12 struct ieee80211_sub_if_data *sdata; 12 struct ieee80211_sub_if_data *sdata;
13 13
14 WARN_ON(!mutex_is_locked(&local->iflist_mtx)); 14 lockdep_assert_held(&local->iflist_mtx);
15 15
16 list_for_each_entry(sdata, &local->interfaces, list) { 16 list_for_each_entry(sdata, &local->interfaces, list) {
17 if (sdata == ignore) 17 if (sdata == ignore)
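
The chan.c one-liner trades WARN_ON(!mutex_is_locked(&local->iflist_mtx)) for lockdep_assert_held(): the former costs a test on every call and cannot tell "locked by me" from "locked by anyone", while the latter uses lockdep's ownership tracking and compiles away entirely in non-debug builds. In sketch form:

#include <linux/mutex.h>
#include <linux/lockdep.h>

static DEFINE_MUTEX(foo_mtx);
static int foo_count;

/* Caller contract: foo_mtx must be held. One line both documents
 * and (under CONFIG_PROVE_LOCKING) enforces it. */
static void foo_update(void)
{
	lockdep_assert_held(&foo_mtx);

	foo_count++;
}
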
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index b8b0ae79a743..18260aa99c56 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -86,13 +86,15 @@ static ssize_t tsf_write(struct file *file,
86 if (strncmp(buf, "reset", 5) == 0) { 86 if (strncmp(buf, "reset", 5) == 0) {
87 if (local->ops->reset_tsf) { 87 if (local->ops->reset_tsf) {
88 drv_reset_tsf(local); 88 drv_reset_tsf(local);
89 printk(KERN_INFO "%s: debugfs reset TSF\n", wiphy_name(local->hw.wiphy)); 89 wiphy_info(local->hw.wiphy, "debugfs reset TSF\n");
90 } 90 }
91 } else { 91 } else {
92 tsf = simple_strtoul(buf, NULL, 0); 92 tsf = simple_strtoul(buf, NULL, 0);
93 if (local->ops->set_tsf) { 93 if (local->ops->set_tsf) {
94 drv_set_tsf(local, tsf); 94 drv_set_tsf(local, tsf);
95 printk(KERN_INFO "%s: debugfs set TSF to %#018llx\n", wiphy_name(local->hw.wiphy), tsf); 95 wiphy_info(local->hw.wiphy,
96 "debugfs set TSF to %#018llx\n", tsf);
97
96 } 98 }
97 } 99 }
98 100
@@ -375,7 +377,6 @@ void debugfs_hw_add(struct ieee80211_local *local)
375 if (!phyd) 377 if (!phyd)
376 return; 378 return;
377 379
378 local->debugfs.stations = debugfs_create_dir("stations", phyd);
379 local->debugfs.keys = debugfs_create_dir("keys", phyd); 380 local->debugfs.keys = debugfs_create_dir("keys", phyd);
380 381
381 DEBUGFS_ADD(frequency); 382 DEBUGFS_ADD(frequency);
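
The debugfs.c hunks swap printk(KERN_INFO "%s: ...", wiphy_name(local->hw.wiphy), ...) for wiphy_info(), one of the dev_printk-style wrappers that prefix the wiphy name automatically (cfg.c above does the same with wiphy_debug). Assuming a valid struct wiphy pointer:

#include <net/cfg80211.h>

static void foo_log_tsf(struct wiphy *wiphy, u64 tsf)
{
	/* Before: printk(KERN_INFO "%s: debugfs set TSF to %#018llx\n",
	 *                wiphy_name(wiphy), tsf);
	 * After: the helper supplies the "phyN: " prefix itself. */
	wiphy_info(wiphy, "debugfs set TSF to %#018llx\n", tsf);
}
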
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 7cd8dd9fc240..4aa47d074a79 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -66,26 +66,13 @@ static ssize_t key_algorithm_read(struct file *file,
66 char __user *userbuf, 66 char __user *userbuf,
67 size_t count, loff_t *ppos) 67 size_t count, loff_t *ppos)
68{ 68{
69 char *alg; 69 char buf[15];
70 struct ieee80211_key *key = file->private_data; 70 struct ieee80211_key *key = file->private_data;
71 u32 c = key->conf.cipher;
71 72
72 switch (key->conf.alg) { 73 sprintf(buf, "%.2x-%.2x-%.2x:%d\n",
73 case ALG_WEP: 74 c >> 24, (c >> 16) & 0xff, (c >> 8) & 0xff, c & 0xff);
74 alg = "WEP\n"; 75 return simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf));
75 break;
76 case ALG_TKIP:
77 alg = "TKIP\n";
78 break;
79 case ALG_CCMP:
80 alg = "CCMP\n";
81 break;
82 case ALG_AES_CMAC:
83 alg = "AES-128-CMAC\n";
84 break;
85 default:
86 return 0;
87 }
88 return simple_read_from_buffer(userbuf, count, ppos, alg, strlen(alg));
89} 76}
90KEY_OPS(algorithm); 77KEY_OPS(algorithm);
91 78
@@ -97,21 +84,22 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
97 int len; 84 int len;
98 struct ieee80211_key *key = file->private_data; 85 struct ieee80211_key *key = file->private_data;
99 86
100 switch (key->conf.alg) { 87 switch (key->conf.cipher) {
101 case ALG_WEP: 88 case WLAN_CIPHER_SUITE_WEP40:
89 case WLAN_CIPHER_SUITE_WEP104:
102 len = scnprintf(buf, sizeof(buf), "\n"); 90 len = scnprintf(buf, sizeof(buf), "\n");
103 break; 91 break;
104 case ALG_TKIP: 92 case WLAN_CIPHER_SUITE_TKIP:
105 len = scnprintf(buf, sizeof(buf), "%08x %04x\n", 93 len = scnprintf(buf, sizeof(buf), "%08x %04x\n",
106 key->u.tkip.tx.iv32, 94 key->u.tkip.tx.iv32,
107 key->u.tkip.tx.iv16); 95 key->u.tkip.tx.iv16);
108 break; 96 break;
109 case ALG_CCMP: 97 case WLAN_CIPHER_SUITE_CCMP:
110 tpn = key->u.ccmp.tx_pn; 98 tpn = key->u.ccmp.tx_pn;
111 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", 99 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
112 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); 100 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]);
113 break; 101 break;
114 case ALG_AES_CMAC: 102 case WLAN_CIPHER_SUITE_AES_CMAC:
115 tpn = key->u.aes_cmac.tx_pn; 103 tpn = key->u.aes_cmac.tx_pn;
116 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", 104 len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n",
117 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], 105 tpn[0], tpn[1], tpn[2], tpn[3], tpn[4],
@@ -132,11 +120,12 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
132 int i, len; 120 int i, len;
133 const u8 *rpn; 121 const u8 *rpn;
134 122
135 switch (key->conf.alg) { 123 switch (key->conf.cipher) {
136 case ALG_WEP: 124 case WLAN_CIPHER_SUITE_WEP40:
125 case WLAN_CIPHER_SUITE_WEP104:
137 len = scnprintf(buf, sizeof(buf), "\n"); 126 len = scnprintf(buf, sizeof(buf), "\n");
138 break; 127 break;
139 case ALG_TKIP: 128 case WLAN_CIPHER_SUITE_TKIP:
140 for (i = 0; i < NUM_RX_DATA_QUEUES; i++) 129 for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
141 p += scnprintf(p, sizeof(buf)+buf-p, 130 p += scnprintf(p, sizeof(buf)+buf-p,
142 "%08x %04x\n", 131 "%08x %04x\n",
@@ -144,7 +133,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
144 key->u.tkip.rx[i].iv16); 133 key->u.tkip.rx[i].iv16);
145 len = p - buf; 134 len = p - buf;
146 break; 135 break;
147 case ALG_CCMP: 136 case WLAN_CIPHER_SUITE_CCMP:
148 for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) { 137 for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) {
149 rpn = key->u.ccmp.rx_pn[i]; 138 rpn = key->u.ccmp.rx_pn[i];
150 p += scnprintf(p, sizeof(buf)+buf-p, 139 p += scnprintf(p, sizeof(buf)+buf-p,
@@ -154,7 +143,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
154 } 143 }
155 len = p - buf; 144 len = p - buf;
156 break; 145 break;
157 case ALG_AES_CMAC: 146 case WLAN_CIPHER_SUITE_AES_CMAC:
158 rpn = key->u.aes_cmac.rx_pn; 147 rpn = key->u.aes_cmac.rx_pn;
159 p += scnprintf(p, sizeof(buf)+buf-p, 148 p += scnprintf(p, sizeof(buf)+buf-p,
160 "%02x%02x%02x%02x%02x%02x\n", 149 "%02x%02x%02x%02x%02x%02x\n",
@@ -176,11 +165,11 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf,
176 char buf[20]; 165 char buf[20];
177 int len; 166 int len;
178 167
179 switch (key->conf.alg) { 168 switch (key->conf.cipher) {
180 case ALG_CCMP: 169 case WLAN_CIPHER_SUITE_CCMP:
181 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); 170 len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays);
182 break; 171 break;
183 case ALG_AES_CMAC: 172 case WLAN_CIPHER_SUITE_AES_CMAC:
184 len = scnprintf(buf, sizeof(buf), "%u\n", 173 len = scnprintf(buf, sizeof(buf), "%u\n",
185 key->u.aes_cmac.replays); 174 key->u.aes_cmac.replays);
186 break; 175 break;
@@ -198,8 +187,8 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf,
198 char buf[20]; 187 char buf[20];
199 int len; 188 int len;
200 189
201 switch (key->conf.alg) { 190 switch (key->conf.cipher) {
202 case ALG_AES_CMAC: 191 case WLAN_CIPHER_SUITE_AES_CMAC:
203 len = scnprintf(buf, sizeof(buf), "%u\n", 192 len = scnprintf(buf, sizeof(buf), "%u\n",
204 key->u.aes_cmac.icverrors); 193 key->u.aes_cmac.icverrors);
205 break; 194 break;
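
debugfs_key.c no longer needs a per-algorithm switch to name the cipher: a WLAN_CIPHER_SUITE_* value is the 802.11 suite selector itself, the 00-0f-ac OUI in the top three bytes and a suite type in the low byte, so the new key_algorithm_read() just unpacks the u32. A runnable illustration of the same decomposition (constant value as in ieee80211.h):

#include <stdio.h>
#include <stdint.h>

#define WLAN_CIPHER_SUITE_CCMP	0x000fac04	/* OUI 00-0f-ac, type 4 */

int main(void)
{
	uint32_t c = WLAN_CIPHER_SUITE_CCMP;

	/* Same format the new key_algorithm_read() emits: "00-0f-ac:4" */
	printf("%.2x-%.2x-%.2x:%d\n",
	       (unsigned)(c >> 24), (unsigned)((c >> 16) & 0xff),
	       (unsigned)((c >> 8) & 0xff), (int)(c & 0xff));
	return 0;
}
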
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 8ad33eef7dda..cbdf36d7841c 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -410,6 +410,9 @@ void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata)
410 sprintf(buf, "netdev:%s", sdata->name); 410 sprintf(buf, "netdev:%s", sdata->name);
411 sdata->debugfs.dir = debugfs_create_dir(buf, 411 sdata->debugfs.dir = debugfs_create_dir(buf,
412 sdata->local->hw.wiphy->debugfsdir); 412 sdata->local->hw.wiphy->debugfsdir);
413 if (sdata->debugfs.dir)
414 sdata->debugfs.subdir_stations = debugfs_create_dir("stations",
415 sdata->debugfs.dir);
413 add_files(sdata); 416 add_files(sdata);
414} 417}
415 418
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 6a8fdc372c43..4601fea1784d 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -198,7 +198,8 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *userbu
198 else 198 else
199 ret = ieee80211_stop_tx_ba_session(&sta->sta, tid); 199 ret = ieee80211_stop_tx_ba_session(&sta->sta, tid);
200 } else { 200 } else {
201 __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, 3); 201 __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT,
202 3, true);
202 ret = 0; 203 ret = 0;
203 } 204 }
204 205
@@ -302,7 +303,7 @@ STA_OPS(ht_capa);
302 303
303void ieee80211_sta_debugfs_add(struct sta_info *sta) 304void ieee80211_sta_debugfs_add(struct sta_info *sta)
304{ 305{
305 struct dentry *stations_dir = sta->local->debugfs.stations; 306 struct dentry *stations_dir = sta->sdata->debugfs.subdir_stations;
306 u8 mac[3*ETH_ALEN]; 307 u8 mac[3*ETH_ALEN];
307 308
308 sta->debugfs.add_has_run = true; 309 sta->debugfs.add_has_run = true;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 14123dce544b..16983825f8e8 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -54,6 +54,20 @@ static inline int drv_add_interface(struct ieee80211_local *local,
54 return ret; 54 return ret;
55} 55}
56 56
57static inline int drv_change_interface(struct ieee80211_local *local,
58 struct ieee80211_sub_if_data *sdata,
59 enum nl80211_iftype type, bool p2p)
60{
61 int ret;
62
63 might_sleep();
64
65 trace_drv_change_interface(local, sdata, type, p2p);
66 ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p);
67 trace_drv_return_int(local, ret);
68 return ret;
69}
70
57static inline void drv_remove_interface(struct ieee80211_local *local, 71static inline void drv_remove_interface(struct ieee80211_local *local,
58 struct ieee80211_vif *vif) 72 struct ieee80211_vif *vif)
59{ 73{
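
drv_change_interface() has the same skeleton as every wrapper in driver-ops.h: assert a sleepable context, fire an entry tracepoint, dispatch through the driver's ops table, trace the return value. A userspace-shaped sketch of that indirection (placeholder names, printf standing in for the tracepoints):

#include <stdio.h>

struct foo_ops {
	int (*change_interface)(int new_type);
};

struct foo_local {
	const struct foo_ops *ops;
};

/* The wrapper idiom: trace entry, call the hook, trace the result. */
static int drv_change(struct foo_local *local, int new_type)
{
	int ret;

	printf("trace: drv_change_interface type=%d\n", new_type);
	ret = local->ops->change_interface(new_type);
	printf("trace: drv_return_int ret=%d\n", ret);
	return ret;
}

static int my_change_interface(int new_type)
{
	return 0;	/* hypothetical driver hook */
}

int main(void)
{
	static const struct foo_ops ops = {
		.change_interface = my_change_interface,
	};
	struct foo_local local = { .ops = &ops };

	return drv_change(&local, 3);
}
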
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 5d5d2a974668..6831fb1641c8 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -25,12 +25,14 @@ static inline void trace_ ## name(proto) {}
25#define STA_PR_FMT " sta:%pM" 25#define STA_PR_FMT " sta:%pM"
26#define STA_PR_ARG __entry->sta_addr 26#define STA_PR_ARG __entry->sta_addr
27 27
28#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \ 28#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \
29 __field(bool, p2p) \
29 __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>") 30 __string(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
30#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ 31#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \
32 __entry->p2p = sdata->vif.p2p; \
31 __assign_str(vif_name, sdata->dev ? sdata->dev->name : "<nodev>") 33 __assign_str(vif_name, sdata->dev ? sdata->dev->name : "<nodev>")
32#define VIF_PR_FMT " vif:%s(%d)" 34#define VIF_PR_FMT " vif:%s(%d%s)"
33#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type 35#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
34 36
35/* 37/*
36 * Tracing for driver callbacks. 38 * Tracing for driver callbacks.
@@ -136,6 +138,34 @@ TRACE_EVENT(drv_add_interface,
136 ) 138 )
137); 139);
138 140
141TRACE_EVENT(drv_change_interface,
142 TP_PROTO(struct ieee80211_local *local,
143 struct ieee80211_sub_if_data *sdata,
144 enum nl80211_iftype type, bool p2p),
145
146 TP_ARGS(local, sdata, type, p2p),
147
148 TP_STRUCT__entry(
149 LOCAL_ENTRY
150 VIF_ENTRY
151 __field(u32, new_type)
152 __field(bool, new_p2p)
153 ),
154
155 TP_fast_assign(
156 LOCAL_ASSIGN;
157 VIF_ASSIGN;
158 __entry->new_type = type;
159 __entry->new_p2p = p2p;
160 ),
161
162 TP_printk(
163 LOCAL_PR_FMT VIF_PR_FMT " new type:%d%s",
164 LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type,
165 __entry->new_p2p ? "/p2p" : ""
166 )
167);
168
139TRACE_EVENT(drv_remove_interface, 169TRACE_EVENT(drv_remove_interface,
140 TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), 170 TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata),
141 171
@@ -336,7 +366,7 @@ TRACE_EVENT(drv_set_key,
336 LOCAL_ENTRY 366 LOCAL_ENTRY
337 VIF_ENTRY 367 VIF_ENTRY
338 STA_ENTRY 368 STA_ENTRY
339 __field(enum ieee80211_key_alg, alg) 369 __field(u32, cipher)
340 __field(u8, hw_key_idx) 370 __field(u8, hw_key_idx)
341 __field(u8, flags) 371 __field(u8, flags)
342 __field(s8, keyidx) 372 __field(s8, keyidx)
@@ -346,7 +376,7 @@ TRACE_EVENT(drv_set_key,
346 LOCAL_ASSIGN; 376 LOCAL_ASSIGN;
347 VIF_ASSIGN; 377 VIF_ASSIGN;
348 STA_ASSIGN; 378 STA_ASSIGN;
349 __entry->alg = key->alg; 379 __entry->cipher = key->cipher;
350 __entry->flags = key->flags; 380 __entry->flags = key->flags;
351 __entry->keyidx = key->keyidx; 381 __entry->keyidx = key->keyidx;
352 __entry->hw_key_idx = key->hw_key_idx; 382 __entry->hw_key_idx = key->hw_key_idx;
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 9d101fb33861..75d679d75e63 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -101,16 +101,16 @@ void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband,
101 ht_cap->mcs.rx_mask[32/8] |= 1; 101 ht_cap->mcs.rx_mask[32/8] |= 1;
102} 102}
103 103
104void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta) 104void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx)
105{ 105{
106 int i; 106 int i;
107 107
108 cancel_work_sync(&sta->ampdu_mlme.work); 108 cancel_work_sync(&sta->ampdu_mlme.work);
109 109
110 for (i = 0; i < STA_TID_NUM; i++) { 110 for (i = 0; i < STA_TID_NUM; i++) {
111 __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR); 111 __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR, tx);
112 __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, 112 __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT,
113 WLAN_REASON_QSTA_LEAVE_QBSS); 113 WLAN_REASON_QSTA_LEAVE_QBSS, tx);
114 } 114 }
115} 115}
116 116
@@ -135,7 +135,7 @@ void ieee80211_ba_session_work(struct work_struct *work)
135 if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired)) 135 if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired))
136 ___ieee80211_stop_rx_ba_session( 136 ___ieee80211_stop_rx_ba_session(
137 sta, tid, WLAN_BACK_RECIPIENT, 137 sta, tid, WLAN_BACK_RECIPIENT,
138 WLAN_REASON_QSTA_TIMEOUT); 138 WLAN_REASON_QSTA_TIMEOUT, true);
139 139
140 tid_tx = sta->ampdu_mlme.tid_tx[tid]; 140 tid_tx = sta->ampdu_mlme.tid_tx[tid];
141 if (!tid_tx) 141 if (!tid_tx)
@@ -146,7 +146,8 @@ void ieee80211_ba_session_work(struct work_struct *work)
146 else if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP, 146 else if (test_and_clear_bit(HT_AGG_STATE_WANT_STOP,
147 &tid_tx->state)) 147 &tid_tx->state))
148 ___ieee80211_stop_tx_ba_session(sta, tid, 148 ___ieee80211_stop_tx_ba_session(sta, tid,
149 WLAN_BACK_INITIATOR); 149 WLAN_BACK_INITIATOR,
150 true);
150 } 151 }
151 mutex_unlock(&sta->ampdu_mlme.mtx); 152 mutex_unlock(&sta->ampdu_mlme.mtx);
152} 153}
@@ -214,9 +215,11 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
214#endif /* CONFIG_MAC80211_HT_DEBUG */ 215#endif /* CONFIG_MAC80211_HT_DEBUG */
215 216
216 if (initiator == WLAN_BACK_INITIATOR) 217 if (initiator == WLAN_BACK_INITIATOR)
217 __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0); 218 __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0,
219 true);
218 else 220 else
219 __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_RECIPIENT); 221 __ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_RECIPIENT,
222 true);
220} 223}
221 224
222int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, 225int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
@@ -265,3 +268,33 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
265 268
266 return 0; 269 return 0;
267} 270}
271
272void ieee80211_request_smps_work(struct work_struct *work)
273{
274 struct ieee80211_sub_if_data *sdata =
275 container_of(work, struct ieee80211_sub_if_data,
276 u.mgd.request_smps_work);
277
278 mutex_lock(&sdata->u.mgd.mtx);
279 __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode);
280 mutex_unlock(&sdata->u.mgd.mtx);
281}
282
283void ieee80211_request_smps(struct ieee80211_vif *vif,
284 enum ieee80211_smps_mode smps_mode)
285{
286 struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
287
288 if (WARN_ON(vif->type != NL80211_IFTYPE_STATION))
289 return;
290
291 if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF))
292 smps_mode = IEEE80211_SMPS_AUTOMATIC;
293
294 sdata->u.mgd.driver_smps_mode = smps_mode;
295
296 ieee80211_queue_work(&sdata->local->hw,
297 &sdata->u.mgd.request_smps_work);
298}
299/* this might change ... don't want non-open drivers using it */
300EXPORT_SYMBOL_GPL(ieee80211_request_smps);
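
ieee80211_request_smps() shows the standard deferral idiom: the exported entry point only records the requested mode and queues a work item, and the work handler later recovers its owning object with container_of() to do the real mutex-protected update in process context. A self-contained sketch of the same container_of round trip, using made-up structure names (the demo runs the work inline instead of queueing it):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work_item {
	void (*fn)(struct work_item *w);
};

struct managed_iface {
	int requested_mode;		/* written by the request path */
	struct work_item smps_work;	/* embedded, like request_smps_work */
};

static void smps_work_fn(struct work_item *w)
{
	/* recover the enclosing object from the embedded member */
	struct managed_iface *m =
		container_of(w, struct managed_iface, smps_work);

	printf("applying SMPS mode %d\n", m->requested_mode);
}

static void request_smps(struct managed_iface *m, int mode)
{
	m->requested_mode = mode;
	m->smps_work.fn = smps_work_fn;	/* a real kernel would queue_work() */
	m->smps_work.fn(&m->smps_work);	/* run inline for the demo */
}

int main(void)
{
	struct managed_iface m = { 0 };
	request_smps(&m, 2);
	return 0;
}
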
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index c691780725a7..ff60c022f51d 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -173,6 +173,19 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
173 memcpy(skb_put(skb, ifibss->ie_len), 173 memcpy(skb_put(skb, ifibss->ie_len),
174 ifibss->ie, ifibss->ie_len); 174 ifibss->ie, ifibss->ie_len);
175 175
176 if (local->hw.queues >= 4) {
177 pos = skb_put(skb, 9);
178 *pos++ = WLAN_EID_VENDOR_SPECIFIC;
179 *pos++ = 7; /* len */
180 *pos++ = 0x00; /* Microsoft OUI 00:50:F2 */
181 *pos++ = 0x50;
182 *pos++ = 0xf2;
183 *pos++ = 2; /* WME */
184 *pos++ = 0; /* WME info */
185 *pos++ = 1; /* WME ver */
 186 *pos++ = 0; /* U-APSD not in use */
187 }
188
176 rcu_assign_pointer(ifibss->presp, skb); 189 rcu_assign_pointer(ifibss->presp, skb);
177 190
178 sdata->vif.bss_conf.beacon_int = beacon_int; 191 sdata->vif.bss_conf.beacon_int = beacon_int;
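
The added block appends a 9-byte vendor-specific WMM information element to the IBSS beacon template: element ID, length, the Microsoft OUI, OUI type 2 (WME), then the subtype, version and QoS-info octets. The byte layout is easy to verify in isolation; the buffer and function name below are illustrative:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define WLAN_EID_VENDOR_SPECIFIC 221

/* build the 9-byte WMM information element, as in the hunk above */
static size_t build_wmm_info_ie(uint8_t *pos)
{
	uint8_t *start = pos;

	*pos++ = WLAN_EID_VENDOR_SPECIFIC;
	*pos++ = 7;	/* length of what follows */
	*pos++ = 0x00;	/* Microsoft OUI 00:50:F2 */
	*pos++ = 0x50;
	*pos++ = 0xf2;
	*pos++ = 2;	/* OUI type: WME */
	*pos++ = 0;	/* subtype: WME info */
	*pos++ = 1;	/* WME version */
	*pos++ = 0;	/* QoS info: U-APSD not in use */

	return pos - start;
}

int main(void)
{
	uint8_t buf[9];

	assert(build_wmm_info_ie(buf) == sizeof(buf));
	for (size_t i = 0; i < sizeof(buf); i++)
		printf("%02x ", buf[i]);
	printf("\n");
	return 0;
}
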
@@ -266,37 +279,45 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
266 if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) 279 if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
267 return; 280 return;
268 281
269 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates && 282 if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
270 memcmp(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) { 283 memcmp(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) {
271 supp_rates = ieee80211_sta_get_rates(local, elems, band);
272 284
273 rcu_read_lock(); 285 rcu_read_lock();
274
275 sta = sta_info_get(sdata, mgmt->sa); 286 sta = sta_info_get(sdata, mgmt->sa);
276 if (sta) {
277 u32 prev_rates;
278 287
279 prev_rates = sta->sta.supp_rates[band]; 288 if (elems->supp_rates) {
280 /* make sure mandatory rates are always added */ 289 supp_rates = ieee80211_sta_get_rates(local, elems,
281 sta->sta.supp_rates[band] = supp_rates | 290 band);
282 ieee80211_mandatory_rates(local, band); 291 if (sta) {
292 u32 prev_rates;
293
294 prev_rates = sta->sta.supp_rates[band];
295 /* make sure mandatory rates are always added */
296 sta->sta.supp_rates[band] = supp_rates |
297 ieee80211_mandatory_rates(local, band);
283 298
284 if (sta->sta.supp_rates[band] != prev_rates) { 299 if (sta->sta.supp_rates[band] != prev_rates) {
285#ifdef CONFIG_MAC80211_IBSS_DEBUG 300#ifdef CONFIG_MAC80211_IBSS_DEBUG
286 printk(KERN_DEBUG "%s: updated supp_rates set " 301 printk(KERN_DEBUG
287 "for %pM based on beacon/probe_response " 302 "%s: updated supp_rates set "
288 "(0x%x -> 0x%x)\n", 303 "for %pM based on beacon"
289 sdata->name, sta->sta.addr, 304 "/probe_resp (0x%x -> 0x%x)\n",
290 prev_rates, sta->sta.supp_rates[band]); 305 sdata->name, sta->sta.addr,
306 prev_rates,
307 sta->sta.supp_rates[band]);
291#endif 308#endif
292 rate_control_rate_init(sta); 309 rate_control_rate_init(sta);
293 } 310 }
294 rcu_read_unlock(); 311 } else
295 } else { 312 sta = ieee80211_ibss_add_sta(sdata, mgmt->bssid,
296 rcu_read_unlock(); 313 mgmt->sa, supp_rates,
297 ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, 314 GFP_ATOMIC);
298 supp_rates, GFP_KERNEL);
299 } 315 }
316
317 if (sta && elems->wmm_info)
318 set_sta_flags(sta, WLAN_STA_WME);
319
320 rcu_read_unlock();
300 } 321 }
301 322
302 bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, 323 bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems,
@@ -427,8 +448,8 @@ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata,
427 return NULL; 448 return NULL;
428 449
429#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 450#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
430 printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n", 451 wiphy_debug(local->hw.wiphy, "Adding new IBSS station %pM (dev=%s)\n",
431 wiphy_name(local->hw.wiphy), addr, sdata->name); 452 addr, sdata->name);
432#endif 453#endif
433 454
434 sta = sta_info_alloc(sdata, addr, gfp); 455 sta = sta_info_alloc(sdata, addr, gfp);
@@ -920,12 +941,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
920 memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN); 941 memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN);
921 sdata->u.ibss.ssid_len = params->ssid_len; 942 sdata->u.ibss.ssid_len = params->ssid_len;
922 943
944 mutex_unlock(&sdata->u.ibss.mtx);
945
946 mutex_lock(&sdata->local->mtx);
923 ieee80211_recalc_idle(sdata->local); 947 ieee80211_recalc_idle(sdata->local);
948 mutex_unlock(&sdata->local->mtx);
924 949
925 ieee80211_queue_work(&sdata->local->hw, &sdata->work); 950 ieee80211_queue_work(&sdata->local->hw, &sdata->work);
926 951
927 mutex_unlock(&sdata->u.ibss.mtx);
928
929 return 0; 952 return 0;
930} 953}
931 954
@@ -980,7 +1003,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
980 1003
981 mutex_unlock(&sdata->u.ibss.mtx); 1004 mutex_unlock(&sdata->u.ibss.mtx);
982 1005
1006 mutex_lock(&local->mtx);
983 ieee80211_recalc_idle(sdata->local); 1007 ieee80211_recalc_idle(sdata->local);
1008 mutex_unlock(&local->mtx);
984 1009
985 return 0; 1010 return 0;
986} 1011}
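
Both hunks reorder the locking so that ieee80211_recalc_idle() — which after this series asserts local->mtx is held — runs only after u.ibss.mtx has been dropped, instead of nesting the two mutexes. Releasing one lock before taking an unrelated one avoids creating a lock ordering that could later deadlock. A toy pthread sketch of the resulting shape, names invented (compile with -lpthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ibss_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t local_mtx = PTHREAD_MUTEX_INITIALIZER;

/* like the reworked __ieee80211_recalc_idle(): caller holds local_mtx */
static void recalc_idle_locked(void)
{
	printf("recalculating idle state\n");
}

static void ibss_join(void)
{
	pthread_mutex_lock(&ibss_mtx);
	/* ... update ssid/bssid state under ibss_mtx ... */
	pthread_mutex_unlock(&ibss_mtx);

	/* take local_mtx only after ibss_mtx is dropped: no nesting,
	 * hence no ibss_mtx -> local_mtx ordering to get wrong */
	pthread_mutex_lock(&local_mtx);
	recalc_idle_locked();
	pthread_mutex_unlock(&local_mtx);
}

int main(void)
{
	ibss_join();
	return 0;
}
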
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 65e0ed6c2975..b80c38689927 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -50,12 +50,6 @@ struct ieee80211_local;
50 * increased memory use (about 2 kB of RAM per entry). */ 50 * increased memory use (about 2 kB of RAM per entry). */
51#define IEEE80211_FRAGMENT_MAX 4 51#define IEEE80211_FRAGMENT_MAX 4
52 52
53/*
54 * Time after which we ignore scan results and no longer report/use
55 * them in any way.
56 */
57#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ)
58
59#define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024)) 53#define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024))
60 54
61#define IEEE80211_DEFAULT_UAPSD_QUEUES \ 55#define IEEE80211_DEFAULT_UAPSD_QUEUES \
@@ -165,12 +159,37 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
165#define RX_DROP_MONITOR ((__force ieee80211_rx_result) 2u) 159#define RX_DROP_MONITOR ((__force ieee80211_rx_result) 2u)
166#define RX_QUEUED ((__force ieee80211_rx_result) 3u) 160#define RX_QUEUED ((__force ieee80211_rx_result) 3u)
167 161
168#define IEEE80211_RX_IN_SCAN BIT(0) 162/**
169/* frame is destined to interface currently processed (incl. multicast frames) */ 163 * enum ieee80211_packet_rx_flags - packet RX flags
170#define IEEE80211_RX_RA_MATCH BIT(1) 164 * @IEEE80211_RX_RA_MATCH: frame is destined to interface currently processed
171#define IEEE80211_RX_AMSDU BIT(2) 165 * (incl. multicast frames)
172#define IEEE80211_RX_FRAGMENTED BIT(3) 166 * @IEEE80211_RX_IN_SCAN: received while scanning
173/* only add flags here that do not change with subframes of an aMPDU */ 167 * @IEEE80211_RX_FRAGMENTED: fragmented frame
168 * @IEEE80211_RX_AMSDU: a-MSDU packet
169 * @IEEE80211_RX_MALFORMED_ACTION_FRM: action frame is malformed
170 *
171 * These are per-frame flags that are attached to a frame in the
172 * @rx_flags field of &struct ieee80211_rx_status.
173 */
174enum ieee80211_packet_rx_flags {
175 IEEE80211_RX_IN_SCAN = BIT(0),
176 IEEE80211_RX_RA_MATCH = BIT(1),
177 IEEE80211_RX_FRAGMENTED = BIT(2),
178 IEEE80211_RX_AMSDU = BIT(3),
179 IEEE80211_RX_MALFORMED_ACTION_FRM = BIT(4),
180};
181
182/**
183 * enum ieee80211_rx_flags - RX data flags
184 *
185 * @IEEE80211_RX_CMNTR: received on cooked monitor already
186 *
187 * These flags are used across handling multiple interfaces
188 * for a single frame.
189 */
190enum ieee80211_rx_flags {
191 IEEE80211_RX_CMNTR = BIT(0),
192};
174 193
175struct ieee80211_rx_data { 194struct ieee80211_rx_data {
176 struct sk_buff *skb; 195 struct sk_buff *skb;
@@ -343,10 +362,14 @@ struct ieee80211_if_managed {
343 unsigned long timers_running; /* used for quiesce/restart */ 362 unsigned long timers_running; /* used for quiesce/restart */
344 bool powersave; /* powersave requested for this iface */ 363 bool powersave; /* powersave requested for this iface */
345 enum ieee80211_smps_mode req_smps, /* requested smps mode */ 364 enum ieee80211_smps_mode req_smps, /* requested smps mode */
346 ap_smps; /* smps mode AP thinks we're in */ 365 ap_smps, /* smps mode AP thinks we're in */
366 driver_smps_mode; /* smps mode request */
367
368 struct work_struct request_smps_work;
347 369
348 unsigned int flags; 370 unsigned int flags;
349 371
372 bool beacon_crc_valid;
350 u32 beacon_crc; 373 u32 beacon_crc;
351 374
352 enum { 375 enum {
@@ -371,6 +394,13 @@ struct ieee80211_if_managed {
371 int ave_beacon_signal; 394 int ave_beacon_signal;
372 395
373 /* 396 /*
397 * Number of Beacon frames used in ave_beacon_signal. This can be used
398 * to avoid generating less reliable cqm events that would be based
 399 * only on a couple of received frames.
400 */
401 unsigned int count_beacon_signal;
402
403 /*
374 * Last Beacon frame signal strength average (ave_beacon_signal / 16) 404 * Last Beacon frame signal strength average (ave_beacon_signal / 16)
375 * that triggered a cqm event. 0 indicates that no event has been 405 * that triggered a cqm event. 0 indicates that no event has been
376 * generated for the current association. 406 * generated for the current association.
@@ -474,6 +504,19 @@ enum ieee80211_sub_if_data_flags {
474 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), 504 IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3),
475}; 505};
476 506
507/**
508 * enum ieee80211_sdata_state_bits - virtual interface state bits
509 * @SDATA_STATE_RUNNING: virtual interface is up & running; this
510 * mirrors netif_running() but is separate for interface type
511 * change handling while the interface is up
512 * @SDATA_STATE_OFFCHANNEL: This interface is currently in offchannel
513 * mode, so queues are stopped
514 */
515enum ieee80211_sdata_state_bits {
516 SDATA_STATE_RUNNING,
517 SDATA_STATE_OFFCHANNEL,
518};
519
477struct ieee80211_sub_if_data { 520struct ieee80211_sub_if_data {
478 struct list_head list; 521 struct list_head list;
479 522
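
SDATA_STATE_RUNNING decouples mac80211's notion of "interface up" from netif_running(), so an interface type change can be in flight while the netdev stays administratively up; later in this diff, ieee80211_sdata_running() becomes a test_bit() on sdata->state. In the kernel these are the atomic set_bit/clear_bit/test_bit helpers on an unsigned long; a rough userspace analogue using C11 atomics, names invented:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum { STATE_RUNNING, STATE_OFFCHANNEL };

static atomic_ulong state;

static void set_bit_ul(int nr)   { atomic_fetch_or(&state, 1ul << nr); }
static void clear_bit_ul(int nr) { atomic_fetch_and(&state, ~(1ul << nr)); }
static bool test_bit_ul(int nr)  { return atomic_load(&state) & (1ul << nr); }

/* analogue of ieee80211_sdata_running() after this patch */
static bool sdata_running(void)
{
	return test_bit_ul(STATE_RUNNING);
}

int main(void)
{
	set_bit_ul(STATE_RUNNING);	/* as in ieee80211_do_open() */
	printf("running: %d\n", sdata_running());
	clear_bit_ul(STATE_RUNNING);	/* as in ieee80211_do_stop() */
	printf("running: %d\n", sdata_running());
	return 0;
}
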
@@ -487,6 +530,8 @@ struct ieee80211_sub_if_data {
487 530
488 unsigned int flags; 531 unsigned int flags;
489 532
533 unsigned long state;
534
490 int drop_unencrypted; 535 int drop_unencrypted;
491 536
492 char name[IFNAMSIZ]; 537 char name[IFNAMSIZ];
@@ -497,17 +542,20 @@ struct ieee80211_sub_if_data {
497 */ 542 */
498 bool ht_opmode_valid; 543 bool ht_opmode_valid;
499 544
545 /* to detect idle changes */
546 bool old_idle;
547
500 /* Fragment table for host-based reassembly */ 548 /* Fragment table for host-based reassembly */
501 struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; 549 struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX];
502 unsigned int fragment_next; 550 unsigned int fragment_next;
503 551
504#define NUM_DEFAULT_KEYS 4
505#define NUM_DEFAULT_MGMT_KEYS 2
506 struct ieee80211_key *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; 552 struct ieee80211_key *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS];
507 struct ieee80211_key *default_key; 553 struct ieee80211_key *default_key;
508 struct ieee80211_key *default_mgmt_key; 554 struct ieee80211_key *default_mgmt_key;
509 555
510 u16 sequence_number; 556 u16 sequence_number;
557 __be16 control_port_protocol;
558 bool control_port_no_encrypt;
511 559
512 struct work_struct work; 560 struct work_struct work;
513 struct sk_buff_head skb_queue; 561 struct sk_buff_head skb_queue;
@@ -539,6 +587,7 @@ struct ieee80211_sub_if_data {
539#ifdef CONFIG_MAC80211_DEBUGFS 587#ifdef CONFIG_MAC80211_DEBUGFS
540 struct { 588 struct {
541 struct dentry *dir; 589 struct dentry *dir;
590 struct dentry *subdir_stations;
542 struct dentry *default_key; 591 struct dentry *default_key;
543 struct dentry *default_mgmt_key; 592 struct dentry *default_mgmt_key;
544 } debugfs; 593 } debugfs;
@@ -595,11 +644,17 @@ enum queue_stop_reason {
595 * determine if we are on the operating channel or not 644 * determine if we are on the operating channel or not
596 * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning, 645 * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning,
597 * gets only set in conjunction with SCAN_SW_SCANNING 646 * gets only set in conjunction with SCAN_SW_SCANNING
647 * @SCAN_COMPLETED: Set for our scan work function when the driver reported
648 * that the scan completed.
649 * @SCAN_ABORTED: Set for our scan work function when the driver reported
650 * a scan complete for an aborted scan.
598 */ 651 */
599enum { 652enum {
600 SCAN_SW_SCANNING, 653 SCAN_SW_SCANNING,
601 SCAN_HW_SCANNING, 654 SCAN_HW_SCANNING,
602 SCAN_OFF_CHANNEL, 655 SCAN_OFF_CHANNEL,
656 SCAN_COMPLETED,
657 SCAN_ABORTED,
603}; 658};
604 659
605/** 660/**
@@ -634,7 +689,6 @@ struct ieee80211_local {
634 /* 689 /*
635 * work stuff, potentially off-channel (in the future) 690 * work stuff, potentially off-channel (in the future)
636 */ 691 */
637 struct mutex work_mtx;
638 struct list_head work_list; 692 struct list_head work_list;
639 struct timer_list work_timer; 693 struct timer_list work_timer;
640 struct work_struct work_work; 694 struct work_struct work_work;
@@ -653,9 +707,13 @@ struct ieee80211_local {
653 int open_count; 707 int open_count;
654 int monitors, cooked_mntrs; 708 int monitors, cooked_mntrs;
655 /* number of interfaces with corresponding FIF_ flags */ 709 /* number of interfaces with corresponding FIF_ flags */
656 int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll; 710 int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll,
711 fif_probe_req;
712 int probe_req_reg;
657 unsigned int filter_flags; /* FIF_* */ 713 unsigned int filter_flags; /* FIF_* */
658 714
715 bool wiphy_ciphers_allocated;
716
659 /* protects the aggregated multicast list and filter calls */ 717 /* protects the aggregated multicast list and filter calls */
660 spinlock_t filter_lock; 718 spinlock_t filter_lock;
661 719
@@ -746,9 +804,10 @@ struct ieee80211_local {
746 */ 804 */
747 struct mutex key_mtx; 805 struct mutex key_mtx;
748 806
807 /* mutex for scan and work locking */
808 struct mutex mtx;
749 809
750 /* Scanning and BSS list */ 810 /* Scanning and BSS list */
751 struct mutex scan_mtx;
752 unsigned long scanning; 811 unsigned long scanning;
753 struct cfg80211_ssid scan_ssid; 812 struct cfg80211_ssid scan_ssid;
754 struct cfg80211_scan_request *int_scan_req; 813 struct cfg80211_scan_request *int_scan_req;
@@ -866,10 +925,14 @@ struct ieee80211_local {
866#ifdef CONFIG_MAC80211_DEBUGFS 925#ifdef CONFIG_MAC80211_DEBUGFS
867 struct local_debugfsdentries { 926 struct local_debugfsdentries {
868 struct dentry *rcdir; 927 struct dentry *rcdir;
869 struct dentry *stations;
870 struct dentry *keys; 928 struct dentry *keys;
871 } debugfs; 929 } debugfs;
872#endif 930#endif
931
932 /* dummy netdev for use w/ NAPI */
933 struct net_device napi_dev;
934
935 struct napi_struct napi;
873}; 936};
874 937
875static inline struct ieee80211_sub_if_data * 938static inline struct ieee80211_sub_if_data *
@@ -1003,6 +1066,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
1003void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata); 1066void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata);
1004void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, 1067void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1005 struct sk_buff *skb); 1068 struct sk_buff *skb);
1069void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata);
1070void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata);
1006 1071
1007/* IBSS code */ 1072/* IBSS code */
1008void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); 1073void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local);
@@ -1068,10 +1133,12 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata);
1068void ieee80211_remove_interfaces(struct ieee80211_local *local); 1133void ieee80211_remove_interfaces(struct ieee80211_local *local);
1069u32 __ieee80211_recalc_idle(struct ieee80211_local *local); 1134u32 __ieee80211_recalc_idle(struct ieee80211_local *local);
1070void ieee80211_recalc_idle(struct ieee80211_local *local); 1135void ieee80211_recalc_idle(struct ieee80211_local *local);
1136void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata,
1137 const int offset);
1071 1138
1072static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) 1139static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata)
1073{ 1140{
1074 return netif_running(sdata->dev); 1141 return test_bit(SDATA_STATE_RUNNING, &sdata->state);
1075} 1142}
1076 1143
1077/* tx handling */ 1144/* tx handling */
@@ -1105,12 +1172,13 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata,
1105int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, 1172int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
1106 enum ieee80211_smps_mode smps, const u8 *da, 1173 enum ieee80211_smps_mode smps, const u8 *da,
1107 const u8 *bssid); 1174 const u8 *bssid);
1175void ieee80211_request_smps_work(struct work_struct *work);
1108 1176
1109void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, 1177void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
1110 u16 initiator, u16 reason); 1178 u16 initiator, u16 reason, bool stop);
1111void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, 1179void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
1112 u16 initiator, u16 reason); 1180 u16 initiator, u16 reason, bool stop);
1113void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta); 1181void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, bool tx);
1114void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, 1182void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
1115 struct sta_info *sta, 1183 struct sta_info *sta,
1116 struct ieee80211_mgmt *mgmt, size_t len); 1184 struct ieee80211_mgmt *mgmt, size_t len);
@@ -1124,13 +1192,16 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
1124 size_t len); 1192 size_t len);
1125 1193
1126int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, 1194int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
1127 enum ieee80211_back_parties initiator); 1195 enum ieee80211_back_parties initiator,
1196 bool tx);
1128int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, 1197int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
1129 enum ieee80211_back_parties initiator); 1198 enum ieee80211_back_parties initiator,
1199 bool tx);
1130void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid); 1200void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid);
1131void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid); 1201void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid);
1132void ieee80211_ba_session_work(struct work_struct *work); 1202void ieee80211_ba_session_work(struct work_struct *work);
1133void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid); 1203void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid);
1204void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid);
1134 1205
1135/* Spectrum management */ 1206/* Spectrum management */
1136void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, 1207void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -1146,6 +1217,12 @@ int __ieee80211_suspend(struct ieee80211_hw *hw);
1146 1217
1147static inline int __ieee80211_resume(struct ieee80211_hw *hw) 1218static inline int __ieee80211_resume(struct ieee80211_hw *hw)
1148{ 1219{
1220 struct ieee80211_local *local = hw_to_local(hw);
1221
1222 WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
1223 "%s: resume with hardware scan still in progress\n",
1224 wiphy_name(hw->wiphy));
1225
1149 return ieee80211_reconfig(hw_to_local(hw)); 1226 return ieee80211_reconfig(hw_to_local(hw));
1150} 1227}
1151#else 1228#else
@@ -1208,7 +1285,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
1208 const u8 *key, u8 key_len, u8 key_idx); 1285 const u8 *key, u8 key_len, u8 key_idx);
1209int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, 1286int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
1210 const u8 *ie, size_t ie_len, 1287 const u8 *ie, size_t ie_len,
1211 enum ieee80211_band band); 1288 enum ieee80211_band band, u32 rate_mask,
1289 u8 channel);
1212void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, 1290void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1213 const u8 *ssid, size_t ssid_len, 1291 const u8 *ssid, size_t ssid_len,
1214 const u8 *ie, size_t ie_len); 1292 const u8 *ie, size_t ie_len);
@@ -1221,8 +1299,7 @@ u32 ieee80211_sta_get_rates(struct ieee80211_local *local,
1221 enum ieee80211_band band); 1299 enum ieee80211_band band);
1222int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata, 1300int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
1223 enum ieee80211_smps_mode smps_mode); 1301 enum ieee80211_smps_mode smps_mode);
1224void ieee80211_recalc_smps(struct ieee80211_local *local, 1302void ieee80211_recalc_smps(struct ieee80211_local *local);
1225 struct ieee80211_sub_if_data *forsdata);
1226 1303
1227size_t ieee80211_ie_split(const u8 *ies, size_t ielen, 1304size_t ieee80211_ie_split(const u8 *ies, size_t ielen,
1228 const u8 *ids, int n_ids, size_t offset); 1305 const u8 *ids, int n_ids, size_t offset);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index ebbe264e2b0b..f9163b12c7f1 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -24,6 +24,7 @@
24#include "led.h" 24#include "led.h"
25#include "driver-ops.h" 25#include "driver-ops.h"
26#include "wme.h" 26#include "wme.h"
27#include "rate.h"
27 28
28/** 29/**
29 * DOC: Interface list locking 30 * DOC: Interface list locking
@@ -94,21 +95,14 @@ static inline int identical_mac_addr_allowed(int type1, int type2)
94 type2 == NL80211_IFTYPE_AP_VLAN)); 95 type2 == NL80211_IFTYPE_AP_VLAN));
95} 96}
96 97
97static int ieee80211_open(struct net_device *dev) 98static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
99 enum nl80211_iftype iftype)
98{ 100{
99 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
100 struct ieee80211_sub_if_data *nsdata;
101 struct ieee80211_local *local = sdata->local; 101 struct ieee80211_local *local = sdata->local;
102 struct sta_info *sta; 102 struct ieee80211_sub_if_data *nsdata;
103 u32 changed = 0; 103 struct net_device *dev = sdata->dev;
104 int res;
105 u32 hw_reconf_flags = 0;
106 u8 null_addr[ETH_ALEN] = {0};
107 104
108 /* fail early if user set an invalid address */ 105 ASSERT_RTNL();
109 if (compare_ether_addr(dev->dev_addr, null_addr) &&
110 !is_valid_ether_addr(dev->dev_addr))
111 return -EADDRNOTAVAIL;
112 106
113 /* we hold the RTNL here so can safely walk the list */ 107 /* we hold the RTNL here so can safely walk the list */
114 list_for_each_entry(nsdata, &local->interfaces, list) { 108 list_for_each_entry(nsdata, &local->interfaces, list) {
@@ -125,7 +119,7 @@ static int ieee80211_open(struct net_device *dev)
125 * belonging to the same hardware. Then, however, we're 119 * belonging to the same hardware. Then, however, we're
126 * faced with having to adopt two different TSF timers... 120 * faced with having to adopt two different TSF timers...
127 */ 121 */
128 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && 122 if (iftype == NL80211_IFTYPE_ADHOC &&
129 nsdata->vif.type == NL80211_IFTYPE_ADHOC) 123 nsdata->vif.type == NL80211_IFTYPE_ADHOC)
130 return -EBUSY; 124 return -EBUSY;
131 125
@@ -139,19 +133,56 @@ static int ieee80211_open(struct net_device *dev)
139 /* 133 /*
140 * check whether it may have the same address 134 * check whether it may have the same address
141 */ 135 */
142 if (!identical_mac_addr_allowed(sdata->vif.type, 136 if (!identical_mac_addr_allowed(iftype,
143 nsdata->vif.type)) 137 nsdata->vif.type))
144 return -ENOTUNIQ; 138 return -ENOTUNIQ;
145 139
146 /* 140 /*
147 * can only add VLANs to enabled APs 141 * can only add VLANs to enabled APs
148 */ 142 */
149 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && 143 if (iftype == NL80211_IFTYPE_AP_VLAN &&
150 nsdata->vif.type == NL80211_IFTYPE_AP) 144 nsdata->vif.type == NL80211_IFTYPE_AP)
151 sdata->bss = &nsdata->u.ap; 145 sdata->bss = &nsdata->u.ap;
152 } 146 }
153 } 147 }
154 148
149 return 0;
150}
151
152void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata,
153 const int offset)
154{
155 struct ieee80211_local *local = sdata->local;
156 u32 flags = sdata->u.mntr_flags;
157
158#define ADJUST(_f, _s) do { \
159 if (flags & MONITOR_FLAG_##_f) \
160 local->fif_##_s += offset; \
161 } while (0)
162
163 ADJUST(FCSFAIL, fcsfail);
164 ADJUST(PLCPFAIL, plcpfail);
165 ADJUST(CONTROL, control);
166 ADJUST(CONTROL, pspoll);
167 ADJUST(OTHER_BSS, other_bss);
168
169#undef ADJUST
170}
171
172/*
173 * NOTE: Be very careful when changing this function, it must NOT return
174 * an error on interface type changes that have been pre-checked, so most
175 * checks should be in ieee80211_check_concurrent_iface.
176 */
177static int ieee80211_do_open(struct net_device *dev, bool coming_up)
178{
179 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
180 struct ieee80211_local *local = sdata->local;
181 struct sta_info *sta;
182 u32 changed = 0;
183 int res;
184 u32 hw_reconf_flags = 0;
185
155 switch (sdata->vif.type) { 186 switch (sdata->vif.type) {
156 case NL80211_IFTYPE_WDS: 187 case NL80211_IFTYPE_WDS:
157 if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) 188 if (!is_valid_ether_addr(sdata->u.wds.remote_addr))
@@ -177,7 +208,9 @@ static int ieee80211_open(struct net_device *dev)
177 /* no special treatment */ 208 /* no special treatment */
178 break; 209 break;
179 case NL80211_IFTYPE_UNSPECIFIED: 210 case NL80211_IFTYPE_UNSPECIFIED:
180 case __NL80211_IFTYPE_AFTER_LAST: 211 case NUM_NL80211_IFTYPES:
212 case NL80211_IFTYPE_P2P_CLIENT:
213 case NL80211_IFTYPE_P2P_GO:
181 /* cannot happen */ 214 /* cannot happen */
182 WARN_ON(1); 215 WARN_ON(1);
183 break; 216 break;
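
ieee80211_adjust_monitor_flags(), added earlier in this hunk, folds the duplicated open/stop bookkeeping into one helper: calling it with +1 or -1 increments or decrements each fif_* counter whose monitor flag is set, and the local ADJUST() macro exists only to avoid writing the flag test five times. The token-pasting trick is easy to test standalone; the counters and flag values below are stand-ins:

#include <stdio.h>

#define MONITOR_FLAG_FCSFAIL	(1u << 0)
#define MONITOR_FLAG_PLCPFAIL	(1u << 1)
#define MONITOR_FLAG_CONTROL	(1u << 2)

static int fif_fcsfail, fif_plcpfail, fif_control;

static void adjust_monitor_flags(unsigned int flags, int offset)
{
	/* ## pastes the suffix onto both the flag and the counter name */
#define ADJUST(_f, _s) do {			\
		if (flags & MONITOR_FLAG_##_f)	\
			fif_##_s += offset;	\
	} while (0)

	ADJUST(FCSFAIL, fcsfail);
	ADJUST(PLCPFAIL, plcpfail);
	ADJUST(CONTROL, control);

#undef ADJUST
}

int main(void)
{
	adjust_monitor_flags(MONITOR_FLAG_FCSFAIL | MONITOR_FLAG_CONTROL, 1);
	printf("fcsfail=%d plcpfail=%d control=%d\n",
	       fif_fcsfail, fif_plcpfail, fif_control);
	adjust_monitor_flags(MONITOR_FLAG_FCSFAIL | MONITOR_FLAG_CONTROL, -1);
	printf("fcsfail=%d plcpfail=%d control=%d\n",
	       fif_fcsfail, fif_plcpfail, fif_control);
	return 0;
}
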
@@ -187,39 +220,30 @@ static int ieee80211_open(struct net_device *dev)
187 res = drv_start(local); 220 res = drv_start(local);
188 if (res) 221 if (res)
189 goto err_del_bss; 222 goto err_del_bss;
223 if (local->ops->napi_poll)
224 napi_enable(&local->napi);
190 /* we're brought up, everything changes */ 225 /* we're brought up, everything changes */
191 hw_reconf_flags = ~0; 226 hw_reconf_flags = ~0;
192 ieee80211_led_radio(local, true); 227 ieee80211_led_radio(local, true);
193 } 228 }
194 229
195 /* 230 /*
196 * Check all interfaces and copy the hopefully now-present 231 * Copy the hopefully now-present MAC address to
197 * MAC address to those that have the special null one. 232 * this interface, if it has the special null one.
198 */ 233 */
199 list_for_each_entry(nsdata, &local->interfaces, list) { 234 if (is_zero_ether_addr(dev->dev_addr)) {
200 struct net_device *ndev = nsdata->dev; 235 memcpy(dev->dev_addr,
201 236 local->hw.wiphy->perm_addr,
202 /* 237 ETH_ALEN);
203 * No need to check running since we do not allow 238 memcpy(dev->perm_addr, dev->dev_addr, ETH_ALEN);
204 * it to start up with this invalid address. 239
205 */ 240 if (!is_valid_ether_addr(dev->dev_addr)) {
206 if (compare_ether_addr(null_addr, ndev->dev_addr) == 0) { 241 if (!local->open_count)
207 memcpy(ndev->dev_addr, 242 drv_stop(local);
208 local->hw.wiphy->perm_addr, 243 return -EADDRNOTAVAIL;
209 ETH_ALEN);
210 memcpy(ndev->perm_addr, ndev->dev_addr, ETH_ALEN);
211 } 244 }
212 } 245 }
213 246
214 /*
215 * Validate the MAC address for this device.
216 */
217 if (!is_valid_ether_addr(dev->dev_addr)) {
218 if (!local->open_count)
219 drv_stop(local);
220 return -EADDRNOTAVAIL;
221 }
222
223 switch (sdata->vif.type) { 247 switch (sdata->vif.type) {
224 case NL80211_IFTYPE_AP_VLAN: 248 case NL80211_IFTYPE_AP_VLAN:
225 /* no need to tell driver */ 249 /* no need to tell driver */
@@ -237,25 +261,17 @@ static int ieee80211_open(struct net_device *dev)
237 hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; 261 hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR;
238 } 262 }
239 263
240 if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) 264 ieee80211_adjust_monitor_flags(sdata, 1);
241 local->fif_fcsfail++;
242 if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL)
243 local->fif_plcpfail++;
244 if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) {
245 local->fif_control++;
246 local->fif_pspoll++;
247 }
248 if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS)
249 local->fif_other_bss++;
250
251 ieee80211_configure_filter(local); 265 ieee80211_configure_filter(local);
252 266
253 netif_carrier_on(dev); 267 netif_carrier_on(dev);
254 break; 268 break;
255 default: 269 default:
256 res = drv_add_interface(local, &sdata->vif); 270 if (coming_up) {
257 if (res) 271 res = drv_add_interface(local, &sdata->vif);
258 goto err_stop; 272 if (res)
273 goto err_stop;
274 }
259 275
260 if (ieee80211_vif_is_mesh(&sdata->vif)) { 276 if (ieee80211_vif_is_mesh(&sdata->vif)) {
261 local->fif_other_bss++; 277 local->fif_other_bss++;
@@ -264,8 +280,11 @@ static int ieee80211_open(struct net_device *dev)
264 ieee80211_start_mesh(sdata); 280 ieee80211_start_mesh(sdata);
265 } else if (sdata->vif.type == NL80211_IFTYPE_AP) { 281 } else if (sdata->vif.type == NL80211_IFTYPE_AP) {
266 local->fif_pspoll++; 282 local->fif_pspoll++;
283 local->fif_probe_req++;
267 284
268 ieee80211_configure_filter(local); 285 ieee80211_configure_filter(local);
286 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
287 local->fif_probe_req++;
269 } 288 }
270 289
271 changed |= ieee80211_reset_erp_info(sdata); 290 changed |= ieee80211_reset_erp_info(sdata);
@@ -277,6 +296,8 @@ static int ieee80211_open(struct net_device *dev)
277 netif_carrier_on(dev); 296 netif_carrier_on(dev);
278 } 297 }
279 298
299 set_bit(SDATA_STATE_RUNNING, &sdata->state);
300
280 if (sdata->vif.type == NL80211_IFTYPE_WDS) { 301 if (sdata->vif.type == NL80211_IFTYPE_WDS) {
281 /* Create STA entry for the WDS peer */ 302 /* Create STA entry for the WDS peer */
282 sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr, 303 sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr,
@@ -294,6 +315,8 @@ static int ieee80211_open(struct net_device *dev)
294 /* STA has been freed */ 315 /* STA has been freed */
295 goto err_del_interface; 316 goto err_del_interface;
296 } 317 }
318
319 rate_control_rate_init(sta);
297 } 320 }
298 321
299 /* 322 /*
@@ -307,9 +330,13 @@ static int ieee80211_open(struct net_device *dev)
307 if (sdata->flags & IEEE80211_SDATA_PROMISC) 330 if (sdata->flags & IEEE80211_SDATA_PROMISC)
308 atomic_inc(&local->iff_promiscs); 331 atomic_inc(&local->iff_promiscs);
309 332
333 mutex_lock(&local->mtx);
310 hw_reconf_flags |= __ieee80211_recalc_idle(local); 334 hw_reconf_flags |= __ieee80211_recalc_idle(local);
335 mutex_unlock(&local->mtx);
336
337 if (coming_up)
338 local->open_count++;
311 339
312 local->open_count++;
313 if (hw_reconf_flags) { 340 if (hw_reconf_flags) {
314 ieee80211_hw_config(local, hw_reconf_flags); 341 ieee80211_hw_config(local, hw_reconf_flags);
315 /* 342 /*
@@ -334,22 +361,42 @@ static int ieee80211_open(struct net_device *dev)
334 sdata->bss = NULL; 361 sdata->bss = NULL;
335 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 362 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
336 list_del(&sdata->u.vlan.list); 363 list_del(&sdata->u.vlan.list);
364 clear_bit(SDATA_STATE_RUNNING, &sdata->state);
337 return res; 365 return res;
338} 366}
339 367
340static int ieee80211_stop(struct net_device *dev) 368static int ieee80211_open(struct net_device *dev)
341{ 369{
342 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 370 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
371 int err;
372
373 /* fail early if user set an invalid address */
374 if (!is_zero_ether_addr(dev->dev_addr) &&
375 !is_valid_ether_addr(dev->dev_addr))
376 return -EADDRNOTAVAIL;
377
378 err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type);
379 if (err)
380 return err;
381
382 return ieee80211_do_open(dev, true);
383}
384
385static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata,
386 bool going_down)
387{
343 struct ieee80211_local *local = sdata->local; 388 struct ieee80211_local *local = sdata->local;
344 unsigned long flags; 389 unsigned long flags;
345 struct sk_buff *skb, *tmp; 390 struct sk_buff *skb, *tmp;
346 u32 hw_reconf_flags = 0; 391 u32 hw_reconf_flags = 0;
347 int i; 392 int i;
348 393
394 clear_bit(SDATA_STATE_RUNNING, &sdata->state);
395
349 /* 396 /*
350 * Stop TX on this interface first. 397 * Stop TX on this interface first.
351 */ 398 */
352 netif_tx_stop_all_queues(dev); 399 netif_tx_stop_all_queues(sdata->dev);
353 400
354 /* 401 /*
355 * Purge work for this interface. 402 * Purge work for this interface.
@@ -366,12 +413,9 @@ static int ieee80211_stop(struct net_device *dev)
366 * (because if we remove a STA after ops->remove_interface() 413 * (because if we remove a STA after ops->remove_interface()
367 * the driver will have removed the vif info already!) 414 * the driver will have removed the vif info already!)
368 * 415 *
369 * We could relax this and only unlink the stations from the 416 * This is relevant only in AP, WDS and mesh modes, since in
370 * hash table and list but keep them on a per-sdata list that 417 * all other modes we've already removed all stations when
371 * will be inserted back again when the interface is brought 418 * disconnecting etc.
372 * up again, but I don't currently see a use case for that,
373 * except with WDS which gets a STA entry created when it is
374 * brought up.
375 */ 419 */
376 sta_info_flush(local, sdata); 420 sta_info_flush(local, sdata);
377 421
@@ -387,14 +431,19 @@ static int ieee80211_stop(struct net_device *dev)
387 if (sdata->flags & IEEE80211_SDATA_PROMISC) 431 if (sdata->flags & IEEE80211_SDATA_PROMISC)
388 atomic_dec(&local->iff_promiscs); 432 atomic_dec(&local->iff_promiscs);
389 433
390 if (sdata->vif.type == NL80211_IFTYPE_AP) 434 if (sdata->vif.type == NL80211_IFTYPE_AP) {
391 local->fif_pspoll--; 435 local->fif_pspoll--;
436 local->fif_probe_req--;
437 } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
438 local->fif_probe_req--;
439 }
392 440
393 netif_addr_lock_bh(dev); 441 netif_addr_lock_bh(sdata->dev);
394 spin_lock_bh(&local->filter_lock); 442 spin_lock_bh(&local->filter_lock);
395 __hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len); 443 __hw_addr_unsync(&local->mc_list, &sdata->dev->mc,
444 sdata->dev->addr_len);
396 spin_unlock_bh(&local->filter_lock); 445 spin_unlock_bh(&local->filter_lock);
397 netif_addr_unlock_bh(dev); 446 netif_addr_unlock_bh(sdata->dev);
398 447
399 ieee80211_configure_filter(local); 448 ieee80211_configure_filter(local);
400 449
@@ -406,11 +455,21 @@ static int ieee80211_stop(struct net_device *dev)
406 struct ieee80211_sub_if_data *vlan, *tmpsdata; 455 struct ieee80211_sub_if_data *vlan, *tmpsdata;
407 struct beacon_data *old_beacon = sdata->u.ap.beacon; 456 struct beacon_data *old_beacon = sdata->u.ap.beacon;
408 457
458 /* sdata_running will return false, so this will disable */
459 ieee80211_bss_info_change_notify(sdata,
460 BSS_CHANGED_BEACON_ENABLED);
461
409 /* remove beacon */ 462 /* remove beacon */
410 rcu_assign_pointer(sdata->u.ap.beacon, NULL); 463 rcu_assign_pointer(sdata->u.ap.beacon, NULL);
411 synchronize_rcu(); 464 synchronize_rcu();
412 kfree(old_beacon); 465 kfree(old_beacon);
413 466
467 /* free all potentially still buffered bcast frames */
468 while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
469 local->total_ps_buffered--;
470 dev_kfree_skb(skb);
471 }
472
414 /* down all dependent devices, that is VLANs */ 473 /* down all dependent devices, that is VLANs */
415 list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans, 474 list_for_each_entry_safe(vlan, tmpsdata, &sdata->u.ap.vlans,
416 u.vlan.list) 475 u.vlan.list)
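
The beacon removal above is the canonical RCU unpublish sequence: swing the RCU-protected pointer to NULL, wait out all pre-existing readers with synchronize_rcu(), and only then free the object. The same sequence can be exercised in userspace with liburcu — assuming it is installed; link with -lurcu — using the same primitive names:

#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>

struct beacon { int len; };

static struct beacon *beacon_ptr;	/* RCU-protected */

static void reader(void)
{
	rcu_read_lock();
	struct beacon *b = rcu_dereference(beacon_ptr);
	printf("beacon: %s\n", b ? "present" : "gone");
	rcu_read_unlock();
}

int main(void)
{
	rcu_register_thread();

	struct beacon *old = malloc(sizeof(*old));
	rcu_assign_pointer(beacon_ptr, old);	/* publish */
	reader();

	rcu_assign_pointer(beacon_ptr, NULL);	/* unpublish */
	synchronize_rcu();	/* wait for all existing readers */
	free(old);		/* now nobody can still see it */
	reader();

	rcu_unregister_thread();
	return 0;
}
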
@@ -418,7 +477,8 @@ static int ieee80211_stop(struct net_device *dev)
418 WARN_ON(!list_empty(&sdata->u.ap.vlans)); 477 WARN_ON(!list_empty(&sdata->u.ap.vlans));
419 } 478 }
420 479
421 local->open_count--; 480 if (going_down)
481 local->open_count--;
422 482
423 switch (sdata->vif.type) { 483 switch (sdata->vif.type) {
424 case NL80211_IFTYPE_AP_VLAN: 484 case NL80211_IFTYPE_AP_VLAN:
@@ -437,40 +497,9 @@ static int ieee80211_stop(struct net_device *dev)
437 hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR; 497 hw_reconf_flags |= IEEE80211_CONF_CHANGE_MONITOR;
438 } 498 }
439 499
440 if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) 500 ieee80211_adjust_monitor_flags(sdata, -1);
441 local->fif_fcsfail--;
442 if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL)
443 local->fif_plcpfail--;
444 if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) {
445 local->fif_pspoll--;
446 local->fif_control--;
447 }
448 if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS)
449 local->fif_other_bss--;
450
451 ieee80211_configure_filter(local); 501 ieee80211_configure_filter(local);
452 break; 502 break;
453 case NL80211_IFTYPE_STATION:
454 del_timer_sync(&sdata->u.mgd.chswitch_timer);
455 del_timer_sync(&sdata->u.mgd.timer);
456 del_timer_sync(&sdata->u.mgd.conn_mon_timer);
457 del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
458 /*
459 * If any of the timers fired while we waited for it, it will
460 * have queued its work. Now the work will be running again
461 * but will not rearm the timer again because it checks
462 * whether the interface is running, which, at this point,
463 * it no longer is.
464 */
465 cancel_work_sync(&sdata->u.mgd.chswitch_work);
466 cancel_work_sync(&sdata->u.mgd.monitor_work);
467 cancel_work_sync(&sdata->u.mgd.beacon_connection_loss_work);
468
469 /* fall through */
470 case NL80211_IFTYPE_ADHOC:
471 if (sdata->vif.type == NL80211_IFTYPE_ADHOC)
472 del_timer_sync(&sdata->u.ibss.timer);
473 /* fall through */
474 case NL80211_IFTYPE_MESH_POINT: 503 case NL80211_IFTYPE_MESH_POINT:
475 if (ieee80211_vif_is_mesh(&sdata->vif)) { 504 if (ieee80211_vif_is_mesh(&sdata->vif)) {
476 /* other_bss and allmulti are always set on mesh 505 /* other_bss and allmulti are always set on mesh
@@ -498,27 +527,34 @@ static int ieee80211_stop(struct net_device *dev)
498 ieee80211_scan_cancel(local); 527 ieee80211_scan_cancel(local);
499 528
500 /* 529 /*
501 * Disable beaconing for AP and mesh, IBSS can't 530 * Disable beaconing here for mesh only, AP and IBSS
502 * still be joined to a network at this point. 531 * are already taken care of.
503 */ 532 */
504 if (sdata->vif.type == NL80211_IFTYPE_AP || 533 if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
505 sdata->vif.type == NL80211_IFTYPE_MESH_POINT) {
506 ieee80211_bss_info_change_notify(sdata, 534 ieee80211_bss_info_change_notify(sdata,
507 BSS_CHANGED_BEACON_ENABLED); 535 BSS_CHANGED_BEACON_ENABLED);
508 }
509 536
510 /* free all remaining keys, there shouldn't be any */ 537 /*
538 * Free all remaining keys, there shouldn't be any,
539 * except maybe group keys in AP more or WDS?
540 */
511 ieee80211_free_keys(sdata); 541 ieee80211_free_keys(sdata);
512 drv_remove_interface(local, &sdata->vif); 542
543 if (going_down)
544 drv_remove_interface(local, &sdata->vif);
513 } 545 }
514 546
515 sdata->bss = NULL; 547 sdata->bss = NULL;
516 548
549 mutex_lock(&local->mtx);
517 hw_reconf_flags |= __ieee80211_recalc_idle(local); 550 hw_reconf_flags |= __ieee80211_recalc_idle(local);
551 mutex_unlock(&local->mtx);
518 552
519 ieee80211_recalc_ps(local, -1); 553 ieee80211_recalc_ps(local, -1);
520 554
521 if (local->open_count == 0) { 555 if (local->open_count == 0) {
556 if (local->ops->napi_poll)
557 napi_disable(&local->napi);
522 ieee80211_clear_tx_pending(local); 558 ieee80211_clear_tx_pending(local);
523 ieee80211_stop_device(local); 559 ieee80211_stop_device(local);
524 560
@@ -541,6 +577,13 @@ static int ieee80211_stop(struct net_device *dev)
541 } 577 }
542 } 578 }
543 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 579 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
580}
581
582static int ieee80211_stop(struct net_device *dev)
583{
584 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
585
586 ieee80211_do_stop(sdata, true);
544 587
545 return 0; 588 return 0;
546} 589}
@@ -585,8 +628,6 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
585{ 628{
586 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); 629 struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
587 struct ieee80211_local *local = sdata->local; 630 struct ieee80211_local *local = sdata->local;
588 struct beacon_data *beacon;
589 struct sk_buff *skb;
590 int flushed; 631 int flushed;
591 int i; 632 int i;
592 633
@@ -599,37 +640,8 @@ static void ieee80211_teardown_sdata(struct net_device *dev)
599 __skb_queue_purge(&sdata->fragments[i].skb_list); 640 __skb_queue_purge(&sdata->fragments[i].skb_list);
600 sdata->fragment_next = 0; 641 sdata->fragment_next = 0;
601 642
602 switch (sdata->vif.type) { 643 if (ieee80211_vif_is_mesh(&sdata->vif))
603 case NL80211_IFTYPE_AP: 644 mesh_rmc_free(sdata);
604 beacon = sdata->u.ap.beacon;
605 rcu_assign_pointer(sdata->u.ap.beacon, NULL);
606 synchronize_rcu();
607 kfree(beacon);
608
609 while ((skb = skb_dequeue(&sdata->u.ap.ps_bc_buf))) {
610 local->total_ps_buffered--;
611 dev_kfree_skb(skb);
612 }
613
614 break;
615 case NL80211_IFTYPE_MESH_POINT:
616 if (ieee80211_vif_is_mesh(&sdata->vif))
617 mesh_rmc_free(sdata);
618 break;
619 case NL80211_IFTYPE_ADHOC:
620 if (WARN_ON(sdata->u.ibss.presp))
621 kfree_skb(sdata->u.ibss.presp);
622 break;
623 case NL80211_IFTYPE_STATION:
624 case NL80211_IFTYPE_WDS:
625 case NL80211_IFTYPE_AP_VLAN:
626 case NL80211_IFTYPE_MONITOR:
627 break;
628 case NL80211_IFTYPE_UNSPECIFIED:
629 case __NL80211_IFTYPE_AFTER_LAST:
630 BUG();
631 break;
632 }
633 645
634 flushed = sta_info_flush(local, sdata); 646 flushed = sta_info_flush(local, sdata);
635 WARN_ON(flushed); 647 WARN_ON(flushed);
@@ -791,7 +803,8 @@ static void ieee80211_iface_work(struct work_struct *work)
791 803
792 __ieee80211_stop_rx_ba_session( 804 __ieee80211_stop_rx_ba_session(
793 sta, tid, WLAN_BACK_RECIPIENT, 805 sta, tid, WLAN_BACK_RECIPIENT,
794 WLAN_REASON_QSTA_REQUIRE_SETUP); 806 WLAN_REASON_QSTA_REQUIRE_SETUP,
807 true);
795 } 808 }
796 mutex_unlock(&local->sta_mtx); 809 mutex_unlock(&local->sta_mtx);
797 } else switch (sdata->vif.type) { 810 } else switch (sdata->vif.type) {
@@ -844,9 +857,13 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
844 857
845 /* and set some type-dependent values */ 858 /* and set some type-dependent values */
846 sdata->vif.type = type; 859 sdata->vif.type = type;
860 sdata->vif.p2p = false;
847 sdata->dev->netdev_ops = &ieee80211_dataif_ops; 861 sdata->dev->netdev_ops = &ieee80211_dataif_ops;
848 sdata->wdev.iftype = type; 862 sdata->wdev.iftype = type;
849 863
864 sdata->control_port_protocol = cpu_to_be16(ETH_P_PAE);
865 sdata->control_port_no_encrypt = false;
866
850 /* only monitor differs */ 867 /* only monitor differs */
851 sdata->dev->type = ARPHRD_ETHER; 868 sdata->dev->type = ARPHRD_ETHER;
852 869
@@ -854,10 +871,20 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
854 INIT_WORK(&sdata->work, ieee80211_iface_work); 871 INIT_WORK(&sdata->work, ieee80211_iface_work);
855 872
856 switch (type) { 873 switch (type) {
874 case NL80211_IFTYPE_P2P_GO:
875 type = NL80211_IFTYPE_AP;
876 sdata->vif.type = type;
877 sdata->vif.p2p = true;
878 /* fall through */
857 case NL80211_IFTYPE_AP: 879 case NL80211_IFTYPE_AP:
858 skb_queue_head_init(&sdata->u.ap.ps_bc_buf); 880 skb_queue_head_init(&sdata->u.ap.ps_bc_buf);
859 INIT_LIST_HEAD(&sdata->u.ap.vlans); 881 INIT_LIST_HEAD(&sdata->u.ap.vlans);
860 break; 882 break;
883 case NL80211_IFTYPE_P2P_CLIENT:
884 type = NL80211_IFTYPE_STATION;
885 sdata->vif.type = type;
886 sdata->vif.p2p = true;
887 /* fall through */
861 case NL80211_IFTYPE_STATION: 888 case NL80211_IFTYPE_STATION:
862 ieee80211_sta_setup_sdata(sdata); 889 ieee80211_sta_setup_sdata(sdata);
863 break; 890 break;
@@ -878,7 +905,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
878 case NL80211_IFTYPE_AP_VLAN: 905 case NL80211_IFTYPE_AP_VLAN:
879 break; 906 break;
880 case NL80211_IFTYPE_UNSPECIFIED: 907 case NL80211_IFTYPE_UNSPECIFIED:
881 case __NL80211_IFTYPE_AFTER_LAST: 908 case NUM_NL80211_IFTYPES:
882 BUG(); 909 BUG();
883 break; 910 break;
884 } 911 }
@@ -886,12 +913,85 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
886 ieee80211_debugfs_add_netdev(sdata); 913 ieee80211_debugfs_add_netdev(sdata);
887} 914}
888 915
916static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
917 enum nl80211_iftype type)
918{
919 struct ieee80211_local *local = sdata->local;
920 int ret, err;
921 enum nl80211_iftype internal_type = type;
922 bool p2p = false;
923
924 ASSERT_RTNL();
925
926 if (!local->ops->change_interface)
927 return -EBUSY;
928
929 switch (sdata->vif.type) {
930 case NL80211_IFTYPE_AP:
931 case NL80211_IFTYPE_STATION:
932 case NL80211_IFTYPE_ADHOC:
933 /*
 934 * Could maybe also allow all others here?
935 * Just not sure how that interacts
936 * with the RX/config path e.g. for
937 * mesh.
938 */
939 break;
940 default:
941 return -EBUSY;
942 }
943
944 switch (type) {
945 case NL80211_IFTYPE_AP:
946 case NL80211_IFTYPE_STATION:
947 case NL80211_IFTYPE_ADHOC:
948 /*
949 * Could probably support everything
950 * but WDS here (WDS do_open can fail
951 * under memory pressure, which this
952 * code isn't prepared to handle).
953 */
954 break;
955 case NL80211_IFTYPE_P2P_CLIENT:
956 p2p = true;
957 internal_type = NL80211_IFTYPE_STATION;
958 break;
959 case NL80211_IFTYPE_P2P_GO:
960 p2p = true;
961 internal_type = NL80211_IFTYPE_AP;
962 break;
963 default:
964 return -EBUSY;
965 }
966
967 ret = ieee80211_check_concurrent_iface(sdata, internal_type);
968 if (ret)
969 return ret;
970
971 ieee80211_do_stop(sdata, false);
972
973 ieee80211_teardown_sdata(sdata->dev);
974
975 ret = drv_change_interface(local, sdata, internal_type, p2p);
976 if (ret)
977 type = sdata->vif.type;
978
979 ieee80211_setup_sdata(sdata, type);
980
981 err = ieee80211_do_open(sdata->dev, false);
982 WARN(err, "type change: do_open returned %d", err);
983
984 return ret;
985}
986
889int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, 987int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
890 enum nl80211_iftype type) 988 enum nl80211_iftype type)
891{ 989{
990 int ret;
991
892 ASSERT_RTNL(); 992 ASSERT_RTNL();
893 993
894 if (type == sdata->vif.type) 994 if (type == ieee80211_vif_type_p2p(&sdata->vif))
895 return 0; 995 return 0;
896 996
897 /* Setting ad-hoc mode on non-IBSS channel is not supported. */ 997 /* Setting ad-hoc mode on non-IBSS channel is not supported. */
@@ -899,18 +999,15 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
899 type == NL80211_IFTYPE_ADHOC) 999 type == NL80211_IFTYPE_ADHOC)
900 return -EOPNOTSUPP; 1000 return -EOPNOTSUPP;
901 1001
902 /* 1002 if (ieee80211_sdata_running(sdata)) {
903 * We could, here, on changes between IBSS/STA/MESH modes, 1003 ret = ieee80211_runtime_change_iftype(sdata, type);
904 * invoke an MLME function instead that disassociates etc. 1004 if (ret)
905 * and goes into the requested mode. 1005 return ret;
906 */ 1006 } else {
907 1007 /* Purge and reset type-dependent state. */
908 if (ieee80211_sdata_running(sdata)) 1008 ieee80211_teardown_sdata(sdata->dev);
909 return -EBUSY; 1009 ieee80211_setup_sdata(sdata, type);
910 1010 }
911 /* Purge and reset type-dependent state. */
912 ieee80211_teardown_sdata(sdata->dev);
913 ieee80211_setup_sdata(sdata, type);
914 1011
915 /* reset some values that shouldn't be kept across type changes */ 1012 /* reset some values that shouldn't be kept across type changes */
916 sdata->vif.bss_conf.basic_rates = 1013 sdata->vif.bss_conf.basic_rates =
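
ieee80211_runtime_change_iftype() turns what used to be an unconditional -EBUSY into a controlled restart: pre-check the target type, do_stop without telling the driver the interface is gone, tear down and re-set-up the type-dependent state, ask the driver to morph the vif, and re-open; if the driver refuses, the old type is restored and the interface is still brought back up. A compressed sketch of that error-recovery shape, all names hypothetical:

#include <stdio.h>

enum iftype { IF_STATION, IF_AP, IF_ADHOC };

static enum iftype cur_type = IF_STATION;

/* stand-in for drv_change_interface(); refuses ADHOC for the demo */
static int drv_change(enum iftype to)
{
	return to == IF_ADHOC ? -1 : 0;
}

static int runtime_change_iftype(enum iftype type)
{
	int ret;

	/* stop + teardown, but keep the vif known to the driver */
	printf("do_stop(going_down=false), teardown\n");

	ret = drv_change(type);
	if (ret)
		type = cur_type;	/* driver refused: revert the type */

	cur_type = type;
	printf("setup as %d, do_open(coming_up=false)\n", type);
	return ret;			/* original error still reported */
}

int main(void)
{
	int ret = runtime_change_iftype(IF_AP);
	printf("-> AP: ret=%d, type now %d\n", ret, cur_type);

	ret = runtime_change_iftype(IF_ADHOC);
	printf("-> ADHOC: ret=%d, type now %d\n", ret, cur_type);
	return 0;
}
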
@@ -1167,8 +1264,7 @@ static u32 ieee80211_idle_off(struct ieee80211_local *local,
1167 return 0; 1264 return 0;
1168 1265
1169#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1266#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1170 printk(KERN_DEBUG "%s: device no longer idle - %s\n", 1267 wiphy_debug(local->hw.wiphy, "device no longer idle - %s\n", reason);
1171 wiphy_name(local->hw.wiphy), reason);
1172#endif 1268#endif
1173 1269
1174 local->hw.conf.flags &= ~IEEE80211_CONF_IDLE; 1270 local->hw.conf.flags &= ~IEEE80211_CONF_IDLE;
@@ -1181,8 +1277,7 @@ static u32 ieee80211_idle_on(struct ieee80211_local *local)
1181 return 0; 1277 return 0;
1182 1278
1183#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1279#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1184 printk(KERN_DEBUG "%s: device now idle\n", 1280 wiphy_debug(local->hw.wiphy, "device now idle\n");
1185 wiphy_name(local->hw.wiphy));
1186#endif 1281#endif
1187 1282
1188 drv_flush(local, false); 1283 drv_flush(local, false);
@@ -1195,28 +1290,61 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local)
1195{ 1290{
1196 struct ieee80211_sub_if_data *sdata; 1291 struct ieee80211_sub_if_data *sdata;
1197 int count = 0; 1292 int count = 0;
1293 bool working = false, scanning = false;
1294 struct ieee80211_work *wk;
1198 1295
1199 if (!list_empty(&local->work_list)) 1296#ifdef CONFIG_PROVE_LOCKING
1200 return ieee80211_idle_off(local, "working"); 1297 WARN_ON(debug_locks && !lockdep_rtnl_is_held() &&
1201 1298 !lockdep_is_held(&local->iflist_mtx));
1202 if (local->scanning) 1299#endif
1203 return ieee80211_idle_off(local, "scanning"); 1300 lockdep_assert_held(&local->mtx);
1204 1301
1205 list_for_each_entry(sdata, &local->interfaces, list) { 1302 list_for_each_entry(sdata, &local->interfaces, list) {
1206 if (!ieee80211_sdata_running(sdata)) 1303 if (!ieee80211_sdata_running(sdata)) {
1304 sdata->vif.bss_conf.idle = true;
1207 continue; 1305 continue;
1306 }
1307
1308 sdata->old_idle = sdata->vif.bss_conf.idle;
1309
1208 /* do not count disabled managed interfaces */ 1310 /* do not count disabled managed interfaces */
1209 if (sdata->vif.type == NL80211_IFTYPE_STATION && 1311 if (sdata->vif.type == NL80211_IFTYPE_STATION &&
1210 !sdata->u.mgd.associated) 1312 !sdata->u.mgd.associated) {
1313 sdata->vif.bss_conf.idle = true;
1211 continue; 1314 continue;
1315 }
1212 /* do not count unused IBSS interfaces */ 1316 /* do not count unused IBSS interfaces */
1213 if (sdata->vif.type == NL80211_IFTYPE_ADHOC && 1317 if (sdata->vif.type == NL80211_IFTYPE_ADHOC &&
1214 !sdata->u.ibss.ssid_len) 1318 !sdata->u.ibss.ssid_len) {
1319 sdata->vif.bss_conf.idle = true;
1215 continue; 1320 continue;
1321 }
1216 /* count everything else */ 1322 /* count everything else */
1217 count++; 1323 count++;
1218 } 1324 }
1219 1325
1326 list_for_each_entry(wk, &local->work_list, list) {
1327 working = true;
1328 wk->sdata->vif.bss_conf.idle = false;
1329 }
1330
1331 if (local->scan_sdata) {
1332 scanning = true;
1333 local->scan_sdata->vif.bss_conf.idle = false;
1334 }
1335
1336 list_for_each_entry(sdata, &local->interfaces, list) {
1337 if (sdata->old_idle == sdata->vif.bss_conf.idle)
1338 continue;
1339 if (!ieee80211_sdata_running(sdata))
1340 continue;
1341 ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE);
1342 }
1343
1344 if (working)
1345 return ieee80211_idle_off(local, "working");
1346 if (scanning)
1347 return ieee80211_idle_off(local, "scanning");
1220 if (!count) 1348 if (!count)
1221 return ieee80211_idle_on(local); 1349 return ieee80211_idle_on(local);
1222 else 1350 else
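
The rewritten __ieee80211_recalc_idle() now derives a per-vif bss_conf.idle in addition to the device-wide decision: every interface is first marked idle or busy, pending work items and an active scan force their owning vif busy, and only vifs whose idle bit actually changed get a BSS_CHANGED_IDLE notification. The change-detection part reduces to a classic old/new comparison; a small sketch with invented types:

#include <stdbool.h>
#include <stdio.h>

struct vif {
	const char *name;
	bool running, active;	/* inputs */
	bool idle, old_idle;	/* derived state */
};

static void recalc_idle(struct vif *v, int n)
{
	/* pass 1: remember the old state, compute the new one */
	for (int i = 0; i < n; i++) {
		v[i].old_idle = v[i].idle;
		v[i].idle = !v[i].running || !v[i].active;
	}

	/* pass 2: notify only on transitions of running vifs */
	for (int i = 0; i < n; i++) {
		if (v[i].idle == v[i].old_idle || !v[i].running)
			continue;
		printf("%s: BSS_CHANGED_IDLE -> %s\n",
		       v[i].name, v[i].idle ? "idle" : "busy");
	}
}

int main(void)
{
	struct vif vifs[] = {
		{ "wlan0", true, true, true, true },
		{ "wlan1", true, false, false, false },
	};
	recalc_idle(vifs, 2);	/* wlan0 turns busy, wlan1 goes idle */
	return 0;
}
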
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 1b9d87ed143a..ccd676b2f599 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -49,7 +49,7 @@ static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
49 49
50static void assert_key_lock(struct ieee80211_local *local) 50static void assert_key_lock(struct ieee80211_local *local)
51{ 51{
52 WARN_ON(!mutex_is_locked(&local->key_mtx)); 52 lockdep_assert_held(&local->key_mtx);
53} 53}
54 54
55static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key) 55static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key)
@@ -60,7 +60,7 @@ static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key)
60 return NULL; 60 return NULL;
61} 61}
62 62
63static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) 63static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
64{ 64{
65 struct ieee80211_sub_if_data *sdata; 65 struct ieee80211_sub_if_data *sdata;
66 struct ieee80211_sta *sta; 66 struct ieee80211_sta *sta;
@@ -69,12 +69,20 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
69 might_sleep(); 69 might_sleep();
70 70
71 if (!key->local->ops->set_key) 71 if (!key->local->ops->set_key)
72 return; 72 goto out_unsupported;
73 73
74 assert_key_lock(key->local); 74 assert_key_lock(key->local);
75 75
76 sta = get_sta_for_key(key); 76 sta = get_sta_for_key(key);
77 77
78 /*
79 * If this is a per-STA GTK, check if it
 80 * is supported; if not, fall back below.
81 */
82 if (sta && !(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE) &&
83 !(key->local->hw.flags & IEEE80211_HW_SUPPORTS_PER_STA_GTK))
84 goto out_unsupported;
85
78 sdata = key->sdata; 86 sdata = key->sdata;
79 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 87 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
80 sdata = container_of(sdata->bss, 88 sdata = container_of(sdata->bss,
@@ -83,14 +91,28 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
83 91
84 ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); 92 ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf);
85 93
86 if (!ret) 94 if (!ret) {
87 key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; 95 key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE;
96 return 0;
97 }
88 98
89 if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) 99 if (ret != -ENOSPC && ret != -EOPNOTSUPP)
90 printk(KERN_ERR "mac80211-%s: failed to set key " 100 wiphy_err(key->local->hw.wiphy,
91 "(%d, %pM) to hardware (%d)\n", 101 "failed to set key (%d, %pM) to hardware (%d)\n",
92 wiphy_name(key->local->hw.wiphy), 102 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
93 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); 103
104 out_unsupported:
105 switch (key->conf.cipher) {
106 case WLAN_CIPHER_SUITE_WEP40:
107 case WLAN_CIPHER_SUITE_WEP104:
108 case WLAN_CIPHER_SUITE_TKIP:
109 case WLAN_CIPHER_SUITE_CCMP:
110 case WLAN_CIPHER_SUITE_AES_CMAC:
111 /* all of these we can do in software */
112 return 0;
113 default:
114 return -EINVAL;
115 }
94} 116}
95 117
96static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) 118static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
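
ieee80211_key_enable_hw_accel() now reports whether a key ended up usable at all: every path that cannot (or need not) program the hardware lands at out_unsupported, where the ciphers mac80211 can handle in software return 0 and anything else becomes -EINVAL. The shape — try offload first, then fall back by whitelist — looks like this in isolation; the cipher IDs and helper are invented:

#include <stdio.h>

enum cipher { WEP40, WEP104, TKIP, CCMP, AES_CMAC, WAPI_SMS4 };

/* pretend the hardware only knows CCMP */
static int hw_set_key(enum cipher c)
{
	return c == CCMP ? 0 : -95;	/* -EOPNOTSUPP */
}

static int enable_key(enum cipher c)
{
	if (hw_set_key(c) == 0)
		return 0;	/* accelerated in hardware */

	/* hardware refused: fine if a software implementation exists */
	switch (c) {
	case WEP40:
	case WEP104:
	case TKIP:
	case CCMP:
	case AES_CMAC:
		return 0;	/* fall back to software crypto */
	default:
		return -22;	/* -EINVAL: nobody can do this cipher */
	}
}

int main(void)
{
	printf("CCMP: %d\n", enable_key(CCMP));		/* hardware */
	printf("TKIP: %d\n", enable_key(TKIP));		/* sw fallback */
	printf("SMS4: %d\n", enable_key(WAPI_SMS4));	/* unsupported */
	return 0;
}
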
@@ -121,14 +143,33 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key)
121 sta, &key->conf); 143 sta, &key->conf);
122 144
123 if (ret) 145 if (ret)
124 printk(KERN_ERR "mac80211-%s: failed to remove key " 146 wiphy_err(key->local->hw.wiphy,
125 "(%d, %pM) from hardware (%d)\n", 147 "failed to remove key (%d, %pM) from hardware (%d)\n",
126 wiphy_name(key->local->hw.wiphy), 148 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
127 key->conf.keyidx, sta ? sta->addr : bcast_addr, ret);
128 149
129 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; 150 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
130} 151}
131 152
153void ieee80211_key_removed(struct ieee80211_key_conf *key_conf)
154{
155 struct ieee80211_key *key;
156
157 key = container_of(key_conf, struct ieee80211_key, conf);
158
159 might_sleep();
160 assert_key_lock(key->local);
161
162 key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE;
163
164 /*
165 * Flush TX path to avoid attempts to use this key
166 * after this function returns. Until then, drivers
167 * must be prepared to handle the key.
168 */
169 synchronize_rcu();
170}
171EXPORT_SYMBOL_GPL(ieee80211_key_removed);
172
132static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, 173static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata,
133 int idx) 174 int idx)
134{ 175{
@@ -184,6 +225,7 @@ void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata,
184 225
185static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, 226static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
186 struct sta_info *sta, 227 struct sta_info *sta,
228 bool pairwise,
187 struct ieee80211_key *old, 229 struct ieee80211_key *old,
188 struct ieee80211_key *new) 230 struct ieee80211_key *new)
189{ 231{
@@ -192,8 +234,14 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
192 if (new) 234 if (new)
193 list_add(&new->list, &sdata->key_list); 235 list_add(&new->list, &sdata->key_list);
194 236
195 if (sta) { 237 if (sta && pairwise) {
196 rcu_assign_pointer(sta->key, new); 238 rcu_assign_pointer(sta->ptk, new);
239 } else if (sta) {
240 if (old)
241 idx = old->conf.keyidx;
242 else
243 idx = new->conf.keyidx;
244 rcu_assign_pointer(sta->gtk[idx], new);
197 } else { 245 } else {
198 WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx); 246 WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx);
199 247
@@ -227,20 +275,18 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
227 } 275 }
228} 276}
229 277
230struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, 278struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
231 int idx,
232 size_t key_len,
233 const u8 *key_data, 279 const u8 *key_data,
234 size_t seq_len, const u8 *seq) 280 size_t seq_len, const u8 *seq)
235{ 281{
236 struct ieee80211_key *key; 282 struct ieee80211_key *key;
237 int i, j; 283 int i, j, err;
238 284
239 BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS); 285 BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS);
240 286
241 key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); 287 key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL);
242 if (!key) 288 if (!key)
243 return NULL; 289 return ERR_PTR(-ENOMEM);
244 290
245 /* 291 /*
246 * Default to software encryption; we'll later upload the 292 * Default to software encryption; we'll later upload the
@@ -249,15 +295,16 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
249 key->conf.flags = 0; 295 key->conf.flags = 0;
250 key->flags = 0; 296 key->flags = 0;
251 297
252 key->conf.alg = alg; 298 key->conf.cipher = cipher;
253 key->conf.keyidx = idx; 299 key->conf.keyidx = idx;
254 key->conf.keylen = key_len; 300 key->conf.keylen = key_len;
255 switch (alg) { 301 switch (cipher) {
256 case ALG_WEP: 302 case WLAN_CIPHER_SUITE_WEP40:
303 case WLAN_CIPHER_SUITE_WEP104:
257 key->conf.iv_len = WEP_IV_LEN; 304 key->conf.iv_len = WEP_IV_LEN;
258 key->conf.icv_len = WEP_ICV_LEN; 305 key->conf.icv_len = WEP_ICV_LEN;
259 break; 306 break;
260 case ALG_TKIP: 307 case WLAN_CIPHER_SUITE_TKIP:
261 key->conf.iv_len = TKIP_IV_LEN; 308 key->conf.iv_len = TKIP_IV_LEN;
262 key->conf.icv_len = TKIP_ICV_LEN; 309 key->conf.icv_len = TKIP_ICV_LEN;
263 if (seq) { 310 if (seq) {
@@ -269,7 +316,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
269 } 316 }
270 } 317 }
271 break; 318 break;
272 case ALG_CCMP: 319 case WLAN_CIPHER_SUITE_CCMP:
273 key->conf.iv_len = CCMP_HDR_LEN; 320 key->conf.iv_len = CCMP_HDR_LEN;
274 key->conf.icv_len = CCMP_MIC_LEN; 321 key->conf.icv_len = CCMP_MIC_LEN;
275 if (seq) { 322 if (seq) {
@@ -278,42 +325,38 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg,
278 key->u.ccmp.rx_pn[i][j] = 325 key->u.ccmp.rx_pn[i][j] =
279 seq[CCMP_PN_LEN - j - 1]; 326 seq[CCMP_PN_LEN - j - 1];
280 } 327 }
281 break;
282 case ALG_AES_CMAC:
283 key->conf.iv_len = 0;
284 key->conf.icv_len = sizeof(struct ieee80211_mmie);
285 if (seq)
286 for (j = 0; j < 6; j++)
287 key->u.aes_cmac.rx_pn[j] = seq[6 - j - 1];
288 break;
289 }
290 memcpy(key->conf.key, key_data, key_len);
291 INIT_LIST_HEAD(&key->list);
292
293 if (alg == ALG_CCMP) {
294 /* 328 /*
295 * Initialize AES key state here as an optimization so that 329 * Initialize AES key state here as an optimization so that
296 * it does not need to be initialized for every packet. 330 * it does not need to be initialized for every packet.
297 */ 331 */
298 key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data); 332 key->u.ccmp.tfm = ieee80211_aes_key_setup_encrypt(key_data);
299 if (!key->u.ccmp.tfm) { 333 if (IS_ERR(key->u.ccmp.tfm)) {
334 err = PTR_ERR(key->u.ccmp.tfm);
300 kfree(key); 335 kfree(key);
301 return NULL; 336 key = ERR_PTR(err);
302 } 337 }
303 } 338 break;
304 339 case WLAN_CIPHER_SUITE_AES_CMAC:
305 if (alg == ALG_AES_CMAC) { 340 key->conf.iv_len = 0;
341 key->conf.icv_len = sizeof(struct ieee80211_mmie);
342 if (seq)
343 for (j = 0; j < 6; j++)
344 key->u.aes_cmac.rx_pn[j] = seq[6 - j - 1];
306 /* 345 /*
307 * Initialize AES key state here as an optimization so that 346 * Initialize AES key state here as an optimization so that
308 * it does not need to be initialized for every packet. 347 * it does not need to be initialized for every packet.
309 */ 348 */
310 key->u.aes_cmac.tfm = 349 key->u.aes_cmac.tfm =
311 ieee80211_aes_cmac_key_setup(key_data); 350 ieee80211_aes_cmac_key_setup(key_data);
312 if (!key->u.aes_cmac.tfm) { 351 if (IS_ERR(key->u.aes_cmac.tfm)) {
352 err = PTR_ERR(key->u.aes_cmac.tfm);
313 kfree(key); 353 kfree(key);
314 return NULL; 354 key = ERR_PTR(err);
315 } 355 }
356 break;
316 } 357 }
358 memcpy(key->conf.key, key_data, key_len);
359 INIT_LIST_HEAD(&key->list);
317 360
318 return key; 361 return key;
319} 362}
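Returning ERR_PTR(-ENOMEM) instead of NULL lets ieee80211_key_alloc() report why allocation or cipher setup failed, and it is why the CCMP/CMAC tfm checks above switch from !ptr to IS_ERR(ptr). A userspace re-creation of the kernel's pointer-encoding trick (the real macros live in include/linux/err.h) shows the convention:

	#include <errno.h>
	#include <stdio.h>
	#include <stdlib.h>

	/* The top page of the address space never holds valid objects,
	 * so small negative errno values can travel inside a pointer. */
	#define MAX_ERRNO	4095
	#define ERR_PTR(err)	((void *)(long)(err))
	#define PTR_ERR(ptr)	((long)(ptr))
	#define IS_ERR(ptr)	((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

	static void *key_alloc(size_t len)
	{
		void *key = calloc(1, len);

		if (!key)
			return ERR_PTR(-ENOMEM);
		return key;
	}

	int main(void)
	{
		void *key = key_alloc(64);

		if (IS_ERR(key)) {
			fprintf(stderr, "alloc failed: %ld\n", PTR_ERR(key));
			return 1;
		}
		free(key);
		return 0;
	}

Callers get a single return value that is either a usable pointer or a precise errno, which is exactly what cfg80211's add_key path wants to pass back to userspace.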
@@ -326,9 +369,9 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
326 if (key->local) 369 if (key->local)
327 ieee80211_key_disable_hw_accel(key); 370 ieee80211_key_disable_hw_accel(key);
328 371
329 if (key->conf.alg == ALG_CCMP) 372 if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP)
330 ieee80211_aes_key_free(key->u.ccmp.tfm); 373 ieee80211_aes_key_free(key->u.ccmp.tfm);
331 if (key->conf.alg == ALG_AES_CMAC) 374 if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC)
332 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); 375 ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm);
333 if (key->local) 376 if (key->local)
334 ieee80211_debugfs_key_remove(key); 377 ieee80211_debugfs_key_remove(key);
@@ -336,12 +379,13 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key)
336 kfree(key); 379 kfree(key);
337} 380}
338 381
339void ieee80211_key_link(struct ieee80211_key *key, 382int ieee80211_key_link(struct ieee80211_key *key,
340 struct ieee80211_sub_if_data *sdata, 383 struct ieee80211_sub_if_data *sdata,
341 struct sta_info *sta) 384 struct sta_info *sta)
342{ 385{
343 struct ieee80211_key *old_key; 386 struct ieee80211_key *old_key;
344 int idx; 387 int idx, ret;
388 bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
345 389
346 BUG_ON(!sdata); 390 BUG_ON(!sdata);
347 BUG_ON(!key); 391 BUG_ON(!key);
@@ -358,13 +402,6 @@ void ieee80211_key_link(struct ieee80211_key *key,
358 */ 402 */
359 if (test_sta_flags(sta, WLAN_STA_WME)) 403 if (test_sta_flags(sta, WLAN_STA_WME))
360 key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; 404 key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA;
361
362 /*
363 * This key is for a specific sta interface,
364 * inform the driver that it should try to store
365 * this key as pairwise key.
366 */
367 key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE;
368 } else { 405 } else {
369 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 406 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
370 struct sta_info *ap; 407 struct sta_info *ap;
@@ -386,19 +423,23 @@ void ieee80211_key_link(struct ieee80211_key *key,
386 423
387 mutex_lock(&sdata->local->key_mtx); 424 mutex_lock(&sdata->local->key_mtx);
388 425
389 if (sta) 426 if (sta && pairwise)
390 old_key = sta->key; 427 old_key = sta->ptk;
428 else if (sta)
429 old_key = sta->gtk[idx];
391 else 430 else
392 old_key = sdata->keys[idx]; 431 old_key = sdata->keys[idx];
393 432
394 __ieee80211_key_replace(sdata, sta, old_key, key); 433 __ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
395 __ieee80211_key_destroy(old_key); 434 __ieee80211_key_destroy(old_key);
396 435
397 ieee80211_debugfs_key_add(key); 436 ieee80211_debugfs_key_add(key);
398 437
399 ieee80211_key_enable_hw_accel(key); 438 ret = ieee80211_key_enable_hw_accel(key);
400 439
401 mutex_unlock(&sdata->local->key_mtx); 440 mutex_unlock(&sdata->local->key_mtx);
441
442 return ret;
402} 443}
403 444
404static void __ieee80211_key_free(struct ieee80211_key *key) 445static void __ieee80211_key_free(struct ieee80211_key *key)
@@ -408,7 +449,8 @@ static void __ieee80211_key_free(struct ieee80211_key *key)
408 */ 449 */
409 if (key->sdata) 450 if (key->sdata)
410 __ieee80211_key_replace(key->sdata, key->sta, 451 __ieee80211_key_replace(key->sdata, key->sta,
411 key, NULL); 452 key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE,
453 key, NULL);
412 __ieee80211_key_destroy(key); 454 __ieee80211_key_destroy(key);
413} 455}
414 456
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index b665bbb7a471..0db1c0f5f697 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -16,6 +16,9 @@
16#include <linux/rcupdate.h> 16#include <linux/rcupdate.h>
17#include <net/mac80211.h> 17#include <net/mac80211.h>
18 18
19#define NUM_DEFAULT_KEYS 4
20#define NUM_DEFAULT_MGMT_KEYS 2
21
19#define WEP_IV_LEN 4 22#define WEP_IV_LEN 4
20#define WEP_ICV_LEN 4 23#define WEP_ICV_LEN 4
21#define ALG_TKIP_KEY_LEN 32 24#define ALG_TKIP_KEY_LEN 32
@@ -123,18 +126,16 @@ struct ieee80211_key {
123 struct ieee80211_key_conf conf; 126 struct ieee80211_key_conf conf;
124}; 127};
125 128
126struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, 129struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len,
127 int idx,
128 size_t key_len,
129 const u8 *key_data, 130 const u8 *key_data,
130 size_t seq_len, const u8 *seq); 131 size_t seq_len, const u8 *seq);
131/* 132/*
132 * Insert a key into data structures (sdata, sta if necessary) 133 * Insert a key into data structures (sdata, sta if necessary)
133 * to make it used, free old key. 134 * to make it used, free old key.
134 */ 135 */
135void ieee80211_key_link(struct ieee80211_key *key, 136int __must_check ieee80211_key_link(struct ieee80211_key *key,
136 struct ieee80211_sub_if_data *sdata, 137 struct ieee80211_sub_if_data *sdata,
137 struct sta_info *sta); 138 struct sta_info *sta);
138void ieee80211_key_free(struct ieee80211_local *local, 139void ieee80211_key_free(struct ieee80211_local *local,
139 struct ieee80211_key *key); 140 struct ieee80211_key *key);
140void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); 141void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx);
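The prototype change pairs the new int return of ieee80211_key_link() with __must_check, so any caller that drops the result draws a compile-time warning. In kernel headers __must_check expands, roughly, to GCC's warn_unused_result attribute; a minimal demonstration:

	#include <stdio.h>

	/* Approximately what include/linux/compiler.h provides. */
	#define __must_check __attribute__((warn_unused_result))

	static __must_check int key_link(int idx)
	{
		return idx < 4 ? 0 : -1;
	}

	int main(void)
	{
		key_link(2);		/* gcc: ignoring return value warning */

		if (key_link(7))	/* checked use compiles quietly */
			fprintf(stderr, "key_link failed\n");
		return 0;
	}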
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ded5c3843e06..22bc42b18991 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -54,6 +54,9 @@ void ieee80211_configure_filter(struct ieee80211_local *local)
54 if (local->monitors || local->scanning) 54 if (local->monitors || local->scanning)
55 new_flags |= FIF_BCN_PRBRESP_PROMISC; 55 new_flags |= FIF_BCN_PRBRESP_PROMISC;
56 56
57 if (local->fif_probe_req || local->probe_req_reg)
58 new_flags |= FIF_PROBE_REQ;
59
57 if (local->fif_fcsfail) 60 if (local->fif_fcsfail)
58 new_flags |= FIF_FCSFAIL; 61 new_flags |= FIF_FCSFAIL;
59 62
@@ -99,16 +102,19 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
99 int ret = 0; 102 int ret = 0;
100 int power; 103 int power;
101 enum nl80211_channel_type channel_type; 104 enum nl80211_channel_type channel_type;
105 u32 offchannel_flag;
102 106
103 might_sleep(); 107 might_sleep();
104 108
105 scan_chan = local->scan_channel; 109 scan_chan = local->scan_channel;
106 110
111 offchannel_flag = local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
107 if (scan_chan) { 112 if (scan_chan) {
108 chan = scan_chan; 113 chan = scan_chan;
109 channel_type = NL80211_CHAN_NO_HT; 114 channel_type = NL80211_CHAN_NO_HT;
110 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; 115 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
111 } else if (local->tmp_channel) { 116 } else if (local->tmp_channel &&
117 local->oper_channel != local->tmp_channel) {
112 chan = scan_chan = local->tmp_channel; 118 chan = scan_chan = local->tmp_channel;
113 channel_type = local->tmp_channel_type; 119 channel_type = local->tmp_channel_type;
114 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL; 120 local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
@@ -117,8 +123,9 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
117 channel_type = local->_oper_channel_type; 123 channel_type = local->_oper_channel_type;
118 local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL; 124 local->hw.conf.flags &= ~IEEE80211_CONF_OFFCHANNEL;
119 } 125 }
126 offchannel_flag ^= local->hw.conf.flags & IEEE80211_CONF_OFFCHANNEL;
120 127
121 if (chan != local->hw.conf.channel || 128 if (offchannel_flag || chan != local->hw.conf.channel ||
122 channel_type != local->hw.conf.channel_type) { 129 channel_type != local->hw.conf.channel_type) {
123 local->hw.conf.channel = chan; 130 local->hw.conf.channel = chan;
124 local->hw.conf.channel_type = channel_type; 131 local->hw.conf.channel_type = channel_type;
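The offchannel_flag logic above is a toggle detector: the IEEE80211_CONF_OFFCHANNEL bit is sampled before the channel decision, XORed with the bit afterwards, and any difference forces the channel-change path even when the channel pointer itself is unchanged. A self-contained sketch of the idiom (CONF_OFFCHANNEL and channel_changed() are stand-ins):

	#include <stdio.h>

	#define CONF_OFFCHANNEL	0x1	/* stand-in for IEEE80211_CONF_OFFCHANNEL */

	static unsigned int conf_flags;

	/* Detect a flag transition across a decision block. */
	static int channel_changed(int go_offchannel)
	{
		unsigned int offchannel_flag = conf_flags & CONF_OFFCHANNEL;

		if (go_offchannel)
			conf_flags |= CONF_OFFCHANNEL;
		else
			conf_flags &= ~CONF_OFFCHANNEL;

		/* Non-zero iff the bit flipped in either direction. */
		offchannel_flag ^= conf_flags & CONF_OFFCHANNEL;
		return offchannel_flag != 0;
	}

	int main(void)
	{
		printf("%d\n", channel_changed(1));	/* 1: turned on  */
		printf("%d\n", channel_changed(1));	/* 0: unchanged  */
		printf("%d\n", channel_changed(0));	/* 1: turned off */
		return 0;
	}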
@@ -197,6 +204,8 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
197 sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid; 204 sdata->vif.bss_conf.bssid = sdata->u.ibss.bssid;
198 else if (sdata->vif.type == NL80211_IFTYPE_AP) 205 else if (sdata->vif.type == NL80211_IFTYPE_AP)
199 sdata->vif.bss_conf.bssid = sdata->vif.addr; 206 sdata->vif.bss_conf.bssid = sdata->vif.addr;
207 else if (sdata->vif.type == NL80211_IFTYPE_WDS)
208 sdata->vif.bss_conf.bssid = NULL;
200 else if (ieee80211_vif_is_mesh(&sdata->vif)) { 209 else if (ieee80211_vif_is_mesh(&sdata->vif)) {
201 sdata->vif.bss_conf.bssid = zero; 210 sdata->vif.bss_conf.bssid = zero;
202 } else { 211 } else {
@@ -207,6 +216,7 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata,
207 switch (sdata->vif.type) { 216 switch (sdata->vif.type) {
208 case NL80211_IFTYPE_AP: 217 case NL80211_IFTYPE_AP:
209 case NL80211_IFTYPE_ADHOC: 218 case NL80211_IFTYPE_ADHOC:
219 case NL80211_IFTYPE_WDS:
210 case NL80211_IFTYPE_MESH_POINT: 220 case NL80211_IFTYPE_MESH_POINT:
211 break; 221 break;
212 default: 222 default:
@@ -291,7 +301,16 @@ static void ieee80211_restart_work(struct work_struct *work)
291 struct ieee80211_local *local = 301 struct ieee80211_local *local =
292 container_of(work, struct ieee80211_local, restart_work); 302 container_of(work, struct ieee80211_local, restart_work);
293 303
304 /* wait for scan work complete */
305 flush_workqueue(local->workqueue);
306
307 mutex_lock(&local->mtx);
308 WARN(test_bit(SCAN_HW_SCANNING, &local->scanning),
309 "%s called with hardware scan in progress\n", __func__);
310 mutex_unlock(&local->mtx);
311
294 rtnl_lock(); 312 rtnl_lock();
313 ieee80211_scan_cancel(local);
295 ieee80211_reconfig(local); 314 ieee80211_reconfig(local);
296 rtnl_unlock(); 315 rtnl_unlock();
297} 316}
@@ -302,7 +321,7 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
302 321
303 trace_api_restart_hw(local); 322 trace_api_restart_hw(local);
304 323
305 /* use this reason, __ieee80211_resume will unblock it */ 324 /* use this reason, ieee80211_reconfig will unblock it */
306 ieee80211_stop_queues_by_reason(hw, 325 ieee80211_stop_queues_by_reason(hw,
307 IEEE80211_QUEUE_STOP_REASON_SUSPEND); 326 IEEE80211_QUEUE_STOP_REASON_SUSPEND);
308 327
@@ -316,7 +335,7 @@ static void ieee80211_recalc_smps_work(struct work_struct *work)
316 container_of(work, struct ieee80211_local, recalc_smps); 335 container_of(work, struct ieee80211_local, recalc_smps);
317 336
318 mutex_lock(&local->iflist_mtx); 337 mutex_lock(&local->iflist_mtx);
319 ieee80211_recalc_smps(local, NULL); 338 ieee80211_recalc_smps(local);
320 mutex_unlock(&local->iflist_mtx); 339 mutex_unlock(&local->iflist_mtx);
321} 340}
322 341
@@ -336,9 +355,6 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
336 struct ieee80211_if_managed *ifmgd; 355 struct ieee80211_if_managed *ifmgd;
337 int c = 0; 356 int c = 0;
338 357
339 if (!netif_running(ndev))
340 return NOTIFY_DONE;
341
342 /* Make sure it's our interface that got changed */ 358 /* Make sure it's our interface that got changed */
343 if (!wdev) 359 if (!wdev)
344 return NOTIFY_DONE; 360 return NOTIFY_DONE;
@@ -349,11 +365,14 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
349 sdata = IEEE80211_DEV_TO_SUB_IF(ndev); 365 sdata = IEEE80211_DEV_TO_SUB_IF(ndev);
350 bss_conf = &sdata->vif.bss_conf; 366 bss_conf = &sdata->vif.bss_conf;
351 367
368 if (!ieee80211_sdata_running(sdata))
369 return NOTIFY_DONE;
370
352 /* ARP filtering is only supported in managed mode */ 371 /* ARP filtering is only supported in managed mode */
353 if (sdata->vif.type != NL80211_IFTYPE_STATION) 372 if (sdata->vif.type != NL80211_IFTYPE_STATION)
354 return NOTIFY_DONE; 373 return NOTIFY_DONE;
355 374
356 idev = sdata->dev->ip_ptr; 375 idev = __in_dev_get_rtnl(sdata->dev);
357 if (!idev) 376 if (!idev)
358 return NOTIFY_DONE; 377 return NOTIFY_DONE;
359 378
@@ -390,6 +409,80 @@ static int ieee80211_ifa_changed(struct notifier_block *nb,
390} 409}
391#endif 410#endif
392 411
412static int ieee80211_napi_poll(struct napi_struct *napi, int budget)
413{
414 struct ieee80211_local *local =
415 container_of(napi, struct ieee80211_local, napi);
416
417 return local->ops->napi_poll(&local->hw, budget);
418}
419
420void ieee80211_napi_schedule(struct ieee80211_hw *hw)
421{
422 struct ieee80211_local *local = hw_to_local(hw);
423
424 napi_schedule(&local->napi);
425}
426EXPORT_SYMBOL(ieee80211_napi_schedule);
427
428void ieee80211_napi_complete(struct ieee80211_hw *hw)
429{
430 struct ieee80211_local *local = hw_to_local(hw);
431
432 napi_complete(&local->napi);
433}
434EXPORT_SYMBOL(ieee80211_napi_complete);
435
436/* There isn't a lot of sense in it, but you can transmit anything you like */
437static const struct ieee80211_txrx_stypes
438ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = {
439 [NL80211_IFTYPE_ADHOC] = {
440 .tx = 0xffff,
441 .rx = BIT(IEEE80211_STYPE_ACTION >> 4),
442 },
443 [NL80211_IFTYPE_STATION] = {
444 .tx = 0xffff,
445 .rx = BIT(IEEE80211_STYPE_ACTION >> 4) |
446 BIT(IEEE80211_STYPE_PROBE_REQ >> 4),
447 },
448 [NL80211_IFTYPE_AP] = {
449 .tx = 0xffff,
450 .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
451 BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
452 BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
453 BIT(IEEE80211_STYPE_DISASSOC >> 4) |
454 BIT(IEEE80211_STYPE_AUTH >> 4) |
455 BIT(IEEE80211_STYPE_DEAUTH >> 4) |
456 BIT(IEEE80211_STYPE_ACTION >> 4),
457 },
458 [NL80211_IFTYPE_AP_VLAN] = {
459 /* copy AP */
460 .tx = 0xffff,
461 .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
462 BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
463 BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
464 BIT(IEEE80211_STYPE_DISASSOC >> 4) |
465 BIT(IEEE80211_STYPE_AUTH >> 4) |
466 BIT(IEEE80211_STYPE_DEAUTH >> 4) |
467 BIT(IEEE80211_STYPE_ACTION >> 4),
468 },
469 [NL80211_IFTYPE_P2P_CLIENT] = {
470 .tx = 0xffff,
471 .rx = BIT(IEEE80211_STYPE_ACTION >> 4) |
472 BIT(IEEE80211_STYPE_PROBE_REQ >> 4),
473 },
474 [NL80211_IFTYPE_P2P_GO] = {
475 .tx = 0xffff,
476 .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) |
477 BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
478 BIT(IEEE80211_STYPE_PROBE_REQ >> 4) |
479 BIT(IEEE80211_STYPE_DISASSOC >> 4) |
480 BIT(IEEE80211_STYPE_AUTH >> 4) |
481 BIT(IEEE80211_STYPE_DEAUTH >> 4) |
482 BIT(IEEE80211_STYPE_ACTION >> 4),
483 },
484};
485
393struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, 486struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
394 const struct ieee80211_ops *ops) 487 const struct ieee80211_ops *ops)
395{ 488{
@@ -419,6 +512,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
419 if (!wiphy) 512 if (!wiphy)
420 return NULL; 513 return NULL;
421 514
515 wiphy->mgmt_stypes = ieee80211_default_mgmt_stypes;
516
422 wiphy->flags |= WIPHY_FLAG_NETNS_OK | 517 wiphy->flags |= WIPHY_FLAG_NETNS_OK |
423 WIPHY_FLAG_4ADDR_AP | 518 WIPHY_FLAG_4ADDR_AP |
424 WIPHY_FLAG_4ADDR_STATION; 519 WIPHY_FLAG_4ADDR_STATION;
@@ -444,6 +539,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
444 /* set up some defaults */ 539 /* set up some defaults */
445 local->hw.queues = 1; 540 local->hw.queues = 1;
446 local->hw.max_rates = 1; 541 local->hw.max_rates = 1;
542 local->hw.max_report_rates = 0;
447 local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; 543 local->hw.conf.long_frame_max_tx_count = wiphy->retry_long;
448 local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; 544 local->hw.conf.short_frame_max_tx_count = wiphy->retry_short;
449 local->user_power_level = -1; 545 local->user_power_level = -1;
@@ -455,7 +551,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
455 __hw_addr_init(&local->mc_list); 551 __hw_addr_init(&local->mc_list);
456 552
457 mutex_init(&local->iflist_mtx); 553 mutex_init(&local->iflist_mtx);
458 mutex_init(&local->scan_mtx); 554 mutex_init(&local->mtx);
459 555
460 mutex_init(&local->key_mtx); 556 mutex_init(&local->key_mtx);
461 spin_lock_init(&local->filter_lock); 557 spin_lock_init(&local->filter_lock);
@@ -494,6 +590,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
494 skb_queue_head_init(&local->skb_queue); 590 skb_queue_head_init(&local->skb_queue);
495 skb_queue_head_init(&local->skb_queue_unreliable); 591 skb_queue_head_init(&local->skb_queue_unreliable);
496 592
593 /* init dummy netdev for use w/ NAPI */
594 init_dummy_netdev(&local->napi_dev);
595
497 return local_to_hw(local); 596 return local_to_hw(local);
498} 597}
499EXPORT_SYMBOL(ieee80211_alloc_hw); 598EXPORT_SYMBOL(ieee80211_alloc_hw);
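Besides the optional NAPI glue (ieee80211_napi_schedule() and ieee80211_napi_complete() simply forward to the NAPI context hung off the dummy netdev), this hunk introduces ieee80211_default_mgmt_stypes: for each interface type, a u16 bitmap of management-frame subtypes that may be transmitted or subscribed for RX. Since 802.11 keeps the subtype in bits 4-7 of the frame control field, IEEE80211_STYPE_xxx >> 4 yields a bit index from 0 to 15. A sketch of building and testing such a mask (constants as in include/linux/ieee80211.h):

	#include <stdint.h>
	#include <stdio.h>

	/* Subtype values already shifted into bits 4-7 of frame control. */
	#define STYPE_PROBE_REQ	0x0040
	#define STYPE_ACTION	0x00d0

	#define BIT(n)		(1U << (n))

	int main(void)
	{
		/* Per-iftype RX mask, built the same way as the table above. */
		uint16_t rx = BIT(STYPE_ACTION >> 4) | BIT(STYPE_PROBE_REQ >> 4);

		uint16_t fc = STYPE_PROBE_REQ;	/* incoming frame control */

		if (rx & BIT((fc & 0x00f0) >> 4))
			printf("subtype %u may be reported to userspace\n",
			       (fc & 0x00f0) >> 4);
		return 0;
	}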
@@ -506,6 +605,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
506 int channels, max_bitrates; 605 int channels, max_bitrates;
507 bool supp_ht; 606 bool supp_ht;
508 static const u32 cipher_suites[] = { 607 static const u32 cipher_suites[] = {
608 /* keep WEP first, it may be removed below */
509 WLAN_CIPHER_SUITE_WEP40, 609 WLAN_CIPHER_SUITE_WEP40,
510 WLAN_CIPHER_SUITE_WEP104, 610 WLAN_CIPHER_SUITE_WEP104,
511 WLAN_CIPHER_SUITE_TKIP, 611 WLAN_CIPHER_SUITE_TKIP,
@@ -515,6 +615,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
515 WLAN_CIPHER_SUITE_AES_CMAC 615 WLAN_CIPHER_SUITE_AES_CMAC
516 }; 616 };
517 617
618 if (hw->max_report_rates == 0)
619 hw->max_report_rates = hw->max_rates;
620
518 /* 621 /*
519 * generic code guarantees at least one band, 622 * generic code guarantees at least one band,
520 * set this very early because much code assumes 623 * set this very early because much code assumes
@@ -554,6 +657,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
554 /* mac80211 always supports monitor */ 657 /* mac80211 always supports monitor */
555 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); 658 local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR);
556 659
660#ifndef CONFIG_MAC80211_MESH
661 /* mesh depends on Kconfig, but drivers should set it if they want */
662 local->hw.wiphy->interface_modes &= ~BIT(NL80211_IFTYPE_MESH_POINT);
663#endif
664
665 /* mac80211 supports control port protocol changing */
666 local->hw.wiphy->flags |= WIPHY_FLAG_CONTROL_PORT_PROTOCOL;
667
557 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) 668 if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
558 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; 669 local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM;
559 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) 670 else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)
@@ -589,10 +700,41 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
589 if (local->hw.wiphy->max_scan_ie_len) 700 if (local->hw.wiphy->max_scan_ie_len)
590 local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len; 701 local->hw.wiphy->max_scan_ie_len -= local->scan_ies_len;
591 702
592 local->hw.wiphy->cipher_suites = cipher_suites; 703 /* Set up cipher suites unless driver already did */
593 local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); 704 if (!local->hw.wiphy->cipher_suites) {
594 if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE)) 705 local->hw.wiphy->cipher_suites = cipher_suites;
595 local->hw.wiphy->n_cipher_suites--; 706 local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites);
707 if (!(local->hw.flags & IEEE80211_HW_MFP_CAPABLE))
708 local->hw.wiphy->n_cipher_suites--;
709 }
710 if (IS_ERR(local->wep_tx_tfm) || IS_ERR(local->wep_rx_tfm)) {
711 if (local->hw.wiphy->cipher_suites == cipher_suites) {
712 local->hw.wiphy->cipher_suites += 2;
713 local->hw.wiphy->n_cipher_suites -= 2;
714 } else {
715 u32 *suites;
716 int r, w = 0;
717
718 /* Filter out WEP */
719
720 suites = kmemdup(
721 local->hw.wiphy->cipher_suites,
722 sizeof(u32) * local->hw.wiphy->n_cipher_suites,
723 GFP_KERNEL);
724 if (!suites)
725 return -ENOMEM;
726 for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) {
727 u32 suite = local->hw.wiphy->cipher_suites[r];
728 if (suite == WLAN_CIPHER_SUITE_WEP40 ||
729 suite == WLAN_CIPHER_SUITE_WEP104)
730 continue;
731 suites[w++] = suite;
732 }
733 local->hw.wiphy->cipher_suites = suites;
734 local->hw.wiphy->n_cipher_suites = w;
735 local->wiphy_ciphers_allocated = true;
736 }
737 }
596 738
597 result = wiphy_register(local->hw.wiphy); 739 result = wiphy_register(local->hw.wiphy);
598 if (result < 0) 740 if (result < 0)
@@ -641,16 +783,16 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
641 783
642 result = ieee80211_wep_init(local); 784 result = ieee80211_wep_init(local);
643 if (result < 0) 785 if (result < 0)
644 printk(KERN_DEBUG "%s: Failed to initialize wep: %d\n", 786 wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n",
645 wiphy_name(local->hw.wiphy), result); 787 result);
646 788
647 rtnl_lock(); 789 rtnl_lock();
648 790
649 result = ieee80211_init_rate_ctrl_alg(local, 791 result = ieee80211_init_rate_ctrl_alg(local,
650 hw->rate_control_algorithm); 792 hw->rate_control_algorithm);
651 if (result < 0) { 793 if (result < 0) {
652 printk(KERN_DEBUG "%s: Failed to initialize rate control " 794 wiphy_debug(local->hw.wiphy,
653 "algorithm\n", wiphy_name(local->hw.wiphy)); 795 "Failed to initialize rate control algorithm\n");
654 goto fail_rate; 796 goto fail_rate;
655 } 797 }
656 798
@@ -659,8 +801,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
659 result = ieee80211_if_add(local, "wlan%d", NULL, 801 result = ieee80211_if_add(local, "wlan%d", NULL,
660 NL80211_IFTYPE_STATION, NULL); 802 NL80211_IFTYPE_STATION, NULL);
661 if (result) 803 if (result)
662 printk(KERN_WARNING "%s: Failed to add default virtual iface\n", 804 wiphy_warn(local->hw.wiphy,
663 wiphy_name(local->hw.wiphy)); 805 "Failed to add default virtual iface\n");
664 } 806 }
665 807
666 rtnl_unlock(); 808 rtnl_unlock();
@@ -683,6 +825,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
683 goto fail_ifa; 825 goto fail_ifa;
684#endif 826#endif
685 827
828 netif_napi_add(&local->napi_dev, &local->napi, ieee80211_napi_poll,
829 local->hw.napi_weight);
830
686 return 0; 831 return 0;
687 832
688#ifdef CONFIG_INET 833#ifdef CONFIG_INET
@@ -703,6 +848,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
703 fail_workqueue: 848 fail_workqueue:
704 wiphy_unregister(local->hw.wiphy); 849 wiphy_unregister(local->hw.wiphy);
705 fail_wiphy_register: 850 fail_wiphy_register:
851 if (local->wiphy_ciphers_allocated)
852 kfree(local->hw.wiphy->cipher_suites);
706 kfree(local->int_scan_req); 853 kfree(local->int_scan_req);
707 return result; 854 return result;
708} 855}
@@ -738,6 +885,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
738 */ 885 */
739 del_timer_sync(&local->work_timer); 886 del_timer_sync(&local->work_timer);
740 887
888 cancel_work_sync(&local->restart_work);
741 cancel_work_sync(&local->reconfig_filter); 889 cancel_work_sync(&local->reconfig_filter);
742 890
743 ieee80211_clear_tx_pending(local); 891 ieee80211_clear_tx_pending(local);
@@ -746,8 +894,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
746 894
747 if (skb_queue_len(&local->skb_queue) || 895 if (skb_queue_len(&local->skb_queue) ||
748 skb_queue_len(&local->skb_queue_unreliable)) 896 skb_queue_len(&local->skb_queue_unreliable))
749 printk(KERN_WARNING "%s: skb_queue not empty\n", 897 wiphy_warn(local->hw.wiphy, "skb_queue not empty\n");
750 wiphy_name(local->hw.wiphy));
751 skb_queue_purge(&local->skb_queue); 898 skb_queue_purge(&local->skb_queue);
752 skb_queue_purge(&local->skb_queue_unreliable); 899 skb_queue_purge(&local->skb_queue_unreliable);
753 900
@@ -764,7 +911,10 @@ void ieee80211_free_hw(struct ieee80211_hw *hw)
764 struct ieee80211_local *local = hw_to_local(hw); 911 struct ieee80211_local *local = hw_to_local(hw);
765 912
766 mutex_destroy(&local->iflist_mtx); 913 mutex_destroy(&local->iflist_mtx);
767 mutex_destroy(&local->scan_mtx); 914 mutex_destroy(&local->mtx);
915
916 if (local->wiphy_ciphers_allocated)
917 kfree(local->hw.wiphy->cipher_suites);
768 918
769 wiphy_free(local->hw.wiphy); 919 wiphy_free(local->hw.wiphy);
770} 920}
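A theme running through this merge is replacing printk(KERN_ERR "%s: ...", wiphy_name(...), ...) with wiphy_err()/wiphy_warn()/wiphy_debug(), which prepend the device name themselves and shorten every call site. A userspace approximation of the wrapper, with fprintf standing in for the dev_printk()-based kernel helpers and ##__VA_ARGS__ assuming GCC:

	#include <stdio.h>

	struct wiphy { const char *name; };

	#define wiphy_name(w)	((w)->name)

	/* Stand-in for the kernel's wiphy_err() family. */
	#define wiphy_err(w, fmt, ...) \
		fprintf(stderr, "%s: " fmt, wiphy_name(w), ##__VA_ARGS__)

	int main(void)
	{
		struct wiphy w = { .name = "phy0" };

		wiphy_err(&w, "failed to set key (%d) to hardware (%d)\n",
			  1, -28);
		return 0;
	}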
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index ea13a80a476c..1c91f0f3c307 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -412,7 +412,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
412 enum plink_event event; 412 enum plink_event event;
413 enum plink_frame_type ftype; 413 enum plink_frame_type ftype;
414 size_t baselen; 414 size_t baselen;
415 bool deactivated; 415 bool deactivated, matches_local = true;
416 u8 ie_len; 416 u8 ie_len;
417 u8 *baseaddr; 417 u8 *baseaddr;
418 __le16 plid, llid, reason; 418 __le16 plid, llid, reason;
@@ -487,6 +487,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
487 /* Now we will figure out the appropriate event... */ 487 /* Now we will figure out the appropriate event... */
488 event = PLINK_UNDEFINED; 488 event = PLINK_UNDEFINED;
489 if (ftype != PLINK_CLOSE && (!mesh_matches_local(&elems, sdata))) { 489 if (ftype != PLINK_CLOSE && (!mesh_matches_local(&elems, sdata))) {
490 matches_local = false;
490 switch (ftype) { 491 switch (ftype) {
491 case PLINK_OPEN: 492 case PLINK_OPEN:
492 event = OPN_RJCT; 493 event = OPN_RJCT;
@@ -498,7 +499,15 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
498 /* avoid warning */ 499 /* avoid warning */
499 break; 500 break;
500 } 501 }
501 spin_lock_bh(&sta->lock); 502 }
503
504 if (!sta && !matches_local) {
505 rcu_read_unlock();
506 reason = cpu_to_le16(MESH_CAPABILITY_POLICY_VIOLATION);
507 llid = 0;
508 mesh_plink_frame_tx(sdata, PLINK_CLOSE, mgmt->sa, llid,
509 plid, reason);
510 return;
502 } else if (!sta) { 511 } else if (!sta) {
503 /* ftype == PLINK_OPEN */ 512 /* ftype == PLINK_OPEN */
504 u32 rates; 513 u32 rates;
@@ -522,7 +531,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
522 } 531 }
523 event = OPN_ACPT; 532 event = OPN_ACPT;
524 spin_lock_bh(&sta->lock); 533 spin_lock_bh(&sta->lock);
525 } else { 534 } else if (matches_local) {
526 spin_lock_bh(&sta->lock); 535 spin_lock_bh(&sta->lock);
527 switch (ftype) { 536 switch (ftype) {
528 case PLINK_OPEN: 537 case PLINK_OPEN:
@@ -564,6 +573,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m
564 rcu_read_unlock(); 573 rcu_read_unlock();
565 return; 574 return;
566 } 575 }
576 } else {
577 spin_lock_bh(&sta->lock);
567 } 578 }
568 579
569 mpl_dbg("Mesh plink (peer, state, llid, plid, event): %pM %s %d %d %d\n", 580 mpl_dbg("Mesh plink (peer, state, llid, plid, event): %pM %s %d %d %d\n",
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b6c163ac22da..a3a9421555af 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -54,6 +54,12 @@
54 */ 54 */
55#define IEEE80211_SIGNAL_AVE_WEIGHT 3 55#define IEEE80211_SIGNAL_AVE_WEIGHT 3
56 56
57/*
58 * How many Beacon frames need to have been used in average signal strength
59 * before starting to indicate signal change events.
60 */
61#define IEEE80211_SIGNAL_AVE_MIN_COUNT 4
62
57#define TMR_RUNNING_TIMER 0 63#define TMR_RUNNING_TIMER 0
58#define TMR_RUNNING_CHANSW 1 64#define TMR_RUNNING_CHANSW 1
59 65
@@ -86,7 +92,7 @@ enum rx_mgmt_action {
86/* utils */ 92/* utils */
87static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd) 93static inline void ASSERT_MGD_MTX(struct ieee80211_if_managed *ifmgd)
88{ 94{
89 WARN_ON(!mutex_is_locked(&ifmgd->mtx)); 95 lockdep_assert_held(&ifmgd->mtx);
90} 96}
91 97
92/* 98/*
@@ -109,7 +115,7 @@ static void run_again(struct ieee80211_if_managed *ifmgd,
109 mod_timer(&ifmgd->timer, timeout); 115 mod_timer(&ifmgd->timer, timeout);
110} 116}
111 117
112static void mod_beacon_timer(struct ieee80211_sub_if_data *sdata) 118void ieee80211_sta_reset_beacon_monitor(struct ieee80211_sub_if_data *sdata)
113{ 119{
114 if (sdata->local->hw.flags & IEEE80211_HW_BEACON_FILTER) 120 if (sdata->local->hw.flags & IEEE80211_HW_BEACON_FILTER)
115 return; 121 return;
@@ -118,6 +124,19 @@ static void mod_beacon_timer(struct ieee80211_sub_if_data *sdata)
118 round_jiffies_up(jiffies + IEEE80211_BEACON_LOSS_TIME)); 124 round_jiffies_up(jiffies + IEEE80211_BEACON_LOSS_TIME));
119} 125}
120 126
127void ieee80211_sta_reset_conn_monitor(struct ieee80211_sub_if_data *sdata)
128{
129 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
130
131 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
132 return;
133
134 mod_timer(&sdata->u.mgd.conn_mon_timer,
135 round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME));
136
137 ifmgd->probe_send_count = 0;
138}
139
121static int ecw2cw(int ecw) 140static int ecw2cw(int ecw)
122{ 141{
123 return (1 << ecw) - 1; 142 return (1 << ecw) - 1;
@@ -778,16 +797,17 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
778 params.uapsd = uapsd; 797 params.uapsd = uapsd;
779 798
780#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 799#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
781 printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d " 800 wiphy_debug(local->hw.wiphy,
782 "cWmin=%d cWmax=%d txop=%d uapsd=%d\n", 801 "WMM queue=%d aci=%d acm=%d aifs=%d "
783 wiphy_name(local->hw.wiphy), queue, aci, acm, 802 "cWmin=%d cWmax=%d txop=%d uapsd=%d\n",
784 params.aifs, params.cw_min, params.cw_max, params.txop, 803 queue, aci, acm,
785 params.uapsd); 804 params.aifs, params.cw_min, params.cw_max,
805 params.txop, params.uapsd);
786#endif 806#endif
787 if (drv_conf_tx(local, queue, &params)) 807 if (drv_conf_tx(local, queue, &params))
788 printk(KERN_DEBUG "%s: failed to set TX queue " 808 wiphy_debug(local->hw.wiphy,
789 "parameters for queue %d\n", 809 "failed to set TX queue parameters for queue %d\n",
790 wiphy_name(local->hw.wiphy), queue); 810 queue);
791 } 811 }
792 812
793 /* enable WMM or activate new settings */ 813 /* enable WMM or activate new settings */
@@ -860,14 +880,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
860 sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL | 880 sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL |
861 IEEE80211_STA_BEACON_POLL); 881 IEEE80211_STA_BEACON_POLL);
862 882
863 /*
864 * Always handle WMM once after association regardless
865 * of the first value the AP uses. Setting -1 here has
866 * that effect because the AP values is an unsigned
867 * 4-bit value.
868 */
869 sdata->u.mgd.wmm_last_param_set = -1;
870
871 ieee80211_led_assoc(local, 1); 883 ieee80211_led_assoc(local, 1);
872 884
873 if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD) 885 if (local->hw.flags & IEEE80211_HW_NEED_DTIM_PERIOD)
@@ -901,7 +913,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
901 913
902 mutex_lock(&local->iflist_mtx); 914 mutex_lock(&local->iflist_mtx);
903 ieee80211_recalc_ps(local, -1); 915 ieee80211_recalc_ps(local, -1);
904 ieee80211_recalc_smps(local, sdata); 916 ieee80211_recalc_smps(local);
905 mutex_unlock(&local->iflist_mtx); 917 mutex_unlock(&local->iflist_mtx);
906 918
907 netif_tx_start_all_queues(sdata->dev); 919 netif_tx_start_all_queues(sdata->dev);
@@ -909,7 +921,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
909} 921}
910 922
911static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, 923static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
912 bool remove_sta) 924 bool remove_sta, bool tx)
913{ 925{
914 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 926 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
915 struct ieee80211_local *local = sdata->local; 927 struct ieee80211_local *local = sdata->local;
@@ -948,7 +960,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
948 sta = sta_info_get(sdata, bssid); 960 sta = sta_info_get(sdata, bssid);
949 if (sta) { 961 if (sta) {
950 set_sta_flags(sta, WLAN_STA_BLOCK_BA); 962 set_sta_flags(sta, WLAN_STA_BLOCK_BA);
951 ieee80211_sta_tear_down_BA_sessions(sta); 963 ieee80211_sta_tear_down_BA_sessions(sta, tx);
952 } 964 }
953 mutex_unlock(&local->sta_mtx); 965 mutex_unlock(&local->sta_mtx);
954 966
@@ -990,6 +1002,11 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
990 1002
991 if (remove_sta) 1003 if (remove_sta)
992 sta_info_destroy_addr(sdata, bssid); 1004 sta_info_destroy_addr(sdata, bssid);
1005
1006 del_timer_sync(&sdata->u.mgd.conn_mon_timer);
1007 del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
1008 del_timer_sync(&sdata->u.mgd.timer);
1009 del_timer_sync(&sdata->u.mgd.chswitch_timer);
993} 1010}
994 1011
995void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, 1012void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
@@ -1006,21 +1023,26 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
1006 if (is_multicast_ether_addr(hdr->addr1)) 1023 if (is_multicast_ether_addr(hdr->addr1))
1007 return; 1024 return;
1008 1025
1009 if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR) 1026 ieee80211_sta_reset_conn_monitor(sdata);
1010 return;
1011
1012 mod_timer(&sdata->u.mgd.conn_mon_timer,
1013 round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME));
1014} 1027}
1015 1028
1016static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) 1029static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
1017{ 1030{
1018 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; 1031 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
1019 const u8 *ssid; 1032 const u8 *ssid;
1033 u8 *dst = ifmgd->associated->bssid;
1034 u8 unicast_limit = max(1, IEEE80211_MAX_PROBE_TRIES - 3);
1035
1036 /*
1037 * Try sending broadcast probe requests for the last three
1038 * probe requests after the first ones failed since some
1039 * buggy APs only support broadcast probe requests.
1040 */
1041 if (ifmgd->probe_send_count >= unicast_limit)
1042 dst = NULL;
1020 1043
1021 ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID); 1044 ssid = ieee80211_bss_get_ie(ifmgd->associated, WLAN_EID_SSID);
1022 ieee80211_send_probe_req(sdata, ifmgd->associated->bssid, 1045 ieee80211_send_probe_req(sdata, dst, ssid + 2, ssid[1], NULL, 0);
1023 ssid + 2, ssid[1], NULL, 0);
1024 1046
1025 ifmgd->probe_send_count++; 1047 ifmgd->probe_send_count++;
1026 ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT; 1048 ifmgd->probe_timeout = jiffies + IEEE80211_PROBE_WAIT;
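Probing an apparently dead AP now starts with unicast probe requests and switches dst to NULL, meaning broadcast, for the last three attempts, since some buggy APs answer only broadcast probes. Taking IEEE80211_MAX_PROBE_TRIES as 5 (its value in this tree, assumed here), the unicast limit works out to 2:

	#include <stdio.h>

	#define MAX_PROBE_TRIES	5	/* assumed IEEE80211_MAX_PROBE_TRIES */
	#define max(a, b)	((a) > (b) ? (a) : (b))

	int main(void)
	{
		const char *bssid = "ap-bssid";
		int unicast_limit = max(1, MAX_PROBE_TRIES - 3);
		int try;

		for (try = 0; try < MAX_PROBE_TRIES; try++) {
			/* Early tries go unicast; the rest fall back. */
			const char *dst = try >= unicast_limit ? NULL : bssid;

			printf("probe %d -> %s\n", try,
			       dst ? dst : "broadcast");
		}
		return 0;
	}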
@@ -1102,9 +1124,12 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
1102 1124
1103 printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); 1125 printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid);
1104 1126
1105 ieee80211_set_disassoc(sdata, true); 1127 ieee80211_set_disassoc(sdata, true, true);
1106 ieee80211_recalc_idle(local);
1107 mutex_unlock(&ifmgd->mtx); 1128 mutex_unlock(&ifmgd->mtx);
1129
1130 mutex_lock(&local->mtx);
1131 ieee80211_recalc_idle(local);
1132 mutex_unlock(&local->mtx);
1108 /* 1133 /*
1109 * must be outside lock due to cfg80211, 1134 * must be outside lock due to cfg80211,
1110 * but that's not a problem. 1135 * but that's not a problem.
@@ -1172,8 +1197,10 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
1172 printk(KERN_DEBUG "%s: deauthenticated from %pM (Reason: %u)\n", 1197 printk(KERN_DEBUG "%s: deauthenticated from %pM (Reason: %u)\n",
1173 sdata->name, bssid, reason_code); 1198 sdata->name, bssid, reason_code);
1174 1199
1175 ieee80211_set_disassoc(sdata, true); 1200 ieee80211_set_disassoc(sdata, true, false);
1201 mutex_lock(&sdata->local->mtx);
1176 ieee80211_recalc_idle(sdata->local); 1202 ieee80211_recalc_idle(sdata->local);
1203 mutex_unlock(&sdata->local->mtx);
1177 1204
1178 return RX_MGMT_CFG80211_DEAUTH; 1205 return RX_MGMT_CFG80211_DEAUTH;
1179} 1206}
@@ -1202,8 +1229,10 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
1202 printk(KERN_DEBUG "%s: disassociated from %pM (Reason: %u)\n", 1229 printk(KERN_DEBUG "%s: disassociated from %pM (Reason: %u)\n",
1203 sdata->name, mgmt->sa, reason_code); 1230 sdata->name, mgmt->sa, reason_code);
1204 1231
1205 ieee80211_set_disassoc(sdata, true); 1232 ieee80211_set_disassoc(sdata, true, false);
1233 mutex_lock(&sdata->local->mtx);
1206 ieee80211_recalc_idle(sdata->local); 1234 ieee80211_recalc_idle(sdata->local);
1235 mutex_unlock(&sdata->local->mtx);
1207 return RX_MGMT_CFG80211_DISASSOC; 1236 return RX_MGMT_CFG80211_DISASSOC;
1208} 1237}
1209 1238
@@ -1262,7 +1291,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
1262 1291
1263 rates = 0; 1292 rates = 0;
1264 basic_rates = 0; 1293 basic_rates = 0;
1265 sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; 1294 sband = local->hw.wiphy->bands[wk->chan->band];
1266 1295
1267 for (i = 0; i < elems.supp_rates_len; i++) { 1296 for (i = 0; i < elems.supp_rates_len; i++) {
1268 int rate = (elems.supp_rates[i] & 0x7f) * 5; 1297 int rate = (elems.supp_rates[i] & 0x7f) * 5;
@@ -1298,11 +1327,11 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
1298 } 1327 }
1299 } 1328 }
1300 1329
1301 sta->sta.supp_rates[local->hw.conf.channel->band] = rates; 1330 sta->sta.supp_rates[wk->chan->band] = rates;
1302 sdata->vif.bss_conf.basic_rates = basic_rates; 1331 sdata->vif.bss_conf.basic_rates = basic_rates;
1303 1332
1304 /* cf. IEEE 802.11 9.2.12 */ 1333 /* cf. IEEE 802.11 9.2.12 */
1305 if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && 1334 if (wk->chan->band == IEEE80211_BAND_2GHZ &&
1306 have_higher_than_11mbit) 1335 have_higher_than_11mbit)
1307 sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; 1336 sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE;
1308 else 1337 else
@@ -1330,6 +1359,14 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
1330 return false; 1359 return false;
1331 } 1360 }
1332 1361
1362 /*
1363 * Always handle WMM once after association regardless
1364 * of the first value the AP uses. Setting -1 here has
1365 * that effect because the AP values is an unsigned
1366 * 4-bit value.
1367 */
1368 ifmgd->wmm_last_param_set = -1;
1369
1333 if (elems.wmm_param) 1370 if (elems.wmm_param)
1334 ieee80211_sta_wmm_params(local, sdata, elems.wmm_param, 1371 ieee80211_sta_wmm_params(local, sdata, elems.wmm_param,
1335 elems.wmm_param_len); 1372 elems.wmm_param_len);
@@ -1362,7 +1399,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
1362 * Also start the timer that will detect beacon loss. 1399 * Also start the timer that will detect beacon loss.
1363 */ 1400 */
1364 ieee80211_sta_rx_notify(sdata, (struct ieee80211_hdr *)mgmt); 1401 ieee80211_sta_rx_notify(sdata, (struct ieee80211_hdr *)mgmt);
1365 mod_beacon_timer(sdata); 1402 ieee80211_sta_reset_beacon_monitor(sdata);
1366 1403
1367 return true; 1404 return true;
1368} 1405}
@@ -1465,7 +1502,7 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
1465 * we have or will be receiving any beacons or data, so let's 1502 * we have or will be receiving any beacons or data, so let's
1466 * schedule the timers again, just in case. 1503 * schedule the timers again, just in case.
1467 */ 1504 */
1468 mod_beacon_timer(sdata); 1505 ieee80211_sta_reset_beacon_monitor(sdata);
1469 1506
1470 mod_timer(&ifmgd->conn_mon_timer, 1507 mod_timer(&ifmgd->conn_mon_timer,
1471 round_jiffies_up(jiffies + 1508 round_jiffies_up(jiffies +
@@ -1540,15 +1577,18 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1540 ifmgd->last_beacon_signal = rx_status->signal; 1577 ifmgd->last_beacon_signal = rx_status->signal;
1541 if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) { 1578 if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) {
1542 ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE; 1579 ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE;
1543 ifmgd->ave_beacon_signal = rx_status->signal; 1580 ifmgd->ave_beacon_signal = rx_status->signal * 16;
1544 ifmgd->last_cqm_event_signal = 0; 1581 ifmgd->last_cqm_event_signal = 0;
1582 ifmgd->count_beacon_signal = 1;
1545 } else { 1583 } else {
1546 ifmgd->ave_beacon_signal = 1584 ifmgd->ave_beacon_signal =
1547 (IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 + 1585 (IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 +
1548 (16 - IEEE80211_SIGNAL_AVE_WEIGHT) * 1586 (16 - IEEE80211_SIGNAL_AVE_WEIGHT) *
1549 ifmgd->ave_beacon_signal) / 16; 1587 ifmgd->ave_beacon_signal) / 16;
1588 ifmgd->count_beacon_signal++;
1550 } 1589 }
1551 if (bss_conf->cqm_rssi_thold && 1590 if (bss_conf->cqm_rssi_thold &&
1591 ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT &&
1552 !(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) { 1592 !(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) {
1553 int sig = ifmgd->ave_beacon_signal / 16; 1593 int sig = ifmgd->ave_beacon_signal / 16;
1554 int last_event = ifmgd->last_cqm_event_signal; 1594 int last_event = ifmgd->last_cqm_event_signal;
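The averaging hunk switches ave_beacon_signal to fixed point with four fractional bits (the stored value is signal * 16) and adds count_beacon_signal, so integer rounding no longer erodes the EWMA and CQM RSSI events are suppressed until IEEE80211_SIGNAL_AVE_MIN_COUNT beacons have contributed. A standalone sketch of the same weight-3/16 filter, with made-up sample values:

	#include <stdio.h>

	#define AVE_WEIGHT	3	/* IEEE80211_SIGNAL_AVE_WEIGHT */
	#define AVE_MIN_COUNT	4	/* IEEE80211_SIGNAL_AVE_MIN_COUNT */

	int main(void)
	{
		int samples[] = { -60, -62, -61, -70, -71 };
		int ave = 0, count = 0, i;

		for (i = 0; i < 5; i++) {
			if (count == 0)
				ave = samples[i] * 16;	/* seed, scaled by 16 */
			else
				ave = (AVE_WEIGHT * samples[i] * 16 +
				       (16 - AVE_WEIGHT) * ave) / 16;
			count++;
			/* Report in dBm only once the average settles. */
			if (count >= AVE_MIN_COUNT)
				printf("ave %d dBm after %d beacons\n",
				       ave / 16, count);
		}
		return 0;
	}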
@@ -1588,7 +1628,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1588 * Push the beacon loss detection into the future since 1628 * Push the beacon loss detection into the future since
1589 * we are processing a beacon from the AP just now. 1629 * we are processing a beacon from the AP just now.
1590 */ 1630 */
1591 mod_beacon_timer(sdata); 1631 ieee80211_sta_reset_beacon_monitor(sdata);
1592 1632
1593 ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4); 1633 ncrc = crc32_be(0, (void *)&mgmt->u.beacon.beacon_int, 4);
1594 ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable, 1634 ncrc = ieee802_11_parse_elems_crc(mgmt->u.beacon.variable,
@@ -1599,7 +1639,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1599 directed_tim = ieee80211_check_tim(elems.tim, elems.tim_len, 1639 directed_tim = ieee80211_check_tim(elems.tim, elems.tim_len,
1600 ifmgd->aid); 1640 ifmgd->aid);
1601 1641
1602 if (ncrc != ifmgd->beacon_crc) { 1642 if (ncrc != ifmgd->beacon_crc || !ifmgd->beacon_crc_valid) {
1603 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, 1643 ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems,
1604 true); 1644 true);
1605 1645
@@ -1630,9 +1670,10 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
1630 } 1670 }
1631 } 1671 }
1632 1672
1633 if (ncrc == ifmgd->beacon_crc) 1673 if (ncrc == ifmgd->beacon_crc && ifmgd->beacon_crc_valid)
1634 return; 1674 return;
1635 ifmgd->beacon_crc = ncrc; 1675 ifmgd->beacon_crc = ncrc;
1676 ifmgd->beacon_crc_valid = true;
1636 1677
1637 if (elems.erp_info && elems.erp_info_len >= 1) { 1678 if (elems.erp_info && elems.erp_info_len >= 1) {
1638 erp_valid = true; 1679 erp_valid = true;
@@ -1751,7 +1792,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1751 struct ieee80211_local *local = sdata->local; 1792 struct ieee80211_local *local = sdata->local;
1752 struct ieee80211_work *wk; 1793 struct ieee80211_work *wk;
1753 1794
1754 mutex_lock(&local->work_mtx); 1795 mutex_lock(&local->mtx);
1755 list_for_each_entry(wk, &local->work_list, list) { 1796 list_for_each_entry(wk, &local->work_list, list) {
1756 if (wk->sdata != sdata) 1797 if (wk->sdata != sdata)
1757 continue; 1798 continue;
@@ -1783,7 +1824,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
1783 free_work(wk); 1824 free_work(wk);
1784 break; 1825 break;
1785 } 1826 }
1786 mutex_unlock(&local->work_mtx); 1827 mutex_unlock(&local->mtx);
1787 1828
1788 cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len); 1829 cfg80211_send_deauth(sdata->dev, (u8 *)mgmt, skb->len);
1789 } 1830 }
@@ -1823,10 +1864,12 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
1823 1864
1824 else if (ifmgd->probe_send_count < IEEE80211_MAX_PROBE_TRIES) { 1865 else if (ifmgd->probe_send_count < IEEE80211_MAX_PROBE_TRIES) {
1825#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1866#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
1826 printk(KERN_DEBUG "No probe response from AP %pM" 1867 wiphy_debug(local->hw.wiphy,
1827 " after %dms, try %d\n", bssid, 1868 "%s: No probe response from AP %pM"
1828 (1000 * IEEE80211_PROBE_WAIT)/HZ, 1869 " after %dms, try %d\n",
1829 ifmgd->probe_send_count); 1870 sdata->name,
1871 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ,
1872 ifmgd->probe_send_count);
1830#endif 1873#endif
1831 ieee80211_mgd_probe_ap_send(sdata); 1874 ieee80211_mgd_probe_ap_send(sdata);
1832 } else { 1875 } else {
@@ -1836,12 +1879,16 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
1836 */ 1879 */
1837 ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL | 1880 ifmgd->flags &= ~(IEEE80211_STA_CONNECTION_POLL |
1838 IEEE80211_STA_BEACON_POLL); 1881 IEEE80211_STA_BEACON_POLL);
1839 printk(KERN_DEBUG "No probe response from AP %pM" 1882 wiphy_debug(local->hw.wiphy,
1840 " after %dms, disconnecting.\n", 1883 "%s: No probe response from AP %pM"
1841 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); 1884 " after %dms, disconnecting.\n",
1842 ieee80211_set_disassoc(sdata, true); 1885 sdata->name,
1843 ieee80211_recalc_idle(local); 1886 bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ);
1887 ieee80211_set_disassoc(sdata, true, true);
1844 mutex_unlock(&ifmgd->mtx); 1888 mutex_unlock(&ifmgd->mtx);
1889 mutex_lock(&local->mtx);
1890 ieee80211_recalc_idle(local);
1891 mutex_unlock(&local->mtx);
1845 /* 1892 /*
1846 * must be outside lock due to cfg80211, 1893 * must be outside lock due to cfg80211,
1847 * but that's not a problem. 1894 * but that's not a problem.
@@ -1917,6 +1964,8 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
1917 * time -- the code here is properly synchronised. 1964 * time -- the code here is properly synchronised.
1918 */ 1965 */
1919 1966
1967 cancel_work_sync(&ifmgd->request_smps_work);
1968
1920 cancel_work_sync(&ifmgd->beacon_connection_loss_work); 1969 cancel_work_sync(&ifmgd->beacon_connection_loss_work);
1921 if (del_timer_sync(&ifmgd->timer)) 1970 if (del_timer_sync(&ifmgd->timer))
1922 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); 1971 set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
@@ -1952,6 +2001,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
1952 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); 2001 INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work);
1953 INIT_WORK(&ifmgd->beacon_connection_loss_work, 2002 INIT_WORK(&ifmgd->beacon_connection_loss_work,
1954 ieee80211_beacon_connection_loss_work); 2003 ieee80211_beacon_connection_loss_work);
2004 INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_work);
1955 setup_timer(&ifmgd->timer, ieee80211_sta_timer, 2005 setup_timer(&ifmgd->timer, ieee80211_sta_timer,
1956 (unsigned long) sdata); 2006 (unsigned long) sdata);
1957 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, 2007 setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer,
@@ -2158,7 +2208,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
2158 } 2208 }
2159 2209
2160 /* Trying to reassociate - clear previous association state */ 2210 /* Trying to reassociate - clear previous association state */
2161 ieee80211_set_disassoc(sdata, true); 2211 ieee80211_set_disassoc(sdata, true, false);
2162 } 2212 }
2163 mutex_unlock(&ifmgd->mtx); 2213 mutex_unlock(&ifmgd->mtx);
2164 2214
@@ -2169,6 +2219,8 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
2169 ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N; 2219 ifmgd->flags &= ~IEEE80211_STA_DISABLE_11N;
2170 ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED; 2220 ifmgd->flags &= ~IEEE80211_STA_NULLFUNC_ACKED;
2171 2221
2222 ifmgd->beacon_crc_valid = false;
2223
2172 for (i = 0; i < req->crypto.n_ciphers_pairwise; i++) 2224 for (i = 0; i < req->crypto.n_ciphers_pairwise; i++)
2173 if (req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP40 || 2225 if (req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_WEP40 ||
2174 req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP || 2226 req->crypto.ciphers_pairwise[i] == WLAN_CIPHER_SUITE_TKIP ||
@@ -2249,6 +2301,9 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
2249 else 2301 else
2250 ifmgd->flags &= ~IEEE80211_STA_CONTROL_PORT; 2302 ifmgd->flags &= ~IEEE80211_STA_CONTROL_PORT;
2251 2303
2304 sdata->control_port_protocol = req->crypto.control_port_ethertype;
2305 sdata->control_port_no_encrypt = req->crypto.control_port_no_encrypt;
2306
2252 ieee80211_add_work(wk); 2307 ieee80211_add_work(wk);
2253 return 0; 2308 return 0;
2254} 2309}
@@ -2267,7 +2322,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2267 2322
2268 memcpy(bssid, req->bss->bssid, ETH_ALEN); 2323 memcpy(bssid, req->bss->bssid, ETH_ALEN);
2269 if (ifmgd->associated == req->bss) { 2324 if (ifmgd->associated == req->bss) {
2270 ieee80211_set_disassoc(sdata, false); 2325 ieee80211_set_disassoc(sdata, false, true);
2271 mutex_unlock(&ifmgd->mtx); 2326 mutex_unlock(&ifmgd->mtx);
2272 assoc_bss = true; 2327 assoc_bss = true;
2273 } else { 2328 } else {
@@ -2275,7 +2330,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2275 2330
2276 mutex_unlock(&ifmgd->mtx); 2331 mutex_unlock(&ifmgd->mtx);
2277 2332
2278 mutex_lock(&local->work_mtx); 2333 mutex_lock(&local->mtx);
2279 list_for_each_entry(wk, &local->work_list, list) { 2334 list_for_each_entry(wk, &local->work_list, list) {
2280 if (wk->sdata != sdata) 2335 if (wk->sdata != sdata)
2281 continue; 2336 continue;
@@ -2294,7 +2349,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2294 free_work(wk); 2349 free_work(wk);
2295 break; 2350 break;
2296 } 2351 }
2297 mutex_unlock(&local->work_mtx); 2352 mutex_unlock(&local->mtx);
2298 2353
2299 /* 2354 /*
2300 * If somebody requests authentication and we haven't 2355 * If somebody requests authentication and we haven't
@@ -2319,7 +2374,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
2319 if (assoc_bss) 2374 if (assoc_bss)
2320 sta_info_destroy_addr(sdata, bssid); 2375 sta_info_destroy_addr(sdata, bssid);
2321 2376
2377 mutex_lock(&sdata->local->mtx);
2322 ieee80211_recalc_idle(sdata->local); 2378 ieee80211_recalc_idle(sdata->local);
2379 mutex_unlock(&sdata->local->mtx);
2323 2380
2324 return 0; 2381 return 0;
2325} 2382}
@@ -2348,7 +2405,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
2348 sdata->name, req->bss->bssid, req->reason_code); 2405 sdata->name, req->bss->bssid, req->reason_code);
2349 2406
2350 memcpy(bssid, req->bss->bssid, ETH_ALEN); 2407 memcpy(bssid, req->bss->bssid, ETH_ALEN);
2351 ieee80211_set_disassoc(sdata, false); 2408 ieee80211_set_disassoc(sdata, false, true);
2352 2409
2353 mutex_unlock(&ifmgd->mtx); 2410 mutex_unlock(&ifmgd->mtx);
2354 2411
@@ -2357,7 +2414,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
2357 cookie, !req->local_state_change); 2414 cookie, !req->local_state_change);
2358 sta_info_destroy_addr(sdata, bssid); 2415 sta_info_destroy_addr(sdata, bssid);
2359 2416
2417 mutex_lock(&sdata->local->mtx);
2360 ieee80211_recalc_idle(sdata->local); 2418 ieee80211_recalc_idle(sdata->local);
2419 mutex_unlock(&sdata->local->mtx);
2361 2420
2362 return 0; 2421 return 0;
2363} 2422}
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index c36b1911987a..4b564091e51d 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -22,12 +22,16 @@
22static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata) 22static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
23{ 23{
24 struct ieee80211_local *local = sdata->local; 24 struct ieee80211_local *local = sdata->local;
25 struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
25 26
26 local->offchannel_ps_enabled = false; 27 local->offchannel_ps_enabled = false;
27 28
28 /* FIXME: what to do when local->pspolling is true? */ 29 /* FIXME: what to do when local->pspolling is true? */
29 30
30 del_timer_sync(&local->dynamic_ps_timer); 31 del_timer_sync(&local->dynamic_ps_timer);
32 del_timer_sync(&ifmgd->bcn_mon_timer);
33 del_timer_sync(&ifmgd->conn_mon_timer);
34
31 cancel_work_sync(&local->dynamic_ps_enable_work); 35 cancel_work_sync(&local->dynamic_ps_enable_work);
32 36
33 if (local->hw.conf.flags & IEEE80211_CONF_PS) { 37 if (local->hw.conf.flags & IEEE80211_CONF_PS) {
@@ -85,6 +89,9 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
85 mod_timer(&local->dynamic_ps_timer, jiffies + 89 mod_timer(&local->dynamic_ps_timer, jiffies +
86 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); 90 msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
87 } 91 }
92
93 ieee80211_sta_reset_beacon_monitor(sdata);
94 ieee80211_sta_reset_conn_monitor(sdata);
88} 95}
89 96
90void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local) 97void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local)
@@ -112,8 +119,10 @@ void ieee80211_offchannel_stop_beaconing(struct ieee80211_local *local)
112 * used from user space controlled off-channel operations. 119 * used from user space controlled off-channel operations.
113 */ 120 */
114 if (sdata->vif.type != NL80211_IFTYPE_STATION && 121 if (sdata->vif.type != NL80211_IFTYPE_STATION &&
115 sdata->vif.type != NL80211_IFTYPE_MONITOR) 122 sdata->vif.type != NL80211_IFTYPE_MONITOR) {
123 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
116 netif_tx_stop_all_queues(sdata->dev); 124 netif_tx_stop_all_queues(sdata->dev);
125 }
117 } 126 }
118 mutex_unlock(&local->iflist_mtx); 127 mutex_unlock(&local->iflist_mtx);
119} 128}
@@ -131,6 +140,7 @@ void ieee80211_offchannel_stop_station(struct ieee80211_local *local)
131 continue; 140 continue;
132 141
133 if (sdata->vif.type == NL80211_IFTYPE_STATION) { 142 if (sdata->vif.type == NL80211_IFTYPE_STATION) {
143 set_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
134 netif_tx_stop_all_queues(sdata->dev); 144 netif_tx_stop_all_queues(sdata->dev);
135 if (sdata->u.mgd.associated) 145 if (sdata->u.mgd.associated)
136 ieee80211_offchannel_ps_enable(sdata); 146 ieee80211_offchannel_ps_enable(sdata);
@@ -155,8 +165,20 @@ void ieee80211_offchannel_return(struct ieee80211_local *local,
155 ieee80211_offchannel_ps_disable(sdata); 165 ieee80211_offchannel_ps_disable(sdata);
156 } 166 }
157 167
158 if (sdata->vif.type != NL80211_IFTYPE_MONITOR) 168 if (sdata->vif.type != NL80211_IFTYPE_MONITOR) {
169 clear_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
170 /*
171 * This may wake up queues even though the driver
172 * currently has them stopped. This is not very
173 * likely, since the driver won't have gotten any
174 * (or hardly any) new packets while we weren't
175 * on the right channel, and even if it happens
176 * it will at most lead to queueing up one more
177 * packet per queue in mac80211 rather than on
178 * the interface qdisc.
179 */
159 netif_tx_wake_all_queues(sdata->dev); 180 netif_tx_wake_all_queues(sdata->dev);
181 }
160 182
161 /* re-enable beaconing */ 183 /* re-enable beaconing */
162 if (enable_beaconing && 184 if (enable_beaconing &&
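
The offchannel.c changes above pair each netif_tx_stop_all_queues() with set_bit(SDATA_STATE_OFFCHANNEL, ...) and each wake with the matching clear_bit(), so other code can test per-interface off-channel state directly instead of inferring it from scan flags. A sketch of such a test (the helper name is illustrative):

	/* Illustrative only: TX paths can now check the per-sdata bit. */
	static bool example_tx_allowed(struct ieee80211_sub_if_data *sdata)
	{
		return !test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state);
	}
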
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index d287fde0431d..e37355193ed1 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -45,7 +45,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw)
45 list_for_each_entry(sta, &local->sta_list, list) { 45 list_for_each_entry(sta, &local->sta_list, list) {
46 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { 46 if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
47 set_sta_flags(sta, WLAN_STA_BLOCK_BA); 47 set_sta_flags(sta, WLAN_STA_BLOCK_BA);
48 ieee80211_sta_tear_down_BA_sessions(sta); 48 ieee80211_sta_tear_down_BA_sessions(sta, true);
49 } 49 }
50 50
51 if (sta->uploaded) { 51 if (sta->uploaded) {
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 334cbd3d2aae..809cf230d251 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -208,7 +208,7 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc)
208 208
209 fc = hdr->frame_control; 209 fc = hdr->frame_control;
210 210
211 return ((info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc)); 211 return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc);
212} 212}
213 213
214static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) 214static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx)
@@ -369,8 +369,8 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
369 369
370 ref = rate_control_alloc(name, local); 370 ref = rate_control_alloc(name, local);
371 if (!ref) { 371 if (!ref) {
372 printk(KERN_WARNING "%s: Failed to select rate control " 372 wiphy_warn(local->hw.wiphy,
373 "algorithm\n", wiphy_name(local->hw.wiphy)); 373 "Failed to select rate control algorithm\n");
374 return -ENOENT; 374 return -ENOENT;
375 } 375 }
376 376
@@ -381,9 +381,8 @@ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local,
381 sta_info_flush(local, NULL); 381 sta_info_flush(local, NULL);
382 } 382 }
383 383
384 printk(KERN_DEBUG "%s: Selected rate control " 384 wiphy_debug(local->hw.wiphy, "Selected rate control algorithm '%s'\n",
385 "algorithm '%s'\n", wiphy_name(local->hw.wiphy), 385 ref->ops->name);
386 ref->ops->name);
387 386
388 return 0; 387 return 0;
389} 388}
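
The rate.c hunks are part of a tree-wide conversion from raw printk() to the wiphy_*() helpers, which prepend the wiphy name themselves, so the explicit wiphy_name() argument drops out. Before/after, condensed (the message text here is just an example):

	printk(KERN_DEBUG "%s: selected algorithm '%s'\n",
	       wiphy_name(local->hw.wiphy), name);		/* old style */
	wiphy_debug(local->hw.wiphy,
		    "selected algorithm '%s'\n", name);		/* new style */
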
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index c5b465904e3b..2a18d6602d4a 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -397,8 +397,9 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
397 !(info->flags & IEEE80211_TX_STAT_AMPDU)) 397 !(info->flags & IEEE80211_TX_STAT_AMPDU))
398 return; 398 return;
399 399
400 if (!info->status.ampdu_len) { 400 if (!(info->flags & IEEE80211_TX_STAT_AMPDU)) {
401 info->status.ampdu_ack_len = 1; 401 info->status.ampdu_ack_len =
402 (info->flags & IEEE80211_TX_STAT_ACK ? 1 : 0);
402 info->status.ampdu_len = 1; 403 info->status.ampdu_len = 1;
403 } 404 }
404 405
@@ -426,7 +427,7 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband,
426 group = minstrel_ht_get_group_idx(&ar[i]); 427 group = minstrel_ht_get_group_idx(&ar[i]);
427 rate = &mi->groups[group].rates[ar[i].idx % 8]; 428 rate = &mi->groups[group].rates[ar[i].idx % 8];
428 429
429 if (last && (info->flags & IEEE80211_TX_STAT_ACK)) 430 if (last)
430 rate->success += info->status.ampdu_ack_len; 431 rate->success += info->status.ampdu_ack_len;
431 432
432 rate->attempts += ar[i].count * info->status.ampdu_len; 433 rate->attempts += ar[i].count * info->status.ampdu_len;
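
The minstrel_ht fix above handles status reports from drivers that do not set IEEE80211_TX_STAT_AMPDU: such frames are now accounted as a one-frame "aggregate" whose ack length is derived from the plain ACK flag, and the per-rate success counter no longer re-checks IEEE80211_TX_STAT_ACK, which is meaningless for real A-MPDU status. Condensed from the hunk, the normalization is:

	/* Condensed: fake a 1-frame A-MPDU when the driver reported
	 * plain (non-aggregate) TX status. */
	if (!(info->flags & IEEE80211_TX_STAT_AMPDU)) {
		info->status.ampdu_ack_len =
			(info->flags & IEEE80211_TX_STAT_ACK) ? 1 : 0;
		info->status.ampdu_len = 1;
	}
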
diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
index 7905f79cc2e4..4851e9e2daed 100644
--- a/net/mac80211/rc80211_pid_debugfs.c
+++ b/net/mac80211/rc80211_pid_debugfs.c
@@ -162,7 +162,7 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
162 file_info->next_entry = (file_info->next_entry + 1) % 162 file_info->next_entry = (file_info->next_entry + 1) %
163 RC_PID_EVENT_RING_SIZE; 163 RC_PID_EVENT_RING_SIZE;
164 164
165 /* Print information about the event. Note that userpace needs to 165 /* Print information about the event. Note that userspace needs to
166 * provide large enough buffers. */ 166 * provide large enough buffers. */
167 length = length < RC_PID_PRINT_BUF_SIZE ? 167 length = length < RC_PID_PRINT_BUF_SIZE ?
168 length : RC_PID_PRINT_BUF_SIZE; 168 length : RC_PID_PRINT_BUF_SIZE;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 28624282c5f3..902b03ee8f60 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -315,6 +315,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
315static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) 315static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
316{ 316{
317 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 317 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
318 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
318 int tid; 319 int tid;
319 320
320 /* does the frame have a qos control field? */ 321 /* does the frame have a qos control field? */
@@ -323,9 +324,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
323 /* frame has qos control */ 324 /* frame has qos control */
324 tid = *qc & IEEE80211_QOS_CTL_TID_MASK; 325 tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
325 if (*qc & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT) 326 if (*qc & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT)
326 rx->flags |= IEEE80211_RX_AMSDU; 327 status->rx_flags |= IEEE80211_RX_AMSDU;
327 else
328 rx->flags &= ~IEEE80211_RX_AMSDU;
329 } else { 328 } else {
330 /* 329 /*
331 * IEEE 802.11-2007, 7.1.3.4.1 ("Sequence Number field"): 330 * IEEE 802.11-2007, 7.1.3.4.1 ("Sequence Number field"):
@@ -387,26 +386,25 @@ static ieee80211_rx_result debug_noinline
387ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) 386ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx)
388{ 387{
389 struct ieee80211_local *local = rx->local; 388 struct ieee80211_local *local = rx->local;
389 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
390 struct sk_buff *skb = rx->skb; 390 struct sk_buff *skb = rx->skb;
391 391
392 if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning))) 392 if (likely(!(status->rx_flags & IEEE80211_RX_IN_SCAN)))
393 return RX_CONTINUE;
394
395 if (test_bit(SCAN_HW_SCANNING, &local->scanning))
393 return ieee80211_scan_rx(rx->sdata, skb); 396 return ieee80211_scan_rx(rx->sdata, skb);
394 397
395 if (unlikely(test_bit(SCAN_SW_SCANNING, &local->scanning) && 398 if (test_bit(SCAN_SW_SCANNING, &local->scanning)) {
396 (rx->flags & IEEE80211_RX_IN_SCAN))) {
397 /* drop all the other packets during a software scan anyway */ 399 /* drop all the other packets during a software scan anyway */
398 if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED) 400 if (ieee80211_scan_rx(rx->sdata, skb) != RX_QUEUED)
399 dev_kfree_skb(skb); 401 dev_kfree_skb(skb);
400 return RX_QUEUED; 402 return RX_QUEUED;
401 } 403 }
402 404
 403 if (unlikely(rx->flags & IEEE80211_RX_IN_SCAN)) { 405 /* scanning finished while the handlers were invoked */
 404 /* scanning finished while the handlers were invoked */ 406 I802_DEBUG_INC(local->rx_handlers_drop_passive_scan);
405 I802_DEBUG_INC(local->rx_handlers_drop_passive_scan); 407 return RX_DROP_UNUSABLE;
406 return RX_DROP_UNUSABLE;
407 }
408
409 return RX_CONTINUE;
410} 408}
411 409
412 410
@@ -538,20 +536,12 @@ static void ieee80211_release_reorder_frame(struct ieee80211_hw *hw,
538 int index, 536 int index,
539 struct sk_buff_head *frames) 537 struct sk_buff_head *frames)
540{ 538{
541 struct ieee80211_supported_band *sband;
542 struct ieee80211_rate *rate = NULL;
543 struct sk_buff *skb = tid_agg_rx->reorder_buf[index]; 539 struct sk_buff *skb = tid_agg_rx->reorder_buf[index];
544 struct ieee80211_rx_status *status;
545 540
546 if (!skb) 541 if (!skb)
547 goto no_frame; 542 goto no_frame;
548 543
549 status = IEEE80211_SKB_RXCB(skb); 544 /* release the frame from the reorder ring buffer */
550
551 /* release the reordered frames to stack */
552 sband = hw->wiphy->bands[status->band];
553 if (!(status->flag & RX_FLAG_HT))
554 rate = &sband->bitrates[status->rate_idx];
555 tid_agg_rx->stored_mpdu_num--; 545 tid_agg_rx->stored_mpdu_num--;
556 tid_agg_rx->reorder_buf[index] = NULL; 546 tid_agg_rx->reorder_buf[index] = NULL;
557 __skb_queue_tail(frames, skb); 547 __skb_queue_tail(frames, skb);
@@ -580,9 +570,102 @@ static void ieee80211_release_reorder_frames(struct ieee80211_hw *hw,
580 * frames that have not yet been received are assumed to be lost and the skb 570 * frames that have not yet been received are assumed to be lost and the skb
581 * can be released for processing. This may also release other skb's from the 571 * can be released for processing. This may also release other skb's from the
582 * reorder buffer if there are no additional gaps between the frames. 572 * reorder buffer if there are no additional gaps between the frames.
573 *
574 * Callers must hold tid_agg_rx->reorder_lock.
583 */ 575 */
584#define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10) 576#define HT_RX_REORDER_BUF_TIMEOUT (HZ / 10)
585 577
578static void ieee80211_sta_reorder_release(struct ieee80211_hw *hw,
579 struct tid_ampdu_rx *tid_agg_rx,
580 struct sk_buff_head *frames)
581{
582 int index, j;
583
584 /* release the buffer until next missing frame */
585 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
586 tid_agg_rx->buf_size;
587 if (!tid_agg_rx->reorder_buf[index] &&
588 tid_agg_rx->stored_mpdu_num > 1) {
589 /*
590 * No buffers ready to be released, but check whether any
591 * frames in the reorder buffer have timed out.
592 */
593 int skipped = 1;
594 for (j = (index + 1) % tid_agg_rx->buf_size; j != index;
595 j = (j + 1) % tid_agg_rx->buf_size) {
596 if (!tid_agg_rx->reorder_buf[j]) {
597 skipped++;
598 continue;
599 }
600 if (!time_after(jiffies, tid_agg_rx->reorder_time[j] +
601 HT_RX_REORDER_BUF_TIMEOUT))
602 goto set_release_timer;
603
604#ifdef CONFIG_MAC80211_HT_DEBUG
605 if (net_ratelimit())
606 wiphy_debug(hw->wiphy,
607 "release an RX reorder frame due to timeout on earlier frames\n");
608#endif
609 ieee80211_release_reorder_frame(hw, tid_agg_rx,
610 j, frames);
611
612 /*
613 * Increment the head seq# also for the skipped slots.
614 */
615 tid_agg_rx->head_seq_num =
616 (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK;
617 skipped = 0;
618 }
619 } else while (tid_agg_rx->reorder_buf[index]) {
620 ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames);
621 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
622 tid_agg_rx->buf_size;
623 }
624
625 /*
626 * Disable the reorder release timer for now.
627 *
628 * The current implementation lacks a proper locking scheme
629 * which would protect vital statistic and debug counters
630 * from being updated by two different but concurrent BHs.
631 *
632 * More information about the topic is available from:
633 * - thread: http://marc.info/?t=128635927000001
634 *
635 * What was wrong:
636 * => http://marc.info/?l=linux-wireless&m=128636170811964
637 * "Basically the thing is that until your patch, the data
638 * in the struct didn't actually need locking because it
639 * was accessed by the RX path only which is not concurrent."
640 *
641 * List of what needs to be fixed:
642 * => http://marc.info/?l=linux-wireless&m=128656352920957
643 *
644
645 if (tid_agg_rx->stored_mpdu_num) {
646 j = index = seq_sub(tid_agg_rx->head_seq_num,
647 tid_agg_rx->ssn) % tid_agg_rx->buf_size;
648
649 for (; j != (index - 1) % tid_agg_rx->buf_size;
650 j = (j + 1) % tid_agg_rx->buf_size) {
651 if (tid_agg_rx->reorder_buf[j])
652 break;
653 }
654
655 set_release_timer:
656
657 mod_timer(&tid_agg_rx->reorder_timer,
658 tid_agg_rx->reorder_time[j] +
659 HT_RX_REORDER_BUF_TIMEOUT);
660 } else {
661 del_timer(&tid_agg_rx->reorder_timer);
662 }
663 */
664
665set_release_timer:
666 return;
667}
668
586/* 669/*
587 * As this function belongs to the RX path it must be under 670 * As this function belongs to the RX path it must be under
588 * rcu_read_lock protection. It returns false if the frame 671 * rcu_read_lock protection. It returns false if the frame
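
For reference while reading the reorder-release code: 802.11 sequence numbers are 12 bits wide, so the helpers work modulo 4096 and a frame's slot in the reorder ring is the modular distance from the session start sequence number. A sketch matching the definitions rx.c already uses (SEQ_MASK is 0xFFF there):

	#define SEQ_MASK	0xfff

	static inline u16 seq_sub(u16 sq1, u16 sq2)
	{
		return (sq1 - sq2) & SEQ_MASK;	/* modular distance */
	}

	/* slot of head_seq_num in a ring of buf_size entries */
	index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
		tid_agg_rx->buf_size;
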
@@ -598,14 +681,16 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
598 u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4; 681 u16 mpdu_seq_num = (sc & IEEE80211_SCTL_SEQ) >> 4;
599 u16 head_seq_num, buf_size; 682 u16 head_seq_num, buf_size;
600 int index; 683 int index;
684 bool ret = true;
601 685
602 buf_size = tid_agg_rx->buf_size; 686 buf_size = tid_agg_rx->buf_size;
603 head_seq_num = tid_agg_rx->head_seq_num; 687 head_seq_num = tid_agg_rx->head_seq_num;
604 688
689 spin_lock(&tid_agg_rx->reorder_lock);
605 /* frame with out of date sequence number */ 690 /* frame with out of date sequence number */
606 if (seq_less(mpdu_seq_num, head_seq_num)) { 691 if (seq_less(mpdu_seq_num, head_seq_num)) {
607 dev_kfree_skb(skb); 692 dev_kfree_skb(skb);
608 return true; 693 goto out;
609 } 694 }
610 695
611 /* 696 /*
@@ -626,7 +711,7 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
626 /* check if we already stored this frame */ 711 /* check if we already stored this frame */
627 if (tid_agg_rx->reorder_buf[index]) { 712 if (tid_agg_rx->reorder_buf[index]) {
628 dev_kfree_skb(skb); 713 dev_kfree_skb(skb);
629 return true; 714 goto out;
630 } 715 }
631 716
632 /* 717 /*
@@ -636,58 +721,19 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_hw *hw,
636 if (mpdu_seq_num == tid_agg_rx->head_seq_num && 721 if (mpdu_seq_num == tid_agg_rx->head_seq_num &&
637 tid_agg_rx->stored_mpdu_num == 0) { 722 tid_agg_rx->stored_mpdu_num == 0) {
638 tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num); 723 tid_agg_rx->head_seq_num = seq_inc(tid_agg_rx->head_seq_num);
639 return false; 724 ret = false;
725 goto out;
640 } 726 }
641 727
642 /* put the frame in the reordering buffer */ 728 /* put the frame in the reordering buffer */
643 tid_agg_rx->reorder_buf[index] = skb; 729 tid_agg_rx->reorder_buf[index] = skb;
644 tid_agg_rx->reorder_time[index] = jiffies; 730 tid_agg_rx->reorder_time[index] = jiffies;
645 tid_agg_rx->stored_mpdu_num++; 731 tid_agg_rx->stored_mpdu_num++;
646 /* release the buffer until next missing frame */ 732 ieee80211_sta_reorder_release(hw, tid_agg_rx, frames);
647 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
648 tid_agg_rx->buf_size;
649 if (!tid_agg_rx->reorder_buf[index] &&
650 tid_agg_rx->stored_mpdu_num > 1) {
651 /*
652 * No buffers ready to be released, but check whether any
653 * frames in the reorder buffer have timed out.
654 */
655 int j;
656 int skipped = 1;
657 for (j = (index + 1) % tid_agg_rx->buf_size; j != index;
658 j = (j + 1) % tid_agg_rx->buf_size) {
659 if (!tid_agg_rx->reorder_buf[j]) {
660 skipped++;
661 continue;
662 }
663 if (!time_after(jiffies, tid_agg_rx->reorder_time[j] +
664 HT_RX_REORDER_BUF_TIMEOUT))
665 break;
666 733
667#ifdef CONFIG_MAC80211_HT_DEBUG 734 out:
668 if (net_ratelimit()) 735 spin_unlock(&tid_agg_rx->reorder_lock);
669 printk(KERN_DEBUG "%s: release an RX reorder " 736 return ret;
670 "frame due to timeout on earlier "
671 "frames\n",
672 wiphy_name(hw->wiphy));
673#endif
674 ieee80211_release_reorder_frame(hw, tid_agg_rx,
675 j, frames);
676
677 /*
678 * Increment the head seq# also for the skipped slots.
679 */
680 tid_agg_rx->head_seq_num =
681 (tid_agg_rx->head_seq_num + skipped) & SEQ_MASK;
682 skipped = 0;
683 }
684 } else while (tid_agg_rx->reorder_buf[index]) {
685 ieee80211_release_reorder_frame(hw, tid_agg_rx, index, frames);
686 index = seq_sub(tid_agg_rx->head_seq_num, tid_agg_rx->ssn) %
687 tid_agg_rx->buf_size;
688 }
689
690 return true;
691} 737}
692 738
693/* 739/*
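
A recurring pattern throughout the rx.c hunks: per-frame state moves from the on-stack rx->flags into status->rx_flags, which lives in the skb control block and therefore survives queueing in the reorder buffer. The accessor is mac80211's existing IEEE80211_SKB_RXCB(); a minimal sketch of a handler using the new idiom:

	/* Sketch: fetch the rx status from skb->cb and test per-frame
	 * flags there instead of in the on-stack rx->flags. */
	static ieee80211_rx_result example_rx_handler(struct ieee80211_rx_data *rx)
	{
		struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);

		if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
			return RX_CONTINUE;
		/* ... real handlers do their work here ... */
		return RX_CONTINUE;
	}
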
@@ -761,13 +807,14 @@ static ieee80211_rx_result debug_noinline
761ieee80211_rx_h_check(struct ieee80211_rx_data *rx) 807ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
762{ 808{
763 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; 809 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
810 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
764 811
765 /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ 812 /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */
766 if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { 813 if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) {
767 if (unlikely(ieee80211_has_retry(hdr->frame_control) && 814 if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
768 rx->sta->last_seq_ctrl[rx->queue] == 815 rx->sta->last_seq_ctrl[rx->queue] ==
769 hdr->seq_ctrl)) { 816 hdr->seq_ctrl)) {
770 if (rx->flags & IEEE80211_RX_RA_MATCH) { 817 if (status->rx_flags & IEEE80211_RX_RA_MATCH) {
771 rx->local->dot11FrameDuplicateCount++; 818 rx->local->dot11FrameDuplicateCount++;
772 rx->sta->num_duplicates++; 819 rx->sta->num_duplicates++;
773 } 820 }
@@ -796,11 +843,12 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
796 if (unlikely((ieee80211_is_data(hdr->frame_control) || 843 if (unlikely((ieee80211_is_data(hdr->frame_control) ||
797 ieee80211_is_pspoll(hdr->frame_control)) && 844 ieee80211_is_pspoll(hdr->frame_control)) &&
798 rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && 845 rx->sdata->vif.type != NL80211_IFTYPE_ADHOC &&
846 rx->sdata->vif.type != NL80211_IFTYPE_WDS &&
799 (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) { 847 (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) {
800 if ((!ieee80211_has_fromds(hdr->frame_control) && 848 if ((!ieee80211_has_fromds(hdr->frame_control) &&
801 !ieee80211_has_tods(hdr->frame_control) && 849 !ieee80211_has_tods(hdr->frame_control) &&
802 ieee80211_is_data(hdr->frame_control)) || 850 ieee80211_is_data(hdr->frame_control)) ||
803 !(rx->flags & IEEE80211_RX_RA_MATCH)) { 851 !(status->rx_flags & IEEE80211_RX_RA_MATCH)) {
804 /* Drop IBSS frames and frames for other hosts 852 /* Drop IBSS frames and frames for other hosts
805 * silently. */ 853 * silently. */
806 return RX_DROP_MONITOR; 854 return RX_DROP_MONITOR;
@@ -822,7 +870,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
822 int keyidx; 870 int keyidx;
823 int hdrlen; 871 int hdrlen;
824 ieee80211_rx_result result = RX_DROP_UNUSABLE; 872 ieee80211_rx_result result = RX_DROP_UNUSABLE;
825 struct ieee80211_key *stakey = NULL; 873 struct ieee80211_key *sta_ptk = NULL;
826 int mmie_keyidx = -1; 874 int mmie_keyidx = -1;
827 __le16 fc; 875 __le16 fc;
828 876
@@ -857,22 +905,25 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
857 * No point in finding a key and decrypting if the frame is neither 905 * No point in finding a key and decrypting if the frame is neither
858 * addressed to us nor a multicast frame. 906 * addressed to us nor a multicast frame.
859 */ 907 */
860 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 908 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
861 return RX_CONTINUE; 909 return RX_CONTINUE;
862 910
863 /* start without a key */ 911 /* start without a key */
864 rx->key = NULL; 912 rx->key = NULL;
865 913
866 if (rx->sta) 914 if (rx->sta)
867 stakey = rcu_dereference(rx->sta->key); 915 sta_ptk = rcu_dereference(rx->sta->ptk);
868 916
869 fc = hdr->frame_control; 917 fc = hdr->frame_control;
870 918
871 if (!ieee80211_has_protected(fc)) 919 if (!ieee80211_has_protected(fc))
872 mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb); 920 mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb);
873 921
874 if (!is_multicast_ether_addr(hdr->addr1) && stakey) { 922 if (!is_multicast_ether_addr(hdr->addr1) && sta_ptk) {
875 rx->key = stakey; 923 rx->key = sta_ptk;
924 if ((status->flag & RX_FLAG_DECRYPTED) &&
925 (status->flag & RX_FLAG_IV_STRIPPED))
926 return RX_CONTINUE;
876 /* Skip decryption if the frame is not protected. */ 927 /* Skip decryption if the frame is not protected. */
877 if (!ieee80211_has_protected(fc)) 928 if (!ieee80211_has_protected(fc))
878 return RX_CONTINUE; 929 return RX_CONTINUE;
@@ -885,7 +936,10 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
885 if (mmie_keyidx < NUM_DEFAULT_KEYS || 936 if (mmie_keyidx < NUM_DEFAULT_KEYS ||
886 mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) 937 mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS)
887 return RX_DROP_MONITOR; /* unexpected BIP keyidx */ 938 return RX_DROP_MONITOR; /* unexpected BIP keyidx */
888 rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]); 939 if (rx->sta)
940 rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]);
941 if (!rx->key)
942 rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]);
889 } else if (!ieee80211_has_protected(fc)) { 943 } else if (!ieee80211_has_protected(fc)) {
890 /* 944 /*
891 * The frame was not protected, so skip decryption. However, we 945 * The frame was not protected, so skip decryption. However, we
@@ -928,16 +982,25 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
928 skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1); 982 skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1);
929 keyidx = keyid >> 6; 983 keyidx = keyid >> 6;
930 984
931 rx->key = rcu_dereference(rx->sdata->keys[keyidx]); 985 /* check per-station GTK first, if multicast packet */
986 if (is_multicast_ether_addr(hdr->addr1) && rx->sta)
987 rx->key = rcu_dereference(rx->sta->gtk[keyidx]);
932 988
933 /* 989 /* if not found, try default key */
934 * RSNA-protected unicast frames should always be sent with 990 if (!rx->key) {
935 * pairwise or station-to-station keys, but for WEP we allow 991 rx->key = rcu_dereference(rx->sdata->keys[keyidx]);
936 * using a key index as well. 992
937 */ 993 /*
938 if (rx->key && rx->key->conf.alg != ALG_WEP && 994 * RSNA-protected unicast frames should always be
939 !is_multicast_ether_addr(hdr->addr1)) 995 * sent with pairwise or station-to-station keys,
940 rx->key = NULL; 996 * but for WEP we allow using a key index as well.
997 */
998 if (rx->key &&
999 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 &&
1000 rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 &&
1001 !is_multicast_ether_addr(hdr->addr1))
1002 rx->key = NULL;
1003 }
941 } 1004 }
942 1005
943 if (rx->key) { 1006 if (rx->key) {
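
The decrypt hunks reflect a key-store split on the station side: the single sta->key becomes one pairwise key plus per-index group keys, which is what the sta_ptk rename and the rx->sta->gtk[] lookups above rely on. Roughly (field names as used in the diff, layout condensed for illustration):

	/* Sketch of the station key layout these hunks assume. */
	struct sta_info {
		/* ... */
		struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS +
						NUM_DEFAULT_MGMT_KEYS];
		struct ieee80211_key __rcu *ptk;
		/* ... */
	};
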
@@ -951,8 +1014,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
951 return RX_DROP_UNUSABLE; 1014 return RX_DROP_UNUSABLE;
952 /* the hdr variable is invalid now! */ 1015 /* the hdr variable is invalid now! */
953 1016
954 switch (rx->key->conf.alg) { 1017 switch (rx->key->conf.cipher) {
955 case ALG_WEP: 1018 case WLAN_CIPHER_SUITE_WEP40:
1019 case WLAN_CIPHER_SUITE_WEP104:
956 /* Check for weak IVs if possible */ 1020 /* Check for weak IVs if possible */
957 if (rx->sta && ieee80211_is_data(fc) && 1021 if (rx->sta && ieee80211_is_data(fc) &&
958 (!(status->flag & RX_FLAG_IV_STRIPPED) || 1022 (!(status->flag & RX_FLAG_IV_STRIPPED) ||
@@ -962,15 +1026,21 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
962 1026
963 result = ieee80211_crypto_wep_decrypt(rx); 1027 result = ieee80211_crypto_wep_decrypt(rx);
964 break; 1028 break;
965 case ALG_TKIP: 1029 case WLAN_CIPHER_SUITE_TKIP:
966 result = ieee80211_crypto_tkip_decrypt(rx); 1030 result = ieee80211_crypto_tkip_decrypt(rx);
967 break; 1031 break;
968 case ALG_CCMP: 1032 case WLAN_CIPHER_SUITE_CCMP:
969 result = ieee80211_crypto_ccmp_decrypt(rx); 1033 result = ieee80211_crypto_ccmp_decrypt(rx);
970 break; 1034 break;
971 case ALG_AES_CMAC: 1035 case WLAN_CIPHER_SUITE_AES_CMAC:
972 result = ieee80211_crypto_aes_cmac_decrypt(rx); 1036 result = ieee80211_crypto_aes_cmac_decrypt(rx);
973 break; 1037 break;
1038 default:
1039 /*
1040 * We can reach here only with HW-only algorithms
1041 * but why didn't it decrypt the frame?!
1042 */
1043 return RX_DROP_UNUSABLE;
974 } 1044 }
975 1045
976 /* either the frame has been decrypted or will be dropped */ 1046 /* either the frame has been decrypted or will be dropped */
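
The ALG_* to WLAN_CIPHER_SUITE_* switch follows cfg80211's move to raw IEEE 802.11 cipher suite selectors, so mac80211 and drivers share one namespace. For reference, these are the standard 00-0F-AC based selectors (values as defined in ieee80211.h):

	#define WLAN_CIPHER_SUITE_WEP40		0x000FAC01
	#define WLAN_CIPHER_SUITE_TKIP		0x000FAC02
	#define WLAN_CIPHER_SUITE_CCMP		0x000FAC04
	#define WLAN_CIPHER_SUITE_WEP104	0x000FAC05
	#define WLAN_CIPHER_SUITE_AES_CMAC	0x000FAC06
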
@@ -1079,7 +1149,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
1079 sta->last_rx = jiffies; 1149 sta->last_rx = jiffies;
1080 } 1150 }
1081 1151
1082 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 1152 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
1083 return RX_CONTINUE; 1153 return RX_CONTINUE;
1084 1154
1085 if (rx->sdata->vif.type == NL80211_IFTYPE_STATION) 1155 if (rx->sdata->vif.type == NL80211_IFTYPE_STATION)
@@ -1236,6 +1306,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1236 unsigned int frag, seq; 1306 unsigned int frag, seq;
1237 struct ieee80211_fragment_entry *entry; 1307 struct ieee80211_fragment_entry *entry;
1238 struct sk_buff *skb; 1308 struct sk_buff *skb;
1309 struct ieee80211_rx_status *status;
1239 1310
1240 hdr = (struct ieee80211_hdr *)rx->skb->data; 1311 hdr = (struct ieee80211_hdr *)rx->skb->data;
1241 fc = hdr->frame_control; 1312 fc = hdr->frame_control;
@@ -1265,7 +1336,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1265 /* This is the first fragment of a new frame. */ 1336 /* This is the first fragment of a new frame. */
1266 entry = ieee80211_reassemble_add(rx->sdata, frag, seq, 1337 entry = ieee80211_reassemble_add(rx->sdata, frag, seq,
1267 rx->queue, &(rx->skb)); 1338 rx->queue, &(rx->skb));
1268 if (rx->key && rx->key->conf.alg == ALG_CCMP && 1339 if (rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP &&
1269 ieee80211_has_protected(fc)) { 1340 ieee80211_has_protected(fc)) {
1270 int queue = ieee80211_is_mgmt(fc) ? 1341 int queue = ieee80211_is_mgmt(fc) ?
1271 NUM_RX_DATA_QUEUES : rx->queue; 1342 NUM_RX_DATA_QUEUES : rx->queue;
@@ -1294,7 +1365,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1294 int i; 1365 int i;
1295 u8 pn[CCMP_PN_LEN], *rpn; 1366 u8 pn[CCMP_PN_LEN], *rpn;
1296 int queue; 1367 int queue;
1297 if (!rx->key || rx->key->conf.alg != ALG_CCMP) 1368 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP)
1298 return RX_DROP_UNUSABLE; 1369 return RX_DROP_UNUSABLE;
1299 memcpy(pn, entry->last_pn, CCMP_PN_LEN); 1370 memcpy(pn, entry->last_pn, CCMP_PN_LEN);
1300 for (i = CCMP_PN_LEN - 1; i >= 0; i--) { 1371 for (i = CCMP_PN_LEN - 1; i >= 0; i--) {
@@ -1335,7 +1406,8 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
1335 } 1406 }
1336 1407
1337 /* Complete frame has been reassembled - process it now */ 1408 /* Complete frame has been reassembled - process it now */
1338 rx->flags |= IEEE80211_RX_FRAGMENTED; 1409 status = IEEE80211_SKB_RXCB(rx->skb);
1410 status->rx_flags |= IEEE80211_RX_FRAGMENTED;
1339 1411
1340 out: 1412 out:
1341 if (rx->sta) 1413 if (rx->sta)
@@ -1352,9 +1424,10 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx)
1352{ 1424{
1353 struct ieee80211_sub_if_data *sdata = rx->sdata; 1425 struct ieee80211_sub_if_data *sdata = rx->sdata;
1354 __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control; 1426 __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control;
1427 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1355 1428
1356 if (likely(!rx->sta || !ieee80211_is_pspoll(fc) || 1429 if (likely(!rx->sta || !ieee80211_is_pspoll(fc) ||
1357 !(rx->flags & IEEE80211_RX_RA_MATCH))) 1430 !(status->rx_flags & IEEE80211_RX_RA_MATCH)))
1358 return RX_CONTINUE; 1431 return RX_CONTINUE;
1359 1432
1360 if ((sdata->vif.type != NL80211_IFTYPE_AP) && 1433 if ((sdata->vif.type != NL80211_IFTYPE_AP) &&
@@ -1492,7 +1565,7 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc)
1492 * Allow EAPOL frames to us/the PAE group address regardless 1565 * Allow EAPOL frames to us/the PAE group address regardless
1493 * of whether the frame was encrypted or not. 1566 * of whether the frame was encrypted or not.
1494 */ 1567 */
1495 if (ehdr->h_proto == htons(ETH_P_PAE) && 1568 if (ehdr->h_proto == rx->sdata->control_port_protocol &&
1496 (compare_ether_addr(ehdr->h_dest, rx->sdata->vif.addr) == 0 || 1569 (compare_ether_addr(ehdr->h_dest, rx->sdata->vif.addr) == 0 ||
1497 compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0)) 1570 compare_ether_addr(ehdr->h_dest, pae_group_addr) == 0))
1498 return true; 1571 return true;
@@ -1515,6 +1588,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
1515 struct sk_buff *skb, *xmit_skb; 1588 struct sk_buff *skb, *xmit_skb;
1516 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; 1589 struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data;
1517 struct sta_info *dsta; 1590 struct sta_info *dsta;
1591 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1518 1592
1519 skb = rx->skb; 1593 skb = rx->skb;
1520 xmit_skb = NULL; 1594 xmit_skb = NULL;
@@ -1522,7 +1596,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
1522 if ((sdata->vif.type == NL80211_IFTYPE_AP || 1596 if ((sdata->vif.type == NL80211_IFTYPE_AP ||
1523 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && 1597 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) &&
1524 !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && 1598 !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) &&
1525 (rx->flags & IEEE80211_RX_RA_MATCH) && 1599 (status->rx_flags & IEEE80211_RX_RA_MATCH) &&
1526 (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) { 1600 (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) {
1527 if (is_multicast_ether_addr(ehdr->h_dest)) { 1601 if (is_multicast_ether_addr(ehdr->h_dest)) {
1528 /* 1602 /*
@@ -1599,6 +1673,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
1599 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; 1673 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
1600 __le16 fc = hdr->frame_control; 1674 __le16 fc = hdr->frame_control;
1601 struct sk_buff_head frame_list; 1675 struct sk_buff_head frame_list;
1676 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1602 1677
1603 if (unlikely(!ieee80211_is_data(fc))) 1678 if (unlikely(!ieee80211_is_data(fc)))
1604 return RX_CONTINUE; 1679 return RX_CONTINUE;
@@ -1606,7 +1681,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
1606 if (unlikely(!ieee80211_is_data_present(fc))) 1681 if (unlikely(!ieee80211_is_data_present(fc)))
1607 return RX_DROP_MONITOR; 1682 return RX_DROP_MONITOR;
1608 1683
1609 if (!(rx->flags & IEEE80211_RX_AMSDU)) 1684 if (!(status->rx_flags & IEEE80211_RX_AMSDU))
1610 return RX_CONTINUE; 1685 return RX_CONTINUE;
1611 1686
1612 if (ieee80211_has_a4(hdr->frame_control) && 1687 if (ieee80211_has_a4(hdr->frame_control) &&
@@ -1657,6 +1732,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
1657 struct sk_buff *skb = rx->skb, *fwd_skb; 1732 struct sk_buff *skb = rx->skb, *fwd_skb;
1658 struct ieee80211_local *local = rx->local; 1733 struct ieee80211_local *local = rx->local;
1659 struct ieee80211_sub_if_data *sdata = rx->sdata; 1734 struct ieee80211_sub_if_data *sdata = rx->sdata;
1735 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
1660 1736
1661 hdr = (struct ieee80211_hdr *) skb->data; 1737 hdr = (struct ieee80211_hdr *) skb->data;
1662 hdrlen = ieee80211_hdrlen(hdr->frame_control); 1738 hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -1702,7 +1778,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
1702 1778
1703 mesh_hdr->ttl--; 1779 mesh_hdr->ttl--;
1704 1780
1705 if (rx->flags & IEEE80211_RX_RA_MATCH) { 1781 if (status->rx_flags & IEEE80211_RX_RA_MATCH) {
1706 if (!mesh_hdr->ttl) 1782 if (!mesh_hdr->ttl)
1707 IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.mesh, 1783 IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.mesh,
1708 dropped_frames_ttl); 1784 dropped_frames_ttl);
@@ -1909,13 +1985,38 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata,
1909} 1985}
1910 1986
1911static ieee80211_rx_result debug_noinline 1987static ieee80211_rx_result debug_noinline
1988ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
1989{
1990 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1991 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1992
1993 /*
1994 * From here on, look only at management frames.
1995 * Data and control frames are already handled,
1996 * and unknown (reserved) frames are useless.
1997 */
1998 if (rx->skb->len < 24)
1999 return RX_DROP_MONITOR;
2000
2001 if (!ieee80211_is_mgmt(mgmt->frame_control))
2002 return RX_DROP_MONITOR;
2003
2004 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
2005 return RX_DROP_MONITOR;
2006
2007 if (ieee80211_drop_unencrypted_mgmt(rx))
2008 return RX_DROP_UNUSABLE;
2009
2010 return RX_CONTINUE;
2011}
2012
2013static ieee80211_rx_result debug_noinline
1912ieee80211_rx_h_action(struct ieee80211_rx_data *rx) 2014ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1913{ 2015{
1914 struct ieee80211_local *local = rx->local; 2016 struct ieee80211_local *local = rx->local;
1915 struct ieee80211_sub_if_data *sdata = rx->sdata; 2017 struct ieee80211_sub_if_data *sdata = rx->sdata;
1916 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; 2018 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
1917 struct sk_buff *nskb; 2019 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
1918 struct ieee80211_rx_status *status;
1919 int len = rx->skb->len; 2020 int len = rx->skb->len;
1920 2021
1921 if (!ieee80211_is_action(mgmt->frame_control)) 2022 if (!ieee80211_is_action(mgmt->frame_control))
@@ -1928,10 +2029,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
1928 if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) 2029 if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC)
1929 return RX_DROP_UNUSABLE; 2030 return RX_DROP_UNUSABLE;
1930 2031
1931 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 2032 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
1932 return RX_DROP_UNUSABLE;
1933
1934 if (ieee80211_drop_unencrypted_mgmt(rx))
1935 return RX_DROP_UNUSABLE; 2033 return RX_DROP_UNUSABLE;
1936 2034
1937 switch (mgmt->u.action.category) { 2035 switch (mgmt->u.action.category) {
@@ -2024,17 +2122,36 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2024 goto queue; 2122 goto queue;
2025 } 2123 }
2026 2124
2125 return RX_CONTINUE;
2126
2027 invalid: 2127 invalid:
2028 /* 2128 status->rx_flags |= IEEE80211_RX_MALFORMED_ACTION_FRM;
2029 * For AP mode, hostapd is responsible for handling any action 2129 /* will return in the next handlers */
2030 * frames that we didn't handle, including returning unknown 2130 return RX_CONTINUE;
2031 * ones. For all other modes we will return them to the sender, 2131
2032 * setting the 0x80 bit in the action category, as required by 2132 handled:
2033 * 802.11-2007 7.3.1.11. 2133 if (rx->sta)
2034 */ 2134 rx->sta->rx_packets++;
2035 if (sdata->vif.type == NL80211_IFTYPE_AP || 2135 dev_kfree_skb(rx->skb);
2036 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 2136 return RX_QUEUED;
2037 return RX_DROP_MONITOR; 2137
2138 queue:
2139 rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
2140 skb_queue_tail(&sdata->skb_queue, rx->skb);
2141 ieee80211_queue_work(&local->hw, &sdata->work);
2142 if (rx->sta)
2143 rx->sta->rx_packets++;
2144 return RX_QUEUED;
2145}
2146
2147static ieee80211_rx_result debug_noinline
2148ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
2149{
2150 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
2151
2152 /* skip known-bad action frames and return them in the next handler */
2153 if (status->rx_flags & IEEE80211_RX_MALFORMED_ACTION_FRM)
2154 return RX_CONTINUE;
2038 2155
2039 /* 2156 /*
2040 * Getting here means the kernel doesn't know how to handle 2157 * Getting here means the kernel doesn't know how to handle
@@ -2042,12 +2159,46 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2042 * so userspace can register for those to know whether ones 2159 * so userspace can register for those to know whether ones
2043 * it transmitted were processed or returned. 2160 * it transmitted were processed or returned.
2044 */ 2161 */
2045 status = IEEE80211_SKB_RXCB(rx->skb);
2046 2162
2047 if (cfg80211_rx_action(rx->sdata->dev, status->freq, 2163 if (cfg80211_rx_mgmt(rx->sdata->dev, status->freq,
2048 rx->skb->data, rx->skb->len, 2164 rx->skb->data, rx->skb->len,
2049 GFP_ATOMIC)) 2165 GFP_ATOMIC)) {
2050 goto handled; 2166 if (rx->sta)
2167 rx->sta->rx_packets++;
2168 dev_kfree_skb(rx->skb);
2169 return RX_QUEUED;
2170 }
2171
2172
2173 return RX_CONTINUE;
2174}
2175
2176static ieee80211_rx_result debug_noinline
2177ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
2178{
2179 struct ieee80211_local *local = rx->local;
2180 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
2181 struct sk_buff *nskb;
2182 struct ieee80211_sub_if_data *sdata = rx->sdata;
2183 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
2184
2185 if (!ieee80211_is_action(mgmt->frame_control))
2186 return RX_CONTINUE;
2187
2188 /*
2189 * For AP mode, hostapd is responsible for handling any action
2190 * frames that we didn't handle, including returning unknown
2191 * ones. For all other modes we will return them to the sender,
2192 * setting the 0x80 bit in the action category, as required by
2193 * 802.11-2007 7.3.1.11.
2194 * Newer versions of hostapd shall also use the management frame
2195 * registration mechanisms, but older ones still use cooked
2196 * monitor interfaces so push all frames there.
2197 */
2198 if (!(status->rx_flags & IEEE80211_RX_MALFORMED_ACTION_FRM) &&
2199 (sdata->vif.type == NL80211_IFTYPE_AP ||
2200 sdata->vif.type == NL80211_IFTYPE_AP_VLAN))
2201 return RX_DROP_MONITOR;
2051 2202
2052 /* do not return rejected action frames */ 2203 /* do not return rejected action frames */
2053 if (mgmt->u.action.category & 0x80) 2204 if (mgmt->u.action.category & 0x80)
@@ -2066,20 +2217,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
2066 2217
2067 ieee80211_tx_skb(rx->sdata, nskb); 2218 ieee80211_tx_skb(rx->sdata, nskb);
2068 } 2219 }
2069
2070 handled:
2071 if (rx->sta)
2072 rx->sta->rx_packets++;
2073 dev_kfree_skb(rx->skb); 2220 dev_kfree_skb(rx->skb);
2074 return RX_QUEUED; 2221 return RX_QUEUED;
2075
2076 queue:
2077 rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME;
2078 skb_queue_tail(&sdata->skb_queue, rx->skb);
2079 ieee80211_queue_work(&local->hw, &sdata->work);
2080 if (rx->sta)
2081 rx->sta->rx_packets++;
2082 return RX_QUEUED;
2083} 2222}
2084 2223
2085static ieee80211_rx_result debug_noinline 2224static ieee80211_rx_result debug_noinline
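
For the "return unknown action frames" path that ieee80211_rx_h_action_return() now owns, the mechanics visible in the surrounding hunks are: copy the frame, set the 0x80 error bit in the category code as 802.11-2007 7.3.1.11 requires, redirect the copy back toward the sender, and transmit. A condensed, illustrative sketch:

	nskb = skb_copy_expand(rx->skb, local->hw.extra_tx_headroom, 0,
			       GFP_ATOMIC);
	if (nskb) {
		struct ieee80211_mgmt *nmgmt = (void *)nskb->data;

		nmgmt->u.action.category |= 0x80;	/* error response */
		memcpy(nmgmt->da, nmgmt->sa, ETH_ALEN);	/* back to sender */
		memcpy(nmgmt->sa, rx->sdata->vif.addr, ETH_ALEN);
		ieee80211_tx_skb(rx->sdata, nskb);
	}
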
@@ -2090,15 +2229,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
2090 struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; 2229 struct ieee80211_mgmt *mgmt = (void *)rx->skb->data;
2091 __le16 stype; 2230 __le16 stype;
2092 2231
2093 if (!(rx->flags & IEEE80211_RX_RA_MATCH))
2094 return RX_DROP_MONITOR;
2095
2096 if (rx->skb->len < 24)
2097 return RX_DROP_MONITOR;
2098
2099 if (ieee80211_drop_unencrypted_mgmt(rx))
2100 return RX_DROP_UNUSABLE;
2101
2102 rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb); 2232 rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb);
2103 if (rxs != RX_CONTINUE) 2233 if (rxs != RX_CONTINUE)
2104 return rxs; 2234 return rxs;
@@ -2199,6 +2329,14 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
2199 struct net_device *prev_dev = NULL; 2329 struct net_device *prev_dev = NULL;
2200 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 2330 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
2201 2331
2332 /*
2333 * If cooked monitor has been processed already, then
2334 * don't do it again. If not, set the flag.
2335 */
2336 if (rx->flags & IEEE80211_RX_CMNTR)
2337 goto out_free_skb;
2338 rx->flags |= IEEE80211_RX_CMNTR;
2339
2202 if (skb_headroom(skb) < sizeof(*rthdr) && 2340 if (skb_headroom(skb) < sizeof(*rthdr) &&
2203 pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) 2341 pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC))
2204 goto out_free_skb; 2342 goto out_free_skb;
@@ -2253,29 +2391,53 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
2253 if (prev_dev) { 2391 if (prev_dev) {
2254 skb->dev = prev_dev; 2392 skb->dev = prev_dev;
2255 netif_receive_skb(skb); 2393 netif_receive_skb(skb);
2256 skb = NULL; 2394 return;
2257 } else 2395 }
2258 goto out_free_skb;
2259
2260 return;
2261 2396
2262 out_free_skb: 2397 out_free_skb:
2263 dev_kfree_skb(skb); 2398 dev_kfree_skb(skb);
2264} 2399}
2265 2400
2401static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx,
2402 ieee80211_rx_result res)
2403{
2404 switch (res) {
2405 case RX_DROP_MONITOR:
2406 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
2407 if (rx->sta)
2408 rx->sta->rx_dropped++;
2409 /* fall through */
2410 case RX_CONTINUE: {
2411 struct ieee80211_rate *rate = NULL;
2412 struct ieee80211_supported_band *sband;
2413 struct ieee80211_rx_status *status;
2414
2415 status = IEEE80211_SKB_RXCB((rx->skb));
2416
2417 sband = rx->local->hw.wiphy->bands[status->band];
2418 if (!(status->flag & RX_FLAG_HT))
2419 rate = &sband->bitrates[status->rate_idx];
2420
2421 ieee80211_rx_cooked_monitor(rx, rate);
2422 break;
2423 }
2424 case RX_DROP_UNUSABLE:
2425 I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop);
2426 if (rx->sta)
2427 rx->sta->rx_dropped++;
2428 dev_kfree_skb(rx->skb);
2429 break;
2430 case RX_QUEUED:
2431 I802_DEBUG_INC(rx->sdata->local->rx_handlers_queued);
2432 break;
2433 }
2434}
2266 2435
2267static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata, 2436static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
2268 struct ieee80211_rx_data *rx, 2437 struct sk_buff_head *frames)
2269 struct sk_buff *skb,
2270 struct ieee80211_rate *rate)
2271{ 2438{
2272 struct sk_buff_head reorder_release;
2273 ieee80211_rx_result res = RX_DROP_MONITOR; 2439 ieee80211_rx_result res = RX_DROP_MONITOR;
2274 2440 struct sk_buff *skb;
2275 __skb_queue_head_init(&reorder_release);
2276
2277 rx->skb = skb;
2278 rx->sdata = sdata;
2279 2441
2280#define CALL_RXH(rxh) \ 2442#define CALL_RXH(rxh) \
2281 do { \ 2443 do { \
@@ -2284,23 +2446,14 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
2284 goto rxh_next; \ 2446 goto rxh_next; \
2285 } while (0); 2447 } while (0);
2286 2448
2287 /* 2449 while ((skb = __skb_dequeue(frames))) {
2288 * NB: the rxh_next label works even if we jump
2289 * to it from here because then the list will
2290 * be empty, which is a trivial check
2291 */
2292 CALL_RXH(ieee80211_rx_h_passive_scan)
2293 CALL_RXH(ieee80211_rx_h_check)
2294
2295 ieee80211_rx_reorder_ampdu(rx, &reorder_release);
2296
2297 while ((skb = __skb_dequeue(&reorder_release))) {
2298 /* 2450 /*
2299 * all the other fields are valid across frames 2451 * all the other fields are valid across frames
2300 * that belong to an aMPDU since they are on the 2452 * that belong to an aMPDU since they are on the
2301 * same TID from the same station 2453 * same TID from the same station
2302 */ 2454 */
2303 rx->skb = skb; 2455 rx->skb = skb;
2456 rx->flags = 0;
2304 2457
2305 CALL_RXH(ieee80211_rx_h_decrypt) 2458 CALL_RXH(ieee80211_rx_h_decrypt)
2306 CALL_RXH(ieee80211_rx_h_check_more_data) 2459 CALL_RXH(ieee80211_rx_h_check_more_data)
@@ -2312,50 +2465,92 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata,
2312 CALL_RXH(ieee80211_rx_h_remove_qos_control) 2465 CALL_RXH(ieee80211_rx_h_remove_qos_control)
2313 CALL_RXH(ieee80211_rx_h_amsdu) 2466 CALL_RXH(ieee80211_rx_h_amsdu)
2314#ifdef CONFIG_MAC80211_MESH 2467#ifdef CONFIG_MAC80211_MESH
2315 if (ieee80211_vif_is_mesh(&sdata->vif)) 2468 if (ieee80211_vif_is_mesh(&rx->sdata->vif))
2316 CALL_RXH(ieee80211_rx_h_mesh_fwding); 2469 CALL_RXH(ieee80211_rx_h_mesh_fwding);
2317#endif 2470#endif
2318 CALL_RXH(ieee80211_rx_h_data) 2471 CALL_RXH(ieee80211_rx_h_data)
2319 2472
2320 /* special treatment -- needs the queue */ 2473 /* special treatment -- needs the queue */
2321 res = ieee80211_rx_h_ctrl(rx, &reorder_release); 2474 res = ieee80211_rx_h_ctrl(rx, frames);
2322 if (res != RX_CONTINUE) 2475 if (res != RX_CONTINUE)
2323 goto rxh_next; 2476 goto rxh_next;
2324 2477
2478 CALL_RXH(ieee80211_rx_h_mgmt_check)
2325 CALL_RXH(ieee80211_rx_h_action) 2479 CALL_RXH(ieee80211_rx_h_action)
2480 CALL_RXH(ieee80211_rx_h_userspace_mgmt)
2481 CALL_RXH(ieee80211_rx_h_action_return)
2326 CALL_RXH(ieee80211_rx_h_mgmt) 2482 CALL_RXH(ieee80211_rx_h_mgmt)
2327 2483
2484 rxh_next:
2485 ieee80211_rx_handlers_result(rx, res);
2486
2328#undef CALL_RXH 2487#undef CALL_RXH
2488 }
2489}
2490
2491static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx)
2492{
2493 struct sk_buff_head reorder_release;
2494 ieee80211_rx_result res = RX_DROP_MONITOR;
2495
2496 __skb_queue_head_init(&reorder_release);
2497
2498#define CALL_RXH(rxh) \
2499 do { \
2500 res = rxh(rx); \
2501 if (res != RX_CONTINUE) \
2502 goto rxh_next; \
2503 } while (0);
2504
2505 CALL_RXH(ieee80211_rx_h_passive_scan)
2506 CALL_RXH(ieee80211_rx_h_check)
2507
2508 ieee80211_rx_reorder_ampdu(rx, &reorder_release);
2509
2510 ieee80211_rx_handlers(rx, &reorder_release);
2511 return;
2329 2512
2330 rxh_next: 2513 rxh_next:
2331 switch (res) { 2514 ieee80211_rx_handlers_result(rx, res);
2332 case RX_DROP_MONITOR: 2515
2333 I802_DEBUG_INC(sdata->local->rx_handlers_drop); 2516#undef CALL_RXH
2334 if (rx->sta) 2517}
2335 rx->sta->rx_dropped++; 2518
2336 /* fall through */ 2519/*
2337 case RX_CONTINUE: 2520 * This function makes calls into the RX path. Therefore the
2338 ieee80211_rx_cooked_monitor(rx, rate); 2521 * caller must hold the sta_info->lock and everything has to
2339 break; 2522 * be under rcu_read_lock protection as well.
2340 case RX_DROP_UNUSABLE: 2523 */
2341 I802_DEBUG_INC(sdata->local->rx_handlers_drop); 2524void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
2342 if (rx->sta) 2525{
2343 rx->sta->rx_dropped++; 2526 struct sk_buff_head frames;
2344 dev_kfree_skb(rx->skb); 2527 struct ieee80211_rx_data rx = {
2345 break; 2528 .sta = sta,
2346 case RX_QUEUED: 2529 .sdata = sta->sdata,
2347 I802_DEBUG_INC(sdata->local->rx_handlers_queued); 2530 .local = sta->local,
2348 break; 2531 .queue = tid,
2349 } 2532 };
2350 } 2533 struct tid_ampdu_rx *tid_agg_rx;
2534
2535 tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
2536 if (!tid_agg_rx)
2537 return;
2538
2539 __skb_queue_head_init(&frames);
2540
2541 spin_lock(&tid_agg_rx->reorder_lock);
2542 ieee80211_sta_reorder_release(&sta->local->hw, tid_agg_rx, &frames);
2543 spin_unlock(&tid_agg_rx->reorder_lock);
2544
2545 ieee80211_rx_handlers(&rx, &frames);
2351} 2546}
2352 2547
2353/* main receive path */ 2548/* main receive path */
2354 2549
2355static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, 2550static int prepare_for_handlers(struct ieee80211_rx_data *rx,
2356 struct ieee80211_rx_data *rx,
2357 struct ieee80211_hdr *hdr) 2551 struct ieee80211_hdr *hdr)
2358{ 2552{
2553 struct ieee80211_sub_if_data *sdata = rx->sdata;
2359 struct sk_buff *skb = rx->skb; 2554 struct sk_buff *skb = rx->skb;
2360 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 2555 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
2361 u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type); 2556 u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type);
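
The new ieee80211_release_reorder_timeout() entry point above is intended to be driven from the reorder release timer (currently disabled, see the large comment earlier in this file). A hypothetical timer callback honouring the documented locking rules might look like this; the cookie and TID handling here are assumptions for illustration:

	/* Hypothetical sketch; RCU read side plus sta->lock per the
	 * comment above ieee80211_release_reorder_timeout(). */
	static void example_reorder_timer_expired(unsigned long data)
	{
		struct sta_info *sta = (struct sta_info *)data;	/* assumed cookie */
		int tid = 0;					/* assumed TID */

		rcu_read_lock();
		spin_lock(&sta->lock);
		ieee80211_release_reorder_timeout(sta, tid);
		spin_unlock(&sta->lock);
		rcu_read_unlock();
	}
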
@@ -2369,7 +2564,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2369 compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) { 2564 compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) {
2370 if (!(sdata->dev->flags & IFF_PROMISC)) 2565 if (!(sdata->dev->flags & IFF_PROMISC))
2371 return 0; 2566 return 0;
2372 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2567 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2373 } 2568 }
2374 break; 2569 break;
2375 case NL80211_IFTYPE_ADHOC: 2570 case NL80211_IFTYPE_ADHOC:
@@ -2379,15 +2574,15 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2379 return 1; 2574 return 1;
2380 } 2575 }
2381 else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) { 2576 else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) {
2382 if (!(rx->flags & IEEE80211_RX_IN_SCAN)) 2577 if (!(status->rx_flags & IEEE80211_RX_IN_SCAN))
2383 return 0; 2578 return 0;
2384 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2579 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2385 } else if (!multicast && 2580 } else if (!multicast &&
2386 compare_ether_addr(sdata->vif.addr, 2581 compare_ether_addr(sdata->vif.addr,
2387 hdr->addr1) != 0) { 2582 hdr->addr1) != 0) {
2388 if (!(sdata->dev->flags & IFF_PROMISC)) 2583 if (!(sdata->dev->flags & IFF_PROMISC))
2389 return 0; 2584 return 0;
2390 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2585 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2391 } else if (!rx->sta) { 2586 } else if (!rx->sta) {
2392 int rate_idx; 2587 int rate_idx;
2393 if (status->flag & RX_FLAG_HT) 2588 if (status->flag & RX_FLAG_HT)
@@ -2405,7 +2600,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2405 if (!(sdata->dev->flags & IFF_PROMISC)) 2600 if (!(sdata->dev->flags & IFF_PROMISC))
2406 return 0; 2601 return 0;
2407 2602
2408 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2603 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2409 } 2604 }
2410 break; 2605 break;
2411 case NL80211_IFTYPE_AP_VLAN: 2606 case NL80211_IFTYPE_AP_VLAN:
@@ -2416,9 +2611,9 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2416 return 0; 2611 return 0;
2417 } else if (!ieee80211_bssid_match(bssid, 2612 } else if (!ieee80211_bssid_match(bssid,
2418 sdata->vif.addr)) { 2613 sdata->vif.addr)) {
2419 if (!(rx->flags & IEEE80211_RX_IN_SCAN)) 2614 if (!(status->rx_flags & IEEE80211_RX_IN_SCAN))
2420 return 0; 2615 return 0;
2421 rx->flags &= ~IEEE80211_RX_RA_MATCH; 2616 status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
2422 } 2617 }
2423 break; 2618 break;
2424 case NL80211_IFTYPE_WDS: 2619 case NL80211_IFTYPE_WDS:
@@ -2427,9 +2622,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2427 if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2)) 2622 if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2))
2428 return 0; 2623 return 0;
2429 break; 2624 break;
2430 case NL80211_IFTYPE_MONITOR: 2625 default:
2431 case NL80211_IFTYPE_UNSPECIFIED:
2432 case __NL80211_IFTYPE_AFTER_LAST:
2433 /* should never get here */ 2626 /* should never get here */
2434 WARN_ON(1); 2627 WARN_ON(1);
2435 break; 2628 break;
@@ -2439,12 +2632,56 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata,
2439} 2632}
2440 2633
2441/* 2634/*
 2635 * This function returns whether the SKB was
 2636 * destined for RX processing, which, when
 2637 * consume is true, is equivalent to whether
 2638 * the skb was consumed.
2639 */
2640static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
2641 struct sk_buff *skb, bool consume)
2642{
2643 struct ieee80211_local *local = rx->local;
2644 struct ieee80211_sub_if_data *sdata = rx->sdata;
2645 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
2646 struct ieee80211_hdr *hdr = (void *)skb->data;
2647 int prepares;
2648
2649 rx->skb = skb;
2650 status->rx_flags |= IEEE80211_RX_RA_MATCH;
2651 prepares = prepare_for_handlers(rx, hdr);
2652
2653 if (!prepares)
2654 return false;
2655
2656 if (status->flag & RX_FLAG_MMIC_ERROR) {
2657 if (status->rx_flags & IEEE80211_RX_RA_MATCH)
2658 ieee80211_rx_michael_mic_report(hdr, rx);
2659 return false;
2660 }
2661
2662 if (!consume) {
2663 skb = skb_copy(skb, GFP_ATOMIC);
2664 if (!skb) {
2665 if (net_ratelimit())
2666 wiphy_debug(local->hw.wiphy,
2667 "failed to copy multicast frame for %s\n",
2668 sdata->name);
2669 return true;
2670 }
2671
2672 rx->skb = skb;
2673 }
2674
2675 ieee80211_invoke_rx_handlers(rx);
2676 return true;
2677}
2678
2679/*
 2442 * This is the actual Rx frames handler. As it belongs to the Rx path it must 2680 * This is the actual Rx frames handler. As it belongs to the Rx path it must
2443 * be called with rcu_read_lock protection. 2681 * be called with rcu_read_lock protection.
2444 */ 2682 */
2445static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, 2683static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2446 struct sk_buff *skb, 2684 struct sk_buff *skb)
2447 struct ieee80211_rate *rate)
2448{ 2685{
2449 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); 2686 struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
2450 struct ieee80211_local *local = hw_to_local(hw); 2687 struct ieee80211_local *local = hw_to_local(hw);
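
The consume flag on ieee80211_prepare_and_rx_handle() enables copy-for-all-but-the-last delivery in the rewritten dispatch below: every candidate interface except the final one receives an skb_copy(), and the original buffer is consumed by the last match, saving one copy per frame. Condensed from the following hunk:

	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
		if (!prev) {
			prev = sdata;
			continue;
		}
		rx.sdata = prev;
		ieee80211_prepare_and_rx_handle(&rx, skb, false);	/* copies */
		prev = sdata;
	}
	if (prev) {
		rx.sdata = prev;
		if (ieee80211_prepare_and_rx_handle(&rx, skb, true))	/* consumes */
			return;
	}
	dev_kfree_skb(skb);
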
@@ -2452,11 +2689,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2452 struct ieee80211_hdr *hdr; 2689 struct ieee80211_hdr *hdr;
2453 __le16 fc; 2690 __le16 fc;
2454 struct ieee80211_rx_data rx; 2691 struct ieee80211_rx_data rx;
2455 int prepares; 2692 struct ieee80211_sub_if_data *prev;
2456 struct ieee80211_sub_if_data *prev = NULL; 2693 struct sta_info *sta, *tmp, *prev_sta;
2457 struct sk_buff *skb_new;
2458 struct sta_info *sta, *tmp;
2459 bool found_sta = false;
2460 int err = 0; 2694 int err = 0;
2461 2695
2462 fc = ((struct ieee80211_hdr *)skb->data)->frame_control; 2696 fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
@@ -2469,7 +2703,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2469 2703
2470 if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) || 2704 if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) ||
2471 test_bit(SCAN_OFF_CHANNEL, &local->scanning))) 2705 test_bit(SCAN_OFF_CHANNEL, &local->scanning)))
2472 rx.flags |= IEEE80211_RX_IN_SCAN; 2706 status->rx_flags |= IEEE80211_RX_IN_SCAN;
2473 2707
2474 if (ieee80211_is_mgmt(fc)) 2708 if (ieee80211_is_mgmt(fc))
2475 err = skb_linearize(skb); 2709 err = skb_linearize(skb);
@@ -2486,91 +2720,67 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
2486 ieee80211_verify_alignment(&rx); 2720 ieee80211_verify_alignment(&rx);
2487 2721
2488 if (ieee80211_is_data(fc)) { 2722 if (ieee80211_is_data(fc)) {
2723 prev_sta = NULL;
2724
2489 for_each_sta_info(local, hdr->addr2, sta, tmp) { 2725 for_each_sta_info(local, hdr->addr2, sta, tmp) {
2490 rx.sta = sta; 2726 if (!prev_sta) {
2491 found_sta = true; 2727 prev_sta = sta;
2492 rx.sdata = sta->sdata;
2493
2494 rx.flags |= IEEE80211_RX_RA_MATCH;
2495 prepares = prepare_for_handlers(rx.sdata, &rx, hdr);
2496 if (prepares) {
2497 if (status->flag & RX_FLAG_MMIC_ERROR) {
2498 if (rx.flags & IEEE80211_RX_RA_MATCH)
2499 ieee80211_rx_michael_mic_report(hdr, &rx);
2500 } else
2501 prev = rx.sdata;
2502 }
2503 }
2504 }
2505 if (!found_sta) {
2506 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
2507 if (!ieee80211_sdata_running(sdata))
2508 continue; 2728 continue;
2729 }
2509 2730
2510 if (sdata->vif.type == NL80211_IFTYPE_MONITOR || 2731 rx.sta = prev_sta;
2511 sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 2732 rx.sdata = prev_sta->sdata;
2512 continue; 2733 ieee80211_prepare_and_rx_handle(&rx, skb, false);
2513 2734
2514 /* 2735 prev_sta = sta;
2515 * frame is destined for this interface, but if it's 2736 }
2516 * not also for the previous one we handle that after
2517 * the loop to avoid copying the SKB once too much
2518 */
2519 2737
2520 if (!prev) { 2738 if (prev_sta) {
2521 prev = sdata; 2739 rx.sta = prev_sta;
2522 continue; 2740 rx.sdata = prev_sta->sdata;
2523 }
2524 2741
2525 rx.sta = sta_info_get_bss(prev, hdr->addr2); 2742 if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
2743 return;
2744 }
2745 }
2526 2746
2527 rx.flags |= IEEE80211_RX_RA_MATCH; 2747 prev = NULL;
2528 prepares = prepare_for_handlers(prev, &rx, hdr);
2529 2748
2530 if (!prepares) 2749 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
2531 goto next; 2750 if (!ieee80211_sdata_running(sdata))
2751 continue;
2532 2752
2533 if (status->flag & RX_FLAG_MMIC_ERROR) { 2753 if (sdata->vif.type == NL80211_IFTYPE_MONITOR ||
2534 rx.sdata = prev; 2754 sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
2535 if (rx.flags & IEEE80211_RX_RA_MATCH) 2755 continue;
2536 ieee80211_rx_michael_mic_report(hdr,
2537 &rx);
2538 goto next;
2539 }
2540 2756
2541 /* 2757 /*
2542 * frame was destined for the previous interface 2758 * frame is destined for this interface, but if it's
2543 * so invoke RX handlers for it 2759 * not also for the previous one we handle that after
2544 */ 2760 * the loop to avoid copying the SKB once too often
2761 */
2545 2762
2546 skb_new = skb_copy(skb, GFP_ATOMIC); 2763 if (!prev) {
2547 if (!skb_new) {
2548 if (net_ratelimit())
2549 printk(KERN_DEBUG "%s: failed to copy "
2550 "multicast frame for %s\n",
2551 wiphy_name(local->hw.wiphy),
2552 prev->name);
2553 goto next;
2554 }
2555 ieee80211_invoke_rx_handlers(prev, &rx, skb_new, rate);
2556next:
2557 prev = sdata; 2764 prev = sdata;
2765 continue;
2558 } 2766 }
2559 2767
2560 if (prev) { 2768 rx.sta = sta_info_get_bss(prev, hdr->addr2);
2561 rx.sta = sta_info_get_bss(prev, hdr->addr2); 2769 rx.sdata = prev;
2770 ieee80211_prepare_and_rx_handle(&rx, skb, false);
2562 2771
2563 rx.flags |= IEEE80211_RX_RA_MATCH; 2772 prev = sdata;
2564 prepares = prepare_for_handlers(prev, &rx, hdr); 2773 }
2565 2774
2566 if (!prepares) 2775 if (prev) {
2567 prev = NULL; 2776 rx.sta = sta_info_get_bss(prev, hdr->addr2);
2568 } 2777 rx.sdata = prev;
2778
2779 if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
2780 return;
2569 } 2781 }
2570 if (prev) 2782
2571 ieee80211_invoke_rx_handlers(prev, &rx, skb, rate); 2783 dev_kfree_skb(skb);
2572 else
2573 dev_kfree_skb(skb);
2574} 2784}
2575 2785
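
The loop above and the helper before it implement copy-on-all-but-last delivery: each time another receiver for the frame turns up, the previous one is handed a copy, and only the final receiver consumes the original skb, so N receivers cost N - 1 copies. A minimal user-space sketch of the same pattern, assuming toy buf/deliver stand-ins rather than real skbs or mac80211 calls:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Toy stand-ins for the skb and its receivers; not mac80211 API. */
struct buf { char *data; };

static struct buf *buf_copy(const struct buf *b)
{
        struct buf *c = malloc(sizeof(*c));

        if (!c)
                return NULL;
        c->data = strdup(b->data);
        if (!c->data) {
                free(c);
                return NULL;
        }
        return c;
}

static void deliver(int receiver, struct buf *b, int consume)
{
        printf("receiver %d got \"%s\"%s\n", receiver, b->data,
               consume ? " (took the original)" : " (took a copy)");
        free(b->data);
        free(b);
}

int main(void)
{
        int receivers[] = { 1, 2, 3 };
        int n = 3, prev = -1, i;
        struct buf *skb = malloc(sizeof(*skb));

        if (!skb)
                return 1;
        skb->data = strdup("frame");
        if (!skb->data) {
                free(skb);
                return 1;
        }

        /*
         * Same shape as the dispatch loop above: hand a copy to each
         * receiver found so far, and let only the last one consume
         * the original buffer.
         */
        for (i = 0; i < n; i++) {
                if (prev >= 0) {
                        struct buf *c = buf_copy(skb);

                        if (c)
                                deliver(prev, c, 0);
                }
                prev = receivers[i];
        }

        if (prev >= 0) {
                deliver(prev, skb, 1);  /* consume: no copy needed */
        } else {
                free(skb->data);
                free(skb);
        }
        return 0;
}

The early returns in the function above are the consume case of the same idea: once the last candidate has taken the original skb, nothing is left to free.
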
2576/* 2786/*
@@ -2611,30 +2821,41 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
2611 if (WARN_ON(!local->started)) 2821 if (WARN_ON(!local->started))
2612 goto drop; 2822 goto drop;
2613 2823
2614 if (status->flag & RX_FLAG_HT) { 2824 if (likely(!(status->flag & RX_FLAG_FAILED_PLCP_CRC))) {
2615 /* 2825 /*
2616 * rate_idx is MCS index, which can be [0-76] as documented on: 2826 * Validate the rate, unless a PLCP error means that
2617 * 2827 * we probably can't have a valid rate here anyway.
2618 * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n
2619 *
2620 * Anything else would be some sort of driver or hardware error.
2621 * The driver should catch hardware errors.
2622 */ 2828 */
2623 if (WARN((status->rate_idx < 0 || 2829
2624 status->rate_idx > 76), 2830 if (status->flag & RX_FLAG_HT) {
2625 "Rate marked as an HT rate but passed " 2831 /*
2626 "status->rate_idx is not " 2832 * rate_idx is MCS index, which can be [0-76]
2627 "an MCS index [0-76]: %d (0x%02x)\n", 2833 * as documented on:
2628 status->rate_idx, 2834 *
2629 status->rate_idx)) 2835 * http://wireless.kernel.org/en/developers/Documentation/ieee80211/802.11n
2630 goto drop; 2836 *
2631 } else { 2837 * Anything else would be some sort of driver or
2632 if (WARN_ON(status->rate_idx < 0 || 2838 * hardware error. The driver should catch hardware
2633 status->rate_idx >= sband->n_bitrates)) 2839 * errors.
2634 goto drop; 2840 */
2635 rate = &sband->bitrates[status->rate_idx]; 2841 if (WARN((status->rate_idx < 0 ||
2842 status->rate_idx > 76),
2843 "Rate marked as an HT rate but passed "
2844 "status->rate_idx is not "
2845 "an MCS index [0-76]: %d (0x%02x)\n",
2846 status->rate_idx,
2847 status->rate_idx))
2848 goto drop;
2849 } else {
2850 if (WARN_ON(status->rate_idx < 0 ||
2851 status->rate_idx >= sband->n_bitrates))
2852 goto drop;
2853 rate = &sband->bitrates[status->rate_idx];
2854 }
2636 } 2855 }
2637 2856
2857 status->rx_flags = 0;
2858
2638 /* 2859 /*
2639 * key references and virtual interfaces are protected using RCU 2860 * key references and virtual interfaces are protected using RCU
2640 * and this requires that we are in a read-side RCU section during 2861 * and this requires that we are in a read-side RCU section during
@@ -2654,7 +2875,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
2654 return; 2875 return;
2655 } 2876 }
2656 2877
2657 __ieee80211_rx_handle_packet(hw, skb, rate); 2878 __ieee80211_rx_handle_packet(hw, skb);
2658 2879
2659 rcu_read_unlock(); 2880 rcu_read_unlock();
2660 2881
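
The nested rate check above reduces to two simple range predicates, one for HT (MCS 0-76) and one for legacy rate-table indices. A standalone restatement, where RX_FLAG_HT and n_bitrates are stand-ins for the mac80211 flag and sband->n_bitrates:

#include <assert.h>
#include <stdbool.h>

#define RX_FLAG_HT 0x1  /* stand-in for the mac80211 RX flag */

static bool rate_idx_valid(unsigned int flags, int rate_idx, int n_bitrates)
{
        if (flags & RX_FLAG_HT)
                return rate_idx >= 0 && rate_idx <= 76;  /* MCS 0..76 */
        return rate_idx >= 0 && rate_idx < n_bitrates;   /* table index */
}

int main(void)
{
        assert(rate_idx_valid(RX_FLAG_HT, 76, 0));   /* highest MCS */
        assert(!rate_idx_valid(RX_FLAG_HT, 77, 0));  /* out of range */
        assert(rate_idx_valid(0, 3, 12));            /* legacy, in table */
        assert(!rate_idx_valid(0, 12, 12));          /* past the table */
        return 0;
}
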
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 872d7b6ef6b3..fb274db77e3c 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -242,20 +242,19 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
242 local->hw_scan_req->n_channels = n_chans; 242 local->hw_scan_req->n_channels = n_chans;
243 243
244 ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie, 244 ielen = ieee80211_build_preq_ies(local, (u8 *)local->hw_scan_req->ie,
245 req->ie, req->ie_len, band); 245 req->ie, req->ie_len, band, (u32) -1,
246 0);
246 local->hw_scan_req->ie_len = ielen; 247 local->hw_scan_req->ie_len = ielen;
247 248
248 return true; 249 return true;
249} 250}
250 251
251void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) 252static bool __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted,
253 bool was_hw_scan)
252{ 254{
253 struct ieee80211_local *local = hw_to_local(hw); 255 struct ieee80211_local *local = hw_to_local(hw);
254 bool was_hw_scan;
255
256 trace_api_scan_completed(local, aborted);
257 256
258 mutex_lock(&local->scan_mtx); 257 lockdep_assert_held(&local->mtx);
259 258
260 /* 259 /*
261 * It's ok to abort a not-yet-running scan (that 260 * It's ok to abort a not-yet-running scan (that
@@ -266,17 +265,13 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
266 if (WARN_ON(!local->scanning && !aborted)) 265 if (WARN_ON(!local->scanning && !aborted))
267 aborted = true; 266 aborted = true;
268 267
269 if (WARN_ON(!local->scan_req)) { 268 if (WARN_ON(!local->scan_req))
270 mutex_unlock(&local->scan_mtx); 269 return false;
271 return;
272 }
273 270
274 was_hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning);
275 if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) { 271 if (was_hw_scan && !aborted && ieee80211_prep_hw_scan(local)) {
276 ieee80211_queue_delayed_work(&local->hw, 272 int rc = drv_hw_scan(local, local->scan_sdata, local->hw_scan_req);
277 &local->scan_work, 0); 273 if (rc == 0)
278 mutex_unlock(&local->scan_mtx); 274 return false;
279 return;
280 } 275 }
281 276
282 kfree(local->hw_scan_req); 277 kfree(local->hw_scan_req);
@@ -290,26 +285,42 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
290 local->scanning = 0; 285 local->scanning = 0;
291 local->scan_channel = NULL; 286 local->scan_channel = NULL;
292 287
293 /* we only have to protect scan_req and hw/sw scan */ 288 return true;
294 mutex_unlock(&local->scan_mtx); 289}
295
296 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
297 if (was_hw_scan)
298 goto done;
299
300 ieee80211_configure_filter(local);
301 290
302 drv_sw_scan_complete(local); 291static void __ieee80211_scan_completed_finish(struct ieee80211_hw *hw,
292 bool was_hw_scan)
293{
294 struct ieee80211_local *local = hw_to_local(hw);
303 295
304 ieee80211_offchannel_return(local, true); 296 ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
297 if (!was_hw_scan) {
298 ieee80211_configure_filter(local);
299 drv_sw_scan_complete(local);
300 ieee80211_offchannel_return(local, true);
301 }
305 302
306 done: 303 mutex_lock(&local->mtx);
307 ieee80211_recalc_idle(local); 304 ieee80211_recalc_idle(local);
305 mutex_unlock(&local->mtx);
306
308 ieee80211_mlme_notify_scan_completed(local); 307 ieee80211_mlme_notify_scan_completed(local);
309 ieee80211_ibss_notify_scan_completed(local); 308 ieee80211_ibss_notify_scan_completed(local);
310 ieee80211_mesh_notify_scan_completed(local); 309 ieee80211_mesh_notify_scan_completed(local);
311 ieee80211_queue_work(&local->hw, &local->work_work); 310 ieee80211_queue_work(&local->hw, &local->work_work);
312} 311}
312
313void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
314{
315 struct ieee80211_local *local = hw_to_local(hw);
316
317 trace_api_scan_completed(local, aborted);
318
319 set_bit(SCAN_COMPLETED, &local->scanning);
320 if (aborted)
321 set_bit(SCAN_ABORTED, &local->scanning);
322 ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0);
323}
313EXPORT_SYMBOL(ieee80211_scan_completed); 324EXPORT_SYMBOL(ieee80211_scan_completed);
314 325
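
With this change, ieee80211_scan_completed() becomes safe to call from almost any context: it only records the outcome in atomic scanning bits and kicks scan_work, which later consumes them under local->mtx via the out_complete path. A rough user-space analogue of that handshake using C11 atomics; the names mirror the patch but the code is illustrative, not kernel API:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

enum { SCAN_COMPLETED = 1 << 0, SCAN_ABORTED = 1 << 1 };  /* illustrative */

static atomic_uint scanning;

/* May be called from atomic context: only sets bits and (in the kernel)
 * would queue scan_work with zero delay. */
static void scan_completed(bool aborted)
{
        if (aborted)
                atomic_fetch_or(&scanning, SCAN_ABORTED);
        atomic_fetch_or(&scanning, SCAN_COMPLETED);
}

/* Runs later in a context that may sleep and take mutexes. */
static void scan_work(void)
{
        unsigned int old;

        old = atomic_fetch_and(&scanning, ~(unsigned int)SCAN_COMPLETED);
        if (old & SCAN_COMPLETED) {
                bool aborted;

                old = atomic_fetch_and(&scanning,
                                       ~(unsigned int)SCAN_ABORTED);
                aborted = old & SCAN_ABORTED;
                printf("completing scan, aborted=%d\n", aborted);
        }
}

int main(void)
{
        scan_completed(true);
        scan_work();    /* prints: completing scan, aborted=1 */
        return 0;
}
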
315static int ieee80211_start_sw_scan(struct ieee80211_local *local) 326static int ieee80211_start_sw_scan(struct ieee80211_local *local)
@@ -353,6 +364,8 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
353 struct ieee80211_local *local = sdata->local; 364 struct ieee80211_local *local = sdata->local;
354 int rc; 365 int rc;
355 366
367 lockdep_assert_held(&local->mtx);
368
356 if (local->scan_req) 369 if (local->scan_req)
357 return -EBUSY; 370 return -EBUSY;
358 371
@@ -434,8 +447,8 @@ ieee80211_scan_get_channel_time(struct ieee80211_channel *chan)
434 return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME; 447 return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME;
435} 448}
436 449
437static int ieee80211_scan_state_decision(struct ieee80211_local *local, 450static void ieee80211_scan_state_decision(struct ieee80211_local *local,
438 unsigned long *next_delay) 451 unsigned long *next_delay)
439{ 452{
440 bool associated = false; 453 bool associated = false;
441 bool tx_empty = true; 454 bool tx_empty = true;
@@ -445,12 +458,6 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
445 struct ieee80211_sub_if_data *sdata; 458 struct ieee80211_sub_if_data *sdata;
446 struct ieee80211_channel *next_chan; 459 struct ieee80211_channel *next_chan;
447 460
448 /* if no more bands/channels left, complete scan and advance to the idle state */
449 if (local->scan_channel_idx >= local->scan_req->n_channels) {
450 ieee80211_scan_completed(&local->hw, false);
451 return 1;
452 }
453
454 /* 461 /*
455 * check if at least one STA interface is associated, 462 * check if at least one STA interface is associated,
456 * check if at least one STA interface has pending tx frames 463 * check if at least one STA interface has pending tx frames
@@ -522,7 +529,6 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
522 } 529 }
523 530
524 *next_delay = 0; 531 *next_delay = 0;
525 return 0;
526} 532}
527 533
528static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local, 534static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *local,
@@ -638,21 +644,18 @@ void ieee80211_scan_work(struct work_struct *work)
638 container_of(work, struct ieee80211_local, scan_work.work); 644 container_of(work, struct ieee80211_local, scan_work.work);
639 struct ieee80211_sub_if_data *sdata = local->scan_sdata; 645 struct ieee80211_sub_if_data *sdata = local->scan_sdata;
640 unsigned long next_delay = 0; 646 unsigned long next_delay = 0;
647 bool aborted, hw_scan, finish;
641 648
642 mutex_lock(&local->scan_mtx); 649 mutex_lock(&local->mtx);
643 if (!sdata || !local->scan_req) {
644 mutex_unlock(&local->scan_mtx);
645 return;
646 }
647 650
648 if (local->hw_scan_req) { 651 if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) {
649 int rc = drv_hw_scan(local, sdata, local->hw_scan_req); 652 aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning);
650 mutex_unlock(&local->scan_mtx); 653 goto out_complete;
651 if (rc)
652 ieee80211_scan_completed(&local->hw, true);
653 return;
654 } 654 }
655 655
656 if (!sdata || !local->scan_req)
657 goto out;
658
656 if (local->scan_req && !local->scanning) { 659 if (local->scan_req && !local->scanning) {
657 struct cfg80211_scan_request *req = local->scan_req; 660 struct cfg80211_scan_request *req = local->scan_req;
658 int rc; 661 int rc;
@@ -661,21 +664,21 @@ void ieee80211_scan_work(struct work_struct *work)
661 local->scan_sdata = NULL; 664 local->scan_sdata = NULL;
662 665
663 rc = __ieee80211_start_scan(sdata, req); 666 rc = __ieee80211_start_scan(sdata, req);
664 mutex_unlock(&local->scan_mtx); 667 if (rc) {
665 668 /* need to complete scan in cfg80211 */
666 if (rc) 669 local->scan_req = req;
667 ieee80211_scan_completed(&local->hw, true); 670 aborted = true;
668 return; 671 goto out_complete;
672 } else
673 goto out;
669 } 674 }
670 675
671 mutex_unlock(&local->scan_mtx);
672
673 /* 676 /*
674 * Avoid re-scheduling when the sdata is going away. 677 * Avoid re-scheduling when the sdata is going away.
675 */ 678 */
676 if (!ieee80211_sdata_running(sdata)) { 679 if (!ieee80211_sdata_running(sdata)) {
677 ieee80211_scan_completed(&local->hw, true); 680 aborted = true;
678 return; 681 goto out_complete;
679 } 682 }
680 683
681 /* 684 /*
@@ -685,8 +688,12 @@ void ieee80211_scan_work(struct work_struct *work)
685 do { 688 do {
686 switch (local->next_scan_state) { 689 switch (local->next_scan_state) {
687 case SCAN_DECISION: 690 case SCAN_DECISION:
688 if (ieee80211_scan_state_decision(local, &next_delay)) 691 /* if no more bands/channels left, complete scan */
689 return; 692 if (local->scan_channel_idx >= local->scan_req->n_channels) {
693 aborted = false;
694 goto out_complete;
695 }
696 ieee80211_scan_state_decision(local, &next_delay);
690 break; 697 break;
691 case SCAN_SET_CHANNEL: 698 case SCAN_SET_CHANNEL:
692 ieee80211_scan_state_set_channel(local, &next_delay); 699 ieee80211_scan_state_set_channel(local, &next_delay);
@@ -704,6 +711,19 @@ void ieee80211_scan_work(struct work_struct *work)
704 } while (next_delay == 0); 711 } while (next_delay == 0);
705 712
706 ieee80211_queue_delayed_work(&local->hw, &local->scan_work, next_delay); 713 ieee80211_queue_delayed_work(&local->hw, &local->scan_work, next_delay);
714 mutex_unlock(&local->mtx);
715 return;
716
717out_complete:
718 hw_scan = test_bit(SCAN_HW_SCANNING, &local->scanning);
719 finish = __ieee80211_scan_completed(&local->hw, aborted, hw_scan);
720 mutex_unlock(&local->mtx);
721 if (finish)
722 __ieee80211_scan_completed_finish(&local->hw, hw_scan);
723 return;
724
725out:
726 mutex_unlock(&local->mtx);
707} 727}
708 728
709int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, 729int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
@@ -711,9 +731,9 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
711{ 731{
712 int res; 732 int res;
713 733
714 mutex_lock(&sdata->local->scan_mtx); 734 mutex_lock(&sdata->local->mtx);
715 res = __ieee80211_start_scan(sdata, req); 735 res = __ieee80211_start_scan(sdata, req);
716 mutex_unlock(&sdata->local->scan_mtx); 736 mutex_unlock(&sdata->local->mtx);
717 737
718 return res; 738 return res;
719} 739}
@@ -726,7 +746,7 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
726 int ret = -EBUSY; 746 int ret = -EBUSY;
727 enum ieee80211_band band; 747 enum ieee80211_band band;
728 748
729 mutex_lock(&local->scan_mtx); 749 mutex_lock(&local->mtx);
730 750
731 /* busy scanning */ 751 /* busy scanning */
732 if (local->scan_req) 752 if (local->scan_req)
@@ -761,25 +781,44 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
761 781
762 ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req); 782 ret = __ieee80211_start_scan(sdata, sdata->local->int_scan_req);
763 unlock: 783 unlock:
764 mutex_unlock(&local->scan_mtx); 784 mutex_unlock(&local->mtx);
765 return ret; 785 return ret;
766} 786}
767 787
788/*
789 * Only call this function when a scan can't be queued -- under RTNL.
790 */
768void ieee80211_scan_cancel(struct ieee80211_local *local) 791void ieee80211_scan_cancel(struct ieee80211_local *local)
769{ 792{
770 bool abortscan; 793 bool abortscan;
771 794 bool finish = false;
772 cancel_delayed_work_sync(&local->scan_work);
773 795
774 /* 796 /*
775 * Only call this function when a scan can't be 797 * We are only canceling a software scan, or a deferred scan that has
776 * queued -- mostly at suspend under RTNL. 798 * not really started yet (see __ieee80211_start_scan).
 799 *
 800 * Regarding hardware scan:
 801 * - we can not call __ieee80211_scan_completed() since, when the
 802 * SCAN_HW_SCANNING bit is set, this function changes
 803 * local->hw_scan_req to operate on the 5 GHz band, which races with a
 804 * driver that may still be using local->hw_scan_req
 805 *
 806 * - we can not cancel scan_work since the driver can schedule it
 807 * via ieee80211_scan_completed(..., true) to finish the scan
 808 *
 809 * Hence the low level driver is responsible for canceling a HW scan.
777 */ 810 */
778 mutex_lock(&local->scan_mtx);
779 abortscan = test_bit(SCAN_SW_SCANNING, &local->scanning) ||
780 (!local->scanning && local->scan_req);
781 mutex_unlock(&local->scan_mtx);
782 811
812 mutex_lock(&local->mtx);
813 abortscan = local->scan_req && !test_bit(SCAN_HW_SCANNING, &local->scanning);
783 if (abortscan) 814 if (abortscan)
784 ieee80211_scan_completed(&local->hw, true); 815 finish = __ieee80211_scan_completed(&local->hw, true, false);
816 mutex_unlock(&local->mtx);
817
818 if (abortscan) {
 819 /* The scan is canceled, but make sure the work is not pending */
820 cancel_delayed_work_sync(&local->scan_work);
821 }
822 if (finish)
823 __ieee80211_scan_completed_finish(&local->hw, false);
785} 824}
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 6d86f0c1ad04..6d8f897d8763 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -125,7 +125,7 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata,
125 lockdep_is_held(&local->sta_mtx)); 125 lockdep_is_held(&local->sta_mtx));
126 while (sta) { 126 while (sta) {
127 if ((sta->sdata == sdata || 127 if ((sta->sdata == sdata ||
128 sta->sdata->bss == sdata->bss) && 128 (sta->sdata->bss && sta->sdata->bss == sdata->bss)) &&
129 memcmp(sta->sta.addr, addr, ETH_ALEN) == 0) 129 memcmp(sta->sta.addr, addr, ETH_ALEN) == 0)
130 break; 130 break;
131 sta = rcu_dereference_check(sta->hnext, 131 sta = rcu_dereference_check(sta->hnext,
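
The added NULL test matters because interfaces such as WDS or monitor carry no BSS pointer: without it, two BSS-less interfaces would satisfy "sta->sdata->bss == sdata->bss" via NULL == NULL, and the lookup could match a station on an unrelated interface. A tiny self-contained illustration with stand-in types:

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>

struct toy_bss { int id; };
struct toy_iface { struct toy_bss *bss; };

static bool same_bss(const struct toy_iface *a, const struct toy_iface *b)
{
        /* the first test keeps NULL == NULL from counting as a match */
        return a->bss && a->bss == b->bss;
}

int main(void)
{
        struct toy_bss bss = { 1 };
        struct toy_iface ap = { &bss }, vlan = { &bss };
        struct toy_iface wds = { NULL }, mon = { NULL };

        assert(same_bss(&ap, &vlan));   /* genuinely shared BSS */
        assert(!same_bss(&wds, &mon));  /* both BSS-less: no match */
        return 0;
}
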
@@ -174,8 +174,7 @@ static void __sta_info_free(struct ieee80211_local *local,
174 } 174 }
175 175
176#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 176#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
177 printk(KERN_DEBUG "%s: Destroyed STA %pM\n", 177 wiphy_debug(local->hw.wiphy, "Destroyed STA %pM\n", sta->sta.addr);
178 wiphy_name(local->hw.wiphy), sta->sta.addr);
179#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 178#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
180 179
181 kfree(sta); 180 kfree(sta);
@@ -262,8 +261,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
262 sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX); 261 sta->last_seq_ctrl[i] = cpu_to_le16(USHRT_MAX);
263 262
264#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 263#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
265 printk(KERN_DEBUG "%s: Allocated STA %pM\n", 264 wiphy_debug(local->hw.wiphy, "Allocated STA %pM\n", sta->sta.addr);
266 wiphy_name(local->hw.wiphy), sta->sta.addr);
267#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 265#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
268 266
269#ifdef CONFIG_MAC80211_MESH 267#ifdef CONFIG_MAC80211_MESH
@@ -282,7 +280,7 @@ static int sta_info_finish_insert(struct sta_info *sta, bool async)
282 unsigned long flags; 280 unsigned long flags;
283 int err = 0; 281 int err = 0;
284 282
285 WARN_ON(!mutex_is_locked(&local->sta_mtx)); 283 lockdep_assert_held(&local->sta_mtx);
286 284
287 /* notify driver */ 285 /* notify driver */
288 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) 286 if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
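
lockdep_assert_held() is a stronger and cheaper contract than WARN_ON(!mutex_is_locked(...)): it compiles away when lockdep is disabled, and when enabled it verifies that the current task holds the mutex, whereas mutex_is_locked() also passes when somebody else does. A user-space approximation of the held-by-me check; purely illustrative, not the kernel mechanism:

#include <assert.h>
#include <pthread.h>

/* Track the owner so the assertion checks that *this* thread holds the
 * lock, not merely that the lock is held by someone. */
struct checked_mutex {
        pthread_mutex_t m;
        pthread_t owner;
        int locked;
};

static void cm_lock(struct checked_mutex *cm)
{
        pthread_mutex_lock(&cm->m);
        cm->owner = pthread_self();
        cm->locked = 1;
}

static void cm_unlock(struct checked_mutex *cm)
{
        cm->locked = 0;
        pthread_mutex_unlock(&cm->m);
}

static void cm_assert_held(struct checked_mutex *cm)
{
        assert(cm->locked && pthread_equal(cm->owner, pthread_self()));
}

int main(void)
{
        struct checked_mutex cm = { .m = PTHREAD_MUTEX_INITIALIZER };

        cm_lock(&cm);
        cm_assert_held(&cm);    /* the contract the callee requires */
        cm_unlock(&cm);
        return 0;
}
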
@@ -300,8 +298,9 @@ static int sta_info_finish_insert(struct sta_info *sta, bool async)
300 sta->uploaded = true; 298 sta->uploaded = true;
301#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 299#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
302 if (async) 300 if (async)
303 printk(KERN_DEBUG "%s: Finished adding IBSS STA %pM\n", 301 wiphy_debug(local->hw.wiphy,
304 wiphy_name(local->hw.wiphy), sta->sta.addr); 302 "Finished adding IBSS STA %pM\n",
303 sta->sta.addr);
305#endif 304#endif
306 } 305 }
307 306
@@ -411,8 +410,8 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
411 spin_unlock_irqrestore(&local->sta_lock, flags); 410 spin_unlock_irqrestore(&local->sta_lock, flags);
412 411
413#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 412#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
414 printk(KERN_DEBUG "%s: Added IBSS STA %pM\n", 413 wiphy_debug(local->hw.wiphy, "Added IBSS STA %pM\n",
415 wiphy_name(local->hw.wiphy), sta->sta.addr); 414 sta->sta.addr);
416#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 415#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
417 416
418 ieee80211_queue_work(&local->hw, &local->sta_finish_work); 417 ieee80211_queue_work(&local->hw, &local->sta_finish_work);
@@ -459,8 +458,7 @@ int sta_info_insert_rcu(struct sta_info *sta) __acquires(RCU)
459 } 458 }
460 459
461#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 460#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
462 printk(KERN_DEBUG "%s: Inserted STA %pM\n", 461 wiphy_debug(local->hw.wiphy, "Inserted STA %pM\n", sta->sta.addr);
463 wiphy_name(local->hw.wiphy), sta->sta.addr);
464#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 462#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
465 463
466 /* move reference to rcu-protected */ 464 /* move reference to rcu-protected */
@@ -618,7 +616,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
618 struct ieee80211_sub_if_data *sdata; 616 struct ieee80211_sub_if_data *sdata;
619 struct sk_buff *skb; 617 struct sk_buff *skb;
620 unsigned long flags; 618 unsigned long flags;
621 int ret; 619 int ret, i;
622 620
623 might_sleep(); 621 might_sleep();
624 622
@@ -635,7 +633,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
635 * will be sufficient. 633 * will be sufficient.
636 */ 634 */
637 set_sta_flags(sta, WLAN_STA_BLOCK_BA); 635 set_sta_flags(sta, WLAN_STA_BLOCK_BA);
638 ieee80211_sta_tear_down_BA_sessions(sta); 636 ieee80211_sta_tear_down_BA_sessions(sta, true);
639 637
640 spin_lock_irqsave(&local->sta_lock, flags); 638 spin_lock_irqsave(&local->sta_lock, flags);
641 ret = sta_info_hash_del(local, sta); 639 ret = sta_info_hash_del(local, sta);
@@ -646,10 +644,10 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
646 if (ret) 644 if (ret)
647 return ret; 645 return ret;
648 646
649 if (sta->key) { 647 for (i = 0; i < NUM_DEFAULT_KEYS; i++)
650 ieee80211_key_free(local, sta->key); 648 ieee80211_key_free(local, sta->gtk[i]);
651 WARN_ON(sta->key); 649 if (sta->ptk)
652 } 650 ieee80211_key_free(local, sta->ptk);
653 651
654 sta->dead = true; 652 sta->dead = true;
655 653
@@ -690,8 +688,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
690#endif 688#endif
691 689
692#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 690#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
693 printk(KERN_DEBUG "%s: Removed STA %pM\n", 691 wiphy_debug(local->hw.wiphy, "Removed STA %pM\n", sta->sta.addr);
694 wiphy_name(local->hw.wiphy), sta->sta.addr);
695#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ 692#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */
696 cancel_work_sync(&sta->drv_unblock_wk); 693 cancel_work_sync(&sta->drv_unblock_wk);
697 694
@@ -841,13 +838,20 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata,
841 mutex_unlock(&local->sta_mtx); 838 mutex_unlock(&local->sta_mtx);
842} 839}
843 840
844struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, 841struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw,
845 const u8 *addr) 842 const u8 *addr,
843 const u8 *localaddr)
846{ 844{
847 struct sta_info *sta, *nxt; 845 struct sta_info *sta, *nxt;
848 846
849 /* Just return a random station ... first in list ... */ 847 /*
848 * Just return a random station if localaddr is NULL
849 * ... first in list.
850 */
850 for_each_sta_info(hw_to_local(hw), addr, sta, nxt) { 851 for_each_sta_info(hw_to_local(hw), addr, sta, nxt) {
852 if (localaddr &&
853 compare_ether_addr(sta->sdata->vif.addr, localaddr) != 0)
854 continue;
851 if (!sta->uploaded) 855 if (!sta->uploaded)
852 return NULL; 856 return NULL;
853 return &sta->sta; 857 return &sta->sta;
@@ -855,7 +859,7 @@ struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw,
855 859
856 return NULL; 860 return NULL;
857} 861}
858EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw); 862EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_ifaddr);
859 863
860struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, 864struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif,
861 const u8 *addr) 865 const u8 *addr)
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 54262e72376d..9265acadef32 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -79,6 +79,7 @@ enum ieee80211_sta_info_flags {
79 * @dialog_token: dialog token for aggregation session 79 * @dialog_token: dialog token for aggregation session
80 * @state: session state (see above) 80 * @state: session state (see above)
81 * @stop_initiator: initiator of a session stop 81 * @stop_initiator: initiator of a session stop
82 * @tx_stop: TX DelBA frame when stopping
82 * 83 *
83 * This structure is protected by RCU and the per-station 84 * This structure is protected by RCU and the per-station
84 * spinlock. Assignments to the array holding it must hold 85 * spinlock. Assignments to the array holding it must hold
@@ -95,6 +96,7 @@ struct tid_ampdu_tx {
95 unsigned long state; 96 unsigned long state;
96 u8 dialog_token; 97 u8 dialog_token;
97 u8 stop_initiator; 98 u8 stop_initiator;
99 bool tx_stop;
98}; 100};
99 101
100/** 102/**
@@ -103,6 +105,7 @@ struct tid_ampdu_tx {
103 * @reorder_buf: buffer to reorder incoming aggregated MPDUs 105 * @reorder_buf: buffer to reorder incoming aggregated MPDUs
104 * @reorder_time: jiffies when skb was added 106 * @reorder_time: jiffies when skb was added
105 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value) 107 * @session_timer: check if peer keeps Tx-ing on the TID (by timeout value)
108 * @reorder_timer: releases expired frames from the reorder buffer.
106 * @head_seq_num: head sequence number in reordering buffer. 109 * @head_seq_num: head sequence number in reordering buffer.
107 * @stored_mpdu_num: number of MPDUs in reordering buffer 110 * @stored_mpdu_num: number of MPDUs in reordering buffer
108 * @ssn: Starting Sequence Number expected to be aggregated. 111 * @ssn: Starting Sequence Number expected to be aggregated.
@@ -110,20 +113,25 @@ struct tid_ampdu_tx {
110 * @timeout: reset timer value (in TUs). 113 * @timeout: reset timer value (in TUs).
111 * @dialog_token: dialog token for aggregation session 114 * @dialog_token: dialog token for aggregation session
112 * @rcu_head: RCU head used for freeing this struct 115 * @rcu_head: RCU head used for freeing this struct
116 * @reorder_lock: serializes access to reorder buffer, see below.
113 * 117 *
114 * This structure is protected by RCU and the per-station 118 * This structure is protected by RCU and the per-station
115 * spinlock. Assignments to the array holding it must hold 119 * spinlock. Assignments to the array holding it must hold
116 * the spinlock, only the RX path can access it under RCU 120 * the spinlock.
117 * lock-free. The RX path, since it is single-threaded, 121 *
118 * can even modify the structure without locking since the 122 * The @reorder_lock is used to protect the variables and
119 * only other modifications to it are done when the struct 123 * arrays such as @reorder_buf, @reorder_time, @head_seq_num,
120 * can not yet or no longer be found by the RX path. 124 * @stored_mpdu_num from being corrupted by
 125 * concurrent access from the RX path and the expired-frame
 126 * release timer.
121 */ 127 */
122struct tid_ampdu_rx { 128struct tid_ampdu_rx {
123 struct rcu_head rcu_head; 129 struct rcu_head rcu_head;
130 spinlock_t reorder_lock;
124 struct sk_buff **reorder_buf; 131 struct sk_buff **reorder_buf;
125 unsigned long *reorder_time; 132 unsigned long *reorder_time;
126 struct timer_list session_timer; 133 struct timer_list session_timer;
134 struct timer_list reorder_timer;
127 u16 head_seq_num; 135 u16 head_seq_num;
128 u16 stored_mpdu_num; 136 u16 stored_mpdu_num;
129 u16 ssn; 137 u16 ssn;
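
The new kernel-doc above spells out the invariant the code now follows: both the RX path and the new reorder_timer touch the reorder state, so every access is serialized by reorder_lock. A user-space pthread sketch of the same discipline; fields and functions are stand-ins:

#include <pthread.h>
#include <stdio.h>

struct reorder_state {
        pthread_mutex_t lock;           /* plays the role of reorder_lock */
        unsigned int head_seq_num;
        unsigned int stored_mpdu_num;
};

/* RX path: stores a frame into the reorder buffer. */
static void rx_path_store(struct reorder_state *st)
{
        pthread_mutex_lock(&st->lock);
        st->stored_mpdu_num++;
        pthread_mutex_unlock(&st->lock);
}

/* Release timer: frees expired frames, advancing the window. */
static void release_timer_expire(struct reorder_state *st)
{
        pthread_mutex_lock(&st->lock);
        if (st->stored_mpdu_num) {
                st->stored_mpdu_num--;
                st->head_seq_num++;
        }
        pthread_mutex_unlock(&st->lock);
}

int main(void)
{
        struct reorder_state st = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

        rx_path_store(&st);
        release_timer_expire(&st);
        printf("head=%u stored=%u\n", st.head_seq_num, st.stored_mpdu_num);
        return 0;
}
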
@@ -191,7 +199,8 @@ enum plink_state {
191 * @hnext: hash table linked list pointer 199 * @hnext: hash table linked list pointer
192 * @local: pointer to the global information 200 * @local: pointer to the global information
193 * @sdata: virtual interface this station belongs to 201 * @sdata: virtual interface this station belongs to
194 * @key: peer key negotiated with this station, if any 202 * @ptk: peer key negotiated with this station, if any
203 * @gtk: group keys negotiated with this station, if any
195 * @rate_ctrl: rate control algorithm reference 204 * @rate_ctrl: rate control algorithm reference
196 * @rate_ctrl_priv: rate control private per-STA pointer 205 * @rate_ctrl_priv: rate control private per-STA pointer
197 * @last_tx_rate: rate used for last transmit, to report to userspace as 206 * @last_tx_rate: rate used for last transmit, to report to userspace as
@@ -246,7 +255,8 @@ struct sta_info {
246 struct sta_info *hnext; 255 struct sta_info *hnext;
247 struct ieee80211_local *local; 256 struct ieee80211_local *local;
248 struct ieee80211_sub_if_data *sdata; 257 struct ieee80211_sub_if_data *sdata;
249 struct ieee80211_key *key; 258 struct ieee80211_key *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS];
259 struct ieee80211_key *ptk;
250 struct rate_control_ref *rate_ctrl; 260 struct rate_control_ref *rate_ctrl;
251 void *rate_ctrl_priv; 261 void *rate_ctrl_priv;
252 spinlock_t lock; 262 spinlock_t lock;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 34da67995d94..3153c19893b8 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -58,6 +58,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
58 info->control.vif = &sta->sdata->vif; 58 info->control.vif = &sta->sdata->vif;
59 info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING | 59 info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING |
60 IEEE80211_TX_INTFL_RETRANSMISSION; 60 IEEE80211_TX_INTFL_RETRANSMISSION;
61 info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS;
61 62
62 sta->tx_filtered_count++; 63 sta->tx_filtered_count++;
63 64
@@ -114,11 +115,10 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
114 115
115#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 116#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
116 if (net_ratelimit()) 117 if (net_ratelimit())
117 printk(KERN_DEBUG "%s: dropped TX filtered frame, " 118 wiphy_debug(local->hw.wiphy,
118 "queue_len=%d PS=%d @%lu\n", 119 "dropped TX filtered frame, queue_len=%d PS=%d @%lu\n",
119 wiphy_name(local->hw.wiphy), 120 skb_queue_len(&sta->tx_filtered),
120 skb_queue_len(&sta->tx_filtered), 121 !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies);
121 !!test_sta_flags(sta, WLAN_STA_PS_STA), jiffies);
122#endif 122#endif
123 dev_kfree_skb(skb); 123 dev_kfree_skb(skb);
124} 124}
@@ -176,7 +176,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
176 176
177 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { 177 for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
178 /* the HW cannot have attempted that rate */ 178 /* the HW cannot have attempted that rate */
179 if (i >= hw->max_rates) { 179 if (i >= hw->max_report_rates) {
180 info->status.rates[i].idx = -1; 180 info->status.rates[i].idx = -1;
181 info->status.rates[i].count = 0; 181 info->status.rates[i].count = 0;
182 } else if (info->status.rates[i].idx >= 0) { 182 } else if (info->status.rates[i].idx >= 0) {
@@ -296,7 +296,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
296 } 296 }
297 297
298 if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) 298 if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX)
299 cfg80211_action_tx_status( 299 cfg80211_mgmt_tx_status(
300 skb->dev, (unsigned long) skb, skb->data, skb->len, 300 skb->dev, (unsigned long) skb, skb->data, skb->len,
301 !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); 301 !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC);
302 302
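
Checking against max_report_rates instead of max_rates assumes hardware may accept a longer retry chain than it can report status for; entries past the reporting limit must be invalidated so rate control does not consume stale data. A standalone sketch of that clearing loop, with made-up values:

#include <stdio.h>

#define IEEE80211_TX_MAX_RATES 5

struct rate { int idx; int count; };

int main(void)
{
        /* last two entries are stale garbage left over from the driver */
        struct rate rates[IEEE80211_TX_MAX_RATES] =
                { {2, 3}, {1, 3}, {0, 3}, {7, 9}, {7, 9} };
        int max_report_rates = 3, i;

        for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
                if (i >= max_report_rates) {    /* HW cannot have tried it */
                        rates[i].idx = -1;
                        rates[i].count = 0;
                }
        }

        for (i = 0; i < IEEE80211_TX_MAX_RATES; i++)
                printf("rate[%d]: idx=%d count=%d\n", i,
                       rates[i].idx, rates[i].count);
        return 0;
}
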
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index c54db966926b..96c594309506 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -273,6 +273,9 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
273 */ 273 */
274 return TX_DROP; 274 return TX_DROP;
275 275
276 if (tx->sdata->vif.type == NL80211_IFTYPE_WDS)
277 return TX_CONTINUE;
278
276 if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT) 279 if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
277 return TX_CONTINUE; 280 return TX_CONTINUE;
278 281
@@ -351,8 +354,8 @@ static void purge_old_ps_buffers(struct ieee80211_local *local)
351 354
352 local->total_ps_buffered = total; 355 local->total_ps_buffered = total;
353#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG 356#ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
354 printk(KERN_DEBUG "%s: PS buffers full - purged %d frames\n", 357 wiphy_debug(local->hw.wiphy, "PS buffers full - purged %d frames\n",
355 wiphy_name(local->hw.wiphy), purged); 358 purged);
356#endif 359#endif
357} 360}
358 361
@@ -509,6 +512,18 @@ ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx)
509} 512}
510 513
511static ieee80211_tx_result debug_noinline 514static ieee80211_tx_result debug_noinline
515ieee80211_tx_h_check_control_port_protocol(struct ieee80211_tx_data *tx)
516{
517 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
518
519 if (unlikely(tx->sdata->control_port_protocol == tx->skb->protocol &&
520 tx->sdata->control_port_no_encrypt))
521 info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
522
523 return TX_CONTINUE;
524}
525
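
The new handler keys off sdata->control_port_protocol, which is configurable per interface but is normally EAPOL (ethertype 0x888e); when control_port_no_encrypt is also set, matching frames are flagged to bypass encryption during the handshake. A toy version of the check; the struct here is a stand-in, not ieee80211_sub_if_data:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ETH_P_PAE 0x888e        /* EAPOL, the usual control port protocol */

struct toy_iface {
        uint16_t control_port_protocol; /* __be16 in the kernel; host
                                           order here for simplicity */
        bool control_port_no_encrypt;
};

static bool must_skip_encrypt(const struct toy_iface *sdata, uint16_t proto)
{
        return sdata->control_port_protocol == proto &&
               sdata->control_port_no_encrypt;
}

int main(void)
{
        struct toy_iface sdata = { ETH_P_PAE, true };

        printf("EAPOL unencrypted: %d\n", must_skip_encrypt(&sdata, ETH_P_PAE));
        printf("IPv4 unencrypted:  %d\n", must_skip_encrypt(&sdata, 0x0800));
        return 0;
}
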
526static ieee80211_tx_result debug_noinline
512ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) 527ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
513{ 528{
514 struct ieee80211_key *key = NULL; 529 struct ieee80211_key *key = NULL;
@@ -517,7 +532,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
517 532
518 if (unlikely(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) 533 if (unlikely(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT))
519 tx->key = NULL; 534 tx->key = NULL;
520 else if (tx->sta && (key = rcu_dereference(tx->sta->key))) 535 else if (tx->sta && (key = rcu_dereference(tx->sta->ptk)))
521 tx->key = key; 536 tx->key = key;
522 else if (ieee80211_is_mgmt(hdr->frame_control) && 537 else if (ieee80211_is_mgmt(hdr->frame_control) &&
523 is_multicast_ether_addr(hdr->addr1) && 538 is_multicast_ether_addr(hdr->addr1) &&
@@ -527,7 +542,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
527 else if ((key = rcu_dereference(tx->sdata->default_key))) 542 else if ((key = rcu_dereference(tx->sdata->default_key)))
528 tx->key = key; 543 tx->key = key;
529 else if (tx->sdata->drop_unencrypted && 544 else if (tx->sdata->drop_unencrypted &&
530 (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) && 545 (tx->skb->protocol != tx->sdata->control_port_protocol) &&
531 !(info->flags & IEEE80211_TX_CTL_INJECTED) && 546 !(info->flags & IEEE80211_TX_CTL_INJECTED) &&
532 (!ieee80211_is_robust_mgmt_frame(hdr) || 547 (!ieee80211_is_robust_mgmt_frame(hdr) ||
533 (ieee80211_is_action(hdr->frame_control) && 548 (ieee80211_is_action(hdr->frame_control) &&
@@ -543,15 +558,16 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
543 tx->key->tx_rx_count++; 558 tx->key->tx_rx_count++;
544 /* TODO: add threshold stuff again */ 559 /* TODO: add threshold stuff again */
545 560
546 switch (tx->key->conf.alg) { 561 switch (tx->key->conf.cipher) {
547 case ALG_WEP: 562 case WLAN_CIPHER_SUITE_WEP40:
563 case WLAN_CIPHER_SUITE_WEP104:
548 if (ieee80211_is_auth(hdr->frame_control)) 564 if (ieee80211_is_auth(hdr->frame_control))
549 break; 565 break;
550 case ALG_TKIP: 566 case WLAN_CIPHER_SUITE_TKIP:
551 if (!ieee80211_is_data_present(hdr->frame_control)) 567 if (!ieee80211_is_data_present(hdr->frame_control))
552 tx->key = NULL; 568 tx->key = NULL;
553 break; 569 break;
554 case ALG_CCMP: 570 case WLAN_CIPHER_SUITE_CCMP:
555 if (!ieee80211_is_data_present(hdr->frame_control) && 571 if (!ieee80211_is_data_present(hdr->frame_control) &&
556 !ieee80211_use_mfp(hdr->frame_control, tx->sta, 572 !ieee80211_use_mfp(hdr->frame_control, tx->sta,
557 tx->skb)) 573 tx->skb))
@@ -561,7 +577,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
561 IEEE80211_KEY_FLAG_SW_MGMT) && 577 IEEE80211_KEY_FLAG_SW_MGMT) &&
562 ieee80211_is_mgmt(hdr->frame_control); 578 ieee80211_is_mgmt(hdr->frame_control);
563 break; 579 break;
564 case ALG_AES_CMAC: 580 case WLAN_CIPHER_SUITE_AES_CMAC:
565 if (!ieee80211_is_mgmt(hdr->frame_control)) 581 if (!ieee80211_is_mgmt(hdr->frame_control))
566 tx->key = NULL; 582 tx->key = NULL;
567 break; 583 break;
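
The switch now matches the IEEE cipher-suite selectors (OUI 00-0F-AC) rather than the internal ALG_* enum; WEP-40 and WEP-104 have distinct selectors, which is why the single ALG_WEP case becomes two labels. For reference, a small lookup over the standard selector values used above:

#include <stdint.h>
#include <stdio.h>

#define WLAN_CIPHER_SUITE_WEP40    0x000FAC01
#define WLAN_CIPHER_SUITE_TKIP     0x000FAC02
#define WLAN_CIPHER_SUITE_CCMP     0x000FAC04
#define WLAN_CIPHER_SUITE_WEP104   0x000FAC05
#define WLAN_CIPHER_SUITE_AES_CMAC 0x000FAC06

static const char *cipher_name(uint32_t cipher)
{
        switch (cipher) {
        case WLAN_CIPHER_SUITE_WEP40:    return "WEP-40";
        case WLAN_CIPHER_SUITE_WEP104:   return "WEP-104";
        case WLAN_CIPHER_SUITE_TKIP:     return "TKIP";
        case WLAN_CIPHER_SUITE_CCMP:     return "CCMP";
        case WLAN_CIPHER_SUITE_AES_CMAC: return "BIP (AES-CMAC)";
        default:                         return "driver-specific";
        }
}

int main(void)
{
        printf("%s\n", cipher_name(WLAN_CIPHER_SUITE_CCMP));
        return 0;
}

A side effect of the u32 selector space is the new default case in the encrypt handler: unknown suites can be hardware-only ciphers rather than bugs.
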
@@ -946,22 +962,31 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx)
946static ieee80211_tx_result debug_noinline 962static ieee80211_tx_result debug_noinline
947ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) 963ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx)
948{ 964{
965 struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
966
949 if (!tx->key) 967 if (!tx->key)
950 return TX_CONTINUE; 968 return TX_CONTINUE;
951 969
952 switch (tx->key->conf.alg) { 970 switch (tx->key->conf.cipher) {
953 case ALG_WEP: 971 case WLAN_CIPHER_SUITE_WEP40:
972 case WLAN_CIPHER_SUITE_WEP104:
954 return ieee80211_crypto_wep_encrypt(tx); 973 return ieee80211_crypto_wep_encrypt(tx);
955 case ALG_TKIP: 974 case WLAN_CIPHER_SUITE_TKIP:
956 return ieee80211_crypto_tkip_encrypt(tx); 975 return ieee80211_crypto_tkip_encrypt(tx);
957 case ALG_CCMP: 976 case WLAN_CIPHER_SUITE_CCMP:
958 return ieee80211_crypto_ccmp_encrypt(tx); 977 return ieee80211_crypto_ccmp_encrypt(tx);
959 case ALG_AES_CMAC: 978 case WLAN_CIPHER_SUITE_AES_CMAC:
960 return ieee80211_crypto_aes_cmac_encrypt(tx); 979 return ieee80211_crypto_aes_cmac_encrypt(tx);
980 default:
981 /* handle hw-only algorithm */
982 if (info->control.hw_key) {
983 ieee80211_tx_set_protected(tx);
984 return TX_CONTINUE;
985 }
986 break;
987
961 } 988 }
962 989
963 /* not reached */
964 WARN_ON(1);
965 return TX_DROP; 990 return TX_DROP;
966} 991}
967 992
@@ -1339,6 +1364,7 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx)
1339 CALL_TXH(ieee80211_tx_h_dynamic_ps); 1364 CALL_TXH(ieee80211_tx_h_dynamic_ps);
1340 CALL_TXH(ieee80211_tx_h_check_assoc); 1365 CALL_TXH(ieee80211_tx_h_check_assoc);
1341 CALL_TXH(ieee80211_tx_h_ps_buf); 1366 CALL_TXH(ieee80211_tx_h_ps_buf);
1367 CALL_TXH(ieee80211_tx_h_check_control_port_protocol);
1342 CALL_TXH(ieee80211_tx_h_select_key); 1368 CALL_TXH(ieee80211_tx_h_select_key);
1343 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) 1369 if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL))
1344 CALL_TXH(ieee80211_tx_h_rate_ctrl); 1370 CALL_TXH(ieee80211_tx_h_rate_ctrl);
@@ -1511,8 +1537,8 @@ static int ieee80211_skb_resize(struct ieee80211_local *local,
1511 I802_DEBUG_INC(local->tx_expand_skb_head); 1537 I802_DEBUG_INC(local->tx_expand_skb_head);
1512 1538
1513 if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) { 1539 if (pskb_expand_head(skb, head_need, tail_need, GFP_ATOMIC)) {
1514 printk(KERN_DEBUG "%s: failed to reallocate TX buffer\n", 1540 wiphy_debug(local->hw.wiphy,
1515 wiphy_name(local->hw.wiphy)); 1541 "failed to reallocate TX buffer\n");
1516 return -ENOMEM; 1542 return -ENOMEM;
1517 } 1543 }
1518 1544
@@ -1586,6 +1612,7 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata,
1586 return; 1612 return;
1587 } 1613 }
1588 1614
1615 hdr = (struct ieee80211_hdr *) skb->data;
1589 info->control.vif = &sdata->vif; 1616 info->control.vif = &sdata->vif;
1590 1617
1591 if (ieee80211_vif_is_mesh(&sdata->vif) && 1618 if (ieee80211_vif_is_mesh(&sdata->vif) &&
@@ -1699,7 +1726,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1699 u16 ethertype, hdrlen, meshhdrlen = 0; 1726 u16 ethertype, hdrlen, meshhdrlen = 0;
1700 __le16 fc; 1727 __le16 fc;
1701 struct ieee80211_hdr hdr; 1728 struct ieee80211_hdr hdr;
1702 struct ieee80211s_hdr mesh_hdr; 1729 struct ieee80211s_hdr mesh_hdr __maybe_unused;
1703 const u8 *encaps_data; 1730 const u8 *encaps_data;
1704 int encaps_len, skip_header_bytes; 1731 int encaps_len, skip_header_bytes;
1705 int nh_pos, h_pos; 1732 int nh_pos, h_pos;
@@ -1816,7 +1843,8 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1816#endif 1843#endif
1817 case NL80211_IFTYPE_STATION: 1844 case NL80211_IFTYPE_STATION:
1818 memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); 1845 memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN);
1819 if (sdata->u.mgd.use_4addr && ethertype != ETH_P_PAE) { 1846 if (sdata->u.mgd.use_4addr &&
1847 cpu_to_be16(ethertype) != sdata->control_port_protocol) {
1820 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); 1848 fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS);
1821 /* RA TA DA SA */ 1849 /* RA TA DA SA */
1822 memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN); 1850 memcpy(hdr.addr2, sdata->vif.addr, ETH_ALEN);
@@ -1869,7 +1897,7 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
1869 if (!ieee80211_vif_is_mesh(&sdata->vif) && 1897 if (!ieee80211_vif_is_mesh(&sdata->vif) &&
1870 unlikely(!is_multicast_ether_addr(hdr.addr1) && 1898 unlikely(!is_multicast_ether_addr(hdr.addr1) &&
1871 !(sta_flags & WLAN_STA_AUTHORIZED) && 1899 !(sta_flags & WLAN_STA_AUTHORIZED) &&
1872 !(ethertype == ETH_P_PAE && 1900 !(cpu_to_be16(ethertype) == sdata->control_port_protocol &&
1873 compare_ether_addr(sdata->vif.addr, 1901 compare_ether_addr(sdata->vif.addr,
1874 skb->data + ETH_ALEN) == 0))) { 1902 skb->data + ETH_ALEN) == 0))) {
1875#ifdef CONFIG_MAC80211_VERBOSE_DEBUG 1903#ifdef CONFIG_MAC80211_VERBOSE_DEBUG
@@ -2068,8 +2096,7 @@ void ieee80211_tx_pending(unsigned long data)
2068 2096
2069 if (skb_queue_empty(&local->pending[i])) 2097 if (skb_queue_empty(&local->pending[i]))
2070 list_for_each_entry_rcu(sdata, &local->interfaces, list) 2098 list_for_each_entry_rcu(sdata, &local->interfaces, list)
2071 netif_tx_wake_queue( 2099 netif_wake_subqueue(sdata->dev, i);
2072 netdev_get_tx_queue(sdata->dev, i));
2073 } 2100 }
2074 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); 2101 spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
2075 2102
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 748387d45bc0..0b6fc92bc0d7 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -283,8 +283,11 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
283 283
284 if (skb_queue_empty(&local->pending[queue])) { 284 if (skb_queue_empty(&local->pending[queue])) {
285 rcu_read_lock(); 285 rcu_read_lock();
286 list_for_each_entry_rcu(sdata, &local->interfaces, list) 286 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
287 netif_tx_wake_queue(netdev_get_tx_queue(sdata->dev, queue)); 287 if (test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
288 continue;
289 netif_wake_subqueue(sdata->dev, queue);
290 }
288 rcu_read_unlock(); 291 rcu_read_unlock();
289 } else 292 } else
290 tasklet_schedule(&local->tx_pending_tasklet); 293 tasklet_schedule(&local->tx_pending_tasklet);
@@ -323,7 +326,7 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
323 326
324 rcu_read_lock(); 327 rcu_read_lock();
325 list_for_each_entry_rcu(sdata, &local->interfaces, list) 328 list_for_each_entry_rcu(sdata, &local->interfaces, list)
326 netif_tx_stop_queue(netdev_get_tx_queue(sdata->dev, queue)); 329 netif_stop_subqueue(sdata->dev, queue);
327 rcu_read_unlock(); 330 rcu_read_unlock();
328} 331}
329 332
@@ -471,16 +474,10 @@ void ieee80211_iterate_active_interfaces(
471 474
472 list_for_each_entry(sdata, &local->interfaces, list) { 475 list_for_each_entry(sdata, &local->interfaces, list) {
473 switch (sdata->vif.type) { 476 switch (sdata->vif.type) {
474 case __NL80211_IFTYPE_AFTER_LAST:
475 case NL80211_IFTYPE_UNSPECIFIED:
476 case NL80211_IFTYPE_MONITOR: 477 case NL80211_IFTYPE_MONITOR:
477 case NL80211_IFTYPE_AP_VLAN: 478 case NL80211_IFTYPE_AP_VLAN:
478 continue; 479 continue;
479 case NL80211_IFTYPE_AP: 480 default:
480 case NL80211_IFTYPE_STATION:
481 case NL80211_IFTYPE_ADHOC:
482 case NL80211_IFTYPE_WDS:
483 case NL80211_IFTYPE_MESH_POINT:
484 break; 481 break;
485 } 482 }
486 if (ieee80211_sdata_running(sdata)) 483 if (ieee80211_sdata_running(sdata))
@@ -505,16 +502,10 @@ void ieee80211_iterate_active_interfaces_atomic(
505 502
506 list_for_each_entry_rcu(sdata, &local->interfaces, list) { 503 list_for_each_entry_rcu(sdata, &local->interfaces, list) {
507 switch (sdata->vif.type) { 504 switch (sdata->vif.type) {
508 case __NL80211_IFTYPE_AFTER_LAST:
509 case NL80211_IFTYPE_UNSPECIFIED:
510 case NL80211_IFTYPE_MONITOR: 505 case NL80211_IFTYPE_MONITOR:
511 case NL80211_IFTYPE_AP_VLAN: 506 case NL80211_IFTYPE_AP_VLAN:
512 continue; 507 continue;
513 case NL80211_IFTYPE_AP: 508 default:
514 case NL80211_IFTYPE_STATION:
515 case NL80211_IFTYPE_ADHOC:
516 case NL80211_IFTYPE_WDS:
517 case NL80211_IFTYPE_MESH_POINT:
518 break; 509 break;
519 } 510 }
520 if (ieee80211_sdata_running(sdata)) 511 if (ieee80211_sdata_running(sdata))
@@ -904,26 +895,34 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata,
904 895
905int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer, 896int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
906 const u8 *ie, size_t ie_len, 897 const u8 *ie, size_t ie_len,
907 enum ieee80211_band band) 898 enum ieee80211_band band, u32 rate_mask,
899 u8 channel)
908{ 900{
909 struct ieee80211_supported_band *sband; 901 struct ieee80211_supported_band *sband;
910 u8 *pos; 902 u8 *pos;
911 size_t offset = 0, noffset; 903 size_t offset = 0, noffset;
912 int supp_rates_len, i; 904 int supp_rates_len, i;
905 u8 rates[32];
906 int num_rates;
907 int ext_rates_len;
913 908
914 sband = local->hw.wiphy->bands[band]; 909 sband = local->hw.wiphy->bands[band];
915 910
916 pos = buffer; 911 pos = buffer;
917 912
918 supp_rates_len = min_t(int, sband->n_bitrates, 8); 913 num_rates = 0;
914 for (i = 0; i < sband->n_bitrates; i++) {
915 if ((BIT(i) & rate_mask) == 0)
916 continue; /* skip rate */
917 rates[num_rates++] = (u8) (sband->bitrates[i].bitrate / 5);
918 }
919
920 supp_rates_len = min_t(int, num_rates, 8);
919 921
920 *pos++ = WLAN_EID_SUPP_RATES; 922 *pos++ = WLAN_EID_SUPP_RATES;
921 *pos++ = supp_rates_len; 923 *pos++ = supp_rates_len;
922 924 memcpy(pos, rates, supp_rates_len);
923 for (i = 0; i < supp_rates_len; i++) { 925 pos += supp_rates_len;
924 int rate = sband->bitrates[i].bitrate;
925 *pos++ = (u8) (rate / 5);
926 }
927 926
928 /* insert "request information" if in custom IEs */ 927 /* insert "request information" if in custom IEs */
929 if (ie && ie_len) { 928 if (ie && ie_len) {
@@ -941,14 +940,18 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
941 offset = noffset; 940 offset = noffset;
942 } 941 }
943 942
944 if (sband->n_bitrates > i) { 943 ext_rates_len = num_rates - supp_rates_len;
944 if (ext_rates_len > 0) {
945 *pos++ = WLAN_EID_EXT_SUPP_RATES; 945 *pos++ = WLAN_EID_EXT_SUPP_RATES;
946 *pos++ = sband->n_bitrates - i; 946 *pos++ = ext_rates_len;
947 memcpy(pos, rates + supp_rates_len, ext_rates_len);
948 pos += ext_rates_len;
949 }
947 950
948 for (; i < sband->n_bitrates; i++) { 951 if (channel && sband->band == IEEE80211_BAND_2GHZ) {
949 int rate = sband->bitrates[i].bitrate; 952 *pos++ = WLAN_EID_DS_PARAMS;
950 *pos++ = (u8) (rate / 5); 953 *pos++ = 1;
951 } 954 *pos++ = channel;
952 } 955 }
953 956
954 /* insert custom IEs that go before HT */ 957 /* insert custom IEs that go before HT */
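
ieee80211_build_preq_ies() now filters the band's bitrates through the caller's rate_mask before splitting them into Supported Rates (at most 8 entries) and Extended Supported Rates; rates are encoded in 500 kbit/s units, hence the division by 5. A standalone sketch of the filter-and-split, with an invented bitrate table and mask:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* bitrates in 100 kbit/s units: CCK 1/2/5.5/11 Mb/s plus OFDM */
        static const int bitrates[] = { 10, 20, 55, 110, 60, 90, 120,
                                        180, 240, 360, 480, 540 };
        const int n_bitrates = sizeof(bitrates) / sizeof(bitrates[0]);
        uint32_t rate_mask = 0xFFFu;    /* example: allow all 12 rates */
        uint8_t rates[32];
        int num_rates = 0, supp_rates_len, i;

        for (i = 0; i < n_bitrates; i++) {
                if (!(rate_mask & (1u << i)))
                        continue;       /* rate masked out by caller */
                rates[num_rates++] = (uint8_t)(bitrates[i] / 5);
        }

        supp_rates_len = num_rates < 8 ? num_rates : 8;
        printf("WLAN_EID_SUPP_RATES:     %d entries\n", supp_rates_len);
        if (num_rates > supp_rates_len)
                printf("WLAN_EID_EXT_SUPP_RATES: %d entries\n",
                       num_rates - supp_rates_len);

        for (i = 0; i < num_rates; i++)
                printf("rate[%d] = %u x 500 kbit/s\n", i, rates[i]);
        return 0;
}
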
@@ -1017,6 +1020,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1017 struct ieee80211_mgmt *mgmt; 1020 struct ieee80211_mgmt *mgmt;
1018 size_t buf_len; 1021 size_t buf_len;
1019 u8 *buf; 1022 u8 *buf;
1023 u8 chan;
1020 1024
1021 /* FIXME: come up with a proper value */ 1025 /* FIXME: come up with a proper value */
1022 buf = kmalloc(200 + ie_len, GFP_KERNEL); 1026 buf = kmalloc(200 + ie_len, GFP_KERNEL);
@@ -1026,8 +1030,14 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst,
1026 return; 1030 return;
1027 } 1031 }
1028 1032
1033 chan = ieee80211_frequency_to_channel(
1034 local->hw.conf.channel->center_freq);
1035
1029 buf_len = ieee80211_build_preq_ies(local, buf, ie, ie_len, 1036 buf_len = ieee80211_build_preq_ies(local, buf, ie, ie_len,
1030 local->hw.conf.channel->band); 1037 local->hw.conf.channel->band,
1038 sdata->rc_rateidx_mask
1039 [local->hw.conf.channel->band],
1040 chan);
1031 1041
1032 skb = ieee80211_probereq_get(&local->hw, &sdata->vif, 1042 skb = ieee80211_probereq_get(&local->hw, &sdata->vif,
1033 ssid, ssid_len, 1043 ssid, ssid_len,
@@ -1189,7 +1199,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1189 /* ignore virtual */ 1199 /* ignore virtual */
1190 break; 1200 break;
1191 case NL80211_IFTYPE_UNSPECIFIED: 1201 case NL80211_IFTYPE_UNSPECIFIED:
1192 case __NL80211_IFTYPE_AFTER_LAST: 1202 case NUM_NL80211_IFTYPES:
1203 case NL80211_IFTYPE_P2P_CLIENT:
1204 case NL80211_IFTYPE_P2P_GO:
1193 WARN_ON(1); 1205 WARN_ON(1);
1194 break; 1206 break;
1195 } 1207 }
@@ -1209,7 +1221,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
1209 mutex_lock(&local->sta_mtx); 1221 mutex_lock(&local->sta_mtx);
1210 1222
1211 list_for_each_entry(sta, &local->sta_list, list) { 1223 list_for_each_entry(sta, &local->sta_list, list) {
1212 ieee80211_sta_tear_down_BA_sessions(sta); 1224 ieee80211_sta_tear_down_BA_sessions(sta, true);
1213 clear_sta_flags(sta, WLAN_STA_BLOCK_BA); 1225 clear_sta_flags(sta, WLAN_STA_BLOCK_BA);
1214 } 1226 }
1215 1227
@@ -1285,17 +1297,13 @@ static int check_mgd_smps(struct ieee80211_if_managed *ifmgd,
1285} 1297}
1286 1298
1287/* must hold iflist_mtx */ 1299/* must hold iflist_mtx */
1288void ieee80211_recalc_smps(struct ieee80211_local *local, 1300void ieee80211_recalc_smps(struct ieee80211_local *local)
1289 struct ieee80211_sub_if_data *forsdata)
1290{ 1301{
1291 struct ieee80211_sub_if_data *sdata; 1302 struct ieee80211_sub_if_data *sdata;
1292 enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_OFF; 1303 enum ieee80211_smps_mode smps_mode = IEEE80211_SMPS_OFF;
1293 int count = 0; 1304 int count = 0;
1294 1305
1295 if (forsdata) 1306 lockdep_assert_held(&local->iflist_mtx);
1296 WARN_ON(!mutex_is_locked(&forsdata->u.mgd.mtx));
1297
1298 WARN_ON(!mutex_is_locked(&local->iflist_mtx));
1299 1307
1300 /* 1308 /*
1301 * This function could be improved to handle multiple 1309 * This function could be improved to handle multiple
@@ -1308,22 +1316,12 @@ void ieee80211_recalc_smps(struct ieee80211_local *local,
1308 */ 1316 */
1309 1317
1310 list_for_each_entry(sdata, &local->interfaces, list) { 1318 list_for_each_entry(sdata, &local->interfaces, list) {
1311 if (!netif_running(sdata->dev)) 1319 if (!ieee80211_sdata_running(sdata))
1312 continue; 1320 continue;
1313 if (sdata->vif.type != NL80211_IFTYPE_STATION) 1321 if (sdata->vif.type != NL80211_IFTYPE_STATION)
1314 goto set; 1322 goto set;
1315 if (sdata != forsdata) { 1323
1316 /* 1324 count += check_mgd_smps(&sdata->u.mgd, &smps_mode);
1317 * This nested is ok -- we are holding the iflist_mtx
1318 * so can't get here twice or so. But it's required
1319 * since normally we acquire it first and then the
1320 * iflist_mtx.
1321 */
1322 mutex_lock_nested(&sdata->u.mgd.mtx, SINGLE_DEPTH_NESTING);
1323 count += check_mgd_smps(&sdata->u.mgd, &smps_mode);
1324 mutex_unlock(&sdata->u.mgd.mtx);
1325 } else
1326 count += check_mgd_smps(&sdata->u.mgd, &smps_mode);
1327 1325
1328 if (count > 1) { 1326 if (count > 1) {
1329 smps_mode = IEEE80211_SMPS_OFF; 1327 smps_mode = IEEE80211_SMPS_OFF;
diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c
index 9ebc8d8a1f5b..2ff6d1e3ed21 100644
--- a/net/mac80211/wep.c
+++ b/net/mac80211/wep.c
@@ -222,7 +222,7 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local,
222 struct ieee80211_key *key) 222 struct ieee80211_key *key)
223{ 223{
224 u32 klen; 224 u32 klen;
225 u8 *rc4key; 225 u8 rc4key[3 + WLAN_KEY_LEN_WEP104];
226 u8 keyidx; 226 u8 keyidx;
227 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; 227 struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
228 unsigned int hdrlen; 228 unsigned int hdrlen;
@@ -240,15 +240,11 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local,
240 240
241 keyidx = skb->data[hdrlen + 3] >> 6; 241 keyidx = skb->data[hdrlen + 3] >> 6;
242 242
243 if (!key || keyidx != key->conf.keyidx || key->conf.alg != ALG_WEP) 243 if (!key || keyidx != key->conf.keyidx)
244 return -1; 244 return -1;
245 245
246 klen = 3 + key->conf.keylen; 246 klen = 3 + key->conf.keylen;
247 247
248 rc4key = kmalloc(klen, GFP_ATOMIC);
249 if (!rc4key)
250 return -1;
251
252 /* Prepend 24-bit IV to RC4 key */ 248 /* Prepend 24-bit IV to RC4 key */
253 memcpy(rc4key, skb->data + hdrlen, 3); 249 memcpy(rc4key, skb->data + hdrlen, 3);
254 250
@@ -260,8 +256,6 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local,
260 len)) 256 len))
261 ret = -1; 257 ret = -1;
262 258
263 kfree(rc4key);
264
265 /* Trim ICV */ 259 /* Trim ICV */
266 skb_trim(skb, skb->len - WEP_ICV_LEN); 260 skb_trim(skb, skb->len - WEP_ICV_LEN);
267 261
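
The kmalloc() could be dropped because the RC4 seed has a small, fixed upper bound: a 3-byte IV plus at most a 104-bit (13-byte) WEP key, so a 16-byte stack buffer always suffices and removes an allocation-failure path from RX. A tiny demonstration of the sizing; the IV and key bytes are placeholders:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define WLAN_KEY_LEN_WEP104 13  /* longest WEP key, per 802.11 */

int main(void)
{
        uint8_t rc4key[3 + WLAN_KEY_LEN_WEP104];
        const uint8_t iv[3] = { 0x12, 0x34, 0x56 };
        const uint8_t key[WLAN_KEY_LEN_WEP104] = { 0 };

        memcpy(rc4key, iv, 3);                  /* prepend 24-bit IV */
        memcpy(rc4key + 3, key, sizeof(key));   /* then the WEP key */
        printf("seed length: %zu bytes\n", sizeof(rc4key));
        return 0;
}
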
diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 81d4ad64184a..ae344d1ba056 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -43,7 +43,7 @@ enum work_action {
43/* utils */ 43/* utils */
44static inline void ASSERT_WORK_MTX(struct ieee80211_local *local) 44static inline void ASSERT_WORK_MTX(struct ieee80211_local *local)
45{ 45{
46 WARN_ON(!mutex_is_locked(&local->work_mtx)); 46 lockdep_assert_held(&local->mtx);
47} 47}
48 48
49/* 49/*
@@ -757,7 +757,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
757 mgmt = (struct ieee80211_mgmt *) skb->data; 757 mgmt = (struct ieee80211_mgmt *) skb->data;
758 fc = le16_to_cpu(mgmt->frame_control); 758 fc = le16_to_cpu(mgmt->frame_control);
759 759
760 mutex_lock(&local->work_mtx); 760 mutex_lock(&local->mtx);
761 761
762 list_for_each_entry(wk, &local->work_list, list) { 762 list_for_each_entry(wk, &local->work_list, list) {
763 const u8 *bssid = NULL; 763 const u8 *bssid = NULL;
@@ -833,7 +833,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
833 WARN(1, "unexpected: %d", rma); 833 WARN(1, "unexpected: %d", rma);
834 } 834 }
835 835
836 mutex_unlock(&local->work_mtx); 836 mutex_unlock(&local->mtx);
837 837
838 if (rma != WORK_ACT_DONE) 838 if (rma != WORK_ACT_DONE)
839 goto out; 839 goto out;
@@ -845,9 +845,9 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
845 case WORK_DONE_REQUEUE: 845 case WORK_DONE_REQUEUE:
846 synchronize_rcu(); 846 synchronize_rcu();
847 wk->started = false; /* restart */ 847 wk->started = false; /* restart */
848 mutex_lock(&local->work_mtx); 848 mutex_lock(&local->mtx);
849 list_add_tail(&wk->list, &local->work_list); 849 list_add_tail(&wk->list, &local->work_list);
850 mutex_unlock(&local->work_mtx); 850 mutex_unlock(&local->mtx);
851 } 851 }
852 852
853 out: 853 out:
@@ -888,9 +888,9 @@ static void ieee80211_work_work(struct work_struct *work)
888 while ((skb = skb_dequeue(&local->work_skb_queue))) 888 while ((skb = skb_dequeue(&local->work_skb_queue)))
889 ieee80211_work_rx_queued_mgmt(local, skb); 889 ieee80211_work_rx_queued_mgmt(local, skb);
890 890
891 ieee80211_recalc_idle(local); 891 mutex_lock(&local->mtx);
892 892
893 mutex_lock(&local->work_mtx); 893 ieee80211_recalc_idle(local);
894 894
895 list_for_each_entry_safe(wk, tmp, &local->work_list, list) { 895 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
896 bool started = wk->started; 896 bool started = wk->started;
@@ -995,20 +995,16 @@ static void ieee80211_work_work(struct work_struct *work)
995 run_again(local, jiffies + HZ/2); 995 run_again(local, jiffies + HZ/2);
996 } 996 }
997 997
998 mutex_lock(&local->scan_mtx);
999
1000 if (list_empty(&local->work_list) && local->scan_req && 998 if (list_empty(&local->work_list) && local->scan_req &&
1001 !local->scanning) 999 !local->scanning)
1002 ieee80211_queue_delayed_work(&local->hw, 1000 ieee80211_queue_delayed_work(&local->hw,
1003 &local->scan_work, 1001 &local->scan_work,
1004 round_jiffies_relative(0)); 1002 round_jiffies_relative(0));
1005 1003
1006 mutex_unlock(&local->scan_mtx);
1007
1008 mutex_unlock(&local->work_mtx);
1009
1010 ieee80211_recalc_idle(local); 1004 ieee80211_recalc_idle(local);
1011 1005
1006 mutex_unlock(&local->mtx);
1007
1012 list_for_each_entry_safe(wk, tmp, &free_work, list) { 1008 list_for_each_entry_safe(wk, tmp, &free_work, list) {
1013 wk->done(wk, NULL); 1009 wk->done(wk, NULL);
1014 list_del(&wk->list); 1010 list_del(&wk->list);
@@ -1035,16 +1031,15 @@ void ieee80211_add_work(struct ieee80211_work *wk)
1035 wk->started = false; 1031 wk->started = false;
1036 1032
1037 local = wk->sdata->local; 1033 local = wk->sdata->local;
1038 mutex_lock(&local->work_mtx); 1034 mutex_lock(&local->mtx);
1039 list_add_tail(&wk->list, &local->work_list); 1035 list_add_tail(&wk->list, &local->work_list);
1040 mutex_unlock(&local->work_mtx); 1036 mutex_unlock(&local->mtx);
1041 1037
1042 ieee80211_queue_work(&local->hw, &local->work_work); 1038 ieee80211_queue_work(&local->hw, &local->work_work);
1043} 1039}
1044 1040
1045void ieee80211_work_init(struct ieee80211_local *local) 1041void ieee80211_work_init(struct ieee80211_local *local)
1046{ 1042{
1047 mutex_init(&local->work_mtx);
1048 INIT_LIST_HEAD(&local->work_list); 1043 INIT_LIST_HEAD(&local->work_list);
1049 setup_timer(&local->work_timer, ieee80211_work_timer, 1044 setup_timer(&local->work_timer, ieee80211_work_timer,
1050 (unsigned long)local); 1045 (unsigned long)local);
@@ -1057,7 +1052,7 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
1057 struct ieee80211_local *local = sdata->local; 1052 struct ieee80211_local *local = sdata->local;
1058 struct ieee80211_work *wk; 1053 struct ieee80211_work *wk;
1059 1054
1060 mutex_lock(&local->work_mtx); 1055 mutex_lock(&local->mtx);
1061 list_for_each_entry(wk, &local->work_list, list) { 1056 list_for_each_entry(wk, &local->work_list, list) {
1062 if (wk->sdata != sdata) 1057 if (wk->sdata != sdata)
1063 continue; 1058 continue;
@@ -1065,19 +1060,19 @@ void ieee80211_work_purge(struct ieee80211_sub_if_data *sdata)
1065 wk->started = true; 1060 wk->started = true;
1066 wk->timeout = jiffies; 1061 wk->timeout = jiffies;
1067 } 1062 }
1068 mutex_unlock(&local->work_mtx); 1063 mutex_unlock(&local->mtx);
1069 1064
1070 /* run cleanups etc. */ 1065 /* run cleanups etc. */
1071 ieee80211_work_work(&local->work_work); 1066 ieee80211_work_work(&local->work_work);
1072 1067
1073 mutex_lock(&local->work_mtx); 1068 mutex_lock(&local->mtx);
1074 list_for_each_entry(wk, &local->work_list, list) { 1069 list_for_each_entry(wk, &local->work_list, list) {
1075 if (wk->sdata != sdata) 1070 if (wk->sdata != sdata)
1076 continue; 1071 continue;
1077 WARN_ON(1); 1072 WARN_ON(1);
1078 break; 1073 break;
1079 } 1074 }
1080 mutex_unlock(&local->work_mtx); 1075 mutex_unlock(&local->mtx);
1081} 1076}
1082 1077
1083ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata, 1078ieee80211_rx_result ieee80211_work_rx_mgmt(struct ieee80211_sub_if_data *sdata,
@@ -1163,7 +1158,7 @@ int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1163 struct ieee80211_work *wk, *tmp; 1158 struct ieee80211_work *wk, *tmp;
1164 bool found = false; 1159 bool found = false;
1165 1160
1166 mutex_lock(&local->work_mtx); 1161 mutex_lock(&local->mtx);
1167 list_for_each_entry_safe(wk, tmp, &local->work_list, list) { 1162 list_for_each_entry_safe(wk, tmp, &local->work_list, list) {
1168 if ((unsigned long) wk == cookie) { 1163 if ((unsigned long) wk == cookie) {
1169 wk->timeout = jiffies; 1164 wk->timeout = jiffies;
@@ -1171,7 +1166,7 @@ int ieee80211_wk_cancel_remain_on_channel(struct ieee80211_sub_if_data *sdata,
1171 break; 1166 break;
1172 } 1167 }
1173 } 1168 }
1174 mutex_unlock(&local->work_mtx); 1169 mutex_unlock(&local->mtx);
1175 1170
1176 if (!found) 1171 if (!found)
1177 return -ENOENT; 1172 return -ENOENT;
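Across work.c the dedicated work_mtx (and the scan_mtx acquisition in ieee80211_work_work()) is folded into the single local->mtx, and WARN_ON(!mutex_is_locked(...)) becomes lockdep_assert_held(). The two checks differ: mutex_is_locked() only proves that somebody holds the lock, while lockdep_assert_held() proves the current task holds it, and compiles away when lockdep is off. A rough pthread analogue of the weaker check, assuming POSIX trylock semantics, shows why it proves so little:

    #include <assert.h>
    #include <errno.h>
    #include <pthread.h>

    static pthread_mutex_t work_lock = PTHREAD_MUTEX_INITIALIZER;

    /* Weak check: only asserts that the mutex is held by *someone*,
     * which is all mutex_is_locked() can tell you; a racing holder
     * satisfies it just as well as the caller. lockdep_assert_held()
     * instead verifies that the current task is the holder. */
    static void assert_locked_by_someone(pthread_mutex_t *m)
    {
        assert(pthread_mutex_trylock(m) == EBUSY);
    }

    int main(void)
    {
        pthread_mutex_lock(&work_lock);
        assert_locked_by_someone(&work_lock); /* passes, but proves little */
        pthread_mutex_unlock(&work_lock);
        return 0;
    }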
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 8d59d27d887e..bee230d8fd11 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -36,8 +36,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx)
36 int tail; 36 int tail;
37 37
38 hdr = (struct ieee80211_hdr *)skb->data; 38 hdr = (struct ieee80211_hdr *)skb->data;
39 if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || 39 if (!tx->key || tx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP ||
40 !ieee80211_is_data_present(hdr->frame_control)) 40 skb->len < 24 || !ieee80211_is_data_present(hdr->frame_control))
41 return TX_CONTINUE; 41 return TX_CONTINUE;
42 42
43 hdrlen = ieee80211_hdrlen(hdr->frame_control); 43 hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -94,7 +94,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
94 if (status->flag & RX_FLAG_MMIC_STRIPPED) 94 if (status->flag & RX_FLAG_MMIC_STRIPPED)
95 return RX_CONTINUE; 95 return RX_CONTINUE;
96 96
97 if (!rx->key || rx->key->conf.alg != ALG_TKIP || 97 if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP ||
98 !ieee80211_has_protected(hdr->frame_control) || 98 !ieee80211_has_protected(hdr->frame_control) ||
99 !ieee80211_is_data_present(hdr->frame_control)) 99 !ieee80211_is_data_present(hdr->frame_control))
100 return RX_CONTINUE; 100 return RX_CONTINUE;
@@ -117,7 +117,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx)
117 key = &rx->key->conf.key[key_offset]; 117 key = &rx->key->conf.key[key_offset];
118 michael_mic(key, hdr, data, data_len, mic); 118 michael_mic(key, hdr, data, data_len, mic);
119 if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { 119 if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) {
120 if (!(rx->flags & IEEE80211_RX_RA_MATCH)) 120 if (!(status->rx_flags & IEEE80211_RX_RA_MATCH))
121 return RX_DROP_UNUSABLE; 121 return RX_DROP_UNUSABLE;
122 122
123 mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx, 123 mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx,
@@ -221,19 +221,13 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx)
221 if (!rx->sta || skb->len - hdrlen < 12) 221 if (!rx->sta || skb->len - hdrlen < 12)
222 return RX_DROP_UNUSABLE; 222 return RX_DROP_UNUSABLE;
223 223
224 if (status->flag & RX_FLAG_DECRYPTED) { 224 /*
225 if (status->flag & RX_FLAG_IV_STRIPPED) { 225 * Let TKIP code verify IV, but skip decryption.
226 /* 226 * In the case where hardware checks the IV as well,
227 * Hardware took care of all processing, including 227 * we don't even get here, see ieee80211_rx_h_decrypt()
228 * replay protection, and stripped the ICV/IV so 228 */
229 * we cannot do any checks here. 229 if (status->flag & RX_FLAG_DECRYPTED)
230 */
231 return RX_CONTINUE;
232 }
233
234 /* let TKIP code verify IV, but skip decryption */
235 hwaccel = 1; 230 hwaccel = 1;
236 }
237 231
238 res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm, 232 res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm,
239 key, skb->data + hdrlen, 233 key, skb->data + hdrlen,
@@ -447,10 +441,6 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx)
447 if (!rx->sta || data_len < 0) 441 if (!rx->sta || data_len < 0)
448 return RX_DROP_UNUSABLE; 442 return RX_DROP_UNUSABLE;
449 443
450 if ((status->flag & RX_FLAG_DECRYPTED) &&
451 (status->flag & RX_FLAG_IV_STRIPPED))
452 return RX_CONTINUE;
453
454 ccmp_hdr2pn(pn, skb->data + hdrlen); 444 ccmp_hdr2pn(pn, skb->data + hdrlen);
455 445
456 queue = ieee80211_is_mgmt(hdr->frame_control) ? 446 queue = ieee80211_is_mgmt(hdr->frame_control) ?
@@ -564,10 +554,6 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx)
564 if (!ieee80211_is_mgmt(hdr->frame_control)) 554 if (!ieee80211_is_mgmt(hdr->frame_control))
565 return RX_CONTINUE; 555 return RX_CONTINUE;
566 556
567 if ((status->flag & RX_FLAG_DECRYPTED) &&
568 (status->flag & RX_FLAG_IV_STRIPPED))
569 return RX_CONTINUE;
570
571 if (skb->len < 24 + sizeof(*mmie)) 557 if (skb->len < 24 + sizeof(*mmie))
572 return RX_DROP_UNUSABLE; 558 return RX_DROP_UNUSABLE;
573 559
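The wpa.c hunks replace the driver-private ALG_TKIP enum with the IEEE 802.11 cipher suite selector WLAN_CIPHER_SUITE_TKIP, a u32 that packs the 00-0F-AC OUI with a one-byte suite type, so key configuration can be compared directly against selectors parsed from RSN elements. A small sketch of how such selectors are composed; the SUITE macro here is written out locally rather than taken from a kernel header:

    #include <stdint.h>
    #include <stdio.h>

    /* suite selector = 24-bit OUI followed by an 8-bit suite type */
    #define SUITE(oui, id)  (((uint32_t)(oui) << 8) | (uint8_t)(id))

    #define CIPHER_SUITE_TKIP  SUITE(0x000FAC, 2)
    #define CIPHER_SUITE_CCMP  SUITE(0x000FAC, 4)

    int main(void)
    {
        /* prints 000fac02, the value of WLAN_CIPHER_SUITE_TKIP */
        printf("%08x\n", CIPHER_SUITE_TKIP);
        printf("%08x\n", CIPHER_SUITE_CCMP);
        return 0;
    }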
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index fdaec7daff1d..85dabb86be6f 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -105,10 +105,8 @@ EXPORT_SYMBOL(nf_register_hooks);
105 105
106void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) 106void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
107{ 107{
108 unsigned int i; 108 while (n-- > 0)
109 109 nf_unregister_hook(&reg[n]);
110 for (i = 0; i < n; i++)
111 nf_unregister_hook(&reg[i]);
112} 110}
113EXPORT_SYMBOL(nf_unregister_hooks); 111EXPORT_SYMBOL(nf_unregister_hooks);
114 112
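nf_unregister_hooks() now walks the array backwards, so hooks come down in the reverse of registration order, matching the unwind order nf_register_hooks() uses on partial failure, and the while (n-- > 0) form drops the separate index variable. A generic sketch of the idiom with placeholder resources:

    #include <stdio.h>

    static void unregister_one(int id)
    {
        printf("unregister %d\n", id);
    }

    /* Tear down in reverse registration order: last registered,
     * first unregistered. */
    static void unregister_all(const int *ids, unsigned int n)
    {
        while (n-- > 0)
            unregister_one(ids[n]);
    }

    int main(void)
    {
        int ids[] = { 1, 2, 3 };

        unregister_all(ids, 3); /* prints 3, 2, 1 */
        return 0;
    }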
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 46a77d5c3887..a22dac227055 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -3,7 +3,7 @@
3# 3#
4menuconfig IP_VS 4menuconfig IP_VS
5 tristate "IP virtual server support" 5 tristate "IP virtual server support"
6 depends on NET && INET && NETFILTER && NF_CONNTRACK 6 depends on NET && INET && NETFILTER
7 ---help--- 7 ---help---
8 IP Virtual Server support will let you build a high-performance 8 IP Virtual Server support will let you build a high-performance
9 virtual server based on cluster of two or more real servers. This 9 virtual server based on cluster of two or more real servers. This
@@ -235,7 +235,8 @@ comment 'IPVS application helper'
235 235
236config IP_VS_FTP 236config IP_VS_FTP
237 tristate "FTP protocol helper" 237 tristate "FTP protocol helper"
238 depends on IP_VS_PROTO_TCP && NF_NAT 238 depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT
239 select IP_VS_NFCT
239 ---help--- 240 ---help---
240 FTP is a protocol that transfers IP address and/or port number in 241 FTP is a protocol that transfers IP address and/or port number in
241 the payload. In the virtual server via Network Address Translation, 242 the payload. In the virtual server via Network Address Translation,
@@ -247,4 +248,19 @@ config IP_VS_FTP
247 If you want to compile it in kernel, say Y. To compile it as a 248 If you want to compile it in kernel, say Y. To compile it as a
248 module, choose M here. If unsure, say N. 249 module, choose M here. If unsure, say N.
249 250
251config IP_VS_NFCT
252 bool "Netfilter connection tracking"
253 depends on NF_CONNTRACK
254 ---help---
255 The Netfilter connection tracking support allows the IPVS
256 connection state to be exported to the Netfilter framework
257 for filtering purposes.
258
259config IP_VS_PE_SIP
260 tristate "SIP persistence engine"
261 depends on IP_VS_PROTO_UDP
262 depends on NF_CONNTRACK_SIP
263 ---help---
264 Allow persistence based on the SIP Call-ID
265
250endif # IP_VS 266endif # IP_VS
diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile
index e3baefd7066e..34ee602ddb66 100644
--- a/net/netfilter/ipvs/Makefile
+++ b/net/netfilter/ipvs/Makefile
@@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
9ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o 9ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
10ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o 10ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o
11 11
12ip_vs-extra_objs-y :=
13ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
14
12ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ 15ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
13 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ 16 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
14 ip_vs_est.o ip_vs_proto.o \ 17 ip_vs_est.o ip_vs_proto.o ip_vs_pe.o \
15 $(ip_vs_proto-objs-y) 18 $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
16 19
17 20
18# IPVS core 21# IPVS core
@@ -32,3 +35,6 @@ obj-$(CONFIG_IP_VS_NQ) += ip_vs_nq.o
32 35
33# IPVS application helpers 36# IPVS application helpers
34obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o 37obj-$(CONFIG_IP_VS_FTP) += ip_vs_ftp.o
38
39# IPVS connection template retrievers
40obj-$(CONFIG_IP_VS_PE_SIP) += ip_vs_pe_sip.o
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index e76f87f4aca8..a475edee0912 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -103,8 +103,8 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
103 goto out; 103 goto out;
104 104
105 list_add(&inc->a_list, &app->incs_list); 105 list_add(&inc->a_list, &app->incs_list);
106 IP_VS_DBG(9, "%s application %s:%u registered\n", 106 IP_VS_DBG(9, "%s App %s:%u registered\n",
107 pp->name, inc->name, inc->port); 107 pp->name, inc->name, ntohs(inc->port));
108 108
109 return 0; 109 return 0;
110 110
@@ -130,7 +130,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
130 pp->unregister_app(inc); 130 pp->unregister_app(inc);
131 131
132 IP_VS_DBG(9, "%s App %s:%u unregistered\n", 132 IP_VS_DBG(9, "%s App %s:%u unregistered\n",
133 pp->name, inc->name, inc->port); 133 pp->name, inc->name, ntohs(inc->port));
134 134
135 list_del(&inc->a_list); 135 list_del(&inc->a_list);
136 136
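The ip_vs_app.c change fixes an endianness bug in a debug message: inc->port is a __be16 stored in network byte order, so printing it raw shows a byte-swapped number on little-endian hosts; it has to pass through ntohs() first. A minimal demonstration:

    #include <arpa/inet.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint16_t port_be = htons(21); /* stored in network byte order */

        /* On a little-endian host the raw value prints as 5376 (0x1500),
         * not 21; converting back with ntohs() is always correct. */
        printf("raw: %u, ntohs: %u\n", port_be, ntohs(port_be));
        return 0;
    }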
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index b71c69a2db13..e9adecdc8ca4 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -148,6 +148,42 @@ static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
148 & ip_vs_conn_tab_mask; 148 & ip_vs_conn_tab_mask;
149} 149}
150 150
151static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
152 bool inverse)
153{
154 const union nf_inet_addr *addr;
155 __be16 port;
156
157 if (p->pe_data && p->pe->hashkey_raw)
158 return p->pe->hashkey_raw(p, ip_vs_conn_rnd, inverse) &
159 ip_vs_conn_tab_mask;
160
161 if (likely(!inverse)) {
162 addr = p->caddr;
163 port = p->cport;
164 } else {
165 addr = p->vaddr;
166 port = p->vport;
167 }
168
169 return ip_vs_conn_hashkey(p->af, p->protocol, addr, port);
170}
171
172static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
173{
174 struct ip_vs_conn_param p;
175
176 ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
177 NULL, 0, &p);
178
179 if (cp->dest && cp->dest->svc->pe) {
180 p.pe = cp->dest->svc->pe;
181 p.pe_data = cp->pe_data;
182 p.pe_data_len = cp->pe_data_len;
183 }
184
185 return ip_vs_conn_hashkey_param(&p, false);
186}
151 187
152/* 188/*
153 * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. 189 * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
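ip_vs_conn_hashkey_param(), added above, makes one lookup key serve both directions: the inverse flag selects which half of the tuple feeds the hash, so a reply-direction lookup lands in the same bucket the connection was originally hashed into, and a persistence engine with raw key data can override the hash entirely via hashkey_raw(). A simplified model of the direction switch; the struct and hash are toy stand-ins, not the kernel definitions:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct conn_param {
        uint32_t caddr, vaddr;   /* client and virtual addresses */
        uint16_t cport, vport;   /* client and virtual ports */
    };

    static unsigned int hash_tuple(uint32_t addr, uint16_t port)
    {
        return (addr ^ ((uint32_t)port << 16) ^ port) & 0xff; /* toy hash */
    }

    /* Normal lookups hash one half of the key; inverse (reply-direction)
     * lookups hash the other half, so both directions resolve to a single
     * bucket without storing two keys per connection. */
    static unsigned int hashkey_param(const struct conn_param *p, bool inverse)
    {
        return inverse ? hash_tuple(p->vaddr, p->vport)
                       : hash_tuple(p->caddr, p->cport);
    }

    int main(void)
    {
        struct conn_param p = { 0x0a000001, 0xc0a80001, 40000, 80 };

        printf("in-bucket %u, out-bucket %u\n",
               hashkey_param(&p, false), hashkey_param(&p, true));
        return 0;
    }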
@@ -162,7 +198,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
162 return 0; 198 return 0;
163 199
164 /* Hash by protocol, client address and port */ 200 /* Hash by protocol, client address and port */
165 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); 201 hash = ip_vs_conn_hashkey_conn(cp);
166 202
167 ct_write_lock(hash); 203 ct_write_lock(hash);
168 spin_lock(&cp->lock); 204 spin_lock(&cp->lock);
@@ -195,7 +231,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
195 int ret; 231 int ret;
196 232
197 /* unhash it and decrease its reference counter */ 233 /* unhash it and decrease its reference counter */
198 hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); 234 hash = ip_vs_conn_hashkey_conn(cp);
199 235
200 ct_write_lock(hash); 236 ct_write_lock(hash);
201 spin_lock(&cp->lock); 237 spin_lock(&cp->lock);
@@ -218,27 +254,26 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp)
218/* 254/*
219 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. 255 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
220 * Called for pkts coming from OUTside-to-INside. 256 * Called for pkts coming from OUTside-to-INside.
221 * s_addr, s_port: pkt source address (foreign host) 257 * p->caddr, p->cport: pkt source address (foreign host)
222 * d_addr, d_port: pkt dest address (load balancer) 258 * p->vaddr, p->vport: pkt dest address (load balancer)
223 */ 259 */
224static inline struct ip_vs_conn *__ip_vs_conn_in_get 260static inline struct ip_vs_conn *
225(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, 261__ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
226 const union nf_inet_addr *d_addr, __be16 d_port)
227{ 262{
228 unsigned hash; 263 unsigned hash;
229 struct ip_vs_conn *cp; 264 struct ip_vs_conn *cp;
230 265
231 hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); 266 hash = ip_vs_conn_hashkey_param(p, false);
232 267
233 ct_read_lock(hash); 268 ct_read_lock(hash);
234 269
235 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 270 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
236 if (cp->af == af && 271 if (cp->af == p->af &&
237 ip_vs_addr_equal(af, s_addr, &cp->caddr) && 272 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
238 ip_vs_addr_equal(af, d_addr, &cp->vaddr) && 273 ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
239 s_port == cp->cport && d_port == cp->vport && 274 p->cport == cp->cport && p->vport == cp->vport &&
240 ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && 275 ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
241 protocol == cp->protocol) { 276 p->protocol == cp->protocol) {
242 /* HIT */ 277 /* HIT */
243 atomic_inc(&cp->refcnt); 278 atomic_inc(&cp->refcnt);
244 ct_read_unlock(hash); 279 ct_read_unlock(hash);
@@ -251,99 +286,111 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get
251 return NULL; 286 return NULL;
252} 287}
253 288
254struct ip_vs_conn *ip_vs_conn_in_get 289struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
255(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
256 const union nf_inet_addr *d_addr, __be16 d_port)
257{ 290{
258 struct ip_vs_conn *cp; 291 struct ip_vs_conn *cp;
259 292
260 cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port); 293 cp = __ip_vs_conn_in_get(p);
261 if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) 294 if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) {
262 cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr, 295 struct ip_vs_conn_param cport_zero_p = *p;
263 d_port); 296 cport_zero_p.cport = 0;
297 cp = __ip_vs_conn_in_get(&cport_zero_p);
298 }
264 299
265 IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n", 300 IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n",
266 ip_vs_proto_name(protocol), 301 ip_vs_proto_name(p->protocol),
267 IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), 302 IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport),
268 IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), 303 IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
269 cp ? "hit" : "not hit"); 304 cp ? "hit" : "not hit");
270 305
271 return cp; 306 return cp;
272} 307}
273 308
309static int
310ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
311 const struct ip_vs_iphdr *iph,
312 unsigned int proto_off, int inverse,
313 struct ip_vs_conn_param *p)
314{
315 __be16 _ports[2], *pptr;
316
317 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
318 if (pptr == NULL)
319 return 1;
320
321 if (likely(!inverse))
322 ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],
323 &iph->daddr, pptr[1], p);
324 else
325 ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],
326 &iph->saddr, pptr[0], p);
327 return 0;
328}
329
274struct ip_vs_conn * 330struct ip_vs_conn *
275ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, 331ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
276 struct ip_vs_protocol *pp, 332 struct ip_vs_protocol *pp,
277 const struct ip_vs_iphdr *iph, 333 const struct ip_vs_iphdr *iph,
278 unsigned int proto_off, int inverse) 334 unsigned int proto_off, int inverse)
279{ 335{
280 __be16 _ports[2], *pptr; 336 struct ip_vs_conn_param p;
281 337
282 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 338 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p))
283 if (pptr == NULL)
284 return NULL; 339 return NULL;
285 340
286 if (likely(!inverse)) 341 return ip_vs_conn_in_get(&p);
287 return ip_vs_conn_in_get(af, iph->protocol,
288 &iph->saddr, pptr[0],
289 &iph->daddr, pptr[1]);
290 else
291 return ip_vs_conn_in_get(af, iph->protocol,
292 &iph->daddr, pptr[1],
293 &iph->saddr, pptr[0]);
294} 342}
295EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto); 343EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto);
296 344
297/* Get reference to connection template */ 345/* Get reference to connection template */
298struct ip_vs_conn *ip_vs_ct_in_get 346struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
299(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
300 const union nf_inet_addr *d_addr, __be16 d_port)
301{ 347{
302 unsigned hash; 348 unsigned hash;
303 struct ip_vs_conn *cp; 349 struct ip_vs_conn *cp;
304 350
305 hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); 351 hash = ip_vs_conn_hashkey_param(p, false);
306 352
307 ct_read_lock(hash); 353 ct_read_lock(hash);
308 354
309 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 355 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
310 if (cp->af == af && 356 if (p->pe_data && p->pe->ct_match) {
311 ip_vs_addr_equal(af, s_addr, &cp->caddr) && 357 if (p->pe->ct_match(p, cp))
358 goto out;
359 continue;
360 }
361
362 if (cp->af == p->af &&
363 ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
312 /* protocol should only be IPPROTO_IP if 364 /* protocol should only be IPPROTO_IP if
313 * d_addr is a fwmark */ 365 * p->vaddr is a fwmark */
314 ip_vs_addr_equal(protocol == IPPROTO_IP ? AF_UNSPEC : af, 366 ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC :
315 d_addr, &cp->vaddr) && 367 p->af, p->vaddr, &cp->vaddr) &&
316 s_port == cp->cport && d_port == cp->vport && 368 p->cport == cp->cport && p->vport == cp->vport &&
317 cp->flags & IP_VS_CONN_F_TEMPLATE && 369 cp->flags & IP_VS_CONN_F_TEMPLATE &&
318 protocol == cp->protocol) { 370 p->protocol == cp->protocol)
319 /* HIT */
320 atomic_inc(&cp->refcnt);
321 goto out; 371 goto out;
322 }
323 } 372 }
324 cp = NULL; 373 cp = NULL;
325 374
326 out: 375 out:
376 if (cp)
377 atomic_inc(&cp->refcnt);
327 ct_read_unlock(hash); 378 ct_read_unlock(hash);
328 379
329 IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", 380 IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n",
330 ip_vs_proto_name(protocol), 381 ip_vs_proto_name(p->protocol),
331 IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), 382 IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport),
332 IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), 383 IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
333 cp ? "hit" : "not hit"); 384 cp ? "hit" : "not hit");
334 385
335 return cp; 386 return cp;
336} 387}
337 388
338/* 389/* Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab.
339 * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. 390 * Called for pkts coming from inside-to-OUTside.
340 * Called for pkts coming from inside-to-OUTside. 391 * p->caddr, p->cport: pkt source address (inside host)
341 * s_addr, s_port: pkt source address (inside host) 392 * p->vaddr, p->vport: pkt dest address (foreign host) */
342 * d_addr, d_port: pkt dest address (foreign host) 393struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
343 */
344struct ip_vs_conn *ip_vs_conn_out_get
345(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port,
346 const union nf_inet_addr *d_addr, __be16 d_port)
347{ 394{
348 unsigned hash; 395 unsigned hash;
349 struct ip_vs_conn *cp, *ret=NULL; 396 struct ip_vs_conn *cp, *ret=NULL;
@@ -351,16 +398,16 @@ struct ip_vs_conn *ip_vs_conn_out_get
351 /* 398 /*
352 * Check for "full" addressed entries 399 * Check for "full" addressed entries
353 */ 400 */
354 hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port); 401 hash = ip_vs_conn_hashkey_param(p, true);
355 402
356 ct_read_lock(hash); 403 ct_read_lock(hash);
357 404
358 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { 405 list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
359 if (cp->af == af && 406 if (cp->af == p->af &&
360 ip_vs_addr_equal(af, d_addr, &cp->caddr) && 407 ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
361 ip_vs_addr_equal(af, s_addr, &cp->daddr) && 408 ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
362 d_port == cp->cport && s_port == cp->dport && 409 p->vport == cp->cport && p->cport == cp->dport &&
363 protocol == cp->protocol) { 410 p->protocol == cp->protocol) {
364 /* HIT */ 411 /* HIT */
365 atomic_inc(&cp->refcnt); 412 atomic_inc(&cp->refcnt);
366 ret = cp; 413 ret = cp;
@@ -371,9 +418,9 @@ struct ip_vs_conn *ip_vs_conn_out_get
371 ct_read_unlock(hash); 418 ct_read_unlock(hash);
372 419
373 IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", 420 IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n",
374 ip_vs_proto_name(protocol), 421 ip_vs_proto_name(p->protocol),
375 IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), 422 IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport),
376 IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), 423 IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
377 ret ? "hit" : "not hit"); 424 ret ? "hit" : "not hit");
378 425
379 return ret; 426 return ret;
@@ -385,20 +432,12 @@ ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
385 const struct ip_vs_iphdr *iph, 432 const struct ip_vs_iphdr *iph,
386 unsigned int proto_off, int inverse) 433 unsigned int proto_off, int inverse)
387{ 434{
388 __be16 _ports[2], *pptr; 435 struct ip_vs_conn_param p;
389 436
390 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 437 if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p))
391 if (pptr == NULL)
392 return NULL; 438 return NULL;
393 439
394 if (likely(!inverse)) 440 return ip_vs_conn_out_get(&p);
395 return ip_vs_conn_out_get(af, iph->protocol,
396 &iph->saddr, pptr[0],
397 &iph->daddr, pptr[1]);
398 else
399 return ip_vs_conn_out_get(af, iph->protocol,
400 &iph->daddr, pptr[1],
401 &iph->saddr, pptr[0]);
402} 441}
403EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); 442EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto);
404 443
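The lookup functions above now take a single const struct ip_vs_conn_param * in place of six positional arguments, which keeps signatures stable as persistence-engine fields are added and turns the retry-with-wildcard-client-port fallback in ip_vs_conn_in_get() into a plain struct copy. A compact sketch of that fallback pattern; the table and types are illustrative only:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    struct conn_param {
        uint32_t caddr, vaddr;
        uint16_t cport, vport;
    };

    struct conn { struct conn_param key; };

    static struct conn table[] = {
        /* entry stored with wildcard client port (cport 0) */
        { { 0x0a000001, 0xc0a80001, 0, 80 } },
    };

    static struct conn *lookup(const struct conn_param *p)
    {
        for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++)
            if (table[i].key.caddr == p->caddr &&
                table[i].key.vaddr == p->vaddr &&
                table[i].key.cport == p->cport &&
                table[i].key.vport == p->vport)
                return &table[i];
        return NULL;
    }

    static struct conn *lookup_with_fallback(const struct conn_param *p)
    {
        struct conn *c = lookup(p);

        if (!c) {                   /* retry with a wildcard client port */
            struct conn_param cport_zero_p = *p;

            cport_zero_p.cport = 0;
            c = lookup(&cport_zero_p);
        }
        return c;
    }

    int main(void)
    {
        struct conn_param p = { 0x0a000001, 0xc0a80001, 40000, 80 };

        printf("%s\n", lookup_with_fallback(&p) ? "hit" : "miss");
        return 0;
    }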
@@ -505,6 +544,8 @@ static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest)
505static inline void 544static inline void
506ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) 545ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
507{ 546{
547 unsigned int conn_flags;
548
508 /* if dest is NULL, then return directly */ 549 /* if dest is NULL, then return directly */
509 if (!dest) 550 if (!dest)
510 return; 551 return;
@@ -512,16 +553,20 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest)
512 /* Increase the refcnt counter of the dest */ 553 /* Increase the refcnt counter of the dest */
513 atomic_inc(&dest->refcnt); 554 atomic_inc(&dest->refcnt);
514 555
556 conn_flags = atomic_read(&dest->conn_flags);
557 if (cp->protocol != IPPROTO_UDP)
558 conn_flags &= ~IP_VS_CONN_F_ONE_PACKET;
515 /* Bind with the destination and its corresponding transmitter */ 559 /* Bind with the destination and its corresponding transmitter */
516 if ((cp->flags & IP_VS_CONN_F_SYNC) && 560 if (cp->flags & IP_VS_CONN_F_SYNC) {
517 (!(cp->flags & IP_VS_CONN_F_TEMPLATE)))
518 /* if the connection is not template and is created 561 /* if the connection is not template and is created
519 * by sync, preserve the activity flag. 562 * by sync, preserve the activity flag.
520 */ 563 */
521 cp->flags |= atomic_read(&dest->conn_flags) & 564 if (!(cp->flags & IP_VS_CONN_F_TEMPLATE))
522 (~IP_VS_CONN_F_INACTIVE); 565 conn_flags &= ~IP_VS_CONN_F_INACTIVE;
523 else 566 /* connections inherit forwarding method from dest */
524 cp->flags |= atomic_read(&dest->conn_flags); 567 cp->flags &= ~IP_VS_CONN_F_FWD_MASK;
568 }
569 cp->flags |= conn_flags;
525 cp->dest = dest; 570 cp->dest = dest;
526 571
527 IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d " 572 IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d "
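ip_vs_bind_dest() now reads dest->conn_flags once into a local, strips IP_VS_CONN_F_ONE_PACKET for anything that is not UDP (one-packet scheduling is UDP-only), and for connections created by the sync daemon clears the inherited forwarding-method bits so the destination's method wins. A boiled-down model of that masking; the flag values here are arbitrary:

    #include <stdio.h>

    #define F_ONE_PACKET 0x01
    #define F_INACTIVE   0x02
    #define F_SYNC       0x04
    #define F_TEMPLATE   0x08
    #define F_FWD_MASK   0x70   /* forwarding-method bits */

    #define PROTO_UDP 17

    static unsigned int bind_flags(unsigned int cp_flags,
                                   unsigned int dest_flags, int protocol)
    {
        unsigned int conn_flags = dest_flags;

        if (protocol != PROTO_UDP)
            conn_flags &= ~F_ONE_PACKET;   /* UDP-only optimization */

        if (cp_flags & F_SYNC) {
            if (!(cp_flags & F_TEMPLATE))
                conn_flags &= ~F_INACTIVE; /* keep synced activity state */
            cp_flags &= ~F_FWD_MASK;       /* dest's fwd method wins */
        }
        return cp_flags | conn_flags;
    }

    int main(void)
    {
        printf("%#x\n", bind_flags(F_SYNC, F_ONE_PACKET | F_FWD_MASK, 6));
        return 0;
    }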
@@ -717,6 +762,10 @@ static void ip_vs_conn_expire(unsigned long data)
717 if (cp->control) 762 if (cp->control)
718 ip_vs_control_del(cp); 763 ip_vs_control_del(cp);
719 764
765 if (cp->flags & IP_VS_CONN_F_NFCT)
766 ip_vs_conn_drop_conntrack(cp);
767
768 kfree(cp->pe_data);
720 if (unlikely(cp->app != NULL)) 769 if (unlikely(cp->app != NULL))
721 ip_vs_unbind_app(cp); 770 ip_vs_unbind_app(cp);
722 ip_vs_unbind_dest(cp); 771 ip_vs_unbind_dest(cp);
@@ -751,13 +800,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
751 * Create a new connection entry and hash it into the ip_vs_conn_tab 800 * Create a new connection entry and hash it into the ip_vs_conn_tab
752 */ 801 */
753struct ip_vs_conn * 802struct ip_vs_conn *
754ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, 803ip_vs_conn_new(const struct ip_vs_conn_param *p,
755 const union nf_inet_addr *vaddr, __be16 vport,
756 const union nf_inet_addr *daddr, __be16 dport, unsigned flags, 804 const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
757 struct ip_vs_dest *dest) 805 struct ip_vs_dest *dest)
758{ 806{
759 struct ip_vs_conn *cp; 807 struct ip_vs_conn *cp;
760 struct ip_vs_protocol *pp = ip_vs_proto_get(proto); 808 struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
761 809
762 cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); 810 cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
763 if (cp == NULL) { 811 if (cp == NULL) {
@@ -767,17 +815,21 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
767 815
768 INIT_LIST_HEAD(&cp->c_list); 816 INIT_LIST_HEAD(&cp->c_list);
769 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); 817 setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
770 cp->af = af; 818 cp->af = p->af;
771 cp->protocol = proto; 819 cp->protocol = p->protocol;
772 ip_vs_addr_copy(af, &cp->caddr, caddr); 820 ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
773 cp->cport = cport; 821 cp->cport = p->cport;
774 ip_vs_addr_copy(af, &cp->vaddr, vaddr); 822 ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr);
775 cp->vport = vport; 823 cp->vport = p->vport;
776 /* proto should only be IPPROTO_IP if d_addr is a fwmark */ 824 /* proto should only be IPPROTO_IP if d_addr is a fwmark */
777 ip_vs_addr_copy(proto == IPPROTO_IP ? AF_UNSPEC : af, 825 ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
778 &cp->daddr, daddr); 826 &cp->daddr, daddr);
779 cp->dport = dport; 827 cp->dport = dport;
780 cp->flags = flags; 828 cp->flags = flags;
829 if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) {
830 cp->pe_data = p->pe_data;
831 cp->pe_data_len = p->pe_data_len;
832 }
781 spin_lock_init(&cp->lock); 833 spin_lock_init(&cp->lock);
782 834
783 /* 835 /*
@@ -803,7 +855,7 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
803 855
804 /* Bind its packet transmitter */ 856 /* Bind its packet transmitter */
805#ifdef CONFIG_IP_VS_IPV6 857#ifdef CONFIG_IP_VS_IPV6
806 if (af == AF_INET6) 858 if (p->af == AF_INET6)
807 ip_vs_bind_xmit_v6(cp); 859 ip_vs_bind_xmit_v6(cp);
808 else 860 else
809#endif 861#endif
@@ -812,13 +864,22 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport,
812 if (unlikely(pp && atomic_read(&pp->appcnt))) 864 if (unlikely(pp && atomic_read(&pp->appcnt)))
813 ip_vs_bind_app(cp, pp); 865 ip_vs_bind_app(cp, pp);
814 866
867 /*
868 * Allow conntrack to be preserved. By default, conntrack
869 * is created and destroyed for every packet.
870 * Sometimes keeping conntrack can be useful for
871 * IP_VS_CONN_F_ONE_PACKET too.
872 */
873
874 if (ip_vs_conntrack_enabled())
875 cp->flags |= IP_VS_CONN_F_NFCT;
876
815 /* Hash it in the ip_vs_conn_tab finally */ 877 /* Hash it in the ip_vs_conn_tab finally */
816 ip_vs_conn_hash(cp); 878 ip_vs_conn_hash(cp);
817 879
818 return cp; 880 return cp;
819} 881}
820 882
821
822/* 883/*
823 * /proc/net/ip_vs_conn entries 884 * /proc/net/ip_vs_conn entries
824 */ 885 */
@@ -834,7 +895,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
834 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { 895 list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
835 if (pos-- == 0) { 896 if (pos-- == 0) {
836 seq->private = &ip_vs_conn_tab[idx]; 897 seq->private = &ip_vs_conn_tab[idx];
837 return cp; 898 return cp;
838 } 899 }
839 } 900 }
840 ct_read_unlock_bh(idx); 901 ct_read_unlock_bh(idx);
@@ -891,30 +952,45 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
891 952
892 if (v == SEQ_START_TOKEN) 953 if (v == SEQ_START_TOKEN)
893 seq_puts(seq, 954 seq_puts(seq,
894 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires\n"); 955 "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
895 else { 956 else {
896 const struct ip_vs_conn *cp = v; 957 const struct ip_vs_conn *cp = v;
958 char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
959 size_t len = 0;
960
961 if (cp->dest && cp->pe_data &&
962 cp->dest->svc->pe->show_pe_data) {
963 pe_data[0] = ' ';
964 len = strlen(cp->dest->svc->pe->name);
965 memcpy(pe_data + 1, cp->dest->svc->pe->name, len);
966 pe_data[len + 1] = ' ';
967 len += 2;
968 len += cp->dest->svc->pe->show_pe_data(cp,
969 pe_data + len);
970 }
971 pe_data[len] = '\0';
897 972
898#ifdef CONFIG_IP_VS_IPV6 973#ifdef CONFIG_IP_VS_IPV6
899 if (cp->af == AF_INET6) 974 if (cp->af == AF_INET6)
900 seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %7lu\n", 975 seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X "
976 "%pI6 %04X %-11s %7lu%s\n",
901 ip_vs_proto_name(cp->protocol), 977 ip_vs_proto_name(cp->protocol),
902 &cp->caddr.in6, ntohs(cp->cport), 978 &cp->caddr.in6, ntohs(cp->cport),
903 &cp->vaddr.in6, ntohs(cp->vport), 979 &cp->vaddr.in6, ntohs(cp->vport),
904 &cp->daddr.in6, ntohs(cp->dport), 980 &cp->daddr.in6, ntohs(cp->dport),
905 ip_vs_state_name(cp->protocol, cp->state), 981 ip_vs_state_name(cp->protocol, cp->state),
906 (cp->timer.expires-jiffies)/HZ); 982 (cp->timer.expires-jiffies)/HZ, pe_data);
907 else 983 else
908#endif 984#endif
909 seq_printf(seq, 985 seq_printf(seq,
910 "%-3s %08X %04X %08X %04X" 986 "%-3s %08X %04X %08X %04X"
911 " %08X %04X %-11s %7lu\n", 987 " %08X %04X %-11s %7lu%s\n",
912 ip_vs_proto_name(cp->protocol), 988 ip_vs_proto_name(cp->protocol),
913 ntohl(cp->caddr.ip), ntohs(cp->cport), 989 ntohl(cp->caddr.ip), ntohs(cp->cport),
914 ntohl(cp->vaddr.ip), ntohs(cp->vport), 990 ntohl(cp->vaddr.ip), ntohs(cp->vport),
915 ntohl(cp->daddr.ip), ntohs(cp->dport), 991 ntohl(cp->daddr.ip), ntohs(cp->dport),
916 ip_vs_state_name(cp->protocol, cp->state), 992 ip_vs_state_name(cp->protocol, cp->state),
917 (cp->timer.expires-jiffies)/HZ); 993 (cp->timer.expires-jiffies)/HZ, pe_data);
918 } 994 }
919 return 0; 995 return 0;
920} 996}
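The /proc/net/ip_vs_conn dump gains optional PEName and PEData columns, assembled by hand into a buffer sized IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3 (leading space, separator space, NUL). Where the callback reports how many bytes it wrote, as show_pe_data() does, the same column can be built with snprintf(); a hedged sketch under that assumption, with an invented SIP-style engine and generous buffer sizes:

    #include <stdio.h>

    #define PENAME_MAXLEN 16
    #define PEDATA_MAXLEN 255

    struct pe {
        const char *name;
        /* formats engine-private data into buf, returns bytes written */
        size_t (*show_pe_data)(char *buf, size_t len);
    };

    static size_t show_callid(char *buf, size_t len)
    {
        return (size_t)snprintf(buf, len, "abc123@example"); /* invented */
    }

    /* Build " <pename> <pedata>" or an empty string; assumes outlen is
     * large enough that snprintf() never truncates. */
    static void format_pe_column(const struct pe *pe, char *out, size_t outlen)
    {
        out[0] = '\0';
        if (pe && pe->show_pe_data) {
            size_t len = (size_t)snprintf(out, outlen, " %s ", pe->name);

            pe->show_pe_data(out + len, outlen - len);
        }
    }

    int main(void)
    {
        struct pe sip = { "sip", show_callid };
        char col[PENAME_MAXLEN + PEDATA_MAXLEN + 3];

        format_pe_column(&sip, col, sizeof(col));
        printf("...state expires%s\n", col);
        return 0;
    }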
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 4c2f89df5cce..b4e51e9c5a04 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -40,6 +40,7 @@
40#include <net/udp.h> 40#include <net/udp.h>
41#include <net/icmp.h> /* for icmp_send */ 41#include <net/icmp.h> /* for icmp_send */
42#include <net/route.h> 42#include <net/route.h>
43#include <net/ip6_checksum.h>
43 44
44#include <linux/netfilter.h> 45#include <linux/netfilter.h>
45#include <linux/netfilter_ipv4.h> 46#include <linux/netfilter_ipv4.h>
@@ -47,6 +48,7 @@
47#ifdef CONFIG_IP_VS_IPV6 48#ifdef CONFIG_IP_VS_IPV6
48#include <net/ipv6.h> 49#include <net/ipv6.h>
49#include <linux/netfilter_ipv6.h> 50#include <linux/netfilter_ipv6.h>
51#include <net/ip6_route.h>
50#endif 52#endif
51 53
52#include <net/ip_vs.h> 54#include <net/ip_vs.h>
@@ -175,6 +177,18 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
175 return pp->state_transition(cp, direction, skb, pp); 177 return pp->state_transition(cp, direction, skb, pp);
176} 178}
177 179
180static inline void
181ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
182 struct sk_buff *skb, int protocol,
183 const union nf_inet_addr *caddr, __be16 cport,
184 const union nf_inet_addr *vaddr, __be16 vport,
185 struct ip_vs_conn_param *p)
186{
187 ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
188 p->pe = svc->pe;
189 if (p->pe && p->pe->fill_param)
190 p->pe->fill_param(p, skb);
191}
178 192
179/* 193/*
180 * IPVS persistent scheduling function 194 * IPVS persistent scheduling function
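ip_vs_conn_fill_param_persist() above is the persistence-engine hook point: once the generic tuple is filled in, the service's pe->fill_param() callback may attach engine-private data to the lookup key (the SIP engine keys on the Call-ID). A stripped-down model of that optional-hook pattern; the engine and payload parsing are hypothetical:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct conn_param {
        unsigned int caddr, vaddr;
        char *pe_data;           /* engine-private key material, or NULL */
    };

    struct pe {
        const char *name;
        int (*fill_param)(struct conn_param *p, const char *pkt);
    };

    /* Invented SIP-style engine: key connections on a payload token. */
    static int sip_fill_param(struct conn_param *p, const char *pkt)
    {
        const char *id = strstr(pkt, "Call-ID: ");

        if (!id)
            return -1;
        p->pe_data = strdup(id + 9);
        return p->pe_data ? 0 : -1;
    }

    static void fill_param_persist(const struct pe *pe, struct conn_param *p,
                                   const char *pkt)
    {
        p->pe_data = NULL;
        if (pe && pe->fill_param)   /* optional hook, may attach pe_data */
            pe->fill_param(p, pkt);
    }

    int main(void)
    {
        struct pe sip = { "sip", sip_fill_param };
        struct conn_param p = { 0x0a000001, 0xc0a80001, NULL };

        fill_param_persist(&sip, &p, "INVITE sip:x SIP/2.0 Call-ID: 42@host");
        printf("pe_data: %s\n", p.pe_data ? p.pe_data : "(none)");
        free(p.pe_data);
        return 0;
    }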
@@ -185,15 +199,16 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
185 */ 199 */
186static struct ip_vs_conn * 200static struct ip_vs_conn *
187ip_vs_sched_persist(struct ip_vs_service *svc, 201ip_vs_sched_persist(struct ip_vs_service *svc,
188 const struct sk_buff *skb, 202 struct sk_buff *skb,
189 __be16 ports[2]) 203 __be16 ports[2])
190{ 204{
191 struct ip_vs_conn *cp = NULL; 205 struct ip_vs_conn *cp = NULL;
192 struct ip_vs_iphdr iph; 206 struct ip_vs_iphdr iph;
193 struct ip_vs_dest *dest; 207 struct ip_vs_dest *dest;
194 struct ip_vs_conn *ct; 208 struct ip_vs_conn *ct;
195 __be16 dport; /* destination port to forward */ 209 __be16 dport = 0; /* destination port to forward */
196 __be16 flags; 210 unsigned int flags;
211 struct ip_vs_conn_param param;
197 union nf_inet_addr snet; /* source network of the client, 212 union nf_inet_addr snet; /* source network of the client,
198 after masking */ 213 after masking */
199 214
@@ -226,120 +241,75 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
226 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport> 241 * service, and a template like <caddr, 0, vaddr, vport, daddr, dport>
227 * is created for other persistent services. 242 * is created for other persistent services.
228 */ 243 */
229 if (ports[1] == svc->port) { 244 {
230 /* Check if a template already exists */ 245 int protocol = iph.protocol;
231 if (svc->port != FTPPORT) 246 const union nf_inet_addr *vaddr = &iph.daddr;
232 ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, 247 const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
233 &iph.daddr, ports[1]); 248 __be16 vport = 0;
234 else 249
235 ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, 250 if (ports[1] == svc->port) {
236 &iph.daddr, 0); 251 /* non-FTP template:
237 252 * <protocol, caddr, 0, vaddr, vport, daddr, dport>
238 if (!ct || !ip_vs_check_template(ct)) { 253 * FTP template:
239 /* 254 * <protocol, caddr, 0, vaddr, 0, daddr, 0>
240 * No template found or the dest of the connection
241 * template is not available.
242 */
243 dest = svc->scheduler->schedule(svc, skb);
244 if (dest == NULL) {
245 IP_VS_DBG(1, "p-schedule: no dest found.\n");
246 return NULL;
247 }
248
249 /*
250 * Create a template like <protocol,caddr,0,
251 * vaddr,vport,daddr,dport> for non-ftp service,
252 * and <protocol,caddr,0,vaddr,0,daddr,0>
253 * for ftp service.
254 */ 255 */
255 if (svc->port != FTPPORT) 256 if (svc->port != FTPPORT)
256 ct = ip_vs_conn_new(svc->af, iph.protocol, 257 vport = ports[1];
257 &snet, 0,
258 &iph.daddr,
259 ports[1],
260 &dest->addr, dest->port,
261 IP_VS_CONN_F_TEMPLATE,
262 dest);
263 else
264 ct = ip_vs_conn_new(svc->af, iph.protocol,
265 &snet, 0,
266 &iph.daddr, 0,
267 &dest->addr, 0,
268 IP_VS_CONN_F_TEMPLATE,
269 dest);
270 if (ct == NULL)
271 return NULL;
272
273 ct->timeout = svc->timeout;
274 } else { 258 } else {
275 /* set destination with the found template */ 259 /* Note: persistent fwmark-based services and
276 dest = ct->dest; 260 * persistent port zero service are handled here.
277 } 261 * fwmark template:
278 dport = dest->port; 262 * <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
279 } else { 263 * port zero template:
280 /* 264 * <protocol,caddr,0,vaddr,0,daddr,0>
281 * Note: persistent fwmark-based services and persistent
282 * port zero service are handled here.
283 * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0>
284 * port zero template: <protocol,caddr,0,vaddr,0,daddr,0>
285 */
286 if (svc->fwmark) {
287 union nf_inet_addr fwmark = {
288 .ip = htonl(svc->fwmark)
289 };
290
291 ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0,
292 &fwmark, 0);
293 } else
294 ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0,
295 &iph.daddr, 0);
296
297 if (!ct || !ip_vs_check_template(ct)) {
298 /*
299 * If it is not persistent port zero, return NULL,
300 * otherwise create a connection template.
301 */ 265 */
302 if (svc->port) 266 if (svc->fwmark) {
303 return NULL; 267 protocol = IPPROTO_IP;
304 268 vaddr = &fwmark;
305 dest = svc->scheduler->schedule(svc, skb);
306 if (dest == NULL) {
307 IP_VS_DBG(1, "p-schedule: no dest found.\n");
308 return NULL;
309 } 269 }
270 }
271 ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
272 vaddr, vport, &param);
273 }
310 274
311 /* 275 /* Check if a template already exists */
312 * Create a template according to the service 276 ct = ip_vs_ct_in_get(&param);
313 */ 277 if (!ct || !ip_vs_check_template(ct)) {
314 if (svc->fwmark) { 278 /* No template found or the dest of the connection
315 union nf_inet_addr fwmark = { 279 * template is not available.
316 .ip = htonl(svc->fwmark) 280 */
317 }; 281 dest = svc->scheduler->schedule(svc, skb);
318 282 if (!dest) {
319 ct = ip_vs_conn_new(svc->af, IPPROTO_IP, 283 IP_VS_DBG(1, "p-schedule: no dest found.\n");
320 &snet, 0, 284 kfree(param.pe_data);
321 &fwmark, 0, 285 return NULL;
322 &dest->addr, 0,
323 IP_VS_CONN_F_TEMPLATE,
324 dest);
325 } else
326 ct = ip_vs_conn_new(svc->af, iph.protocol,
327 &snet, 0,
328 &iph.daddr, 0,
329 &dest->addr, 0,
330 IP_VS_CONN_F_TEMPLATE,
331 dest);
332 if (ct == NULL)
333 return NULL;
334
335 ct->timeout = svc->timeout;
336 } else {
337 /* set destination with the found template */
338 dest = ct->dest;
339 } 286 }
340 dport = ports[1]; 287
288 if (ports[1] == svc->port && svc->port != FTPPORT)
289 dport = dest->port;
290
291 /* Create a template
292 * This adds param.pe_data to the template,
293 * and thus param.pe_data will be destroyed
294 * when the template expires */
295 ct = ip_vs_conn_new(&param, &dest->addr, dport,
296 IP_VS_CONN_F_TEMPLATE, dest);
297 if (ct == NULL) {
298 kfree(param.pe_data);
299 return NULL;
300 }
301
302 ct->timeout = svc->timeout;
303 } else {
304 /* set destination with the found template */
305 dest = ct->dest;
306 kfree(param.pe_data);
341 } 307 }
342 308
309 dport = ports[1];
310 if (dport == svc->port && dest->port)
311 dport = dest->port;
312
343 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET 313 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET
344 && iph.protocol == IPPROTO_UDP)? 314 && iph.protocol == IPPROTO_UDP)?
345 IP_VS_CONN_F_ONE_PACKET : 0; 315 IP_VS_CONN_F_ONE_PACKET : 0;
@@ -347,12 +317,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
347 /* 317 /*
348 * Create a new connection according to the template 318 * Create a new connection according to the template
349 */ 319 */
350 cp = ip_vs_conn_new(svc->af, iph.protocol, 320 ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
351 &iph.saddr, ports[0], 321 &iph.daddr, ports[1], &param);
352 &iph.daddr, ports[1], 322 cp = ip_vs_conn_new(&param, &dest->addr, dport, flags, dest);
353 &dest->addr, dport,
354 flags,
355 dest);
356 if (cp == NULL) { 323 if (cp == NULL) {
357 ip_vs_conn_put(ct); 324 ip_vs_conn_put(ct);
358 return NULL; 325 return NULL;
@@ -376,23 +343,53 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
376 * Protocols supported: TCP, UDP 343 * Protocols supported: TCP, UDP
377 */ 344 */
378struct ip_vs_conn * 345struct ip_vs_conn *
379ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) 346ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
347 struct ip_vs_protocol *pp, int *ignored)
380{ 348{
381 struct ip_vs_conn *cp = NULL; 349 struct ip_vs_conn *cp = NULL;
382 struct ip_vs_iphdr iph; 350 struct ip_vs_iphdr iph;
383 struct ip_vs_dest *dest; 351 struct ip_vs_dest *dest;
384 __be16 _ports[2], *pptr, flags; 352 __be16 _ports[2], *pptr;
353 unsigned int flags;
385 354
355 *ignored = 1;
386 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); 356 ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
387 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); 357 pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
388 if (pptr == NULL) 358 if (pptr == NULL)
389 return NULL; 359 return NULL;
390 360
391 /* 361 /*
362 * FTPDATA needs this check when using local real server.
363 * Never schedule Active FTPDATA connections from real server.
364 * For LVS-NAT they must be already created. For other methods
365 * with persistence the connection is created on SYN+ACK.
366 */
367 if (pptr[0] == FTPDATA) {
368 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
369 "Not scheduling FTPDATA");
370 return NULL;
371 }
372
373 /*
374 * Do not schedule replies from local real server. It is risky
375 * for fwmark services but mostly for persistent services.
376 */
377 if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
378 (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
379 (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
380 IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
381 "Not scheduling reply for existing connection");
382 __ip_vs_conn_put(cp);
383 return NULL;
384 }
385
386 /*
392 * Persistent service 387 * Persistent service
393 */ 388 */
394 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 389 if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
390 *ignored = 0;
395 return ip_vs_sched_persist(svc, skb, pptr); 391 return ip_vs_sched_persist(svc, skb, pptr);
392 }
396 393
397 /* 394 /*
398 * Non-persistent service 395 * Non-persistent service
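ip_vs_schedule() grows an *ignored out-parameter: it starts at 1 and is cleared only once scheduling is actually attempted, letting callers distinguish "this packet is deliberately not scheduled" (FTPDATA, replies from a local real server) from "scheduling was tried and failed". A small sketch of that convention:

    #include <stdio.h>

    struct conn;

    /* Returns a connection, or NULL. *ignored distinguishes the two NULL
     * cases: 1 = deliberately not scheduled, 0 = tried and failed. */
    static struct conn *schedule_pkt(int is_reply, int *ignored)
    {
        *ignored = 1;
        if (is_reply)
            return NULL;   /* not ours to schedule: leave *ignored set */

        *ignored = 0;
        /* ... real scheduling would happen here; pretend it failed ... */
        return NULL;
    }

    int main(void)
    {
        int ignored;

        if (!schedule_pkt(1, &ignored))
            printf(ignored ? "packet ignored\n" : "scheduling failed\n");
        return 0;
    }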
@@ -405,6 +402,8 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
405 return NULL; 402 return NULL;
406 } 403 }
407 404
405 *ignored = 0;
406
408 dest = svc->scheduler->schedule(svc, skb); 407 dest = svc->scheduler->schedule(svc, skb);
409 if (dest == NULL) { 408 if (dest == NULL) {
410 IP_VS_DBG(1, "Schedule: no dest found.\n"); 409 IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -418,14 +417,16 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
418 /* 417 /*
419 * Create a connection entry. 418 * Create a connection entry.
420 */ 419 */
421 cp = ip_vs_conn_new(svc->af, iph.protocol, 420 {
422 &iph.saddr, pptr[0], 421 struct ip_vs_conn_param p;
423 &iph.daddr, pptr[1], 422 ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
424 &dest->addr, dest->port ? dest->port : pptr[1], 423 pptr[0], &iph.daddr, pptr[1], &p);
425 flags, 424 cp = ip_vs_conn_new(&p, &dest->addr,
426 dest); 425 dest->port ? dest->port : pptr[1],
427 if (cp == NULL) 426 flags, dest);
428 return NULL; 427 if (!cp)
428 return NULL;
429 }
429 430
430 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " 431 IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
431 "d:%s:%u conn->flags:%X conn->refcnt:%d\n", 432 "d:%s:%u conn->flags:%X conn->refcnt:%d\n",
@@ -472,23 +473,26 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
472 if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { 473 if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
473 int ret, cs; 474 int ret, cs;
474 struct ip_vs_conn *cp; 475 struct ip_vs_conn *cp;
475 __u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && 476 unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
476 iph.protocol == IPPROTO_UDP)? 477 iph.protocol == IPPROTO_UDP)?
477 IP_VS_CONN_F_ONE_PACKET : 0; 478 IP_VS_CONN_F_ONE_PACKET : 0;
478 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; 479 union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } };
479 480
480 ip_vs_service_put(svc); 481 ip_vs_service_put(svc);
481 482
482 /* create a new connection entry */ 483 /* create a new connection entry */
483 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); 484 IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
484 cp = ip_vs_conn_new(svc->af, iph.protocol, 485 {
485 &iph.saddr, pptr[0], 486 struct ip_vs_conn_param p;
486 &iph.daddr, pptr[1], 487 ip_vs_conn_fill_param(svc->af, iph.protocol,
487 &daddr, 0, 488 &iph.saddr, pptr[0],
488 IP_VS_CONN_F_BYPASS | flags, 489 &iph.daddr, pptr[1], &p);
489 NULL); 490 cp = ip_vs_conn_new(&p, &daddr, 0,
490 if (cp == NULL) 491 IP_VS_CONN_F_BYPASS | flags,
491 return NF_DROP; 492 NULL);
493 if (!cp)
494 return NF_DROP;
495 }
492 496
493 /* statistics */ 497 /* statistics */
494 ip_vs_in_stats(cp, skb); 498 ip_vs_in_stats(cp, skb);
@@ -526,9 +530,14 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
526 * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ 530 * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ
527 */ 531 */
528#ifdef CONFIG_IP_VS_IPV6 532#ifdef CONFIG_IP_VS_IPV6
529 if (svc->af == AF_INET6) 533 if (svc->af == AF_INET6) {
534 if (!skb->dev) {
535 struct net *net = dev_net(skb_dst(skb)->dev);
536
537 skb->dev = net->loopback_dev;
538 }
530 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); 539 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
531 else 540 } else
532#endif 541#endif
533 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); 542 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
534 543
@@ -540,6 +549,15 @@ __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
540 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); 549 return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
541} 550}
542 551
552static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
553{
554 if (NF_INET_LOCAL_IN == hooknum)
555 return IP_DEFRAG_VS_IN;
556 if (NF_INET_FORWARD == hooknum)
557 return IP_DEFRAG_VS_FWD;
558 return IP_DEFRAG_VS_OUT;
559}
560
543static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) 561static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
544{ 562{
545 int err = ip_defrag(skb, user); 563 int err = ip_defrag(skb, user);
@@ -600,10 +618,10 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
600 skb->ip_summed = CHECKSUM_UNNECESSARY; 618 skb->ip_summed = CHECKSUM_UNNECESSARY;
601 619
602 if (inout) 620 if (inout)
603 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 621 IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
604 "Forwarding altered outgoing ICMP"); 622 "Forwarding altered outgoing ICMP");
605 else 623 else
606 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 624 IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph,
607 "Forwarding altered incoming ICMP"); 625 "Forwarding altered incoming ICMP");
608} 626}
609 627
@@ -637,17 +655,21 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
637 } 655 }
638 656
639 /* And finally the ICMP checksum */ 657 /* And finally the ICMP checksum */
640 icmph->icmp6_cksum = 0; 658 icmph->icmp6_cksum = ~csum_ipv6_magic(&iph->saddr, &iph->daddr,
641 /* TODO IPv6: is this correct for ICMPv6? */ 659 skb->len - icmp_offset,
642 ip_vs_checksum_complete(skb, icmp_offset); 660 IPPROTO_ICMPV6, 0);
643 skb->ip_summed = CHECKSUM_UNNECESSARY; 661 skb->csum_start = skb_network_header(skb) - skb->head + icmp_offset;
662 skb->csum_offset = offsetof(struct icmp6hdr, icmp6_cksum);
663 skb->ip_summed = CHECKSUM_PARTIAL;
644 664
645 if (inout) 665 if (inout)
646 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 666 IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
647 "Forwarding altered outgoing ICMPv6"); 667 (void *)ciph - (void *)iph,
668 "Forwarding altered outgoing ICMPv6");
648 else 669 else
649 IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, 670 IP_VS_DBG_PKT(11, AF_INET6, pp, skb,
650 "Forwarding altered incoming ICMPv6"); 671 (void *)ciph - (void *)iph,
672 "Forwarding altered incoming ICMPv6");
651} 673}
652#endif 674#endif
653 675
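The old ICMPv6 NAT path zeroed the checksum and folded a plain payload sum, which is wrong for ICMPv6: RFC 4443 requires the IPv6 pseudo-header (source, destination, upper-layer length, next header 58) in the sum. The new code computes that with csum_ipv6_magic() and leaves the payload part to CHECKSUM_PARTIAL offload. A self-contained sketch of the pseudo-header fold using plain one's-complement arithmetic rather than the kernel helpers:

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t sum16(const uint8_t *p, size_t len, uint32_t sum)
    {
        while (len > 1) {
            sum += (uint32_t)p[0] << 8 | p[1];
            p += 2;
            len -= 2;
        }
        if (len)
            sum += (uint32_t)p[0] << 8;   /* pad odd byte with zero */
        return sum;
    }

    /* ICMPv6 checksum over the RFC 4443 pseudo-header plus payload. */
    static uint16_t icmpv6_cksum(const uint8_t saddr[16],
                                 const uint8_t daddr[16],
                                 const uint8_t *icmp, uint32_t len)
    {
        uint32_t sum = 0;

        sum = sum16(saddr, 16, sum);
        sum = sum16(daddr, 16, sum);
        sum += len;            /* upper-layer packet length */
        sum += 58;             /* next header: IPPROTO_ICMPV6 */
        sum = sum16(icmp, len, sum);

        while (sum >> 16)      /* fold carries */
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)~sum;
    }

    int main(void)
    {
        uint8_t s[16] = { 0xfe, 0x80 };
        uint8_t d[16] = { 0xfe, 0x80, 0, 0, 0, 0, 0, 0,
                          0, 0, 0, 0, 0, 0, 0, 1 };
        uint8_t echo[8] = { 128, 0, 0, 0, 0, 1, 0, 1 }; /* cksum zeroed */

        printf("%04x\n", icmpv6_cksum(s, d, echo, sizeof(echo)));
        return 0;
    }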
@@ -688,10 +710,25 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
688#endif 710#endif
689 ip_vs_nat_icmp(skb, pp, cp, 1); 711 ip_vs_nat_icmp(skb, pp, cp, 1);
690 712
713#ifdef CONFIG_IP_VS_IPV6
714 if (af == AF_INET6) {
715 if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
716 goto out;
717 } else
718#endif
719 if ((sysctl_ip_vs_snat_reroute ||
720 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
721 ip_route_me_harder(skb, RTN_LOCAL) != 0)
722 goto out;
723
691 /* do the statistics and put it back */ 724 /* do the statistics and put it back */
692 ip_vs_out_stats(cp, skb); 725 ip_vs_out_stats(cp, skb);
693 726
694 skb->ipvs_property = 1; 727 skb->ipvs_property = 1;
728 if (!(cp->flags & IP_VS_CONN_F_NFCT))
729 ip_vs_notrack(skb);
730 else
731 ip_vs_update_conntrack(skb, cp, 0);
695 verdict = NF_ACCEPT; 732 verdict = NF_ACCEPT;
696 733
697out: 734out:
@@ -705,7 +742,8 @@ out:
705 * Find any that might be relevant, check against existing connections. 742 * Find any that might be relevant, check against existing connections.
706 * Currently handles error types - unreachable, quench, ttl exceeded. 743 * Currently handles error types - unreachable, quench, ttl exceeded.
707 */ 744 */
708static int ip_vs_out_icmp(struct sk_buff *skb, int *related) 745static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
746 unsigned int hooknum)
709{ 747{
710 struct iphdr *iph; 748 struct iphdr *iph;
711 struct icmphdr _icmph, *ic; 749 struct icmphdr _icmph, *ic;
@@ -720,7 +758,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
720 758
721 /* reassemble IP fragments */ 759 /* reassemble IP fragments */
722 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 760 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
723 if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) 761 if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
724 return NF_STOLEN; 762 return NF_STOLEN;
725 } 763 }
726 764
@@ -763,7 +801,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
763 pp->dont_defrag)) 801 pp->dont_defrag))
764 return NF_ACCEPT; 802 return NF_ACCEPT;
765 803
766 IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for"); 804 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
805 "Checking outgoing ICMP for");
767 806
768 offset += cih->ihl * 4; 807 offset += cih->ihl * 4;
769 808
@@ -779,7 +818,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
779} 818}
780 819
781#ifdef CONFIG_IP_VS_IPV6 820#ifdef CONFIG_IP_VS_IPV6
782static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) 821static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
822 unsigned int hooknum)
783{ 823{
784 struct ipv6hdr *iph; 824 struct ipv6hdr *iph;
785 struct icmp6hdr _icmph, *ic; 825 struct icmp6hdr _icmph, *ic;
@@ -795,7 +835,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
795 835
796 /* reassemble IP fragments */ 836 /* reassemble IP fragments */
797 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { 837 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
798 if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT)) 838 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
799 return NF_STOLEN; 839 return NF_STOLEN;
800 } 840 }
801 841
@@ -838,7 +878,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
838 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) 878 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
839 return NF_ACCEPT; 879 return NF_ACCEPT;
840 880
841 IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for"); 881 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
882 "Checking outgoing ICMPv6 for");
842 883
843 offset += sizeof(struct ipv6hdr); 884 offset += sizeof(struct ipv6hdr);
844 885
@@ -886,7 +927,7 @@ static unsigned int
886handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, 927handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
887 struct ip_vs_conn *cp, int ihl) 928 struct ip_vs_conn *cp, int ihl)
888{ 929{
889 IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); 930 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
890 931
891 if (!skb_make_writable(skb, ihl)) 932 if (!skb_make_writable(skb, ihl))
892 goto drop; 933 goto drop;
@@ -905,6 +946,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
905 ip_send_check(ip_hdr(skb)); 946 ip_send_check(ip_hdr(skb));
906 } 947 }
907 948
 949 /*
 950 * nf_iterate does not expect a change in skb->dst->dev.
 951 * It looks like it is not fatal to enable this code for hooks
 952 * where our handlers are at the end of the chain list and
 953 * when all subsequent handlers use skb->dst->dev and not outdev.
 954 * It will definitely route the in/out NAT traffic properly
 955 * when multiple paths are used.
 956 */
957
908 /* For policy routing, packets originating from this 958 /* For policy routing, packets originating from this
909 * machine itself may be routed differently to packets 959 * machine itself may be routed differently to packets
910 * passing through. We want this packet to be routed as 960 * passing through. We want this packet to be routed as
@@ -913,21 +963,25 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
913 */ 963 */
914#ifdef CONFIG_IP_VS_IPV6 964#ifdef CONFIG_IP_VS_IPV6
915 if (af == AF_INET6) { 965 if (af == AF_INET6) {
916 if (ip6_route_me_harder(skb) != 0) 966 if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
917 goto drop; 967 goto drop;
918 } else 968 } else
919#endif 969#endif
920 if (ip_route_me_harder(skb, RTN_LOCAL) != 0) 970 if ((sysctl_ip_vs_snat_reroute ||
971 skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
972 ip_route_me_harder(skb, RTN_LOCAL) != 0)
921 goto drop; 973 goto drop;
922 974
923 IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); 975 IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
924 976
925 ip_vs_out_stats(cp, skb); 977 ip_vs_out_stats(cp, skb);
926 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); 978 ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
927 ip_vs_update_conntrack(skb, cp, 0);
928 ip_vs_conn_put(cp);
929
930 skb->ipvs_property = 1; 979 skb->ipvs_property = 1;
980 if (!(cp->flags & IP_VS_CONN_F_NFCT))
981 ip_vs_notrack(skb);
982 else
983 ip_vs_update_conntrack(skb, cp, 0);
984 ip_vs_conn_put(cp);
931 985
932 LeaveFunction(11); 986 LeaveFunction(11);
933 return NF_ACCEPT; 987 return NF_ACCEPT;
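
The rewritten tail of handle_response() makes two separate decisions for every NAT reply: whether to re-run output routing now that the source address has been rewritten (for IPv4, forced when the cached route was local, otherwise governed by the new snat_reroute sysctl; for IPv6 only the sysctl applies), and whether the connection participates in netfilter conntrack at all. A standalone restatement with illustrative names and flag values:

    #include <stdbool.h>

    #define CONN_F_NFCT     0x0800  /* illustrative bit, not the kernel's value */

    /* Reroute the IPv4 reply after SNAT?  Forced when the original route
     * was local (RTCF_LOCAL); otherwise the snat_reroute sysctl decides
     * (default 1, see the ip_vs_ctl.c hunks below). */
    static bool must_reroute_v4(bool snat_reroute, bool rt_local)
    {
            return snat_reroute || rt_local;
    }

    /* Update conntrack, or mark the skb untracked?  Only connections
     * flagged IP_VS_CONN_F_NFCT keep conntrack; the rest go through
     * ip_vs_notrack(). */
    static bool keep_conntrack(unsigned int conn_flags)
    {
            return conn_flags & CONN_F_NFCT;
    }
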
@@ -935,35 +989,46 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
935drop: 989drop:
936 ip_vs_conn_put(cp); 990 ip_vs_conn_put(cp);
937 kfree_skb(skb); 991 kfree_skb(skb);
992 LeaveFunction(11);
938 return NF_STOLEN; 993 return NF_STOLEN;
939} 994}
940 995
941/* 996/*
942 * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
943 * Check if outgoing packet belongs to the established ip_vs_conn. 997 * Check if outgoing packet belongs to the established ip_vs_conn.
944 */ 998 */
945static unsigned int 999static unsigned int
946ip_vs_out(unsigned int hooknum, struct sk_buff *skb, 1000ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
947 const struct net_device *in, const struct net_device *out,
948 int (*okfn)(struct sk_buff *))
949{ 1001{
950 struct ip_vs_iphdr iph; 1002 struct ip_vs_iphdr iph;
951 struct ip_vs_protocol *pp; 1003 struct ip_vs_protocol *pp;
952 struct ip_vs_conn *cp; 1004 struct ip_vs_conn *cp;
953 int af;
954 1005
955 EnterFunction(11); 1006 EnterFunction(11);
956 1007
957 af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; 1008 /* Already marked as IPVS request or reply? */
958
959 if (skb->ipvs_property) 1009 if (skb->ipvs_property)
960 return NF_ACCEPT; 1010 return NF_ACCEPT;
961 1011
1012 /* Bad... Do not break raw sockets */
1013 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
1014 af == AF_INET)) {
1015 struct sock *sk = skb->sk;
1016 struct inet_sock *inet = inet_sk(skb->sk);
1017
1018 if (inet && sk->sk_family == PF_INET && inet->nodefrag)
1019 return NF_ACCEPT;
1020 }
1021
1022 if (unlikely(!skb_dst(skb)))
1023 return NF_ACCEPT;
1024
962 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1025 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
963#ifdef CONFIG_IP_VS_IPV6 1026#ifdef CONFIG_IP_VS_IPV6
964 if (af == AF_INET6) { 1027 if (af == AF_INET6) {
965 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1028 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
966 int related, verdict = ip_vs_out_icmp_v6(skb, &related); 1029 int related;
1030 int verdict = ip_vs_out_icmp_v6(skb, &related,
1031 hooknum);
967 1032
968 if (related) 1033 if (related)
969 return verdict; 1034 return verdict;
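
The raw-socket test above (repeated later in ip_vs_in()) honours inet->nodefrag, which userspace sets with the IPv4-only IP_NODEFRAG socket option; that matches the af == AF_INET guard. For reference, the userspace side looks like this (IP_NODEFRAG is 22 in linux/in.h; the fallback define covers older headers):

    #include <stdio.h>
    #include <sys/socket.h>
    #include <netinet/in.h>

    #ifndef IP_NODEFRAG
    #define IP_NODEFRAG 22
    #endif

    int main(void)
    {
            int one = 1;
            int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); /* needs CAP_NET_RAW */

            if (fd < 0 || setsockopt(fd, IPPROTO_IP, IP_NODEFRAG,
                                     &one, sizeof(one)) < 0) {
                    perror("IP_NODEFRAG");
                    return 1;
            }
            return 0;
    }

With the option set, IPVS steps aside instead of defragmenting traffic the socket explicitly asked the stack not to touch.
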
@@ -972,7 +1037,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
972 } else 1037 } else
973#endif 1038#endif
974 if (unlikely(iph.protocol == IPPROTO_ICMP)) { 1039 if (unlikely(iph.protocol == IPPROTO_ICMP)) {
975 int related, verdict = ip_vs_out_icmp(skb, &related); 1040 int related;
1041 int verdict = ip_vs_out_icmp(skb, &related, hooknum);
976 1042
977 if (related) 1043 if (related)
978 return verdict; 1044 return verdict;
@@ -986,19 +1052,19 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
986 /* reassemble IP fragments */ 1052 /* reassemble IP fragments */
987#ifdef CONFIG_IP_VS_IPV6 1053#ifdef CONFIG_IP_VS_IPV6
988 if (af == AF_INET6) { 1054 if (af == AF_INET6) {
989 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1055 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
990 int related, verdict = ip_vs_out_icmp_v6(skb, &related); 1056 if (ip_vs_gather_frags_v6(skb,
991 1057 ip_vs_defrag_user(hooknum)))
992 if (related) 1058 return NF_STOLEN;
993 return verdict;
994
995 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
996 } 1059 }
1060
1061 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
997 } else 1062 } else
998#endif 1063#endif
999 if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) && 1064 if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) &&
1000 !pp->dont_defrag)) { 1065 !pp->dont_defrag)) {
1001 if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) 1066 if (ip_vs_gather_frags(skb,
1067 ip_vs_defrag_user(hooknum)))
1002 return NF_STOLEN; 1068 return NF_STOLEN;
1003 1069
1004 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1070 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
@@ -1009,55 +1075,123 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
1009 */ 1075 */
1010 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); 1076 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
1011 1077
1012 if (unlikely(!cp)) { 1078 if (likely(cp))
1013 if (sysctl_ip_vs_nat_icmp_send && 1079 return handle_response(af, skb, pp, cp, iph.len);
1014 (pp->protocol == IPPROTO_TCP || 1080 if (sysctl_ip_vs_nat_icmp_send &&
1015 pp->protocol == IPPROTO_UDP || 1081 (pp->protocol == IPPROTO_TCP ||
1016 pp->protocol == IPPROTO_SCTP)) { 1082 pp->protocol == IPPROTO_UDP ||
1017 __be16 _ports[2], *pptr; 1083 pp->protocol == IPPROTO_SCTP)) {
1018 1084 __be16 _ports[2], *pptr;
1019 pptr = skb_header_pointer(skb, iph.len, 1085
1020 sizeof(_ports), _ports); 1086 pptr = skb_header_pointer(skb, iph.len,
1021 if (pptr == NULL) 1087 sizeof(_ports), _ports);
1022 return NF_ACCEPT; /* Not for me */ 1088 if (pptr == NULL)
1023 if (ip_vs_lookup_real_service(af, iph.protocol, 1089 return NF_ACCEPT; /* Not for me */
1024 &iph.saddr, 1090 if (ip_vs_lookup_real_service(af, iph.protocol,
1025 pptr[0])) { 1091 &iph.saddr,
1026 /* 1092 pptr[0])) {
1027 * Notify the real server: there is no 1093 /*
1028 * existing entry if it is not RST 1094 * Notify the real server: there is no
1029 * packet or not TCP packet. 1095 * existing entry if it is not RST
1030 */ 1096 * packet or not TCP packet.
1031 if ((iph.protocol != IPPROTO_TCP && 1097 */
1032 iph.protocol != IPPROTO_SCTP) 1098 if ((iph.protocol != IPPROTO_TCP &&
1033 || ((iph.protocol == IPPROTO_TCP 1099 iph.protocol != IPPROTO_SCTP)
1034 && !is_tcp_reset(skb, iph.len)) 1100 || ((iph.protocol == IPPROTO_TCP
1035 || (iph.protocol == IPPROTO_SCTP 1101 && !is_tcp_reset(skb, iph.len))
1036 && !is_sctp_abort(skb, 1102 || (iph.protocol == IPPROTO_SCTP
1037 iph.len)))) { 1103 && !is_sctp_abort(skb,
1104 iph.len)))) {
1038#ifdef CONFIG_IP_VS_IPV6 1105#ifdef CONFIG_IP_VS_IPV6
1039 if (af == AF_INET6) 1106 if (af == AF_INET6) {
1040 icmpv6_send(skb, 1107 struct net *net =
1041 ICMPV6_DEST_UNREACH, 1108 dev_net(skb_dst(skb)->dev);
1042 ICMPV6_PORT_UNREACH, 1109
1043 0); 1110 if (!skb->dev)
1044 else 1111 skb->dev = net->loopback_dev;
1112 icmpv6_send(skb,
1113 ICMPV6_DEST_UNREACH,
1114 ICMPV6_PORT_UNREACH,
1115 0);
1116 } else
1045#endif 1117#endif
1046 icmp_send(skb, 1118 icmp_send(skb,
1047 ICMP_DEST_UNREACH, 1119 ICMP_DEST_UNREACH,
1048 ICMP_PORT_UNREACH, 0); 1120 ICMP_PORT_UNREACH, 0);
1049 return NF_DROP; 1121 return NF_DROP;
1050 }
1051 } 1122 }
1052 } 1123 }
1053 IP_VS_DBG_PKT(12, pp, skb, 0,
1054 "packet continues traversal as normal");
1055 return NF_ACCEPT;
1056 } 1124 }
1125 IP_VS_DBG_PKT(12, af, pp, skb, 0,
1126 "ip_vs_out: packet continues traversal as normal");
1127 return NF_ACCEPT;
1128}
1129
1130/*
 1131 * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chains,
1132 * used only for VS/NAT.
1133 * Check if packet is reply for established ip_vs_conn.
1134 */
1135static unsigned int
1136ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb,
1137 const struct net_device *in, const struct net_device *out,
1138 int (*okfn)(struct sk_buff *))
1139{
1140 return ip_vs_out(hooknum, skb, AF_INET);
1141}
1142
1143/*
1144 * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
1145 * Check if packet is reply for established ip_vs_conn.
1146 */
1147static unsigned int
1148ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb,
1149 const struct net_device *in, const struct net_device *out,
1150 int (*okfn)(struct sk_buff *))
1151{
1152 unsigned int verdict;
1153
1154 /* Disable BH in LOCAL_OUT until all places are fixed */
1155 local_bh_disable();
1156 verdict = ip_vs_out(hooknum, skb, AF_INET);
1157 local_bh_enable();
1158 return verdict;
1159}
1160
1161#ifdef CONFIG_IP_VS_IPV6
1162
1163/*
 1164 * It is hooked at the NF_INET_FORWARD and NF_INET_LOCAL_IN chains,
1165 * used only for VS/NAT.
1166 * Check if packet is reply for established ip_vs_conn.
1167 */
1168static unsigned int
1169ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb,
1170 const struct net_device *in, const struct net_device *out,
1171 int (*okfn)(struct sk_buff *))
1172{
1173 return ip_vs_out(hooknum, skb, AF_INET6);
1174}
1057 1175
1058 return handle_response(af, skb, pp, cp, iph.len); 1176/*
1177 * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT.
1178 * Check if packet is reply for established ip_vs_conn.
1179 */
1180static unsigned int
1181ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
1182 const struct net_device *in, const struct net_device *out,
1183 int (*okfn)(struct sk_buff *))
1184{
1185 unsigned int verdict;
1186
1187 /* Disable BH in LOCAL_OUT until all places are fixed */
1188 local_bh_disable();
1189 verdict = ip_vs_out(hooknum, skb, AF_INET6);
1190 local_bh_enable();
1191 return verdict;
1059} 1192}
1060 1193
1194#endif
1061 1195
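
The new entry points stay deliberately thin: each adapts the five-argument nf_hookfn prototype to the shared ip_vs_out(hooknum, skb, af) worker, and only the LOCAL_OUT variants add local_bh_disable(). The reason is lock consistency: on LOCAL_IN and FORWARD the worker already runs in softirq context, so IPVS can take its locks with plain spin_lock() there; calling the same code from process context without masking bottom halves would let a softirq on the same CPU deadlock against the holder. A kernel-style sketch of the hazard being avoided (lock and worker names are illustrative):

    #include <linux/spinlock.h>
    #include <linux/skbuff.h>
    #include <linux/netfilter.h>

    static DEFINE_SPINLOCK(stats_lock);     /* illustrative */

    /* The worker takes the lock with plain spin_lock() and relies on
     * the caller having BHs off, as is naturally true on LOCAL_IN and
     * FORWARD (softirq context). */
    static unsigned int common_worker(struct sk_buff *skb)
    {
            spin_lock(&stats_lock);
            /* ... update per-connection counters ... */
            spin_unlock(&stats_lock);
            return NF_ACCEPT;
    }

    /* LOCAL_OUT runs in process context, so mask BHs around the call;
     * otherwise a softirq on this CPU could spin on stats_lock while
     * we hold it. */
    static unsigned int local_out_handler(struct sk_buff *skb)
    {
            unsigned int verdict;

            local_bh_disable();
            verdict = common_worker(skb);
            local_bh_enable();
            return verdict;
    }
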
1062/* 1196/*
1063 * Handle ICMP messages in the outside-to-inside direction (incoming). 1197 * Handle ICMP messages in the outside-to-inside direction (incoming).
@@ -1081,8 +1215,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1081 1215
1082 /* reassemble IP fragments */ 1216 /* reassemble IP fragments */
1083 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { 1217 if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
1084 if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ? 1218 if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
1085 IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
1086 return NF_STOLEN; 1219 return NF_STOLEN;
1087 } 1220 }
1088 1221
@@ -1125,7 +1258,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1125 pp->dont_defrag)) 1258 pp->dont_defrag))
1126 return NF_ACCEPT; 1259 return NF_ACCEPT;
1127 1260
1128 IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for"); 1261 IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
1262 "Checking incoming ICMP for");
1129 1263
1130 offset += cih->ihl * 4; 1264 offset += cih->ihl * 4;
1131 1265
@@ -1159,7 +1293,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
1159 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) 1293 if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
1160 offset += 2 * sizeof(__u16); 1294 offset += 2 * sizeof(__u16);
1161 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset); 1295 verdict = ip_vs_icmp_xmit(skb, cp, pp, offset);
1162 /* do not touch skb anymore */ 1296 /* LOCALNODE from FORWARD hook is not supported */
1297 if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
1298 skb_rtable(skb)->rt_flags & RTCF_LOCAL) {
1299 IP_VS_DBG(1, "%s(): "
1300 "local delivery to %pI4 but in FORWARD\n",
1301 __func__, &skb_rtable(skb)->rt_dst);
1302 verdict = NF_DROP;
1303 }
1163 1304
1164 out: 1305 out:
1165 __ip_vs_conn_put(cp); 1306 __ip_vs_conn_put(cp);
@@ -1180,14 +1321,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1180 struct ip_vs_protocol *pp; 1321 struct ip_vs_protocol *pp;
1181 unsigned int offset, verdict; 1322 unsigned int offset, verdict;
1182 union nf_inet_addr snet; 1323 union nf_inet_addr snet;
1324 struct rt6_info *rt;
1183 1325
1184 *related = 1; 1326 *related = 1;
1185 1327
1186 /* reassemble IP fragments */ 1328 /* reassemble IP fragments */
1187 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { 1329 if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) {
1188 if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ? 1330 if (ip_vs_gather_frags_v6(skb, ip_vs_defrag_user(hooknum)))
1189 IP_DEFRAG_VS_IN :
1190 IP_DEFRAG_VS_FWD))
1191 return NF_STOLEN; 1331 return NF_STOLEN;
1192 } 1332 }
1193 1333
@@ -1230,7 +1370,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1230 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) 1370 if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag))
1231 return NF_ACCEPT; 1371 return NF_ACCEPT;
1232 1372
1233 IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for"); 1373 IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
1374 "Checking incoming ICMPv6 for");
1234 1375
1235 offset += sizeof(struct ipv6hdr); 1376 offset += sizeof(struct ipv6hdr);
1236 1377
@@ -1258,7 +1399,15 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1258 IPPROTO_SCTP == cih->nexthdr) 1399 IPPROTO_SCTP == cih->nexthdr)
1259 offset += 2 * sizeof(__u16); 1400 offset += 2 * sizeof(__u16);
1260 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); 1401 verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
1261 /* do not touch skb anymore */ 1402 /* LOCALNODE from FORWARD hook is not supported */
1403 if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD &&
1404 (rt = (struct rt6_info *) skb_dst(skb)) &&
1405 rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK) {
1406 IP_VS_DBG(1, "%s(): "
1407 "local delivery to %pI6 but in FORWARD\n",
1408 __func__, &rt->rt6i_dst);
1409 verdict = NF_DROP;
1410 }
1262 1411
1263 __ip_vs_conn_put(cp); 1412 __ip_vs_conn_put(cp);
1264 1413
@@ -1272,35 +1421,49 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
1272 * and send it on its way... 1421 * and send it on its way...
1273 */ 1422 */
1274static unsigned int 1423static unsigned int
1275ip_vs_in(unsigned int hooknum, struct sk_buff *skb, 1424ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
1276 const struct net_device *in, const struct net_device *out,
1277 int (*okfn)(struct sk_buff *))
1278{ 1425{
1279 struct ip_vs_iphdr iph; 1426 struct ip_vs_iphdr iph;
1280 struct ip_vs_protocol *pp; 1427 struct ip_vs_protocol *pp;
1281 struct ip_vs_conn *cp; 1428 struct ip_vs_conn *cp;
1282 int ret, restart, af, pkts; 1429 int ret, restart, pkts;
1283 1430
1284 af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; 1431 /* Already marked as IPVS request or reply? */
1285 1432 if (skb->ipvs_property)
1286 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); 1433 return NF_ACCEPT;
1287 1434
1288 /* 1435 /*
1289 * Big tappo: only PACKET_HOST, including loopback for local client 1436 * Big tappo:
1290 * Don't handle local packets on IPv6 for now 1437 * - remote client: only PACKET_HOST
1438 * - route: used for struct net when skb->dev is unset
1291 */ 1439 */
1292 if (unlikely(skb->pkt_type != PACKET_HOST)) { 1440 if (unlikely((skb->pkt_type != PACKET_HOST &&
1293 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n", 1441 hooknum != NF_INET_LOCAL_OUT) ||
1294 skb->pkt_type, 1442 !skb_dst(skb))) {
1295 iph.protocol, 1443 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1296 IP_VS_DBG_ADDR(af, &iph.daddr)); 1444 IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
1445 " ignored in hook %u\n",
1446 skb->pkt_type, iph.protocol,
1447 IP_VS_DBG_ADDR(af, &iph.daddr), hooknum);
1297 return NF_ACCEPT; 1448 return NF_ACCEPT;
1298 } 1449 }
1450 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
1451
1452 /* Bad... Do not break raw sockets */
1453 if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
1454 af == AF_INET)) {
1455 struct sock *sk = skb->sk;
1456 struct inet_sock *inet = inet_sk(skb->sk);
1457
1458 if (inet && sk->sk_family == PF_INET && inet->nodefrag)
1459 return NF_ACCEPT;
1460 }
1299 1461
1300#ifdef CONFIG_IP_VS_IPV6 1462#ifdef CONFIG_IP_VS_IPV6
1301 if (af == AF_INET6) { 1463 if (af == AF_INET6) {
1302 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { 1464 if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
1303 int related, verdict = ip_vs_in_icmp_v6(skb, &related, hooknum); 1465 int related;
1466 int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum);
1304 1467
1305 if (related) 1468 if (related)
1306 return verdict; 1469 return verdict;
@@ -1309,7 +1472,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1309 } else 1472 } else
1310#endif 1473#endif
1311 if (unlikely(iph.protocol == IPPROTO_ICMP)) { 1474 if (unlikely(iph.protocol == IPPROTO_ICMP)) {
1312 int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); 1475 int related;
1476 int verdict = ip_vs_in_icmp(skb, &related, hooknum);
1313 1477
1314 if (related) 1478 if (related)
1315 return verdict; 1479 return verdict;
@@ -1329,23 +1493,18 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1329 if (unlikely(!cp)) { 1493 if (unlikely(!cp)) {
1330 int v; 1494 int v;
1331 1495
1332 /* For local client packets, it could be a response */
1333 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
1334 if (cp)
1335 return handle_response(af, skb, pp, cp, iph.len);
1336
1337 if (!pp->conn_schedule(af, skb, pp, &v, &cp)) 1496 if (!pp->conn_schedule(af, skb, pp, &v, &cp))
1338 return v; 1497 return v;
1339 } 1498 }
1340 1499
1341 if (unlikely(!cp)) { 1500 if (unlikely(!cp)) {
1342 /* sorry, all this trouble for a no-hit :) */ 1501 /* sorry, all this trouble for a no-hit :) */
1343 IP_VS_DBG_PKT(12, pp, skb, 0, 1502 IP_VS_DBG_PKT(12, af, pp, skb, 0,
1344 "packet continues traversal as normal"); 1503 "ip_vs_in: packet continues traversal as normal");
1345 return NF_ACCEPT; 1504 return NF_ACCEPT;
1346 } 1505 }
1347 1506
1348 IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet"); 1507 IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
1349 1508
1350 /* Check the server status */ 1509 /* Check the server status */
1351 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { 1510 if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
@@ -1381,8 +1540,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1381 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1540 if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1382 cp->protocol == IPPROTO_SCTP) { 1541 cp->protocol == IPPROTO_SCTP) {
1383 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED && 1542 if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
1384 (atomic_read(&cp->in_pkts) % 1543 (pkts % sysctl_ip_vs_sync_threshold[1]
1385 sysctl_ip_vs_sync_threshold[1]
1386 == sysctl_ip_vs_sync_threshold[0])) || 1544 == sysctl_ip_vs_sync_threshold[0])) ||
1387 (cp->old_state != cp->state && 1545 (cp->old_state != cp->state &&
1388 ((cp->state == IP_VS_SCTP_S_CLOSED) || 1546 ((cp->state == IP_VS_SCTP_S_CLOSED) ||
@@ -1393,7 +1551,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
1393 } 1551 }
1394 } 1552 }
1395 1553
1396 if (af == AF_INET && 1554 /* Keep this block last: TCP and others with pp->num_states <= 1 */
1555 else if (af == AF_INET &&
1397 (ip_vs_sync_state & IP_VS_STATE_MASTER) && 1556 (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
1398 (((cp->protocol != IPPROTO_TCP || 1557 (((cp->protocol != IPPROTO_TCP ||
1399 cp->state == IP_VS_TCP_S_ESTABLISHED) && 1558 cp->state == IP_VS_TCP_S_ESTABLISHED) &&
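
With the default sysctl_ip_vs_sync_threshold = { 3, 50 } (see ip_vs_ctl.c below), the pkts % threshold[1] == threshold[0] test fires on the 3rd packet of a connection and every 50th after that, so state is synced early and then at a bounded rate; the hunk above also reuses the pkts value computed once earlier (outside this excerpt) instead of re-reading the atomic counter. A runnable check of the arithmetic:

    #include <stdio.h>

    int main(void)
    {
            int threshold[2] = { 3, 50 };   /* default sysctl values */
            int pkts;

            /* Print the packet counts that would trigger a sync message. */
            for (pkts = 1; pkts <= 160; pkts++)
                    if (pkts % threshold[1] == threshold[0])
                            printf("sync at packet %d\n", pkts);
            return 0;       /* prints 3, 53, 103, 153 */
    }
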
@@ -1412,6 +1571,72 @@ out:
1412 return ret; 1571 return ret;
1413} 1572}
1414 1573
1574/*
1575 * AF_INET handler in NF_INET_LOCAL_IN chain
1576 * Schedule and forward packets from remote clients
1577 */
1578static unsigned int
1579ip_vs_remote_request4(unsigned int hooknum, struct sk_buff *skb,
1580 const struct net_device *in,
1581 const struct net_device *out,
1582 int (*okfn)(struct sk_buff *))
1583{
1584 return ip_vs_in(hooknum, skb, AF_INET);
1585}
1586
1587/*
1588 * AF_INET handler in NF_INET_LOCAL_OUT chain
1589 * Schedule and forward packets from local clients
1590 */
1591static unsigned int
1592ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
1593 const struct net_device *in, const struct net_device *out,
1594 int (*okfn)(struct sk_buff *))
1595{
1596 unsigned int verdict;
1597
1598 /* Disable BH in LOCAL_OUT until all places are fixed */
1599 local_bh_disable();
1600 verdict = ip_vs_in(hooknum, skb, AF_INET);
1601 local_bh_enable();
1602 return verdict;
1603}
1604
1605#ifdef CONFIG_IP_VS_IPV6
1606
1607/*
1608 * AF_INET6 handler in NF_INET_LOCAL_IN chain
1609 * Schedule and forward packets from remote clients
1610 */
1611static unsigned int
1612ip_vs_remote_request6(unsigned int hooknum, struct sk_buff *skb,
1613 const struct net_device *in,
1614 const struct net_device *out,
1615 int (*okfn)(struct sk_buff *))
1616{
1617 return ip_vs_in(hooknum, skb, AF_INET6);
1618}
1619
1620/*
1621 * AF_INET6 handler in NF_INET_LOCAL_OUT chain
1622 * Schedule and forward packets from local clients
1623 */
1624static unsigned int
1625ip_vs_local_request6(unsigned int hooknum, struct sk_buff *skb,
1626 const struct net_device *in, const struct net_device *out,
1627 int (*okfn)(struct sk_buff *))
1628{
1629 unsigned int verdict;
1630
1631 /* Disable BH in LOCAL_OUT until all places are fixed */
1632 local_bh_disable();
1633 verdict = ip_vs_in(hooknum, skb, AF_INET6);
1634 local_bh_enable();
1635 return verdict;
1636}
1637
1638#endif
1639
1415 1640
1416/* 1641/*
1417 * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP 1642 * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
@@ -1452,23 +1677,39 @@ ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb,
1452 1677
1453 1678
1454static struct nf_hook_ops ip_vs_ops[] __read_mostly = { 1679static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1680 /* After packet filtering, change source only for VS/NAT */
1681 {
1682 .hook = ip_vs_reply4,
1683 .owner = THIS_MODULE,
1684 .pf = PF_INET,
1685 .hooknum = NF_INET_LOCAL_IN,
1686 .priority = 99,
1687 },
1455 /* After packet filtering, forward packet through VS/DR, VS/TUN, 1688 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1456 * or VS/NAT(change destination), so that filtering rules can be 1689 * or VS/NAT(change destination), so that filtering rules can be
1457 * applied to IPVS. */ 1690 * applied to IPVS. */
1458 { 1691 {
1459 .hook = ip_vs_in, 1692 .hook = ip_vs_remote_request4,
1460 .owner = THIS_MODULE, 1693 .owner = THIS_MODULE,
1461 .pf = PF_INET, 1694 .pf = PF_INET,
1462 .hooknum = NF_INET_LOCAL_IN, 1695 .hooknum = NF_INET_LOCAL_IN,
1463 .priority = 100, 1696 .priority = 101,
1464 }, 1697 },
1465 /* After packet filtering, change source only for VS/NAT */ 1698 /* Before ip_vs_in, change source only for VS/NAT */
1466 { 1699 {
1467 .hook = ip_vs_out, 1700 .hook = ip_vs_local_reply4,
1468 .owner = THIS_MODULE, 1701 .owner = THIS_MODULE,
1469 .pf = PF_INET, 1702 .pf = PF_INET,
1470 .hooknum = NF_INET_FORWARD, 1703 .hooknum = NF_INET_LOCAL_OUT,
1471 .priority = 100, 1704 .priority = -99,
1705 },
1706 /* After mangle, schedule and forward local requests */
1707 {
1708 .hook = ip_vs_local_request4,
1709 .owner = THIS_MODULE,
1710 .pf = PF_INET,
1711 .hooknum = NF_INET_LOCAL_OUT,
1712 .priority = -98,
1472 }, 1713 },
1473 /* After packet filtering (but before ip_vs_out_icmp), catch icmp 1714 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1474 * destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1715 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1476,27 +1717,51 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1476 .hook = ip_vs_forward_icmp, 1717 .hook = ip_vs_forward_icmp,
1477 .owner = THIS_MODULE, 1718 .owner = THIS_MODULE,
1478 .pf = PF_INET, 1719 .pf = PF_INET,
1479 .hooknum = NF_INET_FORWARD, 1720 .hooknum = NF_INET_FORWARD,
1480 .priority = 99, 1721 .priority = 99,
1722 },
1723 /* After packet filtering, change source only for VS/NAT */
1724 {
1725 .hook = ip_vs_reply4,
1726 .owner = THIS_MODULE,
1727 .pf = PF_INET,
1728 .hooknum = NF_INET_FORWARD,
1729 .priority = 100,
1481 }, 1730 },
1482#ifdef CONFIG_IP_VS_IPV6 1731#ifdef CONFIG_IP_VS_IPV6
1732 /* After packet filtering, change source only for VS/NAT */
1733 {
1734 .hook = ip_vs_reply6,
1735 .owner = THIS_MODULE,
1736 .pf = PF_INET6,
1737 .hooknum = NF_INET_LOCAL_IN,
1738 .priority = 99,
1739 },
1483 /* After packet filtering, forward packet through VS/DR, VS/TUN, 1740 /* After packet filtering, forward packet through VS/DR, VS/TUN,
1484 * or VS/NAT(change destination), so that filtering rules can be 1741 * or VS/NAT(change destination), so that filtering rules can be
1485 * applied to IPVS. */ 1742 * applied to IPVS. */
1486 { 1743 {
1487 .hook = ip_vs_in, 1744 .hook = ip_vs_remote_request6,
1488 .owner = THIS_MODULE, 1745 .owner = THIS_MODULE,
1489 .pf = PF_INET6, 1746 .pf = PF_INET6,
1490 .hooknum = NF_INET_LOCAL_IN, 1747 .hooknum = NF_INET_LOCAL_IN,
1491 .priority = 100, 1748 .priority = 101,
1492 }, 1749 },
1493 /* After packet filtering, change source only for VS/NAT */ 1750 /* Before ip_vs_in, change source only for VS/NAT */
1751 {
1752 .hook = ip_vs_local_reply6,
1753 .owner = THIS_MODULE,
 1754 .pf = PF_INET6,
1755 .hooknum = NF_INET_LOCAL_OUT,
1756 .priority = -99,
1757 },
1758 /* After mangle, schedule and forward local requests */
1494 { 1759 {
1495 .hook = ip_vs_out, 1760 .hook = ip_vs_local_request6,
1496 .owner = THIS_MODULE, 1761 .owner = THIS_MODULE,
1497 .pf = PF_INET6, 1762 .pf = PF_INET6,
1498 .hooknum = NF_INET_FORWARD, 1763 .hooknum = NF_INET_LOCAL_OUT,
1499 .priority = 100, 1764 .priority = -98,
1500 }, 1765 },
1501 /* After packet filtering (but before ip_vs_out_icmp), catch icmp 1766 /* After packet filtering (but before ip_vs_out_icmp), catch icmp
1502 * destined for 0.0.0.0/0, which is for incoming IPVS connections */ 1767 * destined for 0.0.0.0/0, which is for incoming IPVS connections */
@@ -1504,8 +1769,16 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
1504 .hook = ip_vs_forward_icmp_v6, 1769 .hook = ip_vs_forward_icmp_v6,
1505 .owner = THIS_MODULE, 1770 .owner = THIS_MODULE,
1506 .pf = PF_INET6, 1771 .pf = PF_INET6,
1507 .hooknum = NF_INET_FORWARD, 1772 .hooknum = NF_INET_FORWARD,
1508 .priority = 99, 1773 .priority = 99,
1774 },
1775 /* After packet filtering, change source only for VS/NAT */
1776 {
1777 .hook = ip_vs_reply6,
1778 .owner = THIS_MODULE,
1779 .pf = PF_INET6,
1780 .hooknum = NF_INET_FORWARD,
1781 .priority = 100,
1509 }, 1782 },
1510#endif 1783#endif
1511}; 1784};
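
Netfilter runs the hooks of each chain in ascending priority, so this table encodes an ordering, not just a set: replies are un-NATed before requests are scheduled on LOCAL_IN (99 before 101), local replies are handled before local requests on LOCAL_OUT (-99 before -98), and FORWARD inspects ICMP (99) before NAT replies (100). A small standalone program that sorts the IPv4 entries the way netfilter traverses them:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Model of the table above: ascending priority within each chain. */
    struct op { const char *chain; int prio; const char *hook; };

    static int cmp(const void *a, const void *b)
    {
            const struct op *x = a, *y = b;
            int c = strcmp(x->chain, y->chain);

            return c ? c : x->prio - y->prio;
    }

    int main(void)
    {
            struct op ops[] = {
                    { "LOCAL_IN",   99, "ip_vs_reply4" },
                    { "LOCAL_IN",  101, "ip_vs_remote_request4" },
                    { "LOCAL_OUT", -99, "ip_vs_local_reply4" },
                    { "LOCAL_OUT", -98, "ip_vs_local_request4" },
                    { "FORWARD",    99, "ip_vs_forward_icmp" },
                    { "FORWARD",   100, "ip_vs_reply4" },
            };
            size_t i, n = sizeof(ops) / sizeof(ops[0]);

            qsort(ops, n, sizeof(ops[0]), cmp);
            for (i = 0; i < n; i++)
                    printf("%-10s %4d  %s\n", ops[i].chain, ops[i].prio,
                           ops[i].hook);
            return 0;
    }
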
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 0f0c079c422a..5f5daa30b0af 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -61,7 +61,7 @@ static DEFINE_RWLOCK(__ip_vs_svc_lock);
61static DEFINE_RWLOCK(__ip_vs_rs_lock); 61static DEFINE_RWLOCK(__ip_vs_rs_lock);
62 62
63/* lock for state and timeout tables */ 63/* lock for state and timeout tables */
64static DEFINE_RWLOCK(__ip_vs_securetcp_lock); 64static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65 65
66/* lock for drop entry handling */ 66/* lock for drop entry handling */
67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); 67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
@@ -88,6 +88,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
88int sysctl_ip_vs_expire_quiescent_template = 0; 88int sysctl_ip_vs_expire_quiescent_template = 0;
89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; 89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90int sysctl_ip_vs_nat_icmp_send = 0; 90int sysctl_ip_vs_nat_icmp_send = 0;
91#ifdef CONFIG_IP_VS_NFCT
92int sysctl_ip_vs_conntrack;
93#endif
94int sysctl_ip_vs_snat_reroute = 1;
91 95
92 96
93#ifdef CONFIG_IP_VS_DEBUG 97#ifdef CONFIG_IP_VS_DEBUG
@@ -204,7 +208,7 @@ static void update_defense_level(void)
204 spin_unlock(&__ip_vs_droppacket_lock); 208 spin_unlock(&__ip_vs_droppacket_lock);
205 209
206 /* secure_tcp */ 210 /* secure_tcp */
207 write_lock(&__ip_vs_securetcp_lock); 211 spin_lock(&ip_vs_securetcp_lock);
208 switch (sysctl_ip_vs_secure_tcp) { 212 switch (sysctl_ip_vs_secure_tcp) {
209 case 0: 213 case 0:
210 if (old_secure_tcp >= 2) 214 if (old_secure_tcp >= 2)
@@ -238,7 +242,7 @@ static void update_defense_level(void)
238 old_secure_tcp = sysctl_ip_vs_secure_tcp; 242 old_secure_tcp = sysctl_ip_vs_secure_tcp;
239 if (to_change >= 0) 243 if (to_change >= 0)
240 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); 244 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
241 write_unlock(&__ip_vs_securetcp_lock); 245 spin_unlock(&ip_vs_securetcp_lock);
242 246
243 local_bh_enable(); 247 local_bh_enable();
244} 248}
@@ -401,7 +405,7 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
401 * Get service by {proto,addr,port} in the service table. 405 * Get service by {proto,addr,port} in the service table.
402 */ 406 */
403static inline struct ip_vs_service * 407static inline struct ip_vs_service *
404__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr, 408__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
405 __be16 vport) 409 __be16 vport)
406{ 410{
407 unsigned hash; 411 unsigned hash;
@@ -416,7 +420,6 @@ __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
416 && (svc->port == vport) 420 && (svc->port == vport)
417 && (svc->protocol == protocol)) { 421 && (svc->protocol == protocol)) {
418 /* HIT */ 422 /* HIT */
419 atomic_inc(&svc->usecnt);
420 return svc; 423 return svc;
421 } 424 }
422 } 425 }
@@ -429,7 +432,7 @@ __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
429 * Get service by {fwmark} in the service table. 432 * Get service by {fwmark} in the service table.
430 */ 433 */
431static inline struct ip_vs_service * 434static inline struct ip_vs_service *
432__ip_vs_svc_fwm_get(int af, __u32 fwmark) 435__ip_vs_svc_fwm_find(int af, __u32 fwmark)
433{ 436{
434 unsigned hash; 437 unsigned hash;
435 struct ip_vs_service *svc; 438 struct ip_vs_service *svc;
@@ -440,7 +443,6 @@ __ip_vs_svc_fwm_get(int af, __u32 fwmark)
440 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { 443 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
441 if (svc->fwmark == fwmark && svc->af == af) { 444 if (svc->fwmark == fwmark && svc->af == af) {
442 /* HIT */ 445 /* HIT */
443 atomic_inc(&svc->usecnt);
444 return svc; 446 return svc;
445 } 447 }
446 } 448 }
@@ -459,14 +461,14 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
459 /* 461 /*
460 * Check the table hashed by fwmark first 462 * Check the table hashed by fwmark first
461 */ 463 */
462 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark))) 464 if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
463 goto out; 465 goto out;
464 466
465 /* 467 /*
466 * Check the table hashed by <protocol,addr,port> 468 * Check the table hashed by <protocol,addr,port>
467 * for "full" addressed entries 469 * for "full" addressed entries
468 */ 470 */
469 svc = __ip_vs_service_get(af, protocol, vaddr, vport); 471 svc = __ip_vs_service_find(af, protocol, vaddr, vport);
470 472
471 if (svc == NULL 473 if (svc == NULL
472 && protocol == IPPROTO_TCP 474 && protocol == IPPROTO_TCP
@@ -476,7 +478,7 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
476 * Check if ftp service entry exists, the packet 478 * Check if ftp service entry exists, the packet
477 * might belong to FTP data connections. 479 * might belong to FTP data connections.
478 */ 480 */
479 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT); 481 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
480 } 482 }
481 483
482 if (svc == NULL 484 if (svc == NULL
@@ -484,10 +486,12 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
484 /* 486 /*
485 * Check if the catch-all port (port zero) exists 487 * Check if the catch-all port (port zero) exists
486 */ 488 */
487 svc = __ip_vs_service_get(af, protocol, vaddr, 0); 489 svc = __ip_vs_service_find(af, protocol, vaddr, 0);
488 } 490 }
489 491
490 out: 492 out:
493 if (svc)
494 atomic_inc(&svc->usecnt);
491 read_unlock(&__ip_vs_svc_lock); 495 read_unlock(&__ip_vs_svc_lock);
492 496
493 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", 497 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
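
The _get to _find rename marks where reference counting moved: the _find helpers are bare lookups, valid only while __ip_vs_svc_lock is held, and ip_vs_service_get() now bumps usecnt exactly once on the common exit path rather than inside every lookup. The shape of the pattern, with toy types:

    #include <stddef.h>

    struct svc { int usecnt; int key; };

    /* find: lookup only; caller must hold the table lock; no reference. */
    static struct svc *svc_find(struct svc *tbl, size_t n, int key)
    {
            size_t i;

            for (i = 0; i < n; i++)
                    if (tbl[i].key == key)
                            return &tbl[i];
            return NULL;
    }

    /* get: lookup plus reference, taken once on the common exit path,
     * mirroring the "if (svc) atomic_inc(&svc->usecnt)" hunk above. */
    static struct svc *svc_get(struct svc *tbl, size_t n, int key)
    {
            struct svc *svc = svc_find(tbl, n, key);        /* under lock */

            if (svc)
                    svc->usecnt++;          /* stand-in for atomic_inc() */
            return svc;
    }
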
@@ -506,14 +510,19 @@ __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
506 dest->svc = svc; 510 dest->svc = svc;
507} 511}
508 512
509static inline void 513static void
510__ip_vs_unbind_svc(struct ip_vs_dest *dest) 514__ip_vs_unbind_svc(struct ip_vs_dest *dest)
511{ 515{
512 struct ip_vs_service *svc = dest->svc; 516 struct ip_vs_service *svc = dest->svc;
513 517
514 dest->svc = NULL; 518 dest->svc = NULL;
515 if (atomic_dec_and_test(&svc->refcnt)) 519 if (atomic_dec_and_test(&svc->refcnt)) {
520 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
521 svc->fwmark,
522 IP_VS_DBG_ADDR(svc->af, &svc->addr),
523 ntohs(svc->port), atomic_read(&svc->usecnt));
516 kfree(svc); 524 kfree(svc);
525 }
517} 526}
518 527
519 528
@@ -758,31 +767,18 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
758 * Update a destination in the given service 767 * Update a destination in the given service
759 */ 768 */
760static void 769static void
761__ip_vs_update_dest(struct ip_vs_service *svc, 770__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
762 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest) 771 struct ip_vs_dest_user_kern *udest, int add)
763{ 772{
764 int conn_flags; 773 int conn_flags;
765 774
766 /* set the weight and the flags */ 775 /* set the weight and the flags */
767 atomic_set(&dest->weight, udest->weight); 776 atomic_set(&dest->weight, udest->weight);
768 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; 777 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
769 778 conn_flags |= IP_VS_CONN_F_INACTIVE;
770 /* check if local node and update the flags */
771#ifdef CONFIG_IP_VS_IPV6
772 if (svc->af == AF_INET6) {
773 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
774 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
775 | IP_VS_CONN_F_LOCALNODE;
776 }
777 } else
778#endif
779 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
780 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
781 | IP_VS_CONN_F_LOCALNODE;
782 }
783 779
784 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ 780 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
785 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) { 781 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
786 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 782 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
787 } else { 783 } else {
788 /* 784 /*
@@ -813,6 +809,29 @@ __ip_vs_update_dest(struct ip_vs_service *svc,
813 dest->flags &= ~IP_VS_DEST_F_OVERLOAD; 809 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
814 dest->u_threshold = udest->u_threshold; 810 dest->u_threshold = udest->u_threshold;
815 dest->l_threshold = udest->l_threshold; 811 dest->l_threshold = udest->l_threshold;
812
813 spin_lock(&dest->dst_lock);
814 ip_vs_dst_reset(dest);
815 spin_unlock(&dest->dst_lock);
816
817 if (add)
818 ip_vs_new_estimator(&dest->stats);
819
820 write_lock_bh(&__ip_vs_svc_lock);
821
822 /* Wait until all other svc users go away */
823 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
824
825 if (add) {
826 list_add(&dest->n_list, &svc->destinations);
827 svc->num_dests++;
828 }
829
830 /* call the update_service, because server weight may be changed */
831 if (svc->scheduler->update_service)
832 svc->scheduler->update_service(svc);
833
834 write_unlock_bh(&__ip_vs_svc_lock);
816} 835}
817 836
818 837
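
__ip_vs_update_dest() now owns the quiesce-then-mutate sequence that add and edit previously duplicated: take the service write lock, wait for usecnt to drain, then link the destination and let the scheduler recompute. The wait condition drops from > 1 to > 0 throughout this file because control-path callers no longer hold a usecnt reference of their own. IP_VS_WAIT_WHILE() is defined near the top of ip_vs_ctl.c, outside this excerpt; its assumed shape is a polite busy-wait:

    /* Assumed shape of the macro (not shown in this diff): spin until
     * the readers drain, yielding the pipeline between polls. */
    #define IP_VS_WAIT_WHILE(expr)  while (expr) { cpu_relax(); }
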
@@ -843,7 +862,7 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
843 return -EINVAL; 862 return -EINVAL;
844 } 863 }
845 864
846 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); 865 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
847 if (dest == NULL) { 866 if (dest == NULL) {
848 pr_err("%s(): no memory.\n", __func__); 867 pr_err("%s(): no memory.\n", __func__);
849 return -ENOMEM; 868 return -ENOMEM;
@@ -860,13 +879,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
860 atomic_set(&dest->activeconns, 0); 879 atomic_set(&dest->activeconns, 0);
861 atomic_set(&dest->inactconns, 0); 880 atomic_set(&dest->inactconns, 0);
862 atomic_set(&dest->persistconns, 0); 881 atomic_set(&dest->persistconns, 0);
863 atomic_set(&dest->refcnt, 0); 882 atomic_set(&dest->refcnt, 1);
864 883
865 INIT_LIST_HEAD(&dest->d_list); 884 INIT_LIST_HEAD(&dest->d_list);
866 spin_lock_init(&dest->dst_lock); 885 spin_lock_init(&dest->dst_lock);
867 spin_lock_init(&dest->stats.lock); 886 spin_lock_init(&dest->stats.lock);
868 __ip_vs_update_dest(svc, dest, udest); 887 __ip_vs_update_dest(svc, dest, udest, 1);
869 ip_vs_new_estimator(&dest->stats);
870 888
871 *dest_p = dest; 889 *dest_p = dest;
872 890
@@ -926,65 +944,22 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
926 IP_VS_DBG_ADDR(svc->af, &dest->vaddr), 944 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
927 ntohs(dest->vport)); 945 ntohs(dest->vport));
928 946
929 __ip_vs_update_dest(svc, dest, udest);
930
931 /* 947 /*
932 * Get the destination from the trash 948 * Get the destination from the trash
933 */ 949 */
934 list_del(&dest->n_list); 950 list_del(&dest->n_list);
935 951
936 ip_vs_new_estimator(&dest->stats); 952 __ip_vs_update_dest(svc, dest, udest, 1);
937 953 ret = 0;
938 write_lock_bh(&__ip_vs_svc_lock); 954 } else {
939
940 /* 955 /*
941 * Wait until all other svc users go away. 956 * Allocate and initialize the dest structure
942 */ 957 */
943 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 958 ret = ip_vs_new_dest(svc, udest, &dest);
944
945 list_add(&dest->n_list, &svc->destinations);
946 svc->num_dests++;
947
948 /* call the update_service function of its scheduler */
949 if (svc->scheduler->update_service)
950 svc->scheduler->update_service(svc);
951
952 write_unlock_bh(&__ip_vs_svc_lock);
953 return 0;
954 }
955
956 /*
957 * Allocate and initialize the dest structure
958 */
959 ret = ip_vs_new_dest(svc, udest, &dest);
960 if (ret) {
961 return ret;
962 } 959 }
963
964 /*
965 * Add the dest entry into the list
966 */
967 atomic_inc(&dest->refcnt);
968
969 write_lock_bh(&__ip_vs_svc_lock);
970
971 /*
972 * Wait until all other svc users go away.
973 */
974 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
975
976 list_add(&dest->n_list, &svc->destinations);
977 svc->num_dests++;
978
979 /* call the update_service function of its scheduler */
980 if (svc->scheduler->update_service)
981 svc->scheduler->update_service(svc);
982
983 write_unlock_bh(&__ip_vs_svc_lock);
984
985 LeaveFunction(2); 960 LeaveFunction(2);
986 961
987 return 0; 962 return ret;
988} 963}
989 964
990 965
@@ -1023,19 +998,7 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1023 return -ENOENT; 998 return -ENOENT;
1024 } 999 }
1025 1000
1026 __ip_vs_update_dest(svc, dest, udest); 1001 __ip_vs_update_dest(svc, dest, udest, 0);
1027
1028 write_lock_bh(&__ip_vs_svc_lock);
1029
1030 /* Wait until all other svc users go away */
1031 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1032
1033 /* call the update_service, because server weight may be changed */
1034 if (svc->scheduler->update_service)
1035 svc->scheduler->update_service(svc);
1036
1037 write_unlock_bh(&__ip_vs_svc_lock);
1038
1039 LeaveFunction(2); 1002 LeaveFunction(2);
1040 1003
1041 return 0; 1004 return 0;
@@ -1062,6 +1025,10 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1062 * the destination into the trash. 1025 * the destination into the trash.
1063 */ 1026 */
1064 if (atomic_dec_and_test(&dest->refcnt)) { 1027 if (atomic_dec_and_test(&dest->refcnt)) {
1028 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1029 dest->vfwmark,
1030 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1031 ntohs(dest->port));
1065 ip_vs_dst_reset(dest); 1032 ip_vs_dst_reset(dest);
1066 /* simply decrease svc->refcnt here, let the caller check 1033 /* simply decrease svc->refcnt here, let the caller check
1067 and release the service if nobody refers to it. 1034 and release the service if nobody refers to it.
@@ -1128,7 +1095,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1128 /* 1095 /*
1129 * Wait until all other svc users go away. 1096 * Wait until all other svc users go away.
1130 */ 1097 */
1131 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 1098 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1132 1099
1133 /* 1100 /*
1134 * Unlink dest from the service 1101 * Unlink dest from the service
@@ -1157,6 +1124,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1157{ 1124{
1158 int ret = 0; 1125 int ret = 0;
1159 struct ip_vs_scheduler *sched = NULL; 1126 struct ip_vs_scheduler *sched = NULL;
1127 struct ip_vs_pe *pe = NULL;
1160 struct ip_vs_service *svc = NULL; 1128 struct ip_vs_service *svc = NULL;
1161 1129
1162 /* increase the module use count */ 1130 /* increase the module use count */
@@ -1167,7 +1135,17 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1167 if (sched == NULL) { 1135 if (sched == NULL) {
1168 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name); 1136 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1169 ret = -ENOENT; 1137 ret = -ENOENT;
1170 goto out_mod_dec; 1138 goto out_err;
1139 }
1140
1141 if (u->pe_name && *u->pe_name) {
1142 pe = ip_vs_pe_get(u->pe_name);
1143 if (pe == NULL) {
1144 pr_info("persistence engine module ip_vs_pe_%s "
1145 "not found\n", u->pe_name);
1146 ret = -ENOENT;
1147 goto out_err;
1148 }
1171 } 1149 }
1172 1150
1173#ifdef CONFIG_IP_VS_IPV6 1151#ifdef CONFIG_IP_VS_IPV6
@@ -1177,7 +1155,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1177 } 1155 }
1178#endif 1156#endif
1179 1157
1180 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); 1158 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1181 if (svc == NULL) { 1159 if (svc == NULL) {
1182 IP_VS_DBG(1, "%s(): no memory\n", __func__); 1160 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1183 ret = -ENOMEM; 1161 ret = -ENOMEM;
@@ -1185,7 +1163,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1185 } 1163 }
1186 1164
1187 /* I'm the first user of the service */ 1165 /* I'm the first user of the service */
1188 atomic_set(&svc->usecnt, 1); 1166 atomic_set(&svc->usecnt, 0);
1189 atomic_set(&svc->refcnt, 0); 1167 atomic_set(&svc->refcnt, 0);
1190 1168
1191 svc->af = u->af; 1169 svc->af = u->af;
@@ -1207,6 +1185,10 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1207 goto out_err; 1185 goto out_err;
1208 sched = NULL; 1186 sched = NULL;
1209 1187
1188 /* Bind the ct retriever */
1189 ip_vs_bind_pe(svc, pe);
1190 pe = NULL;
1191
1210 /* Update the virtual service counters */ 1192 /* Update the virtual service counters */
1211 if (svc->port == FTPPORT) 1193 if (svc->port == FTPPORT)
1212 atomic_inc(&ip_vs_ftpsvc_counter); 1194 atomic_inc(&ip_vs_ftpsvc_counter);
@@ -1227,10 +1209,9 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1227 *svc_p = svc; 1209 *svc_p = svc;
1228 return 0; 1210 return 0;
1229 1211
1230 out_err: 1212 out_err:
1231 if (svc != NULL) { 1213 if (svc != NULL) {
1232 if (svc->scheduler) 1214 ip_vs_unbind_scheduler(svc);
1233 ip_vs_unbind_scheduler(svc);
1234 if (svc->inc) { 1215 if (svc->inc) {
1235 local_bh_disable(); 1216 local_bh_disable();
1236 ip_vs_app_inc_put(svc->inc); 1217 ip_vs_app_inc_put(svc->inc);
@@ -1239,8 +1220,8 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
1239 kfree(svc); 1220 kfree(svc);
1240 } 1221 }
1241 ip_vs_scheduler_put(sched); 1222 ip_vs_scheduler_put(sched);
1223 ip_vs_pe_put(pe);
1242 1224
1243 out_mod_dec:
1244 /* decrease the module use count */ 1225 /* decrease the module use count */
1245 ip_vs_use_count_dec(); 1226 ip_vs_use_count_dec();
1246 1227
@@ -1255,6 +1236,7 @@ static int
1255ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) 1236ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1256{ 1237{
1257 struct ip_vs_scheduler *sched, *old_sched; 1238 struct ip_vs_scheduler *sched, *old_sched;
1239 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1258 int ret = 0; 1240 int ret = 0;
1259 1241
1260 /* 1242 /*
@@ -1267,6 +1249,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1267 } 1249 }
1268 old_sched = sched; 1250 old_sched = sched;
1269 1251
1252 if (u->pe_name && *u->pe_name) {
1253 pe = ip_vs_pe_get(u->pe_name);
1254 if (pe == NULL) {
1255 pr_info("persistence engine module ip_vs_pe_%s "
1256 "not found\n", u->pe_name);
1257 ret = -ENOENT;
1258 goto out;
1259 }
1260 old_pe = pe;
1261 }
1262
1270#ifdef CONFIG_IP_VS_IPV6 1263#ifdef CONFIG_IP_VS_IPV6
1271 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { 1264 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1272 ret = -EINVAL; 1265 ret = -EINVAL;
@@ -1279,7 +1272,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1279 /* 1272 /*
1280 * Wait until all other svc users go away. 1273 * Wait until all other svc users go away.
1281 */ 1274 */
1282 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 1275 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1283 1276
1284 /* 1277 /*
1285 * Set the flags and timeout value 1278 * Set the flags and timeout value
@@ -1318,15 +1311,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1318 } 1311 }
1319 } 1312 }
1320 1313
1314 old_pe = svc->pe;
1315 if (pe != old_pe) {
1316 ip_vs_unbind_pe(svc);
1317 ip_vs_bind_pe(svc, pe);
1318 }
1319
1321 out_unlock: 1320 out_unlock:
1322 write_unlock_bh(&__ip_vs_svc_lock); 1321 write_unlock_bh(&__ip_vs_svc_lock);
1323#ifdef CONFIG_IP_VS_IPV6
1324 out: 1322 out:
1325#endif 1323 ip_vs_scheduler_put(old_sched);
1326 1324 ip_vs_pe_put(old_pe);
1327 if (old_sched)
1328 ip_vs_scheduler_put(old_sched);
1329
1330 return ret; 1325 return ret;
1331} 1326}
1332 1327
@@ -1340,6 +1335,9 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1340{ 1335{
1341 struct ip_vs_dest *dest, *nxt; 1336 struct ip_vs_dest *dest, *nxt;
1342 struct ip_vs_scheduler *old_sched; 1337 struct ip_vs_scheduler *old_sched;
1338 struct ip_vs_pe *old_pe;
1339
 1340 IP_VS_DBG(2, "%s: enter\n", __func__);
1343 1341
1344 /* Count only IPv4 services for old get/setsockopt interface */ 1342 /* Count only IPv4 services for old get/setsockopt interface */
1345 if (svc->af == AF_INET) 1343 if (svc->af == AF_INET)
@@ -1350,8 +1348,12 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1350 /* Unbind scheduler */ 1348 /* Unbind scheduler */
1351 old_sched = svc->scheduler; 1349 old_sched = svc->scheduler;
1352 ip_vs_unbind_scheduler(svc); 1350 ip_vs_unbind_scheduler(svc);
1353 if (old_sched) 1351 ip_vs_scheduler_put(old_sched);
1354 ip_vs_scheduler_put(old_sched); 1352
1353 /* Unbind persistence engine */
1354 old_pe = svc->pe;
1355 ip_vs_unbind_pe(svc);
1356 ip_vs_pe_put(old_pe);
1355 1357
1356 /* Unbind app inc */ 1358 /* Unbind app inc */
1357 if (svc->inc) { 1359 if (svc->inc) {
@@ -1378,21 +1380,23 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
1378 /* 1380 /*
1379 * Free the service if nobody refers to it 1381 * Free the service if nobody refers to it
1380 */ 1382 */
1381 if (atomic_read(&svc->refcnt) == 0) 1383 if (atomic_read(&svc->refcnt) == 0) {
1384 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1385 svc->fwmark,
1386 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1387 ntohs(svc->port), atomic_read(&svc->usecnt));
1382 kfree(svc); 1388 kfree(svc);
1389 }
1383 1390
1384 /* decrease the module use count */ 1391 /* decrease the module use count */
1385 ip_vs_use_count_dec(); 1392 ip_vs_use_count_dec();
1386} 1393}
1387 1394
1388/* 1395/*
1389 * Delete a service from the service list 1396 * Unlink a service from list and try to delete it if its refcnt reached 0
1390 */ 1397 */
1391static int ip_vs_del_service(struct ip_vs_service *svc) 1398static void ip_vs_unlink_service(struct ip_vs_service *svc)
1392{ 1399{
1393 if (svc == NULL)
1394 return -EEXIST;
1395
1396 /* 1400 /*
1397 * Unhash it from the service table 1401 * Unhash it from the service table
1398 */ 1402 */
@@ -1403,11 +1407,21 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
1403 /* 1407 /*
1404 * Wait until all the svc users go away. 1408 * Wait until all the svc users go away.
1405 */ 1409 */
1406 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 1410 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1407 1411
1408 __ip_vs_del_service(svc); 1412 __ip_vs_del_service(svc);
1409 1413
1410 write_unlock_bh(&__ip_vs_svc_lock); 1414 write_unlock_bh(&__ip_vs_svc_lock);
1415}
1416
1417/*
1418 * Delete a service from the service list
1419 */
1420static int ip_vs_del_service(struct ip_vs_service *svc)
1421{
1422 if (svc == NULL)
1423 return -EEXIST;
1424 ip_vs_unlink_service(svc);
1411 1425
1412 return 0; 1426 return 0;
1413} 1427}
@@ -1426,14 +1440,7 @@ static int ip_vs_flush(void)
1426 */ 1440 */
1427 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1441 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1428 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { 1442 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1429 write_lock_bh(&__ip_vs_svc_lock); 1443 ip_vs_unlink_service(svc);
1430 ip_vs_svc_unhash(svc);
1431 /*
1432 * Wait until all the svc users go away.
1433 */
1434 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1435 __ip_vs_del_service(svc);
1436 write_unlock_bh(&__ip_vs_svc_lock);
1437 } 1444 }
1438 } 1445 }
1439 1446
@@ -1443,14 +1450,7 @@ static int ip_vs_flush(void)
1443 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1450 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1444 list_for_each_entry_safe(svc, nxt, 1451 list_for_each_entry_safe(svc, nxt,
1445 &ip_vs_svc_fwm_table[idx], f_list) { 1452 &ip_vs_svc_fwm_table[idx], f_list) {
1446 write_lock_bh(&__ip_vs_svc_lock); 1453 ip_vs_unlink_service(svc);
1447 ip_vs_svc_unhash(svc);
1448 /*
1449 * Wait until all the svc users go away.
1450 */
1451 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1452 __ip_vs_del_service(svc);
1453 write_unlock_bh(&__ip_vs_svc_lock);
1454 } 1454 }
1455 } 1455 }
1456 1456
@@ -1579,6 +1579,15 @@ static struct ctl_table vs_vars[] = {
1579 .mode = 0644, 1579 .mode = 0644,
1580 .proc_handler = proc_do_defense_mode, 1580 .proc_handler = proc_do_defense_mode,
1581 }, 1581 },
1582#ifdef CONFIG_IP_VS_NFCT
1583 {
1584 .procname = "conntrack",
1585 .data = &sysctl_ip_vs_conntrack,
1586 .maxlen = sizeof(int),
1587 .mode = 0644,
1588 .proc_handler = &proc_dointvec,
1589 },
1590#endif
1582 { 1591 {
1583 .procname = "secure_tcp", 1592 .procname = "secure_tcp",
1584 .data = &sysctl_ip_vs_secure_tcp, 1593 .data = &sysctl_ip_vs_secure_tcp,
@@ -1586,6 +1595,13 @@ static struct ctl_table vs_vars[] = {
1586 .mode = 0644, 1595 .mode = 0644,
1587 .proc_handler = proc_do_defense_mode, 1596 .proc_handler = proc_do_defense_mode,
1588 }, 1597 },
1598 {
1599 .procname = "snat_reroute",
1600 .data = &sysctl_ip_vs_snat_reroute,
1601 .maxlen = sizeof(int),
1602 .mode = 0644,
1603 .proc_handler = &proc_dointvec,
1604 },
1589#if 0 1605#if 0
1590 { 1606 {
1591 .procname = "timeout_established", 1607 .procname = "timeout_established",
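
Both new entries become ordinary proc files once the vs_vars table is registered under net/ipv4/vs/. Flipping the reroute knob from C, for example:

    #include <stdio.h>

    int main(void)
    {
            FILE *f = fopen("/proc/sys/net/ipv4/vs/snat_reroute", "w");

            if (!f) {
                    perror("snat_reroute");
                    return 1;
            }
            fputs("0\n", f);    /* default is 1: reroute replies after SNAT */
            return fclose(f) ? 1 : 0;
    }
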
@@ -2041,6 +2057,8 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2041static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, 2057static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2042 struct ip_vs_service_user *usvc_compat) 2058 struct ip_vs_service_user *usvc_compat)
2043{ 2059{
2060 memset(usvc, 0, sizeof(*usvc));
2061
2044 usvc->af = AF_INET; 2062 usvc->af = AF_INET;
2045 usvc->protocol = usvc_compat->protocol; 2063 usvc->protocol = usvc_compat->protocol;
2046 usvc->addr.ip = usvc_compat->addr; 2064 usvc->addr.ip = usvc_compat->addr;
@@ -2058,6 +2076,8 @@ static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2058static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, 2076static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2059 struct ip_vs_dest_user *udest_compat) 2077 struct ip_vs_dest_user *udest_compat)
2060{ 2078{
2079 memset(udest, 0, sizeof(*udest));
2080
2061 udest->addr.ip = udest_compat->addr; 2081 udest->addr.ip = udest_compat->addr;
2062 udest->port = udest_compat->port; 2082 udest->port = udest_compat->port;
2063 udest->conn_flags = udest_compat->conn_flags; 2083 udest->conn_flags = udest_compat->conn_flags;
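
Both compat converters now zero the kernel-side structure before copying: the old sockopt interface predates members such as pe_name, so anything it cannot set must read as zero rather than leftover stack bytes. A minimal demonstration of the failure mode being closed:

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>         /* AF_INET */

    struct usvc { int af; const char *pe_name; /* field the old API predates */ };

    static void convert(struct usvc *u, int af)
    {
            memset(u, 0, sizeof(*u));   /* new: unset members read as 0/NULL */
            u->af = af;                 /* the old interface fills only these */
    }

    int main(void)
    {
            struct usvc u;

            convert(&u, AF_INET);
            printf("pe_name=%p\n", (void *)u.pe_name);  /* NULL, not garbage */
            return 0;
    }
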
@@ -2147,10 +2167,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2147 2167
2148 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2168 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2149 if (usvc.fwmark == 0) 2169 if (usvc.fwmark == 0)
2150 svc = __ip_vs_service_get(usvc.af, usvc.protocol, 2170 svc = __ip_vs_service_find(usvc.af, usvc.protocol,
2151 &usvc.addr, usvc.port); 2171 &usvc.addr, usvc.port);
2152 else 2172 else
2153 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); 2173 svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
2154 2174
2155 if (cmd != IP_VS_SO_SET_ADD 2175 if (cmd != IP_VS_SO_SET_ADD
2156 && (svc == NULL || svc->protocol != usvc.protocol)) { 2176 && (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2189,9 +2209,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2189 ret = -EINVAL; 2209 ret = -EINVAL;
2190 } 2210 }
2191 2211
2192 if (svc)
2193 ip_vs_service_put(svc);
2194
2195 out_unlock: 2212 out_unlock:
2196 mutex_unlock(&__ip_vs_mutex); 2213 mutex_unlock(&__ip_vs_mutex);
2197 out_dec: 2214 out_dec:
@@ -2284,10 +2301,10 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2284 int ret = 0; 2301 int ret = 0;
2285 2302
2286 if (get->fwmark) 2303 if (get->fwmark)
2287 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark); 2304 svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
2288 else 2305 else
2289 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr, 2306 svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
2290 get->port); 2307 get->port);
2291 2308
2292 if (svc) { 2309 if (svc) {
2293 int count = 0; 2310 int count = 0;
@@ -2315,7 +2332,6 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2315 } 2332 }
2316 count++; 2333 count++;
2317 } 2334 }
2318 ip_vs_service_put(svc);
2319 } else 2335 } else
2320 ret = -ESRCH; 2336 ret = -ESRCH;
2321 return ret; 2337 return ret;
@@ -2436,15 +2452,14 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2436 entry = (struct ip_vs_service_entry *)arg; 2452 entry = (struct ip_vs_service_entry *)arg;
2437 addr.ip = entry->addr; 2453 addr.ip = entry->addr;
2438 if (entry->fwmark) 2454 if (entry->fwmark)
2439 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark); 2455 svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
2440 else 2456 else
2441 svc = __ip_vs_service_get(AF_INET, entry->protocol, 2457 svc = __ip_vs_service_find(AF_INET, entry->protocol,
2442 &addr, entry->port); 2458 &addr, entry->port);
2443 if (svc) { 2459 if (svc) {
2444 ip_vs_copy_service(entry, svc); 2460 ip_vs_copy_service(entry, svc);
2445 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2461 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2446 ret = -EFAULT; 2462 ret = -EFAULT;
2447 ip_vs_service_put(svc);
2448 } else 2463 } else
2449 ret = -ESRCH; 2464 ret = -ESRCH;
2450 } 2465 }
@@ -2559,6 +2574,8 @@ static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2559 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, 2574 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2560 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, 2575 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2561 .len = IP_VS_SCHEDNAME_MAXLEN }, 2576 .len = IP_VS_SCHEDNAME_MAXLEN },
2577 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2578 .len = IP_VS_PENAME_MAXLEN },
2562 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, 2579 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2563 .len = sizeof(struct ip_vs_flags) }, 2580 .len = sizeof(struct ip_vs_flags) },
2564 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, 2581 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
@@ -2635,6 +2652,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
2635 } 2652 }
2636 2653
2637 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name); 2654 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2655 if (svc->pe)
2656 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2638 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags); 2657 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2639 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ); 2658 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2640 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask); 2659 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
@@ -2711,10 +2730,12 @@ nla_put_failure:
2711} 2730}
2712 2731
2713static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, 2732static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2714 struct nlattr *nla, int full_entry) 2733 struct nlattr *nla, int full_entry,
2734 struct ip_vs_service **ret_svc)
2715{ 2735{
2716 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; 2736 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2717 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; 2737 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2738 struct ip_vs_service *svc;
2718 2739
2719 /* Parse mandatory identifying service fields first */ 2740 /* Parse mandatory identifying service fields first */
2720 if (nla == NULL || 2741 if (nla == NULL ||
@@ -2750,14 +2771,21 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2750 usvc->fwmark = 0; 2771 usvc->fwmark = 0;
2751 } 2772 }
2752 2773
2774 if (usvc->fwmark)
2775 svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
2776 else
2777 svc = __ip_vs_service_find(usvc->af, usvc->protocol,
2778 &usvc->addr, usvc->port);
2779 *ret_svc = svc;
2780
2753 /* If a full entry was requested, check for the additional fields */ 2781 /* If a full entry was requested, check for the additional fields */
2754 if (full_entry) { 2782 if (full_entry) {
2755 struct nlattr *nla_sched, *nla_flags, *nla_timeout, 2783 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2756 *nla_netmask; 2784 *nla_netmask;
2757 struct ip_vs_flags flags; 2785 struct ip_vs_flags flags;
2758 struct ip_vs_service *svc;
2759 2786
2760 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; 2787 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2788 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2761 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; 2789 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2762 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; 2790 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2763 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; 2791 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
@@ -2768,21 +2796,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2768 nla_memcpy(&flags, nla_flags, sizeof(flags)); 2796 nla_memcpy(&flags, nla_flags, sizeof(flags));
2769 2797
2770 /* prefill flags from service if it already exists */ 2798 /* prefill flags from service if it already exists */
2771 if (usvc->fwmark) 2799 if (svc)
2772 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2773 else
2774 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2775 &usvc->addr, usvc->port);
2776 if (svc) {
2777 usvc->flags = svc->flags; 2800 usvc->flags = svc->flags;
2778 ip_vs_service_put(svc);
2779 } else
2780 usvc->flags = 0;
2781 2801
2782 /* set new flags from userland */ 2802 /* set new flags from userland */
2783 usvc->flags = (usvc->flags & ~flags.mask) | 2803 usvc->flags = (usvc->flags & ~flags.mask) |
2784 (flags.flags & flags.mask); 2804 (flags.flags & flags.mask);
2785 usvc->sched_name = nla_data(nla_sched); 2805 usvc->sched_name = nla_data(nla_sched);
2806 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2786 usvc->timeout = nla_get_u32(nla_timeout); 2807 usvc->timeout = nla_get_u32(nla_timeout);
2787 usvc->netmask = nla_get_u32(nla_netmask); 2808 usvc->netmask = nla_get_u32(nla_netmask);
2788 } 2809 }
@@ -2793,17 +2814,11 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2793static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) 2814static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2794{ 2815{
2795 struct ip_vs_service_user_kern usvc; 2816 struct ip_vs_service_user_kern usvc;
2817 struct ip_vs_service *svc;
2796 int ret; 2818 int ret;
2797 2819
2798 ret = ip_vs_genl_parse_service(&usvc, nla, 0); 2820 ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
2799 if (ret) 2821 return ret ? ERR_PTR(ret) : svc;
2800 return ERR_PTR(ret);
2801
2802 if (usvc.fwmark)
2803 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2804 else
2805 return __ip_vs_service_get(usvc.af, usvc.protocol,
2806 &usvc.addr, usvc.port);
2807} 2822}
2808 2823
2809static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) 2824static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
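
The simplified ip_vs_genl_find_service() above multiplexes an errno and a pointer through one return value, the kernel's ERR_PTR idiom. A self-contained userspace rendition of how that encoding works:

#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error) { return (void *)error; }
static inline long PTR_ERR(const void *ptr) { return (long)ptr; }
static inline int IS_ERR(const void *ptr)
{
	/* top page of the address space encodes -MAX_ERRNO..-1 */
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

static void *find_service(int fwmark)
{
	if (fwmark < 0)
		return ERR_PTR(-22);	/* -EINVAL */
	return NULL;			/* "not found" stays a valid result */
}

int main(void)
{
	void *svc = find_service(-1);

	if (IS_ERR(svc))
		printf("lookup failed: %ld\n", PTR_ERR(svc));
	return 0;
}
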
@@ -2894,7 +2909,6 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2894 2909
2895nla_put_failure: 2910nla_put_failure:
2896 cb->args[0] = idx; 2911 cb->args[0] = idx;
2897 ip_vs_service_put(svc);
2898 2912
2899out_err: 2913out_err:
2900 mutex_unlock(&__ip_vs_mutex); 2914 mutex_unlock(&__ip_vs_mutex);
@@ -3107,17 +3121,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3107 3121
3108 ret = ip_vs_genl_parse_service(&usvc, 3122 ret = ip_vs_genl_parse_service(&usvc,
3109 info->attrs[IPVS_CMD_ATTR_SERVICE], 3123 info->attrs[IPVS_CMD_ATTR_SERVICE],
3110 need_full_svc); 3124 need_full_svc, &svc);
3111 if (ret) 3125 if (ret)
3112 goto out; 3126 goto out;
3113 3127
3114 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3115 if (usvc.fwmark == 0)
3116 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3117 &usvc.addr, usvc.port);
3118 else
3119 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
3120
3121 /* Unless we're adding a new service, the service must already exist */ 3128 /* Unless we're adding a new service, the service must already exist */
3122 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { 3129 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3123 ret = -ESRCH; 3130 ret = -ESRCH;
@@ -3151,6 +3158,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3151 break; 3158 break;
3152 case IPVS_CMD_DEL_SERVICE: 3159 case IPVS_CMD_DEL_SERVICE:
3153 ret = ip_vs_del_service(svc); 3160 ret = ip_vs_del_service(svc);
3161 /* do not use svc, it can be freed */
3154 break; 3162 break;
3155 case IPVS_CMD_NEW_DEST: 3163 case IPVS_CMD_NEW_DEST:
3156 ret = ip_vs_add_dest(svc, &udest); 3164 ret = ip_vs_add_dest(svc, &udest);
@@ -3169,8 +3177,6 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3169 } 3177 }
3170 3178
3171out: 3179out:
3172 if (svc)
3173 ip_vs_service_put(svc);
3174 mutex_unlock(&__ip_vs_mutex); 3180 mutex_unlock(&__ip_vs_mutex);
3175 3181
3176 return ret; 3182 return ret;
@@ -3216,7 +3222,6 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3216 goto out_err; 3222 goto out_err;
3217 } else if (svc) { 3223 } else if (svc) {
3218 ret = ip_vs_genl_fill_service(msg, svc); 3224 ret = ip_vs_genl_fill_service(msg, svc);
3219 ip_vs_service_put(svc);
3220 if (ret) 3225 if (ret)
3221 goto nla_put_failure; 3226 goto nla_put_failure;
3222 } else { 3227 } else {
@@ -3385,6 +3390,16 @@ int __init ip_vs_control_init(void)
3385 3390
3386 EnterFunction(2); 3391 EnterFunction(2);
3387 3392
3393 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3394 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3395 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3396 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3397 }
3398 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3399 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3400 }
3401 smp_wmb();
3402
3388 ret = nf_register_sockopt(&ip_vs_sockopts); 3403 ret = nf_register_sockopt(&ip_vs_sockopts);
3389 if (ret) { 3404 if (ret) {
3390 pr_err("cannot register sockopt.\n"); 3405 pr_err("cannot register sockopt.\n");
@@ -3403,15 +3418,6 @@ int __init ip_vs_control_init(void)
3403 3418
3404 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); 3419 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3405 3420
3406 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3407 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3408 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3409 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3410 }
3411 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3412 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3413 }
3414
3415 ip_vs_new_estimator(&ip_vs_stats); 3421 ip_vs_new_estimator(&ip_vs_stats);
3416 3422
3417 /* Hook the defense timer */ 3423 /* Hook the defense timer */
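
The last two hunks move the hash-table initialization ahead of nf_register_sockopt() and add an smp_wmb(): once the sockopt interface is registered, userspace requests can reach the tables, so every bucket must be initialized and the writes ordered before publication. A compact C11 sketch of the same init-then-publish discipline, using a release store where the kernel uses smp_wmb():

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

#define TAB_SIZE 256

struct list_head { struct list_head *next, *prev; };

static struct list_head svc_table[TAB_SIZE];
static atomic_bool published;

static void init_list_head(struct list_head *h)
{
	h->next = h->prev = h;
}

static void control_init(void)
{
	size_t i;

	for (i = 0; i < TAB_SIZE; i++)		/* init every bucket... */
		init_list_head(&svc_table[i]);
	/* ...then publish; the release store plays the role of smp_wmb() */
	atomic_store_explicit(&published, true, memory_order_release);
}

int main(void)
{
	control_init();
	return !atomic_load_explicit(&published, memory_order_acquire);
}
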
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 7e9af5b76d9e..75455000ad1c 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -20,17 +20,6 @@
20 * 20 *
21 * Author: Wouter Gadeyne 21 * Author: Wouter Gadeyne
22 * 22 *
23 *
24 * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from
25 * http://www.ssi.bg/~ja/nfct/:
26 *
27 * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
28 *
29 * Portions Copyright (C) 2001-2002
30 * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
31 *
32 * Portions Copyright (C) 2003-2008
33 * Julian Anastasov
34 */ 23 */
35 24
36#define KMSG_COMPONENT "IPVS" 25#define KMSG_COMPONENT "IPVS"
@@ -58,16 +47,6 @@
58#define SERVER_STRING "227 Entering Passive Mode (" 47#define SERVER_STRING "227 Entering Passive Mode ("
59#define CLIENT_STRING "PORT " 48#define CLIENT_STRING "PORT "
60 49
61#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u"
62#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \
63 &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
64 (T)->dst.protonum
65
66#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
67#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \
68 &((C)->vaddr.ip), ntohs((C)->vport), \
69 &((C)->daddr.ip), ntohs((C)->dport), \
70 (C)->protocol, (C)->state
71 50
72/* 51/*
73 * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper 52 * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
@@ -85,6 +64,8 @@ static int ip_vs_ftp_pasv;
85static int 64static int
86ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) 65ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
87{ 66{
67 /* We use connection tracking for the command connection */
68 cp->flags |= IP_VS_CONN_F_NFCT;
88 return 0; 69 return 0;
89} 70}
90 71
@@ -149,120 +130,6 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
149} 130}
150 131
151/* 132/*
152 * Called from init_conntrack() as expectfn handler.
153 */
154static void
155ip_vs_expect_callback(struct nf_conn *ct,
156 struct nf_conntrack_expect *exp)
157{
158 struct nf_conntrack_tuple *orig, new_reply;
159 struct ip_vs_conn *cp;
160
161 if (exp->tuple.src.l3num != PF_INET)
162 return;
163
164 /*
165 * We assume that no NF locks are held before this callback.
166 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
167 * expectations even if they use wildcard values, now we provide the
168 * actual values from the newly created original conntrack direction.
169 * The conntrack is confirmed when packet reaches IPVS hooks.
170 */
171
172 /* RS->CLIENT */
173 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
174 cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum,
175 &orig->src.u3, orig->src.u.tcp.port,
176 &orig->dst.u3, orig->dst.u.tcp.port);
177 if (cp) {
178 /* Change reply CLIENT->RS to CLIENT->VS */
179 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
180 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
181 FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
182 __func__, ct, ct->status,
183 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
184 ARG_CONN(cp));
185 new_reply.dst.u3 = cp->vaddr;
186 new_reply.dst.u.tcp.port = cp->vport;
187 IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
188 ", inout cp=" FMT_CONN "\n",
189 __func__, ct,
190 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
191 ARG_CONN(cp));
192 goto alter;
193 }
194
195 /* CLIENT->VS */
196 cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum,
197 &orig->src.u3, orig->src.u.tcp.port,
198 &orig->dst.u3, orig->dst.u.tcp.port);
199 if (cp) {
200 /* Change reply VS->CLIENT to RS->CLIENT */
201 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
202 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
203 FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
204 __func__, ct, ct->status,
205 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
206 ARG_CONN(cp));
207 new_reply.src.u3 = cp->daddr;
208 new_reply.src.u.tcp.port = cp->dport;
209 IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", "
210 FMT_TUPLE ", outin cp=" FMT_CONN "\n",
211 __func__, ct,
212 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
213 ARG_CONN(cp));
214 goto alter;
215 }
216
217 IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE
218 " - unknown expect\n",
219 __func__, ct, ct->status, ARG_TUPLE(orig));
220 return;
221
222alter:
223 /* Never alter conntrack for non-NAT conns */
224 if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
225 nf_conntrack_alter_reply(ct, &new_reply);
226 ip_vs_conn_put(cp);
227 return;
228}
229
230/*
231 * Create NF conntrack expectation with wildcard (optional) source port.
232 * Then the default callback function will alter the reply and will confirm
233 * the conntrack entry when the first packet comes.
234 */
235static void
236ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct,
237 struct ip_vs_conn *cp, u_int8_t proto,
238 const __be16 *port, int from_rs)
239{
240 struct nf_conntrack_expect *exp;
241
242 BUG_ON(!ct || ct == &nf_conntrack_untracked);
243
244 exp = nf_ct_expect_alloc(ct);
245 if (!exp)
246 return;
247
248 if (from_rs)
249 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
250 nf_ct_l3num(ct), &cp->daddr, &cp->caddr,
251 proto, port, &cp->cport);
252 else
253 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT,
254 nf_ct_l3num(ct), &cp->caddr, &cp->vaddr,
255 proto, port, &cp->vport);
256
257 exp->expectfn = ip_vs_expect_callback;
258
259 IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n",
260 __func__, ct, ARG_TUPLE(&exp->tuple));
261 nf_ct_expect_related(exp);
262 nf_ct_expect_put(exp);
263}
264
265/*
266 * Look at outgoing ftp packets to catch the response to a PASV command 133 * Look at outgoing ftp packets to catch the response to a PASV command
267 * from the server (inside-to-outside). 134 * from the server (inside-to-outside).
268 * When we see one, we build a connection entry with the client address, 135 * When we see one, we build a connection entry with the client address,
@@ -328,14 +195,19 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
328 /* 195 /*
329 	 * Now update or create a connection entry for it 196 	 * Now update or create a connection entry for it
330 */ 197 */
331 n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port, 198 {
332 &cp->caddr, 0); 199 struct ip_vs_conn_param p;
200 ip_vs_conn_fill_param(AF_INET, iph->protocol,
201 &from, port, &cp->caddr, 0, &p);
202 n_cp = ip_vs_conn_out_get(&p);
203 }
333 if (!n_cp) { 204 if (!n_cp) {
334 n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP, 205 struct ip_vs_conn_param p;
335 &cp->caddr, 0, 206 ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,
336 &cp->vaddr, port, 207 0, &cp->vaddr, port, &p);
337 &from, port, 208 n_cp = ip_vs_conn_new(&p, &from, port,
338 IP_VS_CONN_F_NO_CPORT, 209 IP_VS_CONN_F_NO_CPORT |
210 IP_VS_CONN_F_NFCT,
339 cp->dest); 211 cp->dest);
340 if (!n_cp) 212 if (!n_cp)
341 return 0; 213 return 0;
@@ -370,9 +242,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
370 ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, 242 ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
371 start-data, end-start, 243 start-data, end-start,
372 buf, buf_len); 244 buf, buf_len);
373 if (ret) 245 if (ret) {
374 ip_vs_expect_related(skb, ct, n_cp, 246 ip_vs_nfct_expect_related(skb, ct, n_cp,
375 IPPROTO_TCP, NULL, 0); 247 IPPROTO_TCP, 0, 0);
248 if (skb->ip_summed == CHECKSUM_COMPLETE)
249 skb->ip_summed = CHECKSUM_UNNECESSARY;
250 /* csum is updated */
251 ret = 1;
252 }
376 } 253 }
377 254
378 /* 255 /*
@@ -479,21 +356,22 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
479 ip_vs_proto_name(iph->protocol), 356 ip_vs_proto_name(iph->protocol),
480 &to.ip, ntohs(port), &cp->vaddr.ip, 0); 357 &to.ip, ntohs(port), &cp->vaddr.ip, 0);
481 358
482 n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol, 359 {
483 &to, port, 360 struct ip_vs_conn_param p;
484 &cp->vaddr, htons(ntohs(cp->vport)-1)); 361 ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,
485 if (!n_cp) {
486 n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP,
487 &to, port,
488 &cp->vaddr, htons(ntohs(cp->vport)-1), 362 &cp->vaddr, htons(ntohs(cp->vport)-1),
489 &cp->daddr, htons(ntohs(cp->dport)-1), 363 &p);
490 0, 364 n_cp = ip_vs_conn_in_get(&p);
491 cp->dest); 365 if (!n_cp) {
492 if (!n_cp) 366 n_cp = ip_vs_conn_new(&p, &cp->daddr,
493 return 0; 367 htons(ntohs(cp->dport)-1),
368 IP_VS_CONN_F_NFCT, cp->dest);
369 if (!n_cp)
370 return 0;
494 371
495 /* add its controller */ 372 /* add its controller */
496 ip_vs_control_add(n_cp, cp); 373 ip_vs_control_add(n_cp, cp);
374 }
497 } 375 }
498 376
499 /* 377 /*
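
Both FTP hunks replace the long argument lists of ip_vs_conn_in_get()/ip_vs_conn_out_get()/ip_vs_conn_new() with an ip_vs_conn_param filled by ip_vs_conn_fill_param(); bundling the lookup key into one struct is also what lets the persistence engine attach pe_data to it later in this series. A reduced sketch of the pattern, with simplified types rather than the kernel's:

#include <stdint.h>
#include <string.h>

struct conn_param {			/* simplified, not the kernel's */
	int af;
	uint8_t protocol;
	uint32_t caddr, vaddr;
	uint16_t cport, vport;
};

static void conn_fill_param(int af, uint8_t proto,
			    uint32_t caddr, uint16_t cport,
			    uint32_t vaddr, uint16_t vport,
			    struct conn_param *p)
{
	memset(p, 0, sizeof(*p));
	p->af = af;
	p->protocol = proto;
	p->caddr = caddr;
	p->cport = cport;
	p->vaddr = vaddr;
	p->vport = vport;
}

int main(void)
{
	struct conn_param p;

	/* one call site fills the key; lookups then take just &p */
	conn_fill_param(2 /* AF_INET */, 6 /* TCP */,
			0x7f000001, 12345, 0x7f000002, 80, &p);
	return p.protocol != 6;
}
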
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
new file mode 100644
index 000000000000..4680647cd450
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -0,0 +1,292 @@
1/*
2 * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
3 *
4 * Portions Copyright (C) 2001-2002
5 * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
6 *
7 * Portions Copyright (C) 2003-2010
8 * Julian Anastasov
9 *
10 *
11 * This code is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 *
25 *
26 * Authors:
27 * Ben North <ben@redfrontdoor.org>
28 * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
29 * Hannes Eder <heder@google.com> Extend NFCT support for FTP, ipvs match
30 *
31 *
32 * Current status:
33 *
34 * - provide conntrack confirmation for new and related connections, so
35 * that we can see their proper conntrack state in all hooks
36 * - support for all forwarding methods, not only NAT
37 * - FTP support (NAT), ability to support other NAT apps with expectations
38 * - to correctly create expectations for related NAT connections, the proper
39 * NF conntrack support must already be installed, e.g. ip_vs_ftp requires
40 * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables
41 * NAT rules are needed)
42 * - alter reply for NAT when forwarding packet in original direction:
43 * conntrack from client in NEW or RELATED (Passive FTP DATA) state or
44 * when RELATED conntrack is created from real server (Active FTP DATA)
45 * - if iptables_nat is not loaded, Passive FTP will not work (the
46 * PASV response cannot be NAT-ed) but Active FTP should work
47 *
48 */
49
50#define KMSG_COMPONENT "IPVS"
51#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52
53#include <linux/module.h>
54#include <linux/types.h>
55#include <linux/kernel.h>
56#include <linux/errno.h>
57#include <linux/compiler.h>
58#include <linux/vmalloc.h>
59#include <linux/skbuff.h>
60#include <net/ip.h>
61#include <linux/netfilter.h>
62#include <linux/netfilter_ipv4.h>
63#include <net/ip_vs.h>
64#include <net/netfilter/nf_conntrack_core.h>
65#include <net/netfilter/nf_conntrack_expect.h>
66#include <net/netfilter/nf_conntrack_helper.h>
67#include <net/netfilter/nf_conntrack_zones.h>
68
69
70#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u"
71#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \
72 &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
73 (T)->dst.protonum
74
75#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
76#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \
77 &((C)->vaddr.ip), ntohs((C)->vport), \
78 &((C)->daddr.ip), ntohs((C)->dport), \
79 (C)->protocol, (C)->state
80
81void
82ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
83{
84 enum ip_conntrack_info ctinfo;
85 struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
86 struct nf_conntrack_tuple new_tuple;
87
88 if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) ||
89 nf_ct_is_dying(ct))
90 return;
91
92 /* Never alter conntrack for non-NAT conns */
93 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
94 return;
95
96 /* Alter reply only in original direction */
97 if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
98 return;
99
100 /*
101 * The connection is not yet in the hashtable, so we update it.
102 * CIP->VIP will remain the same, so leave the tuple in
103 * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the
104 * real-server we will see RIP->DIP.
105 */
106 new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
107 /*
108 * This will also take care of UDP and other protocols.
109 */
110 if (outin) {
111 new_tuple.src.u3 = cp->daddr;
112 if (new_tuple.dst.protonum != IPPROTO_ICMP &&
113 new_tuple.dst.protonum != IPPROTO_ICMPV6)
114 new_tuple.src.u.tcp.port = cp->dport;
115 } else {
116 new_tuple.dst.u3 = cp->vaddr;
117 if (new_tuple.dst.protonum != IPPROTO_ICMP &&
118 new_tuple.dst.protonum != IPPROTO_ICMPV6)
119 new_tuple.dst.u.tcp.port = cp->vport;
120 }
121 IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
122 "ctinfo=%d, old reply=" FMT_TUPLE
123 ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n",
124 __func__, ct, ct->status, ctinfo,
125 ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple),
126 ARG_TUPLE(&new_tuple), ARG_CONN(cp));
127 nf_conntrack_alter_reply(ct, &new_tuple);
128}
129
130int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp)
131{
132 return nf_conntrack_confirm(skb);
133}
134
135/*
136 * Called from init_conntrack() as expectfn handler.
137 */
138static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
139 struct nf_conntrack_expect *exp)
140{
141 struct nf_conntrack_tuple *orig, new_reply;
142 struct ip_vs_conn *cp;
143 struct ip_vs_conn_param p;
144
145 if (exp->tuple.src.l3num != PF_INET)
146 return;
147
148 /*
149 * We assume that no NF locks are held before this callback.
150 * ip_vs_conn_out_get and ip_vs_conn_in_get should match their
151 * expectations even if they use wildcard values; now we provide the
152 * actual values from the newly created original conntrack direction.
153 * The conntrack is confirmed when packet reaches IPVS hooks.
154 */
155
156 /* RS->CLIENT */
157 orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
158 ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,
159 &orig->src.u3, orig->src.u.tcp.port,
160 &orig->dst.u3, orig->dst.u.tcp.port, &p);
161 cp = ip_vs_conn_out_get(&p);
162 if (cp) {
163 /* Change reply CLIENT->RS to CLIENT->VS */
164 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
165 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
166 FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
167 __func__, ct, ct->status,
168 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
169 ARG_CONN(cp));
170 new_reply.dst.u3 = cp->vaddr;
171 new_reply.dst.u.tcp.port = cp->vport;
172 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
173 ", inout cp=" FMT_CONN "\n",
174 __func__, ct,
175 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
176 ARG_CONN(cp));
177 goto alter;
178 }
179
180 /* CLIENT->VS */
181 cp = ip_vs_conn_in_get(&p);
182 if (cp) {
183 /* Change reply VS->CLIENT to RS->CLIENT */
184 new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
185 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
186 FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
187 __func__, ct, ct->status,
188 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
189 ARG_CONN(cp));
190 new_reply.src.u3 = cp->daddr;
191 new_reply.src.u.tcp.port = cp->dport;
192 IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", "
193 FMT_TUPLE ", outin cp=" FMT_CONN "\n",
194 __func__, ct,
195 ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
196 ARG_CONN(cp));
197 goto alter;
198 }
199
200 IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
201 " - unknown expect\n",
202 __func__, ct, ct->status, ARG_TUPLE(orig));
203 return;
204
205alter:
206 /* Never alter conntrack for non-NAT conns */
207 if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
208 nf_conntrack_alter_reply(ct, &new_reply);
209 ip_vs_conn_put(cp);
210 return;
211}
212
213/*
214 * Create NF conntrack expectation with wildcard (optional) source port.
215 * Then the default callback function will alter the reply and will confirm
216 * the conntrack entry when the first packet comes.
217 * Use port 0 to expect a connection from any port.
218 */
219void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
220 struct ip_vs_conn *cp, u_int8_t proto,
221 const __be16 port, int from_rs)
222{
223 struct nf_conntrack_expect *exp;
224
225 if (ct == NULL || nf_ct_is_untracked(ct))
226 return;
227
228 exp = nf_ct_expect_alloc(ct);
229 if (!exp)
230 return;
231
232 nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
233 from_rs ? &cp->daddr : &cp->caddr,
234 from_rs ? &cp->caddr : &cp->vaddr,
235 proto, port ? &port : NULL,
236 from_rs ? &cp->cport : &cp->vport);
237
238 exp->expectfn = ip_vs_nfct_expect_callback;
239
240 IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
241 __func__, ct, ARG_TUPLE(&exp->tuple));
242 nf_ct_expect_related(exp);
243 nf_ct_expect_put(exp);
244}
245EXPORT_SYMBOL(ip_vs_nfct_expect_related);
246
247/*
248 * Our connection was terminated, try to drop the conntrack immediately
249 */
250void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
251{
252 struct nf_conntrack_tuple_hash *h;
253 struct nf_conn *ct;
254 struct nf_conntrack_tuple tuple;
255
256 if (!cp->cport)
257 return;
258
259 tuple = (struct nf_conntrack_tuple) {
260 .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
261 tuple.src.u3 = cp->caddr;
262 tuple.src.u.all = cp->cport;
263 tuple.src.l3num = cp->af;
264 tuple.dst.u3 = cp->vaddr;
265 tuple.dst.u.all = cp->vport;
266
267 IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
268 " for conn " FMT_CONN "\n",
269 __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
270
271 h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
272 if (h) {
273 ct = nf_ct_tuplehash_to_ctrack(h);
274 /* Show what happens instead of calling nf_ct_kill() */
275 if (del_timer(&ct->timeout)) {
276 IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
277 FMT_TUPLE "\n",
278 __func__, ct, ARG_TUPLE(&tuple));
279 if (ct->timeout.function)
280 ct->timeout.function(ct->timeout.data);
281 } else {
282 IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
283 FMT_TUPLE "\n",
284 __func__, ct, ARG_TUPLE(&tuple));
285 }
286 nf_ct_put(ct);
287 } else {
288 IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
289 __func__, ARG_TUPLE(&tuple));
290 }
291}
292
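
The heart of the new file is ip_vs_update_conntrack(): for masquerading (NAT) connections it leaves the original-direction tuple untouched and rewrites only the reply tuple so conntrack expects the translated endpoints. A condensed userspace sketch of that rewrite, with deliberately simplified types:

#include <stdint.h>

struct tuple { uint32_t src, dst; uint16_t sport, dport; };

/* outin != 0: packet travels real server -> client; otherwise
 * client -> virtual service. Mirrors the if/else above. */
static void rewrite_reply(struct tuple *reply, int outin,
			  uint32_t daddr, uint16_t dport,
			  uint32_t vaddr, uint16_t vport)
{
	if (outin) {
		reply->src = daddr;	/* reply now appears from the RS */
		reply->sport = dport;
	} else {
		reply->dst = vaddr;	/* reply now targets the VIP */
		reply->dport = vport;
	}
}

int main(void)
{
	struct tuple reply = { 1, 2, 80, 4000 };

	rewrite_reply(&reply, 1, 9, 8080, 0, 0);
	return reply.src != 9;
}
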
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
new file mode 100644
index 000000000000..3414af70ee12
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -0,0 +1,147 @@
1#define KMSG_COMPONENT "IPVS"
2#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
3
4#include <linux/module.h>
5#include <linux/spinlock.h>
6#include <linux/interrupt.h>
7#include <asm/string.h>
8#include <linux/kmod.h>
9#include <linux/sysctl.h>
10
11#include <net/ip_vs.h>
12
13/* IPVS pe list */
14static LIST_HEAD(ip_vs_pe);
15
16/* lock for the pe list */
17static DEFINE_SPINLOCK(ip_vs_pe_lock);
18
19/* Bind a service with a pe */
20void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe)
21{
22 svc->pe = pe;
23}
24
25/* Unbind a service from its pe */
26void ip_vs_unbind_pe(struct ip_vs_service *svc)
27{
28 svc->pe = NULL;
29}
30
31/* Get pe in the pe list by name */
32static struct ip_vs_pe *
33ip_vs_pe_getbyname(const char *pe_name)
34{
35 struct ip_vs_pe *pe;
36
37 IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,
38 pe_name);
39
40 spin_lock_bh(&ip_vs_pe_lock);
41
42 list_for_each_entry(pe, &ip_vs_pe, n_list) {
43 /* Test and get the modules atomically */
44 if (pe->module &&
45 !try_module_get(pe->module)) {
46 /* This pe is being deleted */
47 continue;
48 }
49 if (strcmp(pe_name, pe->name) == 0) {
50 /* HIT */
51 spin_unlock_bh(&ip_vs_pe_lock);
52 return pe;
53 }
54 if (pe->module)
55 module_put(pe->module);
56 }
57
58 spin_unlock_bh(&ip_vs_pe_lock);
59 return NULL;
60}
61
62/* Lookup pe and try to load it if it doesn't exist */
63struct ip_vs_pe *ip_vs_pe_get(const char *name)
64{
65 struct ip_vs_pe *pe;
66
67 /* Search for the pe by name */
68 pe = ip_vs_pe_getbyname(name);
69
70 /* If pe not found, load the module and search again */
71 if (!pe) {
72 request_module("ip_vs_pe_%s", name);
73 pe = ip_vs_pe_getbyname(name);
74 }
75
76 return pe;
77}
78
79void ip_vs_pe_put(struct ip_vs_pe *pe)
80{
81 if (pe && pe->module)
82 module_put(pe->module);
83}
84
85/* Register a pe in the pe list */
86int register_ip_vs_pe(struct ip_vs_pe *pe)
87{
88 struct ip_vs_pe *tmp;
89
90 /* increase the module use count */
91 ip_vs_use_count_inc();
92
93 spin_lock_bh(&ip_vs_pe_lock);
94
95 if (!list_empty(&pe->n_list)) {
96 spin_unlock_bh(&ip_vs_pe_lock);
97 ip_vs_use_count_dec();
98 pr_err("%s(): [%s] pe already linked\n",
99 __func__, pe->name);
100 return -EINVAL;
101 }
102
103 /* Make sure that the pe with this name doesn't exist
104 * in the pe list.
105 */
106 list_for_each_entry(tmp, &ip_vs_pe, n_list) {
107 if (strcmp(tmp->name, pe->name) == 0) {
108 spin_unlock_bh(&ip_vs_pe_lock);
109 ip_vs_use_count_dec();
110 pr_err("%s(): [%s] pe already exists "
111 "in the system\n", __func__, pe->name);
112 return -EINVAL;
113 }
114 }
115 /* Add it to the doubly-linked pe list */
116 list_add(&pe->n_list, &ip_vs_pe);
117 spin_unlock_bh(&ip_vs_pe_lock);
118
119 pr_info("[%s] pe registered.\n", pe->name);
120
121 return 0;
122}
123EXPORT_SYMBOL_GPL(register_ip_vs_pe);
124
125/* Unregister a pe from the pe list */
126int unregister_ip_vs_pe(struct ip_vs_pe *pe)
127{
128 spin_lock_bh(&ip_vs_pe_lock);
129 if (list_empty(&pe->n_list)) {
130 spin_unlock_bh(&ip_vs_pe_lock);
131 pr_err("%s(): [%s] pe is not in the list, unregister failed\n",
132 __func__, pe->name);
133 return -EINVAL;
134 }
135
136 /* Remove it from the doubly-linked pe list */
137 list_del(&pe->n_list);
138 spin_unlock_bh(&ip_vs_pe_lock);
139
140 /* decrease the module use count */
141 ip_vs_use_count_dec();
142
143 pr_info("[%s] pe unregistered.\n", pe->name);
144
145 return 0;
146}
147EXPORT_SYMBOL_GPL(unregister_ip_vs_pe);
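
With register_ip_vs_pe()/unregister_ip_vs_pe() exported, a persistence engine is essentially a filled-in struct ip_vs_pe; ip_vs_pe_sip.c below is the first real user. A skeletal module against this interface as a sketch: the fill_param() body is a placeholder, and a useful engine would also supply ct_match(), hashkey_raw() and show_pe_data() as the SIP module does.

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <net/ip_vs.h>

/* Placeholder: a real engine extracts protocol state into p->pe_data;
 * returning an error here means the default persistence logic is used. */
static int example_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
{
	return -EINVAL;
}

static struct ip_vs_pe example_pe = {
	.name		= "example",
	.refcnt		= ATOMIC_INIT(0),
	.module		= THIS_MODULE,
	.n_list		= LIST_HEAD_INIT(example_pe.n_list),
	.fill_param	= example_fill_param,
};

static int __init example_pe_init(void)
{
	return register_ip_vs_pe(&example_pe);
}

static void __exit example_pe_cleanup(void)
{
	unregister_ip_vs_pe(&example_pe);
}

module_init(example_pe_init);
module_exit(example_pe_cleanup);
MODULE_LICENSE("GPL");
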
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
new file mode 100644
index 000000000000..b8b4e9620f3e
--- /dev/null
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -0,0 +1,169 @@
1#define KMSG_COMPONENT "IPVS"
2#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
3
4#include <linux/module.h>
5#include <linux/kernel.h>
6
7#include <net/ip_vs.h>
8#include <net/netfilter/nf_conntrack.h>
9#include <linux/netfilter/nf_conntrack_sip.h>
10
11#ifdef CONFIG_IP_VS_DEBUG
12static const char *ip_vs_dbg_callid(char *buf, size_t buf_len,
13 const char *callid, size_t callid_len,
14 int *idx)
15{
16 size_t len = min(min(callid_len, (size_t)64), buf_len - *idx - 1);
17 memcpy(buf + *idx, callid, len);
18 buf[*idx+len] = '\0';
19 *idx += len + 1;
20 return buf + *idx - len;
21}
22
23#define IP_VS_DEBUG_CALLID(callid, len) \
24 ip_vs_dbg_callid(ip_vs_dbg_buf, sizeof(ip_vs_dbg_buf), \
25 callid, len, &ip_vs_dbg_idx)
26#endif
27
28static int get_callid(const char *dptr, unsigned int dataoff,
29 unsigned int datalen,
30 unsigned int *matchoff, unsigned int *matchlen)
31{
32 /* Find callid */
33 while (1) {
34 int ret = ct_sip_get_header(NULL, dptr, dataoff, datalen,
35 SIP_HDR_CALL_ID, matchoff,
36 matchlen);
37 if (ret > 0)
38 break;
39 if (!ret)
40 return 0;
41 dataoff += *matchoff;
42 }
43
44 /* Empty callid is useless */
45 if (!*matchlen)
46 return -EINVAL;
47
48 /* Too large is useless */
49 if (*matchlen > IP_VS_PEDATA_MAXLEN)
50 return -EINVAL;
51
52 /* SIP headers are always followed by a line terminator */
53 if (*matchoff + *matchlen == datalen)
54 return -EINVAL;
55
56 /* RFC 2543 allows lines to be terminated with CR, LF or CRLF;
57 * RFC 3261 allows only CRLF. Checking for CR or LF covers both. */
58 if (*(dptr + *matchoff + *matchlen) != '\r' &&
59 *(dptr + *matchoff + *matchlen) != '\n')
60 return -EINVAL;
61
62 IP_VS_DBG_BUF(9, "SIP callid %s (%d bytes)\n",
63 IP_VS_DEBUG_CALLID(dptr + *matchoff, *matchlen),
64 *matchlen);
65 return 0;
66}
67
68static int
69ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
70{
71 struct ip_vs_iphdr iph;
72 unsigned int dataoff, datalen, matchoff, matchlen;
73 const char *dptr;
74
75 ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
76
77 /* Only useful with UDP */
78 if (iph.protocol != IPPROTO_UDP)
79 return -EINVAL;
80
81 /* No data? */
82 dataoff = iph.len + sizeof(struct udphdr);
83 if (dataoff >= skb->len)
84 return -EINVAL;
85
86 dptr = skb->data + dataoff;
87 datalen = skb->len - dataoff;
88
89 if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
90 return -EINVAL;
91
92 p->pe_data = kmalloc(matchlen, GFP_ATOMIC);
93 if (!p->pe_data)
94 return -ENOMEM;
95
96 /* N.B.: pe_data is only set on success,
97 * this allows fallback to the default persistence logic on failure
98 */
99 memcpy(p->pe_data, dptr + matchoff, matchlen);
100 p->pe_data_len = matchlen;
101
102 return 0;
103}
104
105static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p,
106 struct ip_vs_conn *ct)
107
108{
109 bool ret = false;
110
111 if (ct->af == p->af &&
112 ip_vs_addr_equal(p->af, p->caddr, &ct->caddr) &&
113 /* protocol should only be IPPROTO_IP if
114 * d_addr is a fwmark */
115 ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
116 p->vaddr, &ct->vaddr) &&
117 ct->vport == p->vport &&
118 ct->flags & IP_VS_CONN_F_TEMPLATE &&
119 ct->protocol == p->protocol &&
120 ct->pe_data && ct->pe_data_len == p->pe_data_len &&
121 !memcmp(ct->pe_data, p->pe_data, p->pe_data_len))
122 ret = true;
123
124 IP_VS_DBG_BUF(9, "SIP template match %s %s->%s:%d %s\n",
125 ip_vs_proto_name(p->protocol),
126 IP_VS_DEBUG_CALLID(p->pe_data, p->pe_data_len),
127 IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport),
128 ret ? "hit" : "not hit");
129
130 return ret;
131}
132
133static u32 ip_vs_sip_hashkey_raw(const struct ip_vs_conn_param *p,
134 u32 initval, bool inverse)
135{
136 return jhash(p->pe_data, p->pe_data_len, initval);
137}
138
139static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf)
140{
141 memcpy(buf, cp->pe_data, cp->pe_data_len);
142 return cp->pe_data_len;
143}
144
145static struct ip_vs_pe ip_vs_sip_pe =
146{
147 .name = "sip",
148 .refcnt = ATOMIC_INIT(0),
149 .module = THIS_MODULE,
150 .n_list = LIST_HEAD_INIT(ip_vs_sip_pe.n_list),
151 .fill_param = ip_vs_sip_fill_param,
152 .ct_match = ip_vs_sip_ct_match,
153 .hashkey_raw = ip_vs_sip_hashkey_raw,
154 .show_pe_data = ip_vs_sip_show_pe_data,
155};
156
157static int __init ip_vs_sip_init(void)
158{
159 return register_ip_vs_pe(&ip_vs_sip_pe);
160}
161
162static void __exit ip_vs_sip_cleanup(void)
163{
164 unregister_ip_vs_pe(&ip_vs_sip_pe);
165}
166
167module_init(ip_vs_sip_init);
168module_exit(ip_vs_sip_cleanup);
169MODULE_LICENSE("GPL");
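
get_callid() above enforces three validity rules on the extracted Call-ID: non-empty, no larger than IP_VS_PEDATA_MAXLEN, and followed by a CR or LF terminator. The same checks in a self-contained userspace form, with a stand-in constant:

#include <stddef.h>

#define PEDATA_MAXLEN 255	/* stand-in for IP_VS_PEDATA_MAXLEN */

static int callid_ok(const char *pkt, size_t pktlen,
		     size_t off, size_t len)
{
	if (!len || len > PEDATA_MAXLEN)
		return 0;		/* empty or oversized is useless */
	if (off + len >= pktlen)
		return 0;		/* no room for a line terminator */
	return pkt[off + len] == '\r' || pkt[off + len] == '\n';
}

int main(void)
{
	const char pkt[] = "Call-ID: abc\r\n";

	return !callid_ok(pkt, sizeof(pkt) - 1, 9, 3);
}
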
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 027f654799fe..c53998390877 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -172,8 +172,8 @@ ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
172 else if (ih->frag_off & htons(IP_OFFSET)) 172 else if (ih->frag_off & htons(IP_OFFSET))
173 sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr); 173 sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr);
174 else { 174 else {
175 __be16 _ports[2], *pptr 175 __be16 _ports[2], *pptr;
176; 176
177 pptr = skb_header_pointer(skb, offset + ih->ihl*4, 177 pptr = skb_header_pointer(skb, offset + ih->ihl*4,
178 sizeof(_ports), _ports); 178 sizeof(_ports), _ports);
179 if (pptr == NULL) 179 if (pptr == NULL)
@@ -223,13 +223,13 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
223 223
224 224
225void 225void
226ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, 226ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
227 const struct sk_buff *skb, 227 const struct sk_buff *skb,
228 int offset, 228 int offset,
229 const char *msg) 229 const char *msg)
230{ 230{
231#ifdef CONFIG_IP_VS_IPV6 231#ifdef CONFIG_IP_VS_IPV6
232 if (skb->protocol == htons(ETH_P_IPV6)) 232 if (af == AF_INET6)
233 ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); 233 ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
234 else 234 else
235#endif 235#endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 1892dfc12fdd..3a0461117d3f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -40,6 +40,19 @@ struct isakmp_hdr {
40 40
41#define PORT_ISAKMP 500 41#define PORT_ISAKMP 500
42 42
43static void
44ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
45 int inverse, struct ip_vs_conn_param *p)
46{
47 if (likely(!inverse))
48 ip_vs_conn_fill_param(af, IPPROTO_UDP,
49 &iph->saddr, htons(PORT_ISAKMP),
50 &iph->daddr, htons(PORT_ISAKMP), p);
51 else
52 ip_vs_conn_fill_param(af, IPPROTO_UDP,
53 &iph->daddr, htons(PORT_ISAKMP),
54 &iph->saddr, htons(PORT_ISAKMP), p);
55}
43 56
44static struct ip_vs_conn * 57static struct ip_vs_conn *
45ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, 58ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
@@ -47,21 +60,10 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
47 int inverse) 60 int inverse)
48{ 61{
49 struct ip_vs_conn *cp; 62 struct ip_vs_conn *cp;
63 struct ip_vs_conn_param p;
50 64
51 if (likely(!inverse)) { 65 ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
52 cp = ip_vs_conn_in_get(af, IPPROTO_UDP, 66 cp = ip_vs_conn_in_get(&p);
53 &iph->saddr,
54 htons(PORT_ISAKMP),
55 &iph->daddr,
56 htons(PORT_ISAKMP));
57 } else {
58 cp = ip_vs_conn_in_get(af, IPPROTO_UDP,
59 &iph->daddr,
60 htons(PORT_ISAKMP),
61 &iph->saddr,
62 htons(PORT_ISAKMP));
63 }
64
65 if (!cp) { 67 if (!cp) {
66 /* 68 /*
67 * We are not sure if the packet is from our 69 * We are not sure if the packet is from our
@@ -87,21 +89,10 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
87 int inverse) 89 int inverse)
88{ 90{
89 struct ip_vs_conn *cp; 91 struct ip_vs_conn *cp;
92 struct ip_vs_conn_param p;
90 93
91 if (likely(!inverse)) { 94 ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
92 cp = ip_vs_conn_out_get(af, IPPROTO_UDP, 95 cp = ip_vs_conn_out_get(&p);
93 &iph->saddr,
94 htons(PORT_ISAKMP),
95 &iph->daddr,
96 htons(PORT_ISAKMP));
97 } else {
98 cp = ip_vs_conn_out_get(af, IPPROTO_UDP,
99 &iph->daddr,
100 htons(PORT_ISAKMP),
101 &iph->saddr,
102 htons(PORT_ISAKMP));
103 }
104
105 if (!cp) { 96 if (!cp) {
106 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " 97 IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
107 "%s%s %s->%s\n", 98 "%s%s %s->%s\n",
@@ -126,54 +117,6 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
126 return 0; 117 return 0;
127} 118}
128 119
129
130static void
131ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,
132 int offset, const char *msg)
133{
134 char buf[256];
135 struct iphdr _iph, *ih;
136
137 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
138 if (ih == NULL)
139 sprintf(buf, "TRUNCATED");
140 else
141 sprintf(buf, "%pI4->%pI4", &ih->saddr, &ih->daddr);
142
143 pr_debug("%s: %s %s\n", msg, pp->name, buf);
144}
145
146#ifdef CONFIG_IP_VS_IPV6
147static void
148ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
149 int offset, const char *msg)
150{
151 char buf[256];
152 struct ipv6hdr _iph, *ih;
153
154 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
155 if (ih == NULL)
156 sprintf(buf, "TRUNCATED");
157 else
158 sprintf(buf, "%pI6->%pI6", &ih->saddr, &ih->daddr);
159
160 pr_debug("%s: %s %s\n", msg, pp->name, buf);
161}
162#endif
163
164static void
165ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb,
166 int offset, const char *msg)
167{
168#ifdef CONFIG_IP_VS_IPV6
169 if (skb->protocol == htons(ETH_P_IPV6))
170 ah_esp_debug_packet_v6(pp, skb, offset, msg);
171 else
172#endif
173 ah_esp_debug_packet_v4(pp, skb, offset, msg);
174}
175
176
177static void ah_esp_init(struct ip_vs_protocol *pp) 120static void ah_esp_init(struct ip_vs_protocol *pp)
178{ 121{
179 /* nothing to do now */ 122 /* nothing to do now */
@@ -204,7 +147,7 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
204 .register_app = NULL, 147 .register_app = NULL,
205 .unregister_app = NULL, 148 .unregister_app = NULL,
206 .app_conn_bind = NULL, 149 .app_conn_bind = NULL,
207 .debug_packet = ah_esp_debug_packet, 150 .debug_packet = ip_vs_tcpudp_debug_packet,
208 .timeout_change = NULL, /* ISAKMP */ 151 .timeout_change = NULL, /* ISAKMP */
209 .set_state_timeout = NULL, 152 .set_state_timeout = NULL,
210}; 153};
@@ -228,7 +171,7 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
228 .register_app = NULL, 171 .register_app = NULL,
229 .unregister_app = NULL, 172 .unregister_app = NULL,
230 .app_conn_bind = NULL, 173 .app_conn_bind = NULL,
231 .debug_packet = ah_esp_debug_packet, 174 .debug_packet = ip_vs_tcpudp_debug_packet,
232 .timeout_change = NULL, /* ISAKMP */ 175 .timeout_change = NULL, /* ISAKMP */
233}; 176};
234#endif 177#endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 4c0855cb006e..1ea96bcd342b 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -31,6 +31,8 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
31 if ((sch->type == SCTP_CID_INIT) && 31 if ((sch->type == SCTP_CID_INIT) &&
32 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, 32 (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
33 &iph.daddr, sh->dest))) { 33 &iph.daddr, sh->dest))) {
34 int ignored;
35
34 if (ip_vs_todrop()) { 36 if (ip_vs_todrop()) {
35 /* 37 /*
36 * It seems that we are very loaded. 38 * It seems that we are very loaded.
@@ -44,8 +46,8 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
44 * Let the virtual server select a real server for the 46 * Let the virtual server select a real server for the
45 * incoming connection, and create a connection entry. 47 * incoming connection, and create a connection entry.
46 */ 48 */
47 *cpp = ip_vs_schedule(svc, skb); 49 *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
48 if (!*cpp) { 50 if (!*cpp && !ignored) {
49 *verdict = ip_vs_leave(svc, skb, pp); 51 *verdict = ip_vs_leave(svc, skb, pp);
50 return 0; 52 return 0;
51 } 53 }
@@ -61,6 +63,7 @@ sctp_snat_handler(struct sk_buff *skb,
61{ 63{
62 sctp_sctphdr_t *sctph; 64 sctp_sctphdr_t *sctph;
63 unsigned int sctphoff; 65 unsigned int sctphoff;
66 struct sk_buff *iter;
64 __be32 crc32; 67 __be32 crc32;
65 68
66#ifdef CONFIG_IP_VS_IPV6 69#ifdef CONFIG_IP_VS_IPV6
@@ -89,8 +92,8 @@ sctp_snat_handler(struct sk_buff *skb,
89 92
90 /* Calculate the checksum */ 93 /* Calculate the checksum */
91 crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); 94 crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
92 for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) 95 skb_walk_frags(skb, iter)
93 crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb), 96 crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter),
94 crc32); 97 crc32);
95 crc32 = sctp_end_cksum(crc32); 98 crc32 = sctp_end_cksum(crc32);
96 sctph->checksum = crc32; 99 sctph->checksum = crc32;
@@ -102,9 +105,9 @@ static int
102sctp_dnat_handler(struct sk_buff *skb, 105sctp_dnat_handler(struct sk_buff *skb,
103 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 106 struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
104{ 107{
105
106 sctp_sctphdr_t *sctph; 108 sctp_sctphdr_t *sctph;
107 unsigned int sctphoff; 109 unsigned int sctphoff;
110 struct sk_buff *iter;
108 __be32 crc32; 111 __be32 crc32;
109 112
110#ifdef CONFIG_IP_VS_IPV6 113#ifdef CONFIG_IP_VS_IPV6
@@ -133,8 +136,8 @@ sctp_dnat_handler(struct sk_buff *skb,
133 136
134 /* Calculate the checksum */ 137 /* Calculate the checksum */
135 crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff); 138 crc32 = sctp_start_cksum((u8 *) sctph, skb_headlen(skb) - sctphoff);
136 for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next) 139 skb_walk_frags(skb, iter)
137 crc32 = sctp_update_cksum((u8 *) skb->data, skb_headlen(skb), 140 crc32 = sctp_update_cksum((u8 *) iter->data, skb_headlen(iter),
138 crc32); 141 crc32);
139 crc32 = sctp_end_cksum(crc32); 142 crc32 = sctp_end_cksum(crc32);
140 sctph->checksum = crc32; 143 sctph->checksum = crc32;
@@ -145,9 +148,9 @@ sctp_dnat_handler(struct sk_buff *skb,
145static int 148static int
146sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) 149sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
147{ 150{
148 struct sk_buff *list = skb_shinfo(skb)->frag_list;
149 unsigned int sctphoff; 151 unsigned int sctphoff;
150 struct sctphdr *sh, _sctph; 152 struct sctphdr *sh, _sctph;
153 struct sk_buff *iter;
151 __le32 cmp; 154 __le32 cmp;
152 __le32 val; 155 __le32 val;
153 __u32 tmp; 156 __u32 tmp;
@@ -166,15 +169,15 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
166 cmp = sh->checksum; 169 cmp = sh->checksum;
167 170
168 tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb)); 171 tmp = sctp_start_cksum((__u8 *) sh, skb_headlen(skb));
169 for (; list; list = list->next) 172 skb_walk_frags(skb, iter)
170 tmp = sctp_update_cksum((__u8 *) list->data, 173 tmp = sctp_update_cksum((__u8 *) iter->data,
171 skb_headlen(list), tmp); 174 skb_headlen(iter), tmp);
172 175
173 val = sctp_end_cksum(tmp); 176 val = sctp_end_cksum(tmp);
174 177
175 if (val != cmp) { 178 if (val != cmp) {
176 /* CRC failure, dump it. */ 179 /* CRC failure, dump it. */
177 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 180 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
178 "Failed checksum for"); 181 "Failed checksum for");
179 return 0; 182 return 0;
180 } 183 }
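
The SCTP hunks swap the open-coded frag_list walk, which reused and advanced the skb variable itself, for skb_walk_frags() over a dedicated iterator; the checksum shape stays start-on-the-head, update-per-fragment, finalize. A self-contained userspace analogue, with a bitwise CRC32c standing in for sctp_start_cksum()/sctp_update_cksum()/sctp_end_cksum():

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

struct frag { const uint8_t *data; size_t len; struct frag *next; };

/* Bitwise CRC32c (reflected polynomial 0x82F63B78). */
static uint32_t crc32c(uint32_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1));
	}
	return crc;
}

static uint32_t chain_cksum(const uint8_t *head, size_t head_len,
			    const struct frag *f)
{
	uint32_t crc = crc32c(~0u, head, head_len);	/* "start" */

	for (; f; f = f->next)				/* "update" */
		crc = crc32c(crc, f->data, f->len);
	return ~crc;					/* "end" */
}

int main(void)
{
	const uint8_t head[] = "sctp", tail[] = "frag";
	struct frag f = { tail, sizeof(tail) - 1, NULL };

	printf("%08x\n", chain_cksum(head, sizeof(head) - 1, &f));
	return 0;
}
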
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 282d24de8592..f6c5200e2146 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -43,9 +43,12 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
43 return 0; 43 return 0;
44 } 44 }
45 45
46 /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
46 if (th->syn && 47 if (th->syn &&
47 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, 48 (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
48 th->dest))) { 49 th->dest))) {
50 int ignored;
51
49 if (ip_vs_todrop()) { 52 if (ip_vs_todrop()) {
50 /* 53 /*
51 * It seems that we are very loaded. 54 * It seems that we are very loaded.
@@ -60,8 +63,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
60 * Let the virtual server select a real server for the 63 * Let the virtual server select a real server for the
61 * incoming connection, and create a connection entry. 64 * incoming connection, and create a connection entry.
62 */ 65 */
63 *cpp = ip_vs_schedule(svc, skb); 66 *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
64 if (!*cpp) { 67 if (!*cpp && !ignored) {
65 *verdict = ip_vs_leave(svc, skb, pp); 68 *verdict = ip_vs_leave(svc, skb, pp);
66 return 0; 69 return 0;
67 } 70 }
@@ -101,15 +104,15 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph,
101#ifdef CONFIG_IP_VS_IPV6 104#ifdef CONFIG_IP_VS_IPV6
102 if (af == AF_INET6) 105 if (af == AF_INET6)
103 tcph->check = 106 tcph->check =
104 csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, 107 ~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
105 ip_vs_check_diff2(oldlen, newlen, 108 ip_vs_check_diff2(oldlen, newlen,
106 ~csum_unfold(tcph->check)))); 109 csum_unfold(tcph->check))));
107 else 110 else
108#endif 111#endif
109 tcph->check = 112 tcph->check =
110 csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, 113 ~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
111 ip_vs_check_diff2(oldlen, newlen, 114 ip_vs_check_diff2(oldlen, newlen,
112 ~csum_unfold(tcph->check)))); 115 csum_unfold(tcph->check))));
113} 116}
114 117
115 118
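
The csum_fold changes above are subtle: for CHECKSUM_PARTIAL packets the check field carries the pseudo-header sum in non-complemented form, so the old code's extra complement corrupted the result. The arithmetic behind all of these helpers is RFC 1624 incremental update, HC' = ~(~HC + ~m + m'), which the following self-contained sketch verifies against a full recompute:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* RFC 1624 Eqn. 3: update a complemented 16-bit one's-complement
 * checksum when field 'old' is replaced by 'new_val'. */
static uint16_t csum_update(uint16_t check, uint16_t old, uint16_t new_val)
{
	uint32_t sum = (uint16_t)~check;	/* ~HC */
	sum += (uint16_t)~old;			/* + ~m */
	sum += new_val;				/* + m' */
	sum = (sum & 0xffff) + (sum >> 16);	/* fold carries */
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;			/* HC' */
}

static uint16_t csum_full(const uint16_t *w, size_t n)
{
	uint32_t sum = 0;

	while (n--)
		sum += *w++;
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	uint16_t pkt[4] = { 0x1234, 0xabcd, 0x0001, 0xffff };
	uint16_t check = csum_full(pkt, 4);
	uint16_t incr = csum_update(check, pkt[2], 0x0042);

	pkt[2] = 0x0042;
	printf("incremental=%04x full=%04x\n", incr, csum_full(pkt, 4));
	return incr != csum_full(pkt, 4);	/* 0: both agree */
}
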
@@ -120,6 +123,7 @@ tcp_snat_handler(struct sk_buff *skb,
120 struct tcphdr *tcph; 123 struct tcphdr *tcph;
121 unsigned int tcphoff; 124 unsigned int tcphoff;
122 int oldlen; 125 int oldlen;
126 int payload_csum = 0;
123 127
124#ifdef CONFIG_IP_VS_IPV6 128#ifdef CONFIG_IP_VS_IPV6
125 if (cp->af == AF_INET6) 129 if (cp->af == AF_INET6)
@@ -134,13 +138,20 @@ tcp_snat_handler(struct sk_buff *skb,
134 return 0; 138 return 0;
135 139
136 if (unlikely(cp->app != NULL)) { 140 if (unlikely(cp->app != NULL)) {
141 int ret;
142
137 /* Some checks before mangling */ 143 /* Some checks before mangling */
138 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 144 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
139 return 0; 145 return 0;
140 146
141 /* Call application helper if needed */ 147 /* Call application helper if needed */
142 if (!ip_vs_app_pkt_out(cp, skb)) 148 if (!(ret = ip_vs_app_pkt_out(cp, skb)))
143 return 0; 149 return 0;
150 /* ret=2: csum update is needed after payload mangling */
151 if (ret == 1)
152 oldlen = skb->len - tcphoff;
153 else
154 payload_csum = 1;
144 } 155 }
145 156
146 tcph = (void *)skb_network_header(skb) + tcphoff; 157 tcph = (void *)skb_network_header(skb) + tcphoff;
@@ -151,12 +162,13 @@ tcp_snat_handler(struct sk_buff *skb,
151 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, 162 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
152 htons(oldlen), 163 htons(oldlen),
153 htons(skb->len - tcphoff)); 164 htons(skb->len - tcphoff));
154 } else if (!cp->app) { 165 } else if (!payload_csum) {
155 /* Only port and addr are changed, do fast csum update */ 166 /* Only port and addr are changed, do fast csum update */
156 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, 167 tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
157 cp->dport, cp->vport); 168 cp->dport, cp->vport);
158 if (skb->ip_summed == CHECKSUM_COMPLETE) 169 if (skb->ip_summed == CHECKSUM_COMPLETE)
159 skb->ip_summed = CHECKSUM_NONE; 170 skb->ip_summed = (cp->app && pp->csum_check) ?
171 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
160 } else { 172 } else {
161 /* full checksum calculation */ 173 /* full checksum calculation */
162 tcph->check = 0; 174 tcph->check = 0;
@@ -174,6 +186,7 @@ tcp_snat_handler(struct sk_buff *skb,
174 skb->len - tcphoff, 186 skb->len - tcphoff,
175 cp->protocol, 187 cp->protocol,
176 skb->csum); 188 skb->csum);
189 skb->ip_summed = CHECKSUM_UNNECESSARY;
177 190
178 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", 191 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
179 pp->name, tcph->check, 192 pp->name, tcph->check,
@@ -190,6 +203,7 @@ tcp_dnat_handler(struct sk_buff *skb,
190 struct tcphdr *tcph; 203 struct tcphdr *tcph;
191 unsigned int tcphoff; 204 unsigned int tcphoff;
192 int oldlen; 205 int oldlen;
206 int payload_csum = 0;
193 207
194#ifdef CONFIG_IP_VS_IPV6 208#ifdef CONFIG_IP_VS_IPV6
195 if (cp->af == AF_INET6) 209 if (cp->af == AF_INET6)
@@ -204,6 +218,8 @@ tcp_dnat_handler(struct sk_buff *skb,
204 return 0; 218 return 0;
205 219
206 if (unlikely(cp->app != NULL)) { 220 if (unlikely(cp->app != NULL)) {
221 int ret;
222
207 /* Some checks before mangling */ 223 /* Some checks before mangling */
208 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) 224 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
209 return 0; 225 return 0;
@@ -212,8 +228,13 @@ tcp_dnat_handler(struct sk_buff *skb,
212 * Attempt ip_vs_app call. 228 * Attempt ip_vs_app call.
213 * It will fix ip_vs_conn and iph ack_seq stuff 229 * It will fix ip_vs_conn and iph ack_seq stuff
214 */ 230 */
215 if (!ip_vs_app_pkt_in(cp, skb)) 231 if (!(ret = ip_vs_app_pkt_in(cp, skb)))
216 return 0; 232 return 0;
233 /* ret=2: csum update is needed after payload mangling */
234 if (ret == 1)
235 oldlen = skb->len - tcphoff;
236 else
237 payload_csum = 1;
217 } 238 }
218 239
219 tcph = (void *)skb_network_header(skb) + tcphoff; 240 tcph = (void *)skb_network_header(skb) + tcphoff;
@@ -223,15 +244,16 @@ tcp_dnat_handler(struct sk_buff *skb,
223 * Adjust TCP checksums 244 * Adjust TCP checksums
224 */ 245 */
225 if (skb->ip_summed == CHECKSUM_PARTIAL) { 246 if (skb->ip_summed == CHECKSUM_PARTIAL) {
226 tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, 247 tcp_partial_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
227 htons(oldlen), 248 htons(oldlen),
228 htons(skb->len - tcphoff)); 249 htons(skb->len - tcphoff));
229 } else if (!cp->app) { 250 } else if (!payload_csum) {
230 /* Only port and addr are changed, do fast csum update */ 251 /* Only port and addr are changed, do fast csum update */
231 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, 252 tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
232 cp->vport, cp->dport); 253 cp->vport, cp->dport);
233 if (skb->ip_summed == CHECKSUM_COMPLETE) 254 if (skb->ip_summed == CHECKSUM_COMPLETE)
234 skb->ip_summed = CHECKSUM_NONE; 255 skb->ip_summed = (cp->app && pp->csum_check) ?
256 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
235 } else { 257 } else {
236 /* full checksum calculation */ 258 /* full checksum calculation */
237 tcph->check = 0; 259 tcph->check = 0;
@@ -278,7 +300,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
278 skb->len - tcphoff, 300 skb->len - tcphoff,
279 ipv6_hdr(skb)->nexthdr, 301 ipv6_hdr(skb)->nexthdr,
280 skb->csum)) { 302 skb->csum)) {
281 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 303 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
282 "Failed checksum for"); 304 "Failed checksum for");
283 return 0; 305 return 0;
284 } 306 }
@@ -289,7 +311,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
289 skb->len - tcphoff, 311 skb->len - tcphoff,
290 ip_hdr(skb)->protocol, 312 ip_hdr(skb)->protocol,
291 skb->csum)) { 313 skb->csum)) {
292 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 314 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
293 "Failed checksum for"); 315 "Failed checksum for");
294 return 0; 316 return 0;
295 } 317 }
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 8553231b5d41..9d106a06bb0a 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -46,6 +46,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
46 svc = ip_vs_service_get(af, skb->mark, iph.protocol, 46 svc = ip_vs_service_get(af, skb->mark, iph.protocol,
47 &iph.daddr, uh->dest); 47 &iph.daddr, uh->dest);
48 if (svc) { 48 if (svc) {
49 int ignored;
50
49 if (ip_vs_todrop()) { 51 if (ip_vs_todrop()) {
50 /* 52 /*
51 * It seems that we are very loaded. 53 * It seems that we are very loaded.
@@ -60,8 +62,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
 		 * Let the virtual server select a real server for the
 		 * incoming connection, and create a connection entry.
 		 */
-		*cpp = ip_vs_schedule(svc, skb);
-		if (!*cpp) {
+		*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
+		if (!*cpp && !ignored) {
 			*verdict = ip_vs_leave(svc, skb, pp);
 			return 0;
 		}
@@ -102,15 +104,15 @@ udp_partial_csum_update(int af, struct udphdr *uhdr,
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6)
 		uhdr->check =
-			csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
+			~csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6,
 					 ip_vs_check_diff2(oldlen, newlen,
-						~csum_unfold(uhdr->check))));
+						csum_unfold(uhdr->check))));
 	else
 #endif
 		uhdr->check =
-			csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
+			~csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip,
 					ip_vs_check_diff2(oldlen, newlen,
-						csum_unfold(uhdr->check))));
+						csum_unfold(uhdr->check))));
 }
 
 
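The udp_partial_csum_update() fix above moves the complement outside the fold: the old code handed CHECKSUM_PARTIAL hardware a value of the wrong polarity. The rule being applied is the RFC 1624 incremental update HC' = ~(~HC + ~m + m'). A self-contained sketch with illustrative values (not the kernel's csum helpers):

#include <stdint.h>
#include <stdio.h>

/* RFC 1624 eq. 3: update a 16-bit one's-complement checksum when one
 * 16-bit field changes from 'old' to 'new'. */
static uint16_t csum_update(uint16_t check, uint16_t old, uint16_t new)
{
	uint32_t sum = (uint16_t)~check;

	sum += (uint16_t)~old + new;
	sum = (sum & 0xffff) + (sum >> 16);	/* fold carries */
	sum = (sum & 0xffff) + (sum >> 16);
	return ~sum & 0xffff;
}

int main(void)
{
	/* illustrative: port 80 rewritten to 8080 under checksum 0x1c46 */
	printf("0x%04x\n", csum_update(0x1c46, 80, 8080));
	return 0;
}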
@@ -121,6 +123,7 @@ udp_snat_handler(struct sk_buff *skb,
 	struct udphdr *udph;
 	unsigned int udphoff;
 	int oldlen;
+	int payload_csum = 0;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6)
@@ -135,6 +138,8 @@ udp_snat_handler(struct sk_buff *skb,
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
+		int ret;
+
 		/* Some checks before mangling */
 		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 			return 0;
@@ -142,8 +147,13 @@ udp_snat_handler(struct sk_buff *skb,
 		/*
 		 * Call application helper if needed
 		 */
-		if (!ip_vs_app_pkt_out(cp, skb))
+		if (!(ret = ip_vs_app_pkt_out(cp, skb)))
 			return 0;
+		/* ret=2: csum update is needed after payload mangling */
+		if (ret == 1)
+			oldlen = skb->len - udphoff;
+		else
+			payload_csum = 1;
 	}
 
 	udph = (void *)skb_network_header(skb) + udphoff;
@@ -156,12 +166,13 @@ udp_snat_handler(struct sk_buff *skb,
 		udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 					htons(oldlen),
 					htons(skb->len - udphoff));
-	} else if (!cp->app && (udph->check != 0)) {
+	} else if (!payload_csum && (udph->check != 0)) {
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
 				     cp->dport, cp->vport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = CHECKSUM_NONE;
+			skb->ip_summed = (cp->app && pp->csum_check) ?
+					 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
 		udph->check = 0;
@@ -181,6 +192,7 @@ udp_snat_handler(struct sk_buff *skb,
 						skb->csum);
 		if (udph->check == 0)
 			udph->check = CSUM_MANGLED_0;
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
 		IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
 			  pp->name, udph->check,
 			  (char*)&(udph->check) - (char*)udph);
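The CSUM_MANGLED_0 substitution retained above exists because a UDP-over-IPv4 checksum of zero on the wire means "no checksum at all"; a recomputed sum that folds to 0 must be sent as 0xffff, which is arithmetically identical in one's complement. Sketch (the constant matches the kernel's value; the helper name is illustrative):

#include <stdint.h>
#include <stdio.h>

#define CSUM_MANGLED_0 0xffffu

static uint16_t udp_wire_csum(uint16_t folded)
{
	/* 0x0000 would claim "checksum disabled", so substitute the
	 * equivalent one's-complement representation instead. */
	return folded == 0 ? CSUM_MANGLED_0 : folded;
}

int main(void)
{
	printf("0x%04x 0x%04x\n", udp_wire_csum(0), udp_wire_csum(0x1234));
	return 0;
}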
@@ -196,6 +208,7 @@ udp_dnat_handler(struct sk_buff *skb,
 	struct udphdr *udph;
 	unsigned int udphoff;
 	int oldlen;
+	int payload_csum = 0;
 
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6)
@@ -210,6 +223,8 @@ udp_dnat_handler(struct sk_buff *skb,
 		return 0;
 
 	if (unlikely(cp->app != NULL)) {
+		int ret;
+
 		/* Some checks before mangling */
 		if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
 			return 0;
@@ -218,8 +233,13 @@ udp_dnat_handler(struct sk_buff *skb,
 		 * Attempt ip_vs_app call.
 		 * It will fix ip_vs_conn
 		 */
-		if (!ip_vs_app_pkt_in(cp, skb))
+		if (!(ret = ip_vs_app_pkt_in(cp, skb)))
 			return 0;
+		/* ret=2: csum update is needed after payload mangling */
+		if (ret == 1)
+			oldlen = skb->len - udphoff;
+		else
+			payload_csum = 1;
 	}
 
 	udph = (void *)skb_network_header(skb) + udphoff;
@@ -229,15 +249,16 @@ udp_dnat_handler(struct sk_buff *skb,
 	 * Adjust UDP checksums
 	 */
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
+		udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
 					htons(oldlen),
 					htons(skb->len - udphoff));
-	} else if (!cp->app && (udph->check != 0)) {
+	} else if (!payload_csum && (udph->check != 0)) {
 		/* Only port and addr are changed, do fast csum update */
 		udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
 				     cp->vport, cp->dport);
 		if (skb->ip_summed == CHECKSUM_COMPLETE)
-			skb->ip_summed = CHECKSUM_NONE;
+			skb->ip_summed = (cp->app && pp->csum_check) ?
+					 CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
 	} else {
 		/* full checksum calculation */
 		udph->check = 0;
@@ -293,7 +314,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 				      skb->len - udphoff,
 				      ipv6_hdr(skb)->nexthdr,
 				      skb->csum)) {
-			IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+			IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
 					 "Failed checksum for");
 			return 0;
 		}
@@ -304,7 +325,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
 				      skb->len - udphoff,
 				      ip_hdr(skb)->protocol,
 				      skb->csum)) {
-			IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+			IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
 					 "Failed checksum for");
 			return 0;
 		}
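Both UDP handlers above now leave skb->ip_summed as CHECKSUM_UNNECESSARY rather than CHECKSUM_NONE when an application helper with its own csum_check is attached: the checksum was verified before mangling and incrementally updated after, so later hooks need not re-verify it. A compact model of that decision (enum values are illustrative, not the kernel's):

#include <stdio.h>

enum { CHECKSUM_NONE, CHECKSUM_UNNECESSARY };

/* If an app helper's csum_check already validated the packet, the
 * fast-updated checksum stays trustworthy; otherwise force a fresh
 * software verification downstream. */
static int ip_summed_after_fast_update(int has_app, int has_csum_check)
{
	return (has_app && has_csum_check) ? CHECKSUM_UNNECESSARY
					   : CHECKSUM_NONE;
}

int main(void)
{
	printf("%d %d\n", ip_summed_after_fast_update(1, 1),
	       ip_summed_after_fast_update(0, 0));
	return 0;
}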
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index bbc1ac795952..076ebe00435d 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -35,7 +35,7 @@
 static LIST_HEAD(ip_vs_schedulers);
 
 /* lock for service table */
-static DEFINE_RWLOCK(__ip_vs_sched_lock);
+static DEFINE_SPINLOCK(ip_vs_sched_lock);
 
 
 /*
@@ -46,15 +46,6 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
 {
 	int ret;
 
-	if (svc == NULL) {
-		pr_err("%s(): svc arg NULL\n", __func__);
-		return -EINVAL;
-	}
-	if (scheduler == NULL) {
-		pr_err("%s(): scheduler arg NULL\n", __func__);
-		return -EINVAL;
-	}
-
 	svc->scheduler = scheduler;
 
 	if (scheduler->init_service) {
@@ -74,18 +65,10 @@ int ip_vs_bind_scheduler(struct ip_vs_service *svc,
  */
 int ip_vs_unbind_scheduler(struct ip_vs_service *svc)
 {
-	struct ip_vs_scheduler *sched;
+	struct ip_vs_scheduler *sched = svc->scheduler;
 
-	if (svc == NULL) {
-		pr_err("%s(): svc arg NULL\n", __func__);
-		return -EINVAL;
-	}
-
-	sched = svc->scheduler;
-	if (sched == NULL) {
-		pr_err("%s(): svc isn't bound\n", __func__);
-		return -EINVAL;
-	}
+	if (!sched)
+		return 0;
 
 	if (sched->done_service) {
 		if (sched->done_service(svc) != 0) {
@@ -108,7 +91,7 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
 
 	IP_VS_DBG(2, "%s(): sched_name \"%s\"\n", __func__, sched_name);
 
-	read_lock_bh(&__ip_vs_sched_lock);
+	spin_lock_bh(&ip_vs_sched_lock);
 
 	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
 		/*
@@ -122,14 +105,14 @@ static struct ip_vs_scheduler *ip_vs_sched_getbyname(const char *sched_name)
 		}
 		if (strcmp(sched_name, sched->name)==0) {
 			/* HIT */
-			read_unlock_bh(&__ip_vs_sched_lock);
+			spin_unlock_bh(&ip_vs_sched_lock);
 			return sched;
 		}
 		if (sched->module)
 			module_put(sched->module);
 	}
 
-	read_unlock_bh(&__ip_vs_sched_lock);
+	spin_unlock_bh(&ip_vs_sched_lock);
 	return NULL;
 }
 
@@ -159,7 +142,7 @@ struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
 
 void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
 {
-	if (scheduler->module)
+	if (scheduler && scheduler->module)
 		module_put(scheduler->module);
 }
 
@@ -184,10 +167,10 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
 	/* increase the module use count */
 	ip_vs_use_count_inc();
 
-	write_lock_bh(&__ip_vs_sched_lock);
+	spin_lock_bh(&ip_vs_sched_lock);
 
 	if (!list_empty(&scheduler->n_list)) {
-		write_unlock_bh(&__ip_vs_sched_lock);
+		spin_unlock_bh(&ip_vs_sched_lock);
 		ip_vs_use_count_dec();
 		pr_err("%s(): [%s] scheduler already linked\n",
 		       __func__, scheduler->name);
@@ -200,7 +183,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
 	 */
 	list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
 		if (strcmp(scheduler->name, sched->name) == 0) {
-			write_unlock_bh(&__ip_vs_sched_lock);
+			spin_unlock_bh(&ip_vs_sched_lock);
 			ip_vs_use_count_dec();
 			pr_err("%s(): [%s] scheduler already existed "
 			       "in the system\n", __func__, scheduler->name);
@@ -211,7 +194,7 @@ int register_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
 	 *	Add it into the d-linked scheduler list
 	 */
 	list_add(&scheduler->n_list, &ip_vs_schedulers);
-	write_unlock_bh(&__ip_vs_sched_lock);
+	spin_unlock_bh(&ip_vs_sched_lock);
 
 	pr_info("[%s] scheduler registered.\n", scheduler->name);
 
@@ -229,9 +212,9 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
 		return -EINVAL;
 	}
 
-	write_lock_bh(&__ip_vs_sched_lock);
+	spin_lock_bh(&ip_vs_sched_lock);
 	if (list_empty(&scheduler->n_list)) {
-		write_unlock_bh(&__ip_vs_sched_lock);
+		spin_unlock_bh(&ip_vs_sched_lock);
 		pr_err("%s(): [%s] scheduler is not in the list. failed\n",
 		       __func__, scheduler->name);
 		return -EINVAL;
@@ -241,7 +224,7 @@ int unregister_ip_vs_scheduler(struct ip_vs_scheduler *scheduler)
 	 *	Remove it from the d-linked scheduler list
 	 */
 	list_del(&scheduler->n_list);
-	write_unlock_bh(&__ip_vs_sched_lock);
+	spin_unlock_bh(&ip_vs_sched_lock);
 
 	/* decrease the module use count */
 	ip_vs_use_count_dec();
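The rwlock-to-spinlock conversion above likely reflects that the scheduler list is tiny and touched only at registration, unregistration and name lookup, so reader/writer distinction buys nothing; the same change also drops the redundant NULL-argument checks from the bind/unbind paths. A userspace sketch of the same registry pattern using pthreads (all names illustrative):

#include <pthread.h>
#include <stdio.h>
#include <string.h>

struct sched_entry { const char *name; struct sched_entry *next; };

static struct sched_entry *schedulers;
static pthread_mutex_t sched_lock = PTHREAD_MUTEX_INITIALIZER;

/* Lookup and registration share one plain lock; with such short
 * critical sections a rwlock would only add overhead. */
static struct sched_entry *sched_get(const char *name)
{
	struct sched_entry *s;

	pthread_mutex_lock(&sched_lock);
	for (s = schedulers; s; s = s->next)
		if (!strcmp(s->name, name))
			break;			/* HIT */
	pthread_mutex_unlock(&sched_lock);
	return s;
}

int main(void)
{
	static struct sched_entry rr = { .name = "rr" };

	pthread_mutex_lock(&sched_lock);
	rr.next = schedulers;
	schedulers = &rr;			/* register */
	pthread_mutex_unlock(&sched_lock);
	printf("%s\n", sched_get("rr") ? "found" : "missing");
	return 0;
}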
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 7ba06939829f..ab85aedea17e 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -288,6 +288,16 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
 		ip_vs_sync_conn(cp->control);
 }
 
+static inline int
+ip_vs_conn_fill_param_sync(int af, int protocol,
+			   const union nf_inet_addr *caddr, __be16 cport,
+			   const union nf_inet_addr *vaddr, __be16 vport,
+			   struct ip_vs_conn_param *p)
+{
+	/* XXX: Need to take into account persistence engine */
+	ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p);
+	return 0;
+}
 
 /*
  *	Process received multicast message and create the corresponding
@@ -301,6 +311,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 	struct ip_vs_conn *cp;
 	struct ip_vs_protocol *pp;
 	struct ip_vs_dest *dest;
+	struct ip_vs_conn_param param;
 	char *p;
 	int i;
 
@@ -370,18 +381,20 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 			}
 		}
 
-		if (!(flags & IP_VS_CONN_F_TEMPLATE))
-			cp = ip_vs_conn_in_get(AF_INET, s->protocol,
+		{
+			if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol,
 					       (union nf_inet_addr *)&s->caddr,
 					       s->cport,
 					       (union nf_inet_addr *)&s->vaddr,
-					       s->vport);
-		else
-			cp = ip_vs_ct_in_get(AF_INET, s->protocol,
-					     (union nf_inet_addr *)&s->caddr,
-					     s->cport,
-					     (union nf_inet_addr *)&s->vaddr,
-					     s->vport);
+					       s->vport, &param)) {
+				pr_err("ip_vs_conn_fill_param_sync failed");
+				return;
+			}
+			if (!(flags & IP_VS_CONN_F_TEMPLATE))
+				cp = ip_vs_conn_in_get(&param);
+			else
+				cp = ip_vs_ct_in_get(&param);
+		}
 		if (!cp) {
@@ -406,14 +419,9 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen)
 		else
 			flags &= ~IP_VS_CONN_F_INACTIVE;
 		}
-		cp = ip_vs_conn_new(AF_INET, s->protocol,
-				    (union nf_inet_addr *)&s->caddr,
-				    s->cport,
-				    (union nf_inet_addr *)&s->vaddr,
-				    s->vport,
+		cp = ip_vs_conn_new(&param,
 				    (union nf_inet_addr *)&s->daddr,
-				    s->dport,
-				    flags, dest);
+				    s->dport, flags, dest);
 		if (dest)
 			atomic_dec(&dest->refcnt);
 		if (!cp) {
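The ip_vs_sync.c changes above collapse the per-call argument lists into one ip_vs_conn_param, so every connection lookup and creation in the sync path is keyed off the same filled-in structure (and the XXX marks the single spot where a persistence engine can later hook in). A sketch of the pattern with illustrative stand-in types:

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for struct ip_vs_conn_param: one struct that
 * carries every field the connection lookups need. */
struct conn_param {
	int af, protocol;
	uint32_t caddr, vaddr;
	uint16_t cport, vport;
};

static int fill_param_sync(int af, int proto,
			   uint32_t caddr, uint16_t cport,
			   uint32_t vaddr, uint16_t vport,
			   struct conn_param *p)
{
	*p = (struct conn_param){ af, proto, caddr, vaddr, cport, vport };
	/* a persistence engine would be consulted here */
	return 0;
}

int main(void)
{
	struct conn_param p;

	fill_param_sync(2 /* AF_INET */, 6 /* TCP */,
			0x0a000001, 12345, 0x0a0000fe, 80, &p);
	printf("proto=%d vport=%u\n", p.protocol, p.vport);
	return 0;
}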
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 49df6bea6a2d..de04ea39cde8 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -11,6 +11,16 @@
  *
  * Changes:
  *
+ * Description of forwarding methods:
+ * - all transmitters are called from LOCAL_IN (remote clients) and
+ * LOCAL_OUT (local clients) but for ICMP can be called from FORWARD
+ * - not all connections have destination server, for example,
+ * connections in backup server when fwmark is used
+ * - bypass connections use daddr from packet
+ * LOCAL_OUT rules:
+ * - skb->dev is NULL, skb->protocol is not set (both are set in POST_ROUTING)
+ * - skb->pkt_type is not set yet
+ * - the only place where we can see skb->sk != NULL
  */
 
 #define KMSG_COMPONENT "IPVS"
@@ -26,9 +36,9 @@
 #include <net/route.h>			/* for ip_route_output */
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
+#include <net/addrconf.h>
 #include <linux/icmpv6.h>
 #include <linux/netfilter.h>
-#include <net/netfilter/nf_conntrack.h>
 #include <linux/netfilter_ipv4.h>
 
 #include <net/ip_vs.h>
@@ -38,26 +48,27 @@
  * Destination cache to speed up outgoing route lookup
  */
 static inline void
-__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
+__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst,
+		u32 dst_cookie)
 {
 	struct dst_entry *old_dst;
 
 	old_dst = dest->dst_cache;
 	dest->dst_cache = dst;
 	dest->dst_rtos = rtos;
+	dest->dst_cookie = dst_cookie;
 	dst_release(old_dst);
 }
 
 static inline struct dst_entry *
-__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
+__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
 {
 	struct dst_entry *dst = dest->dst_cache;
 
 	if (!dst)
 		return NULL;
-	if ((dst->obsolete
-	     || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
-	    dst->ops->check(dst, cookie) == NULL) {
+	if ((dst->obsolete || rtos != dest->dst_rtos) &&
+	    dst->ops->check(dst, dest->dst_cookie) == NULL) {
 		dest->dst_cache = NULL;
 		dst_release(dst);
 		return NULL;
@@ -66,16 +77,24 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
 	return dst;
 }
 
+/*
+ * Get route to destination or remote server
+ * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
+ *	    &4=Allow redirect from remote daddr to local
+ */
 static struct rtable *
-__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
+__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
+		   __be32 daddr, u32 rtos, int rt_mode)
 {
+	struct net *net = dev_net(skb_dst(skb)->dev);
 	struct rtable *rt;			/* Route to the other host */
-	struct ip_vs_dest *dest = cp->dest;
+	struct rtable *ort;			/* Original route */
+	int local;
 
 	if (dest) {
 		spin_lock(&dest->dst_lock);
 		if (!(rt = (struct rtable *)
-		      __ip_vs_dst_check(dest, rtos, 0))) {
+		      __ip_vs_dst_check(dest, rtos))) {
 			struct flowi fl = {
 				.oif = 0,
 				.nl_u = {
@@ -85,13 +104,13 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
 					.tos = rtos, } },
 			};
 
-			if (ip_route_output_key(&init_net, &rt, &fl)) {
+			if (ip_route_output_key(net, &rt, &fl)) {
 				spin_unlock(&dest->dst_lock);
 				IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
 					     &dest->addr.ip);
 				return NULL;
 			}
-			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst));
+			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
 			IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
 				  &dest->addr.ip,
 				  atomic_read(&rt->dst.__refcnt), rtos);
@@ -102,78 +121,199 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
 			.oif = 0,
 			.nl_u = {
 				.ip4_u = {
-					.daddr = cp->daddr.ip,
+					.daddr = daddr,
 					.saddr = 0,
 					.tos = rtos, } },
 		};
 
-		if (ip_route_output_key(&init_net, &rt, &fl)) {
+		if (ip_route_output_key(net, &rt, &fl)) {
 			IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
-				     &cp->daddr.ip);
+				     &daddr);
 			return NULL;
 		}
 	}
 
+	local = rt->rt_flags & RTCF_LOCAL;
+	if (!((local ? 1 : 2) & rt_mode)) {
+		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n",
+			     (rt->rt_flags & RTCF_LOCAL) ?
+			     "local":"non-local", &rt->rt_dst);
+		ip_rt_put(rt);
+		return NULL;
+	}
+	if (local && !(rt_mode & 4) && !((ort = skb_rtable(skb)) &&
+					 ort->rt_flags & RTCF_LOCAL)) {
+		IP_VS_DBG_RL("Redirect from non-local address %pI4 to local "
+			     "requires NAT method, dest: %pI4\n",
+			     &ip_hdr(skb)->daddr, &rt->rt_dst);
+		ip_rt_put(rt);
+		return NULL;
+	}
+	if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) {
+		IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 "
+			     "to non-local address, dest: %pI4\n",
+			     &ip_hdr(skb)->saddr, &rt->rt_dst);
+		ip_rt_put(rt);
+		return NULL;
+	}
+
 	return rt;
 }
 
+/* Reroute packet to local IPv4 stack after DNAT */
+static int
+__ip_vs_reroute_locally(struct sk_buff *skb)
+{
+	struct rtable *rt = skb_rtable(skb);
+	struct net_device *dev = rt->dst.dev;
+	struct net *net = dev_net(dev);
+	struct iphdr *iph = ip_hdr(skb);
+
+	if (rt->fl.iif) {
+		unsigned long orefdst = skb->_skb_refdst;
+
+		if (ip_route_input(skb, iph->daddr, iph->saddr,
+				   iph->tos, skb->dev))
+			return 0;
+		refdst_drop(orefdst);
+	} else {
+		struct flowi fl = {
+			.oif = 0,
+			.nl_u = {
+				.ip4_u = {
+					.daddr = iph->daddr,
+					.saddr = iph->saddr,
+					.tos = RT_TOS(iph->tos),
+				}
+			},
+			.mark = skb->mark,
+		};
+		struct rtable *rt;
+
+		if (ip_route_output_key(net, &rt, &fl))
+			return 0;
+		if (!(rt->rt_flags & RTCF_LOCAL)) {
+			ip_rt_put(rt);
+			return 0;
+		}
+		/* Drop old route. */
+		skb_dst_drop(skb);
+		skb_dst_set(skb, &rt->dst);
+	}
+	return 1;
+}
+
 #ifdef CONFIG_IP_VS_IPV6
+
+static inline int __ip_vs_is_local_route6(struct rt6_info *rt)
+{
+	return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK;
+}
+
+static struct dst_entry *
+__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
+			struct in6_addr *ret_saddr, int do_xfrm)
+{
+	struct dst_entry *dst;
+	struct flowi fl = {
+		.oif = 0,
+		.nl_u = {
+			.ip6_u = {
+				.daddr = *daddr,
+			},
+		},
+	};
+
+	dst = ip6_route_output(net, NULL, &fl);
+	if (dst->error)
+		goto out_err;
+	if (!ret_saddr)
+		return dst;
+	if (ipv6_addr_any(&fl.fl6_src) &&
+	    ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev,
+			       &fl.fl6_dst, 0, &fl.fl6_src) < 0)
+		goto out_err;
+	if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0)
+		goto out_err;
+	ipv6_addr_copy(ret_saddr, &fl.fl6_src);
+	return dst;
+
+out_err:
+	dst_release(dst);
+	IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr);
+	return NULL;
+}
+
+/*
+ * Get route to destination or remote server
+ * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest,
+ *	    &4=Allow redirect from remote daddr to local
+ */
 static struct rt6_info *
-__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
+__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
+		      struct in6_addr *daddr, struct in6_addr *ret_saddr,
+		      int do_xfrm, int rt_mode)
 {
+	struct net *net = dev_net(skb_dst(skb)->dev);
 	struct rt6_info *rt;			/* Route to the other host */
-	struct ip_vs_dest *dest = cp->dest;
+	struct rt6_info *ort;			/* Original route */
+	struct dst_entry *dst;
+	int local;
 
 	if (dest) {
 		spin_lock(&dest->dst_lock);
-		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
+		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0);
 		if (!rt) {
-			struct flowi fl = {
-				.oif = 0,
-				.nl_u = {
-					.ip6_u = {
-						.daddr = dest->addr.in6,
-						.saddr = {
-							.s6_addr32 =
-								{ 0, 0, 0, 0 },
-						},
-					},
-				},
-			};
+			u32 cookie;
 
-			rt = (struct rt6_info *)ip6_route_output(&init_net,
-								 NULL, &fl);
-			if (!rt) {
+			dst = __ip_vs_route_output_v6(net, &dest->addr.in6,
+						      &dest->dst_saddr,
+						      do_xfrm);
+			if (!dst) {
 				spin_unlock(&dest->dst_lock);
-				IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
-					     &dest->addr.in6);
 				return NULL;
 			}
-			__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst));
-			IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
-				  &dest->addr.in6,
+			rt = (struct rt6_info *) dst;
+			cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+			__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie);
+			IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n",
+				  &dest->addr.in6, &dest->dst_saddr,
 				  atomic_read(&rt->dst.__refcnt));
 		}
+		if (ret_saddr)
+			ipv6_addr_copy(ret_saddr, &dest->dst_saddr);
 		spin_unlock(&dest->dst_lock);
 	} else {
-		struct flowi fl = {
-			.oif = 0,
-			.nl_u = {
-				.ip6_u = {
-					.daddr = cp->daddr.in6,
-					.saddr = {
-						.s6_addr32 = { 0, 0, 0, 0 },
-					},
-				},
-			},
-		};
-
-		rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
-		if (!rt) {
-			IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
-				     &cp->daddr.in6);
+		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
+		if (!dst)
 			return NULL;
-		}
+		rt = (struct rt6_info *) dst;
+	}
+
+	local = __ip_vs_is_local_route6(rt);
+	if (!((local ? 1 : 2) & rt_mode)) {
+		IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n",
+			     local ? "local":"non-local", daddr);
+		dst_release(&rt->dst);
+		return NULL;
+	}
+	if (local && !(rt_mode & 4) &&
+	    !((ort = (struct rt6_info *) skb_dst(skb)) &&
+	      __ip_vs_is_local_route6(ort))) {
+		IP_VS_DBG_RL("Redirect from non-local address %pI6 to local "
+			     "requires NAT method, dest: %pI6\n",
+			     &ipv6_hdr(skb)->daddr, daddr);
+		dst_release(&rt->dst);
+		return NULL;
+	}
+	if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
+		     ipv6_addr_type(&ipv6_hdr(skb)->saddr) &
+			IPV6_ADDR_LOOPBACK)) {
+		IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 "
+			     "to non-local address, dest: %pI6\n",
+			     &ipv6_hdr(skb)->saddr, daddr);
+		dst_release(&rt->dst);
+		return NULL;
 	}
 
 	return rt;
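The rt_mode bits documented in the new comment gate what each transmitter may do with a looked-up route: bit 0 (1) permits a local destination, bit 1 (2) a non-local one, bit 2 (4) a redirect of a remote daddr to a local address; in the hunks below, NAT passes 1|2|4, DR and TUN pass 1|2, and bypass passes only 2. A sketch of the gate itself (flag names are illustrative):

#include <stdio.h>

#define RTM_LOCAL	1	/* allow local destination */
#define RTM_NONLOCAL	2	/* allow non-local destination */
#define RTM_REDIRECT	4	/* allow remote daddr -> local redirect */

/* Same test as "!((local ? 1 : 2) & rt_mode)" above, inverted. */
static int route_allowed(int is_local, int rt_mode)
{
	return ((is_local ? RTM_LOCAL : RTM_NONLOCAL) & rt_mode) != 0;
}

int main(void)
{
	printf("bypass to local: %d\n", route_allowed(1, RTM_NONLOCAL));
	printf("nat to local:    %d\n",
	       route_allowed(1, RTM_LOCAL | RTM_NONLOCAL | RTM_REDIRECT));
	return 0;
}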
@@ -194,12 +334,44 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
 	dst_release(old_dst);
 }
 
-#define IP_VS_XMIT(pf, skb, rt)				\
+#define IP_VS_XMIT_TUNNEL(skb, cp)				\
+({								\
+	int __ret = NF_ACCEPT;					\
+								\
+	(skb)->ipvs_property = 1;				\
+	if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT))		\
+		__ret = ip_vs_confirm_conntrack(skb, cp);	\
+	if (__ret == NF_ACCEPT) {				\
+		nf_reset(skb);					\
+		skb_forward_csum(skb);				\
+	}							\
+	__ret;							\
+})
+
+#define IP_VS_XMIT_NAT(pf, skb, cp, local)		\
+do {							\
+	(skb)->ipvs_property = 1;			\
+	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
+		ip_vs_notrack(skb);			\
+	else						\
+		ip_vs_update_conntrack(skb, cp, 1);	\
+	if (local)					\
+		return NF_ACCEPT;			\
+	skb_forward_csum(skb);				\
+	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
+		skb_dst(skb)->dev, dst_output);		\
+} while (0)
+
+#define IP_VS_XMIT(pf, skb, cp, local)			\
 do {							\
 	(skb)->ipvs_property = 1;			\
+	if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT)))	\
+		ip_vs_notrack(skb);			\
+	if (local)					\
+		return NF_ACCEPT;			\
 	skb_forward_csum(skb);				\
 	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
-		(rt)->dst.dev, dst_output);		\
+		skb_dst(skb)->dev, dst_output);		\
 } while (0)
 
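The three macros above split transmission by conntrack policy: IP_VS_XMIT_TUNNEL confirms the conntrack entry before encapsulating, IP_VS_XMIT_NAT either marks the skb untracked or rewrites the reply tuple, and plain IP_VS_XMIT only untracks; all of them hand a locally-destined skb back to the stack with NF_ACCEPT instead of re-injecting it. A condensed model of that control flow (verdict values match the kernel's; everything else is illustrative):

#include <stdio.h>

enum { NF_DROP, NF_ACCEPT, NF_STOLEN };

/* Condensed IP_VS_XMIT(): a local destination is returned to the
 * stack (NF_ACCEPT); anything else is re-injected and stolen. */
static int xmit(int local)
{
	if (local)
		return NF_ACCEPT;	/* let LOCAL_IN deliver it */
	/* ... NF_HOOK(..., NF_INET_LOCAL_OUT, ..., dst_output) ... */
	return NF_STOLEN;		/* we now own the skb */
}

int main(void)
{
	printf("local=1 -> %d, local=0 -> %d\n", xmit(1), xmit(0));
	return 0;
}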
@@ -211,7 +383,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		struct ip_vs_protocol *pp)
 {
 	/* we do not touch skb and do not need pskb ptr */
-	return NF_ACCEPT;
+	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
 }
 
 
@@ -226,24 +398,13 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 {
 	struct rtable *rt;			/* Route to the other host */
 	struct iphdr  *iph = ip_hdr(skb);
-	u8     tos = iph->tos;
 	int    mtu;
-	struct flowi fl = {
-		.oif = 0,
-		.nl_u = {
-			.ip4_u = {
-				.daddr = iph->daddr,
-				.saddr = 0,
-				.tos = RT_TOS(tos), } },
-	};
 
 	EnterFunction(10);
 
-	if (ip_route_output_key(&init_net, &rt, &fl)) {
-		IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n",
-			     __func__, &iph->daddr);
+	if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr,
+				      RT_TOS(iph->tos), 2)))
 		goto tx_error_icmp;
-	}
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
@@ -271,7 +432,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -292,28 +453,22 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	struct rt6_info *rt;			/* Route to the other host */
 	struct ipv6hdr *iph = ipv6_hdr(skb);
 	int    mtu;
-	struct flowi fl = {
-		.oif = 0,
-		.nl_u = {
-			.ip6_u = {
-				.daddr = iph->daddr,
-				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
-	};
 
 	EnterFunction(10);
 
-	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
-	if (!rt) {
-		IP_VS_DBG_RL("%s(): ip6_route_output error, dest: %pI6\n",
-			     __func__, &iph->daddr);
+	if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 2)))
 		goto tx_error_icmp;
-	}
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
-		dst_release(&rt->dst);
+		if (!skb->dev) {
+			struct net *net = dev_net(skb_dst(skb)->dev);
+
+			skb->dev = net->loopback_dev;
+		}
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
 		goto tx_error;
 	}
@@ -335,7 +490,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -349,36 +504,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 }
 #endif
 
-void
-ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
-{
-	struct nf_conn *ct = (struct nf_conn *)skb->nfct;
-	struct nf_conntrack_tuple new_tuple;
-
-	if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct))
-		return;
-
-	/*
-	 * The connection is not yet in the hashtable, so we update it.
-	 * CIP->VIP will remain the same, so leave the tuple in
-	 * IP_CT_DIR_ORIGINAL untouched.  When the reply comes back from the
-	 * real-server we will see RIP->DIP.
-	 */
-	new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-	if (outin)
-		new_tuple.src.u3 = cp->daddr;
-	else
-		new_tuple.dst.u3 = cp->vaddr;
-	/*
-	 * This will also take care of UDP and other protocols.
-	 */
-	if (outin)
-		new_tuple.src.u.tcp.port = cp->dport;
-	else
-		new_tuple.dst.u.tcp.port = cp->vport;
-	nf_conntrack_alter_reply(ct, &new_tuple);
-}
-
 /*
  *      NAT transmitter (only for outside-to-inside nat forwarding)
  *      Not used for related ICMP
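ip_vs_update_conntrack() is deleted here but still called by the IP_VS_XMIT_NAT macro above, so the function itself now lives outside this file; what it does is rewrite the not-yet-confirmed conntrack reply tuple so replies from the real server still match the tracked flow. A sketch of that rewrite with illustrative stand-in types:

#include <stdint.h>
#include <stdio.h>

struct tuple { uint32_t addr; uint16_t port; };
struct reply { struct tuple src, dst; };

/* outin != 0: packet heads to the real server, so replies will come
 * from daddr:dport; otherwise replies must target vaddr:vport. */
static void alter_reply(struct reply *r,
			struct tuple daddr, struct tuple vaddr, int outin)
{
	if (outin)
		r->src = daddr;
	else
		r->dst = vaddr;
}

int main(void)
{
	struct reply r = { { 0xc0a80001, 80 }, { 0xc0a80002, 40000 } };
	struct tuple rs = { 0x0a000003, 8080 }, vs = { 0xc0a80001, 80 };

	alter_reply(&r, rs, vs, 1);
	printf("reply src now %08x:%u\n", r.src.addr, (unsigned)r.src.port);
	return 0;
}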
@@ -390,6 +515,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	struct rtable *rt;		/* Route to the other host */
 	int mtu;
 	struct iphdr  *iph = ip_hdr(skb);
+	int local;
 
 	EnterFunction(10);
 
@@ -403,16 +529,42 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
 	}
 
-	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
+	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+				      RT_TOS(iph->tos), 1|2|4)))
 		goto tx_error_icmp;
+	local = rt->rt_flags & RTCF_LOCAL;
+	/*
+	 * Avoid duplicate tuple in reply direction for NAT traffic
+	 * to local address when connection is sync-ed
+	 */
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
+		enum ip_conntrack_info ctinfo;
+		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+		if (ct && !nf_ct_is_untracked(ct)) {
+			IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
+					 "ip_vs_nat_xmit(): "
+					 "stopping DNAT to local address");
+			goto tx_error_put;
+		}
+	}
+#endif
+
+	/* From world but DNAT to loopback address? */
+	if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
+		IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
+				 "stopping DNAT to loopback address");
+		goto tx_error_put;
+	}
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
-		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
-		IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
-		goto tx_error;
+		IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0,
+				 "ip_vs_nat_xmit(): frag needed for");
+		goto tx_error_put;
 	}
 
 	/* copy-on-write the packet before mangling it */
@@ -422,19 +574,28 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
-	/* drop old route */
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
-
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
-		goto tx_error;
+		goto tx_error_put;
 	ip_hdr(skb)->daddr = cp->daddr.ip;
 	ip_send_check(ip_hdr(skb));
 
-	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
+	if (!local) {
+		/* drop old route */
+		skb_dst_drop(skb);
+		skb_dst_set(skb, &rt->dst);
+	} else {
+		ip_rt_put(rt);
+		/*
+		 * Some IPv4 replies get local address from routes,
+		 * not from iph, so while we DNAT after routing
+		 * we need this second input/output route.
+		 */
+		if (!__ip_vs_reroute_locally(skb))
+			goto tx_error;
+	}
 
-	ip_vs_update_conntrack(skb, cp, 1);
+	IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
 
 	/* FIXME: when application helper enlarges the packet and the length
 	   is larger than the MTU of outgoing device, there will be still
@@ -443,7 +604,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);
+	IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -451,8 +612,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
   tx_error_icmp:
 	dst_link_failure(skb);
   tx_error:
-	LeaveFunction(10);
 	kfree_skb(skb);
+	LeaveFunction(10);
 	return NF_STOLEN;
   tx_error_put:
 	ip_rt_put(rt);
@@ -466,6 +627,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 {
 	struct rt6_info *rt;		/* Route to the other host */
 	int mtu;
+	int local;
 
 	EnterFunction(10);
 
@@ -480,18 +642,49 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
 	}
 
-	rt = __ip_vs_get_out_rt_v6(cp);
-	if (!rt)
+	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
+					 0, 1|2|4)))
 		goto tx_error_icmp;
+	local = __ip_vs_is_local_route6(rt);
+	/*
+	 * Avoid duplicate tuple in reply direction for NAT traffic
+	 * to local address when connection is sync-ed
+	 */
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
+		enum ip_conntrack_info ctinfo;
+		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+		if (ct && !nf_ct_is_untracked(ct)) {
+			IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
+					 "ip_vs_nat_xmit_v6(): "
+					 "stopping DNAT to local address");
+			goto tx_error_put;
+		}
+	}
+#endif
+
+	/* From world but DNAT to loopback address? */
+	if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
+	    ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
+		IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
+				 "ip_vs_nat_xmit_v6(): "
+				 "stopping DNAT to loopback address");
+		goto tx_error_put;
+	}
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
-		dst_release(&rt->dst);
+		if (!skb->dev) {
+			struct net *net = dev_net(skb_dst(skb)->dev);
+
+			skb->dev = net->loopback_dev;
+		}
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
+		IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0,
 				 "ip_vs_nat_xmit_v6(): frag needed for");
-		goto tx_error;
+		goto tx_error_put;
 	}
 
 	/* copy-on-write the packet before mangling it */
@@ -501,18 +694,21 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
-	/* drop old route */
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
-
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
 		goto tx_error;
-	ipv6_hdr(skb)->daddr = cp->daddr.in6;
+	ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6);
 
-	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
+	if (!local || !skb->dev) {
+		/* drop the old route when skb is not shared */
+		skb_dst_drop(skb);
+		skb_dst_set(skb, &rt->dst);
+	} else {
+		/* destined to loopback, do we need to change route? */
+		dst_release(&rt->dst);
+	}
 
-	ip_vs_update_conntrack(skb, cp, 1);
+	IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
 
 	/* FIXME: when application helper enlarges the packet and the length
 	   is larger than the MTU of outgoing device, there will be still
@@ -521,7 +717,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);
+	IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -567,30 +763,27 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	struct iphdr  *old_iph = ip_hdr(skb);
 	u8     tos = old_iph->tos;
 	__be16 df = old_iph->frag_off;
-	sk_buff_data_t old_transport_header = skb->transport_header;
 	struct iphdr  *iph;			/* Our new IP header */
 	unsigned int max_headroom;		/* The extra header space needed */
 	int    mtu;
+	int ret;
 
 	EnterFunction(10);
 
-	if (skb->protocol != htons(ETH_P_IP)) {
-		IP_VS_DBG_RL("%s(): protocol error, "
-			     "ETH_P_IP: %d, skb protocol: %d\n",
-			     __func__, htons(ETH_P_IP), skb->protocol);
-		goto tx_error;
-	}
-
-	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
+	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+				      RT_TOS(tos), 1|2)))
 		goto tx_error_icmp;
+	if (rt->rt_flags & RTCF_LOCAL) {
+		ip_rt_put(rt);
+		IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
+	}
 
 	tdev = rt->dst.dev;
 
 	mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 	if (mtu < 68) {
-		ip_rt_put(rt);
 		IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
-		goto tx_error;
+		goto tx_error_put;
 	}
 	if (skb_dst(skb))
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
@@ -600,9 +793,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if ((old_iph->frag_off & htons(IP_DF))
 	    && mtu < ntohs(old_iph->tot_len)) {
 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
-		ip_rt_put(rt);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
-		goto tx_error;
+		goto tx_error_put;
 	}
 
 	/*
@@ -625,7 +817,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 		old_iph = ip_hdr(skb);
 	}
 
-	skb->transport_header = old_transport_header;
+	skb->transport_header = skb->network_header;
 
 	/* fix old IP header checksum */
 	ip_send_check(old_iph);
@@ -655,7 +847,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	ip_local_out(skb);
+	ret = IP_VS_XMIT_TUNNEL(skb, cp);
+	if (ret == NF_ACCEPT)
+		ip_local_out(skb);
+	else if (ret == NF_DROP)
+		kfree_skb(skb);
 
 	LeaveFunction(10);
 
@@ -667,6 +863,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	kfree_skb(skb);
 	LeaveFunction(10);
 	return NF_STOLEN;
+tx_error_put:
+	ip_rt_put(rt);
+	goto tx_error;
 }
 
 #ifdef CONFIG_IP_VS_IPV6
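ip_vs_tunnel_xmit() now routes its verdict through IP_VS_XMIT_TUNNEL: only NF_ACCEPT proceeds to ip_local_out(), NF_DROP frees the skb, and any other verdict leaves the skb untouched. The same dispatch in miniature (verdict values match the kernel's):

#include <stdio.h>

enum { NF_DROP, NF_ACCEPT, NF_STOLEN };

static void tunnel_finish(int verdict)
{
	if (verdict == NF_ACCEPT)
		puts("ip_local_out(skb)");	/* send encapsulated */
	else if (verdict == NF_DROP)
		puts("kfree_skb(skb)");		/* conntrack confirm failed */
	else
		puts("skb taken elsewhere");	/* e.g. NF_STOLEN */
}

int main(void)
{
	tunnel_finish(NF_ACCEPT);
	tunnel_finish(NF_DROP);
	return 0;
}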
@@ -675,43 +874,44 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		     struct ip_vs_protocol *pp)
 {
 	struct rt6_info *rt;		/* Route to the other host */
+	struct in6_addr saddr;		/* Source for tunnel */
 	struct net_device *tdev;	/* Device to other host */
 	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
-	sk_buff_data_t old_transport_header = skb->transport_header;
 	struct ipv6hdr  *iph;		/* Our new IP header */
 	unsigned int max_headroom;	/* The extra header space needed */
 	int    mtu;
+	int ret;
 
 	EnterFunction(10);
 
-	if (skb->protocol != htons(ETH_P_IPV6)) {
-		IP_VS_DBG_RL("%s(): protocol error, "
-			     "ETH_P_IPV6: %d, skb protocol: %d\n",
-			     __func__, htons(ETH_P_IPV6), skb->protocol);
-		goto tx_error;
-	}
-
-	rt = __ip_vs_get_out_rt_v6(cp);
-	if (!rt)
+	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6,
+					 &saddr, 1, 1|2)))
 		goto tx_error_icmp;
+	if (__ip_vs_is_local_route6(rt)) {
+		dst_release(&rt->dst);
+		IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
+	}
 
 	tdev = rt->dst.dev;
 
 	mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
-	/* TODO IPv6:  do we need this check in IPv6? */
-	if (mtu < 1280) {
-		dst_release(&rt->dst);
-		IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__);
-		goto tx_error;
+	if (mtu < IPV6_MIN_MTU) {
+		IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
+			     IPV6_MIN_MTU);
+		goto tx_error_put;
 	}
 	if (skb_dst(skb))
 		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
 	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
+		if (!skb->dev) {
+			struct net *net = dev_net(skb_dst(skb)->dev);
+
+			skb->dev = net->loopback_dev;
+		}
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
-		goto tx_error;
+		goto tx_error_put;
 	}
 
 	/*
@@ -734,7 +934,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 		old_iph = ipv6_hdr(skb);
 	}
 
-	skb->transport_header = old_transport_header;
+	skb->transport_header = skb->network_header;
 
 	skb_push(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
@@ -754,14 +954,18 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
 	iph->priority		=	old_iph->priority;
 	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
-	iph->daddr = rt->rt6i_dst.addr;
-	iph->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */
+	ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
+	ipv6_addr_copy(&iph->saddr, &saddr);
 	iph->hop_limit		=	old_iph->hop_limit;
 
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	ip6_local_out(skb);
+	ret = IP_VS_XMIT_TUNNEL(skb, cp);
+	if (ret == NF_ACCEPT)
+		ip6_local_out(skb);
+	else if (ret == NF_DROP)
+		kfree_skb(skb);
 
 	LeaveFunction(10);
 
@@ -773,6 +977,9 @@ tx_error:
 	kfree_skb(skb);
 	LeaveFunction(10);
 	return NF_STOLEN;
+tx_error_put:
+	dst_release(&rt->dst);
+	goto tx_error;
 }
 #endif
 
@@ -791,8 +998,13 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	EnterFunction(10);
 
-	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
+	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+				      RT_TOS(iph->tos), 1|2)))
 		goto tx_error_icmp;
+	if (rt->rt_flags & RTCF_LOCAL) {
+		ip_rt_put(rt);
+		IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1);
+	}
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
@@ -820,7 +1032,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -843,13 +1055,22 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 
 	EnterFunction(10);
 
-	rt = __ip_vs_get_out_rt_v6(cp);
-	if (!rt)
+	if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
+					 0, 1|2)))
 		goto tx_error_icmp;
+	if (__ip_vs_is_local_route6(rt)) {
+		dst_release(&rt->dst);
+		IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1);
+	}
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
 	if (skb->len > mtu) {
+		if (!skb->dev) {
+			struct net *net = dev_net(skb_dst(skb)->dev);
+
+			skb->dev = net->loopback_dev;
+		}
 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
 		dst_release(&rt->dst);
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
@@ -873,7 +1094,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -899,6 +1120,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	struct rtable	*rt;	/* Route to the other host */
 	int mtu;
 	int rc;
+	int local;
 
 	EnterFunction(10);
 
@@ -919,16 +1141,43 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	 * mangle and send the packet here (only for VS/NAT)
 	 */
 
-	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
+	if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
+				      RT_TOS(ip_hdr(skb)->tos), 1|2|4)))
 		goto tx_error_icmp;
+	local = rt->rt_flags & RTCF_LOCAL;
+
+	/*
+	 * Avoid duplicate tuple in reply direction for NAT traffic
+	 * to local address when connection is sync-ed
+	 */
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+	if (cp->flags & IP_VS_CONN_F_SYNC && local) {
+		enum ip_conntrack_info ctinfo;
+		struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+
+		if (ct && !nf_ct_is_untracked(ct)) {
+			IP_VS_DBG(10, "%s(): "
+				  "stopping DNAT to local address %pI4\n",
+				  __func__, &cp->daddr.ip);
+			goto tx_error_put;
+		}
+	}
+#endif
+
+	/* From world but DNAT to loopback address? */
+	if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) {
+		IP_VS_DBG(1, "%s(): "
+			  "stopping DNAT to loopback %pI4\n",
+			  __func__, &cp->daddr.ip);
+		goto tx_error_put;
+	}
 
 	/* MTU checking */
 	mtu = dst_mtu(&rt->dst);
 	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
-		ip_rt_put(rt);
 		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
-		goto tx_error;
+		goto tx_error_put;
 	}
 
 	/* copy-on-write the packet before mangling it */
@@ -938,16 +1187,27 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	if (skb_cow(skb, rt->dst.dev->hard_header_len))
 		goto tx_error_put;
 
-	/* drop the old route when skb is not shared */
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
-
 	ip_vs_nat_icmp(skb, pp, cp, 0);
 
+	if (!local) {
+		/* drop the old route when skb is not shared */
+		skb_dst_drop(skb);
+		skb_dst_set(skb, &rt->dst);
+	} else {
+		ip_rt_put(rt);
+		/*
+		 * Some IPv4 replies get local address from routes,
+		 * not from iph, so while we DNAT after routing
+		 * we need this second input/output route.
+		 */
+		if (!__ip_vs_reroute_locally(skb))
+			goto tx_error;
+	}
+
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);
+	IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local);
 
 	rc = NF_STOLEN;
 	goto out;
@@ -973,6 +1233,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	struct rt6_info	*rt;	/* Route to the other host */
 	int mtu;
 	int rc;
+	int local;
 
 	EnterFunction(10);
 
@@ -993,17 +1254,49 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
993 * mangle and send the packet here (only for VS/NAT) 1254 * mangle and send the packet here (only for VS/NAT)
994 */ 1255 */
995 1256
996 rt = __ip_vs_get_out_rt_v6(cp); 1257 if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL,
997 if (!rt) 1258 0, 1|2|4)))
998 goto tx_error_icmp; 1259 goto tx_error_icmp;
999 1260
1261 local = __ip_vs_is_local_route6(rt);
1262 /*
1263 * Avoid duplicate tuple in reply direction for NAT traffic
1264 * to local address when connection is sync-ed
1265 */
1266#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1267 if (cp->flags & IP_VS_CONN_F_SYNC && local) {
1268 enum ip_conntrack_info ctinfo;
 1269	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
1270
1271 if (ct && !nf_ct_is_untracked(ct)) {
1272 IP_VS_DBG(10, "%s(): "
1273 "stopping DNAT to local address %pI6\n",
1274 __func__, &cp->daddr.in6);
1275 goto tx_error_put;
1276 }
1277 }
1278#endif
1279
1280 /* From world but DNAT to loopback address? */
1281 if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
1282 ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) {
1283 IP_VS_DBG(1, "%s(): "
1284 "stopping DNAT to loopback %pI6\n",
1285 __func__, &cp->daddr.in6);
1286 goto tx_error_put;
1287 }
1288
1000 /* MTU checking */ 1289 /* MTU checking */
1001 mtu = dst_mtu(&rt->dst); 1290 mtu = dst_mtu(&rt->dst);
1002 if (skb->len > mtu) { 1291 if (skb->len > mtu) {
1003 dst_release(&rt->dst); 1292 if (!skb->dev) {
1293 struct net *net = dev_net(skb_dst(skb)->dev);
1294
1295 skb->dev = net->loopback_dev;
1296 }
1004 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); 1297 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
1005 IP_VS_DBG_RL("%s(): frag needed\n", __func__); 1298 IP_VS_DBG_RL("%s(): frag needed\n", __func__);
1006 goto tx_error; 1299 goto tx_error_put;
1007 } 1300 }
1008 1301
1009 /* copy-on-write the packet before mangling it */ 1302 /* copy-on-write the packet before mangling it */
@@ -1013,16 +1306,21 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
1013 if (skb_cow(skb, rt->dst.dev->hard_header_len)) 1306 if (skb_cow(skb, rt->dst.dev->hard_header_len))
1014 goto tx_error_put; 1307 goto tx_error_put;
1015 1308
1016 /* drop the old route when skb is not shared */
1017 skb_dst_drop(skb);
1018 skb_dst_set(skb, &rt->dst);
1019
1020 ip_vs_nat_icmp_v6(skb, pp, cp, 0); 1309 ip_vs_nat_icmp_v6(skb, pp, cp, 0);
1021 1310
1311 if (!local || !skb->dev) {
1312 /* drop the old route when skb is not shared */
1313 skb_dst_drop(skb);
1314 skb_dst_set(skb, &rt->dst);
1315 } else {
1316 /* destined to loopback, do we need to change route? */
1317 dst_release(&rt->dst);
1318 }
1319
1022 /* Another hack: avoid icmp_send in ip_fragment */ 1320 /* Another hack: avoid icmp_send in ip_fragment */
1023 skb->local_df = 1; 1321 skb->local_df = 1;
1024 1322
1025 IP_VS_XMIT(NFPROTO_IPV6, skb, rt); 1323 IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local);
1026 1324
1027 rc = NF_STOLEN; 1325 rc = NF_STOLEN;
1028 goto out; 1326 goto out;
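
The rewritten ICMP NAT paths above hinge on a single decision: does the looked-up route point back at the local host? Only a packet that actually leaves the box adopts the new output route and is sent via NF_INET_LOCAL_OUT; a packet DNATed to a local address releases the extra route reference and stays on the input path (re-routed through __ip_vs_reroute_locally() in the IPv4 case), which is the distinction the new IP_VS_XMIT_NAT macro encodes. A minimal user-space sketch of that dispatch, assuming stub types; send_out() and send_local() are hypothetical stand-ins for the NF_HOOK invocations:

	#include <stdio.h>

	struct route  { int id; };
	struct packet { struct route *dst; };

	/* hypothetical stand-ins for the NF_HOOK(..., NF_INET_LOCAL_OUT, ...)
	 * call and the local-delivery path; only the branching mirrors the
	 * kernel code */
	static int send_out(struct packet *p)   { printf("LOCAL_OUT via route %d\n", p->dst->id); return 0; }
	static int send_local(struct packet *p) { printf("local delivery, route %d\n", p->dst->id); return 0; }

	static int xmit_nat(struct packet *p, struct route *rt, int local)
	{
		if (!local) {
			/* skb_dst_drop()/skb_dst_set(): the packet leaves the
			 * box, so it adopts the freshly looked-up output route */
			p->dst = rt;
			return send_out(p);
		}
		/* ip_rt_put(rt): local delivery keeps (or re-resolves) the
		 * input route instead of taking the output one */
		return send_local(p);
	}

	int main(void)
	{
		struct route in = { 1 }, out = { 2 };
		struct packet p = { &in };

		xmit_nat(&p, &out, 0);	/* real server elsewhere: new route, LOCAL_OUT */
		p.dst = &in;
		xmit_nat(&p, &out, 1);	/* DNAT to a local address: stay on input path */
		return 0;
	}
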
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index df3eedb142ff..1eacf8d9966a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -65,32 +65,42 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
65DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); 65DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
66EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); 66EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
67 67
68static int nf_conntrack_hash_rnd_initted; 68static unsigned int nf_conntrack_hash_rnd __read_mostly;
69static unsigned int nf_conntrack_hash_rnd;
70 69
71static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, 70static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
72 u16 zone, unsigned int size, unsigned int rnd)
73{ 71{
74 unsigned int n; 72 unsigned int n;
75 u_int32_t h;
76 73
77 /* The direction must be ignored, so we hash everything up to the 74 /* The direction must be ignored, so we hash everything up to the
78 * destination ports (which is a multiple of 4) and treat the last 75 * destination ports (which is a multiple of 4) and treat the last
79 * three bytes manually. 76 * three bytes manually.
80 */ 77 */
81 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); 78 n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
82 h = jhash2((u32 *)tuple, n, 79 return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^
83 zone ^ rnd ^ (((__force __u16)tuple->dst.u.all << 16) | 80 (((__force __u16)tuple->dst.u.all << 16) |
84 tuple->dst.protonum)); 81 tuple->dst.protonum));
82}
83
84static u32 __hash_bucket(u32 hash, unsigned int size)
85{
86 return ((u64)hash * size) >> 32;
87}
88
89static u32 hash_bucket(u32 hash, const struct net *net)
90{
91 return __hash_bucket(hash, net->ct.htable_size);
92}
85 93
86 return ((u64)h * size) >> 32; 94static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
95 u16 zone, unsigned int size)
96{
97 return __hash_bucket(hash_conntrack_raw(tuple, zone), size);
87} 98}
88 99
89static inline u_int32_t hash_conntrack(const struct net *net, u16 zone, 100static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
90 const struct nf_conntrack_tuple *tuple) 101 const struct nf_conntrack_tuple *tuple)
91{ 102{
92 return __hash_conntrack(tuple, zone, net->ct.htable_size, 103 return __hash_conntrack(tuple, zone, net->ct.htable_size);
93 nf_conntrack_hash_rnd);
94} 104}
95 105
96bool 106bool
@@ -292,20 +302,20 @@ static void death_by_timeout(unsigned long ul_conntrack)
292 * OR 302 * OR
293 * - Caller must lock nf_conntrack_lock before calling this function 303 * - Caller must lock nf_conntrack_lock before calling this function
294 */ 304 */
295struct nf_conntrack_tuple_hash * 305static struct nf_conntrack_tuple_hash *
296__nf_conntrack_find(struct net *net, u16 zone, 306____nf_conntrack_find(struct net *net, u16 zone,
297 const struct nf_conntrack_tuple *tuple) 307 const struct nf_conntrack_tuple *tuple, u32 hash)
298{ 308{
299 struct nf_conntrack_tuple_hash *h; 309 struct nf_conntrack_tuple_hash *h;
300 struct hlist_nulls_node *n; 310 struct hlist_nulls_node *n;
301 unsigned int hash = hash_conntrack(net, zone, tuple); 311 unsigned int bucket = hash_bucket(hash, net);
302 312
303 /* Disable BHs the entire time since we normally need to disable them 313 /* Disable BHs the entire time since we normally need to disable them
304 * at least once for the stats anyway. 314 * at least once for the stats anyway.
305 */ 315 */
306 local_bh_disable(); 316 local_bh_disable();
307begin: 317begin:
308 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { 318 hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
309 if (nf_ct_tuple_equal(tuple, &h->tuple) && 319 if (nf_ct_tuple_equal(tuple, &h->tuple) &&
310 nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) { 320 nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
311 NF_CT_STAT_INC(net, found); 321 NF_CT_STAT_INC(net, found);
@@ -319,7 +329,7 @@ begin:
319 * not the expected one, we must restart lookup. 329 * not the expected one, we must restart lookup.
320 * We probably met an item that was moved to another chain. 330 * We probably met an item that was moved to another chain.
321 */ 331 */
322 if (get_nulls_value(n) != hash) { 332 if (get_nulls_value(n) != bucket) {
323 NF_CT_STAT_INC(net, search_restart); 333 NF_CT_STAT_INC(net, search_restart);
324 goto begin; 334 goto begin;
325 } 335 }
@@ -327,19 +337,27 @@ begin:
327 337
328 return NULL; 338 return NULL;
329} 339}
340
341struct nf_conntrack_tuple_hash *
342__nf_conntrack_find(struct net *net, u16 zone,
343 const struct nf_conntrack_tuple *tuple)
344{
345 return ____nf_conntrack_find(net, zone, tuple,
346 hash_conntrack_raw(tuple, zone));
347}
330EXPORT_SYMBOL_GPL(__nf_conntrack_find); 348EXPORT_SYMBOL_GPL(__nf_conntrack_find);
331 349
332/* Find a connection corresponding to a tuple. */ 350/* Find a connection corresponding to a tuple. */
333struct nf_conntrack_tuple_hash * 351static struct nf_conntrack_tuple_hash *
334nf_conntrack_find_get(struct net *net, u16 zone, 352__nf_conntrack_find_get(struct net *net, u16 zone,
335 const struct nf_conntrack_tuple *tuple) 353 const struct nf_conntrack_tuple *tuple, u32 hash)
336{ 354{
337 struct nf_conntrack_tuple_hash *h; 355 struct nf_conntrack_tuple_hash *h;
338 struct nf_conn *ct; 356 struct nf_conn *ct;
339 357
340 rcu_read_lock(); 358 rcu_read_lock();
341begin: 359begin:
342 h = __nf_conntrack_find(net, zone, tuple); 360 h = ____nf_conntrack_find(net, zone, tuple, hash);
343 if (h) { 361 if (h) {
344 ct = nf_ct_tuplehash_to_ctrack(h); 362 ct = nf_ct_tuplehash_to_ctrack(h);
345 if (unlikely(nf_ct_is_dying(ct) || 363 if (unlikely(nf_ct_is_dying(ct) ||
@@ -357,6 +375,14 @@ begin:
357 375
358 return h; 376 return h;
359} 377}
378
379struct nf_conntrack_tuple_hash *
380nf_conntrack_find_get(struct net *net, u16 zone,
381 const struct nf_conntrack_tuple *tuple)
382{
383 return __nf_conntrack_find_get(net, zone, tuple,
384 hash_conntrack_raw(tuple, zone));
385}
360EXPORT_SYMBOL_GPL(nf_conntrack_find_get); 386EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
361 387
362static void __nf_conntrack_hash_insert(struct nf_conn *ct, 388static void __nf_conntrack_hash_insert(struct nf_conn *ct,
@@ -409,8 +435,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
409 return NF_ACCEPT; 435 return NF_ACCEPT;
410 436
411 zone = nf_ct_zone(ct); 437 zone = nf_ct_zone(ct);
412 hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 438 /* reuse the hash saved before */
413 repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); 439 hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
440 hash = hash_bucket(hash, net);
441 repl_hash = hash_conntrack(net, zone,
442 &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
414 443
415 /* We're not in hash table, and we refuse to set up related 444 /* We're not in hash table, and we refuse to set up related
416 connections for unconfirmed conns. But packet copies and 445 connections for unconfirmed conns. But packet copies and
@@ -567,17 +596,29 @@ static noinline int early_drop(struct net *net, unsigned int hash)
567 return dropped; 596 return dropped;
568} 597}
569 598
570struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone, 599static struct nf_conn *
571 const struct nf_conntrack_tuple *orig, 600__nf_conntrack_alloc(struct net *net, u16 zone,
572 const struct nf_conntrack_tuple *repl, 601 const struct nf_conntrack_tuple *orig,
573 gfp_t gfp) 602 const struct nf_conntrack_tuple *repl,
603 gfp_t gfp, u32 hash)
574{ 604{
575 struct nf_conn *ct; 605 struct nf_conn *ct;
576 606
577 if (unlikely(!nf_conntrack_hash_rnd_initted)) { 607 if (unlikely(!nf_conntrack_hash_rnd)) {
578 get_random_bytes(&nf_conntrack_hash_rnd, 608 unsigned int rand;
579 sizeof(nf_conntrack_hash_rnd)); 609
580 nf_conntrack_hash_rnd_initted = 1; 610 /*
 611	 * Why not initialize nf_conntrack_hash_rnd in an init() function?
 612	 * Because there isn't enough entropy while the system is initializing,
 613	 * so we initialize it as late as possible.
614 */
615 do {
616 get_random_bytes(&rand, sizeof(rand));
617 } while (!rand);
618 cmpxchg(&nf_conntrack_hash_rnd, 0, rand);
619
620 /* recompute the hash as nf_conntrack_hash_rnd is initialized */
621 hash = hash_conntrack_raw(orig, zone);
581 } 622 }
582 623
583 /* We don't want any race condition at early drop stage */ 624 /* We don't want any race condition at early drop stage */
@@ -585,8 +626,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
585 626
586 if (nf_conntrack_max && 627 if (nf_conntrack_max &&
587 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { 628 unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
588 unsigned int hash = hash_conntrack(net, zone, orig); 629 if (!early_drop(net, hash_bucket(hash, net))) {
589 if (!early_drop(net, hash)) {
590 atomic_dec(&net->ct.count); 630 atomic_dec(&net->ct.count);
591 if (net_ratelimit()) 631 if (net_ratelimit())
592 printk(KERN_WARNING 632 printk(KERN_WARNING
@@ -616,7 +656,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
616 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; 656 ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
617 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL; 657 ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
618 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; 658 ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
619 ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL; 659 /* save hash for reusing when confirming */
660 *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
620 /* Don't set timer yet: wait for confirmation */ 661 /* Don't set timer yet: wait for confirmation */
621 setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); 662 setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
622 write_pnet(&ct->ct_net, net); 663 write_pnet(&ct->ct_net, net);
@@ -643,6 +684,14 @@ out_free:
643 return ERR_PTR(-ENOMEM); 684 return ERR_PTR(-ENOMEM);
644#endif 685#endif
645} 686}
687
688struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
689 const struct nf_conntrack_tuple *orig,
690 const struct nf_conntrack_tuple *repl,
691 gfp_t gfp)
692{
693 return __nf_conntrack_alloc(net, zone, orig, repl, gfp, 0);
694}
646EXPORT_SYMBOL_GPL(nf_conntrack_alloc); 695EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
647 696
648void nf_conntrack_free(struct nf_conn *ct) 697void nf_conntrack_free(struct nf_conn *ct)
@@ -664,7 +713,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
664 struct nf_conntrack_l3proto *l3proto, 713 struct nf_conntrack_l3proto *l3proto,
665 struct nf_conntrack_l4proto *l4proto, 714 struct nf_conntrack_l4proto *l4proto,
666 struct sk_buff *skb, 715 struct sk_buff *skb,
667 unsigned int dataoff) 716 unsigned int dataoff, u32 hash)
668{ 717{
669 struct nf_conn *ct; 718 struct nf_conn *ct;
670 struct nf_conn_help *help; 719 struct nf_conn_help *help;
@@ -678,7 +727,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
678 return NULL; 727 return NULL;
679 } 728 }
680 729
681 ct = nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC); 730 ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
731 hash);
682 if (IS_ERR(ct)) { 732 if (IS_ERR(ct)) {
683 pr_debug("Can't allocate conntrack.\n"); 733 pr_debug("Can't allocate conntrack.\n");
684 return (struct nf_conntrack_tuple_hash *)ct; 734 return (struct nf_conntrack_tuple_hash *)ct;
@@ -755,6 +805,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
755 struct nf_conntrack_tuple_hash *h; 805 struct nf_conntrack_tuple_hash *h;
756 struct nf_conn *ct; 806 struct nf_conn *ct;
757 u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; 807 u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
808 u32 hash;
758 809
759 if (!nf_ct_get_tuple(skb, skb_network_offset(skb), 810 if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
760 dataoff, l3num, protonum, &tuple, l3proto, 811 dataoff, l3num, protonum, &tuple, l3proto,
@@ -764,10 +815,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
764 } 815 }
765 816
766 /* look for tuple match */ 817 /* look for tuple match */
767 h = nf_conntrack_find_get(net, zone, &tuple); 818 hash = hash_conntrack_raw(&tuple, zone);
819 h = __nf_conntrack_find_get(net, zone, &tuple, hash);
768 if (!h) { 820 if (!h) {
769 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto, 821 h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
770 skb, dataoff); 822 skb, dataoff, hash);
771 if (!h) 823 if (!h)
772 return NULL; 824 return NULL;
773 if (IS_ERR(h)) 825 if (IS_ERR(h))
@@ -1307,8 +1359,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
1307 ct = nf_ct_tuplehash_to_ctrack(h); 1359 ct = nf_ct_tuplehash_to_ctrack(h);
1308 hlist_nulls_del_rcu(&h->hnnode); 1360 hlist_nulls_del_rcu(&h->hnnode);
1309 bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct), 1361 bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
1310 hashsize, 1362 hashsize);
1311 nf_conntrack_hash_rnd);
1312 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); 1363 hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
1313 } 1364 }
1314 } 1365 }
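
Two ideas carry the conntrack rework above: the raw tuple hash is computed once in resolve_normal_ct(), threaded through allocation, and parked in the otherwise-unused reply hnnode.pprev until confirmation; and bucket selection becomes the multiply-shift ((u64)hash * size) >> 32, which maps a full 32-bit hash onto [0, size) without a modulo and without requiring a power-of-two table. A self-contained sketch of the bucket map (the table size is an arbitrary example value):

	#include <stdint.h>
	#include <stdio.h>

	/* same mapping as __hash_bucket(): scale a full 32-bit hash into
	 * [0, size) with one multiply and shift; no modulo, and size need
	 * not be a power of two */
	static uint32_t hash_bucket(uint32_t hash, uint32_t size)
	{
		return ((uint64_t)hash * size) >> 32;
	}

	int main(void)
	{
		uint32_t size = 16384;	/* e.g. net->ct.htable_size */
		uint32_t samples[] = { 0x00000000u, 0x12345678u, 0xdeadbeefu, 0xffffffffu };

		for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
			printf("hash %08x -> bucket %u\n",
			       samples[i], hash_bucket(samples[i], size));
		/* 0x00000000 -> 0 and 0xffffffff -> size - 1: the map is
		 * monotone and spreads hashes proportionally over the table */
		return 0;
	}
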
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index acb29ccaa41f..46e8966912b1 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -38,25 +38,30 @@ static int nf_ct_expect_hash_rnd_initted __read_mostly;
38 38
39static struct kmem_cache *nf_ct_expect_cachep __read_mostly; 39static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
40 40
41static HLIST_HEAD(nf_ct_userspace_expect_list);
42
41/* nf_conntrack_expect helper functions */ 43/* nf_conntrack_expect helper functions */
42void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) 44void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
45 u32 pid, int report)
43{ 46{
44 struct nf_conn_help *master_help = nfct_help(exp->master); 47 struct nf_conn_help *master_help = nfct_help(exp->master);
45 struct net *net = nf_ct_exp_net(exp); 48 struct net *net = nf_ct_exp_net(exp);
46 49
47 NF_CT_ASSERT(master_help);
48 NF_CT_ASSERT(!timer_pending(&exp->timeout)); 50 NF_CT_ASSERT(!timer_pending(&exp->timeout));
49 51
50 hlist_del_rcu(&exp->hnode); 52 hlist_del_rcu(&exp->hnode);
51 net->ct.expect_count--; 53 net->ct.expect_count--;
52 54
53 hlist_del(&exp->lnode); 55 hlist_del(&exp->lnode);
54 master_help->expecting[exp->class]--; 56 if (!(exp->flags & NF_CT_EXPECT_USERSPACE))
57 master_help->expecting[exp->class]--;
58
59 nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
55 nf_ct_expect_put(exp); 60 nf_ct_expect_put(exp);
56 61
57 NF_CT_STAT_INC(net, expect_delete); 62 NF_CT_STAT_INC(net, expect_delete);
58} 63}
59EXPORT_SYMBOL_GPL(nf_ct_unlink_expect); 64EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
60 65
61static void nf_ct_expectation_timed_out(unsigned long ul_expect) 66static void nf_ct_expectation_timed_out(unsigned long ul_expect)
62{ 67{
@@ -320,16 +325,21 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
320 325
321 atomic_inc(&exp->use); 326 atomic_inc(&exp->use);
322 327
323 hlist_add_head(&exp->lnode, &master_help->expectations); 328 if (master_help) {
324 master_help->expecting[exp->class]++; 329 hlist_add_head(&exp->lnode, &master_help->expectations);
330 master_help->expecting[exp->class]++;
331 } else if (exp->flags & NF_CT_EXPECT_USERSPACE)
332 hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list);
325 333
326 hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); 334 hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
327 net->ct.expect_count++; 335 net->ct.expect_count++;
328 336
329 setup_timer(&exp->timeout, nf_ct_expectation_timed_out, 337 setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
330 (unsigned long)exp); 338 (unsigned long)exp);
331 p = &master_help->helper->expect_policy[exp->class]; 339 if (master_help) {
332 exp->timeout.expires = jiffies + p->timeout * HZ; 340 p = &master_help->helper->expect_policy[exp->class];
341 exp->timeout.expires = jiffies + p->timeout * HZ;
342 }
333 add_timer(&exp->timeout); 343 add_timer(&exp->timeout);
334 344
335 atomic_inc(&exp->use); 345 atomic_inc(&exp->use);
@@ -380,7 +390,9 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
380 unsigned int h; 390 unsigned int h;
381 int ret = 1; 391 int ret = 1;
382 392
383 if (!master_help->helper) { 393 /* Don't allow expectations created from kernel-space with no helper */
394 if (!(expect->flags & NF_CT_EXPECT_USERSPACE) &&
395 (!master_help || (master_help && !master_help->helper))) {
384 ret = -ESHUTDOWN; 396 ret = -ESHUTDOWN;
385 goto out; 397 goto out;
386 } 398 }
@@ -398,13 +410,16 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
398 } 410 }
399 } 411 }
400 /* Will be over limit? */ 412 /* Will be over limit? */
401 p = &master_help->helper->expect_policy[expect->class]; 413 if (master_help) {
402 if (p->max_expected && 414 p = &master_help->helper->expect_policy[expect->class];
403 master_help->expecting[expect->class] >= p->max_expected) { 415 if (p->max_expected &&
404 evict_oldest_expect(master, expect); 416 master_help->expecting[expect->class] >= p->max_expected) {
405 if (master_help->expecting[expect->class] >= p->max_expected) { 417 evict_oldest_expect(master, expect);
406 ret = -EMFILE; 418 if (master_help->expecting[expect->class]
407 goto out; 419 >= p->max_expected) {
420 ret = -EMFILE;
421 goto out;
422 }
408 } 423 }
409 } 424 }
410 425
@@ -439,6 +454,21 @@ out:
439} 454}
440EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); 455EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
441 456
457void nf_ct_remove_userspace_expectations(void)
458{
459 struct nf_conntrack_expect *exp;
460 struct hlist_node *n, *next;
461
462 hlist_for_each_entry_safe(exp, n, next,
463 &nf_ct_userspace_expect_list, lnode) {
464 if (del_timer(&exp->timeout)) {
465 nf_ct_unlink_expect(exp);
466 nf_ct_expect_put(exp);
467 }
468 }
469}
470EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations);
471
442#ifdef CONFIG_PROC_FS 472#ifdef CONFIG_PROC_FS
443struct ct_expect_iter_state { 473struct ct_expect_iter_state {
444 struct seq_net_private p; 474 struct seq_net_private p;
@@ -529,8 +559,12 @@ static int exp_seq_show(struct seq_file *s, void *v)
529 seq_printf(s, "PERMANENT"); 559 seq_printf(s, "PERMANENT");
530 delim = ","; 560 delim = ",";
531 } 561 }
532 if (expect->flags & NF_CT_EXPECT_INACTIVE) 562 if (expect->flags & NF_CT_EXPECT_INACTIVE) {
533 seq_printf(s, "%sINACTIVE", delim); 563 seq_printf(s, "%sINACTIVE", delim);
564 delim = ",";
565 }
566 if (expect->flags & NF_CT_EXPECT_USERSPACE)
567 seq_printf(s, "%sUSERSPACE", delim);
534 568
535 helper = rcu_dereference(nfct_help(expect->master)->helper); 569 helper = rcu_dereference(nfct_help(expect->master)->helper);
536 if (helper) { 570 if (helper) {
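
The expectation changes split insertion into two cases: helper-backed expectations keep the per-class expecting[] accounting and take their timeout from the helper's expect_policy, while userspace-created ones (flagged NF_CT_EXPECT_USERSPACE) land on the new global list and carry a timeout supplied by the creator. A reduced sketch of that branch, with plain structs standing in for nf_conn_help and nf_conntrack_expect:

	#include <stdio.h>

	#define EXPECT_USERSPACE 0x1

	struct helper { int timeout; };			/* stands in for expect_policy[] */
	struct expect { unsigned int flags; int timeout; };

	/* sketch of the nf_ct_expect_insert() split: helper-backed
	 * expectations get their timeout from the helper's policy
	 * (p->timeout * HZ in the kernel); userspace ones arrive with a
	 * timeout already set via CTA_EXPECT_TIMEOUT */
	static void insert_expect(struct expect *exp, struct helper *master_help)
	{
		if (master_help) {
			exp->timeout = master_help->timeout;
			printf("helper expectation, timeout %d\n", exp->timeout);
		} else if (exp->flags & EXPECT_USERSPACE) {
			printf("userspace expectation, keeps timeout %d\n", exp->timeout);
		}
	}

	int main(void)
	{
		struct helper h = { .timeout = 30 };
		struct expect kern = { 0, 0 }, user = { EXPECT_USERSPACE, 120 };

		insert_expect(&kern, &h);	/* kernel path: policy timeout */
		insert_expect(&user, NULL);	/* ctnetlink path: user-supplied timeout */
		return 0;
	}
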
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 146476c6441a..b729ace1dcc1 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1588,8 +1588,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
1588 const struct nf_conntrack_expect *exp) 1588 const struct nf_conntrack_expect *exp)
1589{ 1589{
1590 struct nf_conn *master = exp->master; 1590 struct nf_conn *master = exp->master;
1591 struct nf_conntrack_helper *helper;
1592 long timeout = (exp->timeout.expires - jiffies) / HZ; 1591 long timeout = (exp->timeout.expires - jiffies) / HZ;
1592 struct nf_conn_help *help;
1593 1593
1594 if (timeout < 0) 1594 if (timeout < 0)
1595 timeout = 0; 1595 timeout = 0;
@@ -1605,9 +1605,15 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
1605 1605
1606 NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)); 1606 NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout));
1607 NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)); 1607 NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp));
1608 helper = rcu_dereference(nfct_help(master)->helper); 1608 NLA_PUT_BE32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags));
1609 if (helper) 1609 help = nfct_help(master);
1610 NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name); 1610 if (help) {
1611 struct nf_conntrack_helper *helper;
1612
1613 helper = rcu_dereference(help->helper);
1614 if (helper)
1615 NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name);
1616 }
1611 1617
1612 return 0; 1618 return 0;
1613 1619
@@ -1654,17 +1660,20 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
1654 struct nlmsghdr *nlh; 1660 struct nlmsghdr *nlh;
1655 struct nfgenmsg *nfmsg; 1661 struct nfgenmsg *nfmsg;
1656 struct sk_buff *skb; 1662 struct sk_buff *skb;
1657 unsigned int type; 1663 unsigned int type, group;
1658 int flags = 0; 1664 int flags = 0;
1659 1665
1660 if (events & (1 << IPEXP_NEW)) { 1666 if (events & (1 << IPEXP_DESTROY)) {
1667 type = IPCTNL_MSG_EXP_DELETE;
1668 group = NFNLGRP_CONNTRACK_EXP_DESTROY;
1669 } else if (events & (1 << IPEXP_NEW)) {
1661 type = IPCTNL_MSG_EXP_NEW; 1670 type = IPCTNL_MSG_EXP_NEW;
1662 flags = NLM_F_CREATE|NLM_F_EXCL; 1671 flags = NLM_F_CREATE|NLM_F_EXCL;
1672 group = NFNLGRP_CONNTRACK_EXP_NEW;
1663 } else 1673 } else
1664 return 0; 1674 return 0;
1665 1675
1666 if (!item->report && 1676 if (!item->report && !nfnetlink_has_listeners(net, group))
1667 !nfnetlink_has_listeners(net, NFNLGRP_CONNTRACK_EXP_NEW))
1668 return 0; 1677 return 0;
1669 1678
1670 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); 1679 skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
@@ -1687,8 +1696,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
1687 rcu_read_unlock(); 1696 rcu_read_unlock();
1688 1697
1689 nlmsg_end(skb, nlh); 1698 nlmsg_end(skb, nlh);
1690 nfnetlink_send(skb, net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, 1699 nfnetlink_send(skb, net, item->pid, group, item->report, GFP_ATOMIC);
1691 item->report, GFP_ATOMIC);
1692 return 0; 1700 return 0;
1693 1701
1694nla_put_failure: 1702nla_put_failure:
@@ -1761,6 +1769,8 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = {
1761 [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 }, 1769 [CTA_EXPECT_TIMEOUT] = { .type = NLA_U32 },
1762 [CTA_EXPECT_ID] = { .type = NLA_U32 }, 1770 [CTA_EXPECT_ID] = { .type = NLA_U32 },
1763 [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING }, 1771 [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING },
1772 [CTA_EXPECT_ZONE] = { .type = NLA_U16 },
1773 [CTA_EXPECT_FLAGS] = { .type = NLA_U32 },
1764}; 1774};
1765 1775
1766static int 1776static int
@@ -1869,7 +1879,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1869 } 1879 }
1870 1880
1871 /* after list removal, usage count == 1 */ 1881 /* after list removal, usage count == 1 */
1872 nf_ct_unexpect_related(exp); 1882 spin_lock_bh(&nf_conntrack_lock);
1883 if (del_timer(&exp->timeout)) {
1884 nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).pid,
1885 nlmsg_report(nlh));
1886 nf_ct_expect_put(exp);
1887 }
1888 spin_unlock_bh(&nf_conntrack_lock);
1873 /* have to put what we 'get' above. 1889 /* have to put what we 'get' above.
1874 * after this line usage count == 0 */ 1890 * after this line usage count == 0 */
1875 nf_ct_expect_put(exp); 1891 nf_ct_expect_put(exp);
@@ -1886,7 +1902,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1886 m_help = nfct_help(exp->master); 1902 m_help = nfct_help(exp->master);
1887 if (!strcmp(m_help->helper->name, name) && 1903 if (!strcmp(m_help->helper->name, name) &&
1888 del_timer(&exp->timeout)) { 1904 del_timer(&exp->timeout)) {
1889 nf_ct_unlink_expect(exp); 1905 nf_ct_unlink_expect_report(exp,
1906 NETLINK_CB(skb).pid,
1907 nlmsg_report(nlh));
1890 nf_ct_expect_put(exp); 1908 nf_ct_expect_put(exp);
1891 } 1909 }
1892 } 1910 }
@@ -1900,7 +1918,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
1900 &net->ct.expect_hash[i], 1918 &net->ct.expect_hash[i],
1901 hnode) { 1919 hnode) {
1902 if (del_timer(&exp->timeout)) { 1920 if (del_timer(&exp->timeout)) {
1903 nf_ct_unlink_expect(exp); 1921 nf_ct_unlink_expect_report(exp,
1922 NETLINK_CB(skb).pid,
1923 nlmsg_report(nlh));
1904 nf_ct_expect_put(exp); 1924 nf_ct_expect_put(exp);
1905 } 1925 }
1906 } 1926 }
@@ -1946,23 +1966,35 @@ ctnetlink_create_expect(struct net *net, u16 zone,
1946 if (!h) 1966 if (!h)
1947 return -ENOENT; 1967 return -ENOENT;
1948 ct = nf_ct_tuplehash_to_ctrack(h); 1968 ct = nf_ct_tuplehash_to_ctrack(h);
1949 help = nfct_help(ct);
1950
1951 if (!help || !help->helper) {
1952 /* such conntrack hasn't got any helper, abort */
1953 err = -EOPNOTSUPP;
1954 goto out;
1955 }
1956
1957 exp = nf_ct_expect_alloc(ct); 1969 exp = nf_ct_expect_alloc(ct);
1958 if (!exp) { 1970 if (!exp) {
1959 err = -ENOMEM; 1971 err = -ENOMEM;
1960 goto out; 1972 goto out;
1961 } 1973 }
1974 help = nfct_help(ct);
1975 if (!help) {
1976 if (!cda[CTA_EXPECT_TIMEOUT]) {
1977 err = -EINVAL;
1978 goto out;
1979 }
1980 exp->timeout.expires =
1981 jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ;
1982
1983 exp->flags = NF_CT_EXPECT_USERSPACE;
1984 if (cda[CTA_EXPECT_FLAGS]) {
1985 exp->flags |=
1986 ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS]));
1987 }
1988 } else {
1989 if (cda[CTA_EXPECT_FLAGS]) {
1990 exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS]));
1991 exp->flags &= ~NF_CT_EXPECT_USERSPACE;
1992 } else
1993 exp->flags = 0;
1994 }
1962 1995
1963 exp->class = 0; 1996 exp->class = 0;
1964 exp->expectfn = NULL; 1997 exp->expectfn = NULL;
1965 exp->flags = 0;
1966 exp->master = ct; 1998 exp->master = ct;
1967 exp->helper = NULL; 1999 exp->helper = NULL;
1968 memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); 2000 memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
@@ -2130,6 +2162,7 @@ static void __exit ctnetlink_exit(void)
2130{ 2162{
2131 pr_info("ctnetlink: unregistering from nfnetlink.\n"); 2163 pr_info("ctnetlink: unregistering from nfnetlink.\n");
2132 2164
2165 nf_ct_remove_userspace_expectations();
2133#ifdef CONFIG_NF_CONNTRACK_EVENTS 2166#ifdef CONFIG_NF_CONNTRACK_EVENTS
2134 nf_ct_expect_unregister_notifier(&ctnl_notifier_exp); 2167 nf_ct_expect_unregister_notifier(&ctnl_notifier_exp);
2135 nf_conntrack_unregister_notifier(&ctnl_notifier); 2168 nf_conntrack_unregister_notifier(&ctnl_notifier);
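
ctnetlink_expect_event() now fans out on the event bitmask: IPEXP_DESTROY takes precedence over IPEXP_NEW and selects the message type and the multicast group together, so delete notifications reach NFNLGRP_CONNTRACK_EXP_DESTROY listeners. A runnable sketch of that selection; the enum values are illustrative, not the kernel's:

	#include <stdio.h>

	enum { IPEXP_NEW, IPEXP_DESTROY };
	enum { MSG_EXP_NEW, MSG_EXP_DELETE };
	enum { GRP_EXP_NEW, GRP_EXP_DESTROY };

	/* sketch of the event -> (message type, multicast group) selection
	 * added to ctnetlink_expect_event(); destroy takes precedence,
	 * anything else is ignored */
	static int classify(unsigned int events, int *type, int *group)
	{
		if (events & (1 << IPEXP_DESTROY)) {
			*type = MSG_EXP_DELETE;  *group = GRP_EXP_DESTROY;
		} else if (events & (1 << IPEXP_NEW)) {
			*type = MSG_EXP_NEW;     *group = GRP_EXP_NEW;
		} else
			return -1;
		return 0;
	}

	int main(void)
	{
		int type, group;

		if (!classify(1 << IPEXP_DESTROY, &type, &group))
			printf("destroy -> type %d group %d\n", type, group);
		if (!classify(1 << IPEXP_NEW, &type, &group))
			printf("new     -> type %d group %d\n", type, group);
		return 0;
	}
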
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index f64de9544866..bcf47eb518ef 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -130,6 +130,44 @@ static int digits_len(const struct nf_conn *ct, const char *dptr,
130 return len; 130 return len;
131} 131}
132 132
133static int iswordc(const char c)
134{
135 if (isalnum(c) || c == '!' || c == '"' || c == '%' ||
136 (c >= '(' && c <= '/') || c == ':' || c == '<' || c == '>' ||
137 c == '?' || (c >= '[' && c <= ']') || c == '_' || c == '`' ||
138 c == '{' || c == '}' || c == '~')
139 return 1;
140 return 0;
141}
142
143static int word_len(const char *dptr, const char *limit)
144{
145 int len = 0;
146 while (dptr < limit && iswordc(*dptr)) {
147 dptr++;
148 len++;
149 }
150 return len;
151}
152
153static int callid_len(const struct nf_conn *ct, const char *dptr,
154 const char *limit, int *shift)
155{
156 int len, domain_len;
157
158 len = word_len(dptr, limit);
159 dptr += len;
160 if (!len || dptr == limit || *dptr != '@')
161 return len;
162 dptr++;
163 len++;
164
165 domain_len = word_len(dptr, limit);
166 if (!domain_len)
167 return 0;
168 return len + domain_len;
169}
170
133/* get media type + port length */ 171/* get media type + port length */
134static int media_len(const struct nf_conn *ct, const char *dptr, 172static int media_len(const struct nf_conn *ct, const char *dptr,
135 const char *limit, int *shift) 173 const char *limit, int *shift)
@@ -152,6 +190,9 @@ static int parse_addr(const struct nf_conn *ct, const char *cp,
152 const char *end; 190 const char *end;
153 int ret = 0; 191 int ret = 0;
154 192
193 if (!ct)
194 return 0;
195
155 memset(addr, 0, sizeof(*addr)); 196 memset(addr, 0, sizeof(*addr));
156 switch (nf_ct_l3num(ct)) { 197 switch (nf_ct_l3num(ct)) {
157 case AF_INET: 198 case AF_INET:
@@ -296,6 +337,7 @@ static const struct sip_header ct_sip_hdrs[] = {
296 [SIP_HDR_VIA_TCP] = SIP_HDR("Via", "v", "TCP ", epaddr_len), 337 [SIP_HDR_VIA_TCP] = SIP_HDR("Via", "v", "TCP ", epaddr_len),
297 [SIP_HDR_EXPIRES] = SIP_HDR("Expires", NULL, NULL, digits_len), 338 [SIP_HDR_EXPIRES] = SIP_HDR("Expires", NULL, NULL, digits_len),
298 [SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len), 339 [SIP_HDR_CONTENT_LENGTH] = SIP_HDR("Content-Length", "l", NULL, digits_len),
340 [SIP_HDR_CALL_ID] = SIP_HDR("Call-Id", "i", NULL, callid_len),
299}; 341};
300 342
301static const char *sip_follow_continuation(const char *dptr, const char *limit) 343static const char *sip_follow_continuation(const char *dptr, const char *limit)
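
The new Call-Id parser accepts word ["@" word], where a word is any run of characters admitted by iswordc(); a trailing '@' with no domain invalidates the header. The helpers are small enough to exercise directly in user space. This sketch drops the unused ct and shift parameters and casts through unsigned char for portable ctype use:

	#include <ctype.h>
	#include <stdio.h>
	#include <string.h>

	/* same character class as the patch's iswordc() */
	static int iswordc(const char c)
	{
		if (isalnum((unsigned char)c) || c == '!' || c == '"' || c == '%' ||
		    (c >= '(' && c <= '/') || c == ':' || c == '<' || c == '>' ||
		    c == '?' || (c >= '[' && c <= ']') || c == '_' || c == '`' ||
		    c == '{' || c == '}' || c == '~')
			return 1;
		return 0;
	}

	static int word_len(const char *dptr, const char *limit)
	{
		int len = 0;
		while (dptr < limit && iswordc(*dptr)) {
			dptr++;
			len++;
		}
		return len;
	}

	/* callid_len() without the ct/shift parameters it never uses */
	static int callid_len(const char *dptr, const char *limit)
	{
		int len, domain_len;

		len = word_len(dptr, limit);
		dptr += len;
		if (!len || dptr == limit || *dptr != '@')
			return len;
		dptr++;
		len++;

		domain_len = word_len(dptr, limit);
		if (!domain_len)
			return 0;	/* '@' with no domain: malformed */
		return len + domain_len;
	}

	int main(void)
	{
		const char *id = "a84b4c76e66710@pc33.example.com\r\n";

		/* stops at CR; prints: callid_len = 31 */
		printf("callid_len = %d\n", callid_len(id, id + strlen(id)));
		return 0;
	}
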
diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c
index daab8c4a903c..4d87befb04c0 100644
--- a/net/netfilter/nf_tproxy_core.c
+++ b/net/netfilter/nf_tproxy_core.c
@@ -18,41 +18,6 @@
18#include <net/udp.h> 18#include <net/udp.h>
19#include <net/netfilter/nf_tproxy_core.h> 19#include <net/netfilter/nf_tproxy_core.h>
20 20
21struct sock *
22nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
23 const __be32 saddr, const __be32 daddr,
24 const __be16 sport, const __be16 dport,
25 const struct net_device *in, bool listening_only)
26{
27 struct sock *sk;
28
29 /* look up socket */
30 switch (protocol) {
31 case IPPROTO_TCP:
32 if (listening_only)
33 sk = __inet_lookup_listener(net, &tcp_hashinfo,
34 daddr, ntohs(dport),
35 in->ifindex);
36 else
37 sk = __inet_lookup(net, &tcp_hashinfo,
38 saddr, sport, daddr, dport,
39 in->ifindex);
40 break;
41 case IPPROTO_UDP:
42 sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
43 in->ifindex);
44 break;
45 default:
46 WARN_ON(1);
47 sk = NULL;
48 }
49
50 pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n",
51 protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk);
52
53 return sk;
54}
55EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4);
56 21
57static void 22static void
58nf_tproxy_destructor(struct sk_buff *skb) 23nf_tproxy_destructor(struct sk_buff *skb)
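
nf_tproxy_get_sock_v4() moves out of this file; its replacement (an inline in nf_tproxy_core.h, not visible in this hunk) widens the old listening_only bool into the three NFT_LOOKUP_* classes used by the callers elsewhere in this series. A sketch of the TCP side of that dispatch, assuming the established/listener split the callers imply:

	#include <stdio.h>

	/* mirrors the NFT_LOOKUP_* classes referenced by the callers */
	enum lookup_type { LOOKUP_ANY, LOOKUP_LISTENER, LOOKUP_ESTABLISHED };

	/* sketch of the TCP branch: the bool could only say "listener or
	 * full lookup"; the enum adds an established-only class for the
	 * first-pass lookup done by the TPROXY target */
	static const char *tcp_lookup(enum lookup_type type)
	{
		switch (type) {
		case LOOKUP_LISTENER:
			return "listener hash only (daddr/dport)";
		case LOOKUP_ESTABLISHED:
			return "established hash only (full 4-tuple)";
		case LOOKUP_ANY:
		default:
			return "established, then listener";
		}
	}

	int main(void)
	{
		printf("ESTABLISHED: %s\n", tcp_lookup(LOOKUP_ESTABLISHED));
		printf("LISTENER:    %s\n", tcp_lookup(LOOKUP_LISTENER));
		printf("ANY:         %s\n", tcp_lookup(LOOKUP_ANY));
		return 0;
	}
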
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index e34622fa0003..80463507420e 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -116,10 +116,8 @@ EXPORT_SYMBOL(xt_register_targets);
116void 116void
117xt_unregister_targets(struct xt_target *target, unsigned int n) 117xt_unregister_targets(struct xt_target *target, unsigned int n)
118{ 118{
119 unsigned int i; 119 while (n-- > 0)
120 120 xt_unregister_target(&target[n]);
121 for (i = 0; i < n; i++)
122 xt_unregister_target(&target[i]);
123} 121}
124EXPORT_SYMBOL(xt_unregister_targets); 122EXPORT_SYMBOL(xt_unregister_targets);
125 123
@@ -174,10 +172,8 @@ EXPORT_SYMBOL(xt_register_matches);
174void 172void
175xt_unregister_matches(struct xt_match *match, unsigned int n) 173xt_unregister_matches(struct xt_match *match, unsigned int n)
176{ 174{
177 unsigned int i; 175 while (n-- > 0)
178 176 xt_unregister_match(&match[n]);
179 for (i = 0; i < n; i++)
180 xt_unregister_match(&match[i]);
181} 177}
182EXPORT_SYMBOL(xt_unregister_matches); 178EXPORT_SYMBOL(xt_unregister_matches);
183 179
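
The loop rewrite folds the counter into n itself: while (n-- > 0) visits indices n-1 down to 0, so targets and matches are also torn down in the reverse of their registration order. A tiny standalone demonstration:

	#include <stdio.h>

	static void unregister_one(unsigned int idx) { printf("unregister %u\n", idx); }

	/* the rewritten loop: n doubles as the loop counter and the
	 * teardown walks n-1 .. 0, i.e. reverse registration order */
	static void unregister_all(unsigned int n)
	{
		while (n-- > 0)
			unregister_one(n);
	}

	int main(void)
	{
		unregister_all(3);	/* prints 2, 1, 0 */
		return 0;
	}
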
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index c61294d85fda..19c482caf30b 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Transparent proxy support for Linux/iptables 2 * Transparent proxy support for Linux/iptables
3 * 3 *
4 * Copyright (c) 2006-2007 BalaBit IT Ltd. 4 * Copyright (c) 2006-2010 BalaBit IT Ltd.
5 * Author: Balazs Scheidler, Krisztian Kovacs 5 * Author: Balazs Scheidler, Krisztian Kovacs
6 * 6 *
7 * This program is free software; you can redistribute it and/or modify 7 * This program is free software; you can redistribute it and/or modify
@@ -16,19 +16,96 @@
16#include <net/checksum.h> 16#include <net/checksum.h>
17#include <net/udp.h> 17#include <net/udp.h>
18#include <net/inet_sock.h> 18#include <net/inet_sock.h>
19 19#include <linux/inetdevice.h>
20#include <linux/netfilter/x_tables.h> 20#include <linux/netfilter/x_tables.h>
21#include <linux/netfilter_ipv4/ip_tables.h> 21#include <linux/netfilter_ipv4/ip_tables.h>
22#include <linux/netfilter/xt_TPROXY.h>
23 22
24#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 23#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
24#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
25#include <net/if_inet6.h>
26#include <net/addrconf.h>
27#include <linux/netfilter_ipv6/ip6_tables.h>
28#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
29#endif
30
25#include <net/netfilter/nf_tproxy_core.h> 31#include <net/netfilter/nf_tproxy_core.h>
32#include <linux/netfilter/xt_TPROXY.h>
33
34static inline __be32
35tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
36{
37 struct in_device *indev;
38 __be32 laddr;
39
40 if (user_laddr)
41 return user_laddr;
42
43 laddr = 0;
44 rcu_read_lock();
45 indev = __in_dev_get_rcu(skb->dev);
46 for_primary_ifa(indev) {
47 laddr = ifa->ifa_local;
48 break;
49 } endfor_ifa(indev);
50 rcu_read_unlock();
51
52 return laddr ? laddr : daddr;
53}
54
55/**
56 * tproxy_handle_time_wait4() - handle IPv4 TCP TIME_WAIT reopen redirections
57 * @skb: The skb being processed.
58 * @laddr: IPv4 address to redirect to or zero.
59 * @lport: TCP port to redirect to or zero.
60 * @sk: The TIME_WAIT TCP socket found by the lookup.
61 *
 62 * We have to handle SYN packets arriving at TIME_WAIT sockets
63 * differently: instead of reopening the connection we should rather
64 * redirect the new connection to the proxy if there's a listener
65 * socket present.
66 *
67 * tproxy_handle_time_wait4() consumes the socket reference passed in.
68 *
69 * Returns the listener socket if there's one, the TIME_WAIT socket if
70 * no such listener is found, or NULL if the TCP header is incomplete.
71 */
72static struct sock *
73tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
74 struct sock *sk)
75{
76 const struct iphdr *iph = ip_hdr(skb);
77 struct tcphdr _hdr, *hp;
78
79 hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
80 if (hp == NULL) {
81 inet_twsk_put(inet_twsk(sk));
82 return NULL;
83 }
84
85 if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
86 /* SYN to a TIME_WAIT socket, we'd rather redirect it
87 * to a listener socket if there's one */
88 struct sock *sk2;
89
90 sk2 = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
91 iph->saddr, laddr ? laddr : iph->daddr,
92 hp->source, lport ? lport : hp->dest,
93 skb->dev, NFT_LOOKUP_LISTENER);
94 if (sk2) {
95 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
96 inet_twsk_put(inet_twsk(sk));
97 sk = sk2;
98 }
99 }
100
101 return sk;
102}
26 103
27static unsigned int 104static unsigned int
28tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par) 105tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
106 u_int32_t mark_mask, u_int32_t mark_value)
29{ 107{
30 const struct iphdr *iph = ip_hdr(skb); 108 const struct iphdr *iph = ip_hdr(skb);
31 const struct xt_tproxy_target_info *tgi = par->targinfo;
32 struct udphdr _hdr, *hp; 109 struct udphdr _hdr, *hp;
33 struct sock *sk; 110 struct sock *sk;
34 111
@@ -36,12 +113,195 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
36 if (hp == NULL) 113 if (hp == NULL)
37 return NF_DROP; 114 return NF_DROP;
38 115
116 /* check if there's an ongoing connection on the packet
117 * addresses, this happens if the redirect already happened
118 * and the current packet belongs to an already established
119 * connection */
39 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, 120 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
40 iph->saddr, 121 iph->saddr, iph->daddr,
41 tgi->laddr ? tgi->laddr : iph->daddr, 122 hp->source, hp->dest,
42 hp->source, 123 skb->dev, NFT_LOOKUP_ESTABLISHED);
43 tgi->lport ? tgi->lport : hp->dest, 124
44 par->in, true); 125 laddr = tproxy_laddr4(skb, laddr, iph->daddr);
126 if (!lport)
127 lport = hp->dest;
128
129 /* UDP has no TCP_TIME_WAIT state, so we never enter here */
130 if (sk && sk->sk_state == TCP_TIME_WAIT)
131 /* reopening a TIME_WAIT connection needs special handling */
132 sk = tproxy_handle_time_wait4(skb, laddr, lport, sk);
133 else if (!sk)
134 /* no, there's no established connection, check if
135 * there's a listener on the redirected addr/port */
136 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
137 iph->saddr, laddr,
138 hp->source, lport,
139 skb->dev, NFT_LOOKUP_LISTENER);
140
141 /* NOTE: assign_sock consumes our sk reference */
142 if (sk && nf_tproxy_assign_sock(skb, sk)) {
143 /* This should be in a separate target, but we don't do multiple
144 targets on the same rule yet */
145 skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
146
147 pr_debug("redirecting: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
148 iph->protocol, &iph->daddr, ntohs(hp->dest),
149 &laddr, ntohs(lport), skb->mark);
150 return NF_ACCEPT;
151 }
152
153 pr_debug("no socket, dropping: proto %hhu %pI4:%hu -> %pI4:%hu, mark: %x\n",
154 iph->protocol, &iph->saddr, ntohs(hp->source),
155 &iph->daddr, ntohs(hp->dest), skb->mark);
156 return NF_DROP;
157}
158
159static unsigned int
160tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par)
161{
162 const struct xt_tproxy_target_info *tgi = par->targinfo;
163
164 return tproxy_tg4(skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value);
165}
166
167static unsigned int
168tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
169{
170 const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
171
172 return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
173}
174
175#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
176
177static inline const struct in6_addr *
178tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
179 const struct in6_addr *daddr)
180{
181 struct inet6_dev *indev;
182 struct inet6_ifaddr *ifa;
183 struct in6_addr *laddr;
184
185 if (!ipv6_addr_any(user_laddr))
186 return user_laddr;
187 laddr = NULL;
188
189 rcu_read_lock();
190 indev = __in6_dev_get(skb->dev);
191 if (indev)
192 list_for_each_entry(ifa, &indev->addr_list, if_list) {
193 if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
194 continue;
195
196 laddr = &ifa->addr;
197 break;
198 }
199 rcu_read_unlock();
200
201 return laddr ? laddr : daddr;
202}
203
204/**
205 * tproxy_handle_time_wait6() - handle IPv6 TCP TIME_WAIT reopen redirections
206 * @skb: The skb being processed.
207 * @tproto: Transport protocol.
208 * @thoff: Transport protocol header offset.
209 * @par: Iptables target parameters.
210 * @sk: The TIME_WAIT TCP socket found by the lookup.
211 *
 212 * We have to handle SYN packets arriving at TIME_WAIT sockets
213 * differently: instead of reopening the connection we should rather
214 * redirect the new connection to the proxy if there's a listener
215 * socket present.
216 *
217 * tproxy_handle_time_wait6() consumes the socket reference passed in.
218 *
219 * Returns the listener socket if there's one, the TIME_WAIT socket if
220 * no such listener is found, or NULL if the TCP header is incomplete.
221 */
222static struct sock *
223tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
224 const struct xt_action_param *par,
225 struct sock *sk)
226{
227 const struct ipv6hdr *iph = ipv6_hdr(skb);
228 struct tcphdr _hdr, *hp;
229 const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
230
231 hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
232 if (hp == NULL) {
233 inet_twsk_put(inet_twsk(sk));
234 return NULL;
235 }
236
237 if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
238 /* SYN to a TIME_WAIT socket, we'd rather redirect it
239 * to a listener socket if there's one */
240 struct sock *sk2;
241
242 sk2 = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
243 &iph->saddr,
244 tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
245 hp->source,
246 tgi->lport ? tgi->lport : hp->dest,
247 skb->dev, NFT_LOOKUP_LISTENER);
248 if (sk2) {
249 inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
250 inet_twsk_put(inet_twsk(sk));
251 sk = sk2;
252 }
253 }
254
255 return sk;
256}
257
258static unsigned int
259tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
260{
261 const struct ipv6hdr *iph = ipv6_hdr(skb);
262 const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
263 struct udphdr _hdr, *hp;
264 struct sock *sk;
265 const struct in6_addr *laddr;
266 __be16 lport;
267 int thoff;
268 int tproto;
269
270 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
271 if (tproto < 0) {
272 pr_debug("unable to find transport header in IPv6 packet, dropping\n");
273 return NF_DROP;
274 }
275
276 hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
277 if (hp == NULL) {
278 pr_debug("unable to grab transport header contents in IPv6 packet, dropping\n");
279 return NF_DROP;
280 }
281
282 /* check if there's an ongoing connection on the packet
283 * addresses, this happens if the redirect already happened
284 * and the current packet belongs to an already established
285 * connection */
286 sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
287 &iph->saddr, &iph->daddr,
288 hp->source, hp->dest,
289 par->in, NFT_LOOKUP_ESTABLISHED);
290
291 laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
292 lport = tgi->lport ? tgi->lport : hp->dest;
293
294 /* UDP has no TCP_TIME_WAIT state, so we never enter here */
295 if (sk && sk->sk_state == TCP_TIME_WAIT)
296 /* reopening a TIME_WAIT connection needs special handling */
297 sk = tproxy_handle_time_wait6(skb, tproto, thoff, par, sk);
298 else if (!sk)
 299		/* no, there's no established connection, check if
300 * there's a listener on the redirected addr/port */
301 sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
302 &iph->saddr, laddr,
303 hp->source, lport,
304 par->in, NFT_LOOKUP_LISTENER);
45 305
46 /* NOTE: assign_sock consumes our sk reference */ 306 /* NOTE: assign_sock consumes our sk reference */
47 if (sk && nf_tproxy_assign_sock(skb, sk)) { 307 if (sk && nf_tproxy_assign_sock(skb, sk)) {
@@ -49,19 +309,34 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
49 targets on the same rule yet */ 309 targets on the same rule yet */
50 skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; 310 skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
51 311
52 pr_debug("redirecting: proto %u %08x:%u -> %08x:%u, mark: %x\n", 312 pr_debug("redirecting: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
53 iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), 313 tproto, &iph->saddr, ntohs(hp->source),
54 ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); 314 laddr, ntohs(lport), skb->mark);
55 return NF_ACCEPT; 315 return NF_ACCEPT;
56 } 316 }
57 317
58 pr_debug("no socket, dropping: proto %u %08x:%u -> %08x:%u, mark: %x\n", 318 pr_debug("no socket, dropping: proto %hhu %pI6:%hu -> %pI6:%hu, mark: %x\n",
59 iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), 319 tproto, &iph->saddr, ntohs(hp->source),
60 ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); 320 &iph->daddr, ntohs(hp->dest), skb->mark);
321
61 return NF_DROP; 322 return NF_DROP;
62} 323}
63 324
64static int tproxy_tg_check(const struct xt_tgchk_param *par) 325static int tproxy_tg6_check(const struct xt_tgchk_param *par)
326{
327 const struct ip6t_ip6 *i = par->entryinfo;
328
329 if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
330 && !(i->flags & IP6T_INV_PROTO))
331 return 0;
332
333 pr_info("Can be used only in combination with "
334 "either -p tcp or -p udp\n");
335 return -EINVAL;
336}
337#endif
338
339static int tproxy_tg4_check(const struct xt_tgchk_param *par)
65{ 340{
66 const struct ipt_ip *i = par->entryinfo; 341 const struct ipt_ip *i = par->entryinfo;
67 342
@@ -74,31 +349,64 @@ static int tproxy_tg_check(const struct xt_tgchk_param *par)
74 return -EINVAL; 349 return -EINVAL;
75} 350}
76 351
77static struct xt_target tproxy_tg_reg __read_mostly = { 352static struct xt_target tproxy_tg_reg[] __read_mostly = {
78 .name = "TPROXY", 353 {
79 .family = AF_INET, 354 .name = "TPROXY",
80 .table = "mangle", 355 .family = NFPROTO_IPV4,
81 .target = tproxy_tg, 356 .table = "mangle",
82 .targetsize = sizeof(struct xt_tproxy_target_info), 357 .target = tproxy_tg4_v0,
83 .checkentry = tproxy_tg_check, 358 .revision = 0,
84 .hooks = 1 << NF_INET_PRE_ROUTING, 359 .targetsize = sizeof(struct xt_tproxy_target_info),
85 .me = THIS_MODULE, 360 .checkentry = tproxy_tg4_check,
361 .hooks = 1 << NF_INET_PRE_ROUTING,
362 .me = THIS_MODULE,
363 },
364 {
365 .name = "TPROXY",
366 .family = NFPROTO_IPV4,
367 .table = "mangle",
368 .target = tproxy_tg4_v1,
369 .revision = 1,
370 .targetsize = sizeof(struct xt_tproxy_target_info_v1),
371 .checkentry = tproxy_tg4_check,
372 .hooks = 1 << NF_INET_PRE_ROUTING,
373 .me = THIS_MODULE,
374 },
375#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
376 {
377 .name = "TPROXY",
378 .family = NFPROTO_IPV6,
379 .table = "mangle",
380 .target = tproxy_tg6_v1,
381 .revision = 1,
382 .targetsize = sizeof(struct xt_tproxy_target_info_v1),
383 .checkentry = tproxy_tg6_check,
384 .hooks = 1 << NF_INET_PRE_ROUTING,
385 .me = THIS_MODULE,
386 },
387#endif
388
86}; 389};
87 390
88static int __init tproxy_tg_init(void) 391static int __init tproxy_tg_init(void)
89{ 392{
90 nf_defrag_ipv4_enable(); 393 nf_defrag_ipv4_enable();
91 return xt_register_target(&tproxy_tg_reg); 394#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
395 nf_defrag_ipv6_enable();
396#endif
397
398 return xt_register_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg));
92} 399}
93 400
94static void __exit tproxy_tg_exit(void) 401static void __exit tproxy_tg_exit(void)
95{ 402{
96 xt_unregister_target(&tproxy_tg_reg); 403 xt_unregister_targets(tproxy_tg_reg, ARRAY_SIZE(tproxy_tg_reg));
97} 404}
98 405
99module_init(tproxy_tg_init); 406module_init(tproxy_tg_init);
100module_exit(tproxy_tg_exit); 407module_exit(tproxy_tg_exit);
101MODULE_LICENSE("GPL"); 408MODULE_LICENSE("GPL");
102MODULE_AUTHOR("Krisztian Kovacs"); 409MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
103MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module."); 410MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module.");
104MODULE_ALIAS("ipt_TPROXY"); 411MODULE_ALIAS("ipt_TPROXY");
412MODULE_ALIAS("ip6t_TPROXY");
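
The TIME_WAIT handlers above divert only a pure SYN, that is, a genuine attempt to reopen the connection, toward a listener; segments carrying RST, ACK or FIN continue to the TIME_WAIT socket so its normal teardown logic applies. The test is just four header flags, as this standalone sketch shows:

	#include <stdbool.h>
	#include <stdio.h>

	struct tcp_flags { bool syn, rst, ack, fin; };

	/* the test from tproxy_handle_time_wait4()/6(): only a pure SYN
	 * is diverted from the TIME_WAIT socket to a listener */
	static bool reopen_attempt(const struct tcp_flags *f)
	{
		return f->syn && !f->rst && !f->ack && !f->fin;
	}

	int main(void)
	{
		struct tcp_flags syn    = { .syn = true };
		struct tcp_flags synack = { .syn = true, .ack = true };

		printf("SYN:     %s\n", reopen_attempt(&syn)
		       ? "redirect to listener" : "leave on TIME_WAIT");
		printf("SYN+ACK: %s\n", reopen_attempt(&synack)
		       ? "redirect to listener" : "leave on TIME_WAIT");
		return 0;
	}
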
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index b46a8390896d..9228ee0dc11a 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -448,6 +448,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
448{ 448{
449 __be16 _ports[2], *ports; 449 __be16 _ports[2], *ports;
450 u8 nexthdr; 450 u8 nexthdr;
451 int poff;
451 452
452 memset(dst, 0, sizeof(*dst)); 453 memset(dst, 0, sizeof(*dst));
453 454
@@ -492,19 +493,13 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
492 return 0; 493 return 0;
493 } 494 }
494 495
495 switch (nexthdr) { 496 poff = proto_ports_offset(nexthdr);
496 case IPPROTO_TCP: 497 if (poff >= 0) {
497 case IPPROTO_UDP: 498 ports = skb_header_pointer(skb, protoff + poff, sizeof(_ports),
498 case IPPROTO_UDPLITE:
499 case IPPROTO_SCTP:
500 case IPPROTO_DCCP:
501 ports = skb_header_pointer(skb, protoff, sizeof(_ports),
502 &_ports); 499 &_ports);
503 break; 500 } else {
504 default:
505 _ports[0] = _ports[1] = 0; 501 _ports[0] = _ports[1] = 0;
506 ports = _ports; 502 ports = _ports;
507 break;
508 } 503 }
509 if (!ports) 504 if (!ports)
510 return -1; 505 return -1;
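
hashlimit now delegates port location to proto_ports_offset(): a non-negative return is the byte offset of the 16-bit pair (ports, or an SPI) inside the transport header, and -1 means the protocol has none, in which case the ports hash as zero. A user-space sketch of that contract; the authoritative protocol list lives in the kernel helper, so the cases below are illustrative:

	#include <netinet/in.h>
	#include <stdio.h>

	/* sketch of the proto_ports_offset() contract the patch relies on;
	 * the kernel helper covers more protocols than listed here */
	static int ports_offset(int proto)
	{
		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_SCTP:
		case IPPROTO_DCCP:
			return 0;
		case IPPROTO_AH:
			return 4;	/* SPI sits after the AH next-header/length words */
		default:
			return -1;
		}
	}

	int main(void)
	{
		printf("tcp  -> %d\n", ports_offset(IPPROTO_TCP));	/* 0 */
		printf("icmp -> %d\n", ports_offset(IPPROTO_ICMP));	/* -1 */
		return 0;
	}
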
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 7a4d66db95ae..9127a3d8aa35 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -16,7 +16,6 @@
16#include <linux/ip_vs.h> 16#include <linux/ip_vs.h>
17#include <linux/types.h> 17#include <linux/types.h>
18#include <linux/netfilter/x_tables.h> 18#include <linux/netfilter/x_tables.h>
19#include <linux/netfilter/x_tables.h>
20#include <linux/netfilter/xt_ipvs.h> 19#include <linux/netfilter/xt_ipvs.h>
21#include <net/netfilter/nf_conntrack.h> 20#include <net/netfilter/nf_conntrack.h>
22 21
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 1ca89908cbad..2dbd4c857735 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -14,6 +14,7 @@
14#include <linux/skbuff.h> 14#include <linux/skbuff.h>
15#include <linux/netfilter/x_tables.h> 15#include <linux/netfilter/x_tables.h>
16#include <linux/netfilter_ipv4/ip_tables.h> 16#include <linux/netfilter_ipv4/ip_tables.h>
17#include <linux/netfilter_ipv6/ip6_tables.h>
17#include <net/tcp.h> 18#include <net/tcp.h>
18#include <net/udp.h> 19#include <net/udp.h>
19#include <net/icmp.h> 20#include <net/icmp.h>
@@ -21,6 +22,7 @@
21#include <net/inet_sock.h> 22#include <net/inet_sock.h>
22#include <net/netfilter/nf_tproxy_core.h> 23#include <net/netfilter/nf_tproxy_core.h>
23#include <net/netfilter/ipv4/nf_defrag_ipv4.h> 24#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
25#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
24 26
25#include <linux/netfilter/xt_socket.h> 27#include <linux/netfilter/xt_socket.h>
26 28
@@ -30,7 +32,7 @@
30#endif 32#endif
31 33
32static int 34static int
33extract_icmp_fields(const struct sk_buff *skb, 35extract_icmp4_fields(const struct sk_buff *skb,
34 u8 *protocol, 36 u8 *protocol,
35 __be32 *raddr, 37 __be32 *raddr,
36 __be32 *laddr, 38 __be32 *laddr,
@@ -86,7 +88,6 @@ extract_icmp_fields(const struct sk_buff *skb,
86 return 0; 88 return 0;
87} 89}
88 90
89
90static bool 91static bool
91socket_match(const struct sk_buff *skb, struct xt_action_param *par, 92socket_match(const struct sk_buff *skb, struct xt_action_param *par,
92 const struct xt_socket_mtinfo1 *info) 93 const struct xt_socket_mtinfo1 *info)
@@ -115,7 +116,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
115 dport = hp->dest; 116 dport = hp->dest;
116 117
117 } else if (iph->protocol == IPPROTO_ICMP) { 118 } else if (iph->protocol == IPPROTO_ICMP) {
118 if (extract_icmp_fields(skb, &protocol, &saddr, &daddr, 119 if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
119 &sport, &dport)) 120 &sport, &dport))
120 return false; 121 return false;
121 } else { 122 } else {
@@ -142,7 +143,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
142#endif 143#endif
143 144
144 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, 145 sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
145 saddr, daddr, sport, dport, par->in, false); 146 saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
146 if (sk != NULL) { 147 if (sk != NULL) {
147 bool wildcard; 148 bool wildcard;
148 bool transparent = true; 149 bool transparent = true;
@@ -165,32 +166,157 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
165 sk = NULL; 166 sk = NULL;
166 } 167 }
167 168
168 pr_debug("proto %u %08x:%u -> %08x:%u (orig %08x:%u) sock %p\n", 169 pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
169 protocol, ntohl(saddr), ntohs(sport), 170 protocol, &saddr, ntohs(sport),
170 ntohl(daddr), ntohs(dport), 171 &daddr, ntohs(dport),
171 ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk); 172 &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
172 173
173 return (sk != NULL); 174 return (sk != NULL);
174} 175}
175 176
176static bool 177static bool
177socket_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) 178socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par)
178{ 179{
179 return socket_match(skb, par, NULL); 180 return socket_match(skb, par, NULL);
180} 181}
181 182
182static bool 183static bool
183socket_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) 184socket_mt4_v1(const struct sk_buff *skb, struct xt_action_param *par)
184{ 185{
185 return socket_match(skb, par, par->matchinfo); 186 return socket_match(skb, par, par->matchinfo);
186} 187}
187 188
189#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
190
191static int
192extract_icmp6_fields(const struct sk_buff *skb,
193 unsigned int outside_hdrlen,
 194		     int *protocol,
195 struct in6_addr **raddr,
196 struct in6_addr **laddr,
197 __be16 *rport,
198 __be16 *lport)
199{
200 struct ipv6hdr *inside_iph, _inside_iph;
201 struct icmp6hdr *icmph, _icmph;
202 __be16 *ports, _ports[2];
203 u8 inside_nexthdr;
204 int inside_hdrlen;
205
206 icmph = skb_header_pointer(skb, outside_hdrlen,
207 sizeof(_icmph), &_icmph);
208 if (icmph == NULL)
209 return 1;
210
211 if (icmph->icmp6_type & ICMPV6_INFOMSG_MASK)
212 return 1;
213
214 inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(_icmph), sizeof(_inside_iph), &_inside_iph);
215 if (inside_iph == NULL)
216 return 1;
217 inside_nexthdr = inside_iph->nexthdr;
218
219 inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), &inside_nexthdr);
220 if (inside_hdrlen < 0)
221 return 1; /* hjm: Packet has no/incomplete transport layer headers. */
222
223 if (inside_nexthdr != IPPROTO_TCP &&
224 inside_nexthdr != IPPROTO_UDP)
225 return 1;
226
227 ports = skb_header_pointer(skb, inside_hdrlen,
228 sizeof(_ports), &_ports);
229 if (ports == NULL)
230 return 1;
231
 232	/* the inner IPv6 packet is the one quoted from our side, thus
 233	 * its saddr is the local address */
234 *protocol = inside_nexthdr;
235 *laddr = &inside_iph->saddr;
236 *lport = ports[0];
237 *raddr = &inside_iph->daddr;
238 *rport = ports[1];
239
240 return 0;
241}
242
243static bool
244socket_mt6_v1(const struct sk_buff *skb, struct xt_action_param *par)
245{
246 struct ipv6hdr *iph = ipv6_hdr(skb);
247 struct udphdr _hdr, *hp = NULL;
248 struct sock *sk;
249 struct in6_addr *daddr, *saddr;
250 __be16 dport, sport;
251 int thoff;
252 u8 tproto;
253 const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
254
255 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
256 if (tproto < 0) {
257 pr_debug("unable to find transport header in IPv6 packet, dropping\n");
258 return NF_DROP;
259 }
260
261 if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
262 hp = skb_header_pointer(skb, thoff,
263 sizeof(_hdr), &_hdr);
264 if (hp == NULL)
265 return false;
266
267 saddr = &iph->saddr;
268 sport = hp->source;
269 daddr = &iph->daddr;
270 dport = hp->dest;
271
272 } else if (tproto == IPPROTO_ICMPV6) {
273 if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
274 &sport, &dport))
275 return false;
276 } else {
277 return false;
278 }
279
280 sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
281 saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY);
282 if (sk != NULL) {
283 bool wildcard;
284 bool transparent = true;
285
286 /* Ignore sockets listening on INADDR_ANY */
287 wildcard = (sk->sk_state != TCP_TIME_WAIT &&
288 ipv6_addr_any(&inet6_sk(sk)->rcv_saddr));
289
290 /* Ignore non-transparent sockets,
291 if XT_SOCKET_TRANSPARENT is used */
292 if (info && info->flags & XT_SOCKET_TRANSPARENT)
293 transparent = ((sk->sk_state != TCP_TIME_WAIT &&
294 inet_sk(sk)->transparent) ||
295 (sk->sk_state == TCP_TIME_WAIT &&
296 inet_twsk(sk)->tw_transparent));
297
298 nf_tproxy_put_sock(sk);
299
300 if (wildcard || !transparent)
301 sk = NULL;
302 }
303
304 pr_debug("proto %hhu %pI6:%hu -> %pI6:%hu "
305 "(orig %pI6:%hu) sock %p\n",
306 tproto, saddr, ntohs(sport),
307 daddr, ntohs(dport),
308 &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
309
310 return (sk != NULL);
311}
312#endif
313
188static struct xt_match socket_mt_reg[] __read_mostly = { 314static struct xt_match socket_mt_reg[] __read_mostly = {
189 { 315 {
190 .name = "socket", 316 .name = "socket",
191 .revision = 0, 317 .revision = 0,
192 .family = NFPROTO_IPV4, 318 .family = NFPROTO_IPV4,
193 .match = socket_mt_v0, 319 .match = socket_mt4_v0,
194 .hooks = (1 << NF_INET_PRE_ROUTING) | 320 .hooks = (1 << NF_INET_PRE_ROUTING) |
195 (1 << NF_INET_LOCAL_IN), 321 (1 << NF_INET_LOCAL_IN),
196 .me = THIS_MODULE, 322 .me = THIS_MODULE,
@@ -199,17 +325,33 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
199 .name = "socket", 325 .name = "socket",
200 .revision = 1, 326 .revision = 1,
201 .family = NFPROTO_IPV4, 327 .family = NFPROTO_IPV4,
202 .match = socket_mt_v1, 328 .match = socket_mt4_v1,
203 .matchsize = sizeof(struct xt_socket_mtinfo1), 329 .matchsize = sizeof(struct xt_socket_mtinfo1),
204 .hooks = (1 << NF_INET_PRE_ROUTING) | 330 .hooks = (1 << NF_INET_PRE_ROUTING) |
205 (1 << NF_INET_LOCAL_IN), 331 (1 << NF_INET_LOCAL_IN),
206 .me = THIS_MODULE, 332 .me = THIS_MODULE,
207 }, 333 },
334#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
335 {
336 .name = "socket",
337 .revision = 1,
338 .family = NFPROTO_IPV6,
339 .match = socket_mt6_v1,
340 .matchsize = sizeof(struct xt_socket_mtinfo1),
341 .hooks = (1 << NF_INET_PRE_ROUTING) |
342 (1 << NF_INET_LOCAL_IN),
343 .me = THIS_MODULE,
344 },
345#endif
208}; 346};
209 347
210static int __init socket_mt_init(void) 348static int __init socket_mt_init(void)
211{ 349{
212 nf_defrag_ipv4_enable(); 350 nf_defrag_ipv4_enable();
351#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
352 nf_defrag_ipv6_enable();
353#endif
354
213 return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); 355 return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
214} 356}
215 357
@@ -225,3 +367,4 @@ MODULE_LICENSE("GPL");
225MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); 367MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler");
226MODULE_DESCRIPTION("x_tables socket match module"); 368MODULE_DESCRIPTION("x_tables socket match module");
227MODULE_ALIAS("ipt_socket"); 369MODULE_ALIAS("ipt_socket");
370MODULE_ALIAS("ip6t_socket");
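
The new extract_icmp6_fields() mirrors the IPv4 helper: an ICMPv6 error quotes the offending packet in its payload, so the local and remote addresses and ports can be recovered from the embedded header. Below is a minimal userspace sketch of the same parsing, assuming the quoted packet carries no extension headers (the kernel version walks them with ipv6_skip_exthdr()); it is an illustration, not the kernel code.

    #include <netinet/icmp6.h>
    #include <netinet/ip6.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    struct tuple6 {
        struct in6_addr laddr, raddr;
        uint16_t lport, rport;      /* kept in network byte order */
        uint8_t proto;
    };

    /* Parse an ICMPv6 error and recover the quoted packet's 5-tuple.
     * The quoted packet was sent by us, so its source is the local end. */
    static int icmp6_quoted_tuple(const uint8_t *buf, size_t len,
                                  struct tuple6 *t)
    {
        const struct icmp6_hdr *icmph = (const void *)buf;
        const struct ip6_hdr *inner;
        uint16_t ports[2];

        if (len < sizeof(*icmph) + sizeof(*inner) + sizeof(ports))
            return -1;
        if (icmph->icmp6_type & ICMP6_INFOMSG_MASK)
            return -1;          /* only error messages quote a packet */

        inner = (const void *)(buf + sizeof(*icmph));
        if (inner->ip6_nxt != IPPROTO_TCP && inner->ip6_nxt != IPPROTO_UDP)
            return -1;          /* no extension-header walk in this sketch */

        memcpy(ports, inner + 1, sizeof(ports));
        t->proto = inner->ip6_nxt;
        t->laddr = inner->ip6_src;  /* quoted source = local address */
        t->lport = ports[0];
        t->raddr = inner->ip6_dst;
        t->rport = ports[1];
        return 0;
    }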
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 26ed3e8587c2..1781d99145e2 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -547,8 +547,20 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
547 info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; 547 info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN;
548 info.attrs = family->attrbuf; 548 info.attrs = family->attrbuf;
549 genl_info_net_set(&info, net); 549 genl_info_net_set(&info, net);
550 memset(&info.user_ptr, 0, sizeof(info.user_ptr));
550 551
551 return ops->doit(skb, &info); 552 if (family->pre_doit) {
553 err = family->pre_doit(ops, skb, &info);
554 if (err)
555 return err;
556 }
557
558 err = ops->doit(skb, &info);
559
560 if (family->post_doit)
561 family->post_doit(ops, skb, &info);
562
563 return err;
552} 564}
553 565
554static void genl_rcv(struct sk_buff *skb) 566static void genl_rcv(struct sk_buff *skb)
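
With pre_doit/post_doit, a family can hoist per-command setup and teardown out of every doit handler, and info.user_ptr (zeroed above in genl_rcv_msg()) is the natural place to pass state between the two hooks. A hedged sketch of a family using them — the "example" family and the EXAMPLE_* attribute names are hypothetical, not taken from this patch:

    /* Resolve and hold a net_device once per command; every doit handler
     * then reads it from info->user_ptr[0], and post_doit releases it. */
    static int example_pre_doit(struct genl_ops *ops, struct sk_buff *skb,
                                struct genl_info *info)
    {
        struct net_device *dev;

        if (!info->attrs[EXAMPLE_ATTR_IFINDEX])
            return -EINVAL;
        dev = dev_get_by_index(genl_info_net(info),
                               nla_get_u32(info->attrs[EXAMPLE_ATTR_IFINDEX]));
        if (!dev)
            return -ENODEV;
        info->user_ptr[0] = dev;
        return 0;
    }

    static void example_post_doit(struct genl_ops *ops, struct sk_buff *skb,
                                  struct genl_info *info)
    {
        dev_put(info->user_ptr[0]);
    }

    static struct genl_family example_family = {
        .id        = GENL_ID_GENERATE,
        .name      = "example",
        .version   = 1,
        .maxattr   = EXAMPLE_ATTR_MAX,
        .pre_doit  = example_pre_doit,
        .post_doit = example_post_doit,
    };

The hooks run only for doit operations, so dumpit handlers still do their own setup.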
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9a17f28b1253..3616f27b9d46 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -488,7 +488,7 @@ retry:
488 skb->dev = dev; 488 skb->dev = dev;
489 skb->priority = sk->sk_priority; 489 skb->priority = sk->sk_priority;
490 skb->mark = sk->sk_mark; 490 skb->mark = sk->sk_mark;
491 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 491 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
492 if (err < 0) 492 if (err < 0)
493 goto out_unlock; 493 goto out_unlock;
494 494
@@ -1209,7 +1209,7 @@ static int packet_snd(struct socket *sock,
1209 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); 1209 err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
1210 if (err) 1210 if (err)
1211 goto out_free; 1211 goto out_free;
1212 err = sock_tx_timestamp(msg, sk, skb_tx(skb)); 1212 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
1213 if (err < 0) 1213 if (err < 0)
1214 goto out_free; 1214 goto out_free;
1215 1215
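
The two hunks above switch packet sockets over to the tx_flags word that now lives in skb_shared_info. On the userspace side those flags are driven by SO_TIMESTAMPING; a minimal sketch of requesting software transmit timestamps, with error handling left to the caller:

    #include <sys/socket.h>
    #include <linux/net_tstamp.h>

    /* Ask the kernel to timestamp outgoing packets in software; the stack
     * then sets the matching bits in skb_shinfo(skb)->tx_flags. */
    static int enable_tx_timestamps(int fd)
    {
        int val = SOF_TIMESTAMPING_TX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE;

        return setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
                          &val, sizeof(val));
    }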
diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig
index 6ec7d55b1769..0d9b8a220a78 100644
--- a/net/phonet/Kconfig
+++ b/net/phonet/Kconfig
@@ -14,3 +14,15 @@ config PHONET
14 14
15 To compile this driver as a module, choose M here: the module 15 To compile this driver as a module, choose M here: the module
16 will be called phonet. If unsure, say N. 16 will be called phonet. If unsure, say N.
17
18config PHONET_PIPECTRLR
19 bool "Phonet Pipe Controller (EXPERIMENTAL)"
20 depends on PHONET && EXPERIMENTAL
 21	default n
22 help
 23	  Pipe controller support in the Phonet stack, needed to carry pipe
 24	  data with Nokia slim modems such as the WG2.5 used on the
 25	  ST-Ericsson U8500 platform.
26
27 This option is incompatible with older Nokia modems.
28 Say N here unless you really know what you are doing.
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 73aee7f2fcdc..fd95beb72f5d 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -251,6 +251,16 @@ int pn_skb_send(struct sock *sk, struct sk_buff *skb,
251 else if (phonet_address_lookup(net, daddr) == 0) { 251 else if (phonet_address_lookup(net, daddr) == 0) {
252 dev = phonet_device_get(net); 252 dev = phonet_device_get(net);
253 skb->pkt_type = PACKET_LOOPBACK; 253 skb->pkt_type = PACKET_LOOPBACK;
254 } else if (pn_sockaddr_get_object(target) == 0) {
255 /* Resource routing (small race until phonet_rcv()) */
256 struct sock *sk = pn_find_sock_by_res(net,
257 target->spn_resource);
258 if (sk) {
259 sock_put(sk);
260 dev = phonet_device_get(net);
261 skb->pkt_type = PACKET_LOOPBACK;
262 } else
263 dev = phonet_route_output(net, daddr);
254 } else 264 } else
255 dev = phonet_route_output(net, daddr); 265 dev = phonet_route_output(net, daddr);
256 266
@@ -383,6 +393,13 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
383 goto out; 393 goto out;
384 } 394 }
385 395
396 /* resource routing */
397 if (pn_sockaddr_get_object(&sa) == 0) {
398 struct sock *sk = pn_find_sock_by_res(net, sa.spn_resource);
399 if (sk)
400 return sk_receive_skb(sk, skb, 0);
401 }
402
386 /* check if we are the destination */ 403 /* check if we are the destination */
387 if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) { 404 if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) {
388 /* Phonet packet input */ 405 /* Phonet packet input */
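
Resource routing gives a Phonet destination with object 0 a second meaning: deliver to whichever socket has bound that resource. From userspace this looks roughly like the sketch below (field names from <linux/phonet.h>; the resource id is hypothetical):

    #include <linux/phonet.h>
    #include <stdint.h>
    #include <string.h>
    #include <sys/socket.h>

    /* Send a datagram addressed by resource id rather than device address.
     * spn_dev and spn_obj stay zero, so pn_skb_send()/phonet_rcv() take
     * the new resource-routing branch. */
    static ssize_t send_to_resource(int fd, const void *buf, size_t len,
                                    uint8_t res)
    {
        struct sockaddr_pn spn;

        memset(&spn, 0, sizeof(spn));
        spn.spn_family = AF_PHONET;
        spn.spn_resource = res;
        return sendto(fd, buf, len, 0,
                      (struct sockaddr *)&spn, sizeof(spn));
    }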
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 1bd38db4fe1e..2f032381bd45 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -52,6 +52,19 @@ static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg)
52 answ = skb ? skb->len : 0; 52 answ = skb ? skb->len : 0;
53 release_sock(sk); 53 release_sock(sk);
54 return put_user(answ, (int __user *)arg); 54 return put_user(answ, (int __user *)arg);
55
56 case SIOCPNADDRESOURCE:
57 case SIOCPNDELRESOURCE: {
58 u32 res;
59 if (get_user(res, (u32 __user *)arg))
60 return -EFAULT;
61 if (res >= 256)
62 return -EINVAL;
63 if (cmd == SIOCPNADDRESOURCE)
64 return pn_sock_bind_res(sk, res);
65 else
66 return pn_sock_unbind_res(sk, res);
67 }
55 } 68 }
56 69
57 return -ENOIOCTLCMD; 70 return -ENOIOCTLCMD;
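
The matching userspace call for the new ioctls might look like the sketch below (SIOCPNADDRESOURCE and SIOCPNDELRESOURCE come from <linux/phonet.h>; note that pn_sock_bind_res() additionally requires CAP_SYS_ADMIN):

    #include <linux/phonet.h>
    #include <stdint.h>
    #include <sys/ioctl.h>

    /* Claim (or, with SIOCPNDELRESOURCE, release) a Phonet resource id
     * for this socket; ids above 255 are rejected with -EINVAL above. */
    static int claim_resource(int fd, uint32_t res)
    {
        return ioctl(fd, SIOCPNADDRESOURCE, &res);
    }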
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 15003021f4f0..3e60f2e4e6c2 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -109,6 +109,210 @@ static int pep_reply(struct sock *sk, struct sk_buff *oskb,
109} 109}
110 110
111#define PAD 0x00 111#define PAD 0x00
112
113#ifdef CONFIG_PHONET_PIPECTRLR
114static u8 pipe_negotiate_fc(u8 *host_fc, u8 *remote_fc, int len)
115{
116 int i, j;
117 u8 base_fc, final_fc;
118
119 for (i = 0; i < len; i++) {
120 base_fc = host_fc[i];
121 for (j = 0; j < len; j++) {
122 if (remote_fc[j] == base_fc) {
123 final_fc = base_fc;
124 goto done;
125 }
126 }
127 }
128 return -EINVAL;
129
130done:
131 return final_fc;
132
133}
134
135static int pipe_get_flow_info(struct sock *sk, struct sk_buff *skb,
136 u8 *pref_rx_fc, u8 *req_tx_fc)
137{
138 struct pnpipehdr *hdr;
139 u8 n_sb;
140
141 if (!pskb_may_pull(skb, sizeof(*hdr) + 4))
142 return -EINVAL;
143
144 hdr = pnp_hdr(skb);
145 n_sb = hdr->data[4];
146
147 __skb_pull(skb, sizeof(*hdr) + 4);
148 while (n_sb > 0) {
149 u8 type, buf[3], len = sizeof(buf);
150 u8 *data = pep_get_sb(skb, &type, &len, buf);
151
152 if (data == NULL)
153 return -EINVAL;
154
155 switch (type) {
156 case PN_PIPE_SB_REQUIRED_FC_TX:
157 if (len < 3 || (data[2] | data[3] | data[4]) > 3)
158 break;
159 req_tx_fc[0] = data[2];
160 req_tx_fc[1] = data[3];
161 req_tx_fc[2] = data[4];
162 break;
163
164 case PN_PIPE_SB_PREFERRED_FC_RX:
165 if (len < 3 || (data[2] | data[3] | data[4]) > 3)
166 break;
167 pref_rx_fc[0] = data[2];
168 pref_rx_fc[1] = data[3];
169 pref_rx_fc[2] = data[4];
170 break;
171
172 }
173 n_sb--;
174 }
175 return 0;
176}
177
178static int pipe_handler_send_req(struct sock *sk, u8 utid,
179 u8 msg_id, gfp_t priority)
180{
181 int len;
182 struct pnpipehdr *ph;
183 struct sk_buff *skb;
184 struct pep_sock *pn = pep_sk(sk);
185
186 static const u8 data[4] = {
187 PAD, PAD, PAD, PAD,
188 };
189
190 switch (msg_id) {
191 case PNS_PEP_CONNECT_REQ:
192 len = sizeof(data);
193 break;
194
195 case PNS_PEP_DISCONNECT_REQ:
196 case PNS_PEP_ENABLE_REQ:
197 case PNS_PEP_DISABLE_REQ:
198 len = 0;
199 break;
200
201 default:
202 return -EINVAL;
203 }
204
205 skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority);
206 if (!skb)
207 return -ENOMEM;
208 skb_set_owner_w(skb, sk);
209
210 skb_reserve(skb, MAX_PNPIPE_HEADER);
211 if (len) {
212 __skb_put(skb, len);
213 skb_copy_to_linear_data(skb, data, len);
214 }
215 __skb_push(skb, sizeof(*ph));
216 skb_reset_transport_header(skb);
217 ph = pnp_hdr(skb);
218 ph->utid = utid;
219 ph->message_id = msg_id;
220 ph->pipe_handle = pn->pipe_handle;
221 ph->error_code = PN_PIPE_NO_ERROR;
222
223 return pn_skb_send(sk, skb, &pn->remote_pep);
224}
225
226static int pipe_handler_send_created_ind(struct sock *sk,
227 u8 utid, u8 msg_id)
228{
229 int err_code;
230 struct pnpipehdr *ph;
231 struct sk_buff *skb;
232
233 struct pep_sock *pn = pep_sk(sk);
234 static u8 data[4] = {
235 0x03, 0x04,
236 };
237 data[2] = pn->tx_fc;
238 data[3] = pn->rx_fc;
239
240 /*
 242	 * Actually, this carries the number of sub-blocks, not an
 243	 * error code. The pipe_created_ind message format has no error
 244	 * code field, but the Phonet stack always sends an error code
 245	 * as part of pnpipehdr, so that err_code slot is used to carry
 246	 * the number of sub-blocks.
246 */
247 err_code = 0x01;
248
249 skb = alloc_skb(MAX_PNPIPE_HEADER + sizeof(data), GFP_ATOMIC);
250 if (!skb)
251 return -ENOMEM;
252 skb_set_owner_w(skb, sk);
253
254 skb_reserve(skb, MAX_PNPIPE_HEADER);
255 __skb_put(skb, sizeof(data));
256 skb_copy_to_linear_data(skb, data, sizeof(data));
257 __skb_push(skb, sizeof(*ph));
258 skb_reset_transport_header(skb);
259 ph = pnp_hdr(skb);
260 ph->utid = utid;
261 ph->message_id = msg_id;
262 ph->pipe_handle = pn->pipe_handle;
263 ph->error_code = err_code;
264
265 return pn_skb_send(sk, skb, &pn->remote_pep);
266}
267
268static int pipe_handler_send_ind(struct sock *sk, u8 utid, u8 msg_id)
269{
270 int err_code;
271 struct pnpipehdr *ph;
272 struct sk_buff *skb;
273 struct pep_sock *pn = pep_sk(sk);
274
275 /*
 276	 * Actually, this is just a filler value.
 277	 * The pipe_enabled/disabled_ind message formats have no error
 278	 * code field, but the Phonet stack always sends an error code
 279	 * as part of pnpipehdr, so that err_code slot is used to carry
 280	 * the filler value.
281 */
282 err_code = 0x0;
283
284 skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC);
285 if (!skb)
286 return -ENOMEM;
287 skb_set_owner_w(skb, sk);
288
289 skb_reserve(skb, MAX_PNPIPE_HEADER);
290 __skb_push(skb, sizeof(*ph));
291 skb_reset_transport_header(skb);
292 ph = pnp_hdr(skb);
293 ph->utid = utid;
294 ph->message_id = msg_id;
295 ph->pipe_handle = pn->pipe_handle;
296 ph->error_code = err_code;
297
298 return pn_skb_send(sk, skb, &pn->remote_pep);
299}
300
301static int pipe_handler_enable_pipe(struct sock *sk, int enable)
302{
303 int utid, req;
304
305 if (enable) {
306 utid = PNS_PIPE_ENABLE_UTID;
307 req = PNS_PEP_ENABLE_REQ;
308 } else {
309 utid = PNS_PIPE_DISABLE_UTID;
310 req = PNS_PEP_DISABLE_REQ;
311 }
312 return pipe_handler_send_req(sk, utid, req, GFP_ATOMIC);
313}
314#endif
315
112static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) 316static int pep_accept_conn(struct sock *sk, struct sk_buff *skb)
113{ 317{
114 static const u8 data[20] = { 318 static const u8 data[20] = {
@@ -192,7 +396,11 @@ static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority)
192 ph->data[3] = PAD; 396 ph->data[3] = PAD;
193 ph->data[4] = status; 397 ph->data[4] = status;
194 398
399#ifdef CONFIG_PHONET_PIPECTRLR
400 return pn_skb_send(sk, skb, &pn->remote_pep);
401#else
195 return pn_skb_send(sk, skb, &pipe_srv); 402 return pn_skb_send(sk, skb, &pipe_srv);
403#endif
196} 404}
197 405
198/* Send our RX flow control information to the sender. 406/* Send our RX flow control information to the sender.
@@ -324,11 +532,35 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
324 sk->sk_state_change(sk); 532 sk->sk_state_change(sk);
325 break; 533 break;
326 534
535#ifdef CONFIG_PHONET_PIPECTRLR
536 case PNS_PEP_DISCONNECT_RESP:
537 pn->pipe_state = PIPE_IDLE;
538 sk->sk_state = TCP_CLOSE;
539 break;
540#endif
541
327 case PNS_PEP_ENABLE_REQ: 542 case PNS_PEP_ENABLE_REQ:
328 /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */ 543 /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */
329 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); 544 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
330 break; 545 break;
331 546
547#ifdef CONFIG_PHONET_PIPECTRLR
548 case PNS_PEP_ENABLE_RESP:
549 pn->pipe_state = PIPE_ENABLED;
550 pipe_handler_send_ind(sk, PNS_PIPE_ENABLED_IND_UTID,
551 PNS_PIPE_ENABLED_IND);
552
553 if (!pn_flow_safe(pn->tx_fc)) {
554 atomic_set(&pn->tx_credits, 1);
555 sk->sk_write_space(sk);
556 }
557 if (sk->sk_state == TCP_ESTABLISHED)
558 break; /* Nothing to do */
559 sk->sk_state = TCP_ESTABLISHED;
560 pipe_grant_credits(sk);
561 break;
562#endif
563
332 case PNS_PEP_RESET_REQ: 564 case PNS_PEP_RESET_REQ:
333 switch (hdr->state_after_reset) { 565 switch (hdr->state_after_reset) {
334 case PN_PIPE_DISABLE: 566 case PN_PIPE_DISABLE:
@@ -347,6 +579,17 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb)
347 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); 579 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
348 break; 580 break;
349 581
582#ifdef CONFIG_PHONET_PIPECTRLR
583 case PNS_PEP_DISABLE_RESP:
584 pn->pipe_state = PIPE_DISABLED;
585 atomic_set(&pn->tx_credits, 0);
586 pipe_handler_send_ind(sk, PNS_PIPE_DISABLED_IND_UTID,
587 PNS_PIPE_DISABLED_IND);
588 sk->sk_state = TCP_SYN_RECV;
589 pn->rx_credits = 0;
590 break;
591#endif
592
350 case PNS_PEP_CTRL_REQ: 593 case PNS_PEP_CTRL_REQ:
351 if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) { 594 if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) {
352 atomic_inc(&sk->sk_drops); 595 atomic_inc(&sk->sk_drops);
@@ -438,6 +681,42 @@ static void pipe_destruct(struct sock *sk)
438 skb_queue_purge(&pn->ctrlreq_queue); 681 skb_queue_purge(&pn->ctrlreq_queue);
439} 682}
440 683
684#ifdef CONFIG_PHONET_PIPECTRLR
685static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
686{
687 struct pep_sock *pn = pep_sk(sk);
688 u8 host_pref_rx_fc[3] = {3, 2, 1}, host_req_tx_fc[3] = {3, 2, 1};
689 u8 remote_pref_rx_fc[3], remote_req_tx_fc[3];
690 u8 negotiated_rx_fc, negotiated_tx_fc;
691 int ret;
692
693 pipe_get_flow_info(sk, skb, remote_pref_rx_fc,
694 remote_req_tx_fc);
695 negotiated_tx_fc = pipe_negotiate_fc(remote_req_tx_fc,
696 host_pref_rx_fc,
697 sizeof(host_pref_rx_fc));
698 negotiated_rx_fc = pipe_negotiate_fc(host_req_tx_fc,
699 remote_pref_rx_fc,
700 sizeof(host_pref_rx_fc));
701
702 pn->pipe_state = PIPE_DISABLED;
703 sk->sk_state = TCP_SYN_RECV;
704 sk->sk_backlog_rcv = pipe_do_rcv;
705 sk->sk_destruct = pipe_destruct;
706 pn->rx_credits = 0;
707 pn->rx_fc = negotiated_rx_fc;
708 pn->tx_fc = negotiated_tx_fc;
709 sk->sk_state_change(sk);
710
711 ret = pipe_handler_send_created_ind(sk,
712 PNS_PIPE_CREATED_IND_UTID,
713 PNS_PIPE_CREATED_IND
714 );
715
716 return ret;
717}
718#endif
719
441static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) 720static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb)
442{ 721{
443 struct sock *newsk; 722 struct sock *newsk;
@@ -601,6 +880,12 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb)
601 err = pep_connreq_rcv(sk, skb); 880 err = pep_connreq_rcv(sk, skb);
602 break; 881 break;
603 882
883#ifdef CONFIG_PHONET_PIPECTRLR
884 case PNS_PEP_CONNECT_RESP:
885 err = pep_connresp_rcv(sk, skb);
886 break;
887#endif
888
604 case PNS_PEP_DISCONNECT_REQ: 889 case PNS_PEP_DISCONNECT_REQ:
605 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); 890 pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC);
606 break; 891 break;
@@ -621,6 +906,28 @@ drop:
621 return err; 906 return err;
622} 907}
623 908
909static int pipe_do_remove(struct sock *sk)
910{
911 struct pep_sock *pn = pep_sk(sk);
912 struct pnpipehdr *ph;
913 struct sk_buff *skb;
914
915 skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_KERNEL);
916 if (!skb)
917 return -ENOMEM;
918
919 skb_reserve(skb, MAX_PNPIPE_HEADER);
920 __skb_push(skb, sizeof(*ph));
921 skb_reset_transport_header(skb);
922 ph = pnp_hdr(skb);
923 ph->utid = 0;
924 ph->message_id = PNS_PIPE_REMOVE_REQ;
925 ph->pipe_handle = pn->pipe_handle;
926 ph->data[0] = PAD;
927
928 return pn_skb_send(sk, skb, &pipe_srv);
929}
930
624/* associated socket ceases to exist */ 931/* associated socket ceases to exist */
625static void pep_sock_close(struct sock *sk, long timeout) 932static void pep_sock_close(struct sock *sk, long timeout)
626{ 933{
@@ -639,7 +946,22 @@ static void pep_sock_close(struct sock *sk, long timeout)
639 sk_for_each_safe(sknode, p, n, &pn->ackq) 946 sk_for_each_safe(sknode, p, n, &pn->ackq)
640 sk_del_node_init(sknode); 947 sk_del_node_init(sknode);
641 sk->sk_state = TCP_CLOSE; 948 sk->sk_state = TCP_CLOSE;
949 } else if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED))
950 /* Forcefully remove dangling Phonet pipe */
951 pipe_do_remove(sk);
952
953#ifdef CONFIG_PHONET_PIPECTRLR
954 if (pn->pipe_state != PIPE_IDLE) {
955 /* send pep disconnect request */
956 pipe_handler_send_req(sk,
957 PNS_PEP_DISCONNECT_UTID, PNS_PEP_DISCONNECT_REQ,
958 GFP_KERNEL);
959
960 pn->pipe_state = PIPE_IDLE;
961 sk->sk_state = TCP_CLOSE;
642 } 962 }
963#endif
964
643 ifindex = pn->ifindex; 965 ifindex = pn->ifindex;
644 pn->ifindex = 0; 966 pn->ifindex = 0;
645 release_sock(sk); 967 release_sock(sk);
@@ -716,6 +1038,20 @@ out:
716 return newsk; 1038 return newsk;
717} 1039}
718 1040
1041#ifdef CONFIG_PHONET_PIPECTRLR
1042static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len)
1043{
1044 struct pep_sock *pn = pep_sk(sk);
1045 struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
1046
1047 memcpy(&pn->remote_pep, spn, sizeof(struct sockaddr_pn));
1048
1049 return pipe_handler_send_req(sk,
1050 PNS_PEP_CONNECT_UTID, PNS_PEP_CONNECT_REQ,
1051 GFP_ATOMIC);
1052}
1053#endif
1054
719static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) 1055static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
720{ 1056{
721 struct pep_sock *pn = pep_sk(sk); 1057 struct pep_sock *pn = pep_sk(sk);
@@ -767,6 +1103,18 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
767 1103
768 lock_sock(sk); 1104 lock_sock(sk);
769 switch (optname) { 1105 switch (optname) {
1106#ifdef CONFIG_PHONET_PIPECTRLR
1107 case PNPIPE_PIPE_HANDLE:
1108 if (val) {
1109 if (pn->pipe_state > PIPE_IDLE) {
1110 err = -EFAULT;
1111 break;
1112 }
1113 pn->pipe_handle = val;
1114 break;
1115 }
1116#endif
1117
770 case PNPIPE_ENCAP: 1118 case PNPIPE_ENCAP:
771 if (val && val != PNPIPE_ENCAP_IP) { 1119 if (val && val != PNPIPE_ENCAP_IP) {
772 err = -EINVAL; 1120 err = -EINVAL;
@@ -792,6 +1140,17 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
792 err = 0; 1140 err = 0;
793 } 1141 }
794 goto out_norel; 1142 goto out_norel;
1143
1144#ifdef CONFIG_PHONET_PIPECTRLR
1145 case PNPIPE_ENABLE:
1146 if (pn->pipe_state <= PIPE_IDLE) {
1147 err = -ENOTCONN;
1148 break;
1149 }
1150 err = pipe_handler_enable_pipe(sk, val);
1151 break;
1152#endif
1153
795 default: 1154 default:
796 err = -ENOPROTOOPT; 1155 err = -ENOPROTOOPT;
797 } 1156 }
@@ -816,9 +1175,19 @@ static int pep_getsockopt(struct sock *sk, int level, int optname,
816 case PNPIPE_ENCAP: 1175 case PNPIPE_ENCAP:
817 val = pn->ifindex ? PNPIPE_ENCAP_IP : PNPIPE_ENCAP_NONE; 1176 val = pn->ifindex ? PNPIPE_ENCAP_IP : PNPIPE_ENCAP_NONE;
818 break; 1177 break;
1178
819 case PNPIPE_IFINDEX: 1179 case PNPIPE_IFINDEX:
820 val = pn->ifindex; 1180 val = pn->ifindex;
821 break; 1181 break;
1182
1183#ifdef CONFIG_PHONET_PIPECTRLR
1184 case PNPIPE_ENABLE:
1185 if (pn->pipe_state <= PIPE_IDLE)
1186 return -ENOTCONN;
1187 val = pn->pipe_state != PIPE_DISABLED;
1188 break;
1189#endif
1190
822 default: 1191 default:
823 return -ENOPROTOOPT; 1192 return -ENOPROTOOPT;
824 } 1193 }
@@ -835,6 +1204,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
835{ 1204{
836 struct pep_sock *pn = pep_sk(sk); 1205 struct pep_sock *pn = pep_sk(sk);
837 struct pnpipehdr *ph; 1206 struct pnpipehdr *ph;
1207 int err;
838 1208
839 if (pn_flow_safe(pn->tx_fc) && 1209 if (pn_flow_safe(pn->tx_fc) &&
840 !atomic_add_unless(&pn->tx_credits, -1, 0)) { 1210 !atomic_add_unless(&pn->tx_credits, -1, 0)) {
@@ -852,8 +1222,16 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
852 } else 1222 } else
853 ph->message_id = PNS_PIPE_DATA; 1223 ph->message_id = PNS_PIPE_DATA;
854 ph->pipe_handle = pn->pipe_handle; 1224 ph->pipe_handle = pn->pipe_handle;
1225#ifdef CONFIG_PHONET_PIPECTRLR
1226 err = pn_skb_send(sk, skb, &pn->remote_pep);
1227#else
1228 err = pn_skb_send(sk, skb, &pipe_srv);
1229#endif
1230
1231 if (err && pn_flow_safe(pn->tx_fc))
1232 atomic_inc(&pn->tx_credits);
1233 return err;
855 1234
856 return pn_skb_send(sk, skb, &pipe_srv);
857} 1235}
858 1236
859static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, 1237static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
@@ -873,7 +1251,7 @@ static int pep_sendmsg(struct kiocb *iocb, struct sock *sk,
873 skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len, 1251 skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len,
874 flags & MSG_DONTWAIT, &err); 1252 flags & MSG_DONTWAIT, &err);
875 if (!skb) 1253 if (!skb)
876 return -ENOBUFS; 1254 return err;
877 1255
878 skb_reserve(skb, MAX_PHONET_HEADER + 3); 1256 skb_reserve(skb, MAX_PHONET_HEADER + 3);
879 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); 1257 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
@@ -1045,6 +1423,8 @@ static void pep_sock_unhash(struct sock *sk)
1045 struct sock *skparent = NULL; 1423 struct sock *skparent = NULL;
1046 1424
1047 lock_sock(sk); 1425 lock_sock(sk);
1426
1427#ifndef CONFIG_PHONET_PIPECTRLR
1048 if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { 1428 if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) {
1049 skparent = pn->listener; 1429 skparent = pn->listener;
1050 release_sock(sk); 1430 release_sock(sk);
@@ -1054,6 +1434,7 @@ static void pep_sock_unhash(struct sock *sk)
1054 sk_del_node_init(sk); 1434 sk_del_node_init(sk);
1055 sk = skparent; 1435 sk = skparent;
1056 } 1436 }
1437#endif
1057 /* Unhash a listening sock only when it is closed 1438 /* Unhash a listening sock only when it is closed
1058 * and all of its active connected pipes are closed. */ 1439 * and all of its active connected pipes are closed. */
1059 if (hlist_empty(&pn->hlist)) 1440 if (hlist_empty(&pn->hlist))
@@ -1067,6 +1448,9 @@ static void pep_sock_unhash(struct sock *sk)
1067static struct proto pep_proto = { 1448static struct proto pep_proto = {
1068 .close = pep_sock_close, 1449 .close = pep_sock_close,
1069 .accept = pep_sock_accept, 1450 .accept = pep_sock_accept,
1451#ifdef CONFIG_PHONET_PIPECTRLR
1452 .connect = pep_sock_connect,
1453#endif
1070 .ioctl = pep_ioctl, 1454 .ioctl = pep_ioctl,
1071 .init = pep_init, 1455 .init = pep_init,
1072 .setsockopt = pep_setsockopt, 1456 .setsockopt = pep_setsockopt,
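
The heart of the new controller is pipe_negotiate_fc(): walk the host's preference list in order and take the first flow-control algorithm the remote side also advertises. A standalone rendering of that loop, returning int so the no-match case stays representable (the kernel helper funnels its result through a u8):

    #include <stdio.h>

    /* First host-preferred flow-control id also offered by the remote
     * end wins; -1 means no overlap. */
    static int negotiate_fc(const unsigned char *host,
                            const unsigned char *remote, int len)
    {
        for (int i = 0; i < len; i++)
            for (int j = 0; j < len; j++)
                if (remote[j] == host[i])
                    return host[i];
        return -1;
    }

    int main(void)
    {
        unsigned char host[3] = { 3, 2, 1 };    /* our preference order */
        unsigned char remote[3] = { 1, 2, 0 };  /* remote's offer */

        /* host prefers 3, the remote lacks it, so 2 is negotiated */
        printf("negotiated fc: %d\n", negotiate_fc(host, remote, 3));
        return 0;
    }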
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index b18e48fae975..947038ddd04c 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -292,8 +292,7 @@ static void phonet_route_autodel(struct net_device *dev)
292 if (bitmap_empty(deleted, 64)) 292 if (bitmap_empty(deleted, 64))
293 return; /* short-circuit RCU */ 293 return; /* short-circuit RCU */
294 synchronize_rcu(); 294 synchronize_rcu();
295 for (i = find_first_bit(deleted, 64); i < 64; 295 for_each_set_bit(i, deleted, 64) {
296 i = find_next_bit(deleted, 64, i + 1)) {
297 rtm_phonet_notify(RTM_DELROUTE, dev, i); 296 rtm_phonet_notify(RTM_DELROUTE, dev, i);
298 dev_put(dev); 297 dev_put(dev);
299 } 298 }
@@ -374,6 +373,7 @@ int __init phonet_device_init(void)
374 if (err) 373 if (err)
375 return err; 374 return err;
376 375
376 proc_net_fops_create(&init_net, "pnresource", 0, &pn_res_seq_fops);
377 register_netdevice_notifier(&phonet_device_notifier); 377 register_netdevice_notifier(&phonet_device_notifier);
378 err = phonet_netlink_register(); 378 err = phonet_netlink_register();
379 if (err) 379 if (err)
@@ -386,6 +386,7 @@ void phonet_device_exit(void)
386 rtnl_unregister_all(PF_PHONET); 386 rtnl_unregister_all(PF_PHONET);
387 unregister_netdevice_notifier(&phonet_device_notifier); 387 unregister_netdevice_notifier(&phonet_device_notifier);
388 unregister_pernet_device(&phonet_net_ops); 388 unregister_pernet_device(&phonet_net_ops);
389 proc_net_remove(&init_net, "pnresource");
389} 390}
390 391
391int phonet_route_add(struct net_device *dev, u8 daddr) 392int phonet_route_add(struct net_device *dev, u8 daddr)
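
for_each_set_bit() replaces the open-coded find_first_bit()/find_next_bit() pair with no behavioural change. On a single 64-bit word the idiom reduces to the loop below (userspace sketch; the kernel macro handles arbitrary-length bitmaps):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t deleted = (1ull << 0) | (1ull << 2) | (1ull << 63);

        /* visit each set bit once, lowest first, as for_each_set_bit does */
        for (unsigned int i = 0; i < 64; i++)
            if (deleted & (1ull << i))
                printf("RTM_DELROUTE for address %u\n", i);
        return 0;
    }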
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 6e9848bf0370..25f746d20c1f 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -158,6 +158,7 @@ void pn_sock_unhash(struct sock *sk)
158 spin_lock_bh(&pnsocks.lock); 158 spin_lock_bh(&pnsocks.lock);
159 sk_del_node_init(sk); 159 sk_del_node_init(sk);
160 spin_unlock_bh(&pnsocks.lock); 160 spin_unlock_bh(&pnsocks.lock);
161 pn_sock_unbind_all_res(sk);
161} 162}
162EXPORT_SYMBOL(pn_sock_unhash); 163EXPORT_SYMBOL(pn_sock_unhash);
163 164
@@ -224,6 +225,101 @@ static int pn_socket_autobind(struct socket *sock)
224 return 0; /* socket was already bound */ 225 return 0; /* socket was already bound */
225} 226}
226 227
228#ifdef CONFIG_PHONET_PIPECTRLR
229static int pn_socket_connect(struct socket *sock, struct sockaddr *addr,
230 int len, int flags)
231{
232 struct sock *sk = sock->sk;
233 struct sockaddr_pn *spn = (struct sockaddr_pn *)addr;
234 long timeo;
235 int err;
236
237 if (len < sizeof(struct sockaddr_pn))
238 return -EINVAL;
239 if (spn->spn_family != AF_PHONET)
240 return -EAFNOSUPPORT;
241
242 lock_sock(sk);
243
244 switch (sock->state) {
245 case SS_UNCONNECTED:
246 sk->sk_state = TCP_CLOSE;
247 break;
248 case SS_CONNECTING:
249 switch (sk->sk_state) {
250 case TCP_SYN_RECV:
251 sock->state = SS_CONNECTED;
252 err = -EISCONN;
253 goto out;
254 case TCP_CLOSE:
255 err = -EALREADY;
256 if (flags & O_NONBLOCK)
257 goto out;
258 goto wait_connect;
259 }
260 break;
261 case SS_CONNECTED:
262 switch (sk->sk_state) {
263 case TCP_SYN_RECV:
264 err = -EISCONN;
265 goto out;
266 case TCP_CLOSE:
267 sock->state = SS_UNCONNECTED;
268 break;
269 }
270 break;
271 case SS_DISCONNECTING:
272 case SS_FREE:
273 break;
274 }
275 sk->sk_state = TCP_CLOSE;
276 sk_stream_kill_queues(sk);
277
278 sock->state = SS_CONNECTING;
279 err = sk->sk_prot->connect(sk, addr, len);
280 if (err < 0) {
281 sock->state = SS_UNCONNECTED;
282 sk->sk_state = TCP_CLOSE;
283 goto out;
284 }
285
286 err = -EINPROGRESS;
287wait_connect:
288 if (sk->sk_state != TCP_SYN_RECV && (flags & O_NONBLOCK))
289 goto out;
290
291 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
292 release_sock(sk);
293
294 err = -ERESTARTSYS;
295 timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
296 sk->sk_state != TCP_CLOSE,
297 timeo);
298
299 lock_sock(sk);
300 if (timeo < 0)
301 goto out; /* -ERESTARTSYS */
302
303 err = -ETIMEDOUT;
304 if (timeo == 0 && sk->sk_state != TCP_SYN_RECV)
305 goto out;
306
307 if (sk->sk_state != TCP_SYN_RECV) {
308 sock->state = SS_UNCONNECTED;
309 err = sock_error(sk);
310 if (!err)
311 err = -ECONNREFUSED;
312 goto out;
313 }
314 sock->state = SS_CONNECTED;
315 err = 0;
316
317out:
318 release_sock(sk);
319 return err;
320}
321#endif
322
227static int pn_socket_accept(struct socket *sock, struct socket *newsock, 323static int pn_socket_accept(struct socket *sock, struct socket *newsock,
228 int flags) 324 int flags)
229{ 325{
@@ -281,7 +377,9 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
281 if (!mask && sk->sk_state == TCP_CLOSE_WAIT) 377 if (!mask && sk->sk_state == TCP_CLOSE_WAIT)
282 return POLLHUP; 378 return POLLHUP;
283 379
284 if (sk->sk_state == TCP_ESTABLISHED && atomic_read(&pn->tx_credits)) 380 if (sk->sk_state == TCP_ESTABLISHED &&
381 atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf &&
382 atomic_read(&pn->tx_credits))
285 mask |= POLLOUT | POLLWRNORM | POLLWRBAND; 383 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
286 384
287 return mask; 385 return mask;
@@ -390,7 +488,11 @@ const struct proto_ops phonet_stream_ops = {
390 .owner = THIS_MODULE, 488 .owner = THIS_MODULE,
391 .release = pn_socket_release, 489 .release = pn_socket_release,
392 .bind = pn_socket_bind, 490 .bind = pn_socket_bind,
491#ifdef CONFIG_PHONET_PIPECTRLR
492 .connect = pn_socket_connect,
493#else
393 .connect = sock_no_connect, 494 .connect = sock_no_connect,
495#endif
394 .socketpair = sock_no_socketpair, 496 .socketpair = sock_no_socketpair,
395 .accept = pn_socket_accept, 497 .accept = pn_socket_accept,
396 .getname = pn_socket_getname, 498 .getname = pn_socket_getname,
@@ -563,3 +665,188 @@ const struct file_operations pn_sock_seq_fops = {
563 .release = seq_release_net, 665 .release = seq_release_net,
564}; 666};
565#endif 667#endif
668
669static struct {
670 struct sock *sk[256];
671} pnres;
672
673/*
674 * Find and hold socket based on resource.
675 */
676struct sock *pn_find_sock_by_res(struct net *net, u8 res)
677{
678 struct sock *sk;
679
680 if (!net_eq(net, &init_net))
681 return NULL;
682
683 rcu_read_lock();
684 sk = rcu_dereference(pnres.sk[res]);
685 if (sk)
686 sock_hold(sk);
687 rcu_read_unlock();
688 return sk;
689}
690
691static DEFINE_MUTEX(resource_mutex);
692
693int pn_sock_bind_res(struct sock *sk, u8 res)
694{
695 int ret = -EADDRINUSE;
696
697 if (!net_eq(sock_net(sk), &init_net))
698 return -ENOIOCTLCMD;
699 if (!capable(CAP_SYS_ADMIN))
700 return -EPERM;
701 if (pn_socket_autobind(sk->sk_socket))
702 return -EAGAIN;
703
704 mutex_lock(&resource_mutex);
705 if (pnres.sk[res] == NULL) {
706 sock_hold(sk);
707 rcu_assign_pointer(pnres.sk[res], sk);
708 ret = 0;
709 }
710 mutex_unlock(&resource_mutex);
711 return ret;
712}
713
714int pn_sock_unbind_res(struct sock *sk, u8 res)
715{
716 int ret = -ENOENT;
717
718 if (!capable(CAP_SYS_ADMIN))
719 return -EPERM;
720
721 mutex_lock(&resource_mutex);
722 if (pnres.sk[res] == sk) {
723 rcu_assign_pointer(pnres.sk[res], NULL);
724 ret = 0;
725 }
726 mutex_unlock(&resource_mutex);
727
728 if (ret == 0) {
729 synchronize_rcu();
730 sock_put(sk);
731 }
732 return ret;
733}
734
735void pn_sock_unbind_all_res(struct sock *sk)
736{
737 unsigned res, match = 0;
738
739 mutex_lock(&resource_mutex);
740 for (res = 0; res < 256; res++) {
741 if (pnres.sk[res] == sk) {
742 rcu_assign_pointer(pnres.sk[res], NULL);
743 match++;
744 }
745 }
746 mutex_unlock(&resource_mutex);
747
748 if (match == 0)
749 return;
750 synchronize_rcu();
751 while (match > 0) {
752 sock_put(sk);
753 match--;
754 }
755}
756
757#ifdef CONFIG_PROC_FS
758static struct sock **pn_res_get_idx(struct seq_file *seq, loff_t pos)
759{
760 struct net *net = seq_file_net(seq);
761 unsigned i;
762
763 if (!net_eq(net, &init_net))
764 return NULL;
765
766 for (i = 0; i < 256; i++) {
767 if (pnres.sk[i] == NULL)
768 continue;
769 if (!pos)
770 return pnres.sk + i;
771 pos--;
772 }
773 return NULL;
774}
775
776static struct sock **pn_res_get_next(struct seq_file *seq, struct sock **sk)
777{
778 struct net *net = seq_file_net(seq);
779 unsigned i;
780
781 BUG_ON(!net_eq(net, &init_net));
782
783 for (i = (sk - pnres.sk) + 1; i < 256; i++)
784 if (pnres.sk[i])
785 return pnres.sk + i;
786 return NULL;
787}
788
789static void *pn_res_seq_start(struct seq_file *seq, loff_t *pos)
790 __acquires(resource_mutex)
791{
792 mutex_lock(&resource_mutex);
793 return *pos ? pn_res_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
794}
795
796static void *pn_res_seq_next(struct seq_file *seq, void *v, loff_t *pos)
797{
798 struct sock **sk;
799
800 if (v == SEQ_START_TOKEN)
801 sk = pn_res_get_idx(seq, 0);
802 else
803 sk = pn_res_get_next(seq, v);
804 (*pos)++;
805 return sk;
806}
807
808static void pn_res_seq_stop(struct seq_file *seq, void *v)
809 __releases(resource_mutex)
810{
811 mutex_unlock(&resource_mutex);
812}
813
814static int pn_res_seq_show(struct seq_file *seq, void *v)
815{
816 int len;
817
818 if (v == SEQ_START_TOKEN)
819 seq_printf(seq, "%s%n", "rs uid inode", &len);
820 else {
821 struct sock **psk = v;
822 struct sock *sk = *psk;
823
824 seq_printf(seq, "%02X %5d %lu%n",
825 (int) (psk - pnres.sk), sock_i_uid(sk),
826 sock_i_ino(sk), &len);
827 }
828 seq_printf(seq, "%*s\n", 63 - len, "");
829 return 0;
830}
831
832static const struct seq_operations pn_res_seq_ops = {
833 .start = pn_res_seq_start,
834 .next = pn_res_seq_next,
835 .stop = pn_res_seq_stop,
836 .show = pn_res_seq_show,
837};
838
839static int pn_res_open(struct inode *inode, struct file *file)
840{
841 return seq_open_net(inode, file, &pn_res_seq_ops,
842 sizeof(struct seq_net_private));
843}
844
845const struct file_operations pn_res_seq_fops = {
846 .owner = THIS_MODULE,
847 .open = pn_res_open,
848 .read = seq_read,
849 .llseek = seq_lseek,
850 .release = seq_release_net,
851};
852#endif
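
Put together, the controller-side sequence from userspace is: pick a pipe handle, connect to the remote PEP, then enable the pipe. A hedged sketch using the sockopts added in this series (constants from <linux/phonet.h>; the handle value and remote address are hypothetical, and error paths are trimmed):

    #include <linux/phonet.h>
    #include <string.h>
    #include <sys/socket.h>

    /* Returns a connected, enabled pipe socket, or -1 on failure. */
    static int open_pipe(const struct sockaddr_pn *remote_pep)
    {
        int fd = socket(AF_PHONET, SOCK_SEQPACKET, PN_PROTO_PIPE);
        int handle = 1;     /* hypothetical pipe handle */
        int on = 1;

        if (fd < 0)
            return -1;
        setsockopt(fd, SOL_PNPIPE, PNPIPE_PIPE_HANDLE,
                   &handle, sizeof(handle));    /* must precede connect() */
        if (connect(fd, (const struct sockaddr *)remote_pep,
                    sizeof(*remote_pep)) < 0)
            return -1;
        setsockopt(fd, SOL_PNPIPE, PNPIPE_ENABLE, &on, sizeof(on));
        return fd;
    }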
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index aebfecbdb841..bb6ad81b671d 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -39,7 +39,15 @@
39#include <net/sock.h> 39#include <net/sock.h>
40 40
41#include "rds.h" 41#include "rds.h"
42#include "rdma.h" 42
43char *rds_str_array(char **array, size_t elements, size_t index)
44{
45 if ((index < elements) && array[index])
46 return array[index];
47 else
48 return "unknown";
49}
50EXPORT_SYMBOL(rds_str_array);
43 51
44/* this is just used for stats gathering :/ */ 52/* this is just used for stats gathering :/ */
45static DEFINE_SPINLOCK(rds_sock_lock); 53static DEFINE_SPINLOCK(rds_sock_lock);
@@ -62,7 +70,7 @@ static int rds_release(struct socket *sock)
62 struct rds_sock *rs; 70 struct rds_sock *rs;
63 unsigned long flags; 71 unsigned long flags;
64 72
65 if (sk == NULL) 73 if (!sk)
66 goto out; 74 goto out;
67 75
68 rs = rds_sk_to_rs(sk); 76 rs = rds_sk_to_rs(sk);
@@ -73,7 +81,15 @@ static int rds_release(struct socket *sock)
73 * with the socket. */ 81 * with the socket. */
74 rds_clear_recv_queue(rs); 82 rds_clear_recv_queue(rs);
75 rds_cong_remove_socket(rs); 83 rds_cong_remove_socket(rs);
84
85 /*
86 * the binding lookup hash uses rcu, we need to
87 * make sure we sychronize_rcu before we free our
88 * entry
89 */
76 rds_remove_bound(rs); 90 rds_remove_bound(rs);
91 synchronize_rcu();
92
77 rds_send_drop_to(rs, NULL); 93 rds_send_drop_to(rs, NULL);
78 rds_rdma_drop_keys(rs); 94 rds_rdma_drop_keys(rs);
79 rds_notify_queue_get(rs, NULL); 95 rds_notify_queue_get(rs, NULL);
@@ -83,6 +99,8 @@ static int rds_release(struct socket *sock)
83 rds_sock_count--; 99 rds_sock_count--;
84 spin_unlock_irqrestore(&rds_sock_lock, flags); 100 spin_unlock_irqrestore(&rds_sock_lock, flags);
85 101
102 rds_trans_put(rs->rs_transport);
103
86 sock->sk = NULL; 104 sock->sk = NULL;
87 sock_put(sk); 105 sock_put(sk);
88out: 106out:
@@ -514,7 +532,7 @@ out:
514 spin_unlock_irqrestore(&rds_sock_lock, flags); 532 spin_unlock_irqrestore(&rds_sock_lock, flags);
515} 533}
516 534
517static void __exit rds_exit(void) 535static void rds_exit(void)
518{ 536{
519 sock_unregister(rds_family_ops.family); 537 sock_unregister(rds_family_ops.family);
520 proto_unregister(&rds_proto); 538 proto_unregister(&rds_proto);
@@ -529,7 +547,7 @@ static void __exit rds_exit(void)
529} 547}
530module_exit(rds_exit); 548module_exit(rds_exit);
531 549
532static int __init rds_init(void) 550static int rds_init(void)
533{ 551{
534 int ret; 552 int ret;
535 553
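
rds_str_array() is a guard against out-of-range lookups in static name tables: unknown indices and NULL slots map to "unknown" instead of reading past the array. Its contract in a runnable, standalone form (a re-statement of the helper, not the kernel symbol):

    #include <stddef.h>
    #include <stdio.h>

    #define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

    static char *str_array(char **array, size_t elements, size_t index)
    {
        return (index < elements && array[index]) ? array[index] : "unknown";
    }

    int main(void)
    {
        char *names[] = { "down", "connecting", "up" };

        printf("%s\n", str_array(names, ARRAY_SIZE(names), 1)); /* connecting */
        printf("%s\n", str_array(names, ARRAY_SIZE(names), 7)); /* unknown */
        return 0;
    }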
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 5d95fc007f1a..2f6b3fcc79f8 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -34,45 +34,52 @@
34#include <net/sock.h> 34#include <net/sock.h>
35#include <linux/in.h> 35#include <linux/in.h>
36#include <linux/if_arp.h> 36#include <linux/if_arp.h>
37#include <linux/jhash.h>
37#include "rds.h" 38#include "rds.h"
38 39
39/* 40#define BIND_HASH_SIZE 1024
40 * XXX this probably still needs more work.. no INADDR_ANY, and rbtrees aren't 41static struct hlist_head bind_hash_table[BIND_HASH_SIZE];
41 * particularly zippy.
42 *
43 * This is now called for every incoming frame so we arguably care much more
44 * about it than we used to.
45 */
46static DEFINE_SPINLOCK(rds_bind_lock); 42static DEFINE_SPINLOCK(rds_bind_lock);
47static struct rb_root rds_bind_tree = RB_ROOT;
48 43
49static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port, 44static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port)
50 struct rds_sock *insert) 45{
46 return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) &
47 (BIND_HASH_SIZE - 1));
48}
49
50static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
51 struct rds_sock *insert)
51{ 52{
52 struct rb_node **p = &rds_bind_tree.rb_node;
53 struct rb_node *parent = NULL;
54 struct rds_sock *rs; 53 struct rds_sock *rs;
54 struct hlist_node *node;
55 struct hlist_head *head = hash_to_bucket(addr, port);
55 u64 cmp; 56 u64 cmp;
56 u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); 57 u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
57 58
58 while (*p) { 59 rcu_read_lock();
59 parent = *p; 60 hlist_for_each_entry_rcu(rs, node, head, rs_bound_node) {
60 rs = rb_entry(parent, struct rds_sock, rs_bound_node);
61
62 cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | 61 cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
63 be16_to_cpu(rs->rs_bound_port); 62 be16_to_cpu(rs->rs_bound_port);
64 63
65 if (needle < cmp) 64 if (cmp == needle) {
66 p = &(*p)->rb_left; 65 rcu_read_unlock();
67 else if (needle > cmp)
68 p = &(*p)->rb_right;
69 else
70 return rs; 66 return rs;
67 }
71 } 68 }
69 rcu_read_unlock();
72 70
73 if (insert) { 71 if (insert) {
74 rb_link_node(&insert->rs_bound_node, parent, p); 72 /*
 75 rb_insert_color(&insert->rs_bound_node, &rds_bind_tree); 73 * make sure our addr and port are set before
 74 * we are added to the list; other RCU
 75 * readers will find us as soon as
 76 * hlist_add_head_rcu() is done
77 */
78 insert->rs_bound_addr = addr;
79 insert->rs_bound_port = port;
80 rds_sock_addref(insert);
81
82 hlist_add_head_rcu(&insert->rs_bound_node, head);
76 } 83 }
77 return NULL; 84 return NULL;
78} 85}
@@ -86,15 +93,13 @@ static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port,
86struct rds_sock *rds_find_bound(__be32 addr, __be16 port) 93struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
87{ 94{
88 struct rds_sock *rs; 95 struct rds_sock *rs;
89 unsigned long flags;
90 96
91 spin_lock_irqsave(&rds_bind_lock, flags); 97 rs = rds_bind_lookup(addr, port, NULL);
92 rs = rds_bind_tree_walk(addr, port, NULL); 98
93 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) 99 if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
94 rds_sock_addref(rs); 100 rds_sock_addref(rs);
95 else 101 else
96 rs = NULL; 102 rs = NULL;
97 spin_unlock_irqrestore(&rds_bind_lock, flags);
98 103
99 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, 104 rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
100 ntohs(port)); 105 ntohs(port));
@@ -121,22 +126,15 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
121 do { 126 do {
122 if (rover == 0) 127 if (rover == 0)
123 rover++; 128 rover++;
124 if (rds_bind_tree_walk(addr, cpu_to_be16(rover), rs) == NULL) { 129 if (!rds_bind_lookup(addr, cpu_to_be16(rover), rs)) {
125 *port = cpu_to_be16(rover); 130 *port = rs->rs_bound_port;
126 ret = 0; 131 ret = 0;
132 rdsdebug("rs %p binding to %pI4:%d\n",
133 rs, &addr, (int)ntohs(*port));
127 break; 134 break;
128 } 135 }
129 } while (rover++ != last); 136 } while (rover++ != last);
130 137
131 if (ret == 0) {
132 rs->rs_bound_addr = addr;
133 rs->rs_bound_port = *port;
134 rds_sock_addref(rs);
135
136 rdsdebug("rs %p binding to %pI4:%d\n",
137 rs, &addr, (int)ntohs(*port));
138 }
139
140 spin_unlock_irqrestore(&rds_bind_lock, flags); 138 spin_unlock_irqrestore(&rds_bind_lock, flags);
141 139
142 return ret; 140 return ret;
@@ -153,7 +151,7 @@ void rds_remove_bound(struct rds_sock *rs)
153 rs, &rs->rs_bound_addr, 151 rs, &rs->rs_bound_addr,
154 ntohs(rs->rs_bound_port)); 152 ntohs(rs->rs_bound_port));
155 153
156 rb_erase(&rs->rs_bound_node, &rds_bind_tree); 154 hlist_del_init_rcu(&rs->rs_bound_node);
157 rds_sock_put(rs); 155 rds_sock_put(rs);
158 rs->rs_bound_addr = 0; 156 rs->rs_bound_addr = 0;
159 } 157 }
@@ -184,7 +182,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
184 goto out; 182 goto out;
185 183
186 trans = rds_trans_get_preferred(sin->sin_addr.s_addr); 184 trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
187 if (trans == NULL) { 185 if (!trans) {
188 ret = -EADDRNOTAVAIL; 186 ret = -EADDRNOTAVAIL;
189 rds_remove_bound(rs); 187 rds_remove_bound(rs);
190 if (printk_ratelimit()) 188 if (printk_ratelimit())
@@ -198,5 +196,9 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
198 196
199out: 197out:
200 release_sock(sk); 198 release_sock(sk);
199
200 /* we might have called rds_remove_bound on error */
201 if (ret)
202 synchronize_rcu();
201 return ret; 203 return ret;
202} 204}
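
rds_bind thus moves from a single rb-tree under a spinlock to a 1024-bucket hash over the (address, port) pair, looked up under RCU. The bucket selection, sketched in userspace with a stand-in mixer (jhash_2words() is kernel-internal; only the power-of-two mask matters here):

    #include <stdint.h>

    #define BIND_HASH_SIZE 1024     /* power of two, so masking works */

    /* Stand-in for jhash_2words(); any decent 32-bit mixer will do for
     * illustration. */
    static uint32_t mix2(uint32_t a, uint32_t b)
    {
        uint32_t h = a * 0x9e3779b1u + b * 0x85ebca6bu;

        h ^= h >> 16;
        h *= 0x7feb352du;
        h ^= h >> 15;
        return h;
    }

    static unsigned int bind_bucket(uint32_t addr_be, uint16_t port_be)
    {
        return mix2(addr_be, port_be) & (BIND_HASH_SIZE - 1);
    }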
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 0871a29f0780..75ea686f27d5 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -141,7 +141,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
141 unsigned long flags; 141 unsigned long flags;
142 142
143 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL); 143 map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL);
144 if (map == NULL) 144 if (!map)
145 return NULL; 145 return NULL;
146 146
147 map->m_addr = addr; 147 map->m_addr = addr;
@@ -159,7 +159,7 @@ static struct rds_cong_map *rds_cong_from_addr(__be32 addr)
159 ret = rds_cong_tree_walk(addr, map); 159 ret = rds_cong_tree_walk(addr, map);
160 spin_unlock_irqrestore(&rds_cong_lock, flags); 160 spin_unlock_irqrestore(&rds_cong_lock, flags);
161 161
162 if (ret == NULL) { 162 if (!ret) {
163 ret = map; 163 ret = map;
164 map = NULL; 164 map = NULL;
165 } 165 }
@@ -205,7 +205,7 @@ int rds_cong_get_maps(struct rds_connection *conn)
205 conn->c_lcong = rds_cong_from_addr(conn->c_laddr); 205 conn->c_lcong = rds_cong_from_addr(conn->c_laddr);
206 conn->c_fcong = rds_cong_from_addr(conn->c_faddr); 206 conn->c_fcong = rds_cong_from_addr(conn->c_faddr);
207 207
208 if (conn->c_lcong == NULL || conn->c_fcong == NULL) 208 if (!(conn->c_lcong && conn->c_fcong))
209 return -ENOMEM; 209 return -ENOMEM;
210 210
211 return 0; 211 return 0;
@@ -221,7 +221,7 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) { 221 list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
222 if (!test_and_set_bit(0, &conn->c_map_queued)) { 222 if (!test_and_set_bit(0, &conn->c_map_queued)) {
223 rds_stats_inc(s_cong_update_queued); 223 rds_stats_inc(s_cong_update_queued);
224 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 224 rds_send_xmit(conn);
225 } 225 }
226 } 226 }
227 227
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7619b671ca28..9334d892366e 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -37,7 +37,6 @@
37 37
38#include "rds.h" 38#include "rds.h"
39#include "loop.h" 39#include "loop.h"
40#include "rdma.h"
41 40
42#define RDS_CONNECTION_HASH_BITS 12 41#define RDS_CONNECTION_HASH_BITS 12
43#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS) 42#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS)
@@ -63,18 +62,7 @@ static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr)
63 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \ 62 var |= RDS_INFO_CONNECTION_FLAG_##suffix; \
64} while (0) 63} while (0)
65 64
66static inline int rds_conn_is_sending(struct rds_connection *conn) 65/* rcu read lock must be held or the connection spinlock */
67{
68 int ret = 0;
69
70 if (!mutex_trylock(&conn->c_send_lock))
71 ret = 1;
72 else
73 mutex_unlock(&conn->c_send_lock);
74
75 return ret;
76}
77
78static struct rds_connection *rds_conn_lookup(struct hlist_head *head, 66static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
79 __be32 laddr, __be32 faddr, 67 __be32 laddr, __be32 faddr,
80 struct rds_transport *trans) 68 struct rds_transport *trans)
@@ -82,7 +70,7 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
82 struct rds_connection *conn, *ret = NULL; 70 struct rds_connection *conn, *ret = NULL;
83 struct hlist_node *pos; 71 struct hlist_node *pos;
84 72
85 hlist_for_each_entry(conn, pos, head, c_hash_node) { 73 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
86 if (conn->c_faddr == faddr && conn->c_laddr == laddr && 74 if (conn->c_faddr == faddr && conn->c_laddr == laddr &&
87 conn->c_trans == trans) { 75 conn->c_trans == trans) {
88 ret = conn; 76 ret = conn;
@@ -100,7 +88,7 @@ static struct rds_connection *rds_conn_lookup(struct hlist_head *head,
100 * and receiving over this connection again in the future. It is up to 88 * and receiving over this connection again in the future. It is up to
101 * the transport to have serialized this call with its send and recv. 89 * the transport to have serialized this call with its send and recv.
102 */ 90 */
103void rds_conn_reset(struct rds_connection *conn) 91static void rds_conn_reset(struct rds_connection *conn)
104{ 92{
105 rdsdebug("connection %pI4 to %pI4 reset\n", 93 rdsdebug("connection %pI4 to %pI4 reset\n",
106 &conn->c_laddr, &conn->c_faddr); 94 &conn->c_laddr, &conn->c_faddr);
@@ -129,10 +117,11 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
129{ 117{
130 struct rds_connection *conn, *parent = NULL; 118 struct rds_connection *conn, *parent = NULL;
131 struct hlist_head *head = rds_conn_bucket(laddr, faddr); 119 struct hlist_head *head = rds_conn_bucket(laddr, faddr);
120 struct rds_transport *loop_trans;
132 unsigned long flags; 121 unsigned long flags;
133 int ret; 122 int ret;
134 123
135 spin_lock_irqsave(&rds_conn_lock, flags); 124 rcu_read_lock();
136 conn = rds_conn_lookup(head, laddr, faddr, trans); 125 conn = rds_conn_lookup(head, laddr, faddr, trans);
137 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && 126 if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
138 !is_outgoing) { 127 !is_outgoing) {
@@ -143,12 +132,12 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
143 parent = conn; 132 parent = conn;
144 conn = parent->c_passive; 133 conn = parent->c_passive;
145 } 134 }
146 spin_unlock_irqrestore(&rds_conn_lock, flags); 135 rcu_read_unlock();
147 if (conn) 136 if (conn)
148 goto out; 137 goto out;
149 138
150 conn = kmem_cache_zalloc(rds_conn_slab, gfp); 139 conn = kmem_cache_zalloc(rds_conn_slab, gfp);
151 if (conn == NULL) { 140 if (!conn) {
152 conn = ERR_PTR(-ENOMEM); 141 conn = ERR_PTR(-ENOMEM);
153 goto out; 142 goto out;
154 } 143 }
@@ -159,7 +148,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
159 spin_lock_init(&conn->c_lock); 148 spin_lock_init(&conn->c_lock);
160 conn->c_next_tx_seq = 1; 149 conn->c_next_tx_seq = 1;
161 150
162 mutex_init(&conn->c_send_lock); 151 init_waitqueue_head(&conn->c_waitq);
163 INIT_LIST_HEAD(&conn->c_send_queue); 152 INIT_LIST_HEAD(&conn->c_send_queue);
164 INIT_LIST_HEAD(&conn->c_retrans); 153 INIT_LIST_HEAD(&conn->c_retrans);
165 154
@@ -175,7 +164,9 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
175 * can bind to the destination address then we'd rather the messages 164 * can bind to the destination address then we'd rather the messages
176 * flow through loopback rather than either transport. 165 * flow through loopback rather than either transport.
177 */ 166 */
178 if (rds_trans_get_preferred(faddr)) { 167 loop_trans = rds_trans_get_preferred(faddr);
168 if (loop_trans) {
169 rds_trans_put(loop_trans);
179 conn->c_loopback = 1; 170 conn->c_loopback = 1;
180 if (is_outgoing && trans->t_prefer_loopback) { 171 if (is_outgoing && trans->t_prefer_loopback) {
181 /* "outgoing" connection - and the transport 172 /* "outgoing" connection - and the transport
@@ -238,7 +229,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
238 kmem_cache_free(rds_conn_slab, conn); 229 kmem_cache_free(rds_conn_slab, conn);
239 conn = found; 230 conn = found;
240 } else { 231 } else {
241 hlist_add_head(&conn->c_hash_node, head); 232 hlist_add_head_rcu(&conn->c_hash_node, head);
242 rds_cong_add_conn(conn); 233 rds_cong_add_conn(conn);
243 rds_conn_count++; 234 rds_conn_count++;
244 } 235 }
@@ -263,21 +254,91 @@ struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
263} 254}
264EXPORT_SYMBOL_GPL(rds_conn_create_outgoing); 255EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
265 256
257void rds_conn_shutdown(struct rds_connection *conn)
258{
259 /* shut it down unless it's down already */
260 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
261 /*
262 * Quiesce the connection mgmt handlers before we start tearing
263 * things down. We don't hold the mutex for the entire
264 * duration of the shutdown operation, else we may be
265 * deadlocking with the CM handler. Instead, the CM event
266 * handler is supposed to check for state DISCONNECTING
267 */
268 mutex_lock(&conn->c_cm_lock);
269 if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING)
270 && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
271 rds_conn_error(conn, "shutdown called in state %d\n",
272 atomic_read(&conn->c_state));
273 mutex_unlock(&conn->c_cm_lock);
274 return;
275 }
276 mutex_unlock(&conn->c_cm_lock);
277
278 wait_event(conn->c_waitq,
279 !test_bit(RDS_IN_XMIT, &conn->c_flags));
280
281 conn->c_trans->conn_shutdown(conn);
282 rds_conn_reset(conn);
283
284 if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
285 /* This can happen - eg when we're in the middle of tearing
286 * down the connection, and someone unloads the rds module.
 288	 * Quite reproducible with loopback connections.
288 * Mostly harmless.
289 */
290 rds_conn_error(conn,
291 "%s: failed to transition to state DOWN, "
292 "current state is %d\n",
293 __func__,
294 atomic_read(&conn->c_state));
295 return;
296 }
297 }
298
299 /* Then reconnect if it's still live.
300 * The passive side of an IB loopback connection is never added
301 * to the conn hash, so we never trigger a reconnect on this
302 * conn - the reconnect is always triggered by the active peer. */
303 cancel_delayed_work_sync(&conn->c_conn_w);
304 rcu_read_lock();
305 if (!hlist_unhashed(&conn->c_hash_node)) {
306 rcu_read_unlock();
307 rds_queue_reconnect(conn);
308 } else {
309 rcu_read_unlock();
310 }
311}
312
313/*
314 * Stop and free a connection.
315 *
316 * This can only be used in very limited circumstances. It assumes that once
317 * the conn has been shutdown that no one else is referencing the connection.
318 * We can only ensure this in the rmmod path in the current code.
319 */
266void rds_conn_destroy(struct rds_connection *conn) 320void rds_conn_destroy(struct rds_connection *conn)
267{ 321{
268 struct rds_message *rm, *rtmp; 322 struct rds_message *rm, *rtmp;
323 unsigned long flags;
269 324
270 rdsdebug("freeing conn %p for %pI4 -> " 325 rdsdebug("freeing conn %p for %pI4 -> "
271 "%pI4\n", conn, &conn->c_laddr, 326 "%pI4\n", conn, &conn->c_laddr,
272 &conn->c_faddr); 327 &conn->c_faddr);
273 328
274 hlist_del_init(&conn->c_hash_node); 329 /* Ensure conn will not be scheduled for reconnect */
330 spin_lock_irq(&rds_conn_lock);
331 hlist_del_init_rcu(&conn->c_hash_node);
332 spin_unlock_irq(&rds_conn_lock);
333 synchronize_rcu();
275 334
276 /* wait for the rds thread to shut it down */ 335 /* shut the connection down */
277 atomic_set(&conn->c_state, RDS_CONN_ERROR); 336 rds_conn_drop(conn);
278 cancel_delayed_work(&conn->c_conn_w); 337 flush_work(&conn->c_down_w);
279 queue_work(rds_wq, &conn->c_down_w); 338
280 flush_workqueue(rds_wq); 339 /* make sure lingering queued work won't try to ref the conn */
340 cancel_delayed_work_sync(&conn->c_send_w);
341 cancel_delayed_work_sync(&conn->c_recv_w);
281 342
282 /* tear down queued messages */ 343 /* tear down queued messages */
283 list_for_each_entry_safe(rm, rtmp, 344 list_for_each_entry_safe(rm, rtmp,
@@ -302,7 +363,9 @@ void rds_conn_destroy(struct rds_connection *conn)
302 BUG_ON(!list_empty(&conn->c_retrans)); 363 BUG_ON(!list_empty(&conn->c_retrans));
303 kmem_cache_free(rds_conn_slab, conn); 364 kmem_cache_free(rds_conn_slab, conn);
304 365
366 spin_lock_irqsave(&rds_conn_lock, flags);
305 rds_conn_count--; 367 rds_conn_count--;
368 spin_unlock_irqrestore(&rds_conn_lock, flags);
306} 369}
307EXPORT_SYMBOL_GPL(rds_conn_destroy); 370EXPORT_SYMBOL_GPL(rds_conn_destroy);
308 371
@@ -316,23 +379,23 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
316 struct list_head *list; 379 struct list_head *list;
317 struct rds_connection *conn; 380 struct rds_connection *conn;
318 struct rds_message *rm; 381 struct rds_message *rm;
319 unsigned long flags;
320 unsigned int total = 0; 382 unsigned int total = 0;
383 unsigned long flags;
321 size_t i; 384 size_t i;
322 385
323 len /= sizeof(struct rds_info_message); 386 len /= sizeof(struct rds_info_message);
324 387
325 spin_lock_irqsave(&rds_conn_lock, flags); 388 rcu_read_lock();
326 389
327 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 390 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
328 i++, head++) { 391 i++, head++) {
329 hlist_for_each_entry(conn, pos, head, c_hash_node) { 392 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
330 if (want_send) 393 if (want_send)
331 list = &conn->c_send_queue; 394 list = &conn->c_send_queue;
332 else 395 else
333 list = &conn->c_retrans; 396 list = &conn->c_retrans;
334 397
335 spin_lock(&conn->c_lock); 398 spin_lock_irqsave(&conn->c_lock, flags);
336 399
337 /* XXX too lazy to maintain counts.. */ 400 /* XXX too lazy to maintain counts.. */
338 list_for_each_entry(rm, list, m_conn_item) { 401 list_for_each_entry(rm, list, m_conn_item) {
@@ -343,11 +406,10 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len,
343 conn->c_faddr, 0); 406 conn->c_faddr, 0);
344 } 407 }
345 408
346 spin_unlock(&conn->c_lock); 409 spin_unlock_irqrestore(&conn->c_lock, flags);
347 } 410 }
348 } 411 }
349 412 rcu_read_unlock();
350 spin_unlock_irqrestore(&rds_conn_lock, flags);
351 413
352 lens->nr = total; 414 lens->nr = total;
353 lens->each = sizeof(struct rds_info_message); 415 lens->each = sizeof(struct rds_info_message);
@@ -377,19 +439,17 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
377 uint64_t buffer[(item_len + 7) / 8]; 439 uint64_t buffer[(item_len + 7) / 8];
378 struct hlist_head *head; 440 struct hlist_head *head;
379 struct hlist_node *pos; 441 struct hlist_node *pos;
380 struct hlist_node *tmp;
381 struct rds_connection *conn; 442 struct rds_connection *conn;
382 unsigned long flags;
383 size_t i; 443 size_t i;
384 444
385 spin_lock_irqsave(&rds_conn_lock, flags); 445 rcu_read_lock();
386 446
387 lens->nr = 0; 447 lens->nr = 0;
388 lens->each = item_len; 448 lens->each = item_len;
389 449
390 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); 450 for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash);
391 i++, head++) { 451 i++, head++) {
392 hlist_for_each_entry_safe(conn, pos, tmp, head, c_hash_node) { 452 hlist_for_each_entry_rcu(conn, pos, head, c_hash_node) {
393 453
394 /* XXX no c_lock usage.. */ 454 /* XXX no c_lock usage.. */
395 if (!visitor(conn, buffer)) 455 if (!visitor(conn, buffer))
@@ -405,8 +465,7 @@ void rds_for_each_conn_info(struct socket *sock, unsigned int len,
405 lens->nr++; 465 lens->nr++;
406 } 466 }
407 } 467 }
408 468 rcu_read_unlock();
409 spin_unlock_irqrestore(&rds_conn_lock, flags);
410} 469}
411EXPORT_SYMBOL_GPL(rds_for_each_conn_info); 470EXPORT_SYMBOL_GPL(rds_for_each_conn_info);
412 471
@@ -423,8 +482,8 @@ static int rds_conn_info_visitor(struct rds_connection *conn,
423 sizeof(cinfo->transport)); 482 sizeof(cinfo->transport));
424 cinfo->flags = 0; 483 cinfo->flags = 0;
425 484
426 rds_conn_info_set(cinfo->flags, 485 rds_conn_info_set(cinfo->flags, test_bit(RDS_IN_XMIT, &conn->c_flags),
427 rds_conn_is_sending(conn), SENDING); 486 SENDING);
428 /* XXX Future: return the state rather than these funky bits */ 487 /* XXX Future: return the state rather than these funky bits */
429 rds_conn_info_set(cinfo->flags, 488 rds_conn_info_set(cinfo->flags,
430 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING, 489 atomic_read(&conn->c_state) == RDS_CONN_CONNECTING,
@@ -444,12 +503,12 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
444 sizeof(struct rds_info_connection)); 503 sizeof(struct rds_info_connection));
445} 504}
446 505
447int __init rds_conn_init(void) 506int rds_conn_init(void)
448{ 507{
449 rds_conn_slab = kmem_cache_create("rds_connection", 508 rds_conn_slab = kmem_cache_create("rds_connection",
450 sizeof(struct rds_connection), 509 sizeof(struct rds_connection),
451 0, 0, NULL); 510 0, 0, NULL);
452 if (rds_conn_slab == NULL) 511 if (!rds_conn_slab)
453 return -ENOMEM; 512 return -ENOMEM;
454 513
455 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); 514 rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info);
@@ -487,6 +546,18 @@ void rds_conn_drop(struct rds_connection *conn)
487EXPORT_SYMBOL_GPL(rds_conn_drop); 546EXPORT_SYMBOL_GPL(rds_conn_drop);
488 547
489/* 548/*
549 * If the connection is down, trigger a connect. We may have scheduled a
550 * delayed reconnect, however, in which case we should not interfere.
551 */
552void rds_conn_connect_if_down(struct rds_connection *conn)
553{
554 if (rds_conn_state(conn) == RDS_CONN_DOWN &&
555 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
556 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
557}
558EXPORT_SYMBOL_GPL(rds_conn_connect_if_down);
559
560/*
490 * An error occurred on the connection 561 * An error occurred on the connection
491 */ 562 */
492void 563void
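
rds_conn_connect_if_down(), added above, relies on test_and_set_bit() so that of several racing callers exactly one queues the connect worker. A hedged sketch of the same idiom; MY_PENDING, my_conn and my_wq are illustrative names, not RDS symbols:

#include <linux/bitops.h>
#include <linux/workqueue.h>

#define MY_PENDING	0		/* bit number within @flags */

struct my_conn {
	unsigned long flags;
	struct delayed_work work;	/* the (re)connect worker */
};

extern struct workqueue_struct *my_wq;	/* assumed to exist */

static void my_kick(struct my_conn *conn)
{
	/*
	 * Atomic test-and-set: only the caller that observes the bit
	 * clear queues the work.  The work handler clears MY_PENDING
	 * when it finishes, re-arming this guard.
	 */
	if (!test_and_set_bit(MY_PENDING, &conn->flags))
		queue_delayed_work(my_wq, &conn->work, 0);
}
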
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 8f2d6dd7700a..4123967d4d65 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -42,7 +42,7 @@
42#include "rds.h" 42#include "rds.h"
43#include "ib.h" 43#include "ib.h"
44 44
45unsigned int fmr_pool_size = RDS_FMR_POOL_SIZE; 45static unsigned int fmr_pool_size = RDS_FMR_POOL_SIZE;
46unsigned int fmr_message_size = RDS_FMR_SIZE + 1; /* +1 allows for unaligned MRs */ 46unsigned int fmr_message_size = RDS_FMR_SIZE + 1; /* +1 allows for unaligned MRs */
47unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT; 47unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
48 48
@@ -53,13 +53,72 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
53module_param(rds_ib_retry_count, int, 0444); 53module_param(rds_ib_retry_count, int, 0444);
54MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error"); 54MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
55 55
56/*
57 * we have a clumsy combination of RCU and a rwsem protecting this list
58 * because it is used both in the get_mr fast path and while blocking in
59 * the FMR flushing path.
60 */
61DECLARE_RWSEM(rds_ib_devices_lock);
56struct list_head rds_ib_devices; 62struct list_head rds_ib_devices;
57 63
58/* NOTE: if also grabbing ibdev lock, grab this first */ 64/* NOTE: if also grabbing ibdev lock, grab this first */
59DEFINE_SPINLOCK(ib_nodev_conns_lock); 65DEFINE_SPINLOCK(ib_nodev_conns_lock);
60LIST_HEAD(ib_nodev_conns); 66LIST_HEAD(ib_nodev_conns);
61 67
62void rds_ib_add_one(struct ib_device *device) 68static void rds_ib_nodev_connect(void)
69{
70 struct rds_ib_connection *ic;
71
72 spin_lock(&ib_nodev_conns_lock);
73 list_for_each_entry(ic, &ib_nodev_conns, ib_node)
74 rds_conn_connect_if_down(ic->conn);
75 spin_unlock(&ib_nodev_conns_lock);
76}
77
78static void rds_ib_dev_shutdown(struct rds_ib_device *rds_ibdev)
79{
80 struct rds_ib_connection *ic;
81 unsigned long flags;
82
83 spin_lock_irqsave(&rds_ibdev->spinlock, flags);
84 list_for_each_entry(ic, &rds_ibdev->conn_list, ib_node)
85 rds_conn_drop(ic->conn);
86 spin_unlock_irqrestore(&rds_ibdev->spinlock, flags);
87}
88
89/*
90 * rds_ib_destroy_mr_pool() blocks on a few things and MRs drop references
91 * from interrupt context, so we push freeing off into a work struct in krdsd.
92 */
93static void rds_ib_dev_free(struct work_struct *work)
94{
95 struct rds_ib_ipaddr *i_ipaddr, *i_next;
96 struct rds_ib_device *rds_ibdev = container_of(work,
97 struct rds_ib_device, free_work);
98
99 if (rds_ibdev->mr_pool)
100 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
101 if (rds_ibdev->mr)
102 ib_dereg_mr(rds_ibdev->mr);
103 if (rds_ibdev->pd)
104 ib_dealloc_pd(rds_ibdev->pd);
105
106 list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) {
107 list_del(&i_ipaddr->list);
108 kfree(i_ipaddr);
109 }
110
111 kfree(rds_ibdev);
112}
113
114void rds_ib_dev_put(struct rds_ib_device *rds_ibdev)
115{
116 BUG_ON(atomic_read(&rds_ibdev->refcount) <= 0);
117 if (atomic_dec_and_test(&rds_ibdev->refcount))
118 queue_work(rds_wq, &rds_ibdev->free_work);
119}
120
121static void rds_ib_add_one(struct ib_device *device)
63{ 122{
64 struct rds_ib_device *rds_ibdev; 123 struct rds_ib_device *rds_ibdev;
65 struct ib_device_attr *dev_attr; 124 struct ib_device_attr *dev_attr;
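
rds_ib_dev_put() above defers the final teardown to krdsd because rds_ib_dev_free() blocks, while references may be dropped from interrupt context. A simplified sketch of that refcount-to-workqueue handoff; my_dev and my_wq are placeholders, and free_work is assumed to have been set up with INIT_WORK() at allocation time:

#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_dev {
	atomic_t refcount;
	struct work_struct free_work;	/* INIT_WORK(..., my_dev_free) at alloc */
};

extern struct workqueue_struct *my_wq;	/* assumed to exist */

static void my_dev_free(struct work_struct *work)
{
	struct my_dev *dev = container_of(work, struct my_dev, free_work);

	/* Runs in process context, so blocking cleanup is fine here. */
	kfree(dev);
}

static void my_dev_put(struct my_dev *dev)
{
	/* Callable from atomic context: the blocking free is punted. */
	if (atomic_dec_and_test(&dev->refcount))
		queue_work(my_wq, &dev->free_work);
}
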
@@ -77,11 +136,14 @@ void rds_ib_add_one(struct ib_device *device)
77 goto free_attr; 136 goto free_attr;
78 } 137 }
79 138
80 rds_ibdev = kmalloc(sizeof *rds_ibdev, GFP_KERNEL); 139 rds_ibdev = kzalloc_node(sizeof(struct rds_ib_device), GFP_KERNEL,
140 ibdev_to_node(device));
81 if (!rds_ibdev) 141 if (!rds_ibdev)
82 goto free_attr; 142 goto free_attr;
83 143
84 spin_lock_init(&rds_ibdev->spinlock); 144 spin_lock_init(&rds_ibdev->spinlock);
145 atomic_set(&rds_ibdev->refcount, 1);
146 INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
85 147
86 rds_ibdev->max_wrs = dev_attr->max_qp_wr; 148 rds_ibdev->max_wrs = dev_attr->max_qp_wr;
87 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); 149 rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
@@ -91,68 +153,107 @@ void rds_ib_add_one(struct ib_device *device)
91 min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) : 153 min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) :
92 fmr_pool_size; 154 fmr_pool_size;
93 155
156 rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
157 rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
158
94 rds_ibdev->dev = device; 159 rds_ibdev->dev = device;
95 rds_ibdev->pd = ib_alloc_pd(device); 160 rds_ibdev->pd = ib_alloc_pd(device);
96 if (IS_ERR(rds_ibdev->pd)) 161 if (IS_ERR(rds_ibdev->pd)) {
97 goto free_dev; 162 rds_ibdev->pd = NULL;
163 goto put_dev;
164 }
98 165
99 rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, 166 rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, IB_ACCESS_LOCAL_WRITE);
100 IB_ACCESS_LOCAL_WRITE); 167 if (IS_ERR(rds_ibdev->mr)) {
101 if (IS_ERR(rds_ibdev->mr)) 168 rds_ibdev->mr = NULL;
102 goto err_pd; 169 goto put_dev;
170 }
103 171
104 rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev); 172 rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev);
105 if (IS_ERR(rds_ibdev->mr_pool)) { 173 if (IS_ERR(rds_ibdev->mr_pool)) {
106 rds_ibdev->mr_pool = NULL; 174 rds_ibdev->mr_pool = NULL;
107 goto err_mr; 175 goto put_dev;
108 } 176 }
109 177
110 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); 178 INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
111 INIT_LIST_HEAD(&rds_ibdev->conn_list); 179 INIT_LIST_HEAD(&rds_ibdev->conn_list);
112 list_add_tail(&rds_ibdev->list, &rds_ib_devices); 180
181 down_write(&rds_ib_devices_lock);
182 list_add_tail_rcu(&rds_ibdev->list, &rds_ib_devices);
183 up_write(&rds_ib_devices_lock);
184 atomic_inc(&rds_ibdev->refcount);
113 185
114 ib_set_client_data(device, &rds_ib_client, rds_ibdev); 186 ib_set_client_data(device, &rds_ib_client, rds_ibdev);
187 atomic_inc(&rds_ibdev->refcount);
115 188
116 goto free_attr; 189 rds_ib_nodev_connect();
117 190
118err_mr: 191put_dev:
119 ib_dereg_mr(rds_ibdev->mr); 192 rds_ib_dev_put(rds_ibdev);
120err_pd:
121 ib_dealloc_pd(rds_ibdev->pd);
122free_dev:
123 kfree(rds_ibdev);
124free_attr: 193free_attr:
125 kfree(dev_attr); 194 kfree(dev_attr);
126} 195}
127 196
128void rds_ib_remove_one(struct ib_device *device) 197/*
198 * New connections use this to find the device to associate with the
199 * connection. It's not in the fast path so we're not concerned about the
200 * performance of the IB call. (As of this writing, it uses an interrupt
201 * blocking spinlock to serialize walking a per-device list of all registered
202 * clients.)
203 *
204 * RCU is used to handle incoming connections racing with device teardown.
205 * Rather than use a lock to serialize removal from the client_data and
206 * getting a new reference, we use an RCU grace period. The destruction
207 * path removes the device from client_data and then waits for all RCU
208 * readers to finish.
209 *
210 * A new connection can get NULL from this if it's arriving on a
211 * device that is in the process of being removed.
212 */
213struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device)
129{ 214{
130 struct rds_ib_device *rds_ibdev; 215 struct rds_ib_device *rds_ibdev;
131 struct rds_ib_ipaddr *i_ipaddr, *i_next;
132 216
217 rcu_read_lock();
133 rds_ibdev = ib_get_client_data(device, &rds_ib_client); 218 rds_ibdev = ib_get_client_data(device, &rds_ib_client);
134 if (!rds_ibdev) 219 if (rds_ibdev)
135 return; 220 atomic_inc(&rds_ibdev->refcount);
221 rcu_read_unlock();
222 return rds_ibdev;
223}
136 224
137 list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { 225/*
138 list_del(&i_ipaddr->list); 226 * The IB stack is letting us know that a device is going away. This can
139	kfree(i_ipaddr); 228 * the PCI function, for example.
140 } 228 * the pci function, for example.
229 *
230 * This can be called at any time and can be racing with any other RDS path.
231 */
232static void rds_ib_remove_one(struct ib_device *device)
233{
234 struct rds_ib_device *rds_ibdev;
141 235
142 rds_ib_destroy_conns(rds_ibdev); 236 rds_ibdev = ib_get_client_data(device, &rds_ib_client);
237 if (!rds_ibdev)
238 return;
143 239
144 if (rds_ibdev->mr_pool) 240 rds_ib_dev_shutdown(rds_ibdev);
145 rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
146 241
147 ib_dereg_mr(rds_ibdev->mr); 242 /* stop connection attempts from getting a reference to this device. */
243 ib_set_client_data(device, &rds_ib_client, NULL);
148 244
149 while (ib_dealloc_pd(rds_ibdev->pd)) { 245 down_write(&rds_ib_devices_lock);
150 rdsdebug("Failed to dealloc pd %p\n", rds_ibdev->pd); 246 list_del_rcu(&rds_ibdev->list);
151 msleep(1); 247 up_write(&rds_ib_devices_lock);
152 }
153 248
154 list_del(&rds_ibdev->list); 249 /*
155	kfree(rds_ibdev); 250	 * This synchronize_rcu() is waiting for readers of both the ib
251 * client data and the devices list to finish before we drop
252 * both of those references.
253 */
254 synchronize_rcu();
255 rds_ib_dev_put(rds_ibdev);
256 rds_ib_dev_put(rds_ibdev);
156} 257}
157 258
158struct ib_client rds_ib_client = { 259struct ib_client rds_ib_client = {
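
The lookup/removal pair above (rds_ib_get_client_data() and rds_ib_remove_one()) shows why the reference must be taken inside the RCU read-side section: either the lookup completes before the remover's synchronize_rcu() returns, or it sees NULL. A kernel-style sketch of the two sides of that race, where my_registry_lookup()/my_registry_clear() are stand-ins for ib_get_client_data()/ib_set_client_data():

#include <linux/atomic.h>
#include <linux/rcupdate.h>

struct my_dev {
	atomic_t refcount;
};

extern struct my_dev *my_registry_lookup(void);	/* RCU-protected pointer */
extern void my_registry_clear(void);
extern void my_dev_put(struct my_dev *dev);

static struct my_dev *my_dev_get(void)
{
	struct my_dev *dev;

	rcu_read_lock();
	dev = my_registry_lookup();
	if (dev)
		atomic_inc(&dev->refcount);	/* under RCU: remover waits for us */
	rcu_read_unlock();
	return dev;
}

static void my_dev_remove(struct my_dev *dev)
{
	my_registry_clear();	/* no new lookup can find the device */
	synchronize_rcu();	/* wait for in-flight my_dev_get() calls */
	my_dev_put(dev);	/* drop the registry's reference */
}
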
@@ -186,7 +287,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
186 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); 287 rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
187 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); 288 rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
188 289
189 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 290 rds_ibdev = ic->rds_ibdev;
190 iinfo->max_send_wr = ic->i_send_ring.w_nr; 291 iinfo->max_send_wr = ic->i_send_ring.w_nr;
191 iinfo->max_recv_wr = ic->i_recv_ring.w_nr; 292 iinfo->max_recv_wr = ic->i_recv_ring.w_nr;
192 iinfo->max_send_sge = rds_ibdev->max_sge; 293 iinfo->max_send_sge = rds_ibdev->max_sge;
@@ -248,29 +349,36 @@ static int rds_ib_laddr_check(__be32 addr)
248 return ret; 349 return ret;
249} 350}
250 351
352static void rds_ib_unregister_client(void)
353{
354 ib_unregister_client(&rds_ib_client);
355 /* wait for rds_ib_dev_free() to complete */
356 flush_workqueue(rds_wq);
357}
358
251void rds_ib_exit(void) 359void rds_ib_exit(void)
252{ 360{
253 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); 361 rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
362 rds_ib_unregister_client();
254 rds_ib_destroy_nodev_conns(); 363 rds_ib_destroy_nodev_conns();
255 ib_unregister_client(&rds_ib_client);
256 rds_ib_sysctl_exit(); 364 rds_ib_sysctl_exit();
257 rds_ib_recv_exit(); 365 rds_ib_recv_exit();
258 rds_trans_unregister(&rds_ib_transport); 366 rds_trans_unregister(&rds_ib_transport);
367 rds_ib_fmr_exit();
259} 368}
260 369
261struct rds_transport rds_ib_transport = { 370struct rds_transport rds_ib_transport = {
262 .laddr_check = rds_ib_laddr_check, 371 .laddr_check = rds_ib_laddr_check,
263 .xmit_complete = rds_ib_xmit_complete, 372 .xmit_complete = rds_ib_xmit_complete,
264 .xmit = rds_ib_xmit, 373 .xmit = rds_ib_xmit,
265 .xmit_cong_map = NULL,
266 .xmit_rdma = rds_ib_xmit_rdma, 374 .xmit_rdma = rds_ib_xmit_rdma,
375 .xmit_atomic = rds_ib_xmit_atomic,
267 .recv = rds_ib_recv, 376 .recv = rds_ib_recv,
268 .conn_alloc = rds_ib_conn_alloc, 377 .conn_alloc = rds_ib_conn_alloc,
269 .conn_free = rds_ib_conn_free, 378 .conn_free = rds_ib_conn_free,
270 .conn_connect = rds_ib_conn_connect, 379 .conn_connect = rds_ib_conn_connect,
271 .conn_shutdown = rds_ib_conn_shutdown, 380 .conn_shutdown = rds_ib_conn_shutdown,
272 .inc_copy_to_user = rds_ib_inc_copy_to_user, 381 .inc_copy_to_user = rds_ib_inc_copy_to_user,
273 .inc_purge = rds_ib_inc_purge,
274 .inc_free = rds_ib_inc_free, 382 .inc_free = rds_ib_inc_free,
275 .cm_initiate_connect = rds_ib_cm_initiate_connect, 383 .cm_initiate_connect = rds_ib_cm_initiate_connect,
276 .cm_handle_connect = rds_ib_cm_handle_connect, 384 .cm_handle_connect = rds_ib_cm_handle_connect,
@@ -286,16 +394,20 @@ struct rds_transport rds_ib_transport = {
286 .t_type = RDS_TRANS_IB 394 .t_type = RDS_TRANS_IB
287}; 395};
288 396
289int __init rds_ib_init(void) 397int rds_ib_init(void)
290{ 398{
291 int ret; 399 int ret;
292 400
293 INIT_LIST_HEAD(&rds_ib_devices); 401 INIT_LIST_HEAD(&rds_ib_devices);
294 402
295 ret = ib_register_client(&rds_ib_client); 403 ret = rds_ib_fmr_init();
296 if (ret) 404 if (ret)
297 goto out; 405 goto out;
298 406
407 ret = ib_register_client(&rds_ib_client);
408 if (ret)
409 goto out_fmr_exit;
410
299 ret = rds_ib_sysctl_init(); 411 ret = rds_ib_sysctl_init();
300 if (ret) 412 if (ret)
301 goto out_ibreg; 413 goto out_ibreg;
@@ -317,7 +429,9 @@ out_recv:
317out_sysctl: 429out_sysctl:
318 rds_ib_sysctl_exit(); 430 rds_ib_sysctl_exit();
319out_ibreg: 431out_ibreg:
320 ib_unregister_client(&rds_ib_client); 432 rds_ib_unregister_client();
433out_fmr_exit:
434 rds_ib_fmr_exit();
321out: 435out:
322 return ret; 436 return ret;
323} 437}
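
The reworked rds_ib_init() keeps the kernel's usual error-unwind shape: each initialization step gains a label that undoes it, and a failure at step N jumps to the label unwinding steps N-1 back to 1 in reverse order. A generic sketch of the pattern; step_a/step_b/step_c are placeholders, not RDS functions:

extern int step_a_init(void);
extern void step_a_exit(void);
extern int step_b_init(void);
extern void step_b_exit(void);
extern int step_c_init(void);

static int my_init(void)
{
	int ret;

	ret = step_a_init();
	if (ret)
		goto out;

	ret = step_b_init();
	if (ret)
		goto out_a;		/* undo step A only */

	ret = step_c_init();
	if (ret)
		goto out_b;		/* undo B, then fall through to undo A */

	return 0;

out_b:
	step_b_exit();
out_a:
	step_a_exit();
out:
	return ret;
}
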
diff --git a/net/rds/ib.h b/net/rds/ib.h
index 64df4e79b29f..e34ad032b66d 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -3,11 +3,13 @@
3 3
4#include <rdma/ib_verbs.h> 4#include <rdma/ib_verbs.h>
5#include <rdma/rdma_cm.h> 5#include <rdma/rdma_cm.h>
6#include <linux/pci.h>
7#include <linux/slab.h>
6#include "rds.h" 8#include "rds.h"
7#include "rdma_transport.h" 9#include "rdma_transport.h"
8 10
9#define RDS_FMR_SIZE 256 11#define RDS_FMR_SIZE 256
10#define RDS_FMR_POOL_SIZE 4096 12#define RDS_FMR_POOL_SIZE 8192
11 13
12#define RDS_IB_MAX_SGE 8 14#define RDS_IB_MAX_SGE 8
13#define RDS_IB_RECV_SGE 2 15#define RDS_IB_RECV_SGE 2
@@ -19,6 +21,9 @@
19 21
20#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */ 22#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */
21 23
24#define RDS_IB_RECYCLE_BATCH_COUNT 32
25
26extern struct rw_semaphore rds_ib_devices_lock;
22extern struct list_head rds_ib_devices; 27extern struct list_head rds_ib_devices;
23 28
24/* 29/*
@@ -26,20 +31,29 @@ extern struct list_head rds_ib_devices;
26 * try to minimize the amount of memory tied up in both the device and 31 * try to minimize the amount of memory tied up in both the device and
27 * socket receive queues. 32 * socket receive queues.
28 */ 33 */
29/* page offset of the final full frag that fits in the page */
30#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE)
31struct rds_page_frag { 34struct rds_page_frag {
32 struct list_head f_item; 35 struct list_head f_item;
33 struct page *f_page; 36 struct list_head f_cache_entry;
34 unsigned long f_offset; 37 struct scatterlist f_sg;
35 dma_addr_t f_mapped;
36}; 38};
37 39
38struct rds_ib_incoming { 40struct rds_ib_incoming {
39 struct list_head ii_frags; 41 struct list_head ii_frags;
42 struct list_head ii_cache_entry;
40 struct rds_incoming ii_inc; 43 struct rds_incoming ii_inc;
41}; 44};
42 45
46struct rds_ib_cache_head {
47 struct list_head *first;
48 unsigned long count;
49};
50
51struct rds_ib_refill_cache {
52 struct rds_ib_cache_head *percpu;
53 struct list_head *xfer;
54 struct list_head *ready;
55};
56
43struct rds_ib_connect_private { 57struct rds_ib_connect_private {
44 /* Add new fields at the end, and don't permute existing fields. */ 58 /* Add new fields at the end, and don't permute existing fields. */
45 __be32 dp_saddr; 59 __be32 dp_saddr;
@@ -53,8 +67,7 @@ struct rds_ib_connect_private {
53}; 67};
54 68
55struct rds_ib_send_work { 69struct rds_ib_send_work {
56 struct rds_message *s_rm; 70 void *s_op;
57 struct rds_rdma_op *s_op;
58 struct ib_send_wr s_wr; 71 struct ib_send_wr s_wr;
59 struct ib_sge s_sge[RDS_IB_MAX_SGE]; 72 struct ib_sge s_sge[RDS_IB_MAX_SGE];
60 unsigned long s_queued; 73 unsigned long s_queued;
@@ -92,10 +105,11 @@ struct rds_ib_connection {
92 105
93 /* tx */ 106 /* tx */
94 struct rds_ib_work_ring i_send_ring; 107 struct rds_ib_work_ring i_send_ring;
95 struct rds_message *i_rm; 108 struct rm_data_op *i_data_op;
96 struct rds_header *i_send_hdrs; 109 struct rds_header *i_send_hdrs;
97 u64 i_send_hdrs_dma; 110 u64 i_send_hdrs_dma;
98 struct rds_ib_send_work *i_sends; 111 struct rds_ib_send_work *i_sends;
112 atomic_t i_signaled_sends;
99 113
100 /* rx */ 114 /* rx */
101 struct tasklet_struct i_recv_tasklet; 115 struct tasklet_struct i_recv_tasklet;
@@ -106,8 +120,9 @@ struct rds_ib_connection {
106 struct rds_header *i_recv_hdrs; 120 struct rds_header *i_recv_hdrs;
107 u64 i_recv_hdrs_dma; 121 u64 i_recv_hdrs_dma;
108 struct rds_ib_recv_work *i_recvs; 122 struct rds_ib_recv_work *i_recvs;
109 struct rds_page_frag i_frag;
110 u64 i_ack_recv; /* last ACK received */ 123 u64 i_ack_recv; /* last ACK received */
124 struct rds_ib_refill_cache i_cache_incs;
125 struct rds_ib_refill_cache i_cache_frags;
111 126
112 /* sending acks */ 127 /* sending acks */
113 unsigned long i_ack_flags; 128 unsigned long i_ack_flags;
@@ -138,7 +153,6 @@ struct rds_ib_connection {
138 153
139 /* Batched completions */ 154 /* Batched completions */
140 unsigned int i_unsignaled_wrs; 155 unsigned int i_unsignaled_wrs;
141 long i_unsignaled_bytes;
142}; 156};
143 157
144/* This assumes that atomic_t is at least 32 bits */ 158/* This assumes that atomic_t is at least 32 bits */
@@ -164,9 +178,17 @@ struct rds_ib_device {
164 unsigned int max_fmrs; 178 unsigned int max_fmrs;
165 int max_sge; 179 int max_sge;
166 unsigned int max_wrs; 180 unsigned int max_wrs;
181 unsigned int max_initiator_depth;
182 unsigned int max_responder_resources;
167 spinlock_t spinlock; /* protect the above */ 183 spinlock_t spinlock; /* protect the above */
184 atomic_t refcount;
185 struct work_struct free_work;
168}; 186};
169 187
188#define pcidev_to_node(pcidev) pcibus_to_node(pcidev->bus)
189#define ibdev_to_node(ibdev) pcidev_to_node(to_pci_dev(ibdev->dma_device))
190#define rdsibdev_to_node(rdsibdev) ibdev_to_node(rdsibdev->dev)
191
170/* bits for i_ack_flags */ 192/* bits for i_ack_flags */
171#define IB_ACK_IN_FLIGHT 0 193#define IB_ACK_IN_FLIGHT 0
172#define IB_ACK_REQUESTED 1 194#define IB_ACK_REQUESTED 1
@@ -202,6 +224,8 @@ struct rds_ib_statistics {
202 uint64_t s_ib_rdma_mr_pool_flush; 224 uint64_t s_ib_rdma_mr_pool_flush;
203 uint64_t s_ib_rdma_mr_pool_wait; 225 uint64_t s_ib_rdma_mr_pool_wait;
204 uint64_t s_ib_rdma_mr_pool_depleted; 226 uint64_t s_ib_rdma_mr_pool_depleted;
227 uint64_t s_ib_atomic_cswp;
228 uint64_t s_ib_atomic_fadd;
205}; 229};
206 230
207extern struct workqueue_struct *rds_ib_wq; 231extern struct workqueue_struct *rds_ib_wq;
@@ -241,11 +265,10 @@ static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev,
241 265
242/* ib.c */ 266/* ib.c */
243extern struct rds_transport rds_ib_transport; 267extern struct rds_transport rds_ib_transport;
244extern void rds_ib_add_one(struct ib_device *device); 268struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
245extern void rds_ib_remove_one(struct ib_device *device); 269void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
246extern struct ib_client rds_ib_client; 270extern struct ib_client rds_ib_client;
247 271
248extern unsigned int fmr_pool_size;
249extern unsigned int fmr_message_size; 272extern unsigned int fmr_message_size;
250extern unsigned int rds_ib_retry_count; 273extern unsigned int rds_ib_retry_count;
251 274
@@ -258,7 +281,7 @@ void rds_ib_conn_free(void *arg);
258int rds_ib_conn_connect(struct rds_connection *conn); 281int rds_ib_conn_connect(struct rds_connection *conn);
259void rds_ib_conn_shutdown(struct rds_connection *conn); 282void rds_ib_conn_shutdown(struct rds_connection *conn);
260void rds_ib_state_change(struct sock *sk); 283void rds_ib_state_change(struct sock *sk);
261int __init rds_ib_listen_init(void); 284int rds_ib_listen_init(void);
262void rds_ib_listen_stop(void); 285void rds_ib_listen_stop(void);
263void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); 286void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...);
264int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, 287int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
@@ -275,15 +298,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn,
275int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); 298int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
276void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 299void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
277void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); 300void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
278void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock); 301void rds_ib_destroy_nodev_conns(void);
279static inline void rds_ib_destroy_nodev_conns(void)
280{
281 __rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock);
282}
283static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev)
284{
285 __rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock);
286}
287struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); 302struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
288void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); 303void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
289void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); 304void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
@@ -292,14 +307,16 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
292void rds_ib_sync_mr(void *trans_private, int dir); 307void rds_ib_sync_mr(void *trans_private, int dir);
293void rds_ib_free_mr(void *trans_private, int invalidate); 308void rds_ib_free_mr(void *trans_private, int invalidate);
294void rds_ib_flush_mrs(void); 309void rds_ib_flush_mrs(void);
310int rds_ib_fmr_init(void);
311void rds_ib_fmr_exit(void);
295 312
296/* ib_recv.c */ 313/* ib_recv.c */
297int __init rds_ib_recv_init(void); 314int rds_ib_recv_init(void);
298void rds_ib_recv_exit(void); 315void rds_ib_recv_exit(void);
299int rds_ib_recv(struct rds_connection *conn); 316int rds_ib_recv(struct rds_connection *conn);
300int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 317int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic);
301 gfp_t page_gfp, int prefill); 318void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
302void rds_ib_inc_purge(struct rds_incoming *inc); 319void rds_ib_recv_refill(struct rds_connection *conn, int prefill);
303void rds_ib_inc_free(struct rds_incoming *inc); 320void rds_ib_inc_free(struct rds_incoming *inc);
304int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 321int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
305 size_t size); 322 size_t size);
@@ -325,17 +342,19 @@ u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest);
325extern wait_queue_head_t rds_ib_ring_empty_wait; 342extern wait_queue_head_t rds_ib_ring_empty_wait;
326 343
327/* ib_send.c */ 344/* ib_send.c */
345char *rds_ib_wc_status_str(enum ib_wc_status status);
328void rds_ib_xmit_complete(struct rds_connection *conn); 346void rds_ib_xmit_complete(struct rds_connection *conn);
329int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, 347int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
330 unsigned int hdr_off, unsigned int sg, unsigned int off); 348 unsigned int hdr_off, unsigned int sg, unsigned int off);
331void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context); 349void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context);
332void rds_ib_send_init_ring(struct rds_ib_connection *ic); 350void rds_ib_send_init_ring(struct rds_ib_connection *ic);
333void rds_ib_send_clear_ring(struct rds_ib_connection *ic); 351void rds_ib_send_clear_ring(struct rds_ib_connection *ic);
334int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); 352int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
335void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits); 353void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits);
336void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted); 354void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted);
337int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, 355int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted,
338 u32 *adv_credits, int need_posted, int max_posted); 356 u32 *adv_credits, int need_posted, int max_posted);
357int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op);
339 358
340/* ib_stats.c */ 359/* ib_stats.c */
341DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); 360DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats);
@@ -344,7 +363,7 @@ unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
344 unsigned int avail); 363 unsigned int avail);
345 364
346/* ib_sysctl.c */ 365/* ib_sysctl.c */
347int __init rds_ib_sysctl_init(void); 366int rds_ib_sysctl_init(void);
348void rds_ib_sysctl_exit(void); 367void rds_ib_sysctl_exit(void);
349extern unsigned long rds_ib_sysctl_max_send_wr; 368extern unsigned long rds_ib_sysctl_max_send_wr;
350extern unsigned long rds_ib_sysctl_max_recv_wr; 369extern unsigned long rds_ib_sysctl_max_recv_wr;
@@ -352,30 +371,5 @@ extern unsigned long rds_ib_sysctl_max_unsig_wrs;
352extern unsigned long rds_ib_sysctl_max_unsig_bytes; 371extern unsigned long rds_ib_sysctl_max_unsig_bytes;
353extern unsigned long rds_ib_sysctl_max_recv_allocation; 372extern unsigned long rds_ib_sysctl_max_recv_allocation;
354extern unsigned int rds_ib_sysctl_flow_control; 373extern unsigned int rds_ib_sysctl_flow_control;
355extern ctl_table rds_ib_sysctl_table[];
356
357/*
358 * Helper functions for getting/setting the header and data SGEs in
359 * RDS packets (not RDMA)
360 *
361 * From version 3.1 onwards, header is in front of data in the sge.
362 */
363static inline struct ib_sge *
364rds_ib_header_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
365{
366 if (ic->conn->c_version > RDS_PROTOCOL_3_0)
367 return &sge[0];
368 else
369 return &sge[1];
370}
371
372static inline struct ib_sge *
373rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge)
374{
375 if (ic->conn->c_version > RDS_PROTOCOL_3_0)
376 return &sge[1];
377 else
378 return &sge[0];
379}
380 374
381#endif 375#endif
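
The ibdev_to_node() chain added to ib.h resolves an IB device to its NUMA node (device -> pci_dev -> bus -> node) so per-connection memory can be placed next to the HCA, as the kzalloc_node()/vmalloc_node() hunks elsewhere in this series do. A hedged illustration of how such a macro gets used; my_per_dev and my_ibdev_to_node are placeholders:

#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/topology.h>
#include <rdma/ib_verbs.h>

/* Same chain as the macros above: device -> PCI device -> bus -> node. */
#define my_ibdev_to_node(ibdev) \
	pcibus_to_node(to_pci_dev((ibdev)->dma_device)->bus)

struct my_per_dev {		/* hypothetical per-device state */
	int dummy;
};

static struct my_per_dev *my_alloc_near_hca(struct ib_device *device)
{
	/* The allocation lands on the HCA's NUMA node when possible. */
	return kzalloc_node(sizeof(struct my_per_dev), GFP_KERNEL,
			    my_ibdev_to_node(device));
}
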
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index f68832798db2..ee369d201a65 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -38,6 +38,36 @@
38#include "rds.h" 38#include "rds.h"
39#include "ib.h" 39#include "ib.h"
40 40
41static char *rds_ib_event_type_strings[] = {
42#define RDS_IB_EVENT_STRING(foo) \
43 [IB_EVENT_##foo] = __stringify(IB_EVENT_##foo)
44 RDS_IB_EVENT_STRING(CQ_ERR),
45 RDS_IB_EVENT_STRING(QP_FATAL),
46 RDS_IB_EVENT_STRING(QP_REQ_ERR),
47 RDS_IB_EVENT_STRING(QP_ACCESS_ERR),
48 RDS_IB_EVENT_STRING(COMM_EST),
49 RDS_IB_EVENT_STRING(SQ_DRAINED),
50 RDS_IB_EVENT_STRING(PATH_MIG),
51 RDS_IB_EVENT_STRING(PATH_MIG_ERR),
52 RDS_IB_EVENT_STRING(DEVICE_FATAL),
53 RDS_IB_EVENT_STRING(PORT_ACTIVE),
54 RDS_IB_EVENT_STRING(PORT_ERR),
55 RDS_IB_EVENT_STRING(LID_CHANGE),
56 RDS_IB_EVENT_STRING(PKEY_CHANGE),
57 RDS_IB_EVENT_STRING(SM_CHANGE),
58 RDS_IB_EVENT_STRING(SRQ_ERR),
59 RDS_IB_EVENT_STRING(SRQ_LIMIT_REACHED),
60 RDS_IB_EVENT_STRING(QP_LAST_WQE_REACHED),
61 RDS_IB_EVENT_STRING(CLIENT_REREGISTER),
62#undef RDS_IB_EVENT_STRING
63};
64
65static char *rds_ib_event_str(enum ib_event_type type)
66{
67 return rds_str_array(rds_ib_event_type_strings,
68 ARRAY_SIZE(rds_ib_event_type_strings), type);
69};
70
41/* 71/*
42 * Set the selected protocol version 72 * Set the selected protocol version
43 */ 73 */
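
rds_ib_event_str() above uses designated initializers so the table stays correct even if the enum values are sparse, paired with a bounds-checked lookup (rds_str_array() in RDS). A self-contained sketch of the same technique for a hypothetical enum:

#include <linux/kernel.h>
#include <linux/stringify.h>

enum my_event { MY_EVENT_UP, MY_EVENT_DOWN, MY_EVENT_RESET };

static const char *const my_event_strings[] = {
#define MY_EVENT_STRING(foo) [MY_EVENT_##foo] = __stringify(MY_EVENT_##foo)
	MY_EVENT_STRING(UP),
	MY_EVENT_STRING(DOWN),
	MY_EVENT_STRING(RESET),
#undef MY_EVENT_STRING
};

static const char *my_event_str(unsigned int type)
{
	/* Sparse enums leave NULL holes in the array; guard for both. */
	if (type < ARRAY_SIZE(my_event_strings) && my_event_strings[type])
		return my_event_strings[type];
	return "unknown";
}
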
@@ -95,7 +125,6 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
95{ 125{
96 const struct rds_ib_connect_private *dp = NULL; 126 const struct rds_ib_connect_private *dp = NULL;
97 struct rds_ib_connection *ic = conn->c_transport_data; 127 struct rds_ib_connection *ic = conn->c_transport_data;
98 struct rds_ib_device *rds_ibdev;
99 struct ib_qp_attr qp_attr; 128 struct ib_qp_attr qp_attr;
100 int err; 129 int err;
101 130
@@ -111,11 +140,21 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
111 } 140 }
112 } 141 }
113 142
114 printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n", 143 if (conn->c_version < RDS_PROTOCOL(3,1)) {
115 &conn->c_faddr, 144 printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed,"
116 RDS_PROTOCOL_MAJOR(conn->c_version), 145 " no longer supported\n",
117 RDS_PROTOCOL_MINOR(conn->c_version), 146 &conn->c_faddr,
118 ic->i_flowctl ? ", flow control" : ""); 147 RDS_PROTOCOL_MAJOR(conn->c_version),
148 RDS_PROTOCOL_MINOR(conn->c_version));
149 rds_conn_destroy(conn);
150 return;
151 } else {
152 printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n",
153 &conn->c_faddr,
154 RDS_PROTOCOL_MAJOR(conn->c_version),
155 RDS_PROTOCOL_MINOR(conn->c_version),
156 ic->i_flowctl ? ", flow control" : "");
157 }
119 158
120 /* 159 /*
121 * Init rings and fill recv. this needs to wait until protocol negotiation 160 * Init rings and fill recv. this needs to wait until protocol negotiation
@@ -125,7 +164,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
125 rds_ib_recv_init_ring(ic); 164 rds_ib_recv_init_ring(ic);
126 /* Post receive buffers - as a side effect, this will update 165 /* Post receive buffers - as a side effect, this will update
127 * the posted credit count. */ 166 * the posted credit count. */
128 rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1); 167 rds_ib_recv_refill(conn, 1);
129 168
130 /* Tune RNR behavior */ 169 /* Tune RNR behavior */
131 rds_ib_tune_rnr(ic, &qp_attr); 170 rds_ib_tune_rnr(ic, &qp_attr);
@@ -135,12 +174,11 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
135 if (err) 174 if (err)
136 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err); 175 printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err);
137 176
138 /* update ib_device with this local ipaddr & conn */ 177 /* update ib_device with this local ipaddr */
139 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 178 err = rds_ib_update_ipaddr(ic->rds_ibdev, conn->c_laddr);
140 err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr);
141 if (err) 179 if (err)
142 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); 180 printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n",
143 rds_ib_add_conn(rds_ibdev, conn); 181 err);
144 182
145 /* If the peer gave us the last packet it saw, process this as if 183 /* If the peer gave us the last packet it saw, process this as if
146 * we had received a regular ACK. */ 184 * we had received a regular ACK. */
@@ -153,18 +191,23 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
153static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, 191static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
154 struct rdma_conn_param *conn_param, 192 struct rdma_conn_param *conn_param,
155 struct rds_ib_connect_private *dp, 193 struct rds_ib_connect_private *dp,
156 u32 protocol_version) 194 u32 protocol_version,
195 u32 max_responder_resources,
196 u32 max_initiator_depth)
157{ 197{
198 struct rds_ib_connection *ic = conn->c_transport_data;
199 struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
200
158 memset(conn_param, 0, sizeof(struct rdma_conn_param)); 201 memset(conn_param, 0, sizeof(struct rdma_conn_param));
159 /* XXX tune these? */ 202
160 conn_param->responder_resources = 1; 203 conn_param->responder_resources =
161 conn_param->initiator_depth = 1; 204 min_t(u32, rds_ibdev->max_responder_resources, max_responder_resources);
205 conn_param->initiator_depth =
206 min_t(u32, rds_ibdev->max_initiator_depth, max_initiator_depth);
162 conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7); 207 conn_param->retry_count = min_t(unsigned int, rds_ib_retry_count, 7);
163 conn_param->rnr_retry_count = 7; 208 conn_param->rnr_retry_count = 7;
164 209
165 if (dp) { 210 if (dp) {
166 struct rds_ib_connection *ic = conn->c_transport_data;
167
168 memset(dp, 0, sizeof(*dp)); 211 memset(dp, 0, sizeof(*dp));
169 dp->dp_saddr = conn->c_laddr; 212 dp->dp_saddr = conn->c_laddr;
170 dp->dp_daddr = conn->c_faddr; 213 dp->dp_daddr = conn->c_faddr;
@@ -189,7 +232,8 @@ static void rds_ib_cm_fill_conn_param(struct rds_connection *conn,
189 232
190static void rds_ib_cq_event_handler(struct ib_event *event, void *data) 233static void rds_ib_cq_event_handler(struct ib_event *event, void *data)
191{ 234{
192 rdsdebug("event %u data %p\n", event->event, data); 235 rdsdebug("event %u (%s) data %p\n",
236 event->event, rds_ib_event_str(event->event), data);
193} 237}
194 238
195static void rds_ib_qp_event_handler(struct ib_event *event, void *data) 239static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
@@ -197,16 +241,18 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
197 struct rds_connection *conn = data; 241 struct rds_connection *conn = data;
198 struct rds_ib_connection *ic = conn->c_transport_data; 242 struct rds_ib_connection *ic = conn->c_transport_data;
199 243
200 rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event); 244 rdsdebug("conn %p ic %p event %u (%s)\n", conn, ic, event->event,
245 rds_ib_event_str(event->event));
201 246
202 switch (event->event) { 247 switch (event->event) {
203 case IB_EVENT_COMM_EST: 248 case IB_EVENT_COMM_EST:
204 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); 249 rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
205 break; 250 break;
206 default: 251 default:
207 rdsdebug("Fatal QP Event %u " 252 rdsdebug("Fatal QP Event %u (%s) "
208 "- connection %pI4->%pI4, reconnecting\n", 253 "- connection %pI4->%pI4, reconnecting\n",
209 event->event, &conn->c_laddr, &conn->c_faddr); 254 event->event, rds_ib_event_str(event->event),
255 &conn->c_laddr, &conn->c_faddr);
210 rds_conn_drop(conn); 256 rds_conn_drop(conn);
211 break; 257 break;
212 } 258 }
@@ -224,18 +270,16 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
224 struct rds_ib_device *rds_ibdev; 270 struct rds_ib_device *rds_ibdev;
225 int ret; 271 int ret;
226 272
227 /* rds_ib_add_one creates a rds_ib_device object per IB device, 273 /*
228 * and allocates a protection domain, memory range and FMR pool 274 * It's normal to see a null device if an incoming connection races
229 * for each. If that fails for any reason, it will not register 275 * with device removal, so we don't print a warning.
230 * the rds_ibdev at all.
231 */ 276 */
232 rds_ibdev = ib_get_client_data(dev, &rds_ib_client); 277 rds_ibdev = rds_ib_get_client_data(dev);
233 if (rds_ibdev == NULL) { 278 if (!rds_ibdev)
234 if (printk_ratelimit())
235 printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n",
236 dev->name);
237 return -EOPNOTSUPP; 279 return -EOPNOTSUPP;
238 } 280
281 /* add the conn now so that connection establishment has the dev */
282 rds_ib_add_conn(rds_ibdev, conn);
239 283
240 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) 284 if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1)
241 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); 285 rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1);
@@ -306,7 +350,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
306 ic->i_send_ring.w_nr * 350 ic->i_send_ring.w_nr *
307 sizeof(struct rds_header), 351 sizeof(struct rds_header),
308 &ic->i_send_hdrs_dma, GFP_KERNEL); 352 &ic->i_send_hdrs_dma, GFP_KERNEL);
309 if (ic->i_send_hdrs == NULL) { 353 if (!ic->i_send_hdrs) {
310 ret = -ENOMEM; 354 ret = -ENOMEM;
311 rdsdebug("ib_dma_alloc_coherent send failed\n"); 355 rdsdebug("ib_dma_alloc_coherent send failed\n");
312 goto out; 356 goto out;
@@ -316,7 +360,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
316 ic->i_recv_ring.w_nr * 360 ic->i_recv_ring.w_nr *
317 sizeof(struct rds_header), 361 sizeof(struct rds_header),
318 &ic->i_recv_hdrs_dma, GFP_KERNEL); 362 &ic->i_recv_hdrs_dma, GFP_KERNEL);
319 if (ic->i_recv_hdrs == NULL) { 363 if (!ic->i_recv_hdrs) {
320 ret = -ENOMEM; 364 ret = -ENOMEM;
321 rdsdebug("ib_dma_alloc_coherent recv failed\n"); 365 rdsdebug("ib_dma_alloc_coherent recv failed\n");
322 goto out; 366 goto out;
@@ -324,22 +368,24 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
324 368
325 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), 369 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
326 &ic->i_ack_dma, GFP_KERNEL); 370 &ic->i_ack_dma, GFP_KERNEL);
327 if (ic->i_ack == NULL) { 371 if (!ic->i_ack) {
328 ret = -ENOMEM; 372 ret = -ENOMEM;
329 rdsdebug("ib_dma_alloc_coherent ack failed\n"); 373 rdsdebug("ib_dma_alloc_coherent ack failed\n");
330 goto out; 374 goto out;
331 } 375 }
332 376
333 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); 377 ic->i_sends = vmalloc_node(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work),
334 if (ic->i_sends == NULL) { 378 ibdev_to_node(dev));
379 if (!ic->i_sends) {
335 ret = -ENOMEM; 380 ret = -ENOMEM;
336 rdsdebug("send allocation failed\n"); 381 rdsdebug("send allocation failed\n");
337 goto out; 382 goto out;
338 } 383 }
339 memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); 384 memset(ic->i_sends, 0, ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work));
340 385
341 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); 386 ic->i_recvs = vmalloc_node(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work),
342 if (ic->i_recvs == NULL) { 387 ibdev_to_node(dev));
388 if (!ic->i_recvs) {
343 ret = -ENOMEM; 389 ret = -ENOMEM;
344 rdsdebug("recv allocation failed\n"); 390 rdsdebug("recv allocation failed\n");
345 goto out; 391 goto out;
@@ -352,6 +398,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
352 ic->i_send_cq, ic->i_recv_cq); 398 ic->i_send_cq, ic->i_recv_cq);
353 399
354out: 400out:
401 rds_ib_dev_put(rds_ibdev);
355 return ret; 402 return ret;
356} 403}
357 404
@@ -409,7 +456,7 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
409 struct rds_ib_connection *ic = NULL; 456 struct rds_ib_connection *ic = NULL;
410 struct rdma_conn_param conn_param; 457 struct rdma_conn_param conn_param;
411 u32 version; 458 u32 version;
412 int err, destroy = 1; 459 int err = 1, destroy = 1;
413 460
414 /* Check whether the remote protocol version matches ours. */ 461 /* Check whether the remote protocol version matches ours. */
415 version = rds_ib_protocol_compatible(event); 462 version = rds_ib_protocol_compatible(event);
@@ -448,7 +495,6 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
448 /* Wait and see - our connect may still be succeeding */ 495 /* Wait and see - our connect may still be succeeding */
449 rds_ib_stats_inc(s_ib_connect_raced); 496 rds_ib_stats_inc(s_ib_connect_raced);
450 } 497 }
451 mutex_unlock(&conn->c_cm_lock);
452 goto out; 498 goto out;
453 } 499 }
454 500
@@ -475,24 +521,23 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
475 err = rds_ib_setup_qp(conn); 521 err = rds_ib_setup_qp(conn);
476 if (err) { 522 if (err) {
477 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err); 523 rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err);
478 mutex_unlock(&conn->c_cm_lock);
479 goto out; 524 goto out;
480 } 525 }
481 526
482 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version); 527 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version,
528 event->param.conn.responder_resources,
529 event->param.conn.initiator_depth);
483 530
484 /* rdma_accept() calls rdma_reject() internally if it fails */ 531 /* rdma_accept() calls rdma_reject() internally if it fails */
485 err = rdma_accept(cm_id, &conn_param); 532 err = rdma_accept(cm_id, &conn_param);
486 mutex_unlock(&conn->c_cm_lock); 533 if (err)
487 if (err) {
488 rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err); 534 rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err);
489 goto out;
490 }
491
492 return 0;
493 535
494out: 536out:
495 rdma_reject(cm_id, NULL, 0); 537 if (conn)
538 mutex_unlock(&conn->c_cm_lock);
539 if (err)
540 rdma_reject(cm_id, NULL, 0);
496 return destroy; 541 return destroy;
497} 542}
498 543
@@ -516,8 +561,8 @@ int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id)
516 goto out; 561 goto out;
517 } 562 }
518 563
519 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION); 564 rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION,
520 565 UINT_MAX, UINT_MAX);
521 ret = rdma_connect(cm_id, &conn_param); 566 ret = rdma_connect(cm_id, &conn_param);
522 if (ret) 567 if (ret)
523 rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret); 568 rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret);
@@ -601,9 +646,19 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
601 ic->i_cm_id, err); 646 ic->i_cm_id, err);
602 } 647 }
603 648
649 /*
650 * We want to wait for tx and rx completion to finish
651 * before we tear down the connection, but we have to be
652 * careful not to get stuck waiting on a send ring that
653 * only has unsignaled sends in it. We've shutdown new
654 * sends before getting here so by waiting for signaled
655 * sends to complete we're ensured that there will be no
656 * more tx processing.
657 */
604 wait_event(rds_ib_ring_empty_wait, 658 wait_event(rds_ib_ring_empty_wait,
605 rds_ib_ring_empty(&ic->i_send_ring) && 659 rds_ib_ring_empty(&ic->i_recv_ring) &&
606 rds_ib_ring_empty(&ic->i_recv_ring)); 660 (atomic_read(&ic->i_signaled_sends) == 0));
661 tasklet_kill(&ic->i_recv_tasklet);
607 662
608 if (ic->i_send_hdrs) 663 if (ic->i_send_hdrs)
609 ib_dma_free_coherent(dev, 664 ib_dma_free_coherent(dev,
@@ -654,9 +709,12 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
654 BUG_ON(ic->rds_ibdev); 709 BUG_ON(ic->rds_ibdev);
655 710
656 /* Clear pending transmit */ 711 /* Clear pending transmit */
657 if (ic->i_rm) { 712 if (ic->i_data_op) {
658 rds_message_put(ic->i_rm); 713 struct rds_message *rm;
659 ic->i_rm = NULL; 714
715 rm = container_of(ic->i_data_op, struct rds_message, data);
716 rds_message_put(rm);
717 ic->i_data_op = NULL;
660 } 718 }
661 719
662 /* Clear the ACK state */ 720 /* Clear the ACK state */
@@ -690,12 +748,19 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
690{ 748{
691 struct rds_ib_connection *ic; 749 struct rds_ib_connection *ic;
692 unsigned long flags; 750 unsigned long flags;
751 int ret;
693 752
694 /* XXX too lazy? */ 753 /* XXX too lazy? */
695 ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL); 754 ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL);
696 if (ic == NULL) 755 if (!ic)
697 return -ENOMEM; 756 return -ENOMEM;
698 757
758 ret = rds_ib_recv_alloc_caches(ic);
759 if (ret) {
760 kfree(ic);
761 return ret;
762 }
763
699 INIT_LIST_HEAD(&ic->ib_node); 764 INIT_LIST_HEAD(&ic->ib_node);
700 tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn, 765 tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn,
701 (unsigned long) ic); 766 (unsigned long) ic);
@@ -703,6 +768,7 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
703#ifndef KERNEL_HAS_ATOMIC64 768#ifndef KERNEL_HAS_ATOMIC64
704 spin_lock_init(&ic->i_ack_lock); 769 spin_lock_init(&ic->i_ack_lock);
705#endif 770#endif
771 atomic_set(&ic->i_signaled_sends, 0);
706 772
707 /* 773 /*
708 * rds_ib_conn_shutdown() waits for these to be emptied so they 774 * rds_ib_conn_shutdown() waits for these to be emptied so they
@@ -744,6 +810,8 @@ void rds_ib_conn_free(void *arg)
744 list_del(&ic->ib_node); 810 list_del(&ic->ib_node);
745 spin_unlock_irq(lock_ptr); 811 spin_unlock_irq(lock_ptr);
746 812
813 rds_ib_recv_free_caches(ic);
814
747 kfree(ic); 815 kfree(ic);
748} 816}
749 817
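
rds_ib_conn_shutdown() above blocks in wait_event() on a compound condition (recv ring empty and no signaled sends outstanding); wait_event() re-evaluates the condition on every wakeup, so completion handlers only need to update state and then wake the queue. A minimal sketch of that contract; my_wq_head and pending are illustrative:

#include <linux/atomic.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(my_wq_head);
static atomic_t pending = ATOMIC_INIT(0);

static void my_complete_one(void)
{
	/* Update state first, then wake; the waiter re-checks the condition. */
	if (atomic_dec_and_test(&pending))
		wake_up(&my_wq_head);
}

static void my_drain(void)
{
	/* Sleeps until every outstanding operation has completed. */
	wait_event(my_wq_head, atomic_read(&pending) == 0);
}
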
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index a54cd63f9e35..18a833c450c8 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -32,11 +32,16 @@
32 */ 32 */
33#include <linux/kernel.h> 33#include <linux/kernel.h>
34#include <linux/slab.h> 34#include <linux/slab.h>
35#include <linux/rculist.h>
35 36
36#include "rds.h" 37#include "rds.h"
37#include "rdma.h"
38#include "ib.h" 38#include "ib.h"
39#include "xlist.h"
39 40
41static struct workqueue_struct *rds_ib_fmr_wq;
42
43static DEFINE_PER_CPU(unsigned long, clean_list_grace);
44#define CLEAN_LIST_BUSY_BIT 0
40 45
41/* 46/*
42 * This is stored as mr->r_trans_private. 47 * This is stored as mr->r_trans_private.
@@ -45,7 +50,11 @@ struct rds_ib_mr {
45 struct rds_ib_device *device; 50 struct rds_ib_device *device;
46 struct rds_ib_mr_pool *pool; 51 struct rds_ib_mr_pool *pool;
47 struct ib_fmr *fmr; 52 struct ib_fmr *fmr;
48 struct list_head list; 53
54 struct xlist_head xlist;
55
56 /* unmap_list is for freeing */
57 struct list_head unmap_list;
49 unsigned int remap_count; 58 unsigned int remap_count;
50 59
51 struct scatterlist *sg; 60 struct scatterlist *sg;
@@ -59,14 +68,16 @@ struct rds_ib_mr {
59 */ 68 */
60struct rds_ib_mr_pool { 69struct rds_ib_mr_pool {
61 struct mutex flush_lock; /* serialize fmr invalidate */ 70 struct mutex flush_lock; /* serialize fmr invalidate */
62 struct work_struct flush_worker; /* flush worker */ 71 struct delayed_work flush_worker; /* flush worker */
63 72
64 spinlock_t list_lock; /* protect variables below */
65 atomic_t item_count; /* total # of MRs */ 73 atomic_t item_count; /* total # of MRs */
66 atomic_t dirty_count; /* # dirty of MRs */ 74 atomic_t dirty_count; /* # dirty of MRs */
67 struct list_head drop_list; /* MRs that have reached their max_maps limit */ 75
68 struct list_head free_list; /* unused MRs */ 76 struct xlist_head drop_list; /* MRs that have reached their max_maps limit */
69	struct list_head clean_list;	/* unused & unmapped MRs */ 77	struct xlist_head free_list;	/* unused MRs */
78	struct xlist_head clean_list;	/* global unused & unmapped MRs */
79 wait_queue_head_t flush_wait;
80
70 atomic_t free_pinned; /* memory pinned by free MRs */ 81 atomic_t free_pinned; /* memory pinned by free MRs */
71 unsigned long max_items; 82 unsigned long max_items;
72 unsigned long max_items_soft; 83 unsigned long max_items_soft;
@@ -74,7 +85,7 @@ struct rds_ib_mr_pool {
74 struct ib_fmr_attr fmr_attr; 85 struct ib_fmr_attr fmr_attr;
75}; 86};
76 87
77static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all); 88static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all, struct rds_ib_mr **);
78static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr); 89static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr);
79static void rds_ib_mr_pool_flush_worker(struct work_struct *work); 90static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
80 91
@@ -83,16 +94,17 @@ static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
83 struct rds_ib_device *rds_ibdev; 94 struct rds_ib_device *rds_ibdev;
84 struct rds_ib_ipaddr *i_ipaddr; 95 struct rds_ib_ipaddr *i_ipaddr;
85 96
86 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 97 rcu_read_lock();
87 spin_lock_irq(&rds_ibdev->spinlock); 98 list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
88 list_for_each_entry(i_ipaddr, &rds_ibdev->ipaddr_list, list) { 99 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
89 if (i_ipaddr->ipaddr == ipaddr) { 100 if (i_ipaddr->ipaddr == ipaddr) {
90 spin_unlock_irq(&rds_ibdev->spinlock); 101 atomic_inc(&rds_ibdev->refcount);
102 rcu_read_unlock();
91 return rds_ibdev; 103 return rds_ibdev;
92 } 104 }
93 } 105 }
94 spin_unlock_irq(&rds_ibdev->spinlock);
95 } 106 }
107 rcu_read_unlock();
96 108
97 return NULL; 109 return NULL;
98} 110}
@@ -108,7 +120,7 @@ static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
108 i_ipaddr->ipaddr = ipaddr; 120 i_ipaddr->ipaddr = ipaddr;
109 121
110 spin_lock_irq(&rds_ibdev->spinlock); 122 spin_lock_irq(&rds_ibdev->spinlock);
111 list_add_tail(&i_ipaddr->list, &rds_ibdev->ipaddr_list); 123 list_add_tail_rcu(&i_ipaddr->list, &rds_ibdev->ipaddr_list);
112 spin_unlock_irq(&rds_ibdev->spinlock); 124 spin_unlock_irq(&rds_ibdev->spinlock);
113 125
114 return 0; 126 return 0;
@@ -116,17 +128,24 @@ static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
116 128
117static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 129static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
118{ 130{
119 struct rds_ib_ipaddr *i_ipaddr, *next; 131 struct rds_ib_ipaddr *i_ipaddr;
132 struct rds_ib_ipaddr *to_free = NULL;
133
120 134
121 spin_lock_irq(&rds_ibdev->spinlock); 135 spin_lock_irq(&rds_ibdev->spinlock);
122 list_for_each_entry_safe(i_ipaddr, next, &rds_ibdev->ipaddr_list, list) { 136 list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
123 if (i_ipaddr->ipaddr == ipaddr) { 137 if (i_ipaddr->ipaddr == ipaddr) {
124 list_del(&i_ipaddr->list); 138 list_del_rcu(&i_ipaddr->list);
125 kfree(i_ipaddr); 139 to_free = i_ipaddr;
126 break; 140 break;
127 } 141 }
128 } 142 }
129 spin_unlock_irq(&rds_ibdev->spinlock); 143 spin_unlock_irq(&rds_ibdev->spinlock);
144
145 if (to_free) {
146 synchronize_rcu();
147 kfree(to_free);
148 }
130} 149}
131 150
132int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 151int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
@@ -134,8 +153,10 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
134 struct rds_ib_device *rds_ibdev_old; 153 struct rds_ib_device *rds_ibdev_old;
135 154
136 rds_ibdev_old = rds_ib_get_device(ipaddr); 155 rds_ibdev_old = rds_ib_get_device(ipaddr);
137 if (rds_ibdev_old) 156 if (rds_ibdev_old) {
138 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr); 157 rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
158 rds_ib_dev_put(rds_ibdev_old);
159 }
139 160
140 return rds_ib_add_ipaddr(rds_ibdev, ipaddr); 161 return rds_ib_add_ipaddr(rds_ibdev, ipaddr);
141} 162}
@@ -150,12 +171,13 @@ void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *con
150 BUG_ON(list_empty(&ic->ib_node)); 171 BUG_ON(list_empty(&ic->ib_node));
151 list_del(&ic->ib_node); 172 list_del(&ic->ib_node);
152 173
153 spin_lock_irq(&rds_ibdev->spinlock); 174 spin_lock(&rds_ibdev->spinlock);
154 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); 175 list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
155 spin_unlock_irq(&rds_ibdev->spinlock); 176 spin_unlock(&rds_ibdev->spinlock);
156 spin_unlock_irq(&ib_nodev_conns_lock); 177 spin_unlock_irq(&ib_nodev_conns_lock);
157 178
158 ic->rds_ibdev = rds_ibdev; 179 ic->rds_ibdev = rds_ibdev;
180 atomic_inc(&rds_ibdev->refcount);
159} 181}
160 182
161void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) 183void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
@@ -175,18 +197,18 @@ void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *
175 spin_unlock(&ib_nodev_conns_lock); 197 spin_unlock(&ib_nodev_conns_lock);
176 198
177 ic->rds_ibdev = NULL; 199 ic->rds_ibdev = NULL;
200 rds_ib_dev_put(rds_ibdev);
178} 201}
179 202
180void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock) 203void rds_ib_destroy_nodev_conns(void)
181{ 204{
182 struct rds_ib_connection *ic, *_ic; 205 struct rds_ib_connection *ic, *_ic;
183 LIST_HEAD(tmp_list); 206 LIST_HEAD(tmp_list);
184 207
185 /* avoid calling conn_destroy with irqs off */ 208 /* avoid calling conn_destroy with irqs off */
186 spin_lock_irq(list_lock); 209 spin_lock_irq(&ib_nodev_conns_lock);
187 list_splice(list, &tmp_list); 210 list_splice(&ib_nodev_conns, &tmp_list);
188 INIT_LIST_HEAD(list); 211 spin_unlock_irq(&ib_nodev_conns_lock);
189 spin_unlock_irq(list_lock);
190 212
191 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) 213 list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node)
192 rds_conn_destroy(ic->conn); 214 rds_conn_destroy(ic->conn);
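/*
 * The hunk above is an instance of the splice-then-process idiom: take the
 * whole list in one step under the irq-disabled lock, then run the
 * destructor, which may sleep, with the lock dropped and irqs back on.
 * A hedged standalone sketch with hypothetical names:
 */
struct demo_conn {
        struct list_head node;
};

static LIST_HEAD(demo_conns);
static DEFINE_SPINLOCK(demo_conns_lock);

static void demo_conn_destroy(struct demo_conn *c)
{
        kfree(c);       /* stand-in for the real, possibly sleeping, teardown */
}

static void demo_destroy_all(void)
{
        struct demo_conn *c, *tmp;
        LIST_HEAD(local);

        spin_lock_irq(&demo_conns_lock);
        list_splice_init(&demo_conns, &local);  /* demo_conns is now empty */
        spin_unlock_irq(&demo_conns_lock);

        list_for_each_entry_safe(c, tmp, &local, node)
                demo_conn_destroy(c);           /* safe to sleep here */
}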
@@ -200,12 +222,12 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
200 if (!pool) 222 if (!pool)
201 return ERR_PTR(-ENOMEM); 223 return ERR_PTR(-ENOMEM);
202 224
203 INIT_LIST_HEAD(&pool->free_list); 225 INIT_XLIST_HEAD(&pool->free_list);
204 INIT_LIST_HEAD(&pool->drop_list); 226 INIT_XLIST_HEAD(&pool->drop_list);
205 INIT_LIST_HEAD(&pool->clean_list); 227 INIT_XLIST_HEAD(&pool->clean_list);
206 mutex_init(&pool->flush_lock); 228 mutex_init(&pool->flush_lock);
207 spin_lock_init(&pool->list_lock); 229 init_waitqueue_head(&pool->flush_wait);
208 INIT_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); 230 INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
209 231
210 pool->fmr_attr.max_pages = fmr_message_size; 232 pool->fmr_attr.max_pages = fmr_message_size;
211 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; 233 pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
@@ -233,34 +255,60 @@ void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_co
233 255
234void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) 256void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
235{ 257{
236 flush_workqueue(rds_wq); 258 cancel_delayed_work_sync(&pool->flush_worker);
237 rds_ib_flush_mr_pool(pool, 1); 259 rds_ib_flush_mr_pool(pool, 1, NULL);
238 WARN_ON(atomic_read(&pool->item_count)); 260 WARN_ON(atomic_read(&pool->item_count));
239 WARN_ON(atomic_read(&pool->free_pinned)); 261 WARN_ON(atomic_read(&pool->free_pinned));
240 kfree(pool); 262 kfree(pool);
241} 263}
242 264
265static void refill_local(struct rds_ib_mr_pool *pool, struct xlist_head *xl,
266 struct rds_ib_mr **ibmr_ret)
267{
268 struct xlist_head *ibmr_xl;
269 ibmr_xl = xlist_del_head_fast(xl);
270 *ibmr_ret = list_entry(ibmr_xl, struct rds_ib_mr, xlist);
271}
272
243static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) 273static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool)
244{ 274{
245 struct rds_ib_mr *ibmr = NULL; 275 struct rds_ib_mr *ibmr = NULL;
246 unsigned long flags; 276 struct xlist_head *ret;
277 unsigned long *flag;
247 278
248 spin_lock_irqsave(&pool->list_lock, flags); 279 preempt_disable();
249 if (!list_empty(&pool->clean_list)) { 280 flag = &__get_cpu_var(clean_list_grace);
250 ibmr = list_entry(pool->clean_list.next, struct rds_ib_mr, list); 281 set_bit(CLEAN_LIST_BUSY_BIT, flag);
251 list_del_init(&ibmr->list); 282 ret = xlist_del_head(&pool->clean_list);
252 } 283 if (ret)
253 spin_unlock_irqrestore(&pool->list_lock, flags); 284 ibmr = list_entry(ret, struct rds_ib_mr, xlist);
254 285
286 clear_bit(CLEAN_LIST_BUSY_BIT, flag);
287 preempt_enable();
255 return ibmr; 288 return ibmr;
256} 289}
257 290
291static inline void wait_clean_list_grace(void)
292{
293 int cpu;
294 unsigned long *flag;
295
296 for_each_online_cpu(cpu) {
297 flag = &per_cpu(clean_list_grace, cpu);
298 while (test_bit(CLEAN_LIST_BUSY_BIT, flag))
299 cpu_relax();
300 }
301}
302
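/*
 * xlist_head and friends come from net/rds/xlist.h, introduced by this same
 * series but not shown in this diff. Approximately -- treat this as the
 * editor's reconstruction, not the file's verbatim contents -- it is a
 * cmpxchg-based lock-free stack:
 */
struct xlist_head {
        struct xlist_head *next;
};

/* push the chain new..tail onto the front of head */
static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail,
                             struct xlist_head *head)
{
        struct xlist_head *cur, *check;

        while (1) {
                cur = head->next;
                tail->next = cur;
                check = cmpxchg(&head->next, cur, new);
                if (check == cur)
                        break;
        }
}

/* pop the front entry; note the read of cur->next below */
static inline struct xlist_head *xlist_del_head(struct xlist_head *head)
{
        struct xlist_head *cur, *check, *next;

        while (1) {
                cur = head->next;
                if (!cur)
                        break;
                next = cur->next;       /* unsafe if cur is freed concurrently */
                check = cmpxchg(&head->next, cur, next);
                if (check == cur)
                        break;
        }
        return cur;
}
/*
 * The CLEAN_LIST_BUSY_BIT / wait_clean_list_grace() pair above exists
 * precisely because xlist_del_head() dereferences cur->next: an entry pulled
 * off the clean list must not be pushed back onto it while another CPU may
 * still be spinning on that same entry.
 */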
258static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) 303static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
259{ 304{
260 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 305 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
261 struct rds_ib_mr *ibmr = NULL; 306 struct rds_ib_mr *ibmr = NULL;
262 int err = 0, iter = 0; 307 int err = 0, iter = 0;
263 308
309 if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
310 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
311
264 while (1) { 312 while (1) {
265 ibmr = rds_ib_reuse_fmr(pool); 313 ibmr = rds_ib_reuse_fmr(pool);
266 if (ibmr) 314 if (ibmr)
@@ -287,19 +335,24 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
287 335
288 /* We do have some empty MRs. Flush them out. */ 336 /* We do have some empty MRs. Flush them out. */
289 rds_ib_stats_inc(s_ib_rdma_mr_pool_wait); 337 rds_ib_stats_inc(s_ib_rdma_mr_pool_wait);
290 rds_ib_flush_mr_pool(pool, 0); 338 rds_ib_flush_mr_pool(pool, 0, &ibmr);
339 if (ibmr)
340 return ibmr;
291 } 341 }
292 342
293 ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL); 343 ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, rdsibdev_to_node(rds_ibdev));
294 if (!ibmr) { 344 if (!ibmr) {
295 err = -ENOMEM; 345 err = -ENOMEM;
296 goto out_no_cigar; 346 goto out_no_cigar;
297 } 347 }
298 348
349 memset(ibmr, 0, sizeof(*ibmr));
350
299 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd, 351 ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd,
300 (IB_ACCESS_LOCAL_WRITE | 352 (IB_ACCESS_LOCAL_WRITE |
301 IB_ACCESS_REMOTE_READ | 353 IB_ACCESS_REMOTE_READ |
302 IB_ACCESS_REMOTE_WRITE), 354 IB_ACCESS_REMOTE_WRITE |
355 IB_ACCESS_REMOTE_ATOMIC),
303 &pool->fmr_attr); 356 &pool->fmr_attr);
304 if (IS_ERR(ibmr->fmr)) { 357 if (IS_ERR(ibmr->fmr)) {
305 err = PTR_ERR(ibmr->fmr); 358 err = PTR_ERR(ibmr->fmr);
@@ -367,7 +420,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm
367 if (page_cnt > fmr_message_size) 420 if (page_cnt > fmr_message_size)
368 return -EINVAL; 421 return -EINVAL;
369 422
370 dma_pages = kmalloc(sizeof(u64) * page_cnt, GFP_ATOMIC); 423 dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
424 rdsibdev_to_node(rds_ibdev));
371 if (!dma_pages) 425 if (!dma_pages)
372 return -ENOMEM; 426 return -ENOMEM;
373 427
@@ -441,7 +495,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
441 495
442 /* FIXME we need a way to tell a r/w MR 496 /* FIXME we need a way to tell a r/w MR
443 * from a r/o MR */ 497 * from a r/o MR */
444 BUG_ON(in_interrupt()); 498 BUG_ON(irqs_disabled());
445 set_page_dirty(page); 499 set_page_dirty(page);
446 put_page(page); 500 put_page(page);
447 } 501 }
@@ -477,33 +531,109 @@ static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int fr
477} 531}
478 532
479/* 533/*
534 * given an xlist of mrs, put them all into the list_head for more processing
535 */
536static void xlist_append_to_list(struct xlist_head *xlist, struct list_head *list)
537{
538 struct rds_ib_mr *ibmr;
539 struct xlist_head splice;
540 struct xlist_head *cur;
541 struct xlist_head *next;
542
543 splice.next = NULL;
544 xlist_splice(xlist, &splice);
545 cur = splice.next;
546 while (cur) {
547 next = cur->next;
548 ibmr = list_entry(cur, struct rds_ib_mr, xlist);
549 list_add_tail(&ibmr->unmap_list, list);
550 cur = next;
551 }
552}
553
554/*
555 * this takes a list head of mrs and turns it into an xlist of clusters.
556 * each cluster has an xlist of MR_CLUSTER_SIZE mrs that are ready for
557 * reuse.
558 */
559static void list_append_to_xlist(struct rds_ib_mr_pool *pool,
560 struct list_head *list, struct xlist_head *xlist,
561 struct xlist_head **tail_ret)
562{
563 struct rds_ib_mr *ibmr;
564 struct xlist_head *cur_mr = xlist;
565 struct xlist_head *tail_mr = NULL;
566
567 list_for_each_entry(ibmr, list, unmap_list) {
568 tail_mr = &ibmr->xlist;
569 tail_mr->next = NULL;
570 cur_mr->next = tail_mr;
571 cur_mr = tail_mr;
572 }
573 *tail_ret = tail_mr;
574}
575
576/*
480 * Flush our pool of MRs. 577 * Flush our pool of MRs.
481 * At a minimum, all currently unused MRs are unmapped. 578 * At a minimum, all currently unused MRs are unmapped.
482 * If the number of MRs allocated exceeds the limit, we also try 579 * If the number of MRs allocated exceeds the limit, we also try
483 * to free as many MRs as needed to get back to this limit. 580 * to free as many MRs as needed to get back to this limit.
484 */ 581 */
485static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all) 582static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
583 int free_all, struct rds_ib_mr **ibmr_ret)
486{ 584{
487 struct rds_ib_mr *ibmr, *next; 585 struct rds_ib_mr *ibmr, *next;
586 struct xlist_head clean_xlist;
587 struct xlist_head *clean_tail;
488 LIST_HEAD(unmap_list); 588 LIST_HEAD(unmap_list);
489 LIST_HEAD(fmr_list); 589 LIST_HEAD(fmr_list);
490 unsigned long unpinned = 0; 590 unsigned long unpinned = 0;
491 unsigned long flags;
492 unsigned int nfreed = 0, ncleaned = 0, free_goal; 591 unsigned int nfreed = 0, ncleaned = 0, free_goal;
493 int ret = 0; 592 int ret = 0;
494 593
495 rds_ib_stats_inc(s_ib_rdma_mr_pool_flush); 594 rds_ib_stats_inc(s_ib_rdma_mr_pool_flush);
496 595
497 mutex_lock(&pool->flush_lock); 596 if (ibmr_ret) {
597 DEFINE_WAIT(wait);
598 while (!mutex_trylock(&pool->flush_lock)) {
599 ibmr = rds_ib_reuse_fmr(pool);
600 if (ibmr) {
601 *ibmr_ret = ibmr;
602 finish_wait(&pool->flush_wait, &wait);
603 goto out_nolock;
604 }
605
606 prepare_to_wait(&pool->flush_wait, &wait,
607 TASK_UNINTERRUPTIBLE);
608 if (xlist_empty(&pool->clean_list))
609 schedule();
610
611 ibmr = rds_ib_reuse_fmr(pool);
612 if (ibmr) {
613 *ibmr_ret = ibmr;
614 finish_wait(&pool->flush_wait, &wait);
615 goto out_nolock;
616 }
617 }
618 finish_wait(&pool->flush_wait, &wait);
619 } else
620 mutex_lock(&pool->flush_lock);
621
622 if (ibmr_ret) {
623 ibmr = rds_ib_reuse_fmr(pool);
624 if (ibmr) {
625 *ibmr_ret = ibmr;
626 goto out;
627 }
628 }
498 629
499 spin_lock_irqsave(&pool->list_lock, flags);
500 /* Get the list of all MRs to be dropped. Ordering matters - 630 /* Get the list of all MRs to be dropped. Ordering matters -
501 * we want to put drop_list ahead of free_list. */ 631 * we want to put drop_list ahead of free_list.
502 list_splice_init(&pool->free_list, &unmap_list); 632 */
503 list_splice_init(&pool->drop_list, &unmap_list); 633 xlist_append_to_list(&pool->drop_list, &unmap_list);
634 xlist_append_to_list(&pool->free_list, &unmap_list);
504 if (free_all) 635 if (free_all)
505 list_splice_init(&pool->clean_list, &unmap_list); 636 xlist_append_to_list(&pool->clean_list, &unmap_list);
506 spin_unlock_irqrestore(&pool->list_lock, flags);
507 637
508 free_goal = rds_ib_flush_goal(pool, free_all); 638 free_goal = rds_ib_flush_goal(pool, free_all);
509 639
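/*
 * The flush path above combines mutex_trylock() with the standard
 * prepare_to_wait()/schedule()/finish_wait() idiom so that a caller who only
 * needs one MR back can grab one from the clean list instead of sleeping
 * behind the active flusher. The bare idiom, with hypothetical names
 * (demo_try_get() and demo_available() are assumed helpers, not RDS code):
 */
#include <linux/sched.h>
#include <linux/wait.h>

static DECLARE_WAIT_QUEUE_HEAD(demo_waitq);
static void *demo_try_get(void);        /* returns a resource or NULL */
static bool demo_available(void);       /* cheap re-check before sleeping */

static void *demo_get_resource_slow(void)
{
        DEFINE_WAIT(wait);
        void *res;

        while (!(res = demo_try_get())) {
                prepare_to_wait(&demo_waitq, &wait, TASK_UNINTERRUPTIBLE);
                if (!demo_available())
                        schedule();     /* woken when a resource is freed */
                finish_wait(&demo_waitq, &wait);
        }
        return res;
}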
@@ -511,19 +641,20 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
511 goto out; 641 goto out;
512 642
513 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ 643 /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
514 list_for_each_entry(ibmr, &unmap_list, list) 644 list_for_each_entry(ibmr, &unmap_list, unmap_list)
515 list_add(&ibmr->fmr->list, &fmr_list); 645 list_add(&ibmr->fmr->list, &fmr_list);
646
516 ret = ib_unmap_fmr(&fmr_list); 647 ret = ib_unmap_fmr(&fmr_list);
517 if (ret) 648 if (ret)
518 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret); 649 printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret);
519 650
520 /* Now we can destroy the DMA mapping and unpin any pages */ 651 /* Now we can destroy the DMA mapping and unpin any pages */
521 list_for_each_entry_safe(ibmr, next, &unmap_list, list) { 652 list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
522 unpinned += ibmr->sg_len; 653 unpinned += ibmr->sg_len;
523 __rds_ib_teardown_mr(ibmr); 654 __rds_ib_teardown_mr(ibmr);
524 if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) { 655 if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) {
525 rds_ib_stats_inc(s_ib_rdma_mr_free); 656 rds_ib_stats_inc(s_ib_rdma_mr_free);
526 list_del(&ibmr->list); 657 list_del(&ibmr->unmap_list);
527 ib_dealloc_fmr(ibmr->fmr); 658 ib_dealloc_fmr(ibmr->fmr);
528 kfree(ibmr); 659 kfree(ibmr);
529 nfreed++; 660 nfreed++;
@@ -531,9 +662,27 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
531 ncleaned++; 662 ncleaned++;
532 } 663 }
533 664
534 spin_lock_irqsave(&pool->list_lock, flags); 665 if (!list_empty(&unmap_list)) {
535 list_splice(&unmap_list, &pool->clean_list); 666 /* we have to make sure that none of the things we're about
536 spin_unlock_irqrestore(&pool->list_lock, flags); 667 * to put on the clean list would race with other cpus trying
668 * to pull items off. The xlist would explode if we managed to
669 * remove something from the clean list and then add it back again
670 * while another CPU was spinning on that same item in xlist_del_head.
671 *
672 * This is pretty unlikely, but just in case, wait for an xlist grace period
673 * here before adding anything back into the clean list.
674 */
675 wait_clean_list_grace();
676
677 list_append_to_xlist(pool, &unmap_list, &clean_xlist, &clean_tail);
678 if (ibmr_ret)
679 refill_local(pool, &clean_xlist, ibmr_ret);
680
681 /* refill_local may have emptied our list */
682 if (!xlist_empty(&clean_xlist))
683 xlist_add(clean_xlist.next, clean_tail, &pool->clean_list);
684
685 }
537 686
538 atomic_sub(unpinned, &pool->free_pinned); 687 atomic_sub(unpinned, &pool->free_pinned);
539 atomic_sub(ncleaned, &pool->dirty_count); 688 atomic_sub(ncleaned, &pool->dirty_count);
@@ -541,14 +690,35 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all)
541 690
542out: 691out:
543 mutex_unlock(&pool->flush_lock); 692 mutex_unlock(&pool->flush_lock);
693 if (waitqueue_active(&pool->flush_wait))
694 wake_up(&pool->flush_wait);
695out_nolock:
544 return ret; 696 return ret;
545} 697}
546 698
699int rds_ib_fmr_init(void)
700{
701 rds_ib_fmr_wq = create_workqueue("rds_fmr_flushd");
702 if (!rds_ib_fmr_wq)
703 return -ENOMEM;
704 return 0;
705}
706
707/*
708 * By the time this is called all the IB devices should have been torn down and
709 * had their pools freed. As each pool is freed its work struct is waited on,
710 * so the pool flushing work queue should be idle by the time we get here.
711 */
712void rds_ib_fmr_exit(void)
713{
714 destroy_workqueue(rds_ib_fmr_wq);
715}
716
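/*
 * The conversion above moves the pool from a plain work_struct on the shared
 * rds_wq to a delayed_work on a private workqueue. The complete lifecycle of
 * that pattern, sketched with hypothetical names:
 */
static struct workqueue_struct *demo_wq;

struct demo_pool {
        struct delayed_work flush_worker;
};

static void demo_flush_fn(struct work_struct *work)
{
        /* for delayed_work, container_of() goes through the .work member */
        struct demo_pool *pool =
                container_of(work, struct demo_pool, flush_worker.work);
        (void)pool;
}

static int demo_init(struct demo_pool *pool)
{
        demo_wq = create_workqueue("demo_flushd");
        if (!demo_wq)
                return -ENOMEM;
        INIT_DELAYED_WORK(&pool->flush_worker, demo_flush_fn);
        queue_delayed_work(demo_wq, &pool->flush_worker, 10); /* 10 jiffies */
        return 0;
}

static void demo_exit(struct demo_pool *pool)
{
        cancel_delayed_work_sync(&pool->flush_worker);
        destroy_workqueue(demo_wq);
}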
547static void rds_ib_mr_pool_flush_worker(struct work_struct *work) 717static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
548{ 718{
549 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker); 719 struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
550 720
551 rds_ib_flush_mr_pool(pool, 0); 721 rds_ib_flush_mr_pool(pool, 0, NULL);
552} 722}
553 723
554void rds_ib_free_mr(void *trans_private, int invalidate) 724void rds_ib_free_mr(void *trans_private, int invalidate)
@@ -556,47 +726,49 @@ void rds_ib_free_mr(void *trans_private, int invalidate)
556 struct rds_ib_mr *ibmr = trans_private; 726 struct rds_ib_mr *ibmr = trans_private;
557 struct rds_ib_device *rds_ibdev = ibmr->device; 727 struct rds_ib_device *rds_ibdev = ibmr->device;
558 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 728 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
559 unsigned long flags;
560 729
561 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); 730 rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
562 731
563 /* Return it to the pool's free list */ 732 /* Return it to the pool's free list */
564 spin_lock_irqsave(&pool->list_lock, flags);
565 if (ibmr->remap_count >= pool->fmr_attr.max_maps) 733 if (ibmr->remap_count >= pool->fmr_attr.max_maps)
566 list_add(&ibmr->list, &pool->drop_list); 734 xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->drop_list);
567 else 735 else
568 list_add(&ibmr->list, &pool->free_list); 736 xlist_add(&ibmr->xlist, &ibmr->xlist, &pool->free_list);
569 737
570 atomic_add(ibmr->sg_len, &pool->free_pinned); 738 atomic_add(ibmr->sg_len, &pool->free_pinned);
571 atomic_inc(&pool->dirty_count); 739 atomic_inc(&pool->dirty_count);
572 spin_unlock_irqrestore(&pool->list_lock, flags);
573 740
574 /* If we've pinned too many pages, request a flush */ 741 /* If we've pinned too many pages, request a flush */
575 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || 742 if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
576 atomic_read(&pool->dirty_count) >= pool->max_items / 10) 743 atomic_read(&pool->dirty_count) >= pool->max_items / 10)
577 queue_work(rds_wq, &pool->flush_worker); 744 queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
578 745
579 if (invalidate) { 746 if (invalidate) {
580 if (likely(!in_interrupt())) { 747 if (likely(!in_interrupt())) {
581 rds_ib_flush_mr_pool(pool, 0); 748 rds_ib_flush_mr_pool(pool, 0, NULL);
582 } else { 749 } else {
583 /* We get here if the user created a MR marked 750 /* We get here if the user created a MR marked
584 * as use_once and invalidate at the same time. */ 751 * as use_once and invalidate at the same time. */
585 queue_work(rds_wq, &pool->flush_worker); 752 queue_delayed_work(rds_ib_fmr_wq,
753 &pool->flush_worker, 10);
586 } 754 }
587 } 755 }
756
757 rds_ib_dev_put(rds_ibdev);
588} 758}
589 759
590void rds_ib_flush_mrs(void) 760void rds_ib_flush_mrs(void)
591{ 761{
592 struct rds_ib_device *rds_ibdev; 762 struct rds_ib_device *rds_ibdev;
593 763
764 down_read(&rds_ib_devices_lock);
594 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 765 list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
595 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; 766 struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
596 767
597 if (pool) 768 if (pool)
598 rds_ib_flush_mr_pool(pool, 0); 769 rds_ib_flush_mr_pool(pool, 0, NULL);
599 } 770 }
771 up_read(&rds_ib_devices_lock);
600} 772}
601 773
602void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, 774void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
@@ -628,6 +800,7 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
628 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret); 800 printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret);
629 801
630 ibmr->device = rds_ibdev; 802 ibmr->device = rds_ibdev;
803 rds_ibdev = NULL;
631 804
632 out: 805 out:
633 if (ret) { 806 if (ret) {
@@ -635,5 +808,8 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
635 rds_ib_free_mr(ibmr, 0); 808 rds_ib_free_mr(ibmr, 0);
636 ibmr = ERR_PTR(ret); 809 ibmr = ERR_PTR(ret);
637 } 810 }
811 if (rds_ibdev)
812 rds_ib_dev_put(rds_ibdev);
638 return ibmr; 813 return ibmr;
639} 814}
815
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index c74e9904a6b2..e29e0ca32f74 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -43,42 +43,6 @@ static struct kmem_cache *rds_ib_incoming_slab;
43static struct kmem_cache *rds_ib_frag_slab; 43static struct kmem_cache *rds_ib_frag_slab;
44static atomic_t rds_ib_allocation = ATOMIC_INIT(0); 44static atomic_t rds_ib_allocation = ATOMIC_INIT(0);
45 45
46static void rds_ib_frag_drop_page(struct rds_page_frag *frag)
47{
48 rdsdebug("frag %p page %p\n", frag, frag->f_page);
49 __free_page(frag->f_page);
50 frag->f_page = NULL;
51}
52
53static void rds_ib_frag_free(struct rds_page_frag *frag)
54{
55 rdsdebug("frag %p page %p\n", frag, frag->f_page);
56 BUG_ON(frag->f_page != NULL);
57 kmem_cache_free(rds_ib_frag_slab, frag);
58}
59
60/*
61 * We map a page at a time. Its fragments are posted in order. This
62 * is called in fragment order as the fragments get send completion events.
63 * Only the last frag in the page performs the unmapping.
64 *
65 * It's OK for ring cleanup to call this in whatever order it likes because
66 * DMA is not in flight and so we can unmap while other ring entries still
67 * hold page references in their frags.
68 */
69static void rds_ib_recv_unmap_page(struct rds_ib_connection *ic,
70 struct rds_ib_recv_work *recv)
71{
72 struct rds_page_frag *frag = recv->r_frag;
73
74 rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page);
75 if (frag->f_mapped)
76 ib_dma_unmap_page(ic->i_cm_id->device,
77 frag->f_mapped,
78 RDS_FRAG_SIZE, DMA_FROM_DEVICE);
79 frag->f_mapped = 0;
80}
81
82void rds_ib_recv_init_ring(struct rds_ib_connection *ic) 46void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
83{ 47{
84 struct rds_ib_recv_work *recv; 48 struct rds_ib_recv_work *recv;
@@ -95,16 +59,161 @@ void rds_ib_recv_init_ring(struct rds_ib_connection *ic)
95 recv->r_wr.sg_list = recv->r_sge; 59 recv->r_wr.sg_list = recv->r_sge;
96 recv->r_wr.num_sge = RDS_IB_RECV_SGE; 60 recv->r_wr.num_sge = RDS_IB_RECV_SGE;
97 61
98 sge = rds_ib_data_sge(ic, recv->r_sge); 62 sge = &recv->r_sge[0];
63 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header));
64 sge->length = sizeof(struct rds_header);
65 sge->lkey = ic->i_mr->lkey;
66
67 sge = &recv->r_sge[1];
99 sge->addr = 0; 68 sge->addr = 0;
100 sge->length = RDS_FRAG_SIZE; 69 sge->length = RDS_FRAG_SIZE;
101 sge->lkey = ic->i_mr->lkey; 70 sge->lkey = ic->i_mr->lkey;
71 }
72}
102 73
103 sge = rds_ib_header_sge(ic, recv->r_sge); 74/*
104 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); 75 * The entire 'from' list, including the from element itself, is put on
105 sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); 75 * The entire 'from' list, including the from element itself, is put onto
106 sge->length = sizeof(struct rds_header); 76 * the tail of the 'to' list.
78static void list_splice_entire_tail(struct list_head *from,
79 struct list_head *to)
80{
81 struct list_head *from_last = from->prev;
82
83 list_splice_tail(from_last, to);
84 list_add_tail(from_last, to);
85}
86
87static void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache)
88{
89 struct list_head *tmp;
90
91 tmp = xchg(&cache->xfer, NULL);
92 if (tmp) {
93 if (cache->ready)
94 list_splice_entire_tail(tmp, cache->ready);
95 else
96 cache->ready = tmp;
97 }
98}
99
100static int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache)
101{
102 struct rds_ib_cache_head *head;
103 int cpu;
104
105 cache->percpu = alloc_percpu(struct rds_ib_cache_head);
106 if (!cache->percpu)
107 return -ENOMEM;
108
109 for_each_possible_cpu(cpu) {
110 head = per_cpu_ptr(cache->percpu, cpu);
111 head->first = NULL;
112 head->count = 0;
113 }
114 cache->xfer = NULL;
115 cache->ready = NULL;
116
117 return 0;
118}
119
120int rds_ib_recv_alloc_caches(struct rds_ib_connection *ic)
121{
122 int ret;
123
124 ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs);
125 if (!ret) {
126 ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags);
127 if (ret)
128 free_percpu(ic->i_cache_incs.percpu);
107 } 129 }
130
131 return ret;
132}
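/*
 * The caches allocated above follow the usual percpu skeleton: alloc_percpu()
 * hands back one instance per possible CPU, per_cpu_ptr() addresses each one
 * for initialisation, and free_percpu() releases the lot. Condensed here with
 * hypothetical demo_* names:
 */
struct demo_head {
        struct list_head *first;
        unsigned long count;
};

struct demo_cache {
        struct demo_head __percpu *percpu;
        struct list_head *xfer;
        struct list_head *ready;
};

static int demo_cache_init(struct demo_cache *cache)
{
        struct demo_head *head;
        int cpu;

        cache->percpu = alloc_percpu(struct demo_head);
        if (!cache->percpu)
                return -ENOMEM;

        for_each_possible_cpu(cpu) {
                head = per_cpu_ptr(cache->percpu, cpu);
                head->first = NULL;
                head->count = 0;
        }
        cache->xfer = NULL;
        cache->ready = NULL;
        return 0;
}

static void demo_cache_destroy(struct demo_cache *cache)
{
        free_percpu(cache->percpu);
}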
133
134static void rds_ib_cache_splice_all_lists(struct rds_ib_refill_cache *cache,
135 struct list_head *caller_list)
136{
137 struct rds_ib_cache_head *head;
138 int cpu;
139
140 for_each_possible_cpu(cpu) {
141 head = per_cpu_ptr(cache->percpu, cpu);
142 if (head->first) {
143 list_splice_entire_tail(head->first, caller_list);
144 head->first = NULL;
145 }
146 }
147
148 if (cache->ready) {
149 list_splice_entire_tail(cache->ready, caller_list);
150 cache->ready = NULL;
151 }
152}
153
154void rds_ib_recv_free_caches(struct rds_ib_connection *ic)
155{
156 struct rds_ib_incoming *inc;
157 struct rds_ib_incoming *inc_tmp;
158 struct rds_page_frag *frag;
159 struct rds_page_frag *frag_tmp;
160 LIST_HEAD(list);
161
162 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
163 rds_ib_cache_splice_all_lists(&ic->i_cache_incs, &list);
164 free_percpu(ic->i_cache_incs.percpu);
165
166 list_for_each_entry_safe(inc, inc_tmp, &list, ii_cache_entry) {
167 list_del(&inc->ii_cache_entry);
168 WARN_ON(!list_empty(&inc->ii_frags));
169 kmem_cache_free(rds_ib_incoming_slab, inc);
170 }
171
172 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
173 rds_ib_cache_splice_all_lists(&ic->i_cache_frags, &list);
174 free_percpu(ic->i_cache_frags.percpu);
175
176 list_for_each_entry_safe(frag, frag_tmp, &list, f_cache_entry) {
177 list_del(&frag->f_cache_entry);
178 WARN_ON(!list_empty(&frag->f_item));
179 kmem_cache_free(rds_ib_frag_slab, frag);
180 }
181}
182
183/* fwd decl */
184static void rds_ib_recv_cache_put(struct list_head *new_item,
185 struct rds_ib_refill_cache *cache);
186static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache);
187
188
189/* Recycle frag and attached recv buffer f_sg */
190static void rds_ib_frag_free(struct rds_ib_connection *ic,
191 struct rds_page_frag *frag)
192{
193 rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg));
194
195 rds_ib_recv_cache_put(&frag->f_cache_entry, &ic->i_cache_frags);
196}
197
198/* Recycle inc after freeing attached frags */
199void rds_ib_inc_free(struct rds_incoming *inc)
200{
201 struct rds_ib_incoming *ibinc;
202 struct rds_page_frag *frag;
203 struct rds_page_frag *pos;
204 struct rds_ib_connection *ic = inc->i_conn->c_transport_data;
205
206 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc);
207
208 /* Free attached frags */
209 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) {
210 list_del_init(&frag->f_item);
211 rds_ib_frag_free(ic, frag);
212 }
213 BUG_ON(!list_empty(&ibinc->ii_frags));
214
215 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
216 rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs);
108} 217}
109 218
110static void rds_ib_recv_clear_one(struct rds_ib_connection *ic, 219static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
@@ -115,10 +224,8 @@ static void rds_ib_recv_clear_one(struct rds_ib_connection *ic,
115 recv->r_ibinc = NULL; 224 recv->r_ibinc = NULL;
116 } 225 }
117 if (recv->r_frag) { 226 if (recv->r_frag) {
118 rds_ib_recv_unmap_page(ic, recv); 227 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
119 if (recv->r_frag->f_page) 228 rds_ib_frag_free(ic, recv->r_frag);
120 rds_ib_frag_drop_page(recv->r_frag);
121 rds_ib_frag_free(recv->r_frag);
122 recv->r_frag = NULL; 229 recv->r_frag = NULL;
123 } 230 }
124} 231}
@@ -129,84 +236,111 @@ void rds_ib_recv_clear_ring(struct rds_ib_connection *ic)
129 236
130 for (i = 0; i < ic->i_recv_ring.w_nr; i++) 237 for (i = 0; i < ic->i_recv_ring.w_nr; i++)
131 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]); 238 rds_ib_recv_clear_one(ic, &ic->i_recvs[i]);
132
133 if (ic->i_frag.f_page)
134 rds_ib_frag_drop_page(&ic->i_frag);
135} 239}
136 240
137static int rds_ib_recv_refill_one(struct rds_connection *conn, 241static struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *ic,
138 struct rds_ib_recv_work *recv, 242 gfp_t slab_mask)
139 gfp_t kptr_gfp, gfp_t page_gfp)
140{ 243{
141 struct rds_ib_connection *ic = conn->c_transport_data; 244 struct rds_ib_incoming *ibinc;
142 dma_addr_t dma_addr; 245 struct list_head *cache_item;
143 struct ib_sge *sge; 246 int avail_allocs;
144 int ret = -ENOMEM;
145 247
146 if (recv->r_ibinc == NULL) { 248 cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs);
147 if (!atomic_add_unless(&rds_ib_allocation, 1, rds_ib_sysctl_max_recv_allocation)) { 249 if (cache_item) {
250 ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry);
251 } else {
252 avail_allocs = atomic_add_unless(&rds_ib_allocation,
253 1, rds_ib_sysctl_max_recv_allocation);
254 if (!avail_allocs) {
148 rds_ib_stats_inc(s_ib_rx_alloc_limit); 255 rds_ib_stats_inc(s_ib_rx_alloc_limit);
149 goto out; 256 return NULL;
150 } 257 }
151 recv->r_ibinc = kmem_cache_alloc(rds_ib_incoming_slab, 258 ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask);
152 kptr_gfp); 259 if (!ibinc) {
153 if (recv->r_ibinc == NULL) {
154 atomic_dec(&rds_ib_allocation); 260 atomic_dec(&rds_ib_allocation);
155 goto out; 261 return NULL;
156 } 262 }
157 INIT_LIST_HEAD(&recv->r_ibinc->ii_frags);
158 rds_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr);
159 } 263 }
264 INIT_LIST_HEAD(&ibinc->ii_frags);
265 rds_inc_init(&ibinc->ii_inc, ic->conn, ic->conn->c_faddr);
160 266
161 if (recv->r_frag == NULL) { 267 return ibinc;
162 recv->r_frag = kmem_cache_alloc(rds_ib_frag_slab, kptr_gfp); 268}
163 if (recv->r_frag == NULL) 269
164 goto out; 270static struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic,
165 INIT_LIST_HEAD(&recv->r_frag->f_item); 271 gfp_t slab_mask, gfp_t page_mask)
166 recv->r_frag->f_page = NULL; 272{
273 struct rds_page_frag *frag;
274 struct list_head *cache_item;
275 int ret;
276
277 cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags);
278 if (cache_item) {
279 frag = container_of(cache_item, struct rds_page_frag, f_cache_entry);
280 } else {
281 frag = kmem_cache_alloc(rds_ib_frag_slab, slab_mask);
282 if (!frag)
283 return NULL;
284
285 sg_init_table(&frag->f_sg, 1);
286 ret = rds_page_remainder_alloc(&frag->f_sg,
287 RDS_FRAG_SIZE, page_mask);
288 if (ret) {
289 kmem_cache_free(rds_ib_frag_slab, frag);
290 return NULL;
291 }
167 } 292 }
168 293
169 if (ic->i_frag.f_page == NULL) { 294 INIT_LIST_HEAD(&frag->f_item);
170 ic->i_frag.f_page = alloc_page(page_gfp); 295
171 if (ic->i_frag.f_page == NULL) 296 return frag;
172 goto out; 297}
173 ic->i_frag.f_offset = 0; 298
299static int rds_ib_recv_refill_one(struct rds_connection *conn,
300 struct rds_ib_recv_work *recv, int prefill)
301{
302 struct rds_ib_connection *ic = conn->c_transport_data;
303 struct ib_sge *sge;
304 int ret = -ENOMEM;
305 gfp_t slab_mask = GFP_NOWAIT;
306 gfp_t page_mask = GFP_NOWAIT;
307
308 if (prefill) {
309 slab_mask = GFP_KERNEL;
310 page_mask = GFP_HIGHUSER;
174 } 311 }
175 312
176 dma_addr = ib_dma_map_page(ic->i_cm_id->device, 313 if (!ic->i_cache_incs.ready)
177 ic->i_frag.f_page, 314 rds_ib_cache_xfer_to_ready(&ic->i_cache_incs);
178 ic->i_frag.f_offset, 315 if (!ic->i_cache_frags.ready)
179 RDS_FRAG_SIZE, 316 rds_ib_cache_xfer_to_ready(&ic->i_cache_frags);
180 DMA_FROM_DEVICE);
181 if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr))
182 goto out;
183 317
184 /* 318 /*
185 * Once we get the RDS_PAGE_LAST_OFF frag then rds_ib_frag_unmap() 319 * ibinc was taken from recv if recv contained the start of a message.
186 * must be called on this recv. This happens as completions hit 320 * recvs that were continuations will still have this allocated.
187 * in order or on connection shutdown.
188 */ 321 */
189 recv->r_frag->f_page = ic->i_frag.f_page; 322 if (!recv->r_ibinc) {
190 recv->r_frag->f_offset = ic->i_frag.f_offset; 323 recv->r_ibinc = rds_ib_refill_one_inc(ic, slab_mask);
191 recv->r_frag->f_mapped = dma_addr; 324 if (!recv->r_ibinc)
325 goto out;
326 }
192 327
193 sge = rds_ib_data_sge(ic, recv->r_sge); 328 WARN_ON(recv->r_frag); /* leak! */
194 sge->addr = dma_addr; 329 recv->r_frag = rds_ib_refill_one_frag(ic, slab_mask, page_mask);
195 sge->length = RDS_FRAG_SIZE; 330 if (!recv->r_frag)
331 goto out;
332
333 ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg,
334 1, DMA_FROM_DEVICE);
335 WARN_ON(ret != 1);
196 336
197 sge = rds_ib_header_sge(ic, recv->r_sge); 337 sge = &recv->r_sge[0];
198 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); 338 sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header);
199 sge->length = sizeof(struct rds_header); 339 sge->length = sizeof(struct rds_header);
200 340
201 get_page(recv->r_frag->f_page); 341 sge = &recv->r_sge[1];
202 342 sge->addr = sg_dma_address(&recv->r_frag->f_sg);
203 if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) { 343 sge->length = sg_dma_len(&recv->r_frag->f_sg);
204 ic->i_frag.f_offset += RDS_FRAG_SIZE;
205 } else {
206 put_page(ic->i_frag.f_page);
207 ic->i_frag.f_page = NULL;
208 ic->i_frag.f_offset = 0;
209 }
210 344
211 ret = 0; 345 ret = 0;
212out: 346out:
@@ -216,13 +350,11 @@ out:
216/* 350/*
217 * This tries to allocate and post unused work requests after making sure that 351 * This tries to allocate and post unused work requests after making sure that
218 * they have all the allocations they need to queue received fragments into 352 * they have all the allocations they need to queue received fragments into
219 * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc 353 * sockets.
220 * pairs don't go unmatched.
221 * 354 *
222 * -1 is returned if posting fails due to temporary resource exhaustion. 355 * -1 is returned if posting fails due to temporary resource exhaustion.
223 */ 356 */
224int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 357void rds_ib_recv_refill(struct rds_connection *conn, int prefill)
225 gfp_t page_gfp, int prefill)
226{ 358{
227 struct rds_ib_connection *ic = conn->c_transport_data; 359 struct rds_ib_connection *ic = conn->c_transport_data;
228 struct rds_ib_recv_work *recv; 360 struct rds_ib_recv_work *recv;
@@ -236,28 +368,25 @@ int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
236 if (pos >= ic->i_recv_ring.w_nr) { 368 if (pos >= ic->i_recv_ring.w_nr) {
237 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n", 369 printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n",
238 pos); 370 pos);
239 ret = -EINVAL;
240 break; 371 break;
241 } 372 }
242 373
243 recv = &ic->i_recvs[pos]; 374 recv = &ic->i_recvs[pos];
244 ret = rds_ib_recv_refill_one(conn, recv, kptr_gfp, page_gfp); 375 ret = rds_ib_recv_refill_one(conn, recv, prefill);
245 if (ret) { 376 if (ret) {
246 ret = -1;
247 break; 377 break;
248 } 378 }
249 379
250 /* XXX when can this fail? */ 380 /* XXX when can this fail? */
251 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); 381 ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr);
252 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, 382 rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv,
253 recv->r_ibinc, recv->r_frag->f_page, 383 recv->r_ibinc, sg_page(&recv->r_frag->f_sg),
254 (long) recv->r_frag->f_mapped, ret); 384 (long) sg_dma_address(&recv->r_frag->f_sg), ret);
255 if (ret) { 385 if (ret) {
256 rds_ib_conn_error(conn, "recv post on " 386 rds_ib_conn_error(conn, "recv post on "
257 "%pI4 returned %d, disconnecting and " 387 "%pI4 returned %d, disconnecting and "
258 "reconnecting\n", &conn->c_faddr, 388 "reconnecting\n", &conn->c_faddr,
259 ret); 389 ret);
260 ret = -1;
261 break; 390 break;
262 } 391 }
263 392
@@ -270,37 +399,73 @@ int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
270 399
271 if (ret) 400 if (ret)
272 rds_ib_ring_unalloc(&ic->i_recv_ring, 1); 401 rds_ib_ring_unalloc(&ic->i_recv_ring, 1);
273 return ret;
274} 402}
275 403
276void rds_ib_inc_purge(struct rds_incoming *inc) 404/*
405 * We want to recycle several types of recv allocations, like incs and frags.
406 * To use this, the *_free() function passes in the ptr to a list_head within
407 * the recyclee, as well as the cache to put it on.
408 *
409 * First, we put the memory on a percpu list. When this reaches a certain size,
410 * we move it to an intermediate non-percpu list in a lockless manner, with some
411 * xchg/cmpxchg wizardry.
412 *
413 * N.B. Instead of a list_head as the anchor, we use a single pointer, which can
414 * be NULL and xchg'd. The list is actually empty when the pointer is NULL, and
415 * list_empty() will return true with one element is actually present.
416 * list_empty() will return true when one element is actually present.
417static void rds_ib_recv_cache_put(struct list_head *new_item,
418 struct rds_ib_refill_cache *cache)
277{ 419{
278 struct rds_ib_incoming *ibinc; 420 unsigned long flags;
279 struct rds_page_frag *frag; 421 struct rds_ib_cache_head *chp;
280 struct rds_page_frag *pos; 422 struct list_head *old;
281 423
282 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); 424 local_irq_save(flags);
283 rdsdebug("purging ibinc %p inc %p\n", ibinc, inc);
284 425
285 list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) { 426 chp = per_cpu_ptr(cache->percpu, smp_processor_id());
286 list_del_init(&frag->f_item); 427 if (!chp->first)
287 rds_ib_frag_drop_page(frag); 428 INIT_LIST_HEAD(new_item);
288 rds_ib_frag_free(frag); 429 else /* put on front */
289 } 430 list_add_tail(new_item, chp->first);
431 chp->first = new_item;
432 chp->count++;
433
434 if (chp->count < RDS_IB_RECYCLE_BATCH_COUNT)
435 goto end;
436
437 /*
438 * Return our per-cpu first list to the cache's xfer by atomically
439 * grabbing the current xfer list, appending it to our per-cpu list,
440 * and then atomically returning that entire list back to the
441 * cache's xfer list as long as it's still empty.
442 */
443 do {
444 old = xchg(&cache->xfer, NULL);
445 if (old)
446 list_splice_entire_tail(old, chp->first);
447 old = cmpxchg(&cache->xfer, NULL, chp->first);
448 } while (old);
449
450 chp->first = NULL;
451 chp->count = 0;
452end:
453 local_irq_restore(flags);
290} 454}
291 455
292void rds_ib_inc_free(struct rds_incoming *inc) 456static struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache)
293{ 457{
294 struct rds_ib_incoming *ibinc; 458 struct list_head *head = cache->ready;
295 459
296 ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); 460 if (head) {
461 if (!list_empty(head)) {
462 cache->ready = head->next;
463 list_del_init(head);
464 } else
465 cache->ready = NULL;
466 }
297 467
298 rds_ib_inc_purge(inc); 468 return head;
299 rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc);
300 BUG_ON(!list_empty(&ibinc->ii_frags));
301 kmem_cache_free(rds_ib_incoming_slab, ibinc);
302 atomic_dec(&rds_ib_allocation);
303 BUG_ON(atomic_read(&rds_ib_allocation) < 0);
304} 469}
305 470
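/*
 * Putting the pieces above together: percpu 'first' lists absorb frees
 * cheaply; a full batch is pushed onto the shared 'xfer' slot with the
 * xchg/cmpxchg dance; and the single consumer claims 'xfer' with one xchg().
 * The consumer side needs no further atomics because only one CPU ever
 * touches 'ready'. A condensed combination of rds_ib_cache_xfer_to_ready()
 * and rds_ib_recv_cache_get(), ignoring the splice-into-existing-ready case:
 */
static struct list_head *demo_cache_get(struct rds_ib_refill_cache *cache)
{
        struct list_head *head;

        if (!cache->ready) {
                /* claim everything producers have parked so far */
                struct list_head *tmp = xchg(&cache->xfer, NULL);

                if (tmp)
                        cache->ready = tmp;
        }

        head = cache->ready;
        if (head) {
                if (!list_empty(head)) {
                        cache->ready = head->next;
                        list_del_init(head);
                } else {
                        cache->ready = NULL;
                }
        }
        return head;
}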
306int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, 471int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
@@ -336,13 +501,13 @@ int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
336 to_copy = min_t(unsigned long, to_copy, len - copied); 501 to_copy = min_t(unsigned long, to_copy, len - copied);
337 502
338 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag " 503 rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag "
339 "[%p, %lu] + %lu\n", 504 "[%p, %u] + %lu\n",
340 to_copy, iov->iov_base, iov->iov_len, iov_off, 505 to_copy, iov->iov_base, iov->iov_len, iov_off,
341 frag->f_page, frag->f_offset, frag_off); 506 sg_page(&frag->f_sg), frag->f_sg.offset, frag_off);
342 507
343 /* XXX needs + offset for multiple recvs per page */ 508 /* XXX needs + offset for multiple recvs per page */
344 ret = rds_page_copy_to_user(frag->f_page, 509 ret = rds_page_copy_to_user(sg_page(&frag->f_sg),
345 frag->f_offset + frag_off, 510 frag->f_sg.offset + frag_off,
346 iov->iov_base + iov_off, 511 iov->iov_base + iov_off,
347 to_copy); 512 to_copy);
348 if (ret) { 513 if (ret) {
@@ -557,47 +722,6 @@ u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic)
557 return rds_ib_get_ack(ic); 722 return rds_ib_get_ack(ic);
558} 723}
559 724
560static struct rds_header *rds_ib_get_header(struct rds_connection *conn,
561 struct rds_ib_recv_work *recv,
562 u32 data_len)
563{
564 struct rds_ib_connection *ic = conn->c_transport_data;
565 void *hdr_buff = &ic->i_recv_hdrs[recv - ic->i_recvs];
566 void *addr;
567 u32 misplaced_hdr_bytes;
568
569 /*
570 * Support header at the front (RDS 3.1+) as well as header-at-end.
571 *
572 * Cases:
573 * 1) header all in header buff (great!)
574 * 2) header all in data page (copy all to header buff)
575 * 3) header split across hdr buf + data page
576 * (move bit in hdr buff to end before copying other bit from data page)
577 */
578 if (conn->c_version > RDS_PROTOCOL_3_0 || data_len == RDS_FRAG_SIZE)
579 return hdr_buff;
580
581 if (data_len <= (RDS_FRAG_SIZE - sizeof(struct rds_header))) {
582 addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0);
583 memcpy(hdr_buff,
584 addr + recv->r_frag->f_offset + data_len,
585 sizeof(struct rds_header));
586 kunmap_atomic(addr, KM_SOFTIRQ0);
587 return hdr_buff;
588 }
589
590 misplaced_hdr_bytes = (sizeof(struct rds_header) - (RDS_FRAG_SIZE - data_len));
591
592 memmove(hdr_buff + misplaced_hdr_bytes, hdr_buff, misplaced_hdr_bytes);
593
594 addr = kmap_atomic(recv->r_frag->f_page, KM_SOFTIRQ0);
595 memcpy(hdr_buff, addr + recv->r_frag->f_offset + data_len,
596 sizeof(struct rds_header) - misplaced_hdr_bytes);
597 kunmap_atomic(addr, KM_SOFTIRQ0);
598 return hdr_buff;
599}
600
601/* 725/*
602 * It's kind of lame that we're copying from the posted receive pages into 726 * It's kind of lame that we're copying from the posted receive pages into
603 * long-lived bitmaps. We could have posted the bitmaps and rdma written into 727 * long-lived bitmaps. We could have posted the bitmaps and rdma written into
@@ -639,7 +763,7 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
639 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); 763 to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off);
640 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ 764 BUG_ON(to_copy & 7); /* Must be 64bit aligned. */
641 765
642 addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0); 766 addr = kmap_atomic(sg_page(&frag->f_sg), KM_SOFTIRQ0);
643 767
644 src = addr + frag_off; 768 src = addr + frag_off;
645 dst = (void *)map->m_page_addrs[map_page] + map_off; 769 dst = (void *)map->m_page_addrs[map_page] + map_off;
@@ -710,7 +834,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
710 } 834 }
711 data_len -= sizeof(struct rds_header); 835 data_len -= sizeof(struct rds_header);
712 836
713 ihdr = rds_ib_get_header(conn, recv, data_len); 837 ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs];
714 838
715 /* Validate the checksum. */ 839 /* Validate the checksum. */
716 if (!rds_message_verify_checksum(ihdr)) { 840 if (!rds_message_verify_checksum(ihdr)) {
@@ -742,12 +866,12 @@ static void rds_ib_process_recv(struct rds_connection *conn,
742 * the inc is freed. We don't go that route, so we have to drop the 866 * the inc is freed. We don't go that route, so we have to drop the
743 * page ref ourselves. We can't just leave the page on the recv 867 * page ref ourselves. We can't just leave the page on the recv
744 * because that confuses the dma mapping of pages and each recv's use 868 * because that confuses the dma mapping of pages and each recv's use
745 * of a partial page. We can leave the frag, though, it will be 869 * of a partial page.
746 * reused.
747 * 870 *
748 * FIXME: Fold this into the code path below. 871 * FIXME: Fold this into the code path below.
749 */ 872 */
750 rds_ib_frag_drop_page(recv->r_frag); 873 rds_ib_frag_free(ic, recv->r_frag);
874 recv->r_frag = NULL;
751 return; 875 return;
752 } 876 }
753 877
@@ -757,7 +881,7 @@ static void rds_ib_process_recv(struct rds_connection *conn,
757 * into the inc and save the inc so we can hang upcoming fragments 881 * into the inc and save the inc so we can hang upcoming fragments
758 * off its list. 882 * off its list.
759 */ 883 */
760 if (ibinc == NULL) { 884 if (!ibinc) {
761 ibinc = recv->r_ibinc; 885 ibinc = recv->r_ibinc;
762 recv->r_ibinc = NULL; 886 recv->r_ibinc = NULL;
763 ic->i_ibinc = ibinc; 887 ic->i_ibinc = ibinc;
@@ -842,32 +966,38 @@ static inline void rds_poll_cq(struct rds_ib_connection *ic,
842 struct rds_ib_recv_work *recv; 966 struct rds_ib_recv_work *recv;
843 967
844 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) { 968 while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
845 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", 969 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
846 (unsigned long long)wc.wr_id, wc.status, wc.byte_len, 970 (unsigned long long)wc.wr_id, wc.status,
971 rds_ib_wc_status_str(wc.status), wc.byte_len,
847 be32_to_cpu(wc.ex.imm_data)); 972 be32_to_cpu(wc.ex.imm_data));
848 rds_ib_stats_inc(s_ib_rx_cq_event); 973 rds_ib_stats_inc(s_ib_rx_cq_event);
849 974
850 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)]; 975 recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
851 976
852 rds_ib_recv_unmap_page(ic, recv); 977 ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
853 978
854 /* 979 /*
855 * Also process recvs in connecting state because it is possible 980 * Also process recvs in connecting state because it is possible
856 * to get a recv completion _before_ the rdmacm ESTABLISHED 981 * to get a recv completion _before_ the rdmacm ESTABLISHED
857 * event is processed. 982 * event is processed.
858 */ 983 */
859 if (rds_conn_up(conn) || rds_conn_connecting(conn)) { 984 if (wc.status == IB_WC_SUCCESS) {
985 rds_ib_process_recv(conn, recv, wc.byte_len, state);
986 } else {
860 /* We expect errors as the qp is drained during shutdown */ 987 /* We expect errors as the qp is drained during shutdown */
861 if (wc.status == IB_WC_SUCCESS) { 988 if (rds_conn_up(conn) || rds_conn_connecting(conn))
862 rds_ib_process_recv(conn, recv, wc.byte_len, state); 989 rds_ib_conn_error(conn, "recv completion on %pI4 had "
863 } else { 990 "status %u (%s), disconnecting and "
864 rds_ib_conn_error(conn, "recv completion on " 991 "reconnecting\n", &conn->c_faddr,
865 "%pI4 had status %u, disconnecting and " 992 wc.status,
866 "reconnecting\n", &conn->c_faddr, 993 rds_ib_wc_status_str(wc.status));
867 wc.status);
868 }
869 } 994 }
870 995
996 /*
997 * It's very important that we only free this ring entry if we've truly
998 * freed the resources allocated to the entry. The refilling path can
999 * leak if we don't.
1000 */
871 rds_ib_ring_free(&ic->i_recv_ring, 1); 1001 rds_ib_ring_free(&ic->i_recv_ring, 1);
872 } 1002 }
873} 1003}
@@ -897,11 +1027,8 @@ void rds_ib_recv_tasklet_fn(unsigned long data)
897 if (rds_ib_ring_empty(&ic->i_recv_ring)) 1027 if (rds_ib_ring_empty(&ic->i_recv_ring))
898 rds_ib_stats_inc(s_ib_rx_ring_empty); 1028 rds_ib_stats_inc(s_ib_rx_ring_empty);
899 1029
900 /*
901 * If the ring is running low, then schedule the thread to refill.
902 */
903 if (rds_ib_ring_low(&ic->i_recv_ring)) 1030 if (rds_ib_ring_low(&ic->i_recv_ring))
904 queue_delayed_work(rds_wq, &conn->c_recv_w, 0); 1031 rds_ib_recv_refill(conn, 0);
905} 1032}
906 1033
907int rds_ib_recv(struct rds_connection *conn) 1034int rds_ib_recv(struct rds_connection *conn)
@@ -910,25 +1037,13 @@ int rds_ib_recv(struct rds_connection *conn)
910 int ret = 0; 1037 int ret = 0;
911 1038
912 rdsdebug("conn %p\n", conn); 1039 rdsdebug("conn %p\n", conn);
913
914 /*
915 * If we get a temporary posting failure in this context then
916 * we're really low and we want the caller to back off for a bit.
917 */
918 mutex_lock(&ic->i_recv_mutex);
919 if (rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0))
920 ret = -ENOMEM;
921 else
922 rds_ib_stats_inc(s_ib_rx_refill_from_thread);
923 mutex_unlock(&ic->i_recv_mutex);
924
925 if (rds_conn_up(conn)) 1040 if (rds_conn_up(conn))
926 rds_ib_attempt_ack(ic); 1041 rds_ib_attempt_ack(ic);
927 1042
928 return ret; 1043 return ret;
929} 1044}
930 1045
931int __init rds_ib_recv_init(void) 1046int rds_ib_recv_init(void)
932{ 1047{
933 struct sysinfo si; 1048 struct sysinfo si;
934 int ret = -ENOMEM; 1049 int ret = -ENOMEM;
@@ -939,14 +1054,14 @@ int __init rds_ib_recv_init(void)
939 1054
940 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming", 1055 rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming",
941 sizeof(struct rds_ib_incoming), 1056 sizeof(struct rds_ib_incoming),
942 0, 0, NULL); 1057 0, SLAB_HWCACHE_ALIGN, NULL);
943 if (rds_ib_incoming_slab == NULL) 1058 if (!rds_ib_incoming_slab)
944 goto out; 1059 goto out;
945 1060
946 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag", 1061 rds_ib_frag_slab = kmem_cache_create("rds_ib_frag",
947 sizeof(struct rds_page_frag), 1062 sizeof(struct rds_page_frag),
948 0, 0, NULL); 1063 0, SLAB_HWCACHE_ALIGN, NULL);
949 if (rds_ib_frag_slab == NULL) 1064 if (!rds_ib_frag_slab)
950 kmem_cache_destroy(rds_ib_incoming_slab); 1065 kmem_cache_destroy(rds_ib_incoming_slab);
951 else 1066 else
952 ret = 0; 1067 ret = 0;
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 17fa80803ab0..71f373c421bc 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -36,11 +36,49 @@
36#include <linux/dmapool.h> 36#include <linux/dmapool.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40#include "ib.h" 39#include "ib.h"
41 40
42static void rds_ib_send_rdma_complete(struct rds_message *rm, 41static char *rds_ib_wc_status_strings[] = {
43 int wc_status) 42#define RDS_IB_WC_STATUS_STR(foo) \
43 [IB_WC_##foo] = __stringify(IB_WC_##foo)
44 RDS_IB_WC_STATUS_STR(SUCCESS),
45 RDS_IB_WC_STATUS_STR(LOC_LEN_ERR),
46 RDS_IB_WC_STATUS_STR(LOC_QP_OP_ERR),
47 RDS_IB_WC_STATUS_STR(LOC_EEC_OP_ERR),
48 RDS_IB_WC_STATUS_STR(LOC_PROT_ERR),
49 RDS_IB_WC_STATUS_STR(WR_FLUSH_ERR),
50 RDS_IB_WC_STATUS_STR(MW_BIND_ERR),
51 RDS_IB_WC_STATUS_STR(BAD_RESP_ERR),
52 RDS_IB_WC_STATUS_STR(LOC_ACCESS_ERR),
53 RDS_IB_WC_STATUS_STR(REM_INV_REQ_ERR),
54 RDS_IB_WC_STATUS_STR(REM_ACCESS_ERR),
55 RDS_IB_WC_STATUS_STR(REM_OP_ERR),
56 RDS_IB_WC_STATUS_STR(RETRY_EXC_ERR),
57 RDS_IB_WC_STATUS_STR(RNR_RETRY_EXC_ERR),
58 RDS_IB_WC_STATUS_STR(LOC_RDD_VIOL_ERR),
59 RDS_IB_WC_STATUS_STR(REM_INV_RD_REQ_ERR),
60 RDS_IB_WC_STATUS_STR(REM_ABORT_ERR),
61 RDS_IB_WC_STATUS_STR(INV_EECN_ERR),
62 RDS_IB_WC_STATUS_STR(INV_EEC_STATE_ERR),
63 RDS_IB_WC_STATUS_STR(FATAL_ERR),
64 RDS_IB_WC_STATUS_STR(RESP_TIMEOUT_ERR),
65 RDS_IB_WC_STATUS_STR(GENERAL_ERR),
66#undef RDS_IB_WC_STATUS_STR
67};
68
69char *rds_ib_wc_status_str(enum ib_wc_status status)
70{
71 return rds_str_array(rds_ib_wc_status_strings,
72 ARRAY_SIZE(rds_ib_wc_status_strings), status);
73}
74
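/*
 * rds_str_array() is introduced elsewhere in this merge (net/rds); it is
 * assumed here to be a bounds-checked array lookup along these lines --
 * an editor's approximation, not the verbatim helper:
 */
char *rds_str_array(char **array, size_t elements, size_t index)
{
        if (index < elements && array[index])
                return array[index];
        else
                return "unknown";
}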
75/*
76 * Convert IB-specific error message to RDS error message and call core
77 * completion handler.
78 */
79static void rds_ib_send_complete(struct rds_message *rm,
80 int wc_status,
81 void (*complete)(struct rds_message *rm, int status))
44{ 82{
45 int notify_status; 83 int notify_status;
46 84
@@ -60,69 +98,125 @@ static void rds_ib_send_rdma_complete(struct rds_message *rm,
60 notify_status = RDS_RDMA_OTHER_ERROR; 98 notify_status = RDS_RDMA_OTHER_ERROR;
61 break; 99 break;
62 } 100 }
63 rds_rdma_send_complete(rm, notify_status); 101 complete(rm, notify_status);
102}
103
104static void rds_ib_send_unmap_data(struct rds_ib_connection *ic,
105 struct rm_data_op *op,
106 int wc_status)
107{
108 if (op->op_nents)
109 ib_dma_unmap_sg(ic->i_cm_id->device,
110 op->op_sg, op->op_nents,
111 DMA_TO_DEVICE);
64} 112}
65 113
66static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, 114static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic,
67 struct rds_rdma_op *op) 115 struct rm_rdma_op *op,
116 int wc_status)
68{ 117{
69 if (op->r_mapped) { 118 if (op->op_mapped) {
70 ib_dma_unmap_sg(ic->i_cm_id->device, 119 ib_dma_unmap_sg(ic->i_cm_id->device,
71 op->r_sg, op->r_nents, 120 op->op_sg, op->op_nents,
72 op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); 121 op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
73 op->r_mapped = 0; 122 op->op_mapped = 0;
74 } 123 }
124
125 /* If the user asked for a completion notification on this
126 * message, we can implement three different semantics:
127 * 1. Notify when we received the ACK on the RDS message
128 * that was queued with the RDMA. This provides reliable
129 * notification of RDMA status at the expense of a one-way
130 * packet delay.
131 * 2. Notify when the IB stack gives us the completion event for
132 * the RDMA operation.
133 * 3. Notify when the IB stack gives us the completion event for
134 * the accompanying RDS messages.
135 * Here, we implement approach #3. To implement approach #2,
136 * we would need to take an event for the rdma WR. To implement #1,
137 * don't call rds_rdma_send_complete at all, and fall back to the notify
138 * handling in the ACK processing code.
139 *
140 * Note: There's no need to explicitly sync any RDMA buffers using
141 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
142 * operation itself unmapped the RDMA buffers, which takes care
143 * of synching.
144 */
145 rds_ib_send_complete(container_of(op, struct rds_message, rdma),
146 wc_status, rds_rdma_send_complete);
147
148 if (op->op_write)
149 rds_stats_add(s_send_rdma_bytes, op->op_bytes);
150 else
151 rds_stats_add(s_recv_rdma_bytes, op->op_bytes);
75} 152}
76 153
77static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, 154static void rds_ib_send_unmap_atomic(struct rds_ib_connection *ic,
78 struct rds_ib_send_work *send, 155 struct rm_atomic_op *op,
79 int wc_status) 156 int wc_status)
80{ 157{
81 struct rds_message *rm = send->s_rm; 158 /* unmap atomic recvbuf */
82 159 if (op->op_mapped) {
83 rdsdebug("ic %p send %p rm %p\n", ic, send, rm); 160 ib_dma_unmap_sg(ic->i_cm_id->device, op->op_sg, 1,
84 161 DMA_FROM_DEVICE);
85 ib_dma_unmap_sg(ic->i_cm_id->device, 162 op->op_mapped = 0;
86 rm->m_sg, rm->m_nents, 163 }
87 DMA_TO_DEVICE);
88
89 if (rm->m_rdma_op != NULL) {
90 rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
91
92 /* If the user asked for a completion notification on this
93 * message, we can implement three different semantics:
94 * 1. Notify when we received the ACK on the RDS message
95 * that was queued with the RDMA. This provides reliable
96 * notification of RDMA status at the expense of a one-way
97 * packet delay.
98 * 2. Notify when the IB stack gives us the completion event for
99 * the RDMA operation.
100 * 3. Notify when the IB stack gives us the completion event for
101 * the accompanying RDS messages.
102 * Here, we implement approach #3. To implement approach #2,
103 * call rds_rdma_send_complete from the cq_handler. To implement #1,
104 * don't call rds_rdma_send_complete at all, and fall back to the notify
105 * handling in the ACK processing code.
106 *
107 * Note: There's no need to explicitly sync any RDMA buffers using
108 * ib_dma_sync_sg_for_cpu - the completion for the RDMA
109 * operation itself unmapped the RDMA buffers, which takes care
110 * of synching.
111 */
112 rds_ib_send_rdma_complete(rm, wc_status);
113 164
114 if (rm->m_rdma_op->r_write) 165 rds_ib_send_complete(container_of(op, struct rds_message, atomic),
115 rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); 166 wc_status, rds_atomic_send_complete);
116 else 167
117 rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); 168 if (op->op_type == RDS_ATOMIC_TYPE_CSWP)
169 rds_ib_stats_inc(s_ib_atomic_cswp);
170 else
171 rds_ib_stats_inc(s_ib_atomic_fadd);
172}
173
174/*
175 * Unmap the resources associated with a struct send_work.
176 *
177 * Returns the rm because the caller, the completion handler, needs it;
178 * currently the rm can only be recovered from a send_work by switching
179 * on wr.opcode.
180 */
181static struct rds_message *rds_ib_send_unmap_op(struct rds_ib_connection *ic,
182 struct rds_ib_send_work *send,
183 int wc_status)
184{
185 struct rds_message *rm = NULL;
186
187 /* In the error case, wc.opcode sometimes contains garbage */
188 switch (send->s_wr.opcode) {
189 case IB_WR_SEND:
190 if (send->s_op) {
191 rm = container_of(send->s_op, struct rds_message, data);
192 rds_ib_send_unmap_data(ic, send->s_op, wc_status);
193 }
194 break;
195 case IB_WR_RDMA_WRITE:
196 case IB_WR_RDMA_READ:
197 if (send->s_op) {
198 rm = container_of(send->s_op, struct rds_message, rdma);
199 rds_ib_send_unmap_rdma(ic, send->s_op, wc_status);
200 }
201 break;
202 case IB_WR_ATOMIC_FETCH_AND_ADD:
203 case IB_WR_ATOMIC_CMP_AND_SWP:
204 if (send->s_op) {
205 rm = container_of(send->s_op, struct rds_message, atomic);
206 rds_ib_send_unmap_atomic(ic, send->s_op, wc_status);
207 }
208 break;
209 default:
210 if (printk_ratelimit())
211 printk(KERN_NOTICE
212 "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
213 __func__, send->s_wr.opcode);
214 break;
118 } 215 }
119 216
120 /* If anyone waited for this message to get flushed out, wake 217 send->s_wr.opcode = 0xdead;
121 * them up now */
122 rds_message_unmapped(rm);
123 218
124 rds_message_put(rm); 219 return rm;
125 send->s_rm = NULL;
126} 220}
127 221
128void rds_ib_send_init_ring(struct rds_ib_connection *ic) 222void rds_ib_send_init_ring(struct rds_ib_connection *ic)
@@ -133,23 +227,18 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
133 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { 227 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
134 struct ib_sge *sge; 228 struct ib_sge *sge;
135 229
136 send->s_rm = NULL;
137 send->s_op = NULL; 230 send->s_op = NULL;
138 231
139 send->s_wr.wr_id = i; 232 send->s_wr.wr_id = i;
140 send->s_wr.sg_list = send->s_sge; 233 send->s_wr.sg_list = send->s_sge;
141 send->s_wr.num_sge = 1;
142 send->s_wr.opcode = IB_WR_SEND;
143 send->s_wr.send_flags = 0;
144 send->s_wr.ex.imm_data = 0; 234 send->s_wr.ex.imm_data = 0;
145 235
146 sge = rds_ib_data_sge(ic, send->s_sge); 236 sge = &send->s_sge[0];
147 sge->lkey = ic->i_mr->lkey;
148
149 sge = rds_ib_header_sge(ic, send->s_sge);
150 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); 237 sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header));
151 sge->length = sizeof(struct rds_header); 238 sge->length = sizeof(struct rds_header);
152 sge->lkey = ic->i_mr->lkey; 239 sge->lkey = ic->i_mr->lkey;
240
241 send->s_sge[1].lkey = ic->i_mr->lkey;
153 } 242 }
154} 243}
155 244
@@ -159,16 +248,24 @@ void rds_ib_send_clear_ring(struct rds_ib_connection *ic)
159 u32 i; 248 u32 i;
160 249
161 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { 250 for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) {
162 if (send->s_wr.opcode == 0xdead) 251 if (send->s_op && send->s_wr.opcode != 0xdead)
163 continue; 252 rds_ib_send_unmap_op(ic, send, IB_WC_WR_FLUSH_ERR);
164 if (send->s_rm)
165 rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR);
166 if (send->s_op)
167 rds_ib_send_unmap_rdma(ic, send->s_op);
168 } 253 }
169} 254}
170 255
171/* 256/*
257 * The only fast path caller always has a non-zero nr, so we don't
258 * bother testing nr before performing the atomic sub.
259 */
260static void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr)
261{
262 if ((atomic_sub_return(nr, &ic->i_signaled_sends) == 0) &&
263 waitqueue_active(&rds_ib_ring_empty_wait))
264 wake_up(&rds_ib_ring_empty_wait);
265 BUG_ON(atomic_read(&ic->i_signaled_sends) < 0);
266}
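A userspace model of the i_signaled_sends accounting introduced here: posts add to the counter, completions subtract, and whoever drops it to zero wakes the ring-empty waiter (a printf stands in for the kernel's waitqueue):

#include <stdatomic.h>
#include <stdio.h>

/* Illustrative model of ic->i_signaled_sends. */
static atomic_int signaled_sends;

static void post_signaled(int nr)
{
        atomic_fetch_add(&signaled_sends, nr);
}

static void complete_signaled(int nr)
{
        /* fetch_sub returns the old value, so old - nr is the new
         * count; hitting zero means no signaled sends remain. */
        if (atomic_fetch_sub(&signaled_sends, nr) - nr == 0)
                printf("ring drained: wake waiter\n");
}

int main(void)
{
        post_signaled(3);               /* post with 3 signaled WRs */
        complete_signaled(2);           /* two completions reaped */
        complete_signaled(1);           /* last one: prints the wake */
        return 0;
}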
267
268/*
172 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc 269 * The _oldest/_free ring operations here race cleanly with the alloc/unalloc
173 * operations performed in the send path. As the sender allocs and potentially 270 * operations performed in the send path. As the sender allocs and potentially
174 * unallocs the next free entry in the ring it doesn't alter which is 271 * unallocs the next free entry in the ring it doesn't alter which is
@@ -178,12 +275,14 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
178{ 275{
179 struct rds_connection *conn = context; 276 struct rds_connection *conn = context;
180 struct rds_ib_connection *ic = conn->c_transport_data; 277 struct rds_ib_connection *ic = conn->c_transport_data;
278 struct rds_message *rm = NULL;
181 struct ib_wc wc; 279 struct ib_wc wc;
182 struct rds_ib_send_work *send; 280 struct rds_ib_send_work *send;
183 u32 completed; 281 u32 completed;
184 u32 oldest; 282 u32 oldest;
185 u32 i = 0; 283 u32 i = 0;
186 int ret; 284 int ret;
285 int nr_sig = 0;
187 286
188 rdsdebug("cq %p conn %p\n", cq, conn); 287 rdsdebug("cq %p conn %p\n", cq, conn);
189 rds_ib_stats_inc(s_ib_tx_cq_call); 288 rds_ib_stats_inc(s_ib_tx_cq_call);
@@ -192,8 +291,9 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
192 rdsdebug("ib_req_notify_cq send failed: %d\n", ret); 291 rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
193 292
194 while (ib_poll_cq(cq, 1, &wc) > 0) { 293 while (ib_poll_cq(cq, 1, &wc) > 0) {
195 rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", 294 rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
196 (unsigned long long)wc.wr_id, wc.status, wc.byte_len, 295 (unsigned long long)wc.wr_id, wc.status,
296 rds_ib_wc_status_str(wc.status), wc.byte_len,
197 be32_to_cpu(wc.ex.imm_data)); 297 be32_to_cpu(wc.ex.imm_data));
198 rds_ib_stats_inc(s_ib_tx_cq_event); 298 rds_ib_stats_inc(s_ib_tx_cq_event);
199 299
@@ -210,51 +310,30 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
210 310
211 for (i = 0; i < completed; i++) { 311 for (i = 0; i < completed; i++) {
212 send = &ic->i_sends[oldest]; 312 send = &ic->i_sends[oldest];
313 if (send->s_wr.send_flags & IB_SEND_SIGNALED)
314 nr_sig++;
213 315
214 /* In the error case, wc.opcode sometimes contains garbage */ 316 rm = rds_ib_send_unmap_op(ic, send, wc.status);
215 switch (send->s_wr.opcode) {
216 case IB_WR_SEND:
217 if (send->s_rm)
218 rds_ib_send_unmap_rm(ic, send, wc.status);
219 break;
220 case IB_WR_RDMA_WRITE:
221 case IB_WR_RDMA_READ:
222 /* Nothing to be done - the SG list will be unmapped
223 * when the SEND completes. */
224 break;
225 default:
226 if (printk_ratelimit())
227 printk(KERN_NOTICE
228 "RDS/IB: %s: unexpected opcode 0x%x in WR!\n",
229 __func__, send->s_wr.opcode);
230 break;
231 }
232 317
233 send->s_wr.opcode = 0xdead;
234 send->s_wr.num_sge = 1;
235 if (send->s_queued + HZ/2 < jiffies) 318 if (send->s_queued + HZ/2 < jiffies)
236 rds_ib_stats_inc(s_ib_tx_stalled); 319 rds_ib_stats_inc(s_ib_tx_stalled);
237 320
238 /* If a RDMA operation produced an error, signal this right 321 if (send->s_op) {
239 * away. If we don't, the subsequent SEND that goes with this 322 if (send->s_op == rm->m_final_op) {
240 * RDMA will be canceled with ERR_WFLUSH, and the application 323 /* If anyone waited for this message to get flushed out, wake
241 * never learns that the RDMA failed. */ 324 * them up now */
242 if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) { 325 rds_message_unmapped(rm);
243 struct rds_message *rm;
244
245 rm = rds_send_get_message(conn, send->s_op);
246 if (rm) {
247 if (rm->m_rdma_op)
248 rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
249 rds_ib_send_rdma_complete(rm, wc.status);
250 rds_message_put(rm);
251 } 326 }
327 rds_message_put(rm);
328 send->s_op = NULL;
252 } 329 }
253 330
254 oldest = (oldest + 1) % ic->i_send_ring.w_nr; 331 oldest = (oldest + 1) % ic->i_send_ring.w_nr;
255 } 332 }
256 333
257 rds_ib_ring_free(&ic->i_send_ring, completed); 334 rds_ib_ring_free(&ic->i_send_ring, completed);
335 rds_ib_sub_signaled(ic, nr_sig);
336 nr_sig = 0;
258 337
259 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) || 338 if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
260 test_bit(0, &conn->c_map_queued)) 339 test_bit(0, &conn->c_map_queued))
@@ -262,10 +341,10 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
262 341
263 /* We expect errors as the qp is drained during shutdown */ 342 /* We expect errors as the qp is drained during shutdown */
264 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) { 343 if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
265 rds_ib_conn_error(conn, 344 rds_ib_conn_error(conn, "send completion on %pI4 had status "
266 "send completion on %pI4 " 345 "%u (%s), disconnecting and reconnecting\n",
267 "had status %u, disconnecting and reconnecting\n", 346 &conn->c_faddr, wc.status,
268 &conn->c_faddr, wc.status); 347 rds_ib_wc_status_str(wc.status));
269 } 348 }
270 } 349 }
271} 350}
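The handler retires every entry from oldest up to the completed wr_id. A sketch of the modular count a helper like rds_ib_ring_completed would produce, assuming fewer than a full ring of entries is ever outstanding (the helper itself is not part of this hunk):

#include <stdio.h>

/* Entries oldest..wr_id inclusive, modulo ring size; assumes fewer
 * than nr entries are outstanding at once. */
static unsigned int ring_completed(unsigned int nr, unsigned int wr_id,
                                   unsigned int oldest)
{
        return (unsigned int)(((int)wr_id - (int)oldest + 1 + (int)nr)
                              % (int)nr);
}

int main(void)
{
        unsigned int nr = 8;

        printf("%u\n", ring_completed(nr, 5, 3));   /* 3,4,5   -> 3 */
        printf("%u\n", ring_completed(nr, 1, 6));   /* 6,7,0,1 -> 4 */
        printf("%u\n", ring_completed(nr, 4, 4));   /* just 4  -> 1 */
        return 0;
}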
@@ -294,7 +373,7 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
294 * credits (see rds_ib_send_add_credits below). 373 * credits (see rds_ib_send_add_credits below).
295 * 374 *
296 * The RDS send code is essentially single-threaded; rds_send_xmit 375 * The RDS send code is essentially single-threaded; rds_send_xmit
297 * grabs c_send_lock to ensure exclusive access to the send ring. 376 * sets RDS_IN_XMIT to ensure exclusive access to the send ring.
298 * However, the ACK sending code is independent and can race with 377 * However, the ACK sending code is independent and can race with
299 * message SENDs. 378 * message SENDs.
300 * 379 *
@@ -413,40 +492,21 @@ void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted)
413 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 492 set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
414} 493}
415 494
416static inline void 495static inline int rds_ib_set_wr_signal_state(struct rds_ib_connection *ic,
417rds_ib_xmit_populate_wr(struct rds_ib_connection *ic, 496 struct rds_ib_send_work *send,
418 struct rds_ib_send_work *send, unsigned int pos, 497 bool notify)
419 unsigned long buffer, unsigned int length,
420 int send_flags)
421{ 498{
422 struct ib_sge *sge; 499 /*
423 500 * We want to delay signaling completions just enough to get
424 WARN_ON(pos != send - ic->i_sends); 501 * the batching benefits but not so much that we create dead time
425 502 * on the wire.
426 send->s_wr.send_flags = send_flags; 503 */
427 send->s_wr.opcode = IB_WR_SEND; 504 if (ic->i_unsignaled_wrs-- == 0 || notify) {
428 send->s_wr.num_sge = 2; 505 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
429 send->s_wr.next = NULL; 506 send->s_wr.send_flags |= IB_SEND_SIGNALED;
430 send->s_queued = jiffies; 507 return 1;
431 send->s_op = NULL;
432
433 if (length != 0) {
434 sge = rds_ib_data_sge(ic, send->s_sge);
435 sge->addr = buffer;
436 sge->length = length;
437 sge->lkey = ic->i_mr->lkey;
438
439 sge = rds_ib_header_sge(ic, send->s_sge);
440 } else {
441 /* We're sending a packet with no payload. There is only
442 * one SGE */
443 send->s_wr.num_sge = 1;
444 sge = &send->s_sge[0];
445 } 508 }
446 509 return 0;
447 sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header));
448 sge->length = sizeof(struct rds_header);
449 sge->lkey = ic->i_mr->lkey;
450} 510}
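The factored-out helper keeps the old every-Nth-completion policy but now also honors an explicit notify request. A standalone model of the countdown, showing roughly one signaled WR per max_unsig_wrs posts:

#include <stdbool.h>
#include <stdio.h>

static unsigned long max_unsig_wrs = 16;        /* the sysctl knob */
static unsigned long unsignaled_wrs = 16;

/* Returns 1 when this WR should carry IB_SEND_SIGNALED: either the
 * countdown expired or the caller demanded notification. */
static int set_wr_signal_state(bool notify)
{
        if (unsignaled_wrs-- == 0 || notify) {
                unsignaled_wrs = max_unsig_wrs;
                return 1;
        }
        return 0;
}

int main(void)
{
        int signaled = 0;

        for (int i = 0; i < 100; i++)
                signaled += set_wr_signal_state(false);
        printf("signaled %d of 100 posts\n", signaled);         /* 5 */
        return 0;
}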
451 511
452/* 512/*
@@ -475,13 +535,14 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
475 u32 pos; 535 u32 pos;
476 u32 i; 536 u32 i;
477 u32 work_alloc; 537 u32 work_alloc;
478 u32 credit_alloc; 538 u32 credit_alloc = 0;
479 u32 posted; 539 u32 posted;
480 u32 adv_credits = 0; 540 u32 adv_credits = 0;
481 int send_flags = 0; 541 int send_flags = 0;
482 int sent; 542 int bytes_sent = 0;
483 int ret; 543 int ret;
484 int flow_controlled = 0; 544 int flow_controlled = 0;
545 int nr_sig = 0;
485 546
486 BUG_ON(off % RDS_FRAG_SIZE); 547 BUG_ON(off % RDS_FRAG_SIZE);
487 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); 548 BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
@@ -507,14 +568,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
507 goto out; 568 goto out;
508 } 569 }
509 570
510 credit_alloc = work_alloc;
511 if (ic->i_flowctl) { 571 if (ic->i_flowctl) {
512 credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT); 572 credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0, RDS_MAX_ADV_CREDIT);
513 adv_credits += posted; 573 adv_credits += posted;
514 if (credit_alloc < work_alloc) { 574 if (credit_alloc < work_alloc) {
515 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc); 575 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc);
516 work_alloc = credit_alloc; 576 work_alloc = credit_alloc;
517 flow_controlled++; 577 flow_controlled = 1;
518 } 578 }
519 if (work_alloc == 0) { 579 if (work_alloc == 0) {
520 set_bit(RDS_LL_SEND_FULL, &conn->c_flags); 580 set_bit(RDS_LL_SEND_FULL, &conn->c_flags);
@@ -525,31 +585,25 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
525 } 585 }
526 586
527 /* map the message the first time we see it */ 587 /* map the message the first time we see it */
528 if (ic->i_rm == NULL) { 588 if (!ic->i_data_op) {
529 /* 589 if (rm->data.op_nents) {
530 printk(KERN_NOTICE "rds_ib_xmit prep msg dport=%u flags=0x%x len=%d\n", 590 rm->data.op_count = ib_dma_map_sg(dev,
531 be16_to_cpu(rm->m_inc.i_hdr.h_dport), 591 rm->data.op_sg,
532 rm->m_inc.i_hdr.h_flags, 592 rm->data.op_nents,
533 be32_to_cpu(rm->m_inc.i_hdr.h_len)); 593 DMA_TO_DEVICE);
534 */ 594 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
535 if (rm->m_nents) { 595 if (rm->data.op_count == 0) {
536 rm->m_count = ib_dma_map_sg(dev,
537 rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
538 rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
539 if (rm->m_count == 0) {
540 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); 596 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
541 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 597 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
542 ret = -ENOMEM; /* XXX ? */ 598 ret = -ENOMEM; /* XXX ? */
543 goto out; 599 goto out;
544 } 600 }
545 } else { 601 } else {
546 rm->m_count = 0; 602 rm->data.op_count = 0;
547 } 603 }
548 604
549 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
550 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes;
551 rds_message_addref(rm); 605 rds_message_addref(rm);
552 ic->i_rm = rm; 606 ic->i_data_op = &rm->data;
553 607
554 /* Finalize the header */ 608 /* Finalize the header */
555 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags)) 609 if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags))
@@ -559,10 +613,10 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
559 613
560 /* If it has a RDMA op, tell the peer we did it. This is 614 /* If it has a RDMA op, tell the peer we did it. This is
561 * used by the peer to release use-once RDMA MRs. */ 615 * used by the peer to release use-once RDMA MRs. */
562 if (rm->m_rdma_op) { 616 if (rm->rdma.op_active) {
563 struct rds_ext_header_rdma ext_hdr; 617 struct rds_ext_header_rdma ext_hdr;
564 618
565 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); 619 ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
566 rds_message_add_extension(&rm->m_inc.i_hdr, 620 rds_message_add_extension(&rm->m_inc.i_hdr,
567 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); 621 RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
568 } 622 }
@@ -582,99 +636,77 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
582 /* 636 /*
583 * Update adv_credits since we reset the ACK_REQUIRED bit. 637 * Update adv_credits since we reset the ACK_REQUIRED bit.
584 */ 638 */
585 rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits); 639 if (ic->i_flowctl) {
586 adv_credits += posted; 640 rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
587 BUG_ON(adv_credits > 255); 641 adv_credits += posted;
642 BUG_ON(adv_credits > 255);
643 }
588 } 644 }
589 645
590 send = &ic->i_sends[pos];
591 first = send;
592 prev = NULL;
593 scat = &rm->m_sg[sg];
594 sent = 0;
595 i = 0;
596
597 /* Sometimes you want to put a fence between an RDMA 646 /* Sometimes you want to put a fence between an RDMA
598 * READ and the following SEND. 647 * READ and the following SEND.
599 * We could either do this all the time 648 * We could either do this all the time
600 * or when requested by the user. Right now, we let 649 * or when requested by the user. Right now, we let
601 * the application choose. 650 * the application choose.
602 */ 651 */
603 if (rm->m_rdma_op && rm->m_rdma_op->r_fence) 652 if (rm->rdma.op_active && rm->rdma.op_fence)
604 send_flags = IB_SEND_FENCE; 653 send_flags = IB_SEND_FENCE;
605 654
606 /* 655 /* Each frag gets a header. Msgs may be 0 bytes */
607 * We could be copying the header into the unused tail of the page. 656 send = &ic->i_sends[pos];
608 * That would need to be changed in the future when those pages might 657 first = send;
609 * be mapped userspace pages or page cache pages. So instead we always 658 prev = NULL;
610 * use a second sge and our long-lived ring of mapped headers. We send 659 scat = &ic->i_data_op->op_sg[sg];
611 * the header after the data so that the data payload can be aligned on 660 i = 0;
612 * the receiver. 661 do {
613 */ 662 unsigned int len = 0;
614 663
615 /* handle a 0-len message */ 664 /* Set up the header */
616 if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) { 665 send->s_wr.send_flags = send_flags;
617 rds_ib_xmit_populate_wr(ic, send, pos, 0, 0, send_flags); 666 send->s_wr.opcode = IB_WR_SEND;
618 goto add_header; 667 send->s_wr.num_sge = 1;
619 } 668 send->s_wr.next = NULL;
669 send->s_queued = jiffies;
670 send->s_op = NULL;
620 671
621 /* if there's data reference it with a chain of work reqs */ 672 send->s_sge[0].addr = ic->i_send_hdrs_dma
622 for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { 673 + (pos * sizeof(struct rds_header));
623 unsigned int len; 674 send->s_sge[0].length = sizeof(struct rds_header);
624 675
625 send = &ic->i_sends[pos]; 676 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
626 677
627 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); 678 /* Set up the data, if present */
628 rds_ib_xmit_populate_wr(ic, send, pos, 679 if (i < work_alloc
629 ib_sg_dma_address(dev, scat) + off, len, 680 && scat != &rm->data.op_sg[rm->data.op_count]) {
630 send_flags); 681 len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off);
682 send->s_wr.num_sge = 2;
631 683
632 /* 684 send->s_sge[1].addr = ib_sg_dma_address(dev, scat) + off;
633 * We want to delay signaling completions just enough to get 685 send->s_sge[1].length = len;
634 * the batching benefits but not so much that we create dead time
635 * on the wire.
636 */
637 if (ic->i_unsignaled_wrs-- == 0) {
638 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
639 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
640 }
641 686
642 ic->i_unsignaled_bytes -= len; 687 bytes_sent += len;
643 if (ic->i_unsignaled_bytes <= 0) { 688 off += len;
644 ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes; 689 if (off == ib_sg_dma_len(dev, scat)) {
645 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 690 scat++;
691 off = 0;
692 }
646 } 693 }
647 694
695 rds_ib_set_wr_signal_state(ic, send, 0);
696
648 /* 697 /*
649 * Always signal the last one if we're stopping due to flow control. 698 * Always signal the last one if we're stopping due to flow control.
650 */ 699 */
651 if (flow_controlled && i == (work_alloc-1)) 700 if (ic->i_flowctl && flow_controlled && i == (work_alloc-1))
652 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 701 send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
653 702
703 if (send->s_wr.send_flags & IB_SEND_SIGNALED)
704 nr_sig++;
705
654 rdsdebug("send %p wr %p num_sge %u next %p\n", send, 706 rdsdebug("send %p wr %p num_sge %u next %p\n", send,
655 &send->s_wr, send->s_wr.num_sge, send->s_wr.next); 707 &send->s_wr, send->s_wr.num_sge, send->s_wr.next);
656 708
657 sent += len; 709 if (ic->i_flowctl && adv_credits) {
658 off += len;
659 if (off == ib_sg_dma_len(dev, scat)) {
660 scat++;
661 off = 0;
662 }
663
664add_header:
665 /* Tack on the header after the data. The header SGE should already
666 * have been set up to point to the right header buffer. */
667 memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header));
668
669 if (0) {
670 struct rds_header *hdr = &ic->i_send_hdrs[pos];
671
672 printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n",
673 be16_to_cpu(hdr->h_dport),
674 hdr->h_flags,
675 be32_to_cpu(hdr->h_len));
676 }
677 if (adv_credits) {
678 struct rds_header *hdr = &ic->i_send_hdrs[pos]; 710 struct rds_header *hdr = &ic->i_send_hdrs[pos];
679 711
680 /* add credit and redo the header checksum */ 712 /* add credit and redo the header checksum */
@@ -689,20 +721,25 @@ add_header:
689 prev = send; 721 prev = send;
690 722
691 pos = (pos + 1) % ic->i_send_ring.w_nr; 723 pos = (pos + 1) % ic->i_send_ring.w_nr;
692 } 724 send = &ic->i_sends[pos];
725 i++;
726
727 } while (i < work_alloc
728 && scat != &rm->data.op_sg[rm->data.op_count]);
693 729
694 /* Account the RDS header in the number of bytes we sent, but just once. 730 /* Account the RDS header in the number of bytes we sent, but just once.
695 * The caller has no concept of fragmentation. */ 731 * The caller has no concept of fragmentation. */
696 if (hdr_off == 0) 732 if (hdr_off == 0)
697 sent += sizeof(struct rds_header); 733 bytes_sent += sizeof(struct rds_header);
698 734
699 /* if we finished the message then send completion owns it */ 735 /* if we finished the message then send completion owns it */
700 if (scat == &rm->m_sg[rm->m_count]) { 736 if (scat == &rm->data.op_sg[rm->data.op_count]) {
701 prev->s_rm = ic->i_rm; 737 prev->s_op = ic->i_data_op;
702 prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; 738 prev->s_wr.send_flags |= IB_SEND_SOLICITED;
703 ic->i_rm = NULL; 739 ic->i_data_op = NULL;
704 } 740 }
705 741
742 /* Put back wrs & credits we didn't use */
706 if (i < work_alloc) { 743 if (i < work_alloc) {
707 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); 744 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
708 work_alloc = i; 745 work_alloc = i;
@@ -710,6 +747,9 @@ add_header:
710 if (ic->i_flowctl && i < credit_alloc) 747 if (ic->i_flowctl && i < credit_alloc)
711 rds_ib_send_add_credits(conn, credit_alloc - i); 748 rds_ib_send_add_credits(conn, credit_alloc - i);
712 749
750 if (nr_sig)
751 atomic_add(nr_sig, &ic->i_signaled_sends);
752
713 /* XXX need to worry about failed_wr and partial sends. */ 753 /* XXX need to worry about failed_wr and partial sends. */
714 failed_wr = &first->s_wr; 754 failed_wr = &first->s_wr;
715 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 755 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
@@ -720,32 +760,127 @@ add_header:
720 printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 " 760 printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 "
721 "returned %d\n", &conn->c_faddr, ret); 761 "returned %d\n", &conn->c_faddr, ret);
722 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 762 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
723 if (prev->s_rm) { 763 rds_ib_sub_signaled(ic, nr_sig);
724 ic->i_rm = prev->s_rm; 764 if (prev->s_op) {
725 prev->s_rm = NULL; 765 ic->i_data_op = prev->s_op;
766 prev->s_op = NULL;
726 } 767 }
727 768
728 rds_ib_conn_error(ic->conn, "ib_post_send failed\n"); 769 rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
729 goto out; 770 goto out;
730 } 771 }
731 772
732 ret = sent; 773 ret = bytes_sent;
733out: 774out:
734 BUG_ON(adv_credits); 775 BUG_ON(adv_credits);
735 return ret; 776 return ret;
736} 777}
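The rewritten send loop always charges one header SGE per fragment and walks the data scatterlist in RDS_FRAG_SIZE bites that never straddle an SG entry. A sketch of that walk over hypothetical entry lengths:

#include <stdio.h>

#define FRAG_SIZE 4096U                 /* stand-in for RDS_FRAG_SIZE */

int main(void)
{
        /* Hypothetical DMA-mapped scatterlist entry lengths. */
        unsigned int sg_len[] = { 9000, 3000 };
        unsigned int nents = 2, scat = 0, off = 0;

        while (scat < nents) {
                unsigned int rem = sg_len[scat] - off;
                unsigned int len = rem < FRAG_SIZE ? rem : FRAG_SIZE;

                /* one header SGE + (if len) one data SGE per fragment */
                printf("frag: sg %u off %u len %u\n", scat, off, len);

                off += len;
                if (off == sg_len[scat]) {      /* entry exhausted */
                        scat++;
                        off = 0;
                }
        }
        return 0;
}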
737 778
738int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) 779/*
780 * Issue atomic operation.
781 * A simplified version of the rdma case, we always map 1 SG, and
782 * only 8 bytes, for the return value from the atomic operation.
783 */
784int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op)
785{
786 struct rds_ib_connection *ic = conn->c_transport_data;
787 struct rds_ib_send_work *send = NULL;
788 struct ib_send_wr *failed_wr;
789 struct rds_ib_device *rds_ibdev;
790 u32 pos;
791 u32 work_alloc;
792 int ret;
793 int nr_sig = 0;
794
795 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client);
796
797 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, 1, &pos);
798 if (work_alloc != 1) {
799 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
800 rds_ib_stats_inc(s_ib_tx_ring_full);
801 ret = -ENOMEM;
802 goto out;
803 }
804
805 /* address of send request in ring */
806 send = &ic->i_sends[pos];
807 send->s_queued = jiffies;
808
809 if (op->op_type == RDS_ATOMIC_TYPE_CSWP) {
810 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_CMP_AND_SWP;
811 send->s_wr.wr.atomic.compare_add = op->op_m_cswp.compare;
812 send->s_wr.wr.atomic.swap = op->op_m_cswp.swap;
813 send->s_wr.wr.atomic.compare_add_mask = op->op_m_cswp.compare_mask;
814 send->s_wr.wr.atomic.swap_mask = op->op_m_cswp.swap_mask;
815 } else { /* FADD */
816 send->s_wr.opcode = IB_WR_MASKED_ATOMIC_FETCH_AND_ADD;
817 send->s_wr.wr.atomic.compare_add = op->op_m_fadd.add;
818 send->s_wr.wr.atomic.swap = 0;
819 send->s_wr.wr.atomic.compare_add_mask = op->op_m_fadd.nocarry_mask;
820 send->s_wr.wr.atomic.swap_mask = 0;
821 }
822 nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify);
823 send->s_wr.num_sge = 1;
824 send->s_wr.next = NULL;
825 send->s_wr.wr.atomic.remote_addr = op->op_remote_addr;
826 send->s_wr.wr.atomic.rkey = op->op_rkey;
827 send->s_op = op;
828 rds_message_addref(container_of(send->s_op, struct rds_message, atomic));
829
830 /* map 8 byte retval buffer to the device */
831 ret = ib_dma_map_sg(ic->i_cm_id->device, op->op_sg, 1, DMA_FROM_DEVICE);
832 rdsdebug("ic %p mapping atomic op %p. mapped %d pg\n", ic, op, ret);
833 if (ret != 1) {
834 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
835 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
836 ret = -ENOMEM; /* XXX ? */
837 goto out;
838 }
839
840 /* Convert our struct scatterlist to struct ib_sge */
841 send->s_sge[0].addr = ib_sg_dma_address(ic->i_cm_id->device, op->op_sg);
842 send->s_sge[0].length = ib_sg_dma_len(ic->i_cm_id->device, op->op_sg);
843 send->s_sge[0].lkey = ic->i_mr->lkey;
844
845 rdsdebug("rva %Lx rpa %Lx len %u\n", op->op_remote_addr,
846 send->s_sge[0].addr, send->s_sge[0].length);
847
848 if (nr_sig)
849 atomic_add(nr_sig, &ic->i_signaled_sends);
850
851 failed_wr = &send->s_wr;
852 ret = ib_post_send(ic->i_cm_id->qp, &send->s_wr, &failed_wr);
853 rdsdebug("ic %p send %p (wr %p) ret %d wr %p\n", ic,
854 send, &send->s_wr, ret, failed_wr);
855 BUG_ON(failed_wr != &send->s_wr);
856 if (ret) {
857 printk(KERN_WARNING "RDS/IB: atomic ib_post_send to %pI4 "
858 "returned %d\n", &conn->c_faddr, ret);
859 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
860 rds_ib_sub_signaled(ic, nr_sig);
861 goto out;
862 }
863
864 if (unlikely(failed_wr != &send->s_wr)) {
865 printk(KERN_WARNING "RDS/IB: atomic ib_post_send() rc=%d, but failed_wqe updated!\n", ret);
866 BUG_ON(failed_wr != &send->s_wr);
867 }
868
869out:
870 return ret;
871}
872
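The masked atomic work requests generalize plain compare-and-swap: only bits under the compare mask take part in the comparison, and only bits under the swap mask are replaced. A plain-C model of the masked-CSWP semantics (the operation's meaning on the wire, not the verbs API):

#include <stdint.h>
#include <stdio.h>

/* Masked compare-and-swap: compare only bits under cmp_mask, swap
 * only bits under swap_mask, return the prior value. With both
 * masks all-ones this degenerates to a plain CSWP. */
static uint64_t masked_cswp(uint64_t *target, uint64_t compare,
                            uint64_t swap, uint64_t cmp_mask,
                            uint64_t swap_mask)
{
        uint64_t old = *target;

        if ((old & cmp_mask) == (compare & cmp_mask))
                *target = (old & ~swap_mask) | (swap & swap_mask);
        return old;
}

int main(void)
{
        uint64_t val = 0xdeadbeef00000001ULL;

        /* operate on the low 32 bits only; the top half is untouched */
        masked_cswp(&val, 1, 2, 0xffffffffULL, 0xffffffffULL);
        printf("val = 0x%llx\n", (unsigned long long)val);
        return 0;
}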
873int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
739{ 874{
740 struct rds_ib_connection *ic = conn->c_transport_data; 875 struct rds_ib_connection *ic = conn->c_transport_data;
741 struct rds_ib_send_work *send = NULL; 876 struct rds_ib_send_work *send = NULL;
742 struct rds_ib_send_work *first; 877 struct rds_ib_send_work *first;
743 struct rds_ib_send_work *prev; 878 struct rds_ib_send_work *prev;
744 struct ib_send_wr *failed_wr; 879 struct ib_send_wr *failed_wr;
745 struct rds_ib_device *rds_ibdev;
746 struct scatterlist *scat; 880 struct scatterlist *scat;
747 unsigned long len; 881 unsigned long len;
748 u64 remote_addr = op->r_remote_addr; 882 u64 remote_addr = op->op_remote_addr;
883 u32 max_sge = ic->rds_ibdev->max_sge;
749 u32 pos; 884 u32 pos;
750 u32 work_alloc; 885 u32 work_alloc;
751 u32 i; 886 u32 i;
@@ -753,29 +888,28 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
753 int sent; 888 int sent;
754 int ret; 889 int ret;
755 int num_sge; 890 int num_sge;
756 891 int nr_sig = 0;
757 rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); 892
758 893 /* map the op the first time we see it */
759 /* map the message the first time we see it */ 894 if (!op->op_mapped) {
760 if (!op->r_mapped) { 895 op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
761 op->r_count = ib_dma_map_sg(ic->i_cm_id->device, 896 op->op_sg, op->op_nents, (op->op_write) ?
762 op->r_sg, op->r_nents, (op->r_write) ? 897 DMA_TO_DEVICE : DMA_FROM_DEVICE);
763 DMA_TO_DEVICE : DMA_FROM_DEVICE); 898 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
764 rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count); 899 if (op->op_count == 0) {
765 if (op->r_count == 0) {
766 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); 900 rds_ib_stats_inc(s_ib_tx_sg_mapping_failure);
767 ret = -ENOMEM; /* XXX ? */ 901 ret = -ENOMEM; /* XXX ? */
768 goto out; 902 goto out;
769 } 903 }
770 904
771 op->r_mapped = 1; 905 op->op_mapped = 1;
772 } 906 }
773 907
774 /* 908 /*
775 * Instead of knowing how to return a partial rdma read/write we insist that there 909 * Instead of knowing how to return a partial rdma read/write we insist that there
776 * be enough work requests to send the entire message. 910 * be enough work requests to send the entire message.
777 */ 911 */
778 i = ceil(op->r_count, rds_ibdev->max_sge); 912 i = ceil(op->op_count, max_sge);
779 913
780 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); 914 work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos);
781 if (work_alloc != i) { 915 if (work_alloc != i) {
@@ -788,30 +922,24 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
788 send = &ic->i_sends[pos]; 922 send = &ic->i_sends[pos];
789 first = send; 923 first = send;
790 prev = NULL; 924 prev = NULL;
791 scat = &op->r_sg[0]; 925 scat = &op->op_sg[0];
792 sent = 0; 926 sent = 0;
793 num_sge = op->r_count; 927 num_sge = op->op_count;
794 928
795 for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { 929 for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
796 send->s_wr.send_flags = 0; 930 send->s_wr.send_flags = 0;
797 send->s_queued = jiffies; 931 send->s_queued = jiffies;
798 /* 932 send->s_op = NULL;
799 * We want to delay signaling completions just enough to get 933
800 * the batching benefits but not so much that we create dead time on the wire. 934 nr_sig += rds_ib_set_wr_signal_state(ic, send, op->op_notify);
801 */
802 if (ic->i_unsignaled_wrs-- == 0) {
803 ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs;
804 send->s_wr.send_flags = IB_SEND_SIGNALED;
805 }
806 935
807 send->s_wr.opcode = op->r_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; 936 send->s_wr.opcode = op->op_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
808 send->s_wr.wr.rdma.remote_addr = remote_addr; 937 send->s_wr.wr.rdma.remote_addr = remote_addr;
809 send->s_wr.wr.rdma.rkey = op->r_key; 938 send->s_wr.wr.rdma.rkey = op->op_rkey;
810 send->s_op = op;
811 939
812 if (num_sge > rds_ibdev->max_sge) { 940 if (num_sge > max_sge) {
813 send->s_wr.num_sge = rds_ibdev->max_sge; 941 send->s_wr.num_sge = max_sge;
814 num_sge -= rds_ibdev->max_sge; 942 num_sge -= max_sge;
815 } else { 943 } else {
816 send->s_wr.num_sge = num_sge; 944 send->s_wr.num_sge = num_sge;
817 } 945 }
@@ -821,7 +949,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
821 if (prev) 949 if (prev)
822 prev->s_wr.next = &send->s_wr; 950 prev->s_wr.next = &send->s_wr;
823 951
824 for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) { 952 for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
825 len = ib_sg_dma_len(ic->i_cm_id->device, scat); 953 len = ib_sg_dma_len(ic->i_cm_id->device, scat);
826 send->s_sge[j].addr = 954 send->s_sge[j].addr =
827 ib_sg_dma_address(ic->i_cm_id->device, scat); 955 ib_sg_dma_address(ic->i_cm_id->device, scat);
@@ -843,15 +971,20 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
843 send = ic->i_sends; 971 send = ic->i_sends;
844 } 972 }
845 973
846 /* if we finished the message then send completion owns it */ 974 /* give a reference to the last op */
847 if (scat == &op->r_sg[op->r_count]) 975 if (scat == &op->op_sg[op->op_count]) {
848 prev->s_wr.send_flags = IB_SEND_SIGNALED; 976 prev->s_op = op;
977 rds_message_addref(container_of(op, struct rds_message, rdma));
978 }
849 979
850 if (i < work_alloc) { 980 if (i < work_alloc) {
851 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); 981 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i);
852 work_alloc = i; 982 work_alloc = i;
853 } 983 }
854 984
985 if (nr_sig)
986 atomic_add(nr_sig, &ic->i_signaled_sends);
987
855 failed_wr = &first->s_wr; 988 failed_wr = &first->s_wr;
856 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); 989 ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr);
857 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, 990 rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic,
@@ -861,6 +994,7 @@ int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
861 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 " 994 printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 "
862 "returned %d\n", &conn->c_faddr, ret); 995 "returned %d\n", &conn->c_faddr, ret);
863 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); 996 rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc);
997 rds_ib_sub_signaled(ic, nr_sig);
864 goto out; 998 goto out;
865 } 999 }
866 1000
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
index d2c904dd6fbc..2d5965d6e97c 100644
--- a/net/rds/ib_stats.c
+++ b/net/rds/ib_stats.c
@@ -67,6 +67,8 @@ static const char *const rds_ib_stat_names[] = {
67 "ib_rdma_mr_pool_flush", 67 "ib_rdma_mr_pool_flush",
68 "ib_rdma_mr_pool_wait", 68 "ib_rdma_mr_pool_wait",
69 "ib_rdma_mr_pool_depleted", 69 "ib_rdma_mr_pool_depleted",
70 "ib_atomic_cswp",
71 "ib_atomic_fadd",
70}; 72};
71 73
72unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter, 74unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter,
diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c
index 03f01cb4e0fe..1253b006efdb 100644
--- a/net/rds/ib_sysctl.c
+++ b/net/rds/ib_sysctl.c
@@ -49,10 +49,6 @@ unsigned long rds_ib_sysctl_max_unsig_wrs = 16;
49static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1; 49static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1;
50static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64; 50static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64;
51 51
52unsigned long rds_ib_sysctl_max_unsig_bytes = (16 << 20);
53static unsigned long rds_ib_sysctl_max_unsig_bytes_min = 1;
54static unsigned long rds_ib_sysctl_max_unsig_bytes_max = ~0UL;
55
56/* 52/*
57 * This sysctl does nothing. 53 * This sysctl does nothing.
58 * 54 *
@@ -65,7 +61,7 @@ static unsigned long rds_ib_sysctl_max_unsig_bytes_max = ~0UL;
65 */ 61 */
66unsigned int rds_ib_sysctl_flow_control = 0; 62unsigned int rds_ib_sysctl_flow_control = 0;
67 63
68ctl_table rds_ib_sysctl_table[] = { 64static ctl_table rds_ib_sysctl_table[] = {
69 { 65 {
70 .procname = "max_send_wr", 66 .procname = "max_send_wr",
71 .data = &rds_ib_sysctl_max_send_wr, 67 .data = &rds_ib_sysctl_max_send_wr,
@@ -94,15 +90,6 @@ ctl_table rds_ib_sysctl_table[] = {
94 .extra2 = &rds_ib_sysctl_max_unsig_wr_max, 90 .extra2 = &rds_ib_sysctl_max_unsig_wr_max,
95 }, 91 },
96 { 92 {
97 .procname = "max_unsignaled_bytes",
98 .data = &rds_ib_sysctl_max_unsig_bytes,
99 .maxlen = sizeof(unsigned long),
100 .mode = 0644,
101 .proc_handler = proc_doulongvec_minmax,
102 .extra1 = &rds_ib_sysctl_max_unsig_bytes_min,
103 .extra2 = &rds_ib_sysctl_max_unsig_bytes_max,
104 },
105 {
106 .procname = "max_recv_allocation", 93 .procname = "max_recv_allocation",
107 .data = &rds_ib_sysctl_max_recv_allocation, 94 .data = &rds_ib_sysctl_max_recv_allocation,
108 .maxlen = sizeof(unsigned long), 95 .maxlen = sizeof(unsigned long),
@@ -132,10 +119,10 @@ void rds_ib_sysctl_exit(void)
132 unregister_sysctl_table(rds_ib_sysctl_hdr); 119 unregister_sysctl_table(rds_ib_sysctl_hdr);
133} 120}
134 121
135int __init rds_ib_sysctl_init(void) 122int rds_ib_sysctl_init(void)
136{ 123{
137 rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table); 124 rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table);
138 if (rds_ib_sysctl_hdr == NULL) 125 if (!rds_ib_sysctl_hdr)
139 return -ENOMEM; 126 return -ENOMEM;
140 return 0; 127 return 0;
141} 128}
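For the knobs that remain, the table's extra1/extra2 bounds mean out-of-range writes are dropped rather than clamped. A standalone model of that validation (illustrative, not the proc handler itself):

#include <stdio.h>

/* Bounded tunable: reject writes outside [min, max]. */
static int set_tunable(unsigned long *knob, unsigned long val,
                       unsigned long min, unsigned long max)
{
        if (val < min || val > max)
                return -1;
        *knob = val;
        return 0;
}

int main(void)
{
        unsigned long max_unsig_wrs = 16;       /* bounded to [1, 64] */

        set_tunable(&max_unsig_wrs, 500, 1, 64);        /* rejected */
        set_tunable(&max_unsig_wrs, 32, 1, 64);         /* accepted */
        printf("max_unsignaled_wr = %lu\n", max_unsig_wrs);     /* 32 */
        return 0;
}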
diff --git a/net/rds/info.c b/net/rds/info.c
index c45c4173a44d..4fdf1b6e84ff 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -76,7 +76,7 @@ void rds_info_register_func(int optname, rds_info_func func)
76 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); 76 BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST);
77 77
78 spin_lock(&rds_info_lock); 78 spin_lock(&rds_info_lock);
79 BUG_ON(rds_info_funcs[offset] != NULL); 79 BUG_ON(rds_info_funcs[offset]);
80 rds_info_funcs[offset] = func; 80 rds_info_funcs[offset] = func;
81 spin_unlock(&rds_info_lock); 81 spin_unlock(&rds_info_lock);
82} 82}
@@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(rds_info_deregister_func);
102 */ 102 */
103void rds_info_iter_unmap(struct rds_info_iterator *iter) 103void rds_info_iter_unmap(struct rds_info_iterator *iter)
104{ 104{
105 if (iter->addr != NULL) { 105 if (iter->addr) {
106 kunmap_atomic(iter->addr, KM_USER0); 106 kunmap_atomic(iter->addr, KM_USER0);
107 iter->addr = NULL; 107 iter->addr = NULL;
108 } 108 }
@@ -117,7 +117,7 @@ void rds_info_copy(struct rds_info_iterator *iter, void *data,
117 unsigned long this; 117 unsigned long this;
118 118
119 while (bytes) { 119 while (bytes) {
120 if (iter->addr == NULL) 120 if (!iter->addr)
121 iter->addr = kmap_atomic(*iter->pages, KM_USER0); 121 iter->addr = kmap_atomic(*iter->pages, KM_USER0);
122 122
123 this = min(bytes, PAGE_SIZE - iter->offset); 123 this = min(bytes, PAGE_SIZE - iter->offset);
@@ -188,7 +188,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
188 >> PAGE_SHIFT; 188 >> PAGE_SHIFT;
189 189
190 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); 190 pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL);
191 if (pages == NULL) { 191 if (!pages) {
192 ret = -ENOMEM; 192 ret = -ENOMEM;
193 goto out; 193 goto out;
194 } 194 }
@@ -206,7 +206,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
206 206
207call_func: 207call_func:
208 func = rds_info_funcs[optname - RDS_INFO_FIRST]; 208 func = rds_info_funcs[optname - RDS_INFO_FIRST];
209 if (func == NULL) { 209 if (!func) {
210 ret = -ENOPROTOOPT; 210 ret = -ENOPROTOOPT;
211 goto out; 211 goto out;
212 } 212 }
@@ -234,7 +234,7 @@ call_func:
234 ret = -EFAULT; 234 ret = -EFAULT;
235 235
236out: 236out:
237 for (i = 0; pages != NULL && i < nr_pages; i++) 237 for (i = 0; pages && i < nr_pages; i++)
238 put_page(pages[i]); 238 put_page(pages[i]);
239 kfree(pages); 239 kfree(pages);
240 240
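rds_info_copy() streams bytes through a run of pages, mapping one at a time and never letting a single copy cross a page boundary. A userspace sketch of the same walk:

#include <stdio.h>

#define PAGE_SIZE 4096U

int main(void)
{
        unsigned int bytes = 10000;     /* payload to copy out */
        unsigned int offset = 100;      /* starting offset in page 0 */
        unsigned int page = 0;

        while (bytes) {
                unsigned int room = PAGE_SIZE - offset;
                unsigned int this = bytes < room ? bytes : room;

                printf("copy %u bytes to page %u at offset %u\n",
                       this, page, offset);
                bytes -= this;
                offset += this;
                if (offset == PAGE_SIZE) {      /* unmap, map the next */
                        page++;
                        offset = 0;
                }
        }
        return 0;
}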
diff --git a/net/rds/iw.c b/net/rds/iw.c
index c8f3d3525cb9..5a9676fe594f 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -56,7 +56,7 @@ struct list_head rds_iw_devices;
56DEFINE_SPINLOCK(iw_nodev_conns_lock); 56DEFINE_SPINLOCK(iw_nodev_conns_lock);
57LIST_HEAD(iw_nodev_conns); 57LIST_HEAD(iw_nodev_conns);
58 58
59void rds_iw_add_one(struct ib_device *device) 59static void rds_iw_add_one(struct ib_device *device)
60{ 60{
61 struct rds_iw_device *rds_iwdev; 61 struct rds_iw_device *rds_iwdev;
62 struct ib_device_attr *dev_attr; 62 struct ib_device_attr *dev_attr;
@@ -124,7 +124,7 @@ free_attr:
124 kfree(dev_attr); 124 kfree(dev_attr);
125} 125}
126 126
127void rds_iw_remove_one(struct ib_device *device) 127static void rds_iw_remove_one(struct ib_device *device)
128{ 128{
129 struct rds_iw_device *rds_iwdev; 129 struct rds_iw_device *rds_iwdev;
130 struct rds_iw_cm_id *i_cm_id, *next; 130 struct rds_iw_cm_id *i_cm_id, *next;
@@ -264,7 +264,6 @@ struct rds_transport rds_iw_transport = {
264 .laddr_check = rds_iw_laddr_check, 264 .laddr_check = rds_iw_laddr_check,
265 .xmit_complete = rds_iw_xmit_complete, 265 .xmit_complete = rds_iw_xmit_complete,
266 .xmit = rds_iw_xmit, 266 .xmit = rds_iw_xmit,
267 .xmit_cong_map = NULL,
268 .xmit_rdma = rds_iw_xmit_rdma, 267 .xmit_rdma = rds_iw_xmit_rdma,
269 .recv = rds_iw_recv, 268 .recv = rds_iw_recv,
270 .conn_alloc = rds_iw_conn_alloc, 269 .conn_alloc = rds_iw_conn_alloc,
@@ -272,7 +271,6 @@ struct rds_transport rds_iw_transport = {
272 .conn_connect = rds_iw_conn_connect, 271 .conn_connect = rds_iw_conn_connect,
273 .conn_shutdown = rds_iw_conn_shutdown, 272 .conn_shutdown = rds_iw_conn_shutdown,
274 .inc_copy_to_user = rds_iw_inc_copy_to_user, 273 .inc_copy_to_user = rds_iw_inc_copy_to_user,
275 .inc_purge = rds_iw_inc_purge,
276 .inc_free = rds_iw_inc_free, 274 .inc_free = rds_iw_inc_free,
277 .cm_initiate_connect = rds_iw_cm_initiate_connect, 275 .cm_initiate_connect = rds_iw_cm_initiate_connect,
278 .cm_handle_connect = rds_iw_cm_handle_connect, 276 .cm_handle_connect = rds_iw_cm_handle_connect,
@@ -289,7 +287,7 @@ struct rds_transport rds_iw_transport = {
289 .t_prefer_loopback = 1, 287 .t_prefer_loopback = 1,
290}; 288};
291 289
292int __init rds_iw_init(void) 290int rds_iw_init(void)
293{ 291{
294 int ret; 292 int ret;
295 293
diff --git a/net/rds/iw.h b/net/rds/iw.h
index eef2f0c28476..90151922178c 100644
--- a/net/rds/iw.h
+++ b/net/rds/iw.h
@@ -70,7 +70,7 @@ struct rds_iw_send_work {
70 struct rds_message *s_rm; 70 struct rds_message *s_rm;
71 71
72 /* We should really put these into a union: */ 72 /* We should really put these into a union: */
73 struct rds_rdma_op *s_op; 73 struct rm_rdma_op *s_op;
74 struct rds_iw_mapping *s_mapping; 74 struct rds_iw_mapping *s_mapping;
75 struct ib_mr *s_mr; 75 struct ib_mr *s_mr;
76 struct ib_fast_reg_page_list *s_page_list; 76 struct ib_fast_reg_page_list *s_page_list;
@@ -268,8 +268,6 @@ static inline u32 rds_iw_local_dma_lkey(struct rds_iw_connection *ic)
268 268
269/* ib.c */ 269/* ib.c */
270extern struct rds_transport rds_iw_transport; 270extern struct rds_transport rds_iw_transport;
271extern void rds_iw_add_one(struct ib_device *device);
272extern void rds_iw_remove_one(struct ib_device *device);
273extern struct ib_client rds_iw_client; 271extern struct ib_client rds_iw_client;
274 272
275extern unsigned int fastreg_pool_size; 273extern unsigned int fastreg_pool_size;
@@ -284,7 +282,7 @@ void rds_iw_conn_free(void *arg);
284int rds_iw_conn_connect(struct rds_connection *conn); 282int rds_iw_conn_connect(struct rds_connection *conn);
285void rds_iw_conn_shutdown(struct rds_connection *conn); 283void rds_iw_conn_shutdown(struct rds_connection *conn);
286void rds_iw_state_change(struct sock *sk); 284void rds_iw_state_change(struct sock *sk);
287int __init rds_iw_listen_init(void); 285int rds_iw_listen_init(void);
288void rds_iw_listen_stop(void); 286void rds_iw_listen_stop(void);
289void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...); 287void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...);
290int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, 288int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
@@ -318,15 +316,13 @@ void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents,
318void rds_iw_sync_mr(void *trans_private, int dir); 316void rds_iw_sync_mr(void *trans_private, int dir);
319void rds_iw_free_mr(void *trans_private, int invalidate); 317void rds_iw_free_mr(void *trans_private, int invalidate);
320void rds_iw_flush_mrs(void); 318void rds_iw_flush_mrs(void);
321void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id);
322 319
323/* ib_recv.c */ 320/* ib_recv.c */
324int __init rds_iw_recv_init(void); 321int rds_iw_recv_init(void);
325void rds_iw_recv_exit(void); 322void rds_iw_recv_exit(void);
326int rds_iw_recv(struct rds_connection *conn); 323int rds_iw_recv(struct rds_connection *conn);
327int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, 324int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
328 gfp_t page_gfp, int prefill); 325 gfp_t page_gfp, int prefill);
329void rds_iw_inc_purge(struct rds_incoming *inc);
330void rds_iw_inc_free(struct rds_incoming *inc); 326void rds_iw_inc_free(struct rds_incoming *inc);
331int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 327int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
332 size_t size); 328 size_t size);
@@ -358,7 +354,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
358void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context); 354void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context);
359void rds_iw_send_init_ring(struct rds_iw_connection *ic); 355void rds_iw_send_init_ring(struct rds_iw_connection *ic);
360void rds_iw_send_clear_ring(struct rds_iw_connection *ic); 356void rds_iw_send_clear_ring(struct rds_iw_connection *ic);
361int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); 357int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
362void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits); 358void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits);
363void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted); 359void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted);
364int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted, 360int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted,
@@ -371,7 +367,7 @@ unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter,
371 unsigned int avail); 367 unsigned int avail);
372 368
373/* ib_sysctl.c */ 369/* ib_sysctl.c */
374int __init rds_iw_sysctl_init(void); 370int rds_iw_sysctl_init(void);
375void rds_iw_sysctl_exit(void); 371void rds_iw_sysctl_exit(void);
376extern unsigned long rds_iw_sysctl_max_send_wr; 372extern unsigned long rds_iw_sysctl_max_send_wr;
377extern unsigned long rds_iw_sysctl_max_recv_wr; 373extern unsigned long rds_iw_sysctl_max_recv_wr;
@@ -379,7 +375,6 @@ extern unsigned long rds_iw_sysctl_max_unsig_wrs;
379extern unsigned long rds_iw_sysctl_max_unsig_bytes; 375extern unsigned long rds_iw_sysctl_max_unsig_bytes;
380extern unsigned long rds_iw_sysctl_max_recv_allocation; 376extern unsigned long rds_iw_sysctl_max_recv_allocation;
381extern unsigned int rds_iw_sysctl_flow_control; 377extern unsigned int rds_iw_sysctl_flow_control;
382extern ctl_table rds_iw_sysctl_table[];
383 378
384/* 379/*
385 * Helper functions for getting/setting the header and data SGEs in 380 * Helper functions for getting/setting the header and data SGEs in
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index b5dd6ac39be8..712cf2d1f28e 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -257,7 +257,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
257 * the rds_iwdev at all. 257 * the rds_iwdev at all.
258 */ 258 */
259 rds_iwdev = ib_get_client_data(dev, &rds_iw_client); 259 rds_iwdev = ib_get_client_data(dev, &rds_iw_client);
260 if (rds_iwdev == NULL) { 260 if (!rds_iwdev) {
261 if (printk_ratelimit()) 261 if (printk_ratelimit())
262 printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n", 262 printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n",
263 dev->name); 263 dev->name);
@@ -292,7 +292,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
292 ic->i_send_ring.w_nr * 292 ic->i_send_ring.w_nr *
293 sizeof(struct rds_header), 293 sizeof(struct rds_header),
294 &ic->i_send_hdrs_dma, GFP_KERNEL); 294 &ic->i_send_hdrs_dma, GFP_KERNEL);
295 if (ic->i_send_hdrs == NULL) { 295 if (!ic->i_send_hdrs) {
296 ret = -ENOMEM; 296 ret = -ENOMEM;
297 rdsdebug("ib_dma_alloc_coherent send failed\n"); 297 rdsdebug("ib_dma_alloc_coherent send failed\n");
298 goto out; 298 goto out;
@@ -302,7 +302,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
302 ic->i_recv_ring.w_nr * 302 ic->i_recv_ring.w_nr *
303 sizeof(struct rds_header), 303 sizeof(struct rds_header),
304 &ic->i_recv_hdrs_dma, GFP_KERNEL); 304 &ic->i_recv_hdrs_dma, GFP_KERNEL);
305 if (ic->i_recv_hdrs == NULL) { 305 if (!ic->i_recv_hdrs) {
306 ret = -ENOMEM; 306 ret = -ENOMEM;
307 rdsdebug("ib_dma_alloc_coherent recv failed\n"); 307 rdsdebug("ib_dma_alloc_coherent recv failed\n");
308 goto out; 308 goto out;
@@ -310,14 +310,14 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
310 310
311 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), 311 ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header),
312 &ic->i_ack_dma, GFP_KERNEL); 312 &ic->i_ack_dma, GFP_KERNEL);
313 if (ic->i_ack == NULL) { 313 if (!ic->i_ack) {
314 ret = -ENOMEM; 314 ret = -ENOMEM;
315 rdsdebug("ib_dma_alloc_coherent ack failed\n"); 315 rdsdebug("ib_dma_alloc_coherent ack failed\n");
316 goto out; 316 goto out;
317 } 317 }
318 318
319 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work)); 319 ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work));
320 if (ic->i_sends == NULL) { 320 if (!ic->i_sends) {
321 ret = -ENOMEM; 321 ret = -ENOMEM;
322 rdsdebug("send allocation failed\n"); 322 rdsdebug("send allocation failed\n");
323 goto out; 323 goto out;
@@ -325,7 +325,7 @@ static int rds_iw_setup_qp(struct rds_connection *conn)
325 rds_iw_send_init_ring(ic); 325 rds_iw_send_init_ring(ic);
326 326
327 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work)); 327 ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work));
328 if (ic->i_recvs == NULL) { 328 if (!ic->i_recvs) {
329 ret = -ENOMEM; 329 ret = -ENOMEM;
330 rdsdebug("recv allocation failed\n"); 330 rdsdebug("recv allocation failed\n");
331 goto out; 331 goto out;
@@ -696,7 +696,7 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp)
696 696
697 /* XXX too lazy? */ 697 /* XXX too lazy? */
698 ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL); 698 ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL);
699 if (ic == NULL) 699 if (!ic)
700 return -ENOMEM; 700 return -ENOMEM;
701 701
702 INIT_LIST_HEAD(&ic->iw_node); 702 INIT_LIST_HEAD(&ic->iw_node);
diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c
index 13dc1862d862..59509e9a9e72 100644
--- a/net/rds/iw_rdma.c
+++ b/net/rds/iw_rdma.c
@@ -34,7 +34,6 @@
34#include <linux/slab.h> 34#include <linux/slab.h>
35 35
36#include "rds.h" 36#include "rds.h"
37#include "rdma.h"
38#include "iw.h" 37#include "iw.h"
39 38
40 39
@@ -158,7 +157,8 @@ static int rds_iw_add_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *
158 return 0; 157 return 0;
159} 158}
160 159
161void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id) 160static void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev,
161 struct rdma_cm_id *cm_id)
162{ 162{
163 struct rds_iw_cm_id *i_cm_id; 163 struct rds_iw_cm_id *i_cm_id;
164 164
@@ -207,9 +207,9 @@ void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *con
207 BUG_ON(list_empty(&ic->iw_node)); 207 BUG_ON(list_empty(&ic->iw_node));
208 list_del(&ic->iw_node); 208 list_del(&ic->iw_node);
209 209
210 spin_lock_irq(&rds_iwdev->spinlock); 210 spin_lock(&rds_iwdev->spinlock);
211 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); 211 list_add_tail(&ic->iw_node, &rds_iwdev->conn_list);
212 spin_unlock_irq(&rds_iwdev->spinlock); 212 spin_unlock(&rds_iwdev->spinlock);
213 spin_unlock_irq(&iw_nodev_conns_lock); 213 spin_unlock_irq(&iw_nodev_conns_lock);
214 214
215 ic->rds_iwdev = rds_iwdev; 215 ic->rds_iwdev = rds_iwdev;
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index 3d479067d54d..5e57347f49ff 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -53,7 +53,7 @@ static void rds_iw_frag_drop_page(struct rds_page_frag *frag)
53static void rds_iw_frag_free(struct rds_page_frag *frag) 53static void rds_iw_frag_free(struct rds_page_frag *frag)
54{ 54{
55 rdsdebug("frag %p page %p\n", frag, frag->f_page); 55 rdsdebug("frag %p page %p\n", frag, frag->f_page);
56 BUG_ON(frag->f_page != NULL); 56 BUG_ON(frag->f_page);
57 kmem_cache_free(rds_iw_frag_slab, frag); 57 kmem_cache_free(rds_iw_frag_slab, frag);
58} 58}
59 59
@@ -143,14 +143,14 @@ static int rds_iw_recv_refill_one(struct rds_connection *conn,
143 struct ib_sge *sge; 143 struct ib_sge *sge;
144 int ret = -ENOMEM; 144 int ret = -ENOMEM;
145 145
146 if (recv->r_iwinc == NULL) { 146 if (!recv->r_iwinc) {
147 if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) { 147 if (!atomic_add_unless(&rds_iw_allocation, 1, rds_iw_sysctl_max_recv_allocation)) {
148 rds_iw_stats_inc(s_iw_rx_alloc_limit); 148 rds_iw_stats_inc(s_iw_rx_alloc_limit);
149 goto out; 149 goto out;
150 } 150 }
151 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab, 151 recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab,
152 kptr_gfp); 152 kptr_gfp);
153 if (recv->r_iwinc == NULL) { 153 if (!recv->r_iwinc) {
154 atomic_dec(&rds_iw_allocation); 154 atomic_dec(&rds_iw_allocation);
155 goto out; 155 goto out;
156 } 156 }
@@ -158,17 +158,17 @@ static int rds_iw_recv_refill_one(struct rds_connection *conn,
158 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr); 158 rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr);
159 } 159 }
160 160
161 if (recv->r_frag == NULL) { 161 if (!recv->r_frag) {
162 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp); 162 recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp);
163 if (recv->r_frag == NULL) 163 if (!recv->r_frag)
164 goto out; 164 goto out;
165 INIT_LIST_HEAD(&recv->r_frag->f_item); 165 INIT_LIST_HEAD(&recv->r_frag->f_item);
166 recv->r_frag->f_page = NULL; 166 recv->r_frag->f_page = NULL;
167 } 167 }
168 168
169 if (ic->i_frag.f_page == NULL) { 169 if (!ic->i_frag.f_page) {
170 ic->i_frag.f_page = alloc_page(page_gfp); 170 ic->i_frag.f_page = alloc_page(page_gfp);
171 if (ic->i_frag.f_page == NULL) 171 if (!ic->i_frag.f_page)
172 goto out; 172 goto out;
173 ic->i_frag.f_offset = 0; 173 ic->i_frag.f_offset = 0;
174 } 174 }
@@ -273,7 +273,7 @@ int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp,
273 return ret; 273 return ret;
274} 274}
275 275
276void rds_iw_inc_purge(struct rds_incoming *inc) 276static void rds_iw_inc_purge(struct rds_incoming *inc)
277{ 277{
278 struct rds_iw_incoming *iwinc; 278 struct rds_iw_incoming *iwinc;
279 struct rds_page_frag *frag; 279 struct rds_page_frag *frag;
@@ -716,7 +716,7 @@ static void rds_iw_process_recv(struct rds_connection *conn,
716 * into the inc and save the inc so we can hang upcoming fragments 716 * into the inc and save the inc so we can hang upcoming fragments
717 * off its list. 717 * off its list.
718 */ 718 */
719 if (iwinc == NULL) { 719 if (!iwinc) {
720 iwinc = recv->r_iwinc; 720 iwinc = recv->r_iwinc;
721 recv->r_iwinc = NULL; 721 recv->r_iwinc = NULL;
722 ic->i_iwinc = iwinc; 722 ic->i_iwinc = iwinc;
@@ -887,7 +887,7 @@ int rds_iw_recv(struct rds_connection *conn)
887 return ret; 887 return ret;
888} 888}
889 889
890int __init rds_iw_recv_init(void) 890int rds_iw_recv_init(void)
891{ 891{
892 struct sysinfo si; 892 struct sysinfo si;
893 int ret = -ENOMEM; 893 int ret = -ENOMEM;
@@ -899,13 +899,13 @@ int __init rds_iw_recv_init(void)
899 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming", 899 rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming",
900 sizeof(struct rds_iw_incoming), 900 sizeof(struct rds_iw_incoming),
901 0, 0, NULL); 901 0, 0, NULL);
902 if (rds_iw_incoming_slab == NULL) 902 if (!rds_iw_incoming_slab)
903 goto out; 903 goto out;
904 904
905 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag", 905 rds_iw_frag_slab = kmem_cache_create("rds_iw_frag",
906 sizeof(struct rds_page_frag), 906 sizeof(struct rds_page_frag),
907 0, 0, NULL); 907 0, 0, NULL);
908 if (rds_iw_frag_slab == NULL) 908 if (!rds_iw_frag_slab)
909 kmem_cache_destroy(rds_iw_incoming_slab); 909 kmem_cache_destroy(rds_iw_incoming_slab);
910 else 910 else
911 ret = 0; 911 ret = 0;
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 52182ff7519e..6280ea020d4e 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -36,7 +36,6 @@
 #include <linux/dmapool.h>
 
 #include "rds.h"
-#include "rdma.h"
 #include "iw.h"
 
 static void rds_iw_send_rdma_complete(struct rds_message *rm,
@@ -64,13 +63,13 @@ static void rds_iw_send_rdma_complete(struct rds_message *rm,
 }
 
 static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic,
-				   struct rds_rdma_op *op)
+				   struct rm_rdma_op *op)
 {
-	if (op->r_mapped) {
+	if (op->op_mapped) {
 		ib_dma_unmap_sg(ic->i_cm_id->device,
-			op->r_sg, op->r_nents,
-			op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
-		op->r_mapped = 0;
+			op->op_sg, op->op_nents,
+			op->op_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+		op->op_mapped = 0;
 	}
 }
 
@@ -83,11 +82,11 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
 	rdsdebug("ic %p send %p rm %p\n", ic, send, rm);
 
 	ib_dma_unmap_sg(ic->i_cm_id->device,
-		rm->m_sg, rm->m_nents,
+		rm->data.op_sg, rm->data.op_nents,
 		DMA_TO_DEVICE);
 
-	if (rm->m_rdma_op != NULL) {
-		rds_iw_send_unmap_rdma(ic, rm->m_rdma_op);
+	if (rm->rdma.op_active) {
+		rds_iw_send_unmap_rdma(ic, &rm->rdma);
 
 		/* If the user asked for a completion notification on this
 		 * message, we can implement three different semantics:
@@ -111,10 +110,10 @@ static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic,
 		 */
 		rds_iw_send_rdma_complete(rm, wc_status);
 
-		if (rm->m_rdma_op->r_write)
-			rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes);
+		if (rm->rdma.op_write)
+			rds_stats_add(s_send_rdma_bytes, rm->rdma.op_bytes);
 		else
-			rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes);
+			rds_stats_add(s_recv_rdma_bytes, rm->rdma.op_bytes);
 	}
 
 	/* If anyone waited for this message to get flushed out, wake
@@ -556,25 +555,27 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 	}
 
 	/* map the message the first time we see it */
-	if (ic->i_rm == NULL) {
+	if (!ic->i_rm) {
 		/*
 		printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n",
 				be16_to_cpu(rm->m_inc.i_hdr.h_dport),
 				rm->m_inc.i_hdr.h_flags,
 				be32_to_cpu(rm->m_inc.i_hdr.h_len));
 		*/
-		if (rm->m_nents) {
-			rm->m_count = ib_dma_map_sg(dev,
-					rm->m_sg, rm->m_nents, DMA_TO_DEVICE);
-			rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count);
-			if (rm->m_count == 0) {
+		if (rm->data.op_nents) {
+			rm->data.op_count = ib_dma_map_sg(dev,
+							  rm->data.op_sg,
+							  rm->data.op_nents,
+							  DMA_TO_DEVICE);
+			rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->data.op_count);
+			if (rm->data.op_count == 0) {
 				rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
 				rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc);
 				ret = -ENOMEM; /* XXX ? */
 				goto out;
 			}
 		} else {
-			rm->m_count = 0;
+			rm->data.op_count = 0;
 		}
 
 		ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs;
@@ -590,10 +591,10 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 
 	/* If it has a RDMA op, tell the peer we did it. This is
 	 * used by the peer to release use-once RDMA MRs. */
-	if (rm->m_rdma_op) {
+	if (rm->rdma.op_active) {
 		struct rds_ext_header_rdma ext_hdr;
 
-		ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key);
+		ext_hdr.h_rdma_rkey = cpu_to_be32(rm->rdma.op_rkey);
 		rds_message_add_extension(&rm->m_inc.i_hdr,
 				RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr));
 	}
@@ -621,7 +622,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 	send = &ic->i_sends[pos];
 	first = send;
 	prev = NULL;
-	scat = &rm->m_sg[sg];
+	scat = &rm->data.op_sg[sg];
 	sent = 0;
 	i = 0;
 
@@ -631,7 +632,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 	 * or when requested by the user. Right now, we let
 	 * the application choose.
 	 */
-	if (rm->m_rdma_op && rm->m_rdma_op->r_fence)
+	if (rm->rdma.op_active && rm->rdma.op_fence)
 		send_flags = IB_SEND_FENCE;
 
 	/*
@@ -650,7 +651,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 	}
 
 	/* if there's data reference it with a chain of work reqs */
-	for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) {
+	for (; i < work_alloc && scat != &rm->data.op_sg[rm->data.op_count]; i++) {
 		unsigned int len;
 
 		send = &ic->i_sends[pos];
@@ -728,7 +729,7 @@ add_header:
 	sent += sizeof(struct rds_header);
 
 	/* if we finished the message then send completion owns it */
-	if (scat == &rm->m_sg[rm->m_count]) {
+	if (scat == &rm->data.op_sg[rm->data.op_count]) {
 		prev->s_rm = ic->i_rm;
 		prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED;
 		ic->i_rm = NULL;
@@ -784,7 +785,7 @@ static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rd
 	ib_update_fast_reg_key(send->s_mr, send->s_remap_count++);
 }
 
-int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
+int rds_iw_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op)
 {
 	struct rds_iw_connection *ic = conn->c_transport_data;
 	struct rds_iw_send_work *send = NULL;
@@ -794,7 +795,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 	struct rds_iw_device *rds_iwdev;
 	struct scatterlist *scat;
 	unsigned long len;
-	u64 remote_addr = op->r_remote_addr;
+	u64 remote_addr = op->op_remote_addr;
 	u32 pos, fr_pos;
 	u32 work_alloc;
 	u32 i;
@@ -806,21 +807,21 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 	rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client);
 
 	/* map the message the first time we see it */
-	if (!op->r_mapped) {
-		op->r_count = ib_dma_map_sg(ic->i_cm_id->device,
-			op->r_sg, op->r_nents, (op->r_write) ?
+	if (!op->op_mapped) {
+		op->op_count = ib_dma_map_sg(ic->i_cm_id->device,
+			op->op_sg, op->op_nents, (op->op_write) ?
 			DMA_TO_DEVICE : DMA_FROM_DEVICE);
-		rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count);
-		if (op->r_count == 0) {
+		rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->op_count);
+		if (op->op_count == 0) {
 			rds_iw_stats_inc(s_iw_tx_sg_mapping_failure);
 			ret = -ENOMEM; /* XXX ? */
 			goto out;
 		}
 
-		op->r_mapped = 1;
+		op->op_mapped = 1;
 	}
 
-	if (!op->r_write) {
+	if (!op->op_write) {
 		/* Alloc space on the send queue for the fastreg */
 		work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos);
 		if (work_alloc != 1) {
@@ -835,7 +836,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 	 * Instead of knowing how to return a partial rdma read/write we insist that there
 	 * be enough work requests to send the entire message.
 	 */
-	i = ceil(op->r_count, rds_iwdev->max_sge);
+	i = ceil(op->op_count, rds_iwdev->max_sge);
 
 	work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos);
 	if (work_alloc != i) {
@@ -846,17 +847,17 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 	}
 
 	send = &ic->i_sends[pos];
-	if (!op->r_write) {
+	if (!op->op_write) {
 		first = prev = &ic->i_sends[fr_pos];
 	} else {
 		first = send;
 		prev = NULL;
 	}
-	scat = &op->r_sg[0];
+	scat = &op->op_sg[0];
 	sent = 0;
-	num_sge = op->r_count;
+	num_sge = op->op_count;
 
-	for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) {
+	for (i = 0; i < work_alloc && scat != &op->op_sg[op->op_count]; i++) {
 		send->s_wr.send_flags = 0;
 		send->s_queued = jiffies;
 
@@ -873,13 +874,13 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 		 * for local access after RDS is finished with it, using
 		 * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed.
 		 */
-		if (op->r_write)
+		if (op->op_write)
 			send->s_wr.opcode = IB_WR_RDMA_WRITE;
 		else
 			send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
 
 		send->s_wr.wr.rdma.remote_addr = remote_addr;
-		send->s_wr.wr.rdma.rkey = op->r_key;
+		send->s_wr.wr.rdma.rkey = op->op_rkey;
 		send->s_op = op;
 
 		if (num_sge > rds_iwdev->max_sge) {
@@ -893,7 +894,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 		if (prev)
 			prev->s_wr.next = &send->s_wr;
 
-		for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) {
+		for (j = 0; j < send->s_wr.num_sge && scat != &op->op_sg[op->op_count]; j++) {
 			len = ib_sg_dma_len(ic->i_cm_id->device, scat);
 
 			if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV)
@@ -927,7 +928,7 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 	}
 
 	/* if we finished the message then send completion owns it */
-	if (scat == &op->r_sg[op->r_count])
+	if (scat == &op->op_sg[op->op_count])
 		first->s_wr.send_flags = IB_SEND_SIGNALED;
 
 	if (i < work_alloc) {
@@ -941,9 +942,9 @@ int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op)
 	 * adapters do not allow using the lkey for this at all. To bypass this use a
 	 * fastreg_mr (or possibly a dma_mr)
 	 */
-	if (!op->r_write) {
+	if (!op->op_write) {
 		rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos],
-			op->r_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
+			op->op_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr);
 		work_alloc++;
 	}
 
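
The comment in rds_iw_xmit_rdma() above spells out the policy: rather than handle a partial RDMA, the code sizes its ring allocation with ceil(op->op_count, rds_iwdev->max_sge) and checks work_alloc != i before building any work requests, then caps each WR at max_sge scatter-gather entries. A minimal user-space sketch of that chunking arithmetic, with invented values for max_sge and the entry count (nothing here is taken from the driver):

    /* Illustrative only: mirrors the ceil()-based sizing and the per-WR
     * capping of num_sge seen in rds_iw_xmit_rdma(). The values of
     * max_sge and op_count are invented for the demo. */
    #include <stdio.h>

    static unsigned int ceil_div(unsigned int val, unsigned int div)
    {
        return (val + div - 1) / div;  /* same idea as the ceil() helper in rds.h */
    }

    int main(void)
    {
        unsigned int op_count = 13;    /* mapped scatterlist entries (assumed) */
        unsigned int max_sge = 4;      /* device SGE limit (assumed) */
        unsigned int num_sge = op_count;
        unsigned int wrs = ceil_div(op_count, max_sge);
        unsigned int i;

        printf("need %u work requests\n", wrs);
        for (i = 0; i < wrs; i++) {
            unsigned int this_sge = num_sge > max_sge ? max_sge : num_sge;

            printf("WR %u carries %u SGEs\n", i, this_sge);
            num_sge -= this_sge;
        }
        return 0;
    }
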
diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c
index 1c4428a61a02..e2e47176e729 100644
--- a/net/rds/iw_sysctl.c
+++ b/net/rds/iw_sysctl.c
@@ -55,7 +55,7 @@ static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL;
 
 unsigned int rds_iw_sysctl_flow_control = 1;
 
-ctl_table rds_iw_sysctl_table[] = {
+static ctl_table rds_iw_sysctl_table[] = {
 	{
 		.procname	= "max_send_wr",
 		.data		= &rds_iw_sysctl_max_send_wr,
@@ -122,10 +122,10 @@ void rds_iw_sysctl_exit(void)
 	unregister_sysctl_table(rds_iw_sysctl_hdr);
 }
 
-int __init rds_iw_sysctl_init(void)
+int rds_iw_sysctl_init(void)
 {
 	rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table);
-	if (rds_iw_sysctl_hdr == NULL)
+	if (!rds_iw_sysctl_hdr)
 		return -ENOMEM;
 	return 0;
 }
diff --git a/net/rds/loop.c b/net/rds/loop.c
index dd9879379457..c390156b426f 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -61,10 +61,17 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
 			 unsigned int hdr_off, unsigned int sg,
 			 unsigned int off)
 {
+	/* Do not send cong updates to loopback */
+	if (rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
+		rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
+		return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+	}
+
 	BUG_ON(hdr_off || sg || off);
 
 	rds_inc_init(&rm->m_inc, conn, conn->c_laddr);
-	rds_message_addref(rm); /* for the inc */
+	/* For the embedded inc. Matching put is in loop_inc_free() */
+	rds_message_addref(rm);
 
 	rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc,
 			  GFP_KERNEL, KM_USER0);
@@ -77,16 +84,14 @@ static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm,
 	return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len);
 }
 
-static int rds_loop_xmit_cong_map(struct rds_connection *conn,
-				  struct rds_cong_map *map,
-				  unsigned long offset)
+/*
+ * See rds_loop_xmit(). Since our inc is embedded in the rm, we
+ * make sure the rm lives at least until the inc is done.
+ */
+static void rds_loop_inc_free(struct rds_incoming *inc)
 {
-	BUG_ON(offset);
-	BUG_ON(map != conn->c_lcong);
-
-	rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
-
-	return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+	struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
+	rds_message_put(rm);
 }
 
 /* we need to at least give the thread something to succeed */
@@ -112,7 +117,7 @@ static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 	unsigned long flags;
 
 	lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL);
-	if (lc == NULL)
+	if (!lc)
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&lc->loop_node);
@@ -169,14 +174,12 @@ void rds_loop_exit(void)
  */
 struct rds_transport rds_loop_transport = {
 	.xmit			= rds_loop_xmit,
-	.xmit_cong_map		= rds_loop_xmit_cong_map,
 	.recv			= rds_loop_recv,
 	.conn_alloc		= rds_loop_conn_alloc,
 	.conn_free		= rds_loop_conn_free,
 	.conn_connect		= rds_loop_conn_connect,
 	.conn_shutdown		= rds_loop_conn_shutdown,
 	.inc_copy_to_user	= rds_message_inc_copy_to_user,
-	.inc_purge		= rds_message_inc_purge,
-	.inc_free		= rds_message_inc_free,
+	.inc_free		= rds_loop_inc_free,
 	.t_name			= "loopback",
 };
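
The new rds_loop_inc_free() works because the rds_incoming is embedded inside struct rds_message, so container_of() can walk back to the outer message and drop the reference taken in rds_loop_xmit(). A self-contained sketch of that pattern (the struct names and refcount here are invented stand-ins, not the RDS types):

    /* Illustrative only: the container_of() trick rds_loop_inc_free()
     * uses to recover the rds_message from its embedded rds_incoming. */
    #include <stdio.h>
    #include <stddef.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct incoming { int seq; };

    struct message {
        int refcount;
        struct incoming inc;   /* embedded, like m_inc in struct rds_message */
    };

    static void inc_free(struct incoming *inc)
    {
        struct message *m = container_of(inc, struct message, inc);

        m->refcount--;         /* stands in for rds_message_put(rm) */
        printf("refcount now %d\n", m->refcount);
    }

    int main(void)
    {
        struct message m = { .refcount = 1 };

        inc_free(&m.inc);
        return 0;
    }
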
diff --git a/net/rds/message.c b/net/rds/message.c
index 9a1d67e001ba..a84545dae370 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -34,9 +34,6 @@
 #include <linux/slab.h>
 
 #include "rds.h"
-#include "rdma.h"
-
-static DECLARE_WAIT_QUEUE_HEAD(rds_message_flush_waitq);
 
 static unsigned int	rds_exthdr_size[__RDS_EXTHDR_MAX] = {
 [RDS_EXTHDR_NONE]	= 0,
@@ -63,29 +60,31 @@ static void rds_message_purge(struct rds_message *rm)
 	if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags)))
 		return;
 
-	for (i = 0; i < rm->m_nents; i++) {
-		rdsdebug("putting data page %p\n", (void *)sg_page(&rm->m_sg[i]));
+	for (i = 0; i < rm->data.op_nents; i++) {
+		rdsdebug("putting data page %p\n", (void *)sg_page(&rm->data.op_sg[i]));
 		/* XXX will have to put_page for page refs */
-		__free_page(sg_page(&rm->m_sg[i]));
+		__free_page(sg_page(&rm->data.op_sg[i]));
 	}
-	rm->m_nents = 0;
+	rm->data.op_nents = 0;
 
-	if (rm->m_rdma_op)
-		rds_rdma_free_op(rm->m_rdma_op);
-	if (rm->m_rdma_mr)
-		rds_mr_put(rm->m_rdma_mr);
-}
+	if (rm->rdma.op_active)
+		rds_rdma_free_op(&rm->rdma);
+	if (rm->rdma.op_rdma_mr)
+		rds_mr_put(rm->rdma.op_rdma_mr);
 
-void rds_message_inc_purge(struct rds_incoming *inc)
-{
-	struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
-	rds_message_purge(rm);
+	if (rm->atomic.op_active)
+		rds_atomic_free_op(&rm->atomic);
+	if (rm->atomic.op_rdma_mr)
+		rds_mr_put(rm->atomic.op_rdma_mr);
 }
 
 void rds_message_put(struct rds_message *rm)
 {
 	rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount));
-
+	if (atomic_read(&rm->m_refcount) == 0) {
+printk(KERN_CRIT "danger refcount zero on %p\n", rm);
+WARN_ON(1);
+	}
 	if (atomic_dec_and_test(&rm->m_refcount)) {
 		BUG_ON(!list_empty(&rm->m_sock_item));
 		BUG_ON(!list_empty(&rm->m_conn_item));
@@ -96,12 +95,6 @@ void rds_message_put(struct rds_message *rm)
 }
 EXPORT_SYMBOL_GPL(rds_message_put);
 
-void rds_message_inc_free(struct rds_incoming *inc)
-{
-	struct rds_message *rm = container_of(inc, struct rds_message, m_inc);
-	rds_message_put(rm);
-}
-
 void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
 				 __be16 dport, u64 seq)
 {
@@ -113,8 +106,8 @@ void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
 }
 EXPORT_SYMBOL_GPL(rds_message_populate_header);
 
-int rds_message_add_extension(struct rds_header *hdr,
-		unsigned int type, const void *data, unsigned int len)
+int rds_message_add_extension(struct rds_header *hdr, unsigned int type,
+			      const void *data, unsigned int len)
 {
 	unsigned int ext_len = sizeof(u8) + len;
 	unsigned char *dst;
@@ -184,26 +177,6 @@ none:
 	return RDS_EXTHDR_NONE;
 }
 
-int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version)
-{
-	struct rds_ext_header_version ext_hdr;
-
-	ext_hdr.h_version = cpu_to_be32(version);
-	return rds_message_add_extension(hdr, RDS_EXTHDR_VERSION, &ext_hdr, sizeof(ext_hdr));
-}
-
-int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version)
-{
-	struct rds_ext_header_version ext_hdr;
-	unsigned int pos = 0, len = sizeof(ext_hdr);
-
-	/* We assume the version extension is the only one present */
-	if (rds_message_next_extension(hdr, &pos, &ext_hdr, &len) != RDS_EXTHDR_VERSION)
-		return 0;
-	*version = be32_to_cpu(ext_hdr.h_version);
-	return 1;
-}
-
 int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset)
 {
 	struct rds_ext_header_rdma_dest ext_hdr;
@@ -214,41 +187,68 @@ int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 o
 }
 EXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension);
 
-struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp)
+/*
+ * Each rds_message is allocated with extra space for the scatterlist entries
+ * rds ops will need. This is to minimize memory allocation count. Then, each rds op
+ * can grab SGs when initializing its part of the rds_message.
+ */
+struct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp)
 {
 	struct rds_message *rm;
 
-	rm = kzalloc(sizeof(struct rds_message) +
-		     (nents * sizeof(struct scatterlist)), gfp);
+	rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp);
 	if (!rm)
 		goto out;
 
-	if (nents)
-		sg_init_table(rm->m_sg, nents);
+	rm->m_used_sgs = 0;
+	rm->m_total_sgs = extra_len / sizeof(struct scatterlist);
+
 	atomic_set(&rm->m_refcount, 1);
 	INIT_LIST_HEAD(&rm->m_sock_item);
 	INIT_LIST_HEAD(&rm->m_conn_item);
 	spin_lock_init(&rm->m_rs_lock);
+	init_waitqueue_head(&rm->m_flush_wait);
 
 out:
 	return rm;
 }
 
+/*
+ * RDS ops use this to grab SG entries from the rm's sg pool.
+ */
+struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents)
+{
+	struct scatterlist *sg_first = (struct scatterlist *) &rm[1];
+	struct scatterlist *sg_ret;
+
+	WARN_ON(rm->m_used_sgs + nents > rm->m_total_sgs);
+	WARN_ON(!nents);
+
+	sg_ret = &sg_first[rm->m_used_sgs];
+	sg_init_table(sg_ret, nents);
+	rm->m_used_sgs += nents;
+
+	return sg_ret;
+}
+
 struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len)
 {
 	struct rds_message *rm;
 	unsigned int i;
+	int num_sgs = ceil(total_len, PAGE_SIZE);
+	int extra_bytes = num_sgs * sizeof(struct scatterlist);
 
-	rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL);
-	if (rm == NULL)
+	rm = rds_message_alloc(extra_bytes, GFP_NOWAIT);
+	if (!rm)
 		return ERR_PTR(-ENOMEM);
 
 	set_bit(RDS_MSG_PAGEVEC, &rm->m_flags);
 	rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
-	rm->m_nents = ceil(total_len, PAGE_SIZE);
+	rm->data.op_nents = ceil(total_len, PAGE_SIZE);
+	rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs);
 
-	for (i = 0; i < rm->m_nents; ++i) {
-		sg_set_page(&rm->m_sg[i],
+	for (i = 0; i < rm->data.op_nents; ++i) {
+		sg_set_page(&rm->data.op_sg[i],
 			    virt_to_page(page_addrs[i]),
 			    PAGE_SIZE, 0);
 	}
@@ -256,40 +256,33 @@ struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned in
 	return rm;
 }
 
-struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
-					       size_t total_len)
+int rds_message_copy_from_user(struct rds_message *rm, struct iovec *first_iov,
+			       size_t total_len)
 {
 	unsigned long to_copy;
 	unsigned long iov_off;
 	unsigned long sg_off;
-	struct rds_message *rm;
 	struct iovec *iov;
 	struct scatterlist *sg;
-	int ret;
-
-	rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL);
-	if (rm == NULL) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	int ret = 0;
 
 	rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len);
 
 	/*
 	 * now allocate and copy in the data payload.
 	 */
-	sg = rm->m_sg;
+	sg = rm->data.op_sg;
 	iov = first_iov;
 	iov_off = 0;
 	sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */
 
 	while (total_len) {
-		if (sg_page(sg) == NULL) {
+		if (!sg_page(sg)) {
 			ret = rds_page_remainder_alloc(sg, total_len,
 						       GFP_HIGHUSER);
 			if (ret)
 				goto out;
-			rm->m_nents++;
+			rm->data.op_nents++;
 			sg_off = 0;
 		}
 
@@ -320,14 +313,8 @@ struct rds_message *rds_message_copy_from_user(struct iovec *first_iov,
 		sg++;
 	}
 
-	ret = 0;
 out:
-	if (ret) {
-		if (rm)
-			rds_message_put(rm);
-		rm = ERR_PTR(ret);
-	}
-	return rm;
+	return ret;
 }
 
 int rds_message_inc_copy_to_user(struct rds_incoming *inc,
@@ -348,7 +335,7 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc,
 
 	iov = first_iov;
 	iov_off = 0;
-	sg = rm->m_sg;
+	sg = rm->data.op_sg;
 	vec_off = 0;
 	copied = 0;
 
@@ -394,15 +381,14 @@ int rds_message_inc_copy_to_user(struct rds_incoming *inc,
  */
 void rds_message_wait(struct rds_message *rm)
 {
-	wait_event(rds_message_flush_waitq,
+	wait_event_interruptible(rm->m_flush_wait,
 			!test_bit(RDS_MSG_MAPPED, &rm->m_flags));
 }
 
 void rds_message_unmapped(struct rds_message *rm)
 {
 	clear_bit(RDS_MSG_MAPPED, &rm->m_flags);
-	if (waitqueue_active(&rds_message_flush_waitq))
-		wake_up(&rds_message_flush_waitq);
+	wake_up_interruptible(&rm->m_flush_wait);
 }
 EXPORT_SYMBOL_GPL(rds_message_unmapped);
 
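
rds_message_alloc() now takes a byte count of extra space and rds_message_alloc_sgs() carves scatterlist entries out of it, so a single kzalloc() serves the message struct and every op's SG array. A user-space sketch of the same carve-from-one-allocation scheme (types and sizes are invented for the demo; the kernel code above is the real interface):

    /* Illustrative only: one calloc() with trailing array space, carved up
     * on demand, mirroring rds_message_alloc()/rds_message_alloc_sgs(). */
    #include <stdio.h>
    #include <stdlib.h>

    struct sg { void *page; unsigned int len; };

    struct msg {
        unsigned int used_sgs;
        unsigned int total_sgs;
        /* an array of struct sg follows the struct in the same allocation */
    };

    static struct msg *msg_alloc(unsigned int extra_len)
    {
        struct msg *m = calloc(1, sizeof(*m) + extra_len);

        if (!m)
            return NULL;
        m->total_sgs = extra_len / sizeof(struct sg);
        return m;
    }

    static struct sg *msg_alloc_sgs(struct msg *m, unsigned int nents)
    {
        struct sg *first = (struct sg *)&m[1];   /* memory right after the struct */
        struct sg *ret;

        if (m->used_sgs + nents > m->total_sgs)  /* pool overdrawn */
            return NULL;
        ret = &first[m->used_sgs];
        m->used_sgs += nents;
        return ret;
    }

    int main(void)
    {
        struct msg *m = msg_alloc(8 * sizeof(struct sg));
        struct sg *data_sg, *rdma_sg;

        if (!m)
            return 1;
        data_sg = msg_alloc_sgs(m, 5);   /* e.g. the data op's share */
        rdma_sg = msg_alloc_sgs(m, 3);   /* e.g. the rdma op's share */
        printf("used %u of %u entries (%p, %p)\n",
               m->used_sgs, m->total_sgs, (void *)data_sg, (void *)rdma_sg);
        free(m);
        return 0;
    }

The &m[1] trick works because the pool starts immediately after the struct in the same allocation; the kernel version WARN()s on an overdrawn pool rather than returning NULL.
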
diff --git a/net/rds/page.c b/net/rds/page.c
index 1dfbfea12e9b..d8acdebe3c7c 100644
--- a/net/rds/page.c
+++ b/net/rds/page.c
@@ -40,7 +40,8 @@ struct rds_page_remainder {
 	unsigned long	r_offset;
 };
 
-DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder,
+				     rds_page_remainders);
 
 /*
  * returns 0 on success or -errno on failure.
@@ -103,7 +104,7 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
 	/* jump straight to allocation if we're trying for a huge page */
 	if (bytes >= PAGE_SIZE) {
 		page = alloc_page(gfp);
-		if (page == NULL) {
+		if (!page) {
 			ret = -ENOMEM;
 		} else {
 			sg_set_page(scat, page, PAGE_SIZE, 0);
@@ -149,7 +150,7 @@ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
 		rem = &per_cpu(rds_page_remainders, get_cpu());
 		local_irq_save(flags);
 
-		if (page == NULL) {
+		if (!page) {
 			ret = -ENOMEM;
 			break;
 		}
@@ -173,6 +174,7 @@ out:
 					       ret ? 0 : scat->length);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(rds_page_remainder_alloc);
 
 static int rds_page_remainder_cpu_notify(struct notifier_block *self,
 					 unsigned long action, void *hcpu)
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 75fd13bb631b..1a41debca1ce 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -35,7 +35,7 @@
 #include <linux/rbtree.h>
 #include <linux/dma-mapping.h> /* for DMA_*_DEVICE */
 
-#include "rdma.h"
+#include "rds.h"
 
 /*
  * XXX
@@ -130,14 +130,22 @@ void rds_rdma_drop_keys(struct rds_sock *rs)
 {
 	struct rds_mr *mr;
 	struct rb_node *node;
+	unsigned long flags;
 
 	/* Release any MRs associated with this socket */
+	spin_lock_irqsave(&rs->rs_rdma_lock, flags);
 	while ((node = rb_first(&rs->rs_rdma_keys))) {
 		mr = container_of(node, struct rds_mr, r_rb_node);
 		if (mr->r_trans == rs->rs_transport)
 			mr->r_invalidate = 0;
+		rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
+		RB_CLEAR_NODE(&mr->r_rb_node);
+		spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
+		rds_destroy_mr(mr);
 		rds_mr_put(mr);
+		spin_lock_irqsave(&rs->rs_rdma_lock, flags);
 	}
+	spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
 
 	if (rs->rs_transport && rs->rs_transport->flush_mrs)
 		rs->rs_transport->flush_mrs();
@@ -181,7 +189,7 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
 		goto out;
 	}
 
-	if (rs->rs_transport->get_mr == NULL) {
+	if (!rs->rs_transport->get_mr) {
 		ret = -EOPNOTSUPP;
 		goto out;
 	}
@@ -197,13 +205,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
 
 	/* XXX clamp nr_pages to limit the size of this alloc? */
 	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
-	if (pages == NULL) {
+	if (!pages) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
 	mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL);
-	if (mr == NULL) {
+	if (!mr) {
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -230,13 +238,13 @@ static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args,
 	 * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to
 	 * the zero page.
 	 */
-	ret = rds_pin_pages(args->vec.addr & PAGE_MASK, nr_pages, pages, 1);
+	ret = rds_pin_pages(args->vec.addr, nr_pages, pages, 1);
 	if (ret < 0)
 		goto out;
 
 	nents = ret;
 	sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL);
-	if (sg == NULL) {
+	if (!sg) {
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -406,68 +414,127 @@ void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force)
 
 	spin_lock_irqsave(&rs->rs_rdma_lock, flags);
 	mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
-	if (mr && (mr->r_use_once || force)) {
+	if (!mr) {
+		printk(KERN_ERR "rds: trying to unuse MR with unknown r_key %u!\n", r_key);
+		spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
+		return;
+	}
+
+	if (mr->r_use_once || force) {
 		rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys);
 		RB_CLEAR_NODE(&mr->r_rb_node);
 		zot_me = 1;
-	} else if (mr)
-		atomic_inc(&mr->r_refcount);
+	}
 	spin_unlock_irqrestore(&rs->rs_rdma_lock, flags);
 
 	/* May have to issue a dma_sync on this memory region.
 	 * Note we could avoid this if the operation was a RDMA READ,
 	 * but at this point we can't tell. */
-	if (mr != NULL) {
-		if (mr->r_trans->sync_mr)
-			mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
-
-		/* If the MR was marked as invalidate, this will
-		 * trigger an async flush. */
-		if (zot_me)
-			rds_destroy_mr(mr);
-		rds_mr_put(mr);
-	}
+	if (mr->r_trans->sync_mr)
+		mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE);
+
+	/* If the MR was marked as invalidate, this will
+	 * trigger an async flush. */
+	if (zot_me)
+		rds_destroy_mr(mr);
+	rds_mr_put(mr);
 }
 
-void rds_rdma_free_op(struct rds_rdma_op *ro)
+void rds_rdma_free_op(struct rm_rdma_op *ro)
 {
 	unsigned int i;
 
-	for (i = 0; i < ro->r_nents; i++) {
-		struct page *page = sg_page(&ro->r_sg[i]);
+	for (i = 0; i < ro->op_nents; i++) {
+		struct page *page = sg_page(&ro->op_sg[i]);
 
 		/* Mark page dirty if it was possibly modified, which
 		 * is the case for a RDMA_READ which copies from remote
 		 * to local memory */
-		if (!ro->r_write) {
-			BUG_ON(in_interrupt());
+		if (!ro->op_write) {
+			BUG_ON(irqs_disabled());
 			set_page_dirty(page);
 		}
 		put_page(page);
 	}
 
-	kfree(ro->r_notifier);
-	kfree(ro);
+	kfree(ro->op_notifier);
+	ro->op_notifier = NULL;
+	ro->op_active = 0;
+}
+
+void rds_atomic_free_op(struct rm_atomic_op *ao)
+{
+	struct page *page = sg_page(ao->op_sg);
+
+	/* Mark page dirty if it was possibly modified, which
+	 * is the case for a RDMA_READ which copies from remote
+	 * to local memory */
+	set_page_dirty(page);
+	put_page(page);
+
+	kfree(ao->op_notifier);
+	ao->op_notifier = NULL;
+	ao->op_active = 0;
 }
 
+
 /*
- * args is a pointer to an in-kernel copy in the sendmsg cmsg.
+ * Count the number of pages needed to describe an incoming iovec.
  */
-static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
-		struct rds_rdma_args *args)
+static int rds_rdma_pages(struct rds_rdma_args *args)
 {
 	struct rds_iovec vec;
-	struct rds_rdma_op *op = NULL;
+	struct rds_iovec __user *local_vec;
+	unsigned int tot_pages = 0;
 	unsigned int nr_pages;
-	unsigned int max_pages;
+	unsigned int i;
+
+	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
+
+	/* figure out the number of pages in the vector */
+	for (i = 0; i < args->nr_local; i++) {
+		if (copy_from_user(&vec, &local_vec[i],
+				   sizeof(struct rds_iovec)))
+			return -EFAULT;
+
+		nr_pages = rds_pages_in_vec(&vec);
+		if (nr_pages == 0)
+			return -EINVAL;
+
+		tot_pages += nr_pages;
+	}
+
+	return tot_pages;
+}
+
+int rds_rdma_extra_size(struct rds_rdma_args *args)
+{
+	return rds_rdma_pages(args) * sizeof(struct scatterlist);
+}
+
+/*
+ * The application asks for a RDMA transfer.
+ * Extract all arguments and set up the rdma_op
+ */
+int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
+		       struct cmsghdr *cmsg)
+{
+	struct rds_rdma_args *args;
+	struct rds_iovec vec;
+	struct rm_rdma_op *op = &rm->rdma;
+	int nr_pages;
 	unsigned int nr_bytes;
 	struct page **pages = NULL;
 	struct rds_iovec __user *local_vec;
-	struct scatterlist *sg;
 	unsigned int nr;
 	unsigned int i, j;
-	int ret;
+	int ret = 0;
 
+	if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args))
+	    || rm->rdma.op_active)
+		return -EINVAL;
+
+	args = CMSG_DATA(cmsg);
 
 	if (rs->rs_bound_addr == 0) {
 		ret = -ENOTCONN; /* XXX not a great errno */
@@ -479,61 +546,38 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 		goto out;
 	}
 
-	nr_pages = 0;
-	max_pages = 0;
-
-	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
-
-	/* figure out the number of pages in the vector */
-	for (i = 0; i < args->nr_local; i++) {
-		if (copy_from_user(&vec, &local_vec[i],
-				   sizeof(struct rds_iovec))) {
-			ret = -EFAULT;
-			goto out;
-		}
-
-		nr = rds_pages_in_vec(&vec);
-		if (nr == 0) {
-			ret = -EINVAL;
-			goto out;
-		}
-
-		max_pages = max(nr, max_pages);
-		nr_pages += nr;
-	}
-
-	pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL);
-	if (pages == NULL) {
-		ret = -ENOMEM;
+	nr_pages = rds_rdma_pages(args);
+	if (nr_pages < 0)
 		goto out;
-	}
 
-	op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL);
-	if (op == NULL) {
+	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
+	if (!pages) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	op->r_write = !!(args->flags & RDS_RDMA_READWRITE);
-	op->r_fence = !!(args->flags & RDS_RDMA_FENCE);
-	op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
-	op->r_recverr = rs->rs_recverr;
+	op->op_write = !!(args->flags & RDS_RDMA_READWRITE);
+	op->op_fence = !!(args->flags & RDS_RDMA_FENCE);
+	op->op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
+	op->op_silent = !!(args->flags & RDS_RDMA_SILENT);
+	op->op_active = 1;
+	op->op_recverr = rs->rs_recverr;
 	WARN_ON(!nr_pages);
-	sg_init_table(op->r_sg, nr_pages);
+	op->op_sg = rds_message_alloc_sgs(rm, nr_pages);
 
-	if (op->r_notify || op->r_recverr) {
+	if (op->op_notify || op->op_recverr) {
 		/* We allocate an uninitialized notifier here, because
 		 * we don't want to do that in the completion handler. We
 		 * would have to use GFP_ATOMIC there, and don't want to deal
 		 * with failed allocations.
 		 */
-		op->r_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
-		if (!op->r_notifier) {
+		op->op_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL);
+		if (!op->op_notifier) {
 			ret = -ENOMEM;
 			goto out;
 		}
-		op->r_notifier->n_user_token = args->user_token;
-		op->r_notifier->n_status = RDS_RDMA_SUCCESS;
+		op->op_notifier->n_user_token = args->user_token;
+		op->op_notifier->n_status = RDS_RDMA_SUCCESS;
 	}
 
 	/* The cookie contains the R_Key of the remote memory region, and
@@ -543,15 +587,17 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 	 * destination address (which is really an offset into the MR)
 	 * FIXME: We may want to move this into ib_rdma.c
	 */
-	op->r_key = rds_rdma_cookie_key(args->cookie);
-	op->r_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie);
+	op->op_rkey = rds_rdma_cookie_key(args->cookie);
+	op->op_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie);
 
 	nr_bytes = 0;
 
 	rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n",
 		 (unsigned long long)args->nr_local,
 		 (unsigned long long)args->remote_vec.addr,
-		 op->r_key);
+		 op->op_rkey);
+
+	local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr;
 
 	for (i = 0; i < args->nr_local; i++) {
 		if (copy_from_user(&vec, &local_vec[i],
@@ -569,15 +615,10 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 		rs->rs_user_addr = vec.addr;
 		rs->rs_user_bytes = vec.bytes;
 
-		/* did the user change the vec under us? */
-		if (nr > max_pages || op->r_nents + nr > nr_pages) {
-			ret = -EINVAL;
-			goto out;
-		}
 		/* If it's a WRITE operation, we want to pin the pages for reading.
 		 * If it's a READ operation, we need to pin the pages for writing.
 		 */
-		ret = rds_pin_pages(vec.addr & PAGE_MASK, nr, pages, !op->r_write);
+		ret = rds_pin_pages(vec.addr, nr, pages, !op->op_write);
 		if (ret < 0)
 			goto out;
 
@@ -588,8 +629,9 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 
 		for (j = 0; j < nr; j++) {
 			unsigned int offset = vec.addr & ~PAGE_MASK;
+			struct scatterlist *sg;
 
-			sg = &op->r_sg[op->r_nents + j];
+			sg = &op->op_sg[op->op_nents + j];
 			sg_set_page(sg, pages[j],
 				    min_t(unsigned int, vec.bytes, PAGE_SIZE - offset),
 				    offset);
@@ -601,10 +643,9 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 			vec.bytes -= sg->length;
 		}
 
-		op->r_nents += nr;
+		op->op_nents += nr;
 	}
 
-
 	if (nr_bytes > args->remote_vec.bytes) {
 		rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n",
 			 nr_bytes,
@@ -612,38 +653,17 @@ static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs,
 		ret = -EINVAL;
 		goto out;
 	}
-	op->r_bytes = nr_bytes;
+	op->op_bytes = nr_bytes;
 
 	ret = 0;
 out:
 	kfree(pages);
-	if (ret) {
-		if (op)
-			rds_rdma_free_op(op);
-		op = ERR_PTR(ret);
-	}
-	return op;
-}
-
-/*
- * The application asks for a RDMA transfer.
- * Extract all arguments and set up the rdma_op
- */
-int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
-		       struct cmsghdr *cmsg)
-{
-	struct rds_rdma_op *op;
-
-	if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) ||
-	    rm->m_rdma_op != NULL)
-		return -EINVAL;
+	if (ret)
+		rds_rdma_free_op(op);
 
-	op = rds_rdma_prepare(rs, CMSG_DATA(cmsg));
-	if (IS_ERR(op))
-		return PTR_ERR(op);
 	rds_stats_inc(s_send_rdma);
-	rm->m_rdma_op = op;
-	return 0;
+
+	return ret;
 }
 
 /*
@@ -673,7 +693,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
 
 	spin_lock_irqsave(&rs->rs_rdma_lock, flags);
 	mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
-	if (mr == NULL)
+	if (!mr)
 		err = -EINVAL;	/* invalid r_key */
 	else
 		atomic_inc(&mr->r_refcount);
@@ -681,7 +701,7 @@ int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
 
 	if (mr) {
 		mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE);
-		rm->m_rdma_mr = mr;
+		rm->rdma.op_rdma_mr = mr;
 	}
 	return err;
 }
@@ -699,5 +719,98 @@ int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
 	    rm->m_rdma_cookie != 0)
 		return -EINVAL;
 
-	return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr);
+	return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->rdma.op_rdma_mr);
+}
+
+/*
+ * Fill in rds_message for an atomic request.
+ */
+int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
+		    struct cmsghdr *cmsg)
+{
+	struct page *page = NULL;
+	struct rds_atomic_args *args;
+	int ret = 0;
+
+	if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_atomic_args))
+	    || rm->atomic.op_active)
+		return -EINVAL;
+
+	args = CMSG_DATA(cmsg);
+
+	/* Nonmasked & masked cmsg ops converted to masked hw ops */
+	switch (cmsg->cmsg_type) {
+	case RDS_CMSG_ATOMIC_FADD:
+		rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
+		rm->atomic.op_m_fadd.add = args->fadd.add;
+		rm->atomic.op_m_fadd.nocarry_mask = 0;
+		break;
+	case RDS_CMSG_MASKED_ATOMIC_FADD:
+		rm->atomic.op_type = RDS_ATOMIC_TYPE_FADD;
+		rm->atomic.op_m_fadd.add = args->m_fadd.add;
+		rm->atomic.op_m_fadd.nocarry_mask = args->m_fadd.nocarry_mask;
+		break;
+	case RDS_CMSG_ATOMIC_CSWP:
+		rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
+		rm->atomic.op_m_cswp.compare = args->cswp.compare;
+		rm->atomic.op_m_cswp.swap = args->cswp.swap;
+		rm->atomic.op_m_cswp.compare_mask = ~0;
+		rm->atomic.op_m_cswp.swap_mask = ~0;
+		break;
+	case RDS_CMSG_MASKED_ATOMIC_CSWP:
+		rm->atomic.op_type = RDS_ATOMIC_TYPE_CSWP;
+		rm->atomic.op_m_cswp.compare = args->m_cswp.compare;
+		rm->atomic.op_m_cswp.swap = args->m_cswp.swap;
+		rm->atomic.op_m_cswp.compare_mask = args->m_cswp.compare_mask;
+		rm->atomic.op_m_cswp.swap_mask = args->m_cswp.swap_mask;
+		break;
+	default:
+		BUG(); /* should never happen */
+	}
+
+	rm->atomic.op_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME);
+	rm->atomic.op_silent = !!(args->flags & RDS_RDMA_SILENT);
+	rm->atomic.op_active = 1;
+	rm->atomic.op_recverr = rs->rs_recverr;
+	rm->atomic.op_sg = rds_message_alloc_sgs(rm, 1);
+
+	/* verify 8 byte-aligned */
+	if (args->local_addr & 0x7) {
+		ret = -EFAULT;
+		goto err;
+	}
+
+	ret = rds_pin_pages(args->local_addr, 1, &page, 1);
+	if (ret != 1)
+		goto err;
+	ret = 0;
+
+	sg_set_page(rm->atomic.op_sg, page, 8, offset_in_page(args->local_addr));
+
+	if (rm->atomic.op_notify || rm->atomic.op_recverr) {
+		/* We allocate an uninitialized notifier here, because
+		 * we don't want to do that in the completion handler. We
+		 * would have to use GFP_ATOMIC there, and don't want to deal
+		 * with failed allocations.
+		 */
+		rm->atomic.op_notifier = kmalloc(sizeof(*rm->atomic.op_notifier), GFP_KERNEL);
+		if (!rm->atomic.op_notifier) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		rm->atomic.op_notifier->n_user_token = args->user_token;
+		rm->atomic.op_notifier->n_status = RDS_RDMA_SUCCESS;
+	}
+
+	rm->atomic.op_rkey = rds_rdma_cookie_key(args->cookie);
+	rm->atomic.op_remote_addr = args->remote_addr + rds_rdma_cookie_offset(args->cookie);
+
+	return ret;
+err:
+	if (page)
+		put_page(page);
+	kfree(rm->atomic.op_notifier);
+
+	return ret;
 }
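
Several functions above pack an r_key and an MR offset into a single 64-bit rds_rdma_cookie_t; the layout is defined by rds_rdma_make_cookie() and friends (moved into rds.h in this same series): the low 32 bits carry the r_key, the high 32 bits the offset. A tiny round-trip demonstration of that layout, with invented key and offset values:

    /* Illustrative only: the cookie packing used by rds_rdma_make_cookie(),
     * rds_rdma_cookie_key() and rds_rdma_cookie_offset(). */
    #include <stdio.h>
    #include <stdint.h>

    static uint64_t make_cookie(uint32_t r_key, uint32_t offset)
    {
        return r_key | ((uint64_t)offset << 32);
    }

    static uint32_t cookie_key(uint64_t cookie)
    {
        return (uint32_t)cookie;        /* low half */
    }

    static uint32_t cookie_offset(uint64_t cookie)
    {
        return (uint32_t)(cookie >> 32); /* high half */
    }

    int main(void)
    {
        uint64_t c = make_cookie(0xdeadbeef, 4096);  /* invented values */

        printf("key=%#x offset=%u\n", cookie_key(c), cookie_offset(c));
        return 0;
    }
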
diff --git a/net/rds/rdma.h b/net/rds/rdma.h
deleted file mode 100644
index 909c39835a5d..000000000000
--- a/net/rds/rdma.h
+++ /dev/null
@@ -1,85 +0,0 @@
-#ifndef _RDS_RDMA_H
-#define _RDS_RDMA_H
-
-#include <linux/rbtree.h>
-#include <linux/spinlock.h>
-#include <linux/scatterlist.h>
-
-#include "rds.h"
-
-struct rds_mr {
-	struct rb_node		r_rb_node;
-	atomic_t		r_refcount;
-	u32			r_key;
-
-	/* A copy of the creation flags */
-	unsigned int		r_use_once:1;
-	unsigned int		r_invalidate:1;
-	unsigned int		r_write:1;
-
-	/* This is for RDS_MR_DEAD.
-	 * It would be nice & consistent to make this part of the above
-	 * bit field here, but we need to use test_and_set_bit.
-	 */
-	unsigned long		r_state;
-	struct rds_sock		*r_sock; /* back pointer to the socket that owns us */
-	struct rds_transport	*r_trans;
-	void			*r_trans_private;
-};
-
-/* Flags for mr->r_state */
-#define RDS_MR_DEAD		0
-
-struct rds_rdma_op {
-	u32			r_key;
-	u64			r_remote_addr;
-	unsigned int		r_write:1;
-	unsigned int		r_fence:1;
-	unsigned int		r_notify:1;
-	unsigned int		r_recverr:1;
-	unsigned int		r_mapped:1;
-	struct rds_notifier	*r_notifier;
-	unsigned int		r_bytes;
-	unsigned int		r_nents;
-	unsigned int		r_count;
-	struct scatterlist	r_sg[0];
-};
-
-static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
-{
-	return r_key | (((u64) offset) << 32);
-}
-
-static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
-{
-	return cookie;
-}
-
-static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
-{
-	return cookie >> 32;
-}
-
-int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
-int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
-int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
-void rds_rdma_drop_keys(struct rds_sock *rs);
-int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
-		       struct cmsghdr *cmsg);
-int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
-		       struct cmsghdr *cmsg);
-int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
-		       struct cmsghdr *cmsg);
-int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
-		      struct cmsghdr *cmsg);
-void rds_rdma_free_op(struct rds_rdma_op *ro);
-void rds_rdma_send_complete(struct rds_message *rm, int);
-
-extern void __rds_put_mr_final(struct rds_mr *mr);
-static inline void rds_mr_put(struct rds_mr *mr)
-{
-	if (atomic_dec_and_test(&mr->r_refcount))
-		__rds_put_mr_final(mr);
-}
-
-#endif
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index e599ba2f950d..4195a0539829 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -36,6 +36,34 @@
 
 static struct rdma_cm_id *rds_rdma_listen_id;
 
+static char *rds_cm_event_strings[] = {
+#define RDS_CM_EVENT_STRING(foo) \
+		[RDMA_CM_EVENT_##foo] = __stringify(RDMA_CM_EVENT_##foo)
+	RDS_CM_EVENT_STRING(ADDR_RESOLVED),
+	RDS_CM_EVENT_STRING(ADDR_ERROR),
+	RDS_CM_EVENT_STRING(ROUTE_RESOLVED),
+	RDS_CM_EVENT_STRING(ROUTE_ERROR),
+	RDS_CM_EVENT_STRING(CONNECT_REQUEST),
+	RDS_CM_EVENT_STRING(CONNECT_RESPONSE),
+	RDS_CM_EVENT_STRING(CONNECT_ERROR),
+	RDS_CM_EVENT_STRING(UNREACHABLE),
+	RDS_CM_EVENT_STRING(REJECTED),
+	RDS_CM_EVENT_STRING(ESTABLISHED),
+	RDS_CM_EVENT_STRING(DISCONNECTED),
+	RDS_CM_EVENT_STRING(DEVICE_REMOVAL),
+	RDS_CM_EVENT_STRING(MULTICAST_JOIN),
+	RDS_CM_EVENT_STRING(MULTICAST_ERROR),
+	RDS_CM_EVENT_STRING(ADDR_CHANGE),
+	RDS_CM_EVENT_STRING(TIMEWAIT_EXIT),
+#undef RDS_CM_EVENT_STRING
+};
+
+static char *rds_cm_event_str(enum rdma_cm_event_type type)
+{
+	return rds_str_array(rds_cm_event_strings,
+			     ARRAY_SIZE(rds_cm_event_strings), type);
+};
+
 int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 			      struct rdma_cm_event *event)
 {
@@ -44,8 +72,8 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 	struct rds_transport *trans;
 	int ret = 0;
 
-	rdsdebug("conn %p id %p handling event %u\n", conn, cm_id,
-		 event->event);
+	rdsdebug("conn %p id %p handling event %u (%s)\n", conn, cm_id,
+		 event->event, rds_cm_event_str(event->event));
 
 	if (cm_id->device->node_type == RDMA_NODE_RNIC)
 		trans = &rds_iw_transport;
@@ -109,7 +137,8 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 
 	default:
 		/* things like device disconnect? */
-		printk(KERN_ERR "RDS: unknown event %u!\n", event->event);
+		printk(KERN_ERR "RDS: unknown event %u (%s)!\n",
+		       event->event, rds_cm_event_str(event->event));
 		break;
 	}
 
@@ -117,12 +146,13 @@out:
 	if (conn)
 		mutex_unlock(&conn->c_cm_lock);
 
-	rdsdebug("id %p event %u handling ret %d\n", cm_id, event->event, ret);
+	rdsdebug("id %p event %u (%s) handling ret %d\n", cm_id, event->event,
+		 rds_cm_event_str(event->event), ret);
 
 	return ret;
 }
 
-static int __init rds_rdma_listen_init(void)
+static int rds_rdma_listen_init(void)
 {
 	struct sockaddr_in sin;
 	struct rdma_cm_id *cm_id;
@@ -177,7 +207,7 @@ static void rds_rdma_listen_stop(void)
 	}
 }
 
-int __init rds_rdma_init(void)
+static int rds_rdma_init(void)
 {
 	int ret;
 
@@ -204,7 +234,7 @@out:
 }
 module_init(rds_rdma_init);
 
-void rds_rdma_exit(void)
+static void rds_rdma_exit(void)
 {
 	/* stop listening first to ensure no new connections are attempted */
 	rds_rdma_listen_stop();
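
The new rds_cm_event_strings[] table builds each entry with __stringify() and a designated initializer, and rds_cm_event_str() goes through rds_str_array() so out-of-range event numbers still print safely. A compilable stand-alone sketch of the same pattern (the demo enum and names are invented; rds_str_array() is a kernel helper, approximated here by a bounds check):

    /* Illustrative only: stringified designated-initializer table plus a
     * bounds-checked lookup, standing in for rds_cm_event_str(). */
    #include <stdio.h>

    #define __stringify(x) #x

    enum demo_event { EV_ADDR_RESOLVED, EV_REJECTED, EV_ESTABLISHED };

    static const char *demo_event_strings[] = {
    #define DEMO_EVENT_STRING(foo) [EV_##foo] = __stringify(EV_##foo)
        DEMO_EVENT_STRING(ADDR_RESOLVED),
        DEMO_EVENT_STRING(REJECTED),
        DEMO_EVENT_STRING(ESTABLISHED),
    #undef DEMO_EVENT_STRING
    };

    static const char *demo_event_str(unsigned int type)
    {
        if (type < sizeof(demo_event_strings) / sizeof(demo_event_strings[0]) &&
            demo_event_strings[type])
            return demo_event_strings[type];
        return "unknown";
    }

    int main(void)
    {
        printf("%s / %s\n", demo_event_str(EV_REJECTED), demo_event_str(42));
        return 0;
    }
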
diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h
index 2f2c7d976c21..faba4e382695 100644
--- a/net/rds/rdma_transport.h
+++ b/net/rds/rdma_transport.h
@@ -11,10 +11,6 @@ int rds_rdma_conn_connect(struct rds_connection *conn);
 int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 			      struct rdma_cm_event *event);
 
-/* from rdma_transport.c */
-int rds_rdma_init(void);
-void rds_rdma_exit(void);
-
 /* from ib.c */
 extern struct rds_transport rds_ib_transport;
 int rds_ib_init(void);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c224b5bb3ba9..9542449c0720 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -80,6 +80,7 @@ enum {
80/* Bits for c_flags */ 80/* Bits for c_flags */
81#define RDS_LL_SEND_FULL 0 81#define RDS_LL_SEND_FULL 0
82#define RDS_RECONNECT_PENDING 1 82#define RDS_RECONNECT_PENDING 1
83#define RDS_IN_XMIT 2
83 84
84struct rds_connection { 85struct rds_connection {
85 struct hlist_node c_hash_node; 86 struct hlist_node c_hash_node;
@@ -91,12 +92,13 @@ struct rds_connection {
91 struct rds_cong_map *c_lcong; 92 struct rds_cong_map *c_lcong;
92 struct rds_cong_map *c_fcong; 93 struct rds_cong_map *c_fcong;
93 94
94 struct mutex c_send_lock; /* protect send ring */
95 struct rds_message *c_xmit_rm; 95 struct rds_message *c_xmit_rm;
96 unsigned long c_xmit_sg; 96 unsigned long c_xmit_sg;
97 unsigned int c_xmit_hdr_off; 97 unsigned int c_xmit_hdr_off;
98 unsigned int c_xmit_data_off; 98 unsigned int c_xmit_data_off;
99 unsigned int c_xmit_atomic_sent;
99 unsigned int c_xmit_rdma_sent; 100 unsigned int c_xmit_rdma_sent;
101 unsigned int c_xmit_data_sent;
100 102
101 spinlock_t c_lock; /* protect msg queues */ 103 spinlock_t c_lock; /* protect msg queues */
102 u64 c_next_tx_seq; 104 u64 c_next_tx_seq;
@@ -116,11 +118,10 @@ struct rds_connection {
116 struct delayed_work c_conn_w; 118 struct delayed_work c_conn_w;
117 struct work_struct c_down_w; 119 struct work_struct c_down_w;
118 struct mutex c_cm_lock; /* protect conn state & cm */ 120 struct mutex c_cm_lock; /* protect conn state & cm */
121 wait_queue_head_t c_waitq;
119 122
120 struct list_head c_map_item; 123 struct list_head c_map_item;
121 unsigned long c_map_queued; 124 unsigned long c_map_queued;
122 unsigned long c_map_offset;
123 unsigned long c_map_bytes;
124 125
125 unsigned int c_unacked_packets; 126 unsigned int c_unacked_packets;
126 unsigned int c_unacked_bytes; 127 unsigned int c_unacked_bytes;
@@ -206,6 +207,48 @@ struct rds_incoming {
206 rds_rdma_cookie_t i_rdma_cookie; 207 rds_rdma_cookie_t i_rdma_cookie;
207}; 208};
208 209
210struct rds_mr {
211 struct rb_node r_rb_node;
212 atomic_t r_refcount;
213 u32 r_key;
214
215 /* A copy of the creation flags */
216 unsigned int r_use_once:1;
217 unsigned int r_invalidate:1;
218 unsigned int r_write:1;
219
220 /* This is for RDS_MR_DEAD.
221 * It would be nice & consistent to make this part of the above
222 * bit field here, but we need to use test_and_set_bit.
223 */
224 unsigned long r_state;
225 struct rds_sock *r_sock; /* back pointer to the socket that owns us */
226 struct rds_transport *r_trans;
227 void *r_trans_private;
228};
229
230/* Flags for mr->r_state */
231#define RDS_MR_DEAD 0
232
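The separate r_state word is needed because test_and_set_bit() operates on a full unsigned long, so RDS_MR_DEAD cannot share the r_use_once/r_invalidate/r_write bitfield above. A minimal sketch of the intended pattern (hypothetical helper, not part of this patch):

	/* Only the first caller to mark the MR dead does the teardown;
	 * later callers observe the bit already set and back off. */
	static void example_mr_kill(struct rds_mr *mr)
	{
		if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state))
			return;
		/* ... tear down mr->r_trans_private, drop the rb node ... */
	}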
233static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset)
234{
235 return r_key | (((u64) offset) << 32);
236}
237
238static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie)
239{
240 return cookie;
241}
242
243static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie)
244{
245 return cookie >> 32;
246}
247
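The cookie helpers pack the registration key and a byte offset into the single u64 carried in the RDS header: key in the low 32 bits, offset in the high 32. A quick illustrative round-trip (not part of the patch):

	rds_rdma_cookie_t cookie = rds_rdma_make_cookie(0xdeadbeef, 4096);
	BUG_ON(rds_rdma_cookie_key(cookie) != 0xdeadbeef);  /* low 32 bits */
	BUG_ON(rds_rdma_cookie_offset(cookie) != 4096);     /* high 32 bits */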
248/* atomic operation types */
249#define RDS_ATOMIC_TYPE_CSWP 0
250#define RDS_ATOMIC_TYPE_FADD 1
251
209/* 252/*
210 * m_sock_item and m_conn_item are on lists that are serialized under 253 * m_sock_item and m_conn_item are on lists that are serialized under
211 * conn->c_lock. m_sock_item has additional meaning in that once it is empty 254 * conn->c_lock. m_sock_item has additional meaning in that once it is empty
@@ -258,13 +301,71 @@ struct rds_message {
258 * -> rs->rs_lock 301 * -> rs->rs_lock
259 */ 302 */
260 spinlock_t m_rs_lock; 303 spinlock_t m_rs_lock;
304 wait_queue_head_t m_flush_wait;
305
261 struct rds_sock *m_rs; 306 struct rds_sock *m_rs;
262 struct rds_rdma_op *m_rdma_op; 307
308 /* cookie to send to remote, in rds header */
263 rds_rdma_cookie_t m_rdma_cookie; 309 rds_rdma_cookie_t m_rdma_cookie;
264 struct rds_mr *m_rdma_mr; 310
265 unsigned int m_nents; 311 unsigned int m_used_sgs;
266 unsigned int m_count; 312 unsigned int m_total_sgs;
267 struct scatterlist m_sg[0]; 313
314 void *m_final_op;
315
316 struct {
317 struct rm_atomic_op {
318 int op_type;
319 union {
320 struct {
321 uint64_t compare;
322 uint64_t swap;
323 uint64_t compare_mask;
324 uint64_t swap_mask;
325 } op_m_cswp;
326 struct {
327 uint64_t add;
328 uint64_t nocarry_mask;
329 } op_m_fadd;
330 };
331
332 u32 op_rkey;
333 u64 op_remote_addr;
334 unsigned int op_notify:1;
335 unsigned int op_recverr:1;
336 unsigned int op_mapped:1;
337 unsigned int op_silent:1;
338 unsigned int op_active:1;
339 struct scatterlist *op_sg;
340 struct rds_notifier *op_notifier;
341
342 struct rds_mr *op_rdma_mr;
343 } atomic;
344 struct rm_rdma_op {
345 u32 op_rkey;
346 u64 op_remote_addr;
347 unsigned int op_write:1;
348 unsigned int op_fence:1;
349 unsigned int op_notify:1;
350 unsigned int op_recverr:1;
351 unsigned int op_mapped:1;
352 unsigned int op_silent:1;
353 unsigned int op_active:1;
354 unsigned int op_bytes;
355 unsigned int op_nents;
356 unsigned int op_count;
357 struct scatterlist *op_sg;
358 struct rds_notifier *op_notifier;
359
360 struct rds_mr *op_rdma_mr;
361 } rdma;
362 struct rm_data_op {
363 unsigned int op_active:1;
364 unsigned int op_nents;
365 unsigned int op_count;
366 struct scatterlist *op_sg;
367 } data;
368 };
268}; 369};
269 370
270/* 371/*
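With this layout a message embeds at most one atomic, one rdma and one data op, each gated by its op_active bit, and m_final_op records which op's completion stands for the whole message. A hypothetical helper showing how a consumer would walk the three ops (illustration only, not from this patch):

	static void example_describe_ops(const struct rds_message *rm)
	{
		if (rm->atomic.op_active)
			printk(KERN_DEBUG "atomic %s rkey %u\n",
			       rm->atomic.op_type == RDS_ATOMIC_TYPE_CSWP ?
			       "cswp" : "fadd", rm->atomic.op_rkey);
		if (rm->rdma.op_active)
			printk(KERN_DEBUG "rdma %s %u bytes\n",
			       rm->rdma.op_write ? "write" : "read",
			       rm->rdma.op_bytes);
		if (rm->data.op_active)
			printk(KERN_DEBUG "data %u sgs\n", rm->data.op_nents);
	}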
@@ -305,10 +406,6 @@ struct rds_notifier {
305 * transport is responsible for other serialization, including 406 * transport is responsible for other serialization, including
306 * rds_recv_incoming(). This is called in process context but 407 * rds_recv_incoming(). This is called in process context but
307 * should try hard not to block. 408 * should try hard not to block.
308 *
309 * @xmit_cong_map: This asks the transport to send the local bitmap down the
310 * given connection. XXX get a better story about the bitmap
311 * flag and header.
312 */ 409 */
313 410
314#define RDS_TRANS_IB 0 411#define RDS_TRANS_IB 0
@@ -332,13 +429,11 @@ struct rds_transport {
332 void (*xmit_complete)(struct rds_connection *conn); 429 void (*xmit_complete)(struct rds_connection *conn);
333 int (*xmit)(struct rds_connection *conn, struct rds_message *rm, 430 int (*xmit)(struct rds_connection *conn, struct rds_message *rm,
334 unsigned int hdr_off, unsigned int sg, unsigned int off); 431 unsigned int hdr_off, unsigned int sg, unsigned int off);
335 int (*xmit_cong_map)(struct rds_connection *conn, 432 int (*xmit_rdma)(struct rds_connection *conn, struct rm_rdma_op *op);
336 struct rds_cong_map *map, unsigned long offset); 433 int (*xmit_atomic)(struct rds_connection *conn, struct rm_atomic_op *op);
337 int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op);
338 int (*recv)(struct rds_connection *conn); 434 int (*recv)(struct rds_connection *conn);
339 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov, 435 int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov,
340 size_t size); 436 size_t size);
341 void (*inc_purge)(struct rds_incoming *inc);
342 void (*inc_free)(struct rds_incoming *inc); 437 void (*inc_free)(struct rds_incoming *inc);
343 438
344 int (*cm_handle_connect)(struct rdma_cm_id *cm_id, 439 int (*cm_handle_connect)(struct rdma_cm_id *cm_id,
@@ -367,17 +462,11 @@ struct rds_sock {
367 * bound_addr used for both incoming and outgoing, no INADDR_ANY 462 * bound_addr used for both incoming and outgoing, no INADDR_ANY
368 * support. 463 * support.
369 */ 464 */
370 struct rb_node rs_bound_node; 465 struct hlist_node rs_bound_node;
371 __be32 rs_bound_addr; 466 __be32 rs_bound_addr;
372 __be32 rs_conn_addr; 467 __be32 rs_conn_addr;
373 __be16 rs_bound_port; 468 __be16 rs_bound_port;
374 __be16 rs_conn_port; 469 __be16 rs_conn_port;
375
376 /*
377 * This is only used to communicate the transport between bind and
378 * initiating connections. All other trans use is referenced through
379 * the connection.
380 */
381 struct rds_transport *rs_transport; 470 struct rds_transport *rs_transport;
382 471
383 /* 472 /*
@@ -466,8 +555,8 @@ struct rds_statistics {
466 uint64_t s_recv_ping; 555 uint64_t s_recv_ping;
467 uint64_t s_send_queue_empty; 556 uint64_t s_send_queue_empty;
468 uint64_t s_send_queue_full; 557 uint64_t s_send_queue_full;
469 uint64_t s_send_sem_contention; 558 uint64_t s_send_lock_contention;
470 uint64_t s_send_sem_queue_raced; 559 uint64_t s_send_lock_queue_raced;
471 uint64_t s_send_immediate_retry; 560 uint64_t s_send_immediate_retry;
472 uint64_t s_send_delayed_retry; 561 uint64_t s_send_delayed_retry;
473 uint64_t s_send_drop_acked; 562 uint64_t s_send_drop_acked;
@@ -487,6 +576,7 @@ struct rds_statistics {
487}; 576};
488 577
489/* af_rds.c */ 578/* af_rds.c */
579char *rds_str_array(char **array, size_t elements, size_t index);
490void rds_sock_addref(struct rds_sock *rs); 580void rds_sock_addref(struct rds_sock *rs);
491void rds_sock_put(struct rds_sock *rs); 581void rds_sock_put(struct rds_sock *rs);
492void rds_wake_sk_sleep(struct rds_sock *rs); 582void rds_wake_sk_sleep(struct rds_sock *rs);
@@ -521,15 +611,16 @@ void rds_cong_exit(void);
521struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); 611struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
522 612
523/* conn.c */ 613/* conn.c */
524int __init rds_conn_init(void); 614int rds_conn_init(void);
525void rds_conn_exit(void); 615void rds_conn_exit(void);
526struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr, 616struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
527 struct rds_transport *trans, gfp_t gfp); 617 struct rds_transport *trans, gfp_t gfp);
528struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr, 618struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
529 struct rds_transport *trans, gfp_t gfp); 619 struct rds_transport *trans, gfp_t gfp);
620void rds_conn_shutdown(struct rds_connection *conn);
530void rds_conn_destroy(struct rds_connection *conn); 621void rds_conn_destroy(struct rds_connection *conn);
531void rds_conn_reset(struct rds_connection *conn);
532void rds_conn_drop(struct rds_connection *conn); 622void rds_conn_drop(struct rds_connection *conn);
623void rds_conn_connect_if_down(struct rds_connection *conn);
533void rds_for_each_conn_info(struct socket *sock, unsigned int len, 624void rds_for_each_conn_info(struct socket *sock, unsigned int len,
534 struct rds_info_iterator *iter, 625 struct rds_info_iterator *iter,
535 struct rds_info_lengths *lens, 626 struct rds_info_lengths *lens,
@@ -566,7 +657,8 @@ rds_conn_connecting(struct rds_connection *conn)
566 657
567/* message.c */ 658/* message.c */
568struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); 659struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp);
569struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, 660struct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents);
661int rds_message_copy_from_user(struct rds_message *rm, struct iovec *first_iov,
570 size_t total_len); 662 size_t total_len);
571struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); 663struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len);
572void rds_message_populate_header(struct rds_header *hdr, __be16 sport, 664void rds_message_populate_header(struct rds_header *hdr, __be16 sport,
@@ -575,12 +667,9 @@ int rds_message_add_extension(struct rds_header *hdr,
575 unsigned int type, const void *data, unsigned int len); 667 unsigned int type, const void *data, unsigned int len);
576int rds_message_next_extension(struct rds_header *hdr, 668int rds_message_next_extension(struct rds_header *hdr,
577 unsigned int *pos, void *buf, unsigned int *buflen); 669 unsigned int *pos, void *buf, unsigned int *buflen);
578int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version);
579int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version);
580int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset); 670int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset);
581int rds_message_inc_copy_to_user(struct rds_incoming *inc, 671int rds_message_inc_copy_to_user(struct rds_incoming *inc,
582 struct iovec *first_iov, size_t size); 672 struct iovec *first_iov, size_t size);
583void rds_message_inc_purge(struct rds_incoming *inc);
584void rds_message_inc_free(struct rds_incoming *inc); 673void rds_message_inc_free(struct rds_incoming *inc);
585void rds_message_addref(struct rds_message *rm); 674void rds_message_addref(struct rds_message *rm);
586void rds_message_put(struct rds_message *rm); 675void rds_message_put(struct rds_message *rm);
@@ -614,7 +703,6 @@ void rds_page_exit(void);
614/* recv.c */ 703/* recv.c */
615void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, 704void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
616 __be32 saddr); 705 __be32 saddr);
617void rds_inc_addref(struct rds_incoming *inc);
618void rds_inc_put(struct rds_incoming *inc); 706void rds_inc_put(struct rds_incoming *inc);
619void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, 707void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
620 struct rds_incoming *inc, gfp_t gfp, enum km_type km); 708 struct rds_incoming *inc, gfp_t gfp, enum km_type km);
@@ -636,14 +724,38 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest);
636typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack); 724typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack);
637void rds_send_drop_acked(struct rds_connection *conn, u64 ack, 725void rds_send_drop_acked(struct rds_connection *conn, u64 ack,
638 is_acked_func is_acked); 726 is_acked_func is_acked);
639int rds_send_acked_before(struct rds_connection *conn, u64 seq);
640void rds_send_remove_from_sock(struct list_head *messages, int status);
641int rds_send_pong(struct rds_connection *conn, __be16 dport); 727int rds_send_pong(struct rds_connection *conn, __be16 dport);
642struct rds_message *rds_send_get_message(struct rds_connection *, 728struct rds_message *rds_send_get_message(struct rds_connection *,
643 struct rds_rdma_op *); 729 struct rm_rdma_op *);
644 730
645/* rdma.c */ 731/* rdma.c */
646void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); 732void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force);
733int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen);
734int rds_get_mr_for_dest(struct rds_sock *rs, char __user *optval, int optlen);
735int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen);
736void rds_rdma_drop_keys(struct rds_sock *rs);
737int rds_rdma_extra_size(struct rds_rdma_args *args);
738int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
739 struct cmsghdr *cmsg);
740int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm,
741 struct cmsghdr *cmsg);
742int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm,
743 struct cmsghdr *cmsg);
744int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm,
745 struct cmsghdr *cmsg);
746void rds_rdma_free_op(struct rm_rdma_op *ro);
747void rds_atomic_free_op(struct rm_atomic_op *ao);
748void rds_rdma_send_complete(struct rds_message *rm, int wc_status);
749void rds_atomic_send_complete(struct rds_message *rm, int wc_status);
750int rds_cmsg_atomic(struct rds_sock *rs, struct rds_message *rm,
751 struct cmsghdr *cmsg);
752
753extern void __rds_put_mr_final(struct rds_mr *mr);
754static inline void rds_mr_put(struct rds_mr *mr)
755{
756 if (atomic_dec_and_test(&mr->r_refcount))
757 __rds_put_mr_final(mr);
758}
647 759
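rds_mr_put() is the release half of a plain refcount; the final put defers the heavy teardown to __rds_put_mr_final(). Sketch of the pairing (hypothetical caller):

	atomic_inc(&mr->r_refcount);	/* take a reference */
	/* ... use mr ... */
	rds_mr_put(mr);			/* may call __rds_put_mr_final() */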
648/* stats.c */ 760/* stats.c */
649DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); 761DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
@@ -657,14 +769,14 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
657 put_cpu(); \ 769 put_cpu(); \
658} while (0) 770} while (0)
659#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count) 771#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count)
660int __init rds_stats_init(void); 772int rds_stats_init(void);
661void rds_stats_exit(void); 773void rds_stats_exit(void);
662void rds_stats_info_copy(struct rds_info_iterator *iter, 774void rds_stats_info_copy(struct rds_info_iterator *iter,
663 uint64_t *values, const char *const *names, 775 uint64_t *values, const char *const *names,
664 size_t nr); 776 size_t nr);
665 777
666/* sysctl.c */ 778/* sysctl.c */
667int __init rds_sysctl_init(void); 779int rds_sysctl_init(void);
668void rds_sysctl_exit(void); 780void rds_sysctl_exit(void);
669extern unsigned long rds_sysctl_sndbuf_min; 781extern unsigned long rds_sysctl_sndbuf_min;
670extern unsigned long rds_sysctl_sndbuf_default; 782extern unsigned long rds_sysctl_sndbuf_default;
@@ -678,9 +790,10 @@ extern unsigned long rds_sysctl_trace_flags;
678extern unsigned int rds_sysctl_trace_level; 790extern unsigned int rds_sysctl_trace_level;
679 791
680/* threads.c */ 792/* threads.c */
681int __init rds_threads_init(void); 793int rds_threads_init(void);
682void rds_threads_exit(void); 794void rds_threads_exit(void);
683extern struct workqueue_struct *rds_wq; 795extern struct workqueue_struct *rds_wq;
796void rds_queue_reconnect(struct rds_connection *conn);
684void rds_connect_worker(struct work_struct *); 797void rds_connect_worker(struct work_struct *);
685void rds_shutdown_worker(struct work_struct *); 798void rds_shutdown_worker(struct work_struct *);
686void rds_send_worker(struct work_struct *); 799void rds_send_worker(struct work_struct *);
@@ -691,9 +804,10 @@ void rds_connect_complete(struct rds_connection *conn);
691int rds_trans_register(struct rds_transport *trans); 804int rds_trans_register(struct rds_transport *trans);
692void rds_trans_unregister(struct rds_transport *trans); 805void rds_trans_unregister(struct rds_transport *trans);
693struct rds_transport *rds_trans_get_preferred(__be32 addr); 806struct rds_transport *rds_trans_get_preferred(__be32 addr);
807void rds_trans_put(struct rds_transport *trans);
694unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, 808unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
695 unsigned int avail); 809 unsigned int avail);
696int __init rds_trans_init(void); 810int rds_trans_init(void);
697void rds_trans_exit(void); 811void rds_trans_exit(void);
698 812
699#endif 813#endif
diff --git a/net/rds/recv.c b/net/rds/recv.c
index c93588c2d553..596689e59272 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -36,7 +36,6 @@
36#include <linux/in.h> 36#include <linux/in.h>
37 37
38#include "rds.h" 38#include "rds.h"
39#include "rdma.h"
40 39
41void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, 40void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
42 __be32 saddr) 41 __be32 saddr)
@@ -49,12 +48,11 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn,
49} 48}
50EXPORT_SYMBOL_GPL(rds_inc_init); 49EXPORT_SYMBOL_GPL(rds_inc_init);
51 50
52void rds_inc_addref(struct rds_incoming *inc) 51static void rds_inc_addref(struct rds_incoming *inc)
53{ 52{
54 rdsdebug("addref inc %p ref %d\n", inc, atomic_read(&inc->i_refcount)); 53 rdsdebug("addref inc %p ref %d\n", inc, atomic_read(&inc->i_refcount));
55 atomic_inc(&inc->i_refcount); 54 atomic_inc(&inc->i_refcount);
56} 55}
57EXPORT_SYMBOL_GPL(rds_inc_addref);
58 56
59void rds_inc_put(struct rds_incoming *inc) 57void rds_inc_put(struct rds_incoming *inc)
60{ 58{
@@ -210,7 +208,7 @@ void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr,
210 } 208 }
211 209
212 rs = rds_find_bound(daddr, inc->i_hdr.h_dport); 210 rs = rds_find_bound(daddr, inc->i_hdr.h_dport);
213 if (rs == NULL) { 211 if (!rs) {
214 rds_stats_inc(s_recv_drop_no_sock); 212 rds_stats_inc(s_recv_drop_no_sock);
215 goto out; 213 goto out;
216 } 214 }
@@ -251,7 +249,7 @@ static int rds_next_incoming(struct rds_sock *rs, struct rds_incoming **inc)
251{ 249{
252 unsigned long flags; 250 unsigned long flags;
253 251
254 if (*inc == NULL) { 252 if (!*inc) {
255 read_lock_irqsave(&rs->rs_recv_lock, flags); 253 read_lock_irqsave(&rs->rs_recv_lock, flags);
256 if (!list_empty(&rs->rs_recv_queue)) { 254 if (!list_empty(&rs->rs_recv_queue)) {
257 *inc = list_entry(rs->rs_recv_queue.next, 255 *inc = list_entry(rs->rs_recv_queue.next,
@@ -334,10 +332,10 @@ int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr)
334 332
335 if (msghdr) { 333 if (msghdr) {
336 cmsg.user_token = notifier->n_user_token; 334 cmsg.user_token = notifier->n_user_token;
337 cmsg.status = notifier->n_status; 335 cmsg.status = notifier->n_status;
338 336
339 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS, 337 err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS,
340 sizeof(cmsg), &cmsg); 338 sizeof(cmsg), &cmsg);
341 if (err) 339 if (err)
342 break; 340 break;
343 } 341 }
diff --git a/net/rds/send.c b/net/rds/send.c
index 9c1c6bcaa6c9..0bc9db17a87d 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -37,7 +37,6 @@
37#include <linux/list.h> 37#include <linux/list.h>
38 38
39#include "rds.h" 39#include "rds.h"
40#include "rdma.h"
41 40
42/* When transmitting messages in rds_send_xmit, we need to emerge from 41/* When transmitting messages in rds_send_xmit, we need to emerge from
43 * time to time and briefly release the CPU. Otherwise the softlock watchdog 42 * time to time and briefly release the CPU. Otherwise the softlock watchdog
@@ -53,8 +52,11 @@ static int send_batch_count = 64;
53module_param(send_batch_count, int, 0444); 52module_param(send_batch_count, int, 0444);
54MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue"); 53MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue");
55 54
55static void rds_send_remove_from_sock(struct list_head *messages, int status);
56
56/* 57/*
57 * Reset the send state. Caller must hold c_send_lock when calling here. 58 * Reset the send state. Callers must ensure that this doesn't race with
59 * rds_send_xmit().
58 */ 60 */
59void rds_send_reset(struct rds_connection *conn) 61void rds_send_reset(struct rds_connection *conn)
60{ 62{
@@ -62,18 +64,22 @@ void rds_send_reset(struct rds_connection *conn)
62 unsigned long flags; 64 unsigned long flags;
63 65
64 if (conn->c_xmit_rm) { 66 if (conn->c_xmit_rm) {
67 rm = conn->c_xmit_rm;
68 conn->c_xmit_rm = NULL;
65 /* Tell the user the RDMA op is no longer mapped by the 69 /* Tell the user the RDMA op is no longer mapped by the
66 * transport. This isn't entirely true (it's flushed out 70 * transport. This isn't entirely true (it's flushed out
67 * independently) but as the connection is down, there's 71 * independently) but as the connection is down, there's
68 * no ongoing RDMA to/from that memory */ 72 * no ongoing RDMA to/from that memory */
69 rds_message_unmapped(conn->c_xmit_rm); 73 rds_message_unmapped(rm);
70 rds_message_put(conn->c_xmit_rm); 74 rds_message_put(rm);
71 conn->c_xmit_rm = NULL;
72 } 75 }
76
73 conn->c_xmit_sg = 0; 77 conn->c_xmit_sg = 0;
74 conn->c_xmit_hdr_off = 0; 78 conn->c_xmit_hdr_off = 0;
75 conn->c_xmit_data_off = 0; 79 conn->c_xmit_data_off = 0;
80 conn->c_xmit_atomic_sent = 0;
76 conn->c_xmit_rdma_sent = 0; 81 conn->c_xmit_rdma_sent = 0;
82 conn->c_xmit_data_sent = 0;
77 83
78 conn->c_map_queued = 0; 84 conn->c_map_queued = 0;
79 85
@@ -90,6 +96,25 @@ void rds_send_reset(struct rds_connection *conn)
90 spin_unlock_irqrestore(&conn->c_lock, flags); 96 spin_unlock_irqrestore(&conn->c_lock, flags);
91} 97}
92 98
99static int acquire_in_xmit(struct rds_connection *conn)
100{
101 return test_and_set_bit(RDS_IN_XMIT, &conn->c_flags) == 0;
102}
103
104static void release_in_xmit(struct rds_connection *conn)
105{
106 clear_bit(RDS_IN_XMIT, &conn->c_flags);
107 smp_mb__after_clear_bit();
108 /*
109 * We don't use wait_on_bit()/wake_up_bit() because our waking is in a
110 * hot path and finding waiters is very rare. We don't want to walk
111 * the system-wide hashed waitqueue buckets in the fast path only to
112 * almost never find waiters.
113 */
114 if (waitqueue_active(&conn->c_waitq))
115 wake_up_all(&conn->c_waitq);
116}
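acquire_in_xmit()/release_in_xmit() replace the old c_send_lock mutex with a lock-free flag: senders that lose the race simply back off, and only the rare waiter (the shutdown path) sleeps. The waiter side pairs with the wake_up_all() above roughly like this (sketch, assuming the standard wait_event() pairing in conn.c):

	/* Block until whoever holds RDS_IN_XMIT drops it. */
	wait_event(conn->c_waitq,
		   !test_bit(RDS_IN_XMIT, &conn->c_flags));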
117
93/* 118/*
 94 * We're making the conscious trade-off here to only send one message 119 * We're making the conscious trade-off here to only send one message
95 * down the connection at a time. 120 * down the connection at a time.
@@ -109,102 +134,69 @@ int rds_send_xmit(struct rds_connection *conn)
109 struct rds_message *rm; 134 struct rds_message *rm;
110 unsigned long flags; 135 unsigned long flags;
111 unsigned int tmp; 136 unsigned int tmp;
112 unsigned int send_quota = send_batch_count;
113 struct scatterlist *sg; 137 struct scatterlist *sg;
114 int ret = 0; 138 int ret = 0;
115 int was_empty = 0;
116 LIST_HEAD(to_be_dropped); 139 LIST_HEAD(to_be_dropped);
117 140
141restart:
142
118 /* 143 /*
119 * sendmsg calls here after having queued its message on the send 144 * sendmsg calls here after having queued its message on the send
120 * queue. We only have one task feeding the connection at a time. If 145 * queue. We only have one task feeding the connection at a time. If
121 * another thread is already feeding the queue then we back off. This 146 * another thread is already feeding the queue then we back off. This
122 * avoids blocking the caller and trading per-connection data between 147 * avoids blocking the caller and trading per-connection data between
123 * caches per message. 148 * caches per message.
124 *
125 * The sem holder will issue a retry if they notice that someone queued
126 * a message after they stopped walking the send queue but before they
127 * dropped the sem.
128 */ 149 */
129 if (!mutex_trylock(&conn->c_send_lock)) { 150 if (!acquire_in_xmit(conn)) {
130 rds_stats_inc(s_send_sem_contention); 151 rds_stats_inc(s_send_lock_contention);
131 ret = -ENOMEM; 152 ret = -ENOMEM;
132 goto out; 153 goto out;
133 } 154 }
134 155
156 /*
157 * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT,
158 * we do the opposite to avoid races.
159 */
160 if (!rds_conn_up(conn)) {
161 release_in_xmit(conn);
162 ret = 0;
163 goto out;
164 }
165
135 if (conn->c_trans->xmit_prepare) 166 if (conn->c_trans->xmit_prepare)
136 conn->c_trans->xmit_prepare(conn); 167 conn->c_trans->xmit_prepare(conn);
137 168
138 /* 169 /*
139 * spin trying to push headers and data down the connection until 170 * spin trying to push headers and data down the connection until
140 * the connection doens't make forward progress. 171 * the connection doesn't make forward progress.
141 */ 172 */
142 while (--send_quota) { 173 while (1) {
143 /*
144 * See if need to send a congestion map update if we're
145 * between sending messages. The send_sem protects our sole
146 * use of c_map_offset and _bytes.
147 * Note this is used only by transports that define a special
148 * xmit_cong_map function. For all others, we create allocate
149 * a cong_map message and treat it just like any other send.
150 */
151 if (conn->c_map_bytes) {
152 ret = conn->c_trans->xmit_cong_map(conn, conn->c_lcong,
153 conn->c_map_offset);
154 if (ret <= 0)
155 break;
156 174
157 conn->c_map_offset += ret;
158 conn->c_map_bytes -= ret;
159 if (conn->c_map_bytes)
160 continue;
161 }
162
163 /* If we're done sending the current message, clear the
164 * offset and S/G temporaries.
165 */
166 rm = conn->c_xmit_rm; 175 rm = conn->c_xmit_rm;
167 if (rm != NULL &&
168 conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
169 conn->c_xmit_sg == rm->m_nents) {
170 conn->c_xmit_rm = NULL;
171 conn->c_xmit_sg = 0;
172 conn->c_xmit_hdr_off = 0;
173 conn->c_xmit_data_off = 0;
174 conn->c_xmit_rdma_sent = 0;
175
176 /* Release the reference to the previous message. */
177 rds_message_put(rm);
178 rm = NULL;
179 }
180 176
181 /* If we're asked to send a cong map update, do so. 177 /*
178 * If between sending messages, we can send a pending congestion
179 * map update.
182 */ 180 */
183 if (rm == NULL && test_and_clear_bit(0, &conn->c_map_queued)) { 181 if (!rm && test_and_clear_bit(0, &conn->c_map_queued)) {
184 if (conn->c_trans->xmit_cong_map != NULL) {
185 conn->c_map_offset = 0;
186 conn->c_map_bytes = sizeof(struct rds_header) +
187 RDS_CONG_MAP_BYTES;
188 continue;
189 }
190
191 rm = rds_cong_update_alloc(conn); 182 rm = rds_cong_update_alloc(conn);
192 if (IS_ERR(rm)) { 183 if (IS_ERR(rm)) {
193 ret = PTR_ERR(rm); 184 ret = PTR_ERR(rm);
194 break; 185 break;
195 } 186 }
187 rm->data.op_active = 1;
196 188
197 conn->c_xmit_rm = rm; 189 conn->c_xmit_rm = rm;
198 } 190 }
199 191
200 /* 192 /*
201 * Grab the next message from the send queue, if there is one. 193 * If not already working on one, grab the next message.
202 * 194 *
203 * c_xmit_rm holds a ref while we're sending this message down 195 * c_xmit_rm holds a ref while we're sending this message down
204 * the connection. We can use this ref while holding the 196 * the connection. We can use this ref while holding the
205 * send_sem. rds_send_reset() is serialized with it. 197 * send_sem. rds_send_reset() is serialized with it.
206 */ 198 */
207 if (rm == NULL) { 199 if (!rm) {
208 unsigned int len; 200 unsigned int len;
209 201
210 spin_lock_irqsave(&conn->c_lock, flags); 202 spin_lock_irqsave(&conn->c_lock, flags);
@@ -224,10 +216,8 @@ int rds_send_xmit(struct rds_connection *conn)
224 216
225 spin_unlock_irqrestore(&conn->c_lock, flags); 217 spin_unlock_irqrestore(&conn->c_lock, flags);
226 218
227 if (rm == NULL) { 219 if (!rm)
228 was_empty = 1;
229 break; 220 break;
230 }
231 221
232 /* Unfortunately, the way Infiniband deals with 222 /* Unfortunately, the way Infiniband deals with
233 * RDMA to a bad MR key is by moving the entire 223 * RDMA to a bad MR key is by moving the entire
@@ -236,13 +226,12 @@ int rds_send_xmit(struct rds_connection *conn)
236 * connection. 226 * connection.
237 * Therefore, we never retransmit messages with RDMA ops. 227 * Therefore, we never retransmit messages with RDMA ops.
238 */ 228 */
239 if (rm->m_rdma_op && 229 if (rm->rdma.op_active &&
240 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) { 230 test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
241 spin_lock_irqsave(&conn->c_lock, flags); 231 spin_lock_irqsave(&conn->c_lock, flags);
242 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) 232 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
243 list_move(&rm->m_conn_item, &to_be_dropped); 233 list_move(&rm->m_conn_item, &to_be_dropped);
244 spin_unlock_irqrestore(&conn->c_lock, flags); 234 spin_unlock_irqrestore(&conn->c_lock, flags);
245 rds_message_put(rm);
246 continue; 235 continue;
247 } 236 }
248 237
@@ -263,23 +252,55 @@ int rds_send_xmit(struct rds_connection *conn)
263 conn->c_xmit_rm = rm; 252 conn->c_xmit_rm = rm;
264 } 253 }
265 254
266 /* 255 /* The transport either sends the whole rdma or none of it */
267 * Try and send an rdma message. Let's see if we can 256 if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) {
268 * keep this simple and require that the transport either 257 rm->m_final_op = &rm->rdma;
269 * send the whole rdma or none of it. 258 ret = conn->c_trans->xmit_rdma(conn, &rm->rdma);
270 */
271 if (rm->m_rdma_op && !conn->c_xmit_rdma_sent) {
272 ret = conn->c_trans->xmit_rdma(conn, rm->m_rdma_op);
273 if (ret) 259 if (ret)
274 break; 260 break;
275 conn->c_xmit_rdma_sent = 1; 261 conn->c_xmit_rdma_sent = 1;
262
276 /* The transport owns the mapped memory for now. 263 /* The transport owns the mapped memory for now.
277 * You can't unmap it while it's on the send queue */ 264 * You can't unmap it while it's on the send queue */
278 set_bit(RDS_MSG_MAPPED, &rm->m_flags); 265 set_bit(RDS_MSG_MAPPED, &rm->m_flags);
279 } 266 }
280 267
281 if (conn->c_xmit_hdr_off < sizeof(struct rds_header) || 268 if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) {
282 conn->c_xmit_sg < rm->m_nents) { 269 rm->m_final_op = &rm->atomic;
270 ret = conn->c_trans->xmit_atomic(conn, &rm->atomic);
271 if (ret)
272 break;
273 conn->c_xmit_atomic_sent = 1;
274
275 /* The transport owns the mapped memory for now.
276 * You can't unmap it while it's on the send queue */
277 set_bit(RDS_MSG_MAPPED, &rm->m_flags);
278 }
279
280 /*
281 * A number of cases require an RDS header to be sent
282 * even if there is no data.
283 * We permit 0-byte sends; rds-ping depends on this.
284 * However, if there are exclusively attached silent ops,
285 * we skip the hdr/data send, to enable silent operation.
286 */
287 if (rm->data.op_nents == 0) {
288 int ops_present;
289 int all_ops_are_silent = 1;
290
291 ops_present = (rm->atomic.op_active || rm->rdma.op_active);
292 if (rm->atomic.op_active && !rm->atomic.op_silent)
293 all_ops_are_silent = 0;
294 if (rm->rdma.op_active && !rm->rdma.op_silent)
295 all_ops_are_silent = 0;
296
297 if (ops_present && all_ops_are_silent
298 && !rm->m_rdma_cookie)
299 rm->data.op_active = 0;
300 }
301
302 if (rm->data.op_active && !conn->c_xmit_data_sent) {
303 rm->m_final_op = &rm->data;
283 ret = conn->c_trans->xmit(conn, rm, 304 ret = conn->c_trans->xmit(conn, rm,
284 conn->c_xmit_hdr_off, 305 conn->c_xmit_hdr_off,
285 conn->c_xmit_sg, 306 conn->c_xmit_sg,
@@ -295,7 +316,7 @@ int rds_send_xmit(struct rds_connection *conn)
295 ret -= tmp; 316 ret -= tmp;
296 } 317 }
297 318
298 sg = &rm->m_sg[conn->c_xmit_sg]; 319 sg = &rm->data.op_sg[conn->c_xmit_sg];
299 while (ret) { 320 while (ret) {
300 tmp = min_t(int, ret, sg->length - 321 tmp = min_t(int, ret, sg->length -
301 conn->c_xmit_data_off); 322 conn->c_xmit_data_off);
@@ -306,49 +327,63 @@ int rds_send_xmit(struct rds_connection *conn)
306 sg++; 327 sg++;
307 conn->c_xmit_sg++; 328 conn->c_xmit_sg++;
308 BUG_ON(ret != 0 && 329 BUG_ON(ret != 0 &&
309 conn->c_xmit_sg == rm->m_nents); 330 conn->c_xmit_sg == rm->data.op_nents);
310 } 331 }
311 } 332 }
333
334 if (conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
335 (conn->c_xmit_sg == rm->data.op_nents))
336 conn->c_xmit_data_sent = 1;
312 } 337 }
313 }
314 338
315 /* Nuke any messages we decided not to retransmit. */ 339 /*
316 if (!list_empty(&to_be_dropped)) 340 * A rm will only make multiple passes through this loop
317 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED); 341 * if there is a data op. Thus, if the data is sent (or there was
342 * none), then we're done with the rm.
343 */
344 if (!rm->data.op_active || conn->c_xmit_data_sent) {
345 conn->c_xmit_rm = NULL;
346 conn->c_xmit_sg = 0;
347 conn->c_xmit_hdr_off = 0;
348 conn->c_xmit_data_off = 0;
349 conn->c_xmit_rdma_sent = 0;
350 conn->c_xmit_atomic_sent = 0;
351 conn->c_xmit_data_sent = 0;
352
353 rds_message_put(rm);
354 }
355 }
318 356
319 if (conn->c_trans->xmit_complete) 357 if (conn->c_trans->xmit_complete)
320 conn->c_trans->xmit_complete(conn); 358 conn->c_trans->xmit_complete(conn);
321 359
322 /* 360 release_in_xmit(conn);
323 * We might be racing with another sender who queued a message but
324 * backed off on noticing that we held the c_send_lock. If we check
325 * for queued messages after dropping the sem then either we'll
326 * see the queued message or the queuer will get the sem. If we
327 * notice the queued message then we trigger an immediate retry.
328 *
329 * We need to be careful only to do this when we stopped processing
330 * the send queue because it was empty. It's the only way we
331 * stop processing the loop when the transport hasn't taken
332 * responsibility for forward progress.
333 */
334 mutex_unlock(&conn->c_send_lock);
335 361
336 if (conn->c_map_bytes || (send_quota == 0 && !was_empty)) { 362 /* Nuke any messages we decided not to retransmit. */
337 /* We exhausted the send quota, but there's work left to 363 if (!list_empty(&to_be_dropped)) {
338 * do. Return and (re-)schedule the send worker. 364 /* irqs on here, so we can put(), unlike above */
339 */ 365 list_for_each_entry(rm, &to_be_dropped, m_conn_item)
340 ret = -EAGAIN; 366 rds_message_put(rm);
367 rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED);
341 } 368 }
342 369
343 if (ret == 0 && was_empty) { 370 /*
344 /* A simple bit test would be way faster than taking the 371 * Other senders can queue a message after we last test the send queue
345 * spin lock */ 372 * but before we clear RDS_IN_XMIT. In that case they'd back off and
346 spin_lock_irqsave(&conn->c_lock, flags); 373 * not try and send their newly queued message. We need to check the
374 * send queue after having cleared RDS_IN_XMIT so that their message
375 * doesn't get stuck on the send queue.
376 *
377 * If the transport cannot continue (i.e., ret != 0), then it must
378 * call us when more room is available, such as from the tx
379 * completion handler.
380 */
381 if (ret == 0) {
382 smp_mb();
347 if (!list_empty(&conn->c_send_queue)) { 383 if (!list_empty(&conn->c_send_queue)) {
348 rds_stats_inc(s_send_sem_queue_raced); 384 rds_stats_inc(s_send_lock_queue_raced);
349 ret = -EAGAIN; 385 goto restart;
350 } 386 }
351 spin_unlock_irqrestore(&conn->c_lock, flags);
352 } 387 }
353out: 388out:
354 return ret; 389 return ret;
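The ret == 0 recheck closes a lost-wakeup window between clearing RDS_IN_XMIT and testing the queue. A worked interleaving, with sender A inside rds_send_xmit() and sender B queueing a message (illustrative timeline):

	/*
	 * A: sees c_send_queue empty, leaves the while (1) loop
	 * B: adds rm to conn->c_send_queue
	 * B: acquire_in_xmit() fails (A still holds the bit), B backs off
	 * A: release_in_xmit() clears RDS_IN_XMIT
	 * A: smp_mb(); !list_empty() now sees B's message -> goto restart
	 *
	 * Without the recheck, B's message would sit queued until some
	 * later send or transport completion called rds_send_xmit() again.
	 */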
@@ -376,52 +411,60 @@ static inline int rds_send_is_acked(struct rds_message *rm, u64 ack,
376} 411}
377 412
378/* 413/*
379 * Returns true if there are no messages on the send and retransmit queues 414 * This is pretty similar to what happens below in the ACK
380 * which have a sequence number greater than or equal to the given sequence 415 * handling code - except that we call here as soon as we get
381 * number. 416 * the IB send completion on the RDMA op and the accompanying
417 * message.
382 */ 418 */
383int rds_send_acked_before(struct rds_connection *conn, u64 seq) 419void rds_rdma_send_complete(struct rds_message *rm, int status)
384{ 420{
385 struct rds_message *rm, *tmp; 421 struct rds_sock *rs = NULL;
386 int ret = 1; 422 struct rm_rdma_op *ro;
423 struct rds_notifier *notifier;
424 unsigned long flags;
387 425
388 spin_lock(&conn->c_lock); 426 spin_lock_irqsave(&rm->m_rs_lock, flags);
389 427
390 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { 428 ro = &rm->rdma;
391 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) 429 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
392 ret = 0; 430 ro->op_active && ro->op_notify && ro->op_notifier) {
393 break; 431 notifier = ro->op_notifier;
394 } 432 rs = rm->m_rs;
433 sock_hold(rds_rs_to_sk(rs));
395 434
396 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { 435 notifier->n_status = status;
397 if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) 436 spin_lock(&rs->rs_lock);
398 ret = 0; 437 list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
399 break; 438 spin_unlock(&rs->rs_lock);
439
440 ro->op_notifier = NULL;
400 } 441 }
401 442
402 spin_unlock(&conn->c_lock); 443 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
403 444
404 return ret; 445 if (rs) {
446 rds_wake_sk_sleep(rs);
447 sock_put(rds_rs_to_sk(rs));
448 }
405} 449}
450EXPORT_SYMBOL_GPL(rds_rdma_send_complete);
406 451
407/* 452/*
408 * This is pretty similar to what happens below in the ACK 453 * Just like above, except looks at atomic op
409 * handling code - except that we call here as soon as we get
410 * the IB send completion on the RDMA op and the accompanying
411 * message.
412 */ 454 */
413void rds_rdma_send_complete(struct rds_message *rm, int status) 455void rds_atomic_send_complete(struct rds_message *rm, int status)
414{ 456{
415 struct rds_sock *rs = NULL; 457 struct rds_sock *rs = NULL;
416 struct rds_rdma_op *ro; 458 struct rm_atomic_op *ao;
417 struct rds_notifier *notifier; 459 struct rds_notifier *notifier;
460 unsigned long flags;
418 461
419 spin_lock(&rm->m_rs_lock); 462 spin_lock_irqsave(&rm->m_rs_lock, flags);
420 463
421 ro = rm->m_rdma_op; 464 ao = &rm->atomic;
422 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) && 465 if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)
423 ro && ro->r_notify && ro->r_notifier) { 466 && ao->op_active && ao->op_notify && ao->op_notifier) {
424 notifier = ro->r_notifier; 467 notifier = ao->op_notifier;
425 rs = rm->m_rs; 468 rs = rm->m_rs;
426 sock_hold(rds_rs_to_sk(rs)); 469 sock_hold(rds_rs_to_sk(rs));
427 470
@@ -430,17 +473,17 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
430 list_add_tail(&notifier->n_list, &rs->rs_notify_queue); 473 list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
431 spin_unlock(&rs->rs_lock); 474 spin_unlock(&rs->rs_lock);
432 475
433 ro->r_notifier = NULL; 476 ao->op_notifier = NULL;
434 } 477 }
435 478
436 spin_unlock(&rm->m_rs_lock); 479 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
437 480
438 if (rs) { 481 if (rs) {
439 rds_wake_sk_sleep(rs); 482 rds_wake_sk_sleep(rs);
440 sock_put(rds_rs_to_sk(rs)); 483 sock_put(rds_rs_to_sk(rs));
441 } 484 }
442} 485}
443EXPORT_SYMBOL_GPL(rds_rdma_send_complete); 486EXPORT_SYMBOL_GPL(rds_atomic_send_complete);
444 487
445/* 488/*
446 * This is the same as rds_rdma_send_complete except we 489 * This is the same as rds_rdma_send_complete except we
@@ -448,15 +491,23 @@ EXPORT_SYMBOL_GPL(rds_rdma_send_complete);
448 * socket, socket lock) and can just move the notifier. 491 * socket, socket lock) and can just move the notifier.
449 */ 492 */
450static inline void 493static inline void
451__rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status) 494__rds_send_complete(struct rds_sock *rs, struct rds_message *rm, int status)
452{ 495{
453 struct rds_rdma_op *ro; 496 struct rm_rdma_op *ro;
497 struct rm_atomic_op *ao;
498
499 ro = &rm->rdma;
500 if (ro->op_active && ro->op_notify && ro->op_notifier) {
501 ro->op_notifier->n_status = status;
502 list_add_tail(&ro->op_notifier->n_list, &rs->rs_notify_queue);
503 ro->op_notifier = NULL;
504 }
454 505
455 ro = rm->m_rdma_op; 506 ao = &rm->atomic;
456 if (ro && ro->r_notify && ro->r_notifier) { 507 if (ao->op_active && ao->op_notify && ao->op_notifier) {
457 ro->r_notifier->n_status = status; 508 ao->op_notifier->n_status = status;
458 list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue); 509 list_add_tail(&ao->op_notifier->n_list, &rs->rs_notify_queue);
459 ro->r_notifier = NULL; 510 ao->op_notifier = NULL;
460 } 511 }
461 512
462 /* No need to wake the app - caller does this */ 513 /* No need to wake the app - caller does this */
@@ -468,7 +519,7 @@ __rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status
468 * So speed is not an issue here. 519 * So speed is not an issue here.
469 */ 520 */
470struct rds_message *rds_send_get_message(struct rds_connection *conn, 521struct rds_message *rds_send_get_message(struct rds_connection *conn,
471 struct rds_rdma_op *op) 522 struct rm_rdma_op *op)
472{ 523{
473 struct rds_message *rm, *tmp, *found = NULL; 524 struct rds_message *rm, *tmp, *found = NULL;
474 unsigned long flags; 525 unsigned long flags;
@@ -476,7 +527,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
476 spin_lock_irqsave(&conn->c_lock, flags); 527 spin_lock_irqsave(&conn->c_lock, flags);
477 528
478 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { 529 list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
479 if (rm->m_rdma_op == op) { 530 if (&rm->rdma == op) {
480 atomic_inc(&rm->m_refcount); 531 atomic_inc(&rm->m_refcount);
481 found = rm; 532 found = rm;
482 goto out; 533 goto out;
@@ -484,7 +535,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
484 } 535 }
485 536
486 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { 537 list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
487 if (rm->m_rdma_op == op) { 538 if (&rm->rdma == op) {
488 atomic_inc(&rm->m_refcount); 539 atomic_inc(&rm->m_refcount);
489 found = rm; 540 found = rm;
490 break; 541 break;
@@ -506,7 +557,7 @@ EXPORT_SYMBOL_GPL(rds_send_get_message);
506 * removing the messages from the 'messages' list regardless of if it found 557 * removing the messages from the 'messages' list regardless of if it found
507 * the messages on the socket list or not. 558 * the messages on the socket list or not.
508 */ 559 */
509void rds_send_remove_from_sock(struct list_head *messages, int status) 560static void rds_send_remove_from_sock(struct list_head *messages, int status)
510{ 561{
511 unsigned long flags; 562 unsigned long flags;
512 struct rds_sock *rs = NULL; 563 struct rds_sock *rs = NULL;
@@ -544,19 +595,20 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
544 spin_lock(&rs->rs_lock); 595 spin_lock(&rs->rs_lock);
545 596
546 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { 597 if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
547 struct rds_rdma_op *ro = rm->m_rdma_op; 598 struct rm_rdma_op *ro = &rm->rdma;
548 struct rds_notifier *notifier; 599 struct rds_notifier *notifier;
549 600
550 list_del_init(&rm->m_sock_item); 601 list_del_init(&rm->m_sock_item);
551 rds_send_sndbuf_remove(rs, rm); 602 rds_send_sndbuf_remove(rs, rm);
552 603
553 if (ro && ro->r_notifier && (status || ro->r_notify)) { 604 if (ro->op_active && ro->op_notifier &&
554 notifier = ro->r_notifier; 605 (ro->op_notify || (ro->op_recverr && status))) {
606 notifier = ro->op_notifier;
555 list_add_tail(&notifier->n_list, 607 list_add_tail(&notifier->n_list,
556 &rs->rs_notify_queue); 608 &rs->rs_notify_queue);
557 if (!notifier->n_status) 609 if (!notifier->n_status)
558 notifier->n_status = status; 610 notifier->n_status = status;
559 rm->m_rdma_op->r_notifier = NULL; 611 rm->rdma.op_notifier = NULL;
560 } 612 }
561 was_on_sock = 1; 613 was_on_sock = 1;
562 rm->m_rs = NULL; 614 rm->m_rs = NULL;
@@ -619,9 +671,8 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
619{ 671{
620 struct rds_message *rm, *tmp; 672 struct rds_message *rm, *tmp;
621 struct rds_connection *conn; 673 struct rds_connection *conn;
622 unsigned long flags, flags2; 674 unsigned long flags;
623 LIST_HEAD(list); 675 LIST_HEAD(list);
624 int wake = 0;
625 676
626 /* get all the messages we're dropping under the rs lock */ 677 /* get all the messages we're dropping under the rs lock */
627 spin_lock_irqsave(&rs->rs_lock, flags); 678 spin_lock_irqsave(&rs->rs_lock, flags);
@@ -631,59 +682,54 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
631 dest->sin_port != rm->m_inc.i_hdr.h_dport)) 682 dest->sin_port != rm->m_inc.i_hdr.h_dport))
632 continue; 683 continue;
633 684
634 wake = 1;
635 list_move(&rm->m_sock_item, &list); 685 list_move(&rm->m_sock_item, &list);
636 rds_send_sndbuf_remove(rs, rm); 686 rds_send_sndbuf_remove(rs, rm);
637 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); 687 clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
638 } 688 }
639 689
640 /* order flag updates with the rs lock */ 690 /* order flag updates with the rs lock */
641 if (wake) 691 smp_mb__after_clear_bit();
642 smp_mb__after_clear_bit();
643 692
644 spin_unlock_irqrestore(&rs->rs_lock, flags); 693 spin_unlock_irqrestore(&rs->rs_lock, flags);
645 694
646 conn = NULL; 695 if (list_empty(&list))
696 return;
647 697
648 /* now remove the messages from the conn list as needed */ 698 /* Remove the messages from the conn */
649 list_for_each_entry(rm, &list, m_sock_item) { 699 list_for_each_entry(rm, &list, m_sock_item) {
650 /* We do this here rather than in the loop above, so that
651 * we don't have to nest m_rs_lock under rs->rs_lock */
652 spin_lock_irqsave(&rm->m_rs_lock, flags2);
653 /* If this is a RDMA operation, notify the app. */
654 spin_lock(&rs->rs_lock);
655 __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
656 spin_unlock(&rs->rs_lock);
657 rm->m_rs = NULL;
658 spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
659 700
701 conn = rm->m_inc.i_conn;
702
703 spin_lock_irqsave(&conn->c_lock, flags);
660 /* 704 /*
661 * If we see this flag cleared then we're *sure* that someone 705 * Maybe someone else beat us to removing rm from the conn.
662 * else beat us to removing it from the conn. If we race 706 * If we race with their flag update we'll get the lock and
663 * with their flag update we'll get the lock and then really 707 * then really see that the flag has been cleared.
664 * see that the flag has been cleared.
665 */ 708 */
666 if (!test_bit(RDS_MSG_ON_CONN, &rm->m_flags)) 709 if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) {
710 spin_unlock_irqrestore(&conn->c_lock, flags);
667 continue; 711 continue;
668
669 if (conn != rm->m_inc.i_conn) {
670 if (conn)
671 spin_unlock_irqrestore(&conn->c_lock, flags);
672 conn = rm->m_inc.i_conn;
673 spin_lock_irqsave(&conn->c_lock, flags);
674 } 712 }
713 list_del_init(&rm->m_conn_item);
714 spin_unlock_irqrestore(&conn->c_lock, flags);
675 715
676 if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { 716 /*
677 list_del_init(&rm->m_conn_item); 717 * Couldn't grab m_rs_lock in top loop (lock ordering),
678 rds_message_put(rm); 718 * but we can now.
679 } 719 */
680 } 720 spin_lock_irqsave(&rm->m_rs_lock, flags);
681 721
682 if (conn) 722 spin_lock(&rs->rs_lock);
683 spin_unlock_irqrestore(&conn->c_lock, flags); 723 __rds_send_complete(rs, rm, RDS_RDMA_CANCELED);
724 spin_unlock(&rs->rs_lock);
684 725
685 if (wake) 726 rm->m_rs = NULL;
686 rds_wake_sk_sleep(rs); 727 spin_unlock_irqrestore(&rm->m_rs_lock, flags);
728
729 rds_message_put(rm);
730 }
731
732 rds_wake_sk_sleep(rs);
687 733
688 while (!list_empty(&list)) { 734 while (!list_empty(&list)) {
689 rm = list_entry(list.next, struct rds_message, m_sock_item); 735 rm = list_entry(list.next, struct rds_message, m_sock_item);
@@ -763,6 +809,63 @@ out:
763 return *queued; 809 return *queued;
764} 810}
765 811
812/*
813 * rds_message is getting to be quite complicated, and we'd like to allocate
814 * it all in one go. This figures out how big it needs to be up front.
815 */
816static int rds_rm_size(struct msghdr *msg, int data_len)
817{
818 struct cmsghdr *cmsg;
819 int size = 0;
820 int cmsg_groups = 0;
821 int retval;
822
823 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
824 if (!CMSG_OK(msg, cmsg))
825 return -EINVAL;
826
827 if (cmsg->cmsg_level != SOL_RDS)
828 continue;
829
830 switch (cmsg->cmsg_type) {
831 case RDS_CMSG_RDMA_ARGS:
832 cmsg_groups |= 1;
833 retval = rds_rdma_extra_size(CMSG_DATA(cmsg));
834 if (retval < 0)
835 return retval;
836 size += retval;
837
838 break;
839
840 case RDS_CMSG_RDMA_DEST:
841 case RDS_CMSG_RDMA_MAP:
842 cmsg_groups |= 2;
843 /* these are valid but do not add any size */
844 break;
845
846 case RDS_CMSG_ATOMIC_CSWP:
847 case RDS_CMSG_ATOMIC_FADD:
848 case RDS_CMSG_MASKED_ATOMIC_CSWP:
849 case RDS_CMSG_MASKED_ATOMIC_FADD:
850 cmsg_groups |= 1;
851 size += sizeof(struct scatterlist);
852 break;
853
854 default:
855 return -EINVAL;
856 }
857
858 }
859
860 size += ceil(data_len, PAGE_SIZE) * sizeof(struct scatterlist);
861
862 /* Ensure (DEST, MAP) are never used with (ARGS, ATOMIC) */
863 if (cmsg_groups == 3)
864 return -EINVAL;
865
866 return size;
867}
868
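ceil() here is RDS's local round-up-divide helper from rds.h, so the data portion reserves one scatterlist entry per page of payload. A worked example, assuming PAGE_SIZE == 4096:

	/* One RDS_CMSG_ATOMIC_FADD cmsg plus a 10000-byte payload:
	 * 1 sg for the atomic op + ceil(10000, 4096) == 3 sgs for data,
	 * so rds_rm_size() returns room for 4 struct scatterlists,
	 * appended to the rds_message allocation in one go.
	 */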
766static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, 869static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
767 struct msghdr *msg, int *allocated_mr) 870 struct msghdr *msg, int *allocated_mr)
768{ 871{
@@ -777,7 +880,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
777 continue; 880 continue;
778 881
779 /* As a side effect, RDMA_DEST and RDMA_MAP will set 882 /* As a side effect, RDMA_DEST and RDMA_MAP will set
780 * rm->m_rdma_cookie and rm->m_rdma_mr. 883 * rm->rdma.m_rdma_cookie and rm->rdma.m_rdma_mr.
781 */ 884 */
782 switch (cmsg->cmsg_type) { 885 switch (cmsg->cmsg_type) {
783 case RDS_CMSG_RDMA_ARGS: 886 case RDS_CMSG_RDMA_ARGS:
@@ -793,6 +896,12 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
793 if (!ret) 896 if (!ret)
794 *allocated_mr = 1; 897 *allocated_mr = 1;
795 break; 898 break;
899 case RDS_CMSG_ATOMIC_CSWP:
900 case RDS_CMSG_ATOMIC_FADD:
901 case RDS_CMSG_MASKED_ATOMIC_CSWP:
902 case RDS_CMSG_MASKED_ATOMIC_FADD:
903 ret = rds_cmsg_atomic(rs, rm, cmsg);
904 break;
796 905
797 default: 906 default:
798 return -EINVAL; 907 return -EINVAL;
@@ -850,13 +959,26 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
850 goto out; 959 goto out;
851 } 960 }
852 961
853 rm = rds_message_copy_from_user(msg->msg_iov, payload_len); 962 /* size of rm including all sgs */
854 if (IS_ERR(rm)) { 963 ret = rds_rm_size(msg, payload_len);
855 ret = PTR_ERR(rm); 964 if (ret < 0)
856 rm = NULL; 965 goto out;
966
967 rm = rds_message_alloc(ret, GFP_KERNEL);
968 if (!rm) {
969 ret = -ENOMEM;
857 goto out; 970 goto out;
858 } 971 }
859 972
973 /* Attach data to the rm */
974 if (payload_len) {
975 rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
976 ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);
977 if (ret)
978 goto out;
979 }
980 rm->data.op_active = 1;
981
860 rm->m_daddr = daddr; 982 rm->m_daddr = daddr;
861 983
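rds_sendmsg() now sizes the message up front and allocates the rds_message together with all of its scatterlists, only then copying the payload in. The calling sequence, condensed from the hunk above (sketch of the flow, error handling elided):

	ret = rds_rm_size(msg, payload_len);	/* total sg space needed */
	rm = rds_message_alloc(ret, GFP_KERNEL);
	rm->data.op_sg = rds_message_alloc_sgs(rm,
				ceil(payload_len, PAGE_SIZE));
	ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);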
862 /* rds_conn_create has a spinlock that runs with IRQ off. 984 /* rds_conn_create has a spinlock that runs with IRQ off.
@@ -879,22 +1001,23 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
879 if (ret) 1001 if (ret)
880 goto out; 1002 goto out;
881 1003
882 if ((rm->m_rdma_cookie || rm->m_rdma_op) && 1004 if (rm->rdma.op_active && !conn->c_trans->xmit_rdma) {
883 conn->c_trans->xmit_rdma == NULL) {
884 if (printk_ratelimit()) 1005 if (printk_ratelimit())
885 printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", 1006 printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
886 rm->m_rdma_op, conn->c_trans->xmit_rdma); 1007 &rm->rdma, conn->c_trans->xmit_rdma);
887 ret = -EOPNOTSUPP; 1008 ret = -EOPNOTSUPP;
888 goto out; 1009 goto out;
889 } 1010 }
890 1011
891 /* If the connection is down, trigger a connect. We may 1012 if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) {
892 * have scheduled a delayed reconnect however - in this case 1013 if (printk_ratelimit())
893 * we should not interfere. 1014 printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n",
894 */ 1015 &rm->atomic, conn->c_trans->xmit_atomic);
895 if (rds_conn_state(conn) == RDS_CONN_DOWN && 1016 ret = -EOPNOTSUPP;
896 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) 1017 goto out;
897 queue_delayed_work(rds_wq, &conn->c_conn_w, 0); 1018 }
1019
1020 rds_conn_connect_if_down(conn);
898 1021
899 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); 1022 ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
900 if (ret) { 1023 if (ret) {
@@ -938,7 +1061,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
938 rds_stats_inc(s_send_queued); 1061 rds_stats_inc(s_send_queued);
939 1062
940 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) 1063 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
941 rds_send_worker(&conn->c_send_w.work); 1064 rds_send_xmit(conn);
942 1065
943 rds_message_put(rm); 1066 rds_message_put(rm);
944 return payload_len; 1067 return payload_len;
@@ -966,20 +1089,15 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
966 int ret = 0; 1089 int ret = 0;
967 1090
968 rm = rds_message_alloc(0, GFP_ATOMIC); 1091 rm = rds_message_alloc(0, GFP_ATOMIC);
969 if (rm == NULL) { 1092 if (!rm) {
970 ret = -ENOMEM; 1093 ret = -ENOMEM;
971 goto out; 1094 goto out;
972 } 1095 }
973 1096
974 rm->m_daddr = conn->c_faddr; 1097 rm->m_daddr = conn->c_faddr;
1098 rm->data.op_active = 1;
975 1099
976 /* If the connection is down, trigger a connect. We may 1100 rds_conn_connect_if_down(conn);
977 * have scheduled a delayed reconnect however - in this case
978 * we should not interfere.
979 */
980 if (rds_conn_state(conn) == RDS_CONN_DOWN &&
981 !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags))
982 queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
983 1101
984 ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL); 1102 ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL);
985 if (ret) 1103 if (ret)
@@ -999,7 +1117,9 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
999 rds_stats_inc(s_send_queued); 1117 rds_stats_inc(s_send_queued);
1000 rds_stats_inc(s_send_pong); 1118 rds_stats_inc(s_send_pong);
1001 1119
1002 queue_delayed_work(rds_wq, &conn->c_send_w, 0); 1120 if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
1121 rds_send_xmit(conn);
1122
1003 rds_message_put(rm); 1123 rds_message_put(rm);
1004 return 0; 1124 return 0;
1005 1125
diff --git a/net/rds/stats.c b/net/rds/stats.c
index 7598eb07cfb1..10c759ccac0c 100644
--- a/net/rds/stats.c
+++ b/net/rds/stats.c
@@ -57,8 +57,8 @@ static const char *const rds_stat_names[] = {
57 "recv_ping", 57 "recv_ping",
58 "send_queue_empty", 58 "send_queue_empty",
59 "send_queue_full", 59 "send_queue_full",
60 "send_sem_contention", 60 "send_lock_contention",
61 "send_sem_queue_raced", 61 "send_lock_queue_raced",
62 "send_immediate_retry", 62 "send_immediate_retry",
63 "send_delayed_retry", 63 "send_delayed_retry",
64 "send_drop_acked", 64 "send_drop_acked",
@@ -143,7 +143,7 @@ void rds_stats_exit(void)
143 rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info); 143 rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info);
144} 144}
145 145
146int __init rds_stats_init(void) 146int rds_stats_init(void)
147{ 147{
148 rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info); 148 rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info);
149 return 0; 149 return 0;
diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c
index 7829a20325d3..25ad0c77a26c 100644
--- a/net/rds/sysctl.c
+++ b/net/rds/sysctl.c
@@ -105,13 +105,13 @@ void rds_sysctl_exit(void)
105 unregister_sysctl_table(rds_sysctl_reg_table); 105 unregister_sysctl_table(rds_sysctl_reg_table);
106} 106}
107 107
108int __init rds_sysctl_init(void) 108int rds_sysctl_init(void)
109{ 109{
110 rds_sysctl_reconnect_min = msecs_to_jiffies(1); 110 rds_sysctl_reconnect_min = msecs_to_jiffies(1);
111 rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min; 111 rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min;
112 112
113 rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table); 113 rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table);
114 if (rds_sysctl_reg_table == NULL) 114 if (!rds_sysctl_reg_table)
115 return -ENOMEM; 115 return -ENOMEM;
116 return 0; 116 return 0;
117} 117}
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index babf4577ff7d..08a8c6cf2d10 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -41,7 +41,7 @@
41/* only for info exporting */ 41/* only for info exporting */
42static DEFINE_SPINLOCK(rds_tcp_tc_list_lock); 42static DEFINE_SPINLOCK(rds_tcp_tc_list_lock);
43static LIST_HEAD(rds_tcp_tc_list); 43static LIST_HEAD(rds_tcp_tc_list);
44unsigned int rds_tcp_tc_count; 44static unsigned int rds_tcp_tc_count;
45 45
46/* Track rds_tcp_connection structs so they can be cleaned up */ 46/* Track rds_tcp_connection structs so they can be cleaned up */
47static DEFINE_SPINLOCK(rds_tcp_conn_lock); 47static DEFINE_SPINLOCK(rds_tcp_conn_lock);
@@ -200,7 +200,7 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
200 struct rds_tcp_connection *tc; 200 struct rds_tcp_connection *tc;
201 201
202 tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); 202 tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
203 if (tc == NULL) 203 if (!tc)
204 return -ENOMEM; 204 return -ENOMEM;
205 205
206 tc->t_sock = NULL; 206 tc->t_sock = NULL;
@@ -243,7 +243,7 @@ static void rds_tcp_destroy_conns(void)
243 } 243 }
244} 244}
245 245
246void rds_tcp_exit(void) 246static void rds_tcp_exit(void)
247{ 247{
248 rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info); 248 rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
249 rds_tcp_listen_stop(); 249 rds_tcp_listen_stop();
@@ -258,7 +258,6 @@ struct rds_transport rds_tcp_transport = {
258 .laddr_check = rds_tcp_laddr_check, 258 .laddr_check = rds_tcp_laddr_check,
259 .xmit_prepare = rds_tcp_xmit_prepare, 259 .xmit_prepare = rds_tcp_xmit_prepare,
260 .xmit_complete = rds_tcp_xmit_complete, 260 .xmit_complete = rds_tcp_xmit_complete,
261 .xmit_cong_map = rds_tcp_xmit_cong_map,
262 .xmit = rds_tcp_xmit, 261 .xmit = rds_tcp_xmit,
263 .recv = rds_tcp_recv, 262 .recv = rds_tcp_recv,
264 .conn_alloc = rds_tcp_conn_alloc, 263 .conn_alloc = rds_tcp_conn_alloc,
@@ -266,7 +265,6 @@ struct rds_transport rds_tcp_transport = {
266 .conn_connect = rds_tcp_conn_connect, 265 .conn_connect = rds_tcp_conn_connect,
267 .conn_shutdown = rds_tcp_conn_shutdown, 266 .conn_shutdown = rds_tcp_conn_shutdown,
268 .inc_copy_to_user = rds_tcp_inc_copy_to_user, 267 .inc_copy_to_user = rds_tcp_inc_copy_to_user,
269 .inc_purge = rds_tcp_inc_purge,
270 .inc_free = rds_tcp_inc_free, 268 .inc_free = rds_tcp_inc_free,
271 .stats_info_copy = rds_tcp_stats_info_copy, 269 .stats_info_copy = rds_tcp_stats_info_copy,
272 .exit = rds_tcp_exit, 270 .exit = rds_tcp_exit,
@@ -276,14 +274,14 @@ struct rds_transport rds_tcp_transport = {
276 .t_prefer_loopback = 1, 274 .t_prefer_loopback = 1,
277}; 275};
278 276
279int __init rds_tcp_init(void) 277static int rds_tcp_init(void)
280{ 278{
281 int ret; 279 int ret;
282 280
283 rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection", 281 rds_tcp_conn_slab = kmem_cache_create("rds_tcp_connection",
284 sizeof(struct rds_tcp_connection), 282 sizeof(struct rds_tcp_connection),
285 0, 0, NULL); 283 0, 0, NULL);
286 if (rds_tcp_conn_slab == NULL) { 284 if (!rds_tcp_conn_slab) {
287 ret = -ENOMEM; 285 ret = -ENOMEM;
288 goto out; 286 goto out;
289 } 287 }
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 844fa6b9cf5a..9cf2927d0021 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -43,8 +43,6 @@ struct rds_tcp_statistics {
43}; 43};
44 44
45/* tcp.c */ 45/* tcp.c */
46int __init rds_tcp_init(void);
47void rds_tcp_exit(void);
48void rds_tcp_tune(struct socket *sock); 46void rds_tcp_tune(struct socket *sock);
49void rds_tcp_nonagle(struct socket *sock); 47void rds_tcp_nonagle(struct socket *sock);
50void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn); 48void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn);
@@ -61,16 +59,15 @@ void rds_tcp_conn_shutdown(struct rds_connection *conn);
61void rds_tcp_state_change(struct sock *sk); 59void rds_tcp_state_change(struct sock *sk);
62 60
63/* tcp_listen.c */ 61/* tcp_listen.c */
64int __init rds_tcp_listen_init(void); 62int rds_tcp_listen_init(void);
65void rds_tcp_listen_stop(void); 63void rds_tcp_listen_stop(void);
66void rds_tcp_listen_data_ready(struct sock *sk, int bytes); 64void rds_tcp_listen_data_ready(struct sock *sk, int bytes);
67 65
68/* tcp_recv.c */ 66/* tcp_recv.c */
69int __init rds_tcp_recv_init(void); 67int rds_tcp_recv_init(void);
70void rds_tcp_recv_exit(void); 68void rds_tcp_recv_exit(void);
71void rds_tcp_data_ready(struct sock *sk, int bytes); 69void rds_tcp_data_ready(struct sock *sk, int bytes);
72int rds_tcp_recv(struct rds_connection *conn); 70int rds_tcp_recv(struct rds_connection *conn);
73void rds_tcp_inc_purge(struct rds_incoming *inc);
74void rds_tcp_inc_free(struct rds_incoming *inc); 71void rds_tcp_inc_free(struct rds_incoming *inc);
75int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, 72int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov,
76 size_t size); 73 size_t size);
@@ -81,8 +78,6 @@ void rds_tcp_xmit_complete(struct rds_connection *conn);
81int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, 78int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
82 unsigned int hdr_off, unsigned int sg, unsigned int off); 79 unsigned int hdr_off, unsigned int sg, unsigned int off);
83void rds_tcp_write_space(struct sock *sk); 80void rds_tcp_write_space(struct sock *sk);
84int rds_tcp_xmit_cong_map(struct rds_connection *conn,
85 struct rds_cong_map *map, unsigned long offset);
86 81
87/* tcp_stats.c */ 82/* tcp_stats.c */
88DECLARE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats); 83DECLARE_PER_CPU(struct rds_tcp_statistics, rds_tcp_stats);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index c519939e8da9..af95c8e058fc 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -45,7 +45,7 @@ void rds_tcp_state_change(struct sock *sk)
45 45
46 read_lock_bh(&sk->sk_callback_lock); 46 read_lock_bh(&sk->sk_callback_lock);
47 conn = sk->sk_user_data; 47 conn = sk->sk_user_data;
48 if (conn == NULL) { 48 if (!conn) {
49 state_change = sk->sk_state_change; 49 state_change = sk->sk_state_change;
50 goto out; 50 goto out;
51 } 51 }
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 27844f231d10..8b5cc4aa8868 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -116,7 +116,7 @@ void rds_tcp_listen_data_ready(struct sock *sk, int bytes)
116 116
117 read_lock_bh(&sk->sk_callback_lock); 117 read_lock_bh(&sk->sk_callback_lock);
118 ready = sk->sk_user_data; 118 ready = sk->sk_user_data;
119 if (ready == NULL) { /* check for teardown race */ 119 if (!ready) { /* check for teardown race */
120 ready = sk->sk_data_ready; 120 ready = sk->sk_data_ready;
121 goto out; 121 goto out;
122 } 122 }
@@ -135,7 +135,7 @@ out:
135 ready(sk, bytes); 135 ready(sk, bytes);
136} 136}
137 137
138int __init rds_tcp_listen_init(void) 138int rds_tcp_listen_init(void)
139{ 139{
140 struct sockaddr_in sin; 140 struct sockaddr_in sin;
141 struct socket *sock = NULL; 141 struct socket *sock = NULL;
@@ -178,7 +178,7 @@ void rds_tcp_listen_stop(void)
178 struct socket *sock = rds_tcp_listen_sock; 178 struct socket *sock = rds_tcp_listen_sock;
179 struct sock *sk; 179 struct sock *sk;
180 180
181 if (sock == NULL) 181 if (!sock)
182 return; 182 return;
183 183
184 sk = sock->sk; 184 sk = sock->sk;
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index e43797404102..78205e25500a 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -39,7 +39,7 @@
39 39
40static struct kmem_cache *rds_tcp_incoming_slab; 40static struct kmem_cache *rds_tcp_incoming_slab;
41 41
42void rds_tcp_inc_purge(struct rds_incoming *inc) 42static void rds_tcp_inc_purge(struct rds_incoming *inc)
43{ 43{
44 struct rds_tcp_incoming *tinc; 44 struct rds_tcp_incoming *tinc;
45 tinc = container_of(inc, struct rds_tcp_incoming, ti_inc); 45 tinc = container_of(inc, struct rds_tcp_incoming, ti_inc);
@@ -190,10 +190,10 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
190 * processing. 190 * processing.
191 */ 191 */
192 while (left) { 192 while (left) {
193 if (tinc == NULL) { 193 if (!tinc) {
194 tinc = kmem_cache_alloc(rds_tcp_incoming_slab, 194 tinc = kmem_cache_alloc(rds_tcp_incoming_slab,
195 arg->gfp); 195 arg->gfp);
196 if (tinc == NULL) { 196 if (!tinc) {
197 desc->error = -ENOMEM; 197 desc->error = -ENOMEM;
198 goto out; 198 goto out;
199 } 199 }
@@ -229,7 +229,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
229 229
230 if (left && tc->t_tinc_data_rem) { 230 if (left && tc->t_tinc_data_rem) {
231 clone = skb_clone(skb, arg->gfp); 231 clone = skb_clone(skb, arg->gfp);
232 if (clone == NULL) { 232 if (!clone) {
233 desc->error = -ENOMEM; 233 desc->error = -ENOMEM;
234 goto out; 234 goto out;
235 } 235 }
@@ -272,7 +272,8 @@ out:
272} 272}
273 273
274/* the caller has to hold the sock lock */ 274/* the caller has to hold the sock lock */
275int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp, enum km_type km) 275static int rds_tcp_read_sock(struct rds_connection *conn, gfp_t gfp,
276 enum km_type km)
276{ 277{
277 struct rds_tcp_connection *tc = conn->c_transport_data; 278 struct rds_tcp_connection *tc = conn->c_transport_data;
278 struct socket *sock = tc->t_sock; 279 struct socket *sock = tc->t_sock;
@@ -326,7 +327,7 @@ void rds_tcp_data_ready(struct sock *sk, int bytes)
326 327
327 read_lock_bh(&sk->sk_callback_lock); 328 read_lock_bh(&sk->sk_callback_lock);
328 conn = sk->sk_user_data; 329 conn = sk->sk_user_data;
329 if (conn == NULL) { /* check for teardown race */ 330 if (!conn) { /* check for teardown race */
330 ready = sk->sk_data_ready; 331 ready = sk->sk_data_ready;
331 goto out; 332 goto out;
332 } 333 }
@@ -342,12 +343,12 @@ out:
342 ready(sk, bytes); 343 ready(sk, bytes);
343} 344}
344 345
345int __init rds_tcp_recv_init(void) 346int rds_tcp_recv_init(void)
346{ 347{
347 rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming", 348 rds_tcp_incoming_slab = kmem_cache_create("rds_tcp_incoming",
348 sizeof(struct rds_tcp_incoming), 349 sizeof(struct rds_tcp_incoming),
349 0, 0, NULL); 350 0, 0, NULL);
350 if (rds_tcp_incoming_slab == NULL) 351 if (!rds_tcp_incoming_slab)
351 return -ENOMEM; 352 return -ENOMEM;
352 return 0; 353 return 0;
353} 354}
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 2f012a07d94d..1b4fd68f0c7c 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -63,7 +63,7 @@ void rds_tcp_xmit_complete(struct rds_connection *conn)
63} 63}
64 64
65/* the core send_sem serializes this with other xmit and shutdown */ 65/* the core send_sem serializes this with other xmit and shutdown */
66int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len) 66static int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len)
67{ 67{
68 struct kvec vec = { 68 struct kvec vec = {
69 .iov_base = data, 69 .iov_base = data,
@@ -77,56 +77,6 @@ int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len)
77} 77}
78 78
79/* the core send_sem serializes this with other xmit and shutdown */ 79/* the core send_sem serializes this with other xmit and shutdown */
80int rds_tcp_xmit_cong_map(struct rds_connection *conn,
81 struct rds_cong_map *map, unsigned long offset)
82{
83 static struct rds_header rds_tcp_map_header = {
84 .h_flags = RDS_FLAG_CONG_BITMAP,
85 };
86 struct rds_tcp_connection *tc = conn->c_transport_data;
87 unsigned long i;
88 int ret;
89 int copied = 0;
90
91 /* Some problem claims cpu_to_be32(constant) isn't a constant. */
92 rds_tcp_map_header.h_len = cpu_to_be32(RDS_CONG_MAP_BYTES);
93
94 if (offset < sizeof(struct rds_header)) {
95 ret = rds_tcp_sendmsg(tc->t_sock,
96 (void *)&rds_tcp_map_header + offset,
97 sizeof(struct rds_header) - offset);
98 if (ret <= 0)
99 return ret;
100 offset += ret;
101 copied = ret;
102 if (offset < sizeof(struct rds_header))
103 return ret;
104 }
105
106 offset -= sizeof(struct rds_header);
107 i = offset / PAGE_SIZE;
108 offset = offset % PAGE_SIZE;
109 BUG_ON(i >= RDS_CONG_MAP_PAGES);
110
111 do {
112 ret = tc->t_sock->ops->sendpage(tc->t_sock,
113 virt_to_page(map->m_page_addrs[i]),
114 offset, PAGE_SIZE - offset,
115 MSG_DONTWAIT);
116 if (ret <= 0)
117 break;
118 copied += ret;
119 offset += ret;
120 if (offset == PAGE_SIZE) {
121 offset = 0;
122 i++;
123 }
124 } while (i < RDS_CONG_MAP_PAGES);
125
126 return copied ? copied : ret;
127}
128
129/* the core send_sem serializes this with other xmit and shutdown */
130int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, 80int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
131 unsigned int hdr_off, unsigned int sg, unsigned int off) 81 unsigned int hdr_off, unsigned int sg, unsigned int off)
132{ 82{
@@ -166,21 +116,21 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
166 goto out; 116 goto out;
167 } 117 }
168 118
169 while (sg < rm->m_nents) { 119 while (sg < rm->data.op_nents) {
170 ret = tc->t_sock->ops->sendpage(tc->t_sock, 120 ret = tc->t_sock->ops->sendpage(tc->t_sock,
171 sg_page(&rm->m_sg[sg]), 121 sg_page(&rm->data.op_sg[sg]),
172 rm->m_sg[sg].offset + off, 122 rm->data.op_sg[sg].offset + off,
173 rm->m_sg[sg].length - off, 123 rm->data.op_sg[sg].length - off,
174 MSG_DONTWAIT|MSG_NOSIGNAL); 124 MSG_DONTWAIT|MSG_NOSIGNAL);
175 rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->m_sg[sg]), 125 rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]),
176 rm->m_sg[sg].offset + off, rm->m_sg[sg].length - off, 126 rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off,
177 ret); 127 ret);
178 if (ret <= 0) 128 if (ret <= 0)
179 break; 129 break;
180 130
181 off += ret; 131 off += ret;
182 done += ret; 132 done += ret;
183 if (off == rm->m_sg[sg].length) { 133 if (off == rm->data.op_sg[sg].length) {
184 off = 0; 134 off = 0;
185 sg++; 135 sg++;
186 } 136 }
@@ -226,7 +176,7 @@ void rds_tcp_write_space(struct sock *sk)
226 176
227 read_lock_bh(&sk->sk_callback_lock); 177 read_lock_bh(&sk->sk_callback_lock);
228 conn = sk->sk_user_data; 178 conn = sk->sk_user_data;
229 if (conn == NULL) { 179 if (!conn) {
230 write_space = sk->sk_write_space; 180 write_space = sk->sk_write_space;
231 goto out; 181 goto out;
232 } 182 }
diff --git a/net/rds/threads.c b/net/rds/threads.c
index 786c20eaaf5e..0fd90f8c5f59 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -61,7 +61,7 @@
61 * 61 *
62 * Transition to state DISCONNECTING/DOWN: 62 * Transition to state DISCONNECTING/DOWN:
63 * - Inside the shutdown worker; synchronizes with xmit path 63 * - Inside the shutdown worker; synchronizes with xmit path
64 * through c_send_lock, and with connection management callbacks 64 * through RDS_IN_XMIT, and with connection management callbacks
65 * via c_cm_lock. 65 * via c_cm_lock.
66 * 66 *
67 * For receive callbacks, we rely on the underlying transport 67 * For receive callbacks, we rely on the underlying transport
@@ -110,7 +110,7 @@ EXPORT_SYMBOL_GPL(rds_connect_complete);
110 * We should *always* start with a random backoff; otherwise a broken connection 110 * We should *always* start with a random backoff; otherwise a broken connection
111 * will always take several iterations to be re-established. 111 * will always take several iterations to be re-established.
112 */ 112 */
113static void rds_queue_reconnect(struct rds_connection *conn) 113void rds_queue_reconnect(struct rds_connection *conn)
114{ 114{
115 unsigned long rand; 115 unsigned long rand;
116 116
@@ -156,58 +156,6 @@ void rds_connect_worker(struct work_struct *work)
156 } 156 }
157} 157}
158 158
159void rds_shutdown_worker(struct work_struct *work)
160{
161 struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
162
163 /* shut it down unless it's down already */
164 if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) {
165 /*
166 * Quiesce the connection mgmt handlers before we start tearing
167 * things down. We don't hold the mutex for the entire
168 * duration of the shutdown operation, else we may be
169 * deadlocking with the CM handler. Instead, the CM event
170 * handler is supposed to check for state DISCONNECTING
171 */
172 mutex_lock(&conn->c_cm_lock);
173 if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING) &&
174 !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) {
175 rds_conn_error(conn, "shutdown called in state %d\n",
176 atomic_read(&conn->c_state));
177 mutex_unlock(&conn->c_cm_lock);
178 return;
179 }
180 mutex_unlock(&conn->c_cm_lock);
181
182 mutex_lock(&conn->c_send_lock);
183 conn->c_trans->conn_shutdown(conn);
184 rds_conn_reset(conn);
185 mutex_unlock(&conn->c_send_lock);
186
187 if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) {
188 /* This can happen - eg when we're in the middle of tearing
189 * down the connection, and someone unloads the rds module.
 190 * Quite reproducible with loopback connections.
191 * Mostly harmless.
192 */
193 rds_conn_error(conn,
194 "%s: failed to transition to state DOWN, "
195 "current state is %d\n",
196 __func__,
197 atomic_read(&conn->c_state));
198 return;
199 }
200 }
201
202 /* Then reconnect if it's still live.
203 * The passive side of an IB loopback connection is never added
204 * to the conn hash, so we never trigger a reconnect on this
205 * conn - the reconnect is always triggered by the active peer. */
206 cancel_delayed_work(&conn->c_conn_w);
207 if (!hlist_unhashed(&conn->c_hash_node))
208 rds_queue_reconnect(conn);
209}
210
211void rds_send_worker(struct work_struct *work) 159void rds_send_worker(struct work_struct *work)
212{ 160{
213 struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work); 161 struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work);
@@ -252,15 +200,22 @@ void rds_recv_worker(struct work_struct *work)
252 } 200 }
253} 201}
254 202
203void rds_shutdown_worker(struct work_struct *work)
204{
205 struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w);
206
207 rds_conn_shutdown(conn);
208}
209
255void rds_threads_exit(void) 210void rds_threads_exit(void)
256{ 211{
257 destroy_workqueue(rds_wq); 212 destroy_workqueue(rds_wq);
258} 213}
259 214
260int __init rds_threads_init(void) 215int rds_threads_init(void)
261{ 216{
262 rds_wq = create_workqueue("krdsd"); 217 rds_wq = create_singlethread_workqueue("krdsd");
263 if (rds_wq == NULL) 218 if (!rds_wq)
264 return -ENOMEM; 219 return -ENOMEM;
265 220
266 return 0; 221 return 0;
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 7e1067901353..7f2ac4fec367 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -71,19 +71,28 @@ void rds_trans_unregister(struct rds_transport *trans)
71} 71}
72EXPORT_SYMBOL_GPL(rds_trans_unregister); 72EXPORT_SYMBOL_GPL(rds_trans_unregister);
73 73
74void rds_trans_put(struct rds_transport *trans)
75{
76 if (trans && trans->t_owner)
77 module_put(trans->t_owner);
78}
79
74struct rds_transport *rds_trans_get_preferred(__be32 addr) 80struct rds_transport *rds_trans_get_preferred(__be32 addr)
75{ 81{
76 struct rds_transport *ret = NULL; 82 struct rds_transport *ret = NULL;
77 int i; 83 struct rds_transport *trans;
84 unsigned int i;
78 85
79 if (IN_LOOPBACK(ntohl(addr))) 86 if (IN_LOOPBACK(ntohl(addr)))
80 return &rds_loop_transport; 87 return &rds_loop_transport;
81 88
82 down_read(&rds_trans_sem); 89 down_read(&rds_trans_sem);
83 for (i = 0; i < RDS_TRANS_COUNT; i++) 90 for (i = 0; i < RDS_TRANS_COUNT; i++) {
84 { 91 trans = transports[i];
85 if (transports[i] && (transports[i]->laddr_check(addr) == 0)) { 92
86 ret = transports[i]; 93 if (trans && (trans->laddr_check(addr) == 0) &&
94 (!trans->t_owner || try_module_get(trans->t_owner))) {
95 ret = trans;
87 break; 96 break;
88 } 97 }
89 } 98 }
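rds_trans_get_preferred() now pins the transport's owning module with try_module_get() before handing it out, balanced by the new rds_trans_put(). A hedged sketch of the intended caller pairing; only the two rds_trans_* helpers and the t_owner field come from this patch, while the caller and its error code are assumptions:

	/* Sketch: every transport returned by rds_trans_get_preferred()
	 * holds a reference on trans->t_owner until rds_trans_put(). */
	static int example_bind_transport(struct rds_connection *conn, __be32 addr)
	{
		struct rds_transport *trans = rds_trans_get_preferred(addr);

		if (!trans)
			return -EADDRNOTAVAIL;	/* nothing claims this address */
		conn->c_trans = trans;
		return 0;
		/* ... on connection teardown: rds_trans_put(conn->c_trans); */
	}
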
diff --git a/net/rds/xlist.h b/net/rds/xlist.h
new file mode 100644
index 000000000000..e6b5190daddd
--- /dev/null
+++ b/net/rds/xlist.h
@@ -0,0 +1,80 @@
1#ifndef _LINUX_XLIST_H
2#define _LINUX_XLIST_H
3
4#include <linux/stddef.h>
5#include <linux/poison.h>
6#include <linux/prefetch.h>
7#include <asm/system.h>
8
9struct xlist_head {
10 struct xlist_head *next;
11};
12
13static inline void INIT_XLIST_HEAD(struct xlist_head *list)
14{
15 list->next = NULL;
16}
17
18static inline int xlist_empty(struct xlist_head *head)
19{
20 return head->next == NULL;
21}
22
23static inline void xlist_add(struct xlist_head *new, struct xlist_head *tail,
24 struct xlist_head *head)
25{
26 struct xlist_head *cur;
27 struct xlist_head *check;
28
29 while (1) {
30 cur = head->next;
31 tail->next = cur;
32 check = cmpxchg(&head->next, cur, new);
33 if (check == cur)
34 break;
35 }
36}
37
38static inline struct xlist_head *xlist_del_head(struct xlist_head *head)
39{
40 struct xlist_head *cur;
41 struct xlist_head *check;
42 struct xlist_head *next;
43
44 while (1) {
45 cur = head->next;
46 if (!cur)
47 goto out;
48
49 next = cur->next;
50 check = cmpxchg(&head->next, cur, next);
51 if (check == cur)
52 goto out;
53 }
54out:
55 return cur;
56}
57
58static inline struct xlist_head *xlist_del_head_fast(struct xlist_head *head)
59{
60 struct xlist_head *cur;
61
62 cur = head->next;
63 if (!cur)
64 return NULL;
65
66 head->next = cur->next;
67 return cur;
68}
69
70static inline void xlist_splice(struct xlist_head *list,
71 struct xlist_head *head)
72{
73 struct xlist_head *cur;
74
75 WARN_ON(head->next);
76 cur = xchg(&list->next, NULL);
77 head->next = cur;
78}
79
80#endif
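The new xlist.h is a minimal lock-free LIFO built on cmpxchg(): xlist_add() pushes a chain (a single node when new == tail) and xlist_del_head() pops one node; the _fast pop and xlist_splice() write the head non-atomically, so they assume the caller has exclusive access to it. A usage sketch under invented names (struct frag and the pool are illustrative only, not part of the patch):

	struct frag {
		struct xlist_head x;	/* link embedded in the item */
		char buf[64];
	};

	static struct xlist_head frag_pool;	/* INIT_XLIST_HEAD() at init time */

	static void frag_release(struct frag *f)
	{
		/* push one node: new and tail are the same element */
		xlist_add(&f->x, &f->x, &frag_pool);
	}

	static struct frag *frag_grab(void)
	{
		struct xlist_head *node = xlist_del_head(&frag_pool);

		return node ? container_of(node, struct frag, x) : NULL;
	}
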
diff --git a/net/rfkill/input.c b/net/rfkill/input.c
index 3713d7ecab96..1bca6d49ec96 100644
--- a/net/rfkill/input.c
+++ b/net/rfkill/input.c
@@ -142,7 +142,7 @@ static unsigned long rfkill_last_scheduled;
142static unsigned long rfkill_ratelimit(const unsigned long last) 142static unsigned long rfkill_ratelimit(const unsigned long last)
143{ 143{
144 const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY); 144 const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY);
145 return (time_after(jiffies, last + delay)) ? 0 : delay; 145 return time_after(jiffies, last + delay) ? 0 : delay;
146} 146}
147 147
148static void rfkill_schedule_ratelimited(void) 148static void rfkill_schedule_ratelimited(void)
diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c
index a750a28e0221..fa5f5641a2c2 100644
--- a/net/rose/rose_link.c
+++ b/net/rose/rose_link.c
@@ -114,7 +114,7 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh)
114 if (ax25s) 114 if (ax25s)
115 ax25_cb_put(ax25s); 115 ax25_cb_put(ax25s);
116 116
117 return (neigh->ax25 != NULL); 117 return neigh->ax25 != NULL;
118} 118}
119 119
120/* 120/*
@@ -137,7 +137,7 @@ static int rose_link_up(struct rose_neigh *neigh)
137 if (ax25s) 137 if (ax25s)
138 ax25_cb_put(ax25s); 138 ax25_cb_put(ax25s);
139 139
140 return (neigh->ax25 != NULL); 140 return neigh->ax25 != NULL;
141} 141}
142 142
143/* 143/*
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2f691fb180d1..a36270a994d7 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -518,6 +518,16 @@ config NET_ACT_SKBEDIT
518 To compile this code as a module, choose M here: the 518 To compile this code as a module, choose M here: the
519 module will be called act_skbedit. 519 module will be called act_skbedit.
520 520
521config NET_ACT_CSUM
522 tristate "Checksum Updating"
523 depends on NET_CLS_ACT && INET
524 ---help---
 525	  Say Y here to update common checksums (IPv4 header, ICMP, IGMP,
 526	  TCP, UDP and UDPLITE) after direct packet alterations.
527
528 To compile this code as a module, choose M here: the
529 module will be called act_csum.
530
521config NET_CLS_IND 531config NET_CLS_IND
522 bool "Incoming device classification" 532 bool "Incoming device classification"
523 depends on NET_CLS_U32 || NET_CLS_FW 533 depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index f14e71bfa58f..960f5dba6304 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o 15obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o 16obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o 17obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
18obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
18obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o 19obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
19obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o 20obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
20obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o 21obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
new file mode 100644
index 000000000000..67dc7ce9b63a
--- /dev/null
+++ b/net/sched/act_csum.c
@@ -0,0 +1,595 @@
1/*
2 * Checksum updating actions
3 *
4 * Copyright (c) 2010 Gregoire Baron <baronchon@n7mm.org>
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the Free
8 * Software Foundation; either version 2 of the License, or (at your option)
9 * any later version.
10 *
11 */
12
13#include <linux/types.h>
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/spinlock.h>
18
19#include <linux/netlink.h>
20#include <net/netlink.h>
21#include <linux/rtnetlink.h>
22
23#include <linux/skbuff.h>
24
25#include <net/ip.h>
26#include <net/ipv6.h>
27#include <net/icmp.h>
28#include <linux/icmpv6.h>
29#include <linux/igmp.h>
30#include <net/tcp.h>
31#include <net/udp.h>
32#include <net/ip6_checksum.h>
33
34#include <net/act_api.h>
35
36#include <linux/tc_act/tc_csum.h>
37#include <net/tc_act/tc_csum.h>
38
39#define CSUM_TAB_MASK 15
40static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1];
41static u32 csum_idx_gen;
42static DEFINE_RWLOCK(csum_lock);
43
44static struct tcf_hashinfo csum_hash_info = {
45 .htab = tcf_csum_ht,
46 .hmask = CSUM_TAB_MASK,
47 .lock = &csum_lock,
48};
49
50static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = {
51 [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), },
52};
53
54static int tcf_csum_init(struct nlattr *nla, struct nlattr *est,
55 struct tc_action *a, int ovr, int bind)
56{
57 struct nlattr *tb[TCA_CSUM_MAX + 1];
58 struct tc_csum *parm;
59 struct tcf_common *pc;
60 struct tcf_csum *p;
61 int ret = 0, err;
62
63 if (nla == NULL)
64 return -EINVAL;
65
 66	err = nla_parse_nested(tb, TCA_CSUM_MAX, nla, csum_policy);
67 if (err < 0)
68 return err;
69
70 if (tb[TCA_CSUM_PARMS] == NULL)
71 return -EINVAL;
72 parm = nla_data(tb[TCA_CSUM_PARMS]);
73
74 pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info);
75 if (!pc) {
76 pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind,
77 &csum_idx_gen, &csum_hash_info);
78 if (IS_ERR(pc))
79 return PTR_ERR(pc);
80 p = to_tcf_csum(pc);
81 ret = ACT_P_CREATED;
82 } else {
83 p = to_tcf_csum(pc);
84 if (!ovr) {
85 tcf_hash_release(pc, bind, &csum_hash_info);
86 return -EEXIST;
87 }
88 }
89
90 spin_lock_bh(&p->tcf_lock);
91 p->tcf_action = parm->action;
92 p->update_flags = parm->update_flags;
93 spin_unlock_bh(&p->tcf_lock);
94
95 if (ret == ACT_P_CREATED)
96 tcf_hash_insert(pc, &csum_hash_info);
97
98 return ret;
99}
100
101static int tcf_csum_cleanup(struct tc_action *a, int bind)
102{
103 struct tcf_csum *p = a->priv;
104 return tcf_hash_release(&p->common, bind, &csum_hash_info);
105}
106
107/**
108 * tcf_csum_skb_nextlayer - Get next layer pointer
109 * @skb: sk_buff to use
110 * @ihl: previous summed headers length
111 * @ipl: complete packet length
112 * @jhl: next header length
113 *
 114 * Check that the expected next layer is available in the specified sk_buff.
 115 * Return a pointer to the next layer if it is, NULL otherwise.
116 */
117static void *tcf_csum_skb_nextlayer(struct sk_buff *skb,
118 unsigned int ihl, unsigned int ipl,
119 unsigned int jhl)
120{
121 int ntkoff = skb_network_offset(skb);
122 int hl = ihl + jhl;
123
124 if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) ||
125 (skb_cloned(skb) &&
126 !skb_clone_writable(skb, hl + ntkoff) &&
127 pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
128 return NULL;
129 else
130 return (void *)(skb_network_header(skb) + ihl);
131}
132
133static int tcf_csum_ipv4_icmp(struct sk_buff *skb,
134 unsigned int ihl, unsigned int ipl)
135{
136 struct icmphdr *icmph;
137
138 icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph));
139 if (icmph == NULL)
140 return 0;
141
142 icmph->checksum = 0;
143 skb->csum = csum_partial(icmph, ipl - ihl, 0);
144 icmph->checksum = csum_fold(skb->csum);
145
146 skb->ip_summed = CHECKSUM_NONE;
147
148 return 1;
149}
150
151static int tcf_csum_ipv4_igmp(struct sk_buff *skb,
152 unsigned int ihl, unsigned int ipl)
153{
154 struct igmphdr *igmph;
155
156 igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph));
157 if (igmph == NULL)
158 return 0;
159
160 igmph->csum = 0;
161 skb->csum = csum_partial(igmph, ipl - ihl, 0);
162 igmph->csum = csum_fold(skb->csum);
163
164 skb->ip_summed = CHECKSUM_NONE;
165
166 return 1;
167}
168
169static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h,
170 unsigned int ihl, unsigned int ipl)
171{
172 struct icmp6hdr *icmp6h;
173
174 icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h));
175 if (icmp6h == NULL)
176 return 0;
177
178 icmp6h->icmp6_cksum = 0;
179 skb->csum = csum_partial(icmp6h, ipl - ihl, 0);
180 icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
181 ipl - ihl, IPPROTO_ICMPV6,
182 skb->csum);
183
184 skb->ip_summed = CHECKSUM_NONE;
185
186 return 1;
187}
188
189static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph,
190 unsigned int ihl, unsigned int ipl)
191{
192 struct tcphdr *tcph;
193
194 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
195 if (tcph == NULL)
196 return 0;
197
198 tcph->check = 0;
199 skb->csum = csum_partial(tcph, ipl - ihl, 0);
200 tcph->check = tcp_v4_check(ipl - ihl,
201 iph->saddr, iph->daddr, skb->csum);
202
203 skb->ip_summed = CHECKSUM_NONE;
204
205 return 1;
206}
207
208static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h,
209 unsigned int ihl, unsigned int ipl)
210{
211 struct tcphdr *tcph;
212
213 tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph));
214 if (tcph == NULL)
215 return 0;
216
217 tcph->check = 0;
218 skb->csum = csum_partial(tcph, ipl - ihl, 0);
219 tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
220 ipl - ihl, IPPROTO_TCP,
221 skb->csum);
222
223 skb->ip_summed = CHECKSUM_NONE;
224
225 return 1;
226}
227
228static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph,
229 unsigned int ihl, unsigned int ipl, int udplite)
230{
231 struct udphdr *udph;
232 u16 ul;
233
234 /*
 235	 * Support both the UDP and UDPLITE checksum algorithms.  Don't use
 236	 * udph->len as the real datagram length without a protocol check:
 237	 * UDPLITE reuses that field as the checksum coverage, so take the
 238	 * length from iph->tot_len, or just ipl.
239 */
240
241 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
242 if (udph == NULL)
243 return 0;
244
245 ul = ntohs(udph->len);
246
247 if (udplite || udph->check) {
248
249 udph->check = 0;
250
251 if (udplite) {
252 if (ul == 0)
253 skb->csum = csum_partial(udph, ipl - ihl, 0);
254 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
255 skb->csum = csum_partial(udph, ul, 0);
256 else
257 goto ignore_obscure_skb;
258 } else {
259 if (ul != ipl - ihl)
260 goto ignore_obscure_skb;
261
262 skb->csum = csum_partial(udph, ul, 0);
263 }
264
265 udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
266 ul, iph->protocol,
267 skb->csum);
268
269 if (!udph->check)
270 udph->check = CSUM_MANGLED_0;
271 }
272
273 skb->ip_summed = CHECKSUM_NONE;
274
275ignore_obscure_skb:
276 return 1;
277}
278
279static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h,
280 unsigned int ihl, unsigned int ipl, int udplite)
281{
282 struct udphdr *udph;
283 u16 ul;
284
285 /*
 286	 * Support both the UDP and UDPLITE checksum algorithms.  Don't use
 287	 * udph->len as the real datagram length without a protocol check:
 288	 * UDPLITE reuses that field as the checksum coverage, so take the
 289	 * length from ip6h->payload_len + sizeof(*ip6h), or just ipl.
290 */
291
292 udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph));
293 if (udph == NULL)
294 return 0;
295
296 ul = ntohs(udph->len);
297
298 udph->check = 0;
299
300 if (udplite) {
301 if (ul == 0)
302 skb->csum = csum_partial(udph, ipl - ihl, 0);
303
304 else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl))
305 skb->csum = csum_partial(udph, ul, 0);
306
307 else
308 goto ignore_obscure_skb;
309 } else {
310 if (ul != ipl - ihl)
311 goto ignore_obscure_skb;
312
313 skb->csum = csum_partial(udph, ul, 0);
314 }
315
316 udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul,
317 udplite ? IPPROTO_UDPLITE : IPPROTO_UDP,
318 skb->csum);
319
320 if (!udph->check)
321 udph->check = CSUM_MANGLED_0;
322
323 skb->ip_summed = CHECKSUM_NONE;
324
325ignore_obscure_skb:
326 return 1;
327}
328
329static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags)
330{
331 struct iphdr *iph;
332 int ntkoff;
333
334 ntkoff = skb_network_offset(skb);
335
336 if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff))
337 goto fail;
338
339 iph = ip_hdr(skb);
340
341 switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) {
342 case IPPROTO_ICMP:
343 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
344 if (!tcf_csum_ipv4_icmp(skb, iph->ihl * 4,
345 ntohs(iph->tot_len)))
346 goto fail;
347 break;
348 case IPPROTO_IGMP:
349 if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP)
350 if (!tcf_csum_ipv4_igmp(skb, iph->ihl * 4,
351 ntohs(iph->tot_len)))
352 goto fail;
353 break;
354 case IPPROTO_TCP:
355 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
356 if (!tcf_csum_ipv4_tcp(skb, iph, iph->ihl * 4,
357 ntohs(iph->tot_len)))
358 goto fail;
359 break;
360 case IPPROTO_UDP:
361 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
362 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
363 ntohs(iph->tot_len), 0))
364 goto fail;
365 break;
366 case IPPROTO_UDPLITE:
367 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
368 if (!tcf_csum_ipv4_udp(skb, iph, iph->ihl * 4,
369 ntohs(iph->tot_len), 1))
370 goto fail;
371 break;
372 }
373
374 if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) {
375 if (skb_cloned(skb) &&
376 !skb_clone_writable(skb, sizeof(*iph) + ntkoff) &&
377 pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
378 goto fail;
379
380 ip_send_check(iph);
381 }
382
383 return 1;
384
385fail:
386 return 0;
387}
388
389static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh,
390 unsigned int ixhl, unsigned int *pl)
391{
392 int off, len, optlen;
393 unsigned char *xh = (void *)ip6xh;
394
395 off = sizeof(*ip6xh);
396 len = ixhl - off;
397
398 while (len > 1) {
399 switch (xh[off]) {
400 case IPV6_TLV_PAD0:
401 optlen = 1;
402 break;
403 case IPV6_TLV_JUMBO:
404 optlen = xh[off + 1] + 2;
405 if (optlen != 6 || len < 6 || (off & 3) != 2)
406 /* wrong jumbo option length/alignment */
407 return 0;
408 *pl = ntohl(*(__be32 *)(xh + off + 2));
409 goto done;
410 default:
411 optlen = xh[off + 1] + 2;
412 if (optlen > len)
413 /* ignore obscure options */
414 goto done;
415 break;
416 }
417 off += optlen;
418 len -= optlen;
419 }
420
421done:
422 return 1;
423}
424
425static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags)
426{
427 struct ipv6hdr *ip6h;
428 struct ipv6_opt_hdr *ip6xh;
429 unsigned int hl, ixhl;
430 unsigned int pl;
431 int ntkoff;
432 u8 nexthdr;
433
434 ntkoff = skb_network_offset(skb);
435
436 hl = sizeof(*ip6h);
437
438 if (!pskb_may_pull(skb, hl + ntkoff))
439 goto fail;
440
441 ip6h = ipv6_hdr(skb);
442
443 pl = ntohs(ip6h->payload_len);
444 nexthdr = ip6h->nexthdr;
445
446 do {
447 switch (nexthdr) {
448 case NEXTHDR_FRAGMENT:
449 goto ignore_skb;
450 case NEXTHDR_ROUTING:
451 case NEXTHDR_HOP:
452 case NEXTHDR_DEST:
453 if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff))
454 goto fail;
455 ip6xh = (void *)(skb_network_header(skb) + hl);
456 ixhl = ipv6_optlen(ip6xh);
457 if (!pskb_may_pull(skb, hl + ixhl + ntkoff))
458 goto fail;
459 if ((nexthdr == NEXTHDR_HOP) &&
460 !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl)))
461 goto fail;
462 nexthdr = ip6xh->nexthdr;
463 hl += ixhl;
464 break;
465 case IPPROTO_ICMPV6:
466 if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP)
467 if (!tcf_csum_ipv6_icmp(skb, ip6h,
468 hl, pl + sizeof(*ip6h)))
469 goto fail;
470 goto done;
471 case IPPROTO_TCP:
472 if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP)
473 if (!tcf_csum_ipv6_tcp(skb, ip6h,
474 hl, pl + sizeof(*ip6h)))
475 goto fail;
476 goto done;
477 case IPPROTO_UDP:
478 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP)
479 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
480 pl + sizeof(*ip6h), 0))
481 goto fail;
482 goto done;
483 case IPPROTO_UDPLITE:
484 if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE)
485 if (!tcf_csum_ipv6_udp(skb, ip6h, hl,
486 pl + sizeof(*ip6h), 1))
487 goto fail;
488 goto done;
489 default:
490 goto ignore_skb;
491 }
492 } while (pskb_may_pull(skb, hl + 1 + ntkoff));
493
494done:
495ignore_skb:
496 return 1;
497
498fail:
499 return 0;
500}
501
502static int tcf_csum(struct sk_buff *skb,
503 struct tc_action *a, struct tcf_result *res)
504{
505 struct tcf_csum *p = a->priv;
506 int action;
507 u32 update_flags;
508
509 spin_lock(&p->tcf_lock);
510 p->tcf_tm.lastuse = jiffies;
511 p->tcf_bstats.bytes += qdisc_pkt_len(skb);
512 p->tcf_bstats.packets++;
513 action = p->tcf_action;
514 update_flags = p->update_flags;
515 spin_unlock(&p->tcf_lock);
516
517 if (unlikely(action == TC_ACT_SHOT))
518 goto drop;
519
520 switch (skb->protocol) {
521 case cpu_to_be16(ETH_P_IP):
522 if (!tcf_csum_ipv4(skb, update_flags))
523 goto drop;
524 break;
525 case cpu_to_be16(ETH_P_IPV6):
526 if (!tcf_csum_ipv6(skb, update_flags))
527 goto drop;
528 break;
529 }
530
531 return action;
532
533drop:
534 spin_lock(&p->tcf_lock);
535 p->tcf_qstats.drops++;
536 spin_unlock(&p->tcf_lock);
537 return TC_ACT_SHOT;
538}
539
540static int tcf_csum_dump(struct sk_buff *skb,
541 struct tc_action *a, int bind, int ref)
542{
543 unsigned char *b = skb_tail_pointer(skb);
544 struct tcf_csum *p = a->priv;
545 struct tc_csum opt = {
546 .update_flags = p->update_flags,
547 .index = p->tcf_index,
548 .action = p->tcf_action,
549 .refcnt = p->tcf_refcnt - ref,
550 .bindcnt = p->tcf_bindcnt - bind,
551 };
552 struct tcf_t t;
553
554 NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt);
555 t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install);
556 t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse);
557 t.expires = jiffies_to_clock_t(p->tcf_tm.expires);
558 NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t);
559
560 return skb->len;
561
562nla_put_failure:
563 nlmsg_trim(skb, b);
564 return -1;
565}
566
567static struct tc_action_ops act_csum_ops = {
568 .kind = "csum",
569 .hinfo = &csum_hash_info,
570 .type = TCA_ACT_CSUM,
571 .capab = TCA_CAP_NONE,
572 .owner = THIS_MODULE,
573 .act = tcf_csum,
574 .dump = tcf_csum_dump,
575 .cleanup = tcf_csum_cleanup,
576 .lookup = tcf_hash_search,
577 .init = tcf_csum_init,
578 .walk = tcf_generic_walker
579};
580
581MODULE_DESCRIPTION("Checksum updating actions");
582MODULE_LICENSE("GPL");
583
584static int __init csum_init_module(void)
585{
586 return tcf_register_action(&act_csum_ops);
587}
588
589static void __exit csum_cleanup_module(void)
590{
591 tcf_unregister_action(&act_csum_ops);
592}
593
594module_init(csum_init_module);
595module_exit(csum_cleanup_module);
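Every tcf_csum_ipv4_*/ipv6_* helper above follows the same four-step recipe: make the header writable via tcf_csum_skb_nextlayer(), zero the checksum field, csum_partial() over the covered bytes, then fold the result (through a pseudo-header for TCP/UDP). A condensed restatement of the IPv4/TCP case, directly mirroring tcf_csum_ipv4_tcp() above (only the function name here is invented):

	/* Sketch of the shared checksum-rewrite pattern. */
	static void example_fix_tcp_csum(struct sk_buff *skb, struct iphdr *iph,
					 struct tcphdr *tcph, unsigned int tcplen)
	{
		tcph->check = 0;				/* 1: zero the field */
		skb->csum = csum_partial(tcph, tcplen, 0);	/* 2: sum the segment */
		tcph->check = tcp_v4_check(tcplen, iph->saddr,	/* 3: fold w/ pseudo-hdr */
					   iph->daddr, skb->csum);
		skb->ip_summed = CHECKSUM_NONE;			/* 4: drop stale offload state */
	}
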
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index c7e59e6ec349..8daef9632255 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -39,7 +39,7 @@ static struct tcf_hashinfo ipt_hash_info = {
39 .lock = &ipt_lock, 39 .lock = &ipt_lock,
40}; 40};
41 41
42static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) 42static int ipt_init_target(struct xt_entry_target *t, char *table, unsigned int hook)
43{ 43{
44 struct xt_tgchk_param par; 44 struct xt_tgchk_param par;
45 struct xt_target *target; 45 struct xt_target *target;
@@ -66,7 +66,7 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
66 return 0; 66 return 0;
67} 67}
68 68
69static void ipt_destroy_target(struct ipt_entry_target *t) 69static void ipt_destroy_target(struct xt_entry_target *t)
70{ 70{
71 struct xt_tgdtor_param par = { 71 struct xt_tgdtor_param par = {
72 .target = t->u.kernel.target, 72 .target = t->u.kernel.target,
@@ -99,7 +99,7 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
99 [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ }, 99 [TCA_IPT_TABLE] = { .type = NLA_STRING, .len = IFNAMSIZ },
100 [TCA_IPT_HOOK] = { .type = NLA_U32 }, 100 [TCA_IPT_HOOK] = { .type = NLA_U32 },
101 [TCA_IPT_INDEX] = { .type = NLA_U32 }, 101 [TCA_IPT_INDEX] = { .type = NLA_U32 },
102 [TCA_IPT_TARG] = { .len = sizeof(struct ipt_entry_target) }, 102 [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
103}; 103};
104 104
105static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est, 105static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
@@ -108,7 +108,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
108 struct nlattr *tb[TCA_IPT_MAX + 1]; 108 struct nlattr *tb[TCA_IPT_MAX + 1];
109 struct tcf_ipt *ipt; 109 struct tcf_ipt *ipt;
110 struct tcf_common *pc; 110 struct tcf_common *pc;
111 struct ipt_entry_target *td, *t; 111 struct xt_entry_target *td, *t;
112 char *tname; 112 char *tname;
113 int ret = 0, err; 113 int ret = 0, err;
114 u32 hook = 0; 114 u32 hook = 0;
@@ -126,7 +126,7 @@ static int tcf_ipt_init(struct nlattr *nla, struct nlattr *est,
126 if (tb[TCA_IPT_TARG] == NULL) 126 if (tb[TCA_IPT_TARG] == NULL)
127 return -EINVAL; 127 return -EINVAL;
128 128
129 td = (struct ipt_entry_target *)nla_data(tb[TCA_IPT_TARG]); 129 td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]);
130 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) 130 if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size)
131 return -EINVAL; 131 return -EINVAL;
132 132
@@ -230,7 +230,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
230 result = TC_ACT_SHOT; 230 result = TC_ACT_SHOT;
231 ipt->tcf_qstats.drops++; 231 ipt->tcf_qstats.drops++;
232 break; 232 break;
233 case IPT_CONTINUE: 233 case XT_CONTINUE:
234 result = TC_ACT_PIPE; 234 result = TC_ACT_PIPE;
235 break; 235 break;
236 default: 236 default:
@@ -249,7 +249,7 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int
249{ 249{
250 unsigned char *b = skb_tail_pointer(skb); 250 unsigned char *b = skb_tail_pointer(skb);
251 struct tcf_ipt *ipt = a->priv; 251 struct tcf_ipt *ipt = a->priv;
252 struct ipt_entry_target *t; 252 struct xt_entry_target *t;
253 struct tcf_t tm; 253 struct tcf_t tm;
254 struct tc_cnt c; 254 struct tc_cnt c;
255 255
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index e17096e3913c..5b271a18bc3a 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -111,44 +111,41 @@ static u32 flow_get_proto(struct sk_buff *skb)
111 } 111 }
112} 112}
113 113
114static int has_ports(u8 protocol)
115{
116 switch (protocol) {
117 case IPPROTO_TCP:
118 case IPPROTO_UDP:
119 case IPPROTO_UDPLITE:
120 case IPPROTO_SCTP:
121 case IPPROTO_DCCP:
122 case IPPROTO_ESP:
123 return 1;
124 default:
125 return 0;
126 }
127}
128
129static u32 flow_get_proto_src(struct sk_buff *skb) 114static u32 flow_get_proto_src(struct sk_buff *skb)
130{ 115{
131 switch (skb->protocol) { 116 switch (skb->protocol) {
132 case htons(ETH_P_IP): { 117 case htons(ETH_P_IP): {
133 struct iphdr *iph; 118 struct iphdr *iph;
119 int poff;
134 120
135 if (!pskb_network_may_pull(skb, sizeof(*iph))) 121 if (!pskb_network_may_pull(skb, sizeof(*iph)))
136 break; 122 break;
137 iph = ip_hdr(skb); 123 iph = ip_hdr(skb);
138 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 124 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
139 has_ports(iph->protocol) && 125 break;
140 pskb_network_may_pull(skb, iph->ihl * 4 + 2)) 126 poff = proto_ports_offset(iph->protocol);
141 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); 127 if (poff >= 0 &&
128 pskb_network_may_pull(skb, iph->ihl * 4 + 2 + poff)) {
129 iph = ip_hdr(skb);
130 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
131 poff));
132 }
142 break; 133 break;
143 } 134 }
144 case htons(ETH_P_IPV6): { 135 case htons(ETH_P_IPV6): {
145 struct ipv6hdr *iph; 136 struct ipv6hdr *iph;
137 int poff;
146 138
147 if (!pskb_network_may_pull(skb, sizeof(*iph) + 2)) 139 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 break; 140 break;
149 iph = ipv6_hdr(skb); 141 iph = ipv6_hdr(skb);
150 if (has_ports(iph->nexthdr)) 142 poff = proto_ports_offset(iph->nexthdr);
151 return ntohs(*(__be16 *)&iph[1]); 143 if (poff >= 0 &&
144 pskb_network_may_pull(skb, sizeof(*iph) + poff + 2)) {
145 iph = ipv6_hdr(skb);
146 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
147 poff));
148 }
152 break; 149 break;
153 } 150 }
154 } 151 }
@@ -161,24 +158,36 @@ static u32 flow_get_proto_dst(struct sk_buff *skb)
161 switch (skb->protocol) { 158 switch (skb->protocol) {
162 case htons(ETH_P_IP): { 159 case htons(ETH_P_IP): {
163 struct iphdr *iph; 160 struct iphdr *iph;
161 int poff;
164 162
165 if (!pskb_network_may_pull(skb, sizeof(*iph))) 163 if (!pskb_network_may_pull(skb, sizeof(*iph)))
166 break; 164 break;
167 iph = ip_hdr(skb); 165 iph = ip_hdr(skb);
168 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 166 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
169 has_ports(iph->protocol) && 167 break;
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 168 poff = proto_ports_offset(iph->protocol);
171 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); 169 if (poff >= 0 &&
170 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
171 iph = ip_hdr(skb);
172 return ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 +
173 2 + poff));
174 }
172 break; 175 break;
173 } 176 }
174 case htons(ETH_P_IPV6): { 177 case htons(ETH_P_IPV6): {
175 struct ipv6hdr *iph; 178 struct ipv6hdr *iph;
179 int poff;
176 180
177 if (!pskb_network_may_pull(skb, sizeof(*iph) + 4)) 181 if (!pskb_network_may_pull(skb, sizeof(*iph)))
178 break; 182 break;
179 iph = ipv6_hdr(skb); 183 iph = ipv6_hdr(skb);
180 if (has_ports(iph->nexthdr)) 184 poff = proto_ports_offset(iph->nexthdr);
181 return ntohs(*(__be16 *)((void *)&iph[1] + 2)); 185 if (poff >= 0 &&
186 pskb_network_may_pull(skb, sizeof(*iph) + poff + 4)) {
187 iph = ipv6_hdr(skb);
188 return ntohs(*(__be16 *)((void *)iph + sizeof(*iph) +
189 poff + 2));
190 }
182 break; 191 break;
183 } 192 }
184 } 193 }
@@ -297,6 +306,11 @@ static u32 flow_get_vlan_tag(const struct sk_buff *skb)
297 return tag & VLAN_VID_MASK; 306 return tag & VLAN_VID_MASK;
298} 307}
299 308
309static u32 flow_get_rxhash(struct sk_buff *skb)
310{
311 return skb_get_rxhash(skb);
312}
313
300static u32 flow_key_get(struct sk_buff *skb, int key) 314static u32 flow_key_get(struct sk_buff *skb, int key)
301{ 315{
302 switch (key) { 316 switch (key) {
@@ -334,6 +348,8 @@ static u32 flow_key_get(struct sk_buff *skb, int key)
334 return flow_get_skgid(skb); 348 return flow_get_skgid(skb);
335 case FLOW_KEY_VLAN_TAG: 349 case FLOW_KEY_VLAN_TAG:
336 return flow_get_vlan_tag(skb); 350 return flow_get_vlan_tag(skb);
351 case FLOW_KEY_RXHASH:
352 return flow_get_rxhash(skb);
337 default: 353 default:
338 WARN_ON(1); 354 WARN_ON(1);
339 return 0; 355 return 0;
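The cls_flow rewrite above drops the local has_ports() predicate in favour of proto_ports_offset(), which returns the byte offset of the port (or SPI) pair within the transport header, or a negative value for protocols that carry none. Reconstructed from its users here, the helper looks roughly like this (see include/net/ip.h for the authoritative definition):

	static inline int proto_ports_offset(int proto)
	{
		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_DCCP:
		case IPPROTO_ESP:	/* SPI */
		case IPPROTO_SCTP:
		case IPPROTO_UDPLITE:
			return 0;
		case IPPROTO_AH:	/* SPI */
			return 4;
		default:
			return -EINVAL;
		}
	}
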
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 3bcac8aa333c..34da5e29ea1a 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -223,6 +223,11 @@ META_COLLECTOR(int_maclen)
223 dst->value = skb->mac_len; 223 dst->value = skb->mac_len;
224} 224}
225 225
226META_COLLECTOR(int_rxhash)
227{
228 dst->value = skb_get_rxhash(skb);
229}
230
226/************************************************************************** 231/**************************************************************************
227 * Netfilter 232 * Netfilter
228 **************************************************************************/ 233 **************************************************************************/
@@ -541,6 +546,7 @@ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
541 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), 546 [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off),
542 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), 547 [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend),
543 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag), 548 [META_ID(VLAN_TAG)] = META_FUNC(int_vlan_tag),
549 [META_ID(RXHASH)] = META_FUNC(int_rxhash),
544 } 550 }
545}; 551};
546 552
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 408eea7086aa..b22ca2d1cebc 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -240,7 +240,10 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
240 if (q) 240 if (q)
241 goto out; 241 goto out;
242 242
243 q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); 243 if (dev_ingress_queue(dev))
244 q = qdisc_match_from_root(
245 dev_ingress_queue(dev)->qdisc_sleeping,
246 handle);
244out: 247out:
245 return q; 248 return q;
246} 249}
@@ -360,7 +363,7 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
360 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16); 363 tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
361 } 364 }
362 365
363 if (!s || tsize != s->tsize || (!tab && tsize > 0)) 366 if (tsize != s->tsize || (!tab && tsize > 0))
364 return ERR_PTR(-EINVAL); 367 return ERR_PTR(-EINVAL);
365 368
366 spin_lock(&qdisc_stab_lock); 369 spin_lock(&qdisc_stab_lock);
@@ -690,6 +693,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
690 (new && new->flags & TCQ_F_INGRESS)) { 693 (new && new->flags & TCQ_F_INGRESS)) {
691 num_q = 1; 694 num_q = 1;
692 ingress = 1; 695 ingress = 1;
696 if (!dev_ingress_queue(dev))
697 return -ENOENT;
693 } 698 }
694 699
695 if (dev->flags & IFF_UP) 700 if (dev->flags & IFF_UP)
@@ -701,7 +706,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
701 } 706 }
702 707
703 for (i = 0; i < num_q; i++) { 708 for (i = 0; i < num_q; i++) {
704 struct netdev_queue *dev_queue = &dev->rx_queue; 709 struct netdev_queue *dev_queue = dev_ingress_queue(dev);
705 710
706 if (!ingress) 711 if (!ingress)
707 dev_queue = netdev_get_tx_queue(dev, i); 712 dev_queue = netdev_get_tx_queue(dev, i);
@@ -979,7 +984,8 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
979 return -ENOENT; 984 return -ENOENT;
980 q = qdisc_leaf(p, clid); 985 q = qdisc_leaf(p, clid);
981 } else { /* ingress */ 986 } else { /* ingress */
982 q = dev->rx_queue.qdisc_sleeping; 987 if (dev_ingress_queue(dev))
988 q = dev_ingress_queue(dev)->qdisc_sleeping;
983 } 989 }
984 } else { 990 } else {
985 q = dev->qdisc; 991 q = dev->qdisc;
@@ -1043,8 +1049,9 @@ replay:
1043 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL) 1049 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
1044 return -ENOENT; 1050 return -ENOENT;
1045 q = qdisc_leaf(p, clid); 1051 q = qdisc_leaf(p, clid);
1046 } else { /*ingress */ 1052 } else { /* ingress */
1047 q = dev->rx_queue.qdisc_sleeping; 1053 if (dev_ingress_queue_create(dev))
1054 q = dev_ingress_queue(dev)->qdisc_sleeping;
1048 } 1055 }
1049 } else { 1056 } else {
1050 q = dev->qdisc; 1057 q = dev->qdisc;
@@ -1123,11 +1130,14 @@ replay:
1123create_n_graft: 1130create_n_graft:
1124 if (!(n->nlmsg_flags&NLM_F_CREATE)) 1131 if (!(n->nlmsg_flags&NLM_F_CREATE))
1125 return -ENOENT; 1132 return -ENOENT;
1126 if (clid == TC_H_INGRESS) 1133 if (clid == TC_H_INGRESS) {
1127 q = qdisc_create(dev, &dev->rx_queue, p, 1134 if (dev_ingress_queue(dev))
1128 tcm->tcm_parent, tcm->tcm_parent, 1135 q = qdisc_create(dev, dev_ingress_queue(dev), p,
1129 tca, &err); 1136 tcm->tcm_parent, tcm->tcm_parent,
1130 else { 1137 tca, &err);
1138 else
1139 err = -ENOENT;
1140 } else {
1131 struct netdev_queue *dev_queue; 1141 struct netdev_queue *dev_queue;
1132 1142
1133 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) 1143 if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
@@ -1304,8 +1314,10 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
1304 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) 1314 if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
1305 goto done; 1315 goto done;
1306 1316
1307 dev_queue = &dev->rx_queue; 1317 dev_queue = dev_ingress_queue(dev);
1308 if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) 1318 if (dev_queue &&
1319 tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
1320 &q_idx, s_q_idx) < 0)
1309 goto done; 1321 goto done;
1310 1322
1311cont: 1323cont:
@@ -1595,8 +1607,10 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1595 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) 1607 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
1596 goto done; 1608 goto done;
1597 1609
1598 dev_queue = &dev->rx_queue; 1610 dev_queue = dev_ingress_queue(dev);
1599 if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) 1611 if (dev_queue &&
1612 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1613 &t, s_t) < 0)
1600 goto done; 1614 goto done;
1601 1615
1602done: 1616done:
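All of the sch_api.c hunks above stop dereferencing dev->rx_queue directly: the ingress queue is now allocated on demand, so read paths must tolerate NULL and only the qdisc-creation path may allocate. The accessors are assumed to look roughly like this, reconstructed from the call sites above (see netdevice.h for the real definitions):

	/* Read side: NULL until an ingress qdisc has been set up. */
	static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
	{
		return rtnl_dereference(dev->ingress_queue);
	}

	/* Write side: allocates on first use, e.g. "tc qdisc add ... ingress". */
	struct netdev_queue *dev_ingress_queue_create(struct net_device *dev);
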
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 6318e1136b83..282540778aa8 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -275,8 +275,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
275 goto err_out; 275 goto err_out;
276 } 276 }
277 flow->filter_list = NULL; 277 flow->filter_list = NULL;
278 flow->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 278 flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
279 &pfifo_qdisc_ops, classid);
280 if (!flow->q) 279 if (!flow->q)
281 flow->q = &noop_qdisc; 280 flow->q = &noop_qdisc;
282 pr_debug("atm_tc_change: qdisc %p\n", flow->q); 281 pr_debug("atm_tc_change: qdisc %p\n", flow->q);
@@ -543,7 +542,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt)
543 INIT_LIST_HEAD(&p->flows); 542 INIT_LIST_HEAD(&p->flows);
544 INIT_LIST_HEAD(&p->link.list); 543 INIT_LIST_HEAD(&p->link.list);
545 list_add(&p->link.list, &p->flows); 544 list_add(&p->link.list, &p->flows);
546 p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 545 p->link.q = qdisc_create_dflt(sch->dev_queue,
547 &pfifo_qdisc_ops, sch->handle); 546 &pfifo_qdisc_ops, sch->handle);
548 if (!p->link.q) 547 if (!p->link.q)
549 p->link.q = &noop_qdisc; 548 p->link.q = &noop_qdisc;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 28c01ef5abc8..eb7631590865 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1379,9 +1379,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1379 q->link.sibling = &q->link; 1379 q->link.sibling = &q->link;
1380 q->link.common.classid = sch->handle; 1380 q->link.common.classid = sch->handle;
1381 q->link.qdisc = sch; 1381 q->link.qdisc = sch;
1382 if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1382 q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1383 &pfifo_qdisc_ops, 1383 sch->handle);
1384 sch->handle))) 1384 if (!q->link.q)
1385 q->link.q = &noop_qdisc; 1385 q->link.q = &noop_qdisc;
1386 1386
1387 q->link.priority = TC_CBQ_MAXPRIO-1; 1387 q->link.priority = TC_CBQ_MAXPRIO-1;
@@ -1623,7 +1623,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1623 struct cbq_class *cl = (struct cbq_class*)arg; 1623 struct cbq_class *cl = (struct cbq_class*)arg;
1624 1624
1625 if (new == NULL) { 1625 if (new == NULL) {
1626 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1626 new = qdisc_create_dflt(sch->dev_queue,
1627 &pfifo_qdisc_ops, cl->common.classid); 1627 &pfifo_qdisc_ops, cl->common.classid);
1628 if (new == NULL) 1628 if (new == NULL)
1629 return -ENOBUFS; 1629 return -ENOBUFS;
@@ -1874,8 +1874,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t
1874 cl->R_tab = rtab; 1874 cl->R_tab = rtab;
1875 rtab = NULL; 1875 rtab = NULL;
1876 cl->refcnt = 1; 1876 cl->refcnt = 1;
1877 if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1877 cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1878 &pfifo_qdisc_ops, classid))) 1878 if (!cl->q)
1879 cl->q = &noop_qdisc; 1879 cl->q = &noop_qdisc;
1880 cl->common.classid = classid; 1880 cl->common.classid = classid;
1881 cl->tparent = parent; 1881 cl->tparent = parent;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index b74046a95397..aa8b5313f8cf 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -110,7 +110,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
110 cl->refcnt = 1; 110 cl->refcnt = 1;
111 cl->common.classid = classid; 111 cl->common.classid = classid;
112 cl->quantum = quantum; 112 cl->quantum = quantum;
113 cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 113 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
114 &pfifo_qdisc_ops, classid); 114 &pfifo_qdisc_ops, classid);
115 if (cl->qdisc == NULL) 115 if (cl->qdisc == NULL)
116 cl->qdisc = &noop_qdisc; 116 cl->qdisc = &noop_qdisc;
@@ -218,7 +218,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
218 struct drr_class *cl = (struct drr_class *)arg; 218 struct drr_class *cl = (struct drr_class *)arg;
219 219
220 if (new == NULL) { 220 if (new == NULL) {
221 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 221 new = qdisc_create_dflt(sch->dev_queue,
222 &pfifo_qdisc_ops, cl->common.classid); 222 &pfifo_qdisc_ops, cl->common.classid);
223 if (new == NULL) 223 if (new == NULL)
224 new = &noop_qdisc; 224 new = &noop_qdisc;
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 63d41f86679c..1d295d62bb5c 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -61,8 +61,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
61 sch, p, new, old); 61 sch, p, new, old);
62 62
63 if (new == NULL) { 63 if (new == NULL) {
64 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 64 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
65 &pfifo_qdisc_ops,
66 sch->handle); 65 sch->handle);
67 if (new == NULL) 66 if (new == NULL)
68 new = &noop_qdisc; 67 new = &noop_qdisc;
@@ -384,8 +383,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
384 p->default_index = default_index; 383 p->default_index = default_index;
385 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); 384 p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
386 385
387 p->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 386 p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle);
388 &pfifo_qdisc_ops, sch->handle);
389 if (p->q == NULL) 387 if (p->q == NULL)
390 p->q = &noop_qdisc; 388 p->q = &noop_qdisc;
391 389
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 5948bafa8ce2..4dfecb0cba37 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -172,8 +172,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops,
172 struct Qdisc *q; 172 struct Qdisc *q;
173 int err = -ENOMEM; 173 int err = -ENOMEM;
174 174
175 q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 175 q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1));
176 ops, TC_H_MAKE(sch->handle, 1));
177 if (q) { 176 if (q) {
178 err = fifo_set_limit(q, limit); 177 err = fifo_set_limit(q, limit);
179 if (err < 0) { 178 if (err < 0) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2aeb3a4386a1..5dbb3cd96e59 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -383,6 +383,7 @@ struct Qdisc noop_qdisc = {
383 .list = LIST_HEAD_INIT(noop_qdisc.list), 383 .list = LIST_HEAD_INIT(noop_qdisc.list),
384 .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), 384 .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
385 .dev_queue = &noop_netdev_queue, 385 .dev_queue = &noop_netdev_queue,
386 .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
386}; 387};
387EXPORT_SYMBOL(noop_qdisc); 388EXPORT_SYMBOL(noop_qdisc);
388 389
@@ -409,6 +410,7 @@ static struct Qdisc noqueue_qdisc = {
409 .list = LIST_HEAD_INIT(noqueue_qdisc.list), 410 .list = LIST_HEAD_INIT(noqueue_qdisc.list),
410 .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), 411 .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
411 .dev_queue = &noqueue_netdev_queue, 412 .dev_queue = &noqueue_netdev_queue,
413 .busylock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.busylock),
412}; 414};
413 415
414 416
@@ -574,10 +576,8 @@ errout:
574 return ERR_PTR(err); 576 return ERR_PTR(err);
575} 577}
576 578
577struct Qdisc * qdisc_create_dflt(struct net_device *dev, 579struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
578 struct netdev_queue *dev_queue, 580 struct Qdisc_ops *ops, unsigned int parentid)
579 struct Qdisc_ops *ops,
580 unsigned int parentid)
581{ 581{
582 struct Qdisc *sch; 582 struct Qdisc *sch;
583 583
@@ -682,7 +682,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
682 struct Qdisc *qdisc; 682 struct Qdisc *qdisc;
683 683
684 if (dev->tx_queue_len) { 684 if (dev->tx_queue_len) {
685 qdisc = qdisc_create_dflt(dev, dev_queue, 685 qdisc = qdisc_create_dflt(dev_queue,
686 &pfifo_fast_ops, TC_H_ROOT); 686 &pfifo_fast_ops, TC_H_ROOT);
687 if (!qdisc) { 687 if (!qdisc) {
688 printk(KERN_INFO "%s: activation failed\n", dev->name); 688 printk(KERN_INFO "%s: activation failed\n", dev->name);
@@ -709,7 +709,7 @@ static void attach_default_qdiscs(struct net_device *dev)
709 dev->qdisc = txq->qdisc_sleeping; 709 dev->qdisc = txq->qdisc_sleeping;
710 atomic_inc(&dev->qdisc->refcnt); 710 atomic_inc(&dev->qdisc->refcnt);
711 } else { 711 } else {
712 qdisc = qdisc_create_dflt(dev, txq, &mq_qdisc_ops, TC_H_ROOT); 712 qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
713 if (qdisc) { 713 if (qdisc) {
714 qdisc->ops->attach(qdisc); 714 qdisc->ops->attach(qdisc);
715 dev->qdisc = qdisc; 715 dev->qdisc = qdisc;
@@ -753,7 +753,8 @@ void dev_activate(struct net_device *dev)
753 753
754 need_watchdog = 0; 754 need_watchdog = 0;
755 netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog); 755 netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
756 transition_one_qdisc(dev, &dev->rx_queue, NULL); 756 if (dev_ingress_queue(dev))
757 transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);
757 758
758 if (need_watchdog) { 759 if (need_watchdog) {
759 dev->trans_start = jiffies; 760 dev->trans_start = jiffies;
@@ -812,7 +813,8 @@ static bool some_qdisc_is_busy(struct net_device *dev)
812void dev_deactivate(struct net_device *dev) 813void dev_deactivate(struct net_device *dev)
813{ 814{
814 netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc); 815 netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
815 dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc); 816 if (dev_ingress_queue(dev))
817 dev_deactivate_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
816 818
817 dev_watchdog_down(dev); 819 dev_watchdog_down(dev);
818 820
@@ -838,7 +840,8 @@ void dev_init_scheduler(struct net_device *dev)
838{ 840{
839 dev->qdisc = &noop_qdisc; 841 dev->qdisc = &noop_qdisc;
840 netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); 842 netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
841 dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); 843 if (dev_ingress_queue(dev))
844 dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
842 845
843 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); 846 setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
844} 847}
@@ -861,7 +864,8 @@ static void shutdown_scheduler_queue(struct net_device *dev,
861void dev_shutdown(struct net_device *dev) 864void dev_shutdown(struct net_device *dev)
862{ 865{
863 netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); 866 netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
864 shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); 867 if (dev_ingress_queue(dev))
868 shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
865 qdisc_destroy(dev->qdisc); 869 qdisc_destroy(dev->qdisc);
866 dev->qdisc = &noop_qdisc; 870 dev->qdisc = &noop_qdisc;
867 871
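
Two independent fixes ride along in sch_generic.c. The static noop/noqueue qdiscs gain a busylock initializer, matching the busylock spinlock that qdisc_alloc() already sets up for dynamically created qdiscs. And the unconditional &dev->rx_queue users become dev_ingress_queue() callers guarded by a NULL check: the ingress queue is presumably allocated on demand from this series on, so code walking a device's qdiscs must tolerate its absence. The guarded pattern used in dev_activate(), dev_deactivate(), dev_init_scheduler() and dev_shutdown():

	struct netdev_queue *ingress = dev_ingress_queue(dev);

	if (ingress)	/* NULL unless an ingress qdisc was ever set up */
		transition_one_qdisc(dev, ingress, NULL);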
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 47496098d35c..069c62b7bb36 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1088,7 +1088,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
1088 cl->refcnt = 1; 1088 cl->refcnt = 1;
1089 cl->sched = q; 1089 cl->sched = q;
1090 cl->cl_parent = parent; 1090 cl->cl_parent = parent;
1091 cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1091 cl->qdisc = qdisc_create_dflt(sch->dev_queue,
1092 &pfifo_qdisc_ops, classid); 1092 &pfifo_qdisc_ops, classid);
1093 if (cl->qdisc == NULL) 1093 if (cl->qdisc == NULL)
1094 cl->qdisc = &noop_qdisc; 1094 cl->qdisc = &noop_qdisc;
@@ -1209,8 +1209,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1209 if (cl->level > 0) 1209 if (cl->level > 0)
1210 return -EINVAL; 1210 return -EINVAL;
1211 if (new == NULL) { 1211 if (new == NULL) {
1212 new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1212 new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1213 &pfifo_qdisc_ops,
1214 cl->cl_common.classid); 1213 cl->cl_common.classid);
1215 if (new == NULL) 1214 if (new == NULL)
1216 new = &noop_qdisc; 1215 new = &noop_qdisc;
@@ -1452,8 +1451,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
1452 q->root.cl_common.classid = sch->handle; 1451 q->root.cl_common.classid = sch->handle;
1453 q->root.refcnt = 1; 1452 q->root.refcnt = 1;
1454 q->root.sched = q; 1453 q->root.sched = q;
1455 q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1454 q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1456 &pfifo_qdisc_ops,
1457 sch->handle); 1455 sch->handle);
1458 if (q->root.qdisc == NULL) 1456 if (q->root.qdisc == NULL)
1459 q->root.qdisc = &noop_qdisc; 1457 q->root.qdisc = &noop_qdisc;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 4be8d04b262d..01b519d6c52d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1121,8 +1121,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1121 if (cl->level) 1121 if (cl->level)
1122 return -EINVAL; 1122 return -EINVAL;
1123 if (new == NULL && 1123 if (new == NULL &&
1124 (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1124 (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1125 &pfifo_qdisc_ops,
1126 cl->common.classid)) == NULL) 1125 cl->common.classid)) == NULL)
1127 return -ENOBUFS; 1126 return -ENOBUFS;
1128 1127
@@ -1247,8 +1246,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
1247 return -EBUSY; 1246 return -EBUSY;
1248 1247
1249 if (!cl->level && htb_parent_last_child(cl)) { 1248 if (!cl->level && htb_parent_last_child(cl)) {
1250 new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1249 new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1251 &pfifo_qdisc_ops,
1252 cl->parent->common.classid); 1250 cl->parent->common.classid);
1253 last_child = 1; 1251 last_child = 1;
1254 } 1252 }
@@ -1302,14 +1300,14 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1302 struct htb_class *cl = (struct htb_class *)*arg, *parent; 1300 struct htb_class *cl = (struct htb_class *)*arg, *parent;
1303 struct nlattr *opt = tca[TCA_OPTIONS]; 1301 struct nlattr *opt = tca[TCA_OPTIONS];
1304 struct qdisc_rate_table *rtab = NULL, *ctab = NULL; 1302 struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
1305 struct nlattr *tb[TCA_HTB_RTAB + 1]; 1303 struct nlattr *tb[__TCA_HTB_MAX];
1306 struct tc_htb_opt *hopt; 1304 struct tc_htb_opt *hopt;
1307 1305
1308 /* extract all subattrs from opt attr */ 1306 /* extract all subattrs from opt attr */
1309 if (!opt) 1307 if (!opt)
1310 goto failure; 1308 goto failure;
1311 1309
1312 err = nla_parse_nested(tb, TCA_HTB_RTAB, opt, htb_policy); 1310 err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
1313 if (err < 0) 1311 if (err < 0)
1314 goto failure; 1312 goto failure;
1315 1313
@@ -1377,7 +1375,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
1377 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) 1375 /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL)
1378 so that can't be used inside of sch_tree_lock 1376 so that can't be used inside of sch_tree_lock
1379 -- thanks to Karlis Peisenieks */ 1377 -- thanks to Karlis Peisenieks */
1380 new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 1378 new_q = qdisc_create_dflt(sch->dev_queue,
1381 &pfifo_qdisc_ops, classid); 1379 &pfifo_qdisc_ops, classid);
1382 sch_tree_lock(sch); 1380 sch_tree_lock(sch);
1383 if (parent && !parent->level) { 1381 if (parent && !parent->level) {
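
Apart from the qdisc_create_dflt() conversion, htb_change_class() gets a real parsing fix: nla_parse_nested(tb, maxtype, ...) fills tb[0..maxtype], so the destination array needs maxtype + 1 slots, and bounding the parse at TCA_HTB_RTAB meant any higher-numbered HTB attribute was silently dropped. The corrected idiom:

	struct nlattr *tb[__TCA_HTB_MAX];	/* __TCA_HTB_MAX == TCA_HTB_MAX + 1 */

	err = nla_parse_nested(tb, TCA_HTB_MAX, opt, htb_policy);
	if (err < 0)
		goto failure;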
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index fe91e50f9d98..ecc302f4d2a1 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -56,7 +56,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
56 56
57 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { 57 for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
58 dev_queue = netdev_get_tx_queue(dev, ntx); 58 dev_queue = netdev_get_tx_queue(dev, ntx);
59 qdisc = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops, 59 qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
60 TC_H_MAKE(TC_H_MAJ(sch->handle), 60 TC_H_MAKE(TC_H_MAJ(sch->handle),
61 TC_H_MIN(ntx + 1))); 61 TC_H_MIN(ntx + 1)));
62 if (qdisc == NULL) 62 if (qdisc == NULL)
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 6ae251279fc2..32690deab5d0 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -227,8 +227,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
227 for (i = 0; i < q->bands; i++) { 227 for (i = 0; i < q->bands; i++) {
228 if (q->queues[i] == &noop_qdisc) { 228 if (q->queues[i] == &noop_qdisc) {
229 struct Qdisc *child, *old; 229 struct Qdisc *child, *old;
230 child = qdisc_create_dflt(qdisc_dev(sch), 230 child = qdisc_create_dflt(sch->dev_queue,
231 sch->dev_queue,
232 &pfifo_qdisc_ops, 231 &pfifo_qdisc_ops,
233 TC_H_MAKE(sch->handle, 232 TC_H_MAKE(sch->handle,
234 i + 1)); 233 i + 1));
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 4714ff162bbd..e5593c083a78 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -538,8 +538,7 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
538 538
539 qdisc_watchdog_init(&q->watchdog, sch); 539 qdisc_watchdog_init(&q->watchdog, sch);
540 540
541 q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 541 q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops,
542 &tfifo_qdisc_ops,
543 TC_H_MAKE(sch->handle, 1)); 542 TC_H_MAKE(sch->handle, 1));
544 if (!q->qdisc) { 543 if (!q->qdisc) {
545 pr_debug("netem: qdisc create failed\n"); 544 pr_debug("netem: qdisc create failed\n");
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 0748fb1e3a49..b1c95bce33ce 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -200,7 +200,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
200 for (i=0; i<q->bands; i++) { 200 for (i=0; i<q->bands; i++) {
201 if (q->queues[i] == &noop_qdisc) { 201 if (q->queues[i] == &noop_qdisc) {
202 struct Qdisc *child, *old; 202 struct Qdisc *child, *old;
203 child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, 203 child = qdisc_create_dflt(sch->dev_queue,
204 &pfifo_qdisc_ops, 204 &pfifo_qdisc_ops,
205 TC_H_MAKE(sch->handle, i + 1)); 205 TC_H_MAKE(sch->handle, i + 1));
206 if (child) { 206 if (child) {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 201cbac2b32c..3cf478d012dd 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -123,40 +123,39 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
123 case htons(ETH_P_IP): 123 case htons(ETH_P_IP):
124 { 124 {
125 const struct iphdr *iph; 125 const struct iphdr *iph;
126 int poff;
126 127
127 if (!pskb_network_may_pull(skb, sizeof(*iph))) 128 if (!pskb_network_may_pull(skb, sizeof(*iph)))
128 goto err; 129 goto err;
129 iph = ip_hdr(skb); 130 iph = ip_hdr(skb);
130 h = (__force u32)iph->daddr; 131 h = (__force u32)iph->daddr;
131 h2 = (__force u32)iph->saddr ^ iph->protocol; 132 h2 = (__force u32)iph->saddr ^ iph->protocol;
132 if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && 133 if (iph->frag_off & htons(IP_MF|IP_OFFSET))
133 (iph->protocol == IPPROTO_TCP || 134 break;
134 iph->protocol == IPPROTO_UDP || 135 poff = proto_ports_offset(iph->protocol);
135 iph->protocol == IPPROTO_UDPLITE || 136 if (poff >= 0 &&
136 iph->protocol == IPPROTO_SCTP || 137 pskb_network_may_pull(skb, iph->ihl * 4 + 4 + poff)) {
137 iph->protocol == IPPROTO_DCCP || 138 iph = ip_hdr(skb);
138 iph->protocol == IPPROTO_ESP) && 139 h2 ^= *(u32*)((void *)iph + iph->ihl * 4 + poff);
139 pskb_network_may_pull(skb, iph->ihl * 4 + 4)) 140 }
140 h2 ^= *(((u32*)iph) + iph->ihl);
141 break; 141 break;
142 } 142 }
143 case htons(ETH_P_IPV6): 143 case htons(ETH_P_IPV6):
144 { 144 {
145 struct ipv6hdr *iph; 145 struct ipv6hdr *iph;
146 int poff;
146 147
147 if (!pskb_network_may_pull(skb, sizeof(*iph))) 148 if (!pskb_network_may_pull(skb, sizeof(*iph)))
148 goto err; 149 goto err;
149 iph = ipv6_hdr(skb); 150 iph = ipv6_hdr(skb);
150 h = (__force u32)iph->daddr.s6_addr32[3]; 151 h = (__force u32)iph->daddr.s6_addr32[3];
151 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr; 152 h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
152 if ((iph->nexthdr == IPPROTO_TCP || 153 poff = proto_ports_offset(iph->nexthdr);
153 iph->nexthdr == IPPROTO_UDP || 154 if (poff >= 0 &&
154 iph->nexthdr == IPPROTO_UDPLITE || 155 pskb_network_may_pull(skb, sizeof(*iph) + 4 + poff)) {
155 iph->nexthdr == IPPROTO_SCTP || 156 iph = ipv6_hdr(skb);
156 iph->nexthdr == IPPROTO_DCCP || 157 h2 ^= *(u32*)((void *)iph + sizeof(*iph) + poff);
157 iph->nexthdr == IPPROTO_ESP) && 158 }
158 pskb_network_may_pull(skb, sizeof(*iph) + 4))
159 h2 ^= *(u32*)&iph[1];
160 break; 159 break;
161 } 160 }
162 default: 161 default:
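
The SFQ hash rework swaps the open-coded protocol list for proto_ports_offset(), which returns the offset of the 32-bit port pair (or SPI) within the transport header, or a negative value when the protocol carries none; the caller then pulls exactly ihl * 4 + 4 + poff bytes before mixing the ports into h2. A sketch of the helper as defined elsewhere in the tree (reproduced from memory, so treat the exact protocol list as illustrative):

	static inline int proto_ports_offset(int proto)
	{
		switch (proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
		case IPPROTO_UDPLITE:
		case IPPROTO_DCCP:
		case IPPROTO_SCTP:
		case IPPROTO_ESP:	/* SPI serves as the flow key */
			return 0;
		case IPPROTO_AH:	/* SPI follows the first AH word pair */
			return 4;
		default:
			return -1;	/* nothing port-like to hash */
		}
	}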
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index feaabc103ce6..401af9596709 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -241,11 +241,11 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *
241 } 241 }
242 if (neigh_event_send(n, skb_res) == 0) { 242 if (neigh_event_send(n, skb_res) == 0) {
243 int err; 243 int err;
244 char haddr[MAX_ADDR_LEN];
244 245
245 read_lock(&n->lock); 246 neigh_ha_snapshot(haddr, n, dev);
246 err = dev_hard_header(skb, dev, ntohs(skb->protocol), 247 err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
247 n->ha, NULL, skb->len); 248 NULL, skb->len);
248 read_unlock(&n->lock);
249 249
250 if (err < 0) { 250 if (err < 0) {
251 neigh_release(n); 251 neigh_release(n);
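
In __teql_resolve() the hand-rolled read_lock(&n->lock) around n->ha gives way to neigh_ha_snapshot(), which copies the neighbour's hardware address into a caller-provided buffer under the neighbour layer's own locking, so the address cannot change mid-copy and the caller never touches n->ha directly:

	char haddr[MAX_ADDR_LEN];

	neigh_ha_snapshot(haddr, n, dev);	/* consistent copy of n->ha */
	err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
			      NULL, skb->len);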
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 0b85e5256434..5f1fb8bd862d 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -48,6 +48,8 @@
48 * be incorporated into the next SCTP release. 48 * be incorporated into the next SCTP release.
49 */ 49 */
50 50
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
52
51#include <linux/types.h> 53#include <linux/types.h>
52#include <linux/fcntl.h> 54#include <linux/fcntl.h>
53#include <linux/poll.h> 55#include <linux/poll.h>
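
This pr_fmt() define, repeated at the top of every SCTP file touched below, is what lets the hand-written "SCTP: " prefixes disappear from the converted messages: the pr_err()/pr_warn()/pr_info() helpers pass their format string through pr_fmt(), so every line picks up the module name automatically. Minimal sketch:

	/* must be defined before printk.h is pulled in */
	#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
	#include <linux/kernel.h>

	pr_err("Failed association hash alloc\n");
	/* logs: "sctp: Failed association hash alloc" */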
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 476caaf100ed..6c8556459a75 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -37,6 +37,8 @@
37 * be incorporated into the next SCTP release. 37 * be incorporated into the next SCTP release.
38 */ 38 */
39 39
40#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
41
40#include <linux/types.h> 42#include <linux/types.h>
41#include <linux/kernel.h> 43#include <linux/kernel.h>
42#include <linux/net.h> 44#include <linux/net.h>
diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c
index ccb6dc48d15b..397296fb156f 100644
--- a/net/sctp/inqueue.c
+++ b/net/sctp/inqueue.c
@@ -43,6 +43,8 @@
43 * be incorporated into the next SCTP release. 43 * be incorporated into the next SCTP release.
44 */ 44 */
45 45
46#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
47
46#include <net/sctp/sctp.h> 48#include <net/sctp/sctp.h>
47#include <net/sctp/sm.h> 49#include <net/sctp/sm.h>
48#include <linux/interrupt.h> 50#include <linux/interrupt.h>
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 732689140fb8..95e0c8eda1a0 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -47,6 +47,8 @@
47 * be incorporated into the next SCTP release. 47 * be incorporated into the next SCTP release.
48 */ 48 */
49 49
50#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
51
50#include <linux/module.h> 52#include <linux/module.h>
51#include <linux/errno.h> 53#include <linux/errno.h>
52#include <linux/types.h> 54#include <linux/types.h>
@@ -336,7 +338,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
336 memcpy(saddr, baddr, sizeof(union sctp_addr)); 338 memcpy(saddr, baddr, sizeof(union sctp_addr));
337 SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr); 339 SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr);
338 } else { 340 } else {
339 printk(KERN_ERR "%s: asoc:%p Could not find a valid source " 341 pr_err("%s: asoc:%p Could not find a valid source "
340 "address for the dest:%pI6\n", 342 "address for the dest:%pI6\n",
341 __func__, asoc, &daddr->v6.sin6_addr); 343 __func__, asoc, &daddr->v6.sin6_addr);
342 } 344 }
diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c
index f73ec0ea93ba..8ef8e7d9eb61 100644
--- a/net/sctp/objcnt.c
+++ b/net/sctp/objcnt.c
@@ -38,6 +38,8 @@
38 * be incorporated into the next SCTP release. 38 * be incorporated into the next SCTP release.
39 */ 39 */
40 40
41#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
42
41#include <linux/kernel.h> 43#include <linux/kernel.h>
42#include <net/sctp/sctp.h> 44#include <net/sctp/sctp.h>
43 45
@@ -134,8 +136,7 @@ void sctp_dbg_objcnt_init(void)
134 ent = proc_create("sctp_dbg_objcnt", 0, 136 ent = proc_create("sctp_dbg_objcnt", 0,
135 proc_net_sctp, &sctp_objcnt_ops); 137 proc_net_sctp, &sctp_objcnt_ops);
136 if (!ent) 138 if (!ent)
137 printk(KERN_WARNING 139 pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n");
138 "sctp_dbg_objcnt: Unable to create /proc entry.\n");
139} 140}
140 141
141/* Cleanup the objcount entry in the proc filesystem. */ 142/* Cleanup the objcount entry in the proc filesystem. */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index bcc4590ccaf2..60600d337a3a 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -41,6 +41,8 @@
41 * be incorporated into the next SCTP release. 41 * be incorporated into the next SCTP release.
42 */ 42 */
43 43
44#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
45
44#include <linux/types.h> 46#include <linux/types.h>
45#include <linux/kernel.h> 47#include <linux/kernel.h>
46#include <linux/wait.h> 48#include <linux/wait.h>
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index c04b2eb59186..8c6d379b4bb6 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/types.h> 51#include <linux/types.h>
50#include <linux/list.h> /* For struct list_head */ 52#include <linux/list.h> /* For struct list_head */
51#include <linux/socket.h> 53#include <linux/socket.h>
@@ -1463,23 +1465,23 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1463 /* Display the end of the 1465 /* Display the end of the
1464 * current range. 1466 * current range.
1465 */ 1467 */
1466 SCTP_DEBUG_PRINTK("-%08x", 1468 SCTP_DEBUG_PRINTK_CONT("-%08x",
1467 dbg_last_ack_tsn); 1469 dbg_last_ack_tsn);
1468 } 1470 }
1469 1471
1470 /* Start a new range. */ 1472 /* Start a new range. */
1471 SCTP_DEBUG_PRINTK(",%08x", tsn); 1473 SCTP_DEBUG_PRINTK_CONT(",%08x", tsn);
1472 dbg_ack_tsn = tsn; 1474 dbg_ack_tsn = tsn;
1473 break; 1475 break;
1474 1476
1475 case 1: /* The last TSN was NOT ACKed. */ 1477 case 1: /* The last TSN was NOT ACKed. */
1476 if (dbg_last_kept_tsn != dbg_kept_tsn) { 1478 if (dbg_last_kept_tsn != dbg_kept_tsn) {
1477 /* Display the end of current range. */ 1479 /* Display the end of current range. */
1478 SCTP_DEBUG_PRINTK("-%08x", 1480 SCTP_DEBUG_PRINTK_CONT("-%08x",
1479 dbg_last_kept_tsn); 1481 dbg_last_kept_tsn);
1480 } 1482 }
1481 1483
1482 SCTP_DEBUG_PRINTK("\n"); 1484 SCTP_DEBUG_PRINTK_CONT("\n");
1483 1485
1484 /* FALL THROUGH... */ 1486 /* FALL THROUGH... */
1485 default: 1487 default:
@@ -1526,18 +1528,18 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1526 break; 1528 break;
1527 1529
1528 if (dbg_last_kept_tsn != dbg_kept_tsn) 1530 if (dbg_last_kept_tsn != dbg_kept_tsn)
1529 SCTP_DEBUG_PRINTK("-%08x", 1531 SCTP_DEBUG_PRINTK_CONT("-%08x",
1530 dbg_last_kept_tsn); 1532 dbg_last_kept_tsn);
1531 1533
1532 SCTP_DEBUG_PRINTK(",%08x", tsn); 1534 SCTP_DEBUG_PRINTK_CONT(",%08x", tsn);
1533 dbg_kept_tsn = tsn; 1535 dbg_kept_tsn = tsn;
1534 break; 1536 break;
1535 1537
1536 case 0: 1538 case 0:
1537 if (dbg_last_ack_tsn != dbg_ack_tsn) 1539 if (dbg_last_ack_tsn != dbg_ack_tsn)
1538 SCTP_DEBUG_PRINTK("-%08x", 1540 SCTP_DEBUG_PRINTK_CONT("-%08x",
1539 dbg_last_ack_tsn); 1541 dbg_last_ack_tsn);
1540 SCTP_DEBUG_PRINTK("\n"); 1542 SCTP_DEBUG_PRINTK_CONT("\n");
1541 1543
1542 /* FALL THROUGH... */ 1544 /* FALL THROUGH... */
1543 default: 1545 default:
@@ -1556,17 +1558,17 @@ static void sctp_check_transmitted(struct sctp_outq *q,
1556 switch (dbg_prt_state) { 1558 switch (dbg_prt_state) {
1557 case 0: 1559 case 0:
1558 if (dbg_last_ack_tsn != dbg_ack_tsn) { 1560 if (dbg_last_ack_tsn != dbg_ack_tsn) {
1559 SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_ack_tsn); 1561 SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_ack_tsn);
1560 } else { 1562 } else {
1561 SCTP_DEBUG_PRINTK("\n"); 1563 SCTP_DEBUG_PRINTK_CONT("\n");
1562 } 1564 }
1563 break; 1565 break;
1564 1566
1565 case 1: 1567 case 1:
1566 if (dbg_last_kept_tsn != dbg_kept_tsn) { 1568 if (dbg_last_kept_tsn != dbg_kept_tsn) {
1567 SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_kept_tsn); 1569 SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_kept_tsn);
1568 } else { 1570 } else {
1569 SCTP_DEBUG_PRINTK("\n"); 1571 SCTP_DEBUG_PRINTK_CONT("\n");
1570 } 1572 }
1571 } 1573 }
1572#endif /* SCTP_DEBUG */ 1574#endif /* SCTP_DEBUG */
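
With the new pr_fmt() in effect, a plain SCTP_DEBUG_PRINTK presumably starts a new, "sctp: "-prefixed log record, which would shred the incremental TSN range dumps ("-%08x", ",%08x") across lines; the _CONT variant continues the current record instead. An assumed shape for the macro (not part of this diff), mirroring SCTP_DEBUG_PRINTK:

	/* assumption: illustrative definition only */
	#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) \
		printk(KERN_CONT fmt, ##args)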
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
index 289b1ba62cac..bc6cd75cc1dc 100644
--- a/net/sctp/probe.c
+++ b/net/sctp/probe.c
@@ -22,6 +22,8 @@
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */ 23 */
24 24
25#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
26
25#include <linux/kernel.h> 27#include <linux/kernel.h>
26#include <linux/kprobes.h> 28#include <linux/kprobes.h>
27#include <linux/socket.h> 29#include <linux/socket.h>
@@ -193,7 +195,7 @@ static __init int sctpprobe_init(void)
193 if (ret) 195 if (ret)
194 goto remove_proc; 196 goto remove_proc;
195 197
196 pr_info("SCTP probe registered (port=%d)\n", port); 198 pr_info("probe registered (port=%d)\n", port);
197 199
198 return 0; 200 return 0;
199 201
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 5027b83f1cc0..1ef29c74d85e 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/module.h> 51#include <linux/module.h>
50#include <linux/init.h> 52#include <linux/init.h>
51#include <linux/netdevice.h> 53#include <linux/netdevice.h>
@@ -707,8 +709,7 @@ static int sctp_ctl_sock_init(void)
707 &init_net); 709 &init_net);
708 710
709 if (err < 0) { 711 if (err < 0) {
710 printk(KERN_ERR 712 pr_err("Failed to create the SCTP control socket\n");
711 "SCTP: Failed to create the SCTP control socket.\n");
712 return err; 713 return err;
713 } 714 }
714 return 0; 715 return 0;
@@ -798,7 +799,7 @@ static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len)
798static int sctp_inet_af_supported(sa_family_t family, struct sctp_sock *sp) 799static int sctp_inet_af_supported(sa_family_t family, struct sctp_sock *sp)
799{ 800{
800 /* PF_INET only supports AF_INET addresses. */ 801 /* PF_INET only supports AF_INET addresses. */
801 return (AF_INET == family); 802 return AF_INET == family;
802} 803}
803 804
804/* Address matching with wildcards allowed. */ 805/* Address matching with wildcards allowed. */
@@ -1206,7 +1207,7 @@ SCTP_STATIC __init int sctp_init(void)
1206 __get_free_pages(GFP_ATOMIC, order); 1207 __get_free_pages(GFP_ATOMIC, order);
1207 } while (!sctp_assoc_hashtable && --order > 0); 1208 } while (!sctp_assoc_hashtable && --order > 0);
1208 if (!sctp_assoc_hashtable) { 1209 if (!sctp_assoc_hashtable) {
1209 printk(KERN_ERR "SCTP: Failed association hash alloc.\n"); 1210 pr_err("Failed association hash alloc\n");
1210 status = -ENOMEM; 1211 status = -ENOMEM;
1211 goto err_ahash_alloc; 1212 goto err_ahash_alloc;
1212 } 1213 }
@@ -1220,7 +1221,7 @@ SCTP_STATIC __init int sctp_init(void)
1220 sctp_ep_hashtable = (struct sctp_hashbucket *) 1221 sctp_ep_hashtable = (struct sctp_hashbucket *)
1221 kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL); 1222 kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL);
1222 if (!sctp_ep_hashtable) { 1223 if (!sctp_ep_hashtable) {
1223 printk(KERN_ERR "SCTP: Failed endpoint_hash alloc.\n"); 1224 pr_err("Failed endpoint_hash alloc\n");
1224 status = -ENOMEM; 1225 status = -ENOMEM;
1225 goto err_ehash_alloc; 1226 goto err_ehash_alloc;
1226 } 1227 }
@@ -1239,7 +1240,7 @@ SCTP_STATIC __init int sctp_init(void)
1239 __get_free_pages(GFP_ATOMIC, order); 1240 __get_free_pages(GFP_ATOMIC, order);
1240 } while (!sctp_port_hashtable && --order > 0); 1241 } while (!sctp_port_hashtable && --order > 0);
1241 if (!sctp_port_hashtable) { 1242 if (!sctp_port_hashtable) {
1242 printk(KERN_ERR "SCTP: Failed bind hash alloc."); 1243 pr_err("Failed bind hash alloc\n");
1243 status = -ENOMEM; 1244 status = -ENOMEM;
1244 goto err_bhash_alloc; 1245 goto err_bhash_alloc;
1245 } 1246 }
@@ -1248,8 +1249,7 @@ SCTP_STATIC __init int sctp_init(void)
1248 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); 1249 INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain);
1249 } 1250 }
1250 1251
1251 printk(KERN_INFO "SCTP: Hash tables configured " 1252 pr_info("Hash tables configured (established %d bind %d)\n",
1252 "(established %d bind %d)\n",
1253 sctp_assoc_hashsize, sctp_port_hashsize); 1253 sctp_assoc_hashsize, sctp_port_hashsize);
1254 1254
1255 /* Disable ADDIP by default. */ 1255 /* Disable ADDIP by default. */
@@ -1290,8 +1290,7 @@ SCTP_STATIC __init int sctp_init(void)
1290 1290
1291 /* Initialize the control inode/socket for handling OOTB packets. */ 1291 /* Initialize the control inode/socket for handling OOTB packets. */
1292 if ((status = sctp_ctl_sock_init())) { 1292 if ((status = sctp_ctl_sock_init())) {
1293 printk (KERN_ERR 1293 pr_err("Failed to initialize the SCTP control sock\n");
1294 "SCTP: Failed to initialize the SCTP control sock.\n");
1295 goto err_ctl_sock_init; 1294 goto err_ctl_sock_init;
1296 } 1295 }
1297 1296
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 246f92924658..2cc46f0962ca 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -50,6 +50,8 @@
50 * be incorporated into the next SCTP release. 50 * be incorporated into the next SCTP release.
51 */ 51 */
52 52
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
54
53#include <linux/types.h> 55#include <linux/types.h>
54#include <linux/kernel.h> 56#include <linux/kernel.h>
55#include <linux/ip.h> 57#include <linux/ip.h>
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index f5e5e27cac5e..b21b218d564f 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -47,6 +47,8 @@
47 * be incorporated into the next SCTP release. 47 * be incorporated into the next SCTP release.
48 */ 48 */
49 49
50#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
51
50#include <linux/skbuff.h> 52#include <linux/skbuff.h>
51#include <linux/types.h> 53#include <linux/types.h>
52#include <linux/socket.h> 54#include <linux/socket.h>
@@ -1146,26 +1148,23 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype,
1146 1148
1147 case SCTP_DISPOSITION_VIOLATION: 1149 case SCTP_DISPOSITION_VIOLATION:
1148 if (net_ratelimit()) 1150 if (net_ratelimit())
1149 printk(KERN_ERR "sctp protocol violation state %d " 1151 pr_err("protocol violation state %d chunkid %d\n",
1150 "chunkid %d\n", state, subtype.chunk); 1152 state, subtype.chunk);
1151 break; 1153 break;
1152 1154
1153 case SCTP_DISPOSITION_NOT_IMPL: 1155 case SCTP_DISPOSITION_NOT_IMPL:
1154 printk(KERN_WARNING "sctp unimplemented feature in state %d, " 1156 pr_warn("unimplemented feature in state %d, event_type %d, event_id %d\n",
1155 "event_type %d, event_id %d\n", 1157 state, event_type, subtype.chunk);
1156 state, event_type, subtype.chunk);
1157 break; 1158 break;
1158 1159
1159 case SCTP_DISPOSITION_BUG: 1160 case SCTP_DISPOSITION_BUG:
1160 printk(KERN_ERR "sctp bug in state %d, " 1161 pr_err("bug in state %d, event_type %d, event_id %d\n",
1161 "event_type %d, event_id %d\n",
1162 state, event_type, subtype.chunk); 1162 state, event_type, subtype.chunk);
1163 BUG(); 1163 BUG();
1164 break; 1164 break;
1165 1165
1166 default: 1166 default:
1167 printk(KERN_ERR "sctp impossible disposition %d " 1167 pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n",
1168 "in state %d, event_type %d, event_id %d\n",
1169 status, state, event_type, subtype.chunk); 1168 status, state, event_type, subtype.chunk);
1170 BUG(); 1169 BUG();
1171 break; 1170 break;
@@ -1679,8 +1678,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
1679 sctp_cmd_send_asconf(asoc); 1678 sctp_cmd_send_asconf(asoc);
1680 break; 1679 break;
1681 default: 1680 default:
1682 printk(KERN_WARNING "Impossible command: %u, %p\n", 1681 pr_warn("Impossible command: %u, %p\n",
1683 cmd->verb, cmd->obj.ptr); 1682 cmd->verb, cmd->obj.ptr);
1684 break; 1683 break;
1685 } 1684 }
1686 1685
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d344dc481ccc..4b4eb7c96bbd 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -50,6 +50,8 @@
50 * be incorporated into the next SCTP release. 50 * be incorporated into the next SCTP release.
51 */ 51 */
52 52
53#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
54
53#include <linux/types.h> 55#include <linux/types.h>
54#include <linux/kernel.h> 56#include <linux/kernel.h>
55#include <linux/ip.h> 57#include <linux/ip.h>
@@ -1138,18 +1140,16 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep,
1138 if (unlikely(!link)) { 1140 if (unlikely(!link)) {
1139 if (from_addr.sa.sa_family == AF_INET6) { 1141 if (from_addr.sa.sa_family == AF_INET6) {
1140 if (net_ratelimit()) 1142 if (net_ratelimit())
1141 printk(KERN_WARNING 1143 pr_warn("%s association %p could not find address %pI6\n",
1142 "%s association %p could not find address %pI6\n", 1144 __func__,
1143 __func__, 1145 asoc,
1144 asoc, 1146 &from_addr.v6.sin6_addr);
1145 &from_addr.v6.sin6_addr);
1146 } else { 1147 } else {
1147 if (net_ratelimit()) 1148 if (net_ratelimit())
1148 printk(KERN_WARNING 1149 pr_warn("%s association %p could not find address %pI4\n",
1149 "%s association %p could not find address %pI4\n", 1150 __func__,
1150 __func__, 1151 asoc,
1151 asoc, 1152 &from_addr.v4.sin_addr.s_addr);
1152 &from_addr.v4.sin_addr.s_addr);
1153 } 1153 }
1154 return SCTP_DISPOSITION_DISCARD; 1154 return SCTP_DISPOSITION_DISCARD;
1155 } 1155 }
diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c
index 6d9b3aafcc5d..546d4387fb3c 100644
--- a/net/sctp/sm_statetable.c
+++ b/net/sctp/sm_statetable.c
@@ -46,6 +46,8 @@
46 * be incorporated into the next SCTP release. 46 * be incorporated into the next SCTP release.
47 */ 47 */
48 48
49#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
50
49#include <linux/skbuff.h> 51#include <linux/skbuff.h>
50#include <net/sctp/sctp.h> 52#include <net/sctp/sctp.h>
51#include <net/sctp/sm.h> 53#include <net/sctp/sm.h>
@@ -66,15 +68,19 @@ static const sctp_sm_table_entry_t bug = {
66 .name = "sctp_sf_bug" 68 .name = "sctp_sf_bug"
67}; 69};
68 70
69#define DO_LOOKUP(_max, _type, _table) \ 71#define DO_LOOKUP(_max, _type, _table) \
70 if ((event_subtype._type > (_max))) { \ 72({ \
71 printk(KERN_WARNING \ 73 const sctp_sm_table_entry_t *rtn; \
72 "sctp table %p possible attack:" \ 74 \
73 " event %d exceeds max %d\n", \ 75 if ((event_subtype._type > (_max))) { \
74 _table, event_subtype._type, _max); \ 76 pr_warn("table %p possible attack: event %d exceeds max %d\n", \
75 return &bug; \ 77 _table, event_subtype._type, _max); \
76 } \ 78 rtn = &bug; \
77 return &_table[event_subtype._type][(int)state]; 79 } else \
80 rtn = &_table[event_subtype._type][(int)state]; \
81 \
82 rtn; \
83})
78 84
79const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, 85const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
80 sctp_state_t state, 86 sctp_state_t state,
@@ -83,21 +89,15 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type,
83 switch (event_type) { 89 switch (event_type) {
84 case SCTP_EVENT_T_CHUNK: 90 case SCTP_EVENT_T_CHUNK:
85 return sctp_chunk_event_lookup(event_subtype.chunk, state); 91 return sctp_chunk_event_lookup(event_subtype.chunk, state);
86 break;
87 case SCTP_EVENT_T_TIMEOUT: 92 case SCTP_EVENT_T_TIMEOUT:
88 DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, 93 return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout,
89 timeout_event_table); 94 timeout_event_table);
90 break;
91
92 case SCTP_EVENT_T_OTHER: 95 case SCTP_EVENT_T_OTHER:
93 DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, other_event_table); 96 return DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other,
94 break; 97 other_event_table);
95
96 case SCTP_EVENT_T_PRIMITIVE: 98 case SCTP_EVENT_T_PRIMITIVE:
97 DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive, 99 return DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive,
98 primitive_event_table); 100 primitive_event_table);
99 break;
100
101 default: 101 default:
102 /* Yikes! We got an illegal event type. */ 102 /* Yikes! We got an illegal event type. */
103 return &bug; 103 return &bug;
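
The DO_LOOKUP() rewrite replaces a macro that executed a hidden return inside its caller with a GCC statement expression: a ({ ... }) block evaluates to its last expression, so the lookup can be used as an ordinary value and the callers above become plain return statements. The extension in isolation, with a hypothetical clamp_max() helper for illustration:

	/* value of the block is its final expression (__v here) */
	#define clamp_max(x, hi)		\
	({					\
		typeof(x) __v = (x);		\
		__v > (hi) ? (hi) : __v;	\
	})

	int n = clamp_max(count, 255);	/* usable wherever an expression fits */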
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index fbb70770ad05..e34ca9cc1167 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -57,6 +57,8 @@
57 * be incorporated into the next SCTP release. 57 * be incorporated into the next SCTP release.
58 */ 58 */
59 59
60#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
61
60#include <linux/types.h> 62#include <linux/types.h>
61#include <linux/kernel.h> 63#include <linux/kernel.h>
62#include <linux/wait.h> 64#include <linux/wait.h>
@@ -2469,9 +2471,8 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk,
2469 if (params.sack_delay == 0 && params.sack_freq == 0) 2471 if (params.sack_delay == 0 && params.sack_freq == 0)
2470 return 0; 2472 return 0;
2471 } else if (optlen == sizeof(struct sctp_assoc_value)) { 2473 } else if (optlen == sizeof(struct sctp_assoc_value)) {
2472 printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " 2474 pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n");
2473 "in delayed_ack socket option deprecated\n"); 2475 pr_warn("Use struct sctp_sack_info instead\n");
2474 printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n");
2475 if (copy_from_user(&params, optval, optlen)) 2476 if (copy_from_user(&params, optval, optlen))
2476 return -EFAULT; 2477 return -EFAULT;
2477 2478
@@ -2879,10 +2880,8 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned
2879 int val; 2880 int val;
2880 2881
2881 if (optlen == sizeof(int)) { 2882 if (optlen == sizeof(int)) {
2882 printk(KERN_WARNING 2883 pr_warn("Use of int in maxseg socket option deprecated\n");
2883 "SCTP: Use of int in maxseg socket option deprecated\n"); 2884 pr_warn("Use struct sctp_assoc_value instead\n");
2884 printk(KERN_WARNING
2885 "SCTP: Use struct sctp_assoc_value instead\n");
2886 if (copy_from_user(&val, optval, optlen)) 2885 if (copy_from_user(&val, optval, optlen))
2887 return -EFAULT; 2886 return -EFAULT;
2888 params.assoc_id = 0; 2887 params.assoc_id = 0;
@@ -3132,10 +3131,8 @@ static int sctp_setsockopt_maxburst(struct sock *sk,
3132 int assoc_id = 0; 3131 int assoc_id = 0;
3133 3132
3134 if (optlen == sizeof(int)) { 3133 if (optlen == sizeof(int)) {
3135 printk(KERN_WARNING 3134 pr_warn("Use of int in max_burst socket option deprecated\n");
3136 "SCTP: Use of int in max_burst socket option deprecated\n"); 3135 pr_warn("Use struct sctp_assoc_value instead\n");
3137 printk(KERN_WARNING
3138 "SCTP: Use struct sctp_assoc_value instead\n");
3139 if (copy_from_user(&val, optval, optlen)) 3136 if (copy_from_user(&val, optval, optlen))
3140 return -EFAULT; 3137 return -EFAULT;
3141 } else if (optlen == sizeof(struct sctp_assoc_value)) { 3138 } else if (optlen == sizeof(struct sctp_assoc_value)) {
@@ -3606,7 +3603,40 @@ out:
3606/* The SCTP ioctl handler. */ 3603/* The SCTP ioctl handler. */
3607SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg) 3604SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg)
3608{ 3605{
3609 return -ENOIOCTLCMD; 3606 int rc = -ENOTCONN;
3607
3608 sctp_lock_sock(sk);
3609
3610 /*
3611 * SEQPACKET-style sockets in LISTENING state are valid, for
3612 * SCTP, so only discard TCP-style sockets in LISTENING state.
3613 */
3614 if (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))
3615 goto out;
3616
3617 switch (cmd) {
3618 case SIOCINQ: {
3619 struct sk_buff *skb;
3620 unsigned int amount = 0;
3621
3622 skb = skb_peek(&sk->sk_receive_queue);
3623 if (skb != NULL) {
3624 /*
3625 * We will only return the amount of this packet since
3626 * that is all that will be read.
3627 */
3628 amount = skb->len;
3629 }
3630 rc = put_user(amount, (int __user *)arg);
3631 break;
3632 }
3633 default:
3634 rc = -ENOIOCTLCMD;
3635 break;
3636 }
3637out:
3638 sctp_release_sock(sk);
3639 return rc;
3610} 3640}
3611 3641
3612/* This is the function which gets called during socket creation to 3642/* This is the function which gets called during socket creation to
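
sctp_ioctl() stops returning -ENOIOCTLCMD unconditionally and implements SIOCINQ with datagram semantics: it reports the length of the next message in the receive queue rather than the total bytes queued, since a single read consumes at most one message; TCP-style sockets in the LISTENING state are rejected up front with -ENOTCONN. From userspace the call looks like this (illustrative fragment; process_next_message() is a placeholder):

	int avail = 0;

	if (ioctl(fd, SIOCINQ, &avail) == 0)
		/* avail counts only the next SCTP message */
		process_next_message(fd, avail);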
@@ -3865,7 +3895,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len,
3865 } 3895 }
3866 3896
3867out: 3897out:
3868 return (retval); 3898 return retval;
3869} 3899}
3870 3900
3871 3901
@@ -3921,7 +3951,7 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len,
3921 } 3951 }
3922 3952
3923out: 3953out:
3924 return (retval); 3954 return retval;
3925} 3955}
3926 3956
3927/* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS) 3957/* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS)
@@ -4292,9 +4322,8 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len,
4292 if (copy_from_user(&params, optval, len)) 4322 if (copy_from_user(&params, optval, len))
4293 return -EFAULT; 4323 return -EFAULT;
4294 } else if (len == sizeof(struct sctp_assoc_value)) { 4324 } else if (len == sizeof(struct sctp_assoc_value)) {
4295 printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " 4325 pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n");
4296 "in delayed_ack socket option deprecated\n"); 4326 pr_warn("Use struct sctp_sack_info instead\n");
4297 printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n");
4298 if (copy_from_user(&params, optval, len)) 4327 if (copy_from_user(&params, optval, len))
4299 return -EFAULT; 4328 return -EFAULT;
4300 } else 4329 } else
@@ -4940,10 +4969,8 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len,
4940 struct sctp_association *asoc; 4969 struct sctp_association *asoc;
4941 4970
4942 if (len == sizeof(int)) { 4971 if (len == sizeof(int)) {
4943 printk(KERN_WARNING 4972 pr_warn("Use of int in maxseg socket option deprecated\n");
4944 "SCTP: Use of int in maxseg socket option deprecated\n"); 4973 pr_warn("Use struct sctp_assoc_value instead\n");
4945 printk(KERN_WARNING
4946 "SCTP: Use struct sctp_assoc_value instead\n");
4947 params.assoc_id = 0; 4974 params.assoc_id = 0;
4948 } else if (len >= sizeof(struct sctp_assoc_value)) { 4975 } else if (len >= sizeof(struct sctp_assoc_value)) {
4949 len = sizeof(struct sctp_assoc_value); 4976 len = sizeof(struct sctp_assoc_value);
@@ -5034,10 +5061,8 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len,
5034 struct sctp_association *asoc; 5061 struct sctp_association *asoc;
5035 5062
5036 if (len == sizeof(int)) { 5063 if (len == sizeof(int)) {
5037 printk(KERN_WARNING 5064 pr_warn("Use of int in max_burst socket option deprecated\n");
5038 "SCTP: Use of int in max_burst socket option deprecated\n"); 5065 pr_warn("Use struct sctp_assoc_value instead\n");
5039 printk(KERN_WARNING
5040 "SCTP: Use struct sctp_assoc_value instead\n");
5041 params.assoc_id = 0; 5066 params.assoc_id = 0;
5042 } else if (len >= sizeof(struct sctp_assoc_value)) { 5067 } else if (len >= sizeof(struct sctp_assoc_value)) {
5043 len = sizeof(struct sctp_assoc_value); 5068 len = sizeof(struct sctp_assoc_value);
@@ -5580,7 +5605,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum)
5580 /* Note: sk->sk_num gets filled in if ephemeral port request. */ 5605 /* Note: sk->sk_num gets filled in if ephemeral port request. */
5581 ret = sctp_get_port_local(sk, &addr); 5606 ret = sctp_get_port_local(sk, &addr);
5582 5607
5583 return (ret ? 1 : 0); 5608 return ret ? 1 : 0;
5584} 5609}
5585 5610
5586/* 5611/*
@@ -5597,8 +5622,7 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog)
5597 tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); 5622 tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC);
5598 if (IS_ERR(tfm)) { 5623 if (IS_ERR(tfm)) {
5599 if (net_ratelimit()) { 5624 if (net_ratelimit()) {
5600 printk(KERN_INFO 5625 pr_info("failed to load transform for %s: %ld\n",
5601 "SCTP: failed to load transform for %s: %ld\n",
5602 sctp_hmac_alg, PTR_ERR(tfm)); 5626 sctp_hmac_alg, PTR_ERR(tfm));
5603 } 5627 }
5604 return -ENOSYS; 5628 return -ENOSYS;
@@ -5727,13 +5751,12 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
5727 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) 5751 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
5728 mask |= POLLERR; 5752 mask |= POLLERR;
5729 if (sk->sk_shutdown & RCV_SHUTDOWN) 5753 if (sk->sk_shutdown & RCV_SHUTDOWN)
5730 mask |= POLLRDHUP; 5754 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
5731 if (sk->sk_shutdown == SHUTDOWN_MASK) 5755 if (sk->sk_shutdown == SHUTDOWN_MASK)
5732 mask |= POLLHUP; 5756 mask |= POLLHUP;
5733 5757
5734 /* Is it readable? Reconsider this code with TCP-style support. */ 5758 /* Is it readable? Reconsider this code with TCP-style support. */
5735 if (!skb_queue_empty(&sk->sk_receive_queue) || 5759 if (!skb_queue_empty(&sk->sk_receive_queue))
5736 (sk->sk_shutdown & RCV_SHUTDOWN))
5737 mask |= POLLIN | POLLRDNORM; 5760 mask |= POLLIN | POLLRDNORM;
5738 5761
5739 /* The association is either gone or not ready. */ 5762 /* The association is either gone or not ready. */
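
The sctp_poll() hunk makes a receive-side shutdown raise POLLIN | POLLRDNORM along with POLLRDHUP, so a sleeping reader wakes and can consume the EOF instead of blocking forever; the shutdown clause consequently drops out of the queue-emptiness readability test below it:

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLRDHUP | POLLIN | POLLRDNORM;	/* readable: EOF */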
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 132046cb82fc..d3ae493d234a 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -48,6 +48,8 @@
48 * be incorporated into the next SCTP release. 48 * be incorporated into the next SCTP release.
49 */ 49 */
50 50
51#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
52
51#include <linux/slab.h> 53#include <linux/slab.h>
52#include <linux/types.h> 54#include <linux/types.h>
53#include <linux/random.h> 55#include <linux/random.h>
@@ -244,10 +246,9 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
244 struct dst_entry *dst; 246 struct dst_entry *dst;
245 247
246 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { 248 if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
247 printk(KERN_WARNING "%s: Reported pmtu %d too low, " 249 pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
248 "using default minimum of %d\n", 250 __func__, pmtu,
249 __func__, pmtu, 251 SCTP_DEFAULT_MINSEGMENT);
250 SCTP_DEFAULT_MINSEGMENT);
251 /* Use default minimum segment size and disable 252 /* Use default minimum segment size and disable
252 * pmtu discovery on this transport. 253 * pmtu discovery on this transport.
253 */ 254 */
diff --git a/net/socket.c b/net/socket.c
index 9eac5c394134..abf3e2561521 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -209,8 +209,8 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr)
209 * specified. Zero is returned for a success. 209 * specified. Zero is returned for a success.
210 */ 210 */
211 211
212int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, 212static int move_addr_to_user(struct sockaddr *kaddr, int klen,
213 int __user *ulen) 213 void __user *uaddr, int __user *ulen)
214{ 214{
215 int err; 215 int err;
216 int len; 216 int len;
@@ -536,14 +536,13 @@ void sock_release(struct socket *sock)
536} 536}
537EXPORT_SYMBOL(sock_release); 537EXPORT_SYMBOL(sock_release);
538 538
539int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, 539int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
540 union skb_shared_tx *shtx)
541{ 540{
542 shtx->flags = 0; 541 *tx_flags = 0;
543 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) 542 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
544 shtx->hardware = 1; 543 *tx_flags |= SKBTX_HW_TSTAMP;
545 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) 544 if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
546 shtx->software = 1; 545 *tx_flags |= SKBTX_SW_TSTAMP;
547 return 0; 546 return 0;
548} 547}
549EXPORT_SYMBOL(sock_tx_timestamp); 548EXPORT_SYMBOL(sock_tx_timestamp);
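
sock_tx_timestamp() moves from filling the bitfields of union skb_shared_tx to setting SKBTX_* bits in a plain flags byte supplied by the caller, who then stores it into the skb. A sketch of the new calling convention (the skb_shinfo destination field is an assumption about the companion skbuff change, which is not shown here):

	__u8 tx_flags = 0;

	sock_tx_timestamp(sk, &tx_flags);	/* may set SKBTX_HW_TSTAMP / SKBTX_SW_TSTAMP */
	skb_shinfo(skb)->tx_flags = tx_flags;	/* assumed destination */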
@@ -663,7 +662,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
663} 662}
664EXPORT_SYMBOL_GPL(__sock_recv_timestamp); 663EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
665 664
666inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) 665static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
666 struct sk_buff *skb)
667{ 667{
668 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) 668 if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
669 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, 669 put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
@@ -1920,7 +1920,8 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags)
1920 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted 1920 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
1921 * checking falls down on this. 1921 * checking falls down on this.
1922 */ 1922 */
1923 if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, 1923 if (copy_from_user(ctl_buf,
1924 (void __user __force *)msg_sys.msg_control,
1924 ctl_len)) 1925 ctl_len))
1925 goto out_freectl; 1926 goto out_freectl;
1926 msg_sys.msg_control = ctl_buf; 1927 msg_sys.msg_control = ctl_buf;
@@ -3055,14 +3056,19 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
3055 char *optval, int *optlen) 3056 char *optval, int *optlen)
3056{ 3057{
3057 mm_segment_t oldfs = get_fs(); 3058 mm_segment_t oldfs = get_fs();
3059 char __user *uoptval;
3060 int __user *uoptlen;
3058 int err; 3061 int err;
3059 3062
3063 uoptval = (char __user __force *) optval;
3064 uoptlen = (int __user __force *) optlen;
3065
3060 set_fs(KERNEL_DS); 3066 set_fs(KERNEL_DS);
3061 if (level == SOL_SOCKET) 3067 if (level == SOL_SOCKET)
3062 err = sock_getsockopt(sock, level, optname, optval, optlen); 3068 err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
3063 else 3069 else
3064 err = sock->ops->getsockopt(sock, level, optname, optval, 3070 err = sock->ops->getsockopt(sock, level, optname, uoptval,
3065 optlen); 3071 uoptlen);
3066 set_fs(oldfs); 3072 set_fs(oldfs);
3067 return err; 3073 return err;
3068} 3074}
@@ -3072,13 +3078,16 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
3072 char *optval, unsigned int optlen) 3078 char *optval, unsigned int optlen)
3073{ 3079{
3074 mm_segment_t oldfs = get_fs(); 3080 mm_segment_t oldfs = get_fs();
3081 char __user *uoptval;
3075 int err; 3082 int err;
3076 3083
3084 uoptval = (char __user __force *) optval;
3085
3077 set_fs(KERNEL_DS); 3086 set_fs(KERNEL_DS);
3078 if (level == SOL_SOCKET) 3087 if (level == SOL_SOCKET)
3079 err = sock_setsockopt(sock, level, optname, optval, optlen); 3088 err = sock_setsockopt(sock, level, optname, uoptval, optlen);
3080 else 3089 else
3081 err = sock->ops->setsockopt(sock, level, optname, optval, 3090 err = sock->ops->setsockopt(sock, level, optname, uoptval,
3082 optlen); 3091 optlen);
3083 set_fs(oldfs); 3092 set_fs(oldfs);
3084 return err; 3093 return err;
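
The kernel_getsockopt()/kernel_setsockopt() hunks are sparse hygiene rather than a behavioural change: under set_fs(KERNEL_DS) it is legal to hand kernel pointers to the __user-annotated sockopt paths, but sparse complains about the address-space mismatch, so each pointer is laundered once through an explicit __force __user cast:

	char __user *uoptval = (char __user __force *)optval;	/* valid under KERNEL_DS */

	set_fs(KERNEL_DS);
	err = sock_setsockopt(sock, level, optname, uoptval, optlen);
	set_fs(oldfs);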
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 12c485982814..3835ce35e224 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1050,7 +1050,7 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags)
1050out: 1050out:
1051 if (acred->machine_cred != gss_cred->gc_machine_cred) 1051 if (acred->machine_cred != gss_cred->gc_machine_cred)
1052 return 0; 1052 return 0;
1053 return (rc->cr_uid == acred->uid); 1053 return rc->cr_uid == acred->uid;
1054} 1054}
1055 1055
1056/* 1056/*
diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c
index 310b78e99456..c586e92bcf76 100644
--- a/net/sunrpc/auth_gss/gss_generic_token.c
+++ b/net/sunrpc/auth_gss/gss_generic_token.c
@@ -76,19 +76,19 @@ static int
76der_length_size( int length) 76der_length_size( int length)
77{ 77{
78 if (length < (1<<7)) 78 if (length < (1<<7))
79 return(1); 79 return 1;
80 else if (length < (1<<8)) 80 else if (length < (1<<8))
81 return(2); 81 return 2;
82#if (SIZEOF_INT == 2) 82#if (SIZEOF_INT == 2)
83 else 83 else
84 return(3); 84 return 3;
85#else 85#else
86 else if (length < (1<<16)) 86 else if (length < (1<<16))
87 return(3); 87 return 3;
88 else if (length < (1<<24)) 88 else if (length < (1<<24))
89 return(4); 89 return 4;
90 else 90 else
91 return(5); 91 return 5;
92#endif 92#endif
93} 93}
94 94
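
The long run of return(x) to return x conversions here, and the matching one-liners in the sctp, sunrpc and tipc files elsewhere in this diff, apply the kernel style rule that return is not a function; checkpatch.pl flags the parenthesized form:

	return 1;	/* preferred */
	return (1);	/* checkpatch: "return is not a function, parentheses are not required" */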
@@ -121,14 +121,14 @@ der_read_length(unsigned char **buf, int *bufsize)
121 int ret; 121 int ret;
122 122
123 if (*bufsize < 1) 123 if (*bufsize < 1)
124 return(-1); 124 return -1;
125 sf = *(*buf)++; 125 sf = *(*buf)++;
126 (*bufsize)--; 126 (*bufsize)--;
127 if (sf & 0x80) { 127 if (sf & 0x80) {
128 if ((sf &= 0x7f) > ((*bufsize)-1)) 128 if ((sf &= 0x7f) > ((*bufsize)-1))
129 return(-1); 129 return -1;
130 if (sf > SIZEOF_INT) 130 if (sf > SIZEOF_INT)
131 return (-1); 131 return -1;
132 ret = 0; 132 ret = 0;
133 for (; sf; sf--) { 133 for (; sf; sf--) {
134 ret = (ret<<8) + (*(*buf)++); 134 ret = (ret<<8) + (*(*buf)++);
@@ -138,7 +138,7 @@ der_read_length(unsigned char **buf, int *bufsize)
138 ret = sf; 138 ret = sf;
139 } 139 }
140 140
141 return(ret); 141 return ret;
142} 142}
143 143
144/* returns the length of a token, given the mech oid and the body size */ 144/* returns the length of a token, given the mech oid and the body size */
@@ -148,7 +148,7 @@ g_token_size(struct xdr_netobj *mech, unsigned int body_size)
148{ 148{
149 /* set body_size to sequence contents size */ 149 /* set body_size to sequence contents size */
150 body_size += 2 + (int) mech->len; /* NEED overflow check */ 150 body_size += 2 + (int) mech->len; /* NEED overflow check */
151 return(1 + der_length_size(body_size) + body_size); 151 return 1 + der_length_size(body_size) + body_size;
152} 152}
153 153
154EXPORT_SYMBOL_GPL(g_token_size); 154EXPORT_SYMBOL_GPL(g_token_size);
@@ -186,27 +186,27 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
186 int ret = 0; 186 int ret = 0;
187 187
188 if ((toksize-=1) < 0) 188 if ((toksize-=1) < 0)
189 return(G_BAD_TOK_HEADER); 189 return G_BAD_TOK_HEADER;
190 if (*buf++ != 0x60) 190 if (*buf++ != 0x60)
191 return(G_BAD_TOK_HEADER); 191 return G_BAD_TOK_HEADER;
192 192
193 if ((seqsize = der_read_length(&buf, &toksize)) < 0) 193 if ((seqsize = der_read_length(&buf, &toksize)) < 0)
194 return(G_BAD_TOK_HEADER); 194 return G_BAD_TOK_HEADER;
195 195
196 if (seqsize != toksize) 196 if (seqsize != toksize)
197 return(G_BAD_TOK_HEADER); 197 return G_BAD_TOK_HEADER;
198 198
199 if ((toksize-=1) < 0) 199 if ((toksize-=1) < 0)
200 return(G_BAD_TOK_HEADER); 200 return G_BAD_TOK_HEADER;
201 if (*buf++ != 0x06) 201 if (*buf++ != 0x06)
202 return(G_BAD_TOK_HEADER); 202 return G_BAD_TOK_HEADER;
203 203
204 if ((toksize-=1) < 0) 204 if ((toksize-=1) < 0)
205 return(G_BAD_TOK_HEADER); 205 return G_BAD_TOK_HEADER;
206 toid.len = *buf++; 206 toid.len = *buf++;
207 207
208 if ((toksize-=toid.len) < 0) 208 if ((toksize-=toid.len) < 0)
209 return(G_BAD_TOK_HEADER); 209 return G_BAD_TOK_HEADER;
210 toid.data = buf; 210 toid.data = buf;
211 buf+=toid.len; 211 buf+=toid.len;
212 212
@@ -217,17 +217,17 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size,
217 to return G_BAD_TOK_HEADER if the token header is in fact bad */ 217 to return G_BAD_TOK_HEADER if the token header is in fact bad */
218 218
219 if ((toksize-=2) < 0) 219 if ((toksize-=2) < 0)
220 return(G_BAD_TOK_HEADER); 220 return G_BAD_TOK_HEADER;
221 221
222 if (ret) 222 if (ret)
223 return(ret); 223 return ret;
224 224
225 if (!ret) { 225 if (!ret) {
226 *buf_in = buf; 226 *buf_in = buf;
227 *body_size = toksize; 227 *body_size = toksize;
228 } 228 }
229 229
230 return(ret); 230 return ret;
231} 231}
232 232
233EXPORT_SYMBOL_GPL(g_verify_token_header); 233EXPORT_SYMBOL_GPL(g_verify_token_header);
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index 415c013ba382..62ac90c62cb1 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -162,5 +162,5 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
162 *seqnum = ((plain[0]) | 162 *seqnum = ((plain[0]) |
163 (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24)); 163 (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24));
164 164
165 return (0); 165 return 0;
166} 166}
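The krb5_get_seq_num() hunk only drops the parentheses; the surrounding expression assembles a 32-bit sequence number from four bytes, least significant first, and can be exercised standalone:

#include <stdint.h>
#include <stdio.h>

/* Rebuild the sequence number from the decrypted buffer, least
 * significant byte first, exactly as the expression above does. */
static uint32_t seqnum_from_bytes(const unsigned char plain[4])
{
    return (uint32_t)plain[0] |
           ((uint32_t)plain[1] << 8) |
           ((uint32_t)plain[2] << 16) |
           ((uint32_t)plain[3] << 24);
}

int main(void)
{
    unsigned char plain[4] = { 0x2c, 0x01, 0x00, 0x00 };

    printf("%u\n", seqnum_from_bytes(plain));   /* prints 300 */
    return 0;
}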
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 2689de39dc78..8b4061049d76 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -331,7 +331,7 @@ gss_delete_sec_context(struct gss_ctx **context_handle)
331 *context_handle); 331 *context_handle);
332 332
333 if (!*context_handle) 333 if (!*context_handle)
334 return(GSS_S_NO_CONTEXT); 334 return GSS_S_NO_CONTEXT;
335 if ((*context_handle)->internal_ctx_id) 335 if ((*context_handle)->internal_ctx_id)
336 (*context_handle)->mech_type->gm_ops 336 (*context_handle)->mech_type->gm_ops
337 ->gss_delete_sec_context((*context_handle) 337 ->gss_delete_sec_context((*context_handle)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index cace6049e4a5..aa5dbda6608c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -376,7 +376,7 @@ int rpc_queue_empty(struct rpc_wait_queue *queue)
376 spin_lock_bh(&queue->lock); 376 spin_lock_bh(&queue->lock);
377 res = queue->qlen; 377 res = queue->qlen;
378 spin_unlock_bh(&queue->lock); 378 spin_unlock_bh(&queue->lock);
379 return (res == 0); 379 return res == 0;
380} 380}
381EXPORT_SYMBOL_GPL(rpc_queue_empty); 381EXPORT_SYMBOL_GPL(rpc_queue_empty);
382 382
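The rpc_queue_empty() change is again purely stylistic; the pattern it implements — snapshot a counter under the queue lock, then test the snapshot — looks like this in a userspace sketch (pthread-based, for illustration only):

#include <pthread.h>

struct wait_queue {
    pthread_mutex_t lock;
    unsigned int qlen;
};

/* Snapshot qlen under the lock so the emptiness test is consistent,
 * then return a plain boolean (no parentheses around the result). */
static int queue_empty(struct wait_queue *q)
{
    unsigned int len;

    pthread_mutex_lock(&q->lock);
    len = q->qlen;
    pthread_mutex_unlock(&q->lock);
    return len == 0;
}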
diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index c048543ffbeb..8a2e89bffde5 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -41,11 +41,6 @@
41#include "cluster.h" 41#include "cluster.h"
42#include "net.h" 42#include "net.h"
43 43
44u32 tipc_get_addr(void)
45{
46 return tipc_own_addr;
47}
48
49/** 44/**
50 * tipc_addr_domain_valid - validates a network domain address 45 * tipc_addr_domain_valid - validates a network domain address
51 * 46 *
@@ -89,7 +84,7 @@ int tipc_addr_domain_valid(u32 addr)
89 84
90int tipc_addr_node_valid(u32 addr) 85int tipc_addr_node_valid(u32 addr)
91{ 86{
92 return (tipc_addr_domain_valid(addr) && tipc_node(addr)); 87 return tipc_addr_domain_valid(addr) && tipc_node(addr);
93} 88}
94 89
95int tipc_in_scope(u32 domain, u32 addr) 90int tipc_in_scope(u32 domain, u32 addr)
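tipc_addr_node_valid() above requires both a valid domain and a non-zero node field; the <zone.cluster.node> packing behind tipc_node() can be sketched as follows (field layout as in net/tipc/addr.h):

#include <stdio.h>

static unsigned int tipc_zone(unsigned int addr)    { return addr >> 24; }
static unsigned int tipc_cluster(unsigned int addr) { return (addr >> 12) & 0xfff; }
static unsigned int tipc_node(unsigned int addr)    { return addr & 0xfff; }

int main(void)
{
    unsigned int addr = (1u << 24) | (1u << 12) | 10;   /* <1.1.10> */

    printf("<%u.%u.%u>\n",
           tipc_zone(addr), tipc_cluster(addr), tipc_node(addr));
    return 0;
}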
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a008c6689305..22a60fc98392 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -121,6 +121,9 @@ static DEFINE_SPINLOCK(bc_lock);
121 121
122const char tipc_bclink_name[] = "broadcast-link"; 122const char tipc_bclink_name[] = "broadcast-link";
123 123
124static void tipc_nmap_diff(struct tipc_node_map *nm_a,
125 struct tipc_node_map *nm_b,
126 struct tipc_node_map *nm_diff);
124 127
125static u32 buf_seqno(struct sk_buff *buf) 128static u32 buf_seqno(struct sk_buff *buf)
126{ 129{
@@ -143,6 +146,19 @@ static void bcbuf_decr_acks(struct sk_buff *buf)
143} 146}
144 147
145 148
149static void bclink_set_last_sent(void)
150{
151 if (bcl->next_out)
152 bcl->fsm_msg_cnt = mod(buf_seqno(bcl->next_out) - 1);
153 else
154 bcl->fsm_msg_cnt = mod(bcl->next_out_no - 1);
155}
156
157u32 tipc_bclink_get_last_sent(void)
158{
159 return bcl->fsm_msg_cnt;
160}
161
146/** 162/**
147 * bclink_set_gap - set gap according to contents of current deferred pkt queue 163 * bclink_set_gap - set gap according to contents of current deferred pkt queue
148 * 164 *
@@ -171,7 +187,7 @@ static void bclink_set_gap(struct tipc_node *n_ptr)
171 187
172static int bclink_ack_allowed(u32 n) 188static int bclink_ack_allowed(u32 n)
173{ 189{
174 return((n % TIPC_MIN_LINK_WIN) == tipc_own_tag); 190 return (n % TIPC_MIN_LINK_WIN) == tipc_own_tag;
175} 191}
176 192
177 193
@@ -237,8 +253,10 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked)
237 253
238 /* Try resolving broadcast link congestion, if necessary */ 254 /* Try resolving broadcast link congestion, if necessary */
239 255
240 if (unlikely(bcl->next_out)) 256 if (unlikely(bcl->next_out)) {
241 tipc_link_push_queue(bcl); 257 tipc_link_push_queue(bcl);
258 bclink_set_last_sent();
259 }
242 if (unlikely(released && !list_empty(&bcl->waiting_ports))) 260 if (unlikely(released && !list_empty(&bcl->waiting_ports)))
243 tipc_link_wakeup_ports(bcl, 0); 261 tipc_link_wakeup_ports(bcl, 0);
244 spin_unlock_bh(&bc_lock); 262 spin_unlock_bh(&bc_lock);
@@ -272,7 +290,7 @@ static void bclink_send_nack(struct tipc_node *n_ptr)
272 if (!less(n_ptr->bclink.gap_after, n_ptr->bclink.gap_to)) 290 if (!less(n_ptr->bclink.gap_after, n_ptr->bclink.gap_to))
273 return; 291 return;
274 292
275 buf = buf_acquire(INT_H_SIZE); 293 buf = tipc_buf_acquire(INT_H_SIZE);
276 if (buf) { 294 if (buf) {
277 msg = buf_msg(buf); 295 msg = buf_msg(buf);
278 tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, 296 tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG,
@@ -395,7 +413,7 @@ int tipc_bclink_send_msg(struct sk_buff *buf)
395 if (unlikely(res == -ELINKCONG)) 413 if (unlikely(res == -ELINKCONG))
396 buf_discard(buf); 414 buf_discard(buf);
397 else 415 else
398 bcl->stats.sent_info++; 416 bclink_set_last_sent();
399 417
400 if (bcl->out_queue_size > bcl->stats.max_queue_sz) 418 if (bcl->out_queue_size > bcl->stats.max_queue_sz)
401 bcl->stats.max_queue_sz = bcl->out_queue_size; 419 bcl->stats.max_queue_sz = bcl->out_queue_size;
@@ -529,15 +547,6 @@ receive:
529 tipc_node_unlock(node); 547 tipc_node_unlock(node);
530} 548}
531 549
532u32 tipc_bclink_get_last_sent(void)
533{
534 u32 last_sent = mod(bcl->next_out_no - 1);
535
536 if (bcl->next_out)
537 last_sent = mod(buf_seqno(bcl->next_out) - 1);
538 return last_sent;
539}
540
541u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) 550u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr)
542{ 551{
543 return (n_ptr->bclink.supported && 552 return (n_ptr->bclink.supported &&
@@ -570,6 +579,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
570 msg = buf_msg(buf); 579 msg = buf_msg(buf);
571 msg_set_non_seq(msg, 1); 580 msg_set_non_seq(msg, 1);
572 msg_set_mc_netid(msg, tipc_net_id); 581 msg_set_mc_netid(msg, tipc_net_id);
582 bcl->stats.sent_info++;
573 } 583 }
574 584
575 /* Send buffer over bearers until all targets reached */ 585 /* Send buffer over bearers until all targets reached */
@@ -609,11 +619,13 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
609 bcbearer->remains = bcbearer->remains_new; 619 bcbearer->remains = bcbearer->remains_new;
610 } 620 }
611 621
612 /* Unable to reach all targets */ 622 /*
623 * Unable to reach all targets (indicate success, since currently
624 * there isn't code in place to properly block & unblock the
625 * pseudo-bearer used by the broadcast link)
626 */
613 627
614 bcbearer->bearer.publ.blocked = 1; 628 return TIPC_OK;
615 bcl->stats.bearer_congs++;
616 return 1;
617} 629}
618 630
619/** 631/**
@@ -862,8 +874,9 @@ void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
862 * @nm_diff: output node map A-B (i.e. nodes of A that are not in B) 874 * @nm_diff: output node map A-B (i.e. nodes of A that are not in B)
863 */ 875 */
864 876
865void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, 877static void tipc_nmap_diff(struct tipc_node_map *nm_a,
866 struct tipc_node_map *nm_diff) 878 struct tipc_node_map *nm_b,
879 struct tipc_node_map *nm_diff)
867{ 880{
868 int stop = ARRAY_SIZE(nm_a->map); 881 int stop = ARRAY_SIZE(nm_a->map);
869 int w; 882 int w;
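The consolidation above gives the broadcast link one place, bclink_set_last_sent(), that records the last packet actually sent, with tipc_bclink_get_last_sent() reading it back. With TIPC's 16-bit sequence arithmetic the computation reduces to this sketch (mod() as in net/tipc/link.h; field names abstracted):

static unsigned int mod(unsigned int x)
{
    return x & 0xffff;  /* 16-bit sequence space */
}

/* Last packet actually sent: one before the first still-queued
 * packet if the send queue is non-empty, otherwise one before the
 * next sequence number to be assigned. */
static unsigned int last_sent(int have_queued, unsigned int next_out_seqno,
                              unsigned int next_out_no)
{
    return have_queued ? mod(next_out_seqno - 1) : mod(next_out_no - 1);
}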
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index e8c2b81658c7..011c03f0a4ab 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -84,9 +84,6 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_m
84 return !memcmp(nm_a, nm_b, sizeof(*nm_a)); 84 return !memcmp(nm_a, nm_b, sizeof(*nm_a));
85} 85}
86 86
87void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b,
88 struct tipc_node_map *nm_diff);
89
90void tipc_port_list_add(struct port_list *pl_ptr, u32 port); 87void tipc_port_list_add(struct port_list *pl_ptr, u32 port);
91void tipc_port_list_free(struct port_list *pl_ptr); 88void tipc_port_list_free(struct port_list *pl_ptr);
92 89
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 52ae17b2583e..9927d1d56c4f 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -63,7 +63,7 @@ static int media_name_valid(const char *name)
63 len = strlen(name); 63 len = strlen(name);
64 if ((len + 1) > TIPC_MAX_MEDIA_NAME) 64 if ((len + 1) > TIPC_MAX_MEDIA_NAME)
65 return 0; 65 return 0;
66 return (strspn(name, tipc_alphabet) == len); 66 return strspn(name, tipc_alphabet) == len;
67} 67}
68 68
69/** 69/**
@@ -288,9 +288,6 @@ static struct bearer *bearer_find(const char *name)
288 struct bearer *b_ptr; 288 struct bearer *b_ptr;
289 u32 i; 289 u32 i;
290 290
291 if (tipc_mode != TIPC_NET_MODE)
292 return NULL;
293
294 for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) { 291 for (i = 0, b_ptr = tipc_bearers; i < MAX_BEARERS; i++, b_ptr++) {
295 if (b_ptr->active && (!strcmp(b_ptr->publ.name, name))) 292 if (b_ptr->active && (!strcmp(b_ptr->publ.name, name)))
296 return b_ptr; 293 return b_ptr;
@@ -559,8 +556,6 @@ restart:
559 } 556 }
560 557
561 b_ptr = &tipc_bearers[bearer_id]; 558 b_ptr = &tipc_bearers[bearer_id];
562 memset(b_ptr, 0, sizeof(struct bearer));
563
564 strcpy(b_ptr->publ.name, name); 559 strcpy(b_ptr->publ.name, name);
565 res = m_ptr->enable_bearer(&b_ptr->publ); 560 res = m_ptr->enable_bearer(&b_ptr->publ);
566 if (res) { 561 if (res) {
@@ -630,30 +625,17 @@ int tipc_block_bearer(const char *name)
630 * Note: This routine assumes caller holds tipc_net_lock. 625 * Note: This routine assumes caller holds tipc_net_lock.
631 */ 626 */
632 627
633static int bearer_disable(const char *name) 628static int bearer_disable(struct bearer *b_ptr)
634{ 629{
635 struct bearer *b_ptr;
636 struct link *l_ptr; 630 struct link *l_ptr;
637 struct link *temp_l_ptr; 631 struct link *temp_l_ptr;
638 632
639 b_ptr = bearer_find(name); 633 info("Disabling bearer <%s>\n", b_ptr->publ.name);
640 if (!b_ptr) {
641 warn("Attempt to disable unknown bearer <%s>\n", name);
642 return -EINVAL;
643 }
644
645 info("Disabling bearer <%s>\n", name);
646 tipc_disc_stop_link_req(b_ptr->link_req); 634 tipc_disc_stop_link_req(b_ptr->link_req);
647 spin_lock_bh(&b_ptr->publ.lock); 635 spin_lock_bh(&b_ptr->publ.lock);
648 b_ptr->link_req = NULL; 636 b_ptr->link_req = NULL;
649 b_ptr->publ.blocked = 1; 637 b_ptr->publ.blocked = 1;
650 if (b_ptr->media->disable_bearer) { 638 b_ptr->media->disable_bearer(&b_ptr->publ);
651 spin_unlock_bh(&b_ptr->publ.lock);
652 write_unlock_bh(&tipc_net_lock);
653 b_ptr->media->disable_bearer(&b_ptr->publ);
654 write_lock_bh(&tipc_net_lock);
655 spin_lock_bh(&b_ptr->publ.lock);
656 }
657 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) { 639 list_for_each_entry_safe(l_ptr, temp_l_ptr, &b_ptr->links, link_list) {
658 tipc_link_delete(l_ptr); 640 tipc_link_delete(l_ptr);
659 } 641 }
@@ -664,10 +646,16 @@ static int bearer_disable(const char *name)
664 646
665int tipc_disable_bearer(const char *name) 647int tipc_disable_bearer(const char *name)
666{ 648{
649 struct bearer *b_ptr;
667 int res; 650 int res;
668 651
669 write_lock_bh(&tipc_net_lock); 652 write_lock_bh(&tipc_net_lock);
670 res = bearer_disable(name); 653 b_ptr = bearer_find(name);
654 if (b_ptr == NULL) {
655 warn("Attempt to disable unknown bearer <%s>\n", name);
656 res = -EINVAL;
657 } else
658 res = bearer_disable(b_ptr);
671 write_unlock_bh(&tipc_net_lock); 659 write_unlock_bh(&tipc_net_lock);
672 return res; 660 return res;
673} 661}
@@ -680,13 +668,7 @@ void tipc_bearer_stop(void)
680 668
681 for (i = 0; i < MAX_BEARERS; i++) { 669 for (i = 0; i < MAX_BEARERS; i++) {
682 if (tipc_bearers[i].active) 670 if (tipc_bearers[i].active)
683 tipc_bearers[i].publ.blocked = 1; 671 bearer_disable(&tipc_bearers[i]);
684 }
685 for (i = 0; i < MAX_BEARERS; i++) {
686 if (tipc_bearers[i].active)
687 bearer_disable(tipc_bearers[i].publ.name);
688 } 672 }
689 media_count = 0; 673 media_count = 0;
690} 674}
691
692
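The refactor above moves name resolution out of bearer_disable(), so callers that already hold a struct bearer (like tipc_bearer_stop()) can skip the lookup. The resulting call shape, in a simplified sketch with the two kernel helpers assumed as declared in the hunk:

#include <errno.h>
#include <stddef.h>

struct bearer;  /* opaque here */

extern struct bearer *bearer_find(const char *name);
extern int bearer_disable(struct bearer *b_ptr);

/* Public entry point: resolve the name, then hand the object over. */
int disable_by_name(const char *name)
{
    struct bearer *b_ptr = bearer_find(name);

    if (b_ptr == NULL)
        return -EINVAL;     /* unknown bearer */
    return bearer_disable(b_ptr);
}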
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index e68f705381bc..7fea14b98b97 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -113,25 +113,6 @@ void tipc_cltr_delete(struct cluster *c_ptr)
113 kfree(c_ptr); 113 kfree(c_ptr);
114} 114}
115 115
116u32 tipc_cltr_next_node(struct cluster *c_ptr, u32 addr)
117{
118 struct tipc_node *n_ptr;
119 u32 n_num = tipc_node(addr) + 1;
120
121 if (!c_ptr)
122 return addr;
123 for (; n_num <= c_ptr->highest_node; n_num++) {
124 n_ptr = c_ptr->nodes[n_num];
125 if (n_ptr && tipc_node_has_active_links(n_ptr))
126 return n_ptr->addr;
127 }
128 for (n_num = 1; n_num < tipc_node(addr); n_num++) {
129 n_ptr = c_ptr->nodes[n_num];
130 if (n_ptr && tipc_node_has_active_links(n_ptr))
131 return n_ptr->addr;
132 }
133 return 0;
134}
135 116
136void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr) 117void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr)
137{ 118{
@@ -232,7 +213,7 @@ struct tipc_node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector)
232static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest) 213static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest)
233{ 214{
234 u32 size = INT_H_SIZE + data_size; 215 u32 size = INT_H_SIZE + data_size;
235 struct sk_buff *buf = buf_acquire(size); 216 struct sk_buff *buf = tipc_buf_acquire(size);
236 struct tipc_msg *msg; 217 struct tipc_msg *msg;
237 218
238 if (buf) { 219 if (buf) {
diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h
index 333efb0b9c44..32636d98c9c6 100644
--- a/net/tipc/cluster.h
+++ b/net/tipc/cluster.h
@@ -75,7 +75,7 @@ void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr);
75void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest); 75void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest);
76void tipc_cltr_broadcast(struct sk_buff *buf); 76void tipc_cltr_broadcast(struct sk_buff *buf);
77int tipc_cltr_init(void); 77int tipc_cltr_init(void);
78u32 tipc_cltr_next_node(struct cluster *c_ptr, u32 addr); 78
79void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi); 79void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
80void tipc_cltr_send_local_routes(struct cluster *c_ptr, u32 dest); 80void tipc_cltr_send_local_routes(struct cluster *c_ptr, u32 dest);
81void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi); 81void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi);
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 961d1b097146..50a6133a3668 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -95,7 +95,7 @@ int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
95 return 1; 95 return 1;
96} 96}
97 97
98struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value) 98static struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value)
99{ 99{
100 struct sk_buff *buf; 100 struct sk_buff *buf;
101 __be32 value_net; 101 __be32 value_net;
@@ -109,6 +109,11 @@ struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value)
109 return buf; 109 return buf;
110} 110}
111 111
112static struct sk_buff *tipc_cfg_reply_unsigned(u32 value)
113{
114 return tipc_cfg_reply_unsigned_type(TIPC_TLV_UNSIGNED, value);
115}
116
112struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string) 117struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string)
113{ 118{
114 struct sk_buff *buf; 119 struct sk_buff *buf;
@@ -120,139 +125,6 @@ struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string)
120 return buf; 125 return buf;
121} 126}
122 127
123
124#if 0
125
126/* Now obsolete code for handling commands not yet implemented the new way */
127
128/*
129 * Some of this code assumed that the manager structure contains two added
130 * fields:
131 * u32 link_subscriptions;
132 * struct list_head link_subscribers;
133 * which are currently not present. These fields may need to be re-introduced
134 * if and when support for link subscriptions is added.
135 */
136
137void tipc_cfg_link_event(u32 addr, char *name, int up)
138{
139 /* TIPC DOESN'T HANDLE LINK EVENT SUBSCRIPTIONS AT THE MOMENT */
140}
141
142int tipc_cfg_cmd(const struct tipc_cmd_msg * msg,
143 char *data,
144 u32 sz,
145 u32 *ret_size,
146 struct tipc_portid *orig)
147{
148 int rv = -EINVAL;
149 u32 cmd = msg->cmd;
150
151 *ret_size = 0;
152 switch (cmd) {
153 case TIPC_REMOVE_LINK:
154 case TIPC_CMD_BLOCK_LINK:
155 case TIPC_CMD_UNBLOCK_LINK:
156 if (!cfg_check_connection(orig))
157 rv = link_control(msg->argv.link_name, msg->cmd, 0);
158 break;
159 case TIPC_ESTABLISH:
160 {
161 int connected;
162
163 tipc_isconnected(mng.conn_port_ref, &connected);
164 if (connected || !orig) {
165 rv = TIPC_FAILURE;
166 break;
167 }
168 rv = tipc_connect2port(mng.conn_port_ref, orig);
169 if (rv == TIPC_OK)
170 orig = 0;
171 break;
172 }
173 case TIPC_GET_PEER_ADDRESS:
174 *ret_size = link_peer_addr(msg->argv.link_name, data, sz);
175 break;
176 case TIPC_GET_ROUTES:
177 rv = TIPC_OK;
178 break;
179 default: {}
180 }
181 if (*ret_size)
182 rv = TIPC_OK;
183 return rv;
184}
185
186static void cfg_cmd_event(struct tipc_cmd_msg *msg,
187 char *data,
188 u32 sz,
189 struct tipc_portid const *orig)
190{
191 int rv = -EINVAL;
192 struct tipc_cmd_result_msg rmsg;
193 struct iovec msg_sect[2];
194 int *arg;
195
196 msg->cmd = ntohl(msg->cmd);
197
198 cfg_prepare_res_msg(msg->cmd, msg->usr_handle, rv, &rmsg, msg_sect,
199 data, 0);
200 if (ntohl(msg->magic) != TIPC_MAGIC)
201 goto exit;
202
203 switch (msg->cmd) {
204 case TIPC_CREATE_LINK:
205 if (!cfg_check_connection(orig))
206 rv = disc_create_link(&msg->argv.create_link);
207 break;
208 case TIPC_LINK_SUBSCRIBE:
209 {
210 struct subscr_data *sub;
211
212 if (mng.link_subscriptions > 64)
213 break;
214 sub = kmalloc(sizeof(*sub),
215 GFP_ATOMIC);
216 if (sub == NULL) {
217 warn("Memory squeeze; dropped remote link subscription\n");
218 break;
219 }
220 INIT_LIST_HEAD(&sub->subd_list);
221 tipc_createport(mng.user_ref,
222 (void *)sub,
223 TIPC_HIGH_IMPORTANCE,
224 0,
225 0,
226 (tipc_conn_shutdown_event)cfg_linksubscr_cancel,
227 0,
228 0,
229 (tipc_conn_msg_event)cfg_linksubscr_cancel,
230 0,
231 &sub->port_ref);
232 if (!sub->port_ref) {
233 kfree(sub);
234 break;
235 }
236 memcpy(sub->usr_handle,msg->usr_handle,
237 sizeof(sub->usr_handle));
238 sub->domain = msg->argv.domain;
239 list_add_tail(&sub->subd_list, &mng.link_subscribers);
240 tipc_connect2port(sub->port_ref, orig);
241 rmsg.retval = TIPC_OK;
242 tipc_send(sub->port_ref, 2u, msg_sect);
243 mng.link_subscriptions++;
244 return;
245 }
246 default:
247 rv = tipc_cfg_cmd(msg, data, sz, (u32 *)&msg_sect[1].iov_len, orig);
248 }
249exit:
250 rmsg.result_len = htonl(msg_sect[1].iov_len);
251 rmsg.retval = htonl(rv);
252 tipc_cfg_respond(msg_sect, 2u, orig);
253}
254#endif
255
256#define MAX_STATS_INFO 2000 128#define MAX_STATS_INFO 2000
257 129
258static struct sk_buff *tipc_show_stats(void) 130static struct sk_buff *tipc_show_stats(void)
@@ -557,14 +429,6 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
557 case TIPC_CMD_SHOW_PORTS: 429 case TIPC_CMD_SHOW_PORTS:
558 rep_tlv_buf = tipc_port_get_ports(); 430 rep_tlv_buf = tipc_port_get_ports();
559 break; 431 break;
560#if 0
561 case TIPC_CMD_SHOW_PORT_STATS:
562 rep_tlv_buf = port_show_stats(req_tlv_area, req_tlv_space);
563 break;
564 case TIPC_CMD_RESET_PORT_STATS:
565 rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED);
566 break;
567#endif
568 case TIPC_CMD_SET_LOG_SIZE: 432 case TIPC_CMD_SET_LOG_SIZE:
569 rep_tlv_buf = tipc_log_resize_cmd(req_tlv_area, req_tlv_space); 433 rep_tlv_buf = tipc_log_resize_cmd(req_tlv_area, req_tlv_space);
570 break; 434 break;
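With tipc_cfg_reply_unsigned_type() now static and tipc_cfg_reply_unsigned() pulled in from the header (see the config.h hunk below), the unsigned-reply path is private to config.c. What it builds is a single TLV wrapping a network-order 32-bit value; a simplified sketch of that framing (layout illustrative, not TIPC's exact TLV structure):

#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

struct tlv_sketch {
    uint16_t type;          /* e.g. TIPC_TLV_UNSIGNED */
    uint16_t len;           /* total TLV length, header included */
    unsigned char value[4]; /* big-endian payload */
};

static void build_unsigned_tlv(struct tlv_sketch *t, uint16_t type,
                               uint32_t value)
{
    uint32_t value_net = htonl(value);  /* wire format is big-endian */

    t->type = htons(type);
    t->len = htons(sizeof(*t));
    memcpy(t->value, &value_net, sizeof(value_net));
}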
diff --git a/net/tipc/config.h b/net/tipc/config.h
index 5cd7cc56c54d..481e12ece715 100644
--- a/net/tipc/config.h
+++ b/net/tipc/config.h
@@ -45,7 +45,6 @@
45struct sk_buff *tipc_cfg_reply_alloc(int payload_size); 45struct sk_buff *tipc_cfg_reply_alloc(int payload_size);
46int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, 46int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type,
47 void *tlv_data, int tlv_data_size); 47 void *tlv_data, int tlv_data_size);
48struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value);
49struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string); 48struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string);
50 49
51static inline struct sk_buff *tipc_cfg_reply_none(void) 50static inline struct sk_buff *tipc_cfg_reply_none(void)
@@ -53,11 +52,6 @@ static inline struct sk_buff *tipc_cfg_reply_none(void)
53 return tipc_cfg_reply_alloc(0); 52 return tipc_cfg_reply_alloc(0);
54} 53}
55 54
56static inline struct sk_buff *tipc_cfg_reply_unsigned(u32 value)
57{
58 return tipc_cfg_reply_unsigned_type(TIPC_TLV_UNSIGNED, value);
59}
60
61static inline struct sk_buff *tipc_cfg_reply_error_string(char *string) 55static inline struct sk_buff *tipc_cfg_reply_error_string(char *string)
62{ 56{
63 return tipc_cfg_reply_string_type(TIPC_TLV_ERROR_STRING, string); 57 return tipc_cfg_reply_string_type(TIPC_TLV_ERROR_STRING, string);
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 696468117985..e2a09eb8efd4 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -96,13 +96,8 @@ int tipc_net_id;
96int tipc_remote_management; 96int tipc_remote_management;
97 97
98 98
99int tipc_get_mode(void)
100{
101 return tipc_mode;
102}
103
104/** 99/**
105 * buf_acquire - creates a TIPC message buffer 100 * tipc_buf_acquire - creates a TIPC message buffer
106 * @size: message size (including TIPC header) 101 * @size: message size (including TIPC header)
107 * 102 *
108 * Returns a new buffer with data pointers set to the specified size. 103 * Returns a new buffer with data pointers set to the specified size.
@@ -111,7 +106,7 @@ int tipc_get_mode(void)
111 * There may also be unrequested tailroom present at the buffer's end. 106 * There may also be unrequested tailroom present at the buffer's end.
112 */ 107 */
113 108
114struct sk_buff *buf_acquire(u32 size) 109struct sk_buff *tipc_buf_acquire(u32 size)
115{ 110{
116 struct sk_buff *skb; 111 struct sk_buff *skb;
117 unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; 112 unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
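Aside from the rename, tipc_buf_acquire() keeps the same allocation arithmetic: headroom plus payload, rounded up to a 4-byte boundary. A quick worked example (the BUF_HEADROOM value here is hypothetical, not the kernel's):

#include <stdio.h>

#define BUF_HEADROOM 40     /* illustrative; not the kernel's value */

int main(void)
{
    unsigned int size = 33;
    unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;

    /* 40 + 33 = 73, rounded up to the next multiple of 4 = 76 */
    printf("%u\n", buf_size);
    return 0;
}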
@@ -129,7 +124,7 @@ struct sk_buff *buf_acquire(u32 size)
129 * tipc_core_stop_net - shut down TIPC networking sub-systems 124 * tipc_core_stop_net - shut down TIPC networking sub-systems
130 */ 125 */
131 126
132void tipc_core_stop_net(void) 127static void tipc_core_stop_net(void)
133{ 128{
134 tipc_eth_media_stop(); 129 tipc_eth_media_stop();
135 tipc_net_stop(); 130 tipc_net_stop();
@@ -154,7 +149,7 @@ int tipc_core_start_net(unsigned long addr)
154 * tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode 149 * tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode
155 */ 150 */
156 151
157void tipc_core_stop(void) 152static void tipc_core_stop(void)
158{ 153{
159 if (tipc_mode != TIPC_NODE_MODE) 154 if (tipc_mode != TIPC_NODE_MODE)
160 return; 155 return;
@@ -169,13 +164,14 @@ void tipc_core_stop(void)
169 tipc_nametbl_stop(); 164 tipc_nametbl_stop();
170 tipc_ref_table_stop(); 165 tipc_ref_table_stop();
171 tipc_socket_stop(); 166 tipc_socket_stop();
167 tipc_log_resize(0);
172} 168}
173 169
174/** 170/**
175 * tipc_core_start - switch TIPC from NOT RUNNING to SINGLE NODE mode 171 * tipc_core_start - switch TIPC from NOT RUNNING to SINGLE NODE mode
176 */ 172 */
177 173
178int tipc_core_start(void) 174static int tipc_core_start(void)
179{ 175{
180 int res; 176 int res;
181 177
@@ -203,7 +199,9 @@ static int __init tipc_init(void)
203{ 199{
204 int res; 200 int res;
205 201
206 tipc_log_resize(CONFIG_TIPC_LOG); 202 if (tipc_log_resize(CONFIG_TIPC_LOG) != 0)
203 warn("Unable to create log buffer\n");
204
207 info("Activated (version " TIPC_MOD_VER 205 info("Activated (version " TIPC_MOD_VER
208 " compiled " __DATE__ " " __TIME__ ")\n"); 206 " compiled " __DATE__ " " __TIME__ ")\n");
209 207
@@ -230,7 +228,6 @@ static void __exit tipc_exit(void)
230 tipc_core_stop_net(); 228 tipc_core_stop_net();
231 tipc_core_stop(); 229 tipc_core_stop();
232 info("Deactivated\n"); 230 info("Deactivated\n");
233 tipc_log_resize(0);
234} 231}
235 232
236module_init(tipc_init); 233module_init(tipc_init);
@@ -244,8 +241,6 @@ MODULE_VERSION(TIPC_MOD_VER);
244 241
245EXPORT_SYMBOL(tipc_attach); 242EXPORT_SYMBOL(tipc_attach);
246EXPORT_SYMBOL(tipc_detach); 243EXPORT_SYMBOL(tipc_detach);
247EXPORT_SYMBOL(tipc_get_addr);
248EXPORT_SYMBOL(tipc_get_mode);
249EXPORT_SYMBOL(tipc_createport); 244EXPORT_SYMBOL(tipc_createport);
250EXPORT_SYMBOL(tipc_deleteport); 245EXPORT_SYMBOL(tipc_deleteport);
251EXPORT_SYMBOL(tipc_ownidentity); 246EXPORT_SYMBOL(tipc_ownidentity);
@@ -260,23 +255,10 @@ EXPORT_SYMBOL(tipc_withdraw);
260EXPORT_SYMBOL(tipc_connect2port); 255EXPORT_SYMBOL(tipc_connect2port);
261EXPORT_SYMBOL(tipc_disconnect); 256EXPORT_SYMBOL(tipc_disconnect);
262EXPORT_SYMBOL(tipc_shutdown); 257EXPORT_SYMBOL(tipc_shutdown);
263EXPORT_SYMBOL(tipc_isconnected);
264EXPORT_SYMBOL(tipc_peer);
265EXPORT_SYMBOL(tipc_ref_valid);
266EXPORT_SYMBOL(tipc_send); 258EXPORT_SYMBOL(tipc_send);
267EXPORT_SYMBOL(tipc_send_buf);
268EXPORT_SYMBOL(tipc_send2name); 259EXPORT_SYMBOL(tipc_send2name);
269EXPORT_SYMBOL(tipc_forward2name);
270EXPORT_SYMBOL(tipc_send_buf2name);
271EXPORT_SYMBOL(tipc_forward_buf2name);
272EXPORT_SYMBOL(tipc_send2port); 260EXPORT_SYMBOL(tipc_send2port);
273EXPORT_SYMBOL(tipc_forward2port);
274EXPORT_SYMBOL(tipc_send_buf2port);
275EXPORT_SYMBOL(tipc_forward_buf2port);
276EXPORT_SYMBOL(tipc_multicast); 261EXPORT_SYMBOL(tipc_multicast);
277/* EXPORT_SYMBOL(tipc_multicast_buf); not available yet */
278EXPORT_SYMBOL(tipc_ispublished);
279EXPORT_SYMBOL(tipc_available_nodes);
280 262
281/* TIPC API for external bearers (see tipc_bearer.h) */ 263/* TIPC API for external bearers (see tipc_bearer.h) */
282 264
@@ -293,6 +275,4 @@ EXPORT_SYMBOL(tipc_createport_raw);
293EXPORT_SYMBOL(tipc_reject_msg); 275EXPORT_SYMBOL(tipc_reject_msg);
294EXPORT_SYMBOL(tipc_send_buf_fast); 276EXPORT_SYMBOL(tipc_send_buf_fast);
295EXPORT_SYMBOL(tipc_acknowledge); 277EXPORT_SYMBOL(tipc_acknowledge);
296EXPORT_SYMBOL(tipc_get_port);
297EXPORT_SYMBOL(tipc_get_handle);
298 278
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 188799017abd..e19389e57227 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -83,9 +83,7 @@
83 * Note: TIPC_LOG is configured to echo its output to the system console; 83 * Note: TIPC_LOG is configured to echo its output to the system console;
84 * user-defined buffers can be configured to do the same thing. 84 * user-defined buffers can be configured to do the same thing.
85 */ 85 */
86
87extern struct print_buf *const TIPC_NULL; 86extern struct print_buf *const TIPC_NULL;
88extern struct print_buf *const TIPC_CONS;
89extern struct print_buf *const TIPC_LOG; 87extern struct print_buf *const TIPC_LOG;
90 88
91void tipc_printf(struct print_buf *, const char *fmt, ...); 89void tipc_printf(struct print_buf *, const char *fmt, ...);
@@ -204,10 +202,7 @@ extern atomic_t tipc_user_count;
204 * Routines available to privileged subsystems 202 * Routines available to privileged subsystems
205 */ 203 */
206 204
207extern int tipc_core_start(void); 205extern int tipc_core_start_net(unsigned long);
208extern void tipc_core_stop(void);
209extern int tipc_core_start_net(unsigned long addr);
210extern void tipc_core_stop_net(void);
211extern int tipc_handler_start(void); 206extern int tipc_handler_start(void);
212extern void tipc_handler_stop(void); 207extern void tipc_handler_stop(void);
213extern int tipc_netlink_start(void); 208extern int tipc_netlink_start(void);
@@ -328,7 +323,7 @@ static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
328 return (struct tipc_msg *)skb->data; 323 return (struct tipc_msg *)skb->data;
329} 324}
330 325
331extern struct sk_buff *buf_acquire(u32 size); 326extern struct sk_buff *tipc_buf_acquire(u32 size);
332 327
333/** 328/**
334 * buf_discard - frees a TIPC message buffer 329 * buf_discard - frees a TIPC message buffer
diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c
index 1885a7edb0c8..46f51d208e5e 100644
--- a/net/tipc/dbg.c
+++ b/net/tipc/dbg.c
@@ -52,7 +52,7 @@ static struct print_buf null_buf = { NULL, 0, NULL, 0 };
52struct print_buf *const TIPC_NULL = &null_buf; 52struct print_buf *const TIPC_NULL = &null_buf;
53 53
54static struct print_buf cons_buf = { NULL, 0, NULL, 1 }; 54static struct print_buf cons_buf = { NULL, 0, NULL, 1 };
55struct print_buf *const TIPC_CONS = &cons_buf; 55static struct print_buf *const TIPC_CONS = &cons_buf;
56 56
57static struct print_buf log_buf = { NULL, 0, NULL, 1 }; 57static struct print_buf log_buf = { NULL, 0, NULL, 1 };
58struct print_buf *const TIPC_LOG = &log_buf; 58struct print_buf *const TIPC_LOG = &log_buf;
@@ -76,6 +76,10 @@ struct print_buf *const TIPC_LOG = &log_buf;
76static char print_string[TIPC_PB_MAX_STR]; 76static char print_string[TIPC_PB_MAX_STR];
77static DEFINE_SPINLOCK(print_lock); 77static DEFINE_SPINLOCK(print_lock);
78 78
79static void tipc_printbuf_reset(struct print_buf *pb);
80static int tipc_printbuf_empty(struct print_buf *pb);
81static void tipc_printbuf_move(struct print_buf *pb_to,
82 struct print_buf *pb_from);
79 83
80#define FORMAT(PTR,LEN,FMT) \ 84#define FORMAT(PTR,LEN,FMT) \
81{\ 85{\
@@ -116,7 +120,7 @@ void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size)
116 * @pb: pointer to print buffer structure 120 * @pb: pointer to print buffer structure
117 */ 121 */
118 122
119void tipc_printbuf_reset(struct print_buf *pb) 123static void tipc_printbuf_reset(struct print_buf *pb)
120{ 124{
121 if (pb->buf) { 125 if (pb->buf) {
122 pb->crs = pb->buf; 126 pb->crs = pb->buf;
@@ -132,9 +136,9 @@ void tipc_printbuf_reset(struct print_buf *pb)
132 * Returns non-zero if print buffer is empty. 136 * Returns non-zero if print buffer is empty.
133 */ 137 */
134 138
135int tipc_printbuf_empty(struct print_buf *pb) 139static int tipc_printbuf_empty(struct print_buf *pb)
136{ 140{
137 return (!pb->buf || (pb->crs == pb->buf)); 141 return !pb->buf || (pb->crs == pb->buf);
138} 142}
139 143
140/** 144/**
@@ -169,7 +173,7 @@ int tipc_printbuf_validate(struct print_buf *pb)
169 tipc_printf(pb, err); 173 tipc_printf(pb, err);
170 } 174 }
171 } 175 }
172 return (pb->crs - pb->buf + 1); 176 return pb->crs - pb->buf + 1;
173} 177}
174 178
175/** 179/**
@@ -181,7 +185,8 @@ int tipc_printbuf_validate(struct print_buf *pb)
181 * Source print buffer becomes empty if a successful move occurs. 185 * Source print buffer becomes empty if a successful move occurs.
182 */ 186 */
183 187
184void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from) 188static void tipc_printbuf_move(struct print_buf *pb_to,
189 struct print_buf *pb_from)
185{ 190{
186 int len; 191 int len;
187 192
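The print-buffer helpers made static above are small enough to restate: a buffer counts as empty when it has no backing storage or when the write cursor still sits at the start. A minimal sketch with stand-in types:

struct print_buf_sketch {
    char *buf;  /* backing storage, NULL for the null buffer */
    char *crs;  /* current write position within buf */
};

/* Mirrors tipc_printbuf_empty(): non-zero when nothing was printed. */
static int printbuf_empty(const struct print_buf_sketch *pb)
{
    return !pb->buf || (pb->crs == pb->buf);
}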
diff --git a/net/tipc/dbg.h b/net/tipc/dbg.h
index 5ef1bc8f64ef..3ba6ba8b434a 100644
--- a/net/tipc/dbg.h
+++ b/net/tipc/dbg.h
@@ -56,10 +56,7 @@ struct print_buf {
56#define TIPC_PB_MAX_STR 512 /* max printable string (with trailing NUL) */ 56#define TIPC_PB_MAX_STR 512 /* max printable string (with trailing NUL) */
57 57
58void tipc_printbuf_init(struct print_buf *pb, char *buf, u32 size); 58void tipc_printbuf_init(struct print_buf *pb, char *buf, u32 size);
59void tipc_printbuf_reset(struct print_buf *pb);
60int tipc_printbuf_empty(struct print_buf *pb);
61int tipc_printbuf_validate(struct print_buf *pb); 59int tipc_printbuf_validate(struct print_buf *pb);
62void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from);
63 60
64int tipc_log_resize(int log_size); 61int tipc_log_resize(int log_size);
65 62
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index fc1fcf5e6b53..4a7cd3719b78 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -46,16 +46,6 @@
46#define TIPC_LINK_REQ_FAST 2000 /* normal delay if bearer has no links */ 46#define TIPC_LINK_REQ_FAST 2000 /* normal delay if bearer has no links */
47#define TIPC_LINK_REQ_SLOW 600000 /* normal delay if bearer has links */ 47#define TIPC_LINK_REQ_SLOW 600000 /* normal delay if bearer has links */
48 48
49#if 0
50#define GET_NODE_INFO 300
51#define GET_NODE_INFO_RESULT 301
52#define FORWARD_LINK_PROBE 302
53#define LINK_REQUEST_REJECTED 303
54#define LINK_REQUEST_ACCEPTED 304
55#define DROP_LINK_REQUEST 305
56#define CHECK_LINK_COUNT 306
57#endif
58
59/* 49/*
60 * TODO: Most of the inter-cluster setup stuff should be 50 * TODO: Most of the inter-cluster setup stuff should be
61 * rewritten, and be made conformant with specification. 51 * rewritten, and be made conformant with specification.
@@ -78,30 +68,6 @@ struct link_req {
78 unsigned int timer_intv; 68 unsigned int timer_intv;
79}; 69};
80 70
81
82#if 0
83int disc_create_link(const struct tipc_link_create *argv)
84{
85 /*
86 * Code for inter cluster link setup here
87 */
88 return TIPC_OK;
89}
90#endif
91
92/*
93 * disc_lost_link(): A link has lost contact
94 */
95
96void tipc_disc_link_event(u32 addr, char *name, int up)
97{
98 if (in_own_cluster(addr))
99 return;
100 /*
101 * Code for inter cluster link setup here
102 */
103}
104
105/** 71/**
106 * tipc_disc_init_msg - initialize a link setup message 72 * tipc_disc_init_msg - initialize a link setup message
107 * @type: message type (request or response) 73 * @type: message type (request or response)
@@ -115,7 +81,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type,
115 u32 dest_domain, 81 u32 dest_domain,
116 struct bearer *b_ptr) 82 struct bearer *b_ptr)
117{ 83{
118 struct sk_buff *buf = buf_acquire(DSC_H_SIZE); 84 struct sk_buff *buf = tipc_buf_acquire(DSC_H_SIZE);
119 struct tipc_msg *msg; 85 struct tipc_msg *msg;
120 86
121 if (buf) { 87 if (buf) {
@@ -203,6 +169,14 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
203 return; 169 return;
204 } 170 }
205 spin_lock_bh(&n_ptr->lock); 171 spin_lock_bh(&n_ptr->lock);
172
173 /* Don't talk to neighbor during cleanup after last session */
174
175 if (n_ptr->cleanup_required) {
176 spin_unlock_bh(&n_ptr->lock);
177 return;
178 }
179
206 link = n_ptr->links[b_ptr->identity]; 180 link = n_ptr->links[b_ptr->identity];
207 if (!link) { 181 if (!link) {
208 dbg("creating link\n"); 182 dbg("creating link\n");
diff --git a/net/tipc/discover.h b/net/tipc/discover.h
index c36eaeb7d5d0..f8e750636123 100644
--- a/net/tipc/discover.h
+++ b/net/tipc/discover.h
@@ -50,9 +50,4 @@ void tipc_disc_stop_link_req(struct link_req *req);
50 50
51void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr); 51void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr);
52 52
53void tipc_disc_link_event(u32 addr, char *name, int up);
54#if 0
55int disc_create_link(const struct tipc_link_create *argv);
56#endif
57
58#endif 53#endif
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index 6230d16020c4..6e988ba485fd 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -72,17 +72,26 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
72{ 72{
73 struct sk_buff *clone; 73 struct sk_buff *clone;
74 struct net_device *dev; 74 struct net_device *dev;
75 int delta;
75 76
76 clone = skb_clone(buf, GFP_ATOMIC); 77 clone = skb_clone(buf, GFP_ATOMIC);
77 if (clone) { 78 if (!clone)
78 skb_reset_network_header(clone); 79 return 0;
79 dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev; 80
80 clone->dev = dev; 81 dev = ((struct eth_bearer *)(tb_ptr->usr_handle))->dev;
81 dev_hard_header(clone, dev, ETH_P_TIPC, 82 delta = dev->hard_header_len - skb_headroom(buf);
82 &dest->dev_addr.eth_addr, 83
83 dev->dev_addr, clone->len); 84 if ((delta > 0) &&
84 dev_queue_xmit(clone); 85 pskb_expand_head(clone, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
86 kfree_skb(clone);
87 return 0;
85 } 88 }
89
90 skb_reset_network_header(clone);
91 clone->dev = dev;
92 dev_hard_header(clone, dev, ETH_P_TIPC, &dest->dev_addr.eth_addr,
93 dev->dev_addr, clone->len);
94 dev_queue_xmit(clone);
86 return 0; 95 return 0;
87} 96}
88 97
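The rewritten send_msg() above first checks whether the clone has enough headroom for the device's link-layer header and grows it only when needed. The decision logic, with plain integers standing in for skb_headroom()/pskb_expand_head():

#include <stdio.h>

/* Extra headroom bytes the clone must gain before dev_hard_header()
 * can push the Ethernet header; zero means no expansion needed. */
static int headroom_shortfall(int hard_header_len, int headroom)
{
    int delta = hard_header_len - headroom;

    return delta > 0 ? delta : 0;
}

int main(void)
{
    /* e.g. device header is 14 bytes, buffer reserved only 8 */
    printf("%d\n", headroom_shortfall(14, 8));  /* prints 6 */
    return 0;
}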
@@ -92,15 +101,12 @@ static int send_msg(struct sk_buff *buf, struct tipc_bearer *tb_ptr,
92 * Accept only packets explicitly sent to this node, or broadcast packets; 101 * Accept only packets explicitly sent to this node, or broadcast packets;
93 * ignores packets sent using Ethernet multicast, and traffic sent to other 102 * ignores packets sent using Ethernet multicast, and traffic sent to other
94 * nodes (which can happen if interface is running in promiscuous mode). 103 * nodes (which can happen if interface is running in promiscuous mode).
95 * Routine truncates any Ethernet padding/CRC appended to the message,
96 * and ensures message size matches actual length
97 */ 104 */
98 105
99static int recv_msg(struct sk_buff *buf, struct net_device *dev, 106static int recv_msg(struct sk_buff *buf, struct net_device *dev,
100 struct packet_type *pt, struct net_device *orig_dev) 107 struct packet_type *pt, struct net_device *orig_dev)
101{ 108{
102 struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv; 109 struct eth_bearer *eb_ptr = (struct eth_bearer *)pt->af_packet_priv;
103 u32 size;
104 110
105 if (!net_eq(dev_net(dev), &init_net)) { 111 if (!net_eq(dev_net(dev), &init_net)) {
106 kfree_skb(buf); 112 kfree_skb(buf);
@@ -109,13 +115,9 @@ static int recv_msg(struct sk_buff *buf, struct net_device *dev,
109 115
110 if (likely(eb_ptr->bearer)) { 116 if (likely(eb_ptr->bearer)) {
111 if (likely(buf->pkt_type <= PACKET_BROADCAST)) { 117 if (likely(buf->pkt_type <= PACKET_BROADCAST)) {
112 size = msg_size((struct tipc_msg *)buf->data); 118 buf->next = NULL;
113 skb_trim(buf, size); 119 tipc_recv_msg(buf, eb_ptr->bearer);
114 if (likely(buf->len == size)) { 120 return 0;
115 buf->next = NULL;
116 tipc_recv_msg(buf, eb_ptr->bearer);
117 return 0;
118 }
119 } 121 }
120 } 122 }
121 kfree_skb(buf); 123 kfree_skb(buf);
@@ -133,6 +135,16 @@ static int enable_bearer(struct tipc_bearer *tb_ptr)
133 struct eth_bearer *eb_ptr = &eth_bearers[0]; 135 struct eth_bearer *eb_ptr = &eth_bearers[0];
134 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS]; 136 struct eth_bearer *stop = &eth_bearers[MAX_ETH_BEARERS];
135 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1; 137 char *driver_name = strchr((const char *)tb_ptr->name, ':') + 1;
138 int pending_dev = 0;
139
140 /* Find unused Ethernet bearer structure */
141
142 while (eb_ptr->dev) {
143 if (!eb_ptr->bearer)
144 pending_dev++;
145 if (++eb_ptr == stop)
146 return pending_dev ? -EAGAIN : -EDQUOT;
147 }
136 148
137 /* Find device with specified name */ 149 /* Find device with specified name */
138 150
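The new scan in enable_bearer() distinguishes "all slots busy" from "a slot is still tearing down": once every slot's dev is set, it returns -EAGAIN if any slot has dev set but bearer cleared (teardown in progress), else -EDQUOT. A standalone sketch of that walk:

#include <errno.h>
#include <stddef.h>

#define MAX_ETH_BEARERS 2   /* illustrative limit */

struct eth_bearer_sketch {
    void *dev;      /* non-NULL once bound to a device */
    void *bearer;   /* non-NULL while actively in use */
};

static int find_free_slot(struct eth_bearer_sketch *eb,
                          struct eth_bearer_sketch **slot)
{
    struct eth_bearer_sketch *stop = eb + MAX_ETH_BEARERS;
    int pending_dev = 0;

    while (eb->dev) {
        if (!eb->bearer)
            pending_dev++;  /* slot mid-teardown */
        if (++eb == stop)
            return pending_dev ? -EAGAIN : -EDQUOT;
    }
    *slot = eb;             /* first completely unused slot */
    return 0;
}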
diff --git a/net/tipc/link.c b/net/tipc/link.c
index a3616b99529b..b31992ccd5d3 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -99,23 +99,6 @@ struct link_name {
99 char if_peer[TIPC_MAX_IF_NAME]; 99 char if_peer[TIPC_MAX_IF_NAME];
100}; 100};
101 101
102#if 0
103
104/* LINK EVENT CODE IS NOT SUPPORTED AT PRESENT */
105
106/**
107 * struct link_event - link up/down event notification
108 */
109
110struct link_event {
111 u32 addr;
112 int up;
113 void (*fcn)(u32, char *, int);
114 char name[TIPC_MAX_LINK_NAME];
115};
116
117#endif
118
119static void link_handle_out_of_seq_msg(struct link *l_ptr, 102static void link_handle_out_of_seq_msg(struct link *l_ptr,
120 struct sk_buff *buf); 103 struct sk_buff *buf);
121static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf); 104static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf);
@@ -129,6 +112,9 @@ static void link_state_event(struct link *l_ptr, u32 event);
129static void link_reset_statistics(struct link *l_ptr); 112static void link_reset_statistics(struct link *l_ptr);
130static void link_print(struct link *l_ptr, struct print_buf *buf, 113static void link_print(struct link *l_ptr, struct print_buf *buf,
131 const char *str); 114 const char *str);
115static void link_start(struct link *l_ptr);
116static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf);
117
132 118
133/* 119/*
134 * Debugging code used by link routines only 120 * Debugging code used by link routines only
@@ -239,13 +225,13 @@ int tipc_link_is_up(struct link *l_ptr)
239{ 225{
240 if (!l_ptr) 226 if (!l_ptr)
241 return 0; 227 return 0;
242 return (link_working_working(l_ptr) || link_working_unknown(l_ptr)); 228 return link_working_working(l_ptr) || link_working_unknown(l_ptr);
243} 229}
244 230
245int tipc_link_is_active(struct link *l_ptr) 231int tipc_link_is_active(struct link *l_ptr)
246{ 232{
247 return ((l_ptr->owner->active_links[0] == l_ptr) || 233 return (l_ptr->owner->active_links[0] == l_ptr) ||
248 (l_ptr->owner->active_links[1] == l_ptr)); 234 (l_ptr->owner->active_links[1] == l_ptr);
249} 235}
250 236
251/** 237/**
@@ -459,7 +445,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
459 445
460 k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); 446 k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr);
461 list_add_tail(&l_ptr->link_list, &b_ptr->links); 447 list_add_tail(&l_ptr->link_list, &b_ptr->links);
462 tipc_k_signal((Handler)tipc_link_start, (unsigned long)l_ptr); 448 tipc_k_signal((Handler)link_start, (unsigned long)l_ptr);
463 449
464 dbg("tipc_link_create(): tolerance = %u,cont intv = %u, abort_limit = %u\n", 450 dbg("tipc_link_create(): tolerance = %u,cont intv = %u, abort_limit = %u\n",
465 l_ptr->tolerance, l_ptr->continuity_interval, l_ptr->abort_limit); 451 l_ptr->tolerance, l_ptr->continuity_interval, l_ptr->abort_limit);
@@ -499,9 +485,9 @@ void tipc_link_delete(struct link *l_ptr)
499 kfree(l_ptr); 485 kfree(l_ptr);
500} 486}
501 487
502void tipc_link_start(struct link *l_ptr) 488static void link_start(struct link *l_ptr)
503{ 489{
504 dbg("tipc_link_start %x\n", l_ptr); 490 dbg("link_start %x\n", l_ptr);
505 link_state_event(l_ptr, STARTING_EVT); 491 link_state_event(l_ptr, STARTING_EVT);
506} 492}
507 493
@@ -634,39 +620,9 @@ void tipc_link_stop(struct link *l_ptr)
634 l_ptr->proto_msg_queue = NULL; 620 l_ptr->proto_msg_queue = NULL;
635} 621}
636 622
637#if 0
638
639/* LINK EVENT CODE IS NOT SUPPORTED AT PRESENT */ 623/* LINK EVENT CODE IS NOT SUPPORTED AT PRESENT */
640
641static void link_recv_event(struct link_event *ev)
642{
643 ev->fcn(ev->addr, ev->name, ev->up);
644 kfree(ev);
645}
646
647static void link_send_event(void (*fcn)(u32 a, char *n, int up),
648 struct link *l_ptr, int up)
649{
650 struct link_event *ev;
651
652 ev = kmalloc(sizeof(*ev), GFP_ATOMIC);
653 if (!ev) {
654 warn("Link event allocation failure\n");
655 return;
656 }
657 ev->addr = l_ptr->addr;
658 ev->up = up;
659 ev->fcn = fcn;
660 memcpy(ev->name, l_ptr->name, TIPC_MAX_LINK_NAME);
661 tipc_k_signal((Handler)link_recv_event, (unsigned long)ev);
662}
663
664#else
665
666#define link_send_event(fcn, l_ptr, up) do { } while (0) 624#define link_send_event(fcn, l_ptr, up) do { } while (0)
667 625
668#endif
669
670void tipc_link_reset(struct link *l_ptr) 626void tipc_link_reset(struct link *l_ptr)
671{ 627{
672 struct sk_buff *buf; 628 struct sk_buff *buf;
@@ -690,10 +646,7 @@ void tipc_link_reset(struct link *l_ptr)
690 646
691 tipc_node_link_down(l_ptr->owner, l_ptr); 647 tipc_node_link_down(l_ptr->owner, l_ptr);
692 tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr); 648 tipc_bearer_remove_dest(l_ptr->b_ptr, l_ptr->addr);
693#if 0 649
694 tipc_printf(TIPC_CONS, "\nReset link <%s>\n", l_ptr->name);
695 dbg_link_dump();
696#endif
697 if (was_active_link && tipc_node_has_active_links(l_ptr->owner) && 650 if (was_active_link && tipc_node_has_active_links(l_ptr->owner) &&
698 l_ptr->owner->permit_changeover) { 651 l_ptr->owner->permit_changeover) {
699 l_ptr->reset_checkpoint = checkpoint; 652 l_ptr->reset_checkpoint = checkpoint;
@@ -1050,7 +1003,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
1050 /* Fragmentation needed ? */ 1003 /* Fragmentation needed ? */
1051 1004
1052 if (size > max_packet) 1005 if (size > max_packet)
1053 return tipc_link_send_long_buf(l_ptr, buf); 1006 return link_send_long_buf(l_ptr, buf);
1054 1007
1055 /* Packet can be queued or sent: */ 1008 /* Packet can be queued or sent: */
1056 1009
@@ -1086,7 +1039,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
1086 /* Try creating a new bundle */ 1039 /* Try creating a new bundle */
1087 1040
1088 if (size <= max_packet * 2 / 3) { 1041 if (size <= max_packet * 2 / 3) {
1089 struct sk_buff *bundler = buf_acquire(max_packet); 1042 struct sk_buff *bundler = tipc_buf_acquire(max_packet);
1090 struct tipc_msg bundler_hdr; 1043 struct tipc_msg bundler_hdr;
1091 1044
1092 if (bundler) { 1045 if (bundler) {
@@ -1362,7 +1315,7 @@ again:
1362 1315
1363 /* Prepare header of first fragment: */ 1316 /* Prepare header of first fragment: */
1364 1317
1365 buf_chain = buf = buf_acquire(max_pkt); 1318 buf_chain = buf = tipc_buf_acquire(max_pkt);
1366 if (!buf) 1319 if (!buf)
1367 return -ENOMEM; 1320 return -ENOMEM;
1368 buf->next = NULL; 1321 buf->next = NULL;
@@ -1419,7 +1372,7 @@ error:
1419 msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); 1372 msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE);
1420 msg_set_fragm_no(&fragm_hdr, ++fragm_no); 1373 msg_set_fragm_no(&fragm_hdr, ++fragm_no);
1421 prev = buf; 1374 prev = buf;
1422 buf = buf_acquire(fragm_sz + INT_H_SIZE); 1375 buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE);
1423 if (!buf) 1376 if (!buf)
1424 goto error; 1377 goto error;
1425 1378
@@ -1802,6 +1755,15 @@ static int link_recv_buf_validate(struct sk_buff *buf)
1802 return pskb_may_pull(buf, hdr_size); 1755 return pskb_may_pull(buf, hdr_size);
1803} 1756}
1804 1757
1758/**
1759 * tipc_recv_msg - process TIPC messages arriving from off-node
1760 * @head: pointer to message buffer chain
1761 * @tb_ptr: pointer to bearer the message arrived on
1762 *
1763 * Invoked with no locks held. Bearer pointer must point to a valid bearer
1764 * structure (i.e. cannot be NULL), but bearer can be inactive.
1765 */
1766
1805void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) 1767void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1806{ 1768{
1807 read_lock_bh(&tipc_net_lock); 1769 read_lock_bh(&tipc_net_lock);
@@ -1819,6 +1781,11 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1819 1781
1820 head = head->next; 1782 head = head->next;
1821 1783
1784 /* Ensure bearer is still enabled */
1785
1786 if (unlikely(!b_ptr->active))
1787 goto cont;
1788
1822 /* Ensure message is well-formed */ 1789 /* Ensure message is well-formed */
1823 1790
1824 if (unlikely(!link_recv_buf_validate(buf))) 1791 if (unlikely(!link_recv_buf_validate(buf)))
@@ -1855,13 +1822,22 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr)
1855 goto cont; 1822 goto cont;
1856 } 1823 }
1857 1824
1858 /* Locate unicast link endpoint that should handle message */ 1825 /* Locate neighboring node that sent message */
1859 1826
1860 n_ptr = tipc_node_find(msg_prevnode(msg)); 1827 n_ptr = tipc_node_find(msg_prevnode(msg));
1861 if (unlikely(!n_ptr)) 1828 if (unlikely(!n_ptr))
1862 goto cont; 1829 goto cont;
1863 tipc_node_lock(n_ptr); 1830 tipc_node_lock(n_ptr);
1864 1831
1832 /* Don't talk to neighbor during cleanup after last session */
1833
1834 if (n_ptr->cleanup_required) {
1835 tipc_node_unlock(n_ptr);
1836 goto cont;
1837 }
1838
1839 /* Locate unicast link endpoint that should handle message */
1840
1865 l_ptr = n_ptr->links[b_ptr->identity]; 1841 l_ptr = n_ptr->links[b_ptr->identity];
1866 if (unlikely(!l_ptr)) { 1842 if (unlikely(!l_ptr)) {
1867 tipc_node_unlock(n_ptr); 1843 tipc_node_unlock(n_ptr);
@@ -2172,7 +2148,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
2172 if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) { 2148 if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) {
2173 if (!l_ptr->proto_msg_queue) { 2149 if (!l_ptr->proto_msg_queue) {
2174 l_ptr->proto_msg_queue = 2150 l_ptr->proto_msg_queue =
2175 buf_acquire(sizeof(l_ptr->proto_msg)); 2151 tipc_buf_acquire(sizeof(l_ptr->proto_msg));
2176 } 2152 }
2177 buf = l_ptr->proto_msg_queue; 2153 buf = l_ptr->proto_msg_queue;
2178 if (!buf) 2154 if (!buf)
@@ -2186,7 +2162,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg,
2186 2162
2187 msg_dbg(msg, ">>"); 2163 msg_dbg(msg, ">>");
2188 2164
2189 buf = buf_acquire(msg_size); 2165 buf = tipc_buf_acquire(msg_size);
2190 if (!buf) 2166 if (!buf)
2191 return; 2167 return;
2192 2168
@@ -2345,10 +2321,10 @@ exit:
2345 * tipc_link_tunnel(): Send one message via a link belonging to 2321 * tipc_link_tunnel(): Send one message via a link belonging to
2346 * another bearer. Owner node is locked. 2322 * another bearer. Owner node is locked.
2347 */ 2323 */
2348void tipc_link_tunnel(struct link *l_ptr, 2324static void tipc_link_tunnel(struct link *l_ptr,
2349 struct tipc_msg *tunnel_hdr, 2325 struct tipc_msg *tunnel_hdr,
2350 struct tipc_msg *msg, 2326 struct tipc_msg *msg,
2351 u32 selector) 2327 u32 selector)
2352{ 2328{
2353 struct link *tunnel; 2329 struct link *tunnel;
2354 struct sk_buff *buf; 2330 struct sk_buff *buf;
@@ -2361,7 +2337,7 @@ void tipc_link_tunnel(struct link *l_ptr,
2361 return; 2337 return;
2362 } 2338 }
2363 msg_set_size(tunnel_hdr, length + INT_H_SIZE); 2339 msg_set_size(tunnel_hdr, length + INT_H_SIZE);
2364 buf = buf_acquire(length + INT_H_SIZE); 2340 buf = tipc_buf_acquire(length + INT_H_SIZE);
2365 if (!buf) { 2341 if (!buf) {
2366 warn("Link changeover error, " 2342 warn("Link changeover error, "
2367 "unable to send tunnel msg\n"); 2343 "unable to send tunnel msg\n");
@@ -2407,7 +2383,7 @@ void tipc_link_changeover(struct link *l_ptr)
2407 if (!l_ptr->first_out) { 2383 if (!l_ptr->first_out) {
2408 struct sk_buff *buf; 2384 struct sk_buff *buf;
2409 2385
2410 buf = buf_acquire(INT_H_SIZE); 2386 buf = tipc_buf_acquire(INT_H_SIZE);
2411 if (buf) { 2387 if (buf) {
2412 skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE); 2388 skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE);
2413 msg_set_size(&tunnel_hdr, INT_H_SIZE); 2389 msg_set_size(&tunnel_hdr, INT_H_SIZE);
@@ -2468,7 +2444,7 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
2468 msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); /* Update */ 2444 msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); /* Update */
2469 msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); 2445 msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
2470 msg_set_size(&tunnel_hdr, length + INT_H_SIZE); 2446 msg_set_size(&tunnel_hdr, length + INT_H_SIZE);
2471 outbuf = buf_acquire(length + INT_H_SIZE); 2447 outbuf = tipc_buf_acquire(length + INT_H_SIZE);
2472 if (outbuf == NULL) { 2448 if (outbuf == NULL) {
2473 warn("Link changeover error, " 2449 warn("Link changeover error, "
2474 "unable to send duplicate msg\n"); 2450 "unable to send duplicate msg\n");
@@ -2504,7 +2480,7 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos)
2504 u32 size = msg_size(msg); 2480 u32 size = msg_size(msg);
2505 struct sk_buff *eb; 2481 struct sk_buff *eb;
2506 2482
2507 eb = buf_acquire(size); 2483 eb = tipc_buf_acquire(size);
2508 if (eb) 2484 if (eb)
2509 skb_copy_to_linear_data(eb, msg, size); 2485 skb_copy_to_linear_data(eb, msg, size);
2510 return eb; 2486 return eb;
@@ -2632,11 +2608,11 @@ void tipc_link_recv_bundle(struct sk_buff *buf)
2632 2608
2633 2609
2634/* 2610/*
2635 * tipc_link_send_long_buf: Entry for buffers needing fragmentation. 2611 * link_send_long_buf: Entry for buffers needing fragmentation.
2636 * The buffer is complete, including total message length. 2612 * The buffer is complete, including total message length.
2637 * Returns user data length. 2613 * Returns user data length.
2638 */ 2614 */
2639int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) 2615static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
2640{ 2616{
2641 struct tipc_msg *inmsg = buf_msg(buf); 2617 struct tipc_msg *inmsg = buf_msg(buf);
2642 struct tipc_msg fragm_hdr; 2618 struct tipc_msg fragm_hdr;
@@ -2675,7 +2651,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
2675 fragm_sz = rest; 2651 fragm_sz = rest;
2676 msg_set_type(&fragm_hdr, LAST_FRAGMENT); 2652 msg_set_type(&fragm_hdr, LAST_FRAGMENT);
2677 } 2653 }
2678 fragm = buf_acquire(fragm_sz + INT_H_SIZE); 2654 fragm = tipc_buf_acquire(fragm_sz + INT_H_SIZE);
2679 if (fragm == NULL) { 2655 if (fragm == NULL) {
2680 warn("Link unable to fragment message\n"); 2656 warn("Link unable to fragment message\n");
2681 dsz = -ENOMEM; 2657 dsz = -ENOMEM;
@@ -2780,7 +2756,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb,
2780 buf_discard(fbuf); 2756 buf_discard(fbuf);
2781 return 0; 2757 return 0;
2782 } 2758 }
2783 pbuf = buf_acquire(msg_size(imsg)); 2759 pbuf = tipc_buf_acquire(msg_size(imsg));
2784 if (pbuf != NULL) { 2760 if (pbuf != NULL) {
2785 pbuf->next = *pending; 2761 pbuf->next = *pending;
2786 *pending = pbuf; 2762 *pending = pbuf;
@@ -3174,44 +3150,6 @@ struct sk_buff *tipc_link_cmd_show_stats(const void *req_tlv_area, int req_tlv_s
3174 return buf; 3150 return buf;
3175} 3151}
3176 3152
3177#if 0
3178int link_control(const char *name, u32 op, u32 val)
3179{
3180 int res = -EINVAL;
3181 struct link *l_ptr;
3182 u32 bearer_id;
3183 struct tipc_node * node;
3184 u32 a;
3185
3186 a = link_name2addr(name, &bearer_id);
3187 read_lock_bh(&tipc_net_lock);
3188 node = tipc_node_find(a);
3189 if (node) {
3190 tipc_node_lock(node);
3191 l_ptr = node->links[bearer_id];
3192 if (l_ptr) {
3193 if (op == TIPC_REMOVE_LINK) {
3194 struct bearer *b_ptr = l_ptr->b_ptr;
3195 spin_lock_bh(&b_ptr->publ.lock);
3196 tipc_link_delete(l_ptr);
3197 spin_unlock_bh(&b_ptr->publ.lock);
3198 }
3199 if (op == TIPC_CMD_BLOCK_LINK) {
3200 tipc_link_reset(l_ptr);
3201 l_ptr->blocked = 1;
3202 }
3203 if (op == TIPC_CMD_UNBLOCK_LINK) {
3204 l_ptr->blocked = 0;
3205 }
3206 res = 0;
3207 }
3208 tipc_node_unlock(node);
3209 }
3210 read_unlock_bh(&tipc_net_lock);
3211 return res;
3212}
3213#endif
3214
3215/** 3153/**
3216 * tipc_link_get_max_pkt - get maximum packet size to use when sending to destination 3154 * tipc_link_get_max_pkt - get maximum packet size to use when sending to destination
3217 * @dest: network address of destination node 3155 * @dest: network address of destination node
@@ -3242,28 +3180,6 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
3242 return res; 3180 return res;
3243} 3181}
3244 3182
3245#if 0
3246static void link_dump_rec_queue(struct link *l_ptr)
3247{
3248 struct sk_buff *crs;
3249
3250 if (!l_ptr->oldest_deferred_in) {
3251 info("Reception queue empty\n");
3252 return;
3253 }
3254 info("Contents of Reception queue:\n");
3255 crs = l_ptr->oldest_deferred_in;
3256 while (crs) {
3257 if (crs->data == (void *)0x0000a3a3) {
3258 info("buffer %x invalid\n", crs);
3259 return;
3260 }
3261 msg_dbg(buf_msg(crs), "In rec queue:\n");
3262 crs = crs->next;
3263 }
3264}
3265#endif
3266
3267static void link_dump_send_queue(struct link *l_ptr) 3183static void link_dump_send_queue(struct link *l_ptr)
3268{ 3184{
3269 if (l_ptr->next_out) { 3185 if (l_ptr->next_out) {
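link_send_long_buf(), now static, slices an oversized buffer into fragments of fragm_sz payload bytes, each behind an INT_H_SIZE header, marking the final piece LAST_FRAGMENT. The fragment-count arithmetic, worked through with illustrative sizes:

#include <stdio.h>

int main(void)
{
    unsigned int dsz = 4000;        /* user data to send */
    unsigned int fragm_sz = 1460;   /* payload carried per fragment */
    unsigned int fragments = (dsz + fragm_sz - 1) / fragm_sz;

    printf("%u fragments, last one carries %u bytes\n",
           fragments, dsz - (fragments - 1) * fragm_sz);
    return 0;
}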
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 2e5385c47d30..f98bc613de67 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -210,10 +210,6 @@ struct link {
 		u32 msg_length_counts;
 		u32 msg_lengths_total;
 		u32 msg_length_profile[7];
-#if 0
-		u32 sent_tunneled;
-		u32 recv_tunneled;
-#endif
 	} stats;
 
 	struct print_buf print_buf;
@@ -229,7 +225,6 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *dest);
 void tipc_link_reset_fragments(struct link *l_ptr);
 int tipc_link_is_up(struct link *l_ptr);
 int tipc_link_is_active(struct link *l_ptr);
-void tipc_link_start(struct link *l_ptr);
 u32 tipc_link_push_packet(struct link *l_ptr);
 void tipc_link_stop(struct link *l_ptr);
 struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd);
@@ -243,9 +238,6 @@ int tipc_link_send_sections_fast(struct port* sender,
 				 struct iovec const *msg_sect,
 				 const u32 num_sect,
 				 u32 destnode);
-int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf);
-void tipc_link_tunnel(struct link *l_ptr, struct tipc_msg *tnl_hdr,
-		      struct tipc_msg *msg, u32 selector);
 void tipc_link_recv_bundle(struct sk_buff *buf);
 int tipc_link_recv_fragment(struct sk_buff **pending,
 			    struct sk_buff **fb,
@@ -279,12 +271,12 @@ static inline int between(u32 lower, u32 upper, u32 n)
 
 static inline int less_eq(u32 left, u32 right)
 {
-	return (mod(right - left) < 32768u);
+	return mod(right - left) < 32768u;
 }
 
 static inline int less(u32 left, u32 right)
 {
-	return (less_eq(left, right) && (mod(right) != mod(left)));
+	return less_eq(left, right) && (mod(right) != mod(left));
 }
 
 static inline u32 lesser(u32 left, u32 right)
@@ -299,32 +291,32 @@ static inline u32 lesser(u32 left, u32 right)
 
 static inline int link_working_working(struct link *l_ptr)
 {
-	return (l_ptr->state == WORKING_WORKING);
+	return l_ptr->state == WORKING_WORKING;
 }
 
 static inline int link_working_unknown(struct link *l_ptr)
 {
-	return (l_ptr->state == WORKING_UNKNOWN);
+	return l_ptr->state == WORKING_UNKNOWN;
 }
 
 static inline int link_reset_unknown(struct link *l_ptr)
 {
-	return (l_ptr->state == RESET_UNKNOWN);
+	return l_ptr->state == RESET_UNKNOWN;
 }
 
 static inline int link_reset_reset(struct link *l_ptr)
 {
-	return (l_ptr->state == RESET_RESET);
+	return l_ptr->state == RESET_RESET;
 }
 
 static inline int link_blocked(struct link *l_ptr)
 {
-	return (l_ptr->exp_msg_count || l_ptr->blocked);
+	return l_ptr->exp_msg_count || l_ptr->blocked;
 }
 
 static inline int link_congested(struct link *l_ptr)
 {
-	return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]);
+	return l_ptr->out_queue_size >= l_ptr->queue_limit[0];
 }
 
 #endif
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 381063817b41..ecb532fb0351 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -112,7 +112,7 @@ int tipc_msg_build(struct tipc_msg *hdr,
 		return dsz;
 	}
 
-	*buf = buf_acquire(sz);
+	*buf = tipc_buf_acquire(sz);
 	if (!(*buf))
 		return -ENOMEM;
 	skb_copy_to_linear_data(*buf, hdr, hsz);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 995d2da35b01..031aad18efce 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -104,7 +104,7 @@ static inline u32 msg_user(struct tipc_msg *m)
 
 static inline u32 msg_isdata(struct tipc_msg *m)
 {
-	return (msg_user(m) <= TIPC_CRITICAL_IMPORTANCE);
+	return msg_user(m) <= TIPC_CRITICAL_IMPORTANCE;
 }
 
 static inline void msg_set_user(struct tipc_msg *m, u32 n)
@@ -289,7 +289,7 @@ static inline void msg_set_destnode(struct tipc_msg *m, u32 a)
 
 static inline int msg_is_dest(struct tipc_msg *m, u32 d)
 {
-	return(msg_short(m) || (msg_destnode(m) == d));
+	return msg_short(m) || (msg_destnode(m) == d);
 }
 
 static inline u32 msg_routed(struct tipc_msg *m)
@@ -632,7 +632,7 @@ static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n)
 
 static inline u32 msg_max_pkt(struct tipc_msg *m)
 {
-	return (msg_bits(m, 9, 16, 0xffff) * 4);
+	return msg_bits(m, 9, 16, 0xffff) * 4;
 }
 
 static inline void msg_set_max_pkt(struct tipc_msg *m, u32 n)
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 6ac3c543250b..7b907171f879 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -98,7 +98,7 @@ static void publ_to_item(struct distr_item *i, struct publication *p)
 
 static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest)
 {
-	struct sk_buff *buf = buf_acquire(LONG_H_SIZE + size);
+	struct sk_buff *buf = tipc_buf_acquire(LONG_H_SIZE + size);
 	struct tipc_msg *msg;
 
 	if (buf != NULL) {
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 8ba79620db3f..3a8de4334da1 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -116,7 +116,7 @@ DEFINE_RWLOCK(tipc_nametbl_lock);
 
 static int hash(int x)
 {
-	return(x & (tipc_nametbl_size - 1));
+	return x & (tipc_nametbl_size - 1);
 }
 
 /**
@@ -613,8 +613,7 @@ struct publication *tipc_nametbl_remove_publ(u32 type, u32 lower,
 }
 
 /*
- * tipc_nametbl_translate(): Translate tipc_name -> tipc_portid.
- *                           Very time-critical.
+ * tipc_nametbl_translate - translate name to port id
  *
  * Note: on entry 'destnode' is the search domain used during translation;
  *       on exit it passes back the node address of the matching port (if any)
@@ -685,7 +684,6 @@ found:
 	}
 	spin_unlock_bh(&seq->lock);
not_found:
-	*destnode = 0;
 	read_unlock_bh(&tipc_nametbl_lock);
 	return 0;
 }
@@ -877,7 +875,7 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
 			u32 index)
 {
 	char portIdStr[27];
-	char *scopeStr;
+	const char *scope_str[] = {"", " zone", " cluster", " node"};
 	struct publication *publ = sseq->zone_list;
 
 	tipc_printf(buf, "%-10u %-10u ", sseq->lower, sseq->upper);
@@ -893,15 +891,8 @@ static void subseq_list(struct sub_seq *sseq, struct print_buf *buf, u32 depth,
 			   tipc_node(publ->node), publ->ref);
 		tipc_printf(buf, "%-26s ", portIdStr);
 		if (depth > 3) {
-			if (publ->node != tipc_own_addr)
-				scopeStr = "";
-			else if (publ->scope == TIPC_NODE_SCOPE)
-				scopeStr = "node";
-			else if (publ->scope == TIPC_CLUSTER_SCOPE)
-				scopeStr = "cluster";
-			else
-				scopeStr = "zone";
-			tipc_printf(buf, "%-10u %s", publ->key, scopeStr);
+			tipc_printf(buf, "%-10u %s", publ->key,
+				    scope_str[publ->scope]);
 		}
 
 		publ = publ->zone_list_next;
@@ -951,24 +942,19 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth,
 
 static void nametbl_header(struct print_buf *buf, u32 depth)
 {
-	tipc_printf(buf, "Type       ");
-
-	if (depth > 1)
-		tipc_printf(buf, "Lower      Upper      ");
-	if (depth > 2)
-		tipc_printf(buf, "Port Identity              ");
-	if (depth > 3)
-		tipc_printf(buf, "Publication");
-
-	tipc_printf(buf, "\n-----------");
-
-	if (depth > 1)
-		tipc_printf(buf, "--------------------- ");
-	if (depth > 2)
-		tipc_printf(buf, "-------------------------- ");
-	if (depth > 3)
-		tipc_printf(buf, "------------------");
-
+	const char *header[] = {
+		"Type       ",
+		"Lower      Upper      ",
+		"Port Identity              ",
+		"Publication Scope"
+	};
+
+	int i;
+
+	if (depth > 4)
+		depth = 4;
+	for (i = 0; i < depth; i++)
+		tipc_printf(buf, header[i]);
 	tipc_printf(buf, "\n");
 }
 
@@ -1023,16 +1009,6 @@ static void nametbl_list(struct print_buf *buf, u32 depth_info,
 	}
 }
 
-#if 0
-void tipc_nametbl_print(struct print_buf *buf, const char *str)
-{
-	tipc_printf(buf, str);
-	read_lock_bh(&tipc_nametbl_lock);
-	nametbl_list(buf, 0, 0, 0, 0);
-	read_unlock_bh(&tipc_nametbl_lock);
-}
-#endif
-
 #define MAX_NAME_TBL_QUERY 32768
 
 struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
@@ -1065,13 +1041,6 @@ struct sk_buff *tipc_nametbl_get(const void *req_tlv_area, int req_tlv_space)
 	return buf;
 }
 
-#if 0
-void tipc_nametbl_dump(void)
-{
-	nametbl_list(TIPC_CONS, 0, 0, 0, 0);
-}
-#endif
-
 int tipc_nametbl_init(void)
 {
 	table.types = kcalloc(tipc_nametbl_size, sizeof(struct hlist_head),
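
The subseq_list() hunk above swaps an if/else ladder for a lookup table indexed by publ->scope. A standalone sketch of the same idiom, assuming the TIPC publication-scope encoding zone=1, cluster=2, node=3:

    #include <stdio.h>

    /* Table-lookup idiom from subseq_list(); index 0 is unused padding
     * so that the scope value can index the array directly. */
    static const char *scope_str[] = {"", " zone", " cluster", " node"};

    int main(void)
    {
            unsigned int scope;

            for (scope = 1; scope <= 3; scope++)
                    printf("scope %u ->%s\n", scope, scope_str[scope]);
            return 0;
    }
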
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f61b7694138b..1a621cfd6604 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -129,15 +129,6 @@ u32 tipc_net_select_router(u32 addr, u32 ref)
 	return tipc_zone_select_router(tipc_net.zones[tipc_zone(addr)], addr, ref);
 }
 
-#if 0
-u32 tipc_net_next_node(u32 a)
-{
-	if (tipc_net.zones[tipc_zone(a)])
-		return tipc_zone_next_node(a);
-	return 0;
-}
-#endif
-
 void tipc_net_remove_as_router(u32 router)
 {
 	u32 z_num;
@@ -248,6 +239,7 @@ void tipc_net_route_msg(struct sk_buff *buf)
 
 	/* Handle message for another node */
 	msg_dbg(msg, "NET>SEND>: ");
+	skb_trim(buf, msg_size(msg));
 	tipc_link_send(buf, dnode, msg_link_selector(msg));
 }
 
diff --git a/net/tipc/node.c b/net/tipc/node.c
index b634942caba5..b4d87eb2dc5d 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -50,7 +50,8 @@ void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str);
 static void node_lost_contact(struct tipc_node *n_ptr);
 static void node_established_contact(struct tipc_node *n_ptr);
 
-struct tipc_node *tipc_nodes = NULL;	/* sorted list of nodes within cluster */
+/* sorted list of nodes within cluster */
+static struct tipc_node *tipc_nodes = NULL;
 
 static DEFINE_SPINLOCK(node_create_lock);
 
@@ -125,16 +126,6 @@ void tipc_node_delete(struct tipc_node *n_ptr)
 	if (!n_ptr)
 		return;
 
-#if 0
-	/* Not needed because links are already deleted via tipc_bearer_stop() */
-
-	u32 l_num;
-
-	for (l_num = 0; l_num < MAX_BEARERS; l_num++) {
-		link_delete(n_ptr->links[l_num]);
-	}
-#endif
-
 	dbg("node %x deleted\n", n_ptr->addr);
 	kfree(n_ptr);
 }
@@ -237,23 +228,22 @@ void tipc_node_link_down(struct tipc_node *n_ptr, struct link *l_ptr)
 
 int tipc_node_has_active_links(struct tipc_node *n_ptr)
 {
-	return (n_ptr &&
-		((n_ptr->active_links[0]) || (n_ptr->active_links[1])));
+	return n_ptr->active_links[0] != NULL;
 }
 
 int tipc_node_has_redundant_links(struct tipc_node *n_ptr)
 {
-	return (n_ptr->working_links > 1);
+	return n_ptr->working_links > 1;
 }
 
 static int tipc_node_has_active_routes(struct tipc_node *n_ptr)
 {
-	return (n_ptr && (n_ptr->last_router >= 0));
+	return n_ptr && (n_ptr->last_router >= 0);
 }
 
 int tipc_node_is_up(struct tipc_node *n_ptr)
 {
-	return (tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr));
+	return tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr);
 }
 
 struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
@@ -384,6 +374,20 @@ static void node_established_contact(struct tipc_node *n_ptr)
 				   tipc_highest_allowed_slave);
 }
 
+static void node_cleanup_finished(unsigned long node_addr)
+{
+	struct tipc_node *n_ptr;
+
+	read_lock_bh(&tipc_net_lock);
+	n_ptr = tipc_node_find(node_addr);
+	if (n_ptr) {
+		tipc_node_lock(n_ptr);
+		n_ptr->cleanup_required = 0;
+		tipc_node_unlock(n_ptr);
+	}
+	read_unlock_bh(&tipc_net_lock);
+}
+
 static void node_lost_contact(struct tipc_node *n_ptr)
 {
 	struct cluster *c_ptr;
@@ -458,6 +462,11 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 		tipc_k_signal((Handler)ns->handle_node_down,
 			      (unsigned long)ns->usr_handle);
 	}
+
+	/* Prevent re-contact with node until all cleanup is done */
+
+	n_ptr->cleanup_required = 1;
+	tipc_k_signal((Handler)node_cleanup_finished, n_ptr->addr);
 }
 
 /**
@@ -579,38 +588,6 @@ void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router)
 		node_lost_contact(n_ptr);
 }
 
-#if 0
-void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str)
-{
-	u32 i;
-
-	tipc_printf(buf, "\n\n%s", str);
-	for (i = 0; i < MAX_BEARERS; i++) {
-		if (!n_ptr->links[i])
-			continue;
-		tipc_printf(buf, "Links[%u]: %x, ", i, n_ptr->links[i]);
-	}
-	tipc_printf(buf, "Active links: [%x,%x]\n",
-		    n_ptr->active_links[0], n_ptr->active_links[1]);
-}
-#endif
-
-u32 tipc_available_nodes(const u32 domain)
-{
-	struct tipc_node *n_ptr;
-	u32 cnt = 0;
-
-	read_lock_bh(&tipc_net_lock);
-	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
-		if (!tipc_in_scope(domain, n_ptr->addr))
-			continue;
-		if (tipc_node_is_up(n_ptr))
-			cnt++;
-	}
-	read_unlock_bh(&tipc_net_lock);
-	return cnt;
-}
-
 struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 {
 	u32 domain;
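
The new cleanup_required handshake marks a node on loss of contact and clears the mark from a deferred handler (dispatched via tipc_k_signal() in the kernel). A userspace analogue of the same flag-and-defer pattern; all names here are illustrative, not kernel API:

    #include <pthread.h>
    #include <stdio.h>

    /* lost_contact() mirrors node_lost_contact(); cleanup_finished()
     * mirrors the deferred node_cleanup_finished() handler. */
    struct node {
            pthread_mutex_t lock;
            int cleanup_required;
    };

    static void lost_contact(struct node *n)
    {
            pthread_mutex_lock(&n->lock);
            n->cleanup_required = 1;    /* block re-contact until cleanup runs */
            pthread_mutex_unlock(&n->lock);
    }

    static void cleanup_finished(struct node *n)
    {
            pthread_mutex_lock(&n->lock);
            n->cleanup_required = 0;    /* safe to talk to the node again */
            pthread_mutex_unlock(&n->lock);
    }

    int main(void)
    {
            struct node n = { PTHREAD_MUTEX_INITIALIZER, 0 };

            lost_contact(&n);
            printf("cleanup_required=%d\n", n.cleanup_required);
            cleanup_finished(&n);       /* in the kernel this runs deferred */
            printf("cleanup_required=%d\n", n.cleanup_required);
            return 0;
    }
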
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 6f990da5d143..fff331b2d26c 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -52,6 +52,7 @@
  * @active_links: pointers to active links to node
  * @links: pointers to all links to node
  * @working_links: number of working links to node (both active and standby)
+ * @cleanup_required: non-zero if cleaning up after a prior loss of contact
  * @link_cnt: number of links to node
  * @permit_changeover: non-zero if node has redundant links to this system
  * @routers: bitmap (used for multicluster communication)
@@ -78,6 +79,7 @@ struct tipc_node {
 	struct link *links[MAX_BEARERS];
 	int link_cnt;
 	int working_links;
+	int cleanup_required;
 	int permit_changeover;
 	u32 routers[512/32];
 	int last_router;
@@ -94,7 +96,6 @@ struct tipc_node {
 	} bclink;
 };
 
-extern struct tipc_node *tipc_nodes;
 extern u32 tipc_own_tag;
 
 struct tipc_node *tipc_node_create(u32 addr);
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 0737680e9266..82092eaa1536 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -293,34 +293,6 @@ int tipc_deleteport(u32 ref)
 	return 0;
 }
 
-/**
- * tipc_get_port() - return port associated with 'ref'
- *
- * Note: Port is not locked.
- */
-
-struct tipc_port *tipc_get_port(const u32 ref)
-{
-	return (struct tipc_port *)tipc_ref_deref(ref);
-}
-
-/**
- * tipc_get_handle - return user handle associated to port 'ref'
- */
-
-void *tipc_get_handle(const u32 ref)
-{
-	struct port *p_ptr;
-	void * handle;
-
-	p_ptr = tipc_port_lock(ref);
-	if (!p_ptr)
-		return NULL;
-	handle = p_ptr->publ.usr_handle;
-	tipc_port_unlock(p_ptr);
-	return handle;
-}
-
 static int port_unreliable(struct port *p_ptr)
 {
 	return msg_src_droppable(&p_ptr->publ.phdr);
@@ -392,7 +364,7 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
 	struct sk_buff *buf;
 	struct tipc_msg *msg;
 
-	buf = buf_acquire(LONG_H_SIZE);
+	buf = tipc_buf_acquire(LONG_H_SIZE);
 	if (buf) {
 		msg = buf_msg(buf);
 		tipc_msg_init(msg, usr, type, LONG_H_SIZE, destnode);
@@ -433,7 +405,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
 		hdr_sz = MCAST_H_SIZE;
 	else
 		hdr_sz = LONG_H_SIZE;
-	rbuf = buf_acquire(data_sz + hdr_sz);
+	rbuf = tipc_buf_acquire(data_sz + hdr_sz);
 	if (rbuf == NULL) {
 		buf_discard(buf);
 		return data_sz;
@@ -588,19 +560,10 @@ void tipc_port_recv_proto_msg(struct sk_buff *buf)
 	if (!p_ptr) {
 		err = TIPC_ERR_NO_PORT;
 	} else if (p_ptr->publ.connected) {
-		if (port_peernode(p_ptr) != msg_orignode(msg))
+		if ((port_peernode(p_ptr) != msg_orignode(msg)) ||
+		    (port_peerport(p_ptr) != msg_origport(msg))) {
 			err = TIPC_ERR_NO_PORT;
-		if (port_peerport(p_ptr) != msg_origport(msg))
-			err = TIPC_ERR_NO_PORT;
-		if (!err && msg_routed(msg)) {
-			u32 seqno = msg_transp_seqno(msg);
-			u32 myno = ++p_ptr->last_in_seqno;
-			if (seqno != myno) {
-				err = TIPC_ERR_NO_PORT;
-				abort_buf = port_build_self_abort_msg(p_ptr, err);
-			}
-		}
-		if (msg_type(msg) == CONN_ACK) {
+		} else if (msg_type(msg) == CONN_ACK) {
 			int wakeup = tipc_port_congested(p_ptr) &&
 				     p_ptr->publ.congested &&
 				     p_ptr->wakeup;
@@ -719,50 +682,6 @@ struct sk_buff *tipc_port_get_ports(void)
 	return buf;
 }
 
-#if 0
-
-#define MAX_PORT_STATS 2000
-
-struct sk_buff *port_show_stats(const void *req_tlv_area, int req_tlv_space)
-{
-	u32 ref;
-	struct port *p_ptr;
-	struct sk_buff *buf;
-	struct tlv_desc *rep_tlv;
-	struct print_buf pb;
-	int str_len;
-
-	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_PORT_REF))
-		return cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
-
-	ref = *(u32 *)TLV_DATA(req_tlv_area);
-	ref = ntohl(ref);
-
-	p_ptr = tipc_port_lock(ref);
-	if (!p_ptr)
-		return cfg_reply_error_string("port not found");
-
-	buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_PORT_STATS));
-	if (!buf) {
-		tipc_port_unlock(p_ptr);
-		return NULL;
-	}
-	rep_tlv = (struct tlv_desc *)buf->data;
-
-	tipc_printbuf_init(&pb, TLV_DATA(rep_tlv), MAX_PORT_STATS);
-	port_print(p_ptr, &pb, 1);
-	/* NEED TO FILL IN ADDITIONAL PORT STATISTICS HERE */
-	tipc_port_unlock(p_ptr);
-	str_len = tipc_printbuf_validate(&pb);
-
-	skb_put(buf, TLV_SPACE(str_len));
-	TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
-
-	return buf;
-}
-
-#endif
-
 void tipc_port_reinit(void)
 {
 	struct port *p_ptr;
@@ -1295,50 +1214,13 @@ int tipc_shutdown(u32 ref)
 	return tipc_disconnect(ref);
 }
 
-int tipc_isconnected(u32 ref, int *isconnected)
-{
-	struct port *p_ptr;
-
-	p_ptr = tipc_port_lock(ref);
-	if (!p_ptr)
-		return -EINVAL;
-	*isconnected = p_ptr->publ.connected;
-	tipc_port_unlock(p_ptr);
-	return 0;
-}
-
-int tipc_peer(u32 ref, struct tipc_portid *peer)
-{
-	struct port *p_ptr;
-	int res;
-
-	p_ptr = tipc_port_lock(ref);
-	if (!p_ptr)
-		return -EINVAL;
-	if (p_ptr->publ.connected) {
-		peer->ref = port_peerport(p_ptr);
-		peer->node = port_peernode(p_ptr);
-		res = 0;
-	} else
-		res = -ENOTCONN;
-	tipc_port_unlock(p_ptr);
-	return res;
-}
-
-int tipc_ref_valid(u32 ref)
-{
-	/* Works irrespective of type */
-	return !!tipc_ref_deref(ref);
-}
-
-
 /*
  * tipc_port_recv_sections(): Concatenate and deliver sectioned
  *                            message for this node.
  */
 
-int tipc_port_recv_sections(struct port *sender, unsigned int num_sect,
-			    struct iovec const *msg_sect)
+static int tipc_port_recv_sections(struct port *sender, unsigned int num_sect,
+				   struct iovec const *msg_sect)
 {
 	struct sk_buff *buf;
 	int res;
@@ -1389,65 +1271,16 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
 }
 
 /**
- * tipc_send_buf - send message buffer on connection
- */
-
-int tipc_send_buf(u32 ref, struct sk_buff *buf, unsigned int dsz)
-{
-	struct port *p_ptr;
-	struct tipc_msg *msg;
-	u32 destnode;
-	u32 hsz;
-	u32 sz;
-	u32 res;
-
-	p_ptr = tipc_port_deref(ref);
-	if (!p_ptr || !p_ptr->publ.connected)
-		return -EINVAL;
-
-	msg = &p_ptr->publ.phdr;
-	hsz = msg_hdr_sz(msg);
-	sz = hsz + dsz;
-	msg_set_size(msg, sz);
-	if (skb_cow(buf, hsz))
-		return -ENOMEM;
-
-	skb_push(buf, hsz);
-	skb_copy_to_linear_data(buf, msg, hsz);
-	destnode = msg_destnode(msg);
-	p_ptr->publ.congested = 1;
-	if (!tipc_port_congested(p_ptr)) {
-		if (likely(destnode != tipc_own_addr))
-			res = tipc_send_buf_fast(buf, destnode);
-		else {
-			tipc_port_recv_msg(buf);
-			res = sz;
-		}
-		if (likely(res != -ELINKCONG)) {
-			port_incr_out_seqno(p_ptr);
-			p_ptr->sent++;
-			p_ptr->publ.congested = 0;
-			return res;
-		}
-	}
-	if (port_unreliable(p_ptr)) {
-		p_ptr->publ.congested = 0;
-		return dsz;
-	}
-	return -ELINKCONG;
-}
-
-/**
  * tipc_forward2name - forward message sections to port name
  */
 
-int tipc_forward2name(u32 ref,
-		      struct tipc_name const *name,
-		      u32 domain,
-		      u32 num_sect,
-		      struct iovec const *msg_sect,
-		      struct tipc_portid const *orig,
-		      unsigned int importance)
+static int tipc_forward2name(u32 ref,
+			     struct tipc_name const *name,
+			     u32 domain,
+			     u32 num_sect,
+			     struct iovec const *msg_sect,
+			     struct tipc_portid const *orig,
+			     unsigned int importance)
 {
 	struct port *p_ptr;
 	struct tipc_msg *msg;
@@ -1473,7 +1306,7 @@ int tipc_forward2name(u32 ref,
 	msg_set_destnode(msg, destnode);
 	msg_set_destport(msg, destport);
 
-	if (likely(destport || destnode)) {
+	if (likely(destport)) {
 		p_ptr->sent++;
 		if (likely(destnode == tipc_own_addr))
 			return tipc_port_recv_sections(p_ptr, num_sect, msg_sect);
@@ -1510,89 +1343,15 @@ int tipc_send2name(u32 ref,
 }
 
 /**
- * tipc_forward_buf2name - forward message buffer to port name
- */
-
-int tipc_forward_buf2name(u32 ref,
-			  struct tipc_name const *name,
-			  u32 domain,
-			  struct sk_buff *buf,
-			  unsigned int dsz,
-			  struct tipc_portid const *orig,
-			  unsigned int importance)
-{
-	struct port *p_ptr;
-	struct tipc_msg *msg;
-	u32 destnode = domain;
-	u32 destport;
-	int res;
-
-	p_ptr = (struct port *)tipc_ref_deref(ref);
-	if (!p_ptr || p_ptr->publ.connected)
-		return -EINVAL;
-
-	msg = &p_ptr->publ.phdr;
-	if (importance <= TIPC_CRITICAL_IMPORTANCE)
-		msg_set_importance(msg, importance);
-	msg_set_type(msg, TIPC_NAMED_MSG);
-	msg_set_orignode(msg, orig->node);
-	msg_set_origport(msg, orig->ref);
-	msg_set_nametype(msg, name->type);
-	msg_set_nameinst(msg, name->instance);
-	msg_set_lookup_scope(msg, tipc_addr_scope(domain));
-	msg_set_hdr_sz(msg, LONG_H_SIZE);
-	msg_set_size(msg, LONG_H_SIZE + dsz);
-	destport = tipc_nametbl_translate(name->type, name->instance, &destnode);
-	msg_set_destnode(msg, destnode);
-	msg_set_destport(msg, destport);
-	msg_dbg(msg, "forw2name ==> ");
-	if (skb_cow(buf, LONG_H_SIZE))
-		return -ENOMEM;
-	skb_push(buf, LONG_H_SIZE);
-	skb_copy_to_linear_data(buf, msg, LONG_H_SIZE);
-	msg_dbg(buf_msg(buf),"PREP:");
-	if (likely(destport || destnode)) {
-		p_ptr->sent++;
-		if (destnode == tipc_own_addr)
-			return tipc_port_recv_msg(buf);
-		res = tipc_send_buf_fast(buf, destnode);
-		if (likely(res != -ELINKCONG))
-			return res;
-		if (port_unreliable(p_ptr))
-			return dsz;
-		return -ELINKCONG;
-	}
-	return tipc_reject_msg(buf, TIPC_ERR_NO_NAME);
-}
-
-/**
- * tipc_send_buf2name - send message buffer to port name
- */
-
-int tipc_send_buf2name(u32 ref,
-		       struct tipc_name const *dest,
-		       u32 domain,
-		       struct sk_buff *buf,
-		       unsigned int dsz)
-{
-	struct tipc_portid orig;
-
-	orig.ref = ref;
-	orig.node = tipc_own_addr;
-	return tipc_forward_buf2name(ref, dest, domain, buf, dsz, &orig,
-				     TIPC_PORT_IMPORTANCE);
-}
-
-/**
  * tipc_forward2port - forward message sections to port identity
  */
 
-int tipc_forward2port(u32 ref,
-		      struct tipc_portid const *dest,
-		      unsigned int num_sect,
-		      struct iovec const *msg_sect,
-		      struct tipc_portid const *orig,
-		      unsigned int importance)
+static int tipc_forward2port(u32 ref,
+			     struct tipc_portid const *dest,
+			     unsigned int num_sect,
+			     struct iovec const *msg_sect,
+			     struct tipc_portid const *orig,
+			     unsigned int importance)
 {
 	struct port *p_ptr;
 	struct tipc_msg *msg;
@@ -1644,12 +1403,12 @@ int tipc_send2port(u32 ref,
 /**
  * tipc_forward_buf2port - forward message buffer to port identity
  */
-int tipc_forward_buf2port(u32 ref,
-			  struct tipc_portid const *dest,
-			  struct sk_buff *buf,
-			  unsigned int dsz,
-			  struct tipc_portid const *orig,
-			  unsigned int importance)
+static int tipc_forward_buf2port(u32 ref,
+				 struct tipc_portid const *dest,
+				 struct sk_buff *buf,
+				 unsigned int dsz,
+				 struct tipc_portid const *orig,
+				 unsigned int importance)
 {
 	struct port *p_ptr;
 	struct tipc_msg *msg;
diff --git a/net/tipc/port.h b/net/tipc/port.h
index 8d1652aab298..73bbf442b346 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -109,8 +109,6 @@ struct port {
 extern spinlock_t tipc_port_list_lock;
 struct port_list;
 
-int tipc_port_recv_sections(struct port *p_ptr, u32 num_sect,
-			    struct iovec const *msg_sect);
 int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
 			      struct iovec const *msg_sect, u32 num_sect,
 			      int err);
@@ -157,7 +155,7 @@ static inline u32 tipc_peer_node(struct port *p_ptr)
 
 static inline int tipc_port_congested(struct port *p_ptr)
 {
-	return((p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2));
+	return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2);
 }
 
 /**
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index 8dea66500cf5..ab8ad32d8c20 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -282,23 +282,6 @@ void *tipc_ref_lock(u32 ref)
 	return NULL;
 }
 
-/**
- * tipc_ref_unlock - unlock referenced object
- */
-
-void tipc_ref_unlock(u32 ref)
-{
-	if (likely(tipc_ref_table.entries)) {
-		struct reference *entry;
-
-		entry = &tipc_ref_table.entries[ref &
-						tipc_ref_table.index_mask];
-		if (likely((entry->ref == ref) && (entry->object)))
-			spin_unlock_bh(&entry->lock);
-		else
-			err("Attempt to unlock non-existent reference\n");
-	}
-}
 
 /**
  * tipc_ref_deref - return pointer referenced object (without locking it)
diff --git a/net/tipc/ref.h b/net/tipc/ref.h
index 7e3798ea93b9..5bc8e7ab84de 100644
--- a/net/tipc/ref.h
+++ b/net/tipc/ref.h
@@ -44,7 +44,6 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock);
 void tipc_ref_discard(u32 ref);
 
 void *tipc_ref_lock(u32 ref);
-void tipc_ref_unlock(u32 ref);
 void *tipc_ref_deref(u32 ref);
 
 #endif
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 66e889ba48fd..33217fc3d697 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -64,6 +64,7 @@ struct tipc_sock {
 	struct sock sk;
 	struct tipc_port *p;
 	struct tipc_portid peer_name;
+	long conn_timeout;
 };
 
 #define tipc_sk(sk) ((struct tipc_sock *)(sk))
@@ -240,9 +241,9 @@ static int tipc_create(struct net *net, struct socket *sock, int protocol,
 	sock->state = state;
 
 	sock_init_data(sock, sk);
-	sk->sk_rcvtimeo = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
 	sk->sk_backlog_rcv = backlog_rcv;
 	tipc_sk(sk)->p = tp_ptr;
+	tipc_sk(sk)->conn_timeout = msecs_to_jiffies(CONN_TIMEOUT_DEFAULT);
 
 	spin_unlock_bh(tp_ptr->lock);
 
@@ -429,36 +430,55 @@ static int get_name(struct socket *sock, struct sockaddr *uaddr,
 * to handle any preventable race conditions, so TIPC will do the same ...
 *
 * TIPC sets the returned events as follows:
- * a) POLLRDNORM and POLLIN are set if the socket's receive queue is non-empty
- *    or if a connection-oriented socket is does not have an active connection
- *    (i.e. a read operation will not block).
- * b) POLLOUT is set except when a socket's connection has been terminated
- *    (i.e. a write operation will not block).
- * c) POLLHUP is set when a socket's connection has been terminated.
- *
- * IMPORTANT: The fact that a read or write operation will not block does NOT
- * imply that the operation will succeed!
+ *
+ * socket state		flags set
+ * ------------		---------
+ * unconnected		no read flags
+ *			no write flags
+ *
+ * connecting		POLLIN/POLLRDNORM if ACK/NACK in rx queue
+ *			no write flags
+ *
+ * connected		POLLIN/POLLRDNORM if data in rx queue
+ *			POLLOUT if port is not congested
+ *
+ * disconnecting	POLLIN/POLLRDNORM/POLLHUP
+ *			no write flags
+ *
+ * listening		POLLIN if SYN in rx queue
+ *			no write flags
+ *
+ * ready		POLLIN/POLLRDNORM if data in rx queue
+ * [connectionless]	POLLOUT (since port cannot be congested)
+ *
+ * IMPORTANT: The fact that a read or write operation is indicated does NOT
+ * imply that the operation will succeed, merely that it should be performed
+ * and will not block.
 */
 
static unsigned int poll(struct file *file, struct socket *sock,
			 poll_table *wait)
{
 	struct sock *sk = sock->sk;
-	u32 mask;
+	u32 mask = 0;
 
 	poll_wait(file, sk_sleep(sk), wait);
 
-	if (!skb_queue_empty(&sk->sk_receive_queue) ||
-	    (sock->state == SS_UNCONNECTED) ||
-	    (sock->state == SS_DISCONNECTING))
-		mask = (POLLRDNORM | POLLIN);
-	else
-		mask = 0;
-
-	if (sock->state == SS_DISCONNECTING)
-		mask |= POLLHUP;
-	else
-		mask |= POLLOUT;
+	switch ((int)sock->state) {
+	case SS_READY:
+	case SS_CONNECTED:
+		if (!tipc_sk_port(sk)->congested)
+			mask |= POLLOUT;
+		/* fall thru' */
+	case SS_CONNECTING:
+	case SS_LISTENING:
+		if (!skb_queue_empty(&sk->sk_receive_queue))
+			mask |= (POLLIN | POLLRDNORM);
+		break;
+	case SS_DISCONNECTING:
+		mask = (POLLIN | POLLRDNORM | POLLHUP);
+		break;
+	}
 
 	return mask;
}
@@ -1026,9 +1046,8 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
 	struct sk_buff *buf;
 	struct tipc_msg *msg;
 	unsigned int sz;
-	int sz_to_copy;
+	int sz_to_copy, target, needed;
 	int sz_copied = 0;
-	int needed;
 	char __user *crs = m->msg_iov->iov_base;
 	unsigned char *buf_crs;
 	u32 err;
@@ -1050,6 +1069,8 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
 		goto exit;
 	}
 
+	target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
+
restart:
 
 	/* Look for a message in receive queue; wait if necessary */
@@ -1138,7 +1159,7 @@ restart:
 
 	if ((sz_copied < buf_len) &&	/* didn't get all requested data */
 	    (!skb_queue_empty(&sk->sk_receive_queue) ||
-	     (flags & MSG_WAITALL)) &&	/* and more is ready or required */
+	     (sz_copied < target)) &&	/* and more is ready or required */
 	    (!(flags & MSG_PEEK)) &&	/* and aren't just peeking at data */
 	    (!err))			/* and haven't reached a FIN */
 		goto restart;
@@ -1174,7 +1195,7 @@ static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base)
 	if (msg_connected(msg))
 		threshold *= 4;
 
-	return (queue_size >= threshold);
+	return queue_size >= threshold;
 }
 
 /**
@@ -1365,6 +1386,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
 	struct msghdr m = {NULL,};
 	struct sk_buff *buf;
 	struct tipc_msg *msg;
+	long timeout;
 	int res;
 
 	lock_sock(sk);
@@ -1379,7 +1401,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
 	/* For now, TIPC does not support the non-blocking form of connect() */
 
 	if (flags & O_NONBLOCK) {
-		res = -EWOULDBLOCK;
+		res = -EOPNOTSUPP;
 		goto exit;
 	}
 
@@ -1425,11 +1447,12 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
 
 	/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
 
+	timeout = tipc_sk(sk)->conn_timeout;
 	release_sock(sk);
 	res = wait_event_interruptible_timeout(*sk_sleep(sk),
 			(!skb_queue_empty(&sk->sk_receive_queue) ||
 			(sock->state != SS_CONNECTING)),
-			sk->sk_rcvtimeo);
+			timeout ? timeout : MAX_SCHEDULE_TIMEOUT);
 	lock_sock(sk);
 
 	if (res > 0) {
@@ -1692,7 +1715,7 @@ static int setsockopt(struct socket *sock,
 		res = tipc_set_portunreturnable(tport->ref, value);
 		break;
 	case TIPC_CONN_TIMEOUT:
-		sk->sk_rcvtimeo = msecs_to_jiffies(value);
+		tipc_sk(sk)->conn_timeout = msecs_to_jiffies(value);
 		/* no need to set "res", since already 0 at this point */
 		break;
 	default:
@@ -1747,7 +1770,7 @@ static int getsockopt(struct socket *sock,
 		res = tipc_portunreturnable(tport->ref, &value);
 		break;
 	case TIPC_CONN_TIMEOUT:
-		value = jiffies_to_msecs(sk->sk_rcvtimeo);
+		value = jiffies_to_msecs(tipc_sk(sk)->conn_timeout);
 		/* no need to set "res", since already 0 at this point */
 		break;
 	case TIPC_NODE_RECVQ_DEPTH:
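
From user space, the reworked poll() table reads as ordinary poll(2) semantics. A minimal caller-side sketch, assuming fd already refers to a connected AF_TIPC socket (the demo main() uses stdin only so the program runs anywhere):

    #include <poll.h>
    #include <stdio.h>

    /* Caller-side view of the mask semantics documented in the hunk above. */
    static void wait_for_io(int fd)
    {
            struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };

            if (poll(&pfd, 1, 1000) > 0) {
                    if (pfd.revents & POLLIN)   /* data (or a FIN) queued */
                            printf("readable\n");
                    if (pfd.revents & POLLOUT)  /* port not congested */
                            printf("writable\n");
                    if (pfd.revents & POLLHUP)  /* connection terminated */
                            printf("hung up\n");
            }
    }

    int main(void)
    {
            wait_for_io(0);     /* demo on stdin; substitute a TIPC socket */
            return 0;
    }
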
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index ab6eab4c45e2..33313961d010 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -76,6 +76,19 @@ struct top_srv {
 static struct top_srv topsrv = { 0 };
 
 /**
+ * htohl - convert value to endianness used by destination
+ * @in: value to convert
+ * @swap: non-zero if endianness must be reversed
+ *
+ * Returns converted value
+ */
+
+static u32 htohl(u32 in, int swap)
+{
+	return swap ? swab32(in) : in;
+}
+
+/**
  * subscr_send_event - send a message containing a tipc_event to the subscriber
  *
  * Note: Must not hold subscriber's server port lock, since tipc_send() will
@@ -94,11 +107,11 @@ static void subscr_send_event(struct subscription *sub,
 	msg_sect.iov_base = (void *)&sub->evt;
 	msg_sect.iov_len = sizeof(struct tipc_event);
 
-	sub->evt.event = htonl(event);
-	sub->evt.found_lower = htonl(found_lower);
-	sub->evt.found_upper = htonl(found_upper);
-	sub->evt.port.ref = htonl(port_ref);
-	sub->evt.port.node = htonl(node);
+	sub->evt.event = htohl(event, sub->swap);
+	sub->evt.found_lower = htohl(found_lower, sub->swap);
+	sub->evt.found_upper = htohl(found_upper, sub->swap);
+	sub->evt.port.ref = htohl(port_ref, sub->swap);
+	sub->evt.port.node = htohl(node, sub->swap);
 	tipc_send(sub->server_ref, 1, &msg_sect);
 }
 
@@ -274,29 +287,16 @@ static void subscr_cancel(struct tipc_subscr *s,
{
 	struct subscription *sub;
 	struct subscription *sub_temp;
-	__u32 type, lower, upper, timeout, filter;
 	int found = 0;
 
 	/* Find first matching subscription, exit if not found */
 
-	type = ntohl(s->seq.type);
-	lower = ntohl(s->seq.lower);
-	upper = ntohl(s->seq.upper);
-	timeout = ntohl(s->timeout);
-	filter = ntohl(s->filter) & ~TIPC_SUB_CANCEL;
-
 	list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
 				 subscription_list) {
-		if ((type == sub->seq.type) &&
-		    (lower == sub->seq.lower) &&
-		    (upper == sub->seq.upper) &&
-		    (timeout == sub->timeout) &&
-		    (filter == sub->filter) &&
-		    !memcmp(s->usr_handle,sub->evt.s.usr_handle,
-			    sizeof(s->usr_handle)) ){
-			found = 1;
-			break;
-		}
+		if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) {
+			found = 1;
+			break;
+		}
 	}
 	if (!found)
 		return;
@@ -310,7 +310,7 @@ static void subscr_cancel(struct tipc_subscr *s,
 		k_term_timer(&sub->timer);
 		spin_lock_bh(subscriber->lock);
 	}
-	dbg("Cancel: removing sub %u,%u,%u from subscriber %p list\n",
+	dbg("Cancel: removing sub %u,%u,%u from subscriber %x list\n",
 	    sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber);
 	subscr_del(sub);
}
@@ -325,10 +325,16 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
					     struct subscriber *subscriber)
{
 	struct subscription *sub;
+	int swap;
+
+	/* Determine subscriber's endianness */
+
+	swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE));
 
 	/* Detect & process a subscription cancellation request */
 
-	if (ntohl(s->filter) & TIPC_SUB_CANCEL) {
+	if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) {
+		s->filter &= ~htohl(TIPC_SUB_CANCEL, swap);
 		subscr_cancel(s, subscriber);
 		return NULL;
 	}
@@ -353,12 +359,13 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
 
 	/* Initialize subscription object */
 
-	sub->seq.type = ntohl(s->seq.type);
-	sub->seq.lower = ntohl(s->seq.lower);
-	sub->seq.upper = ntohl(s->seq.upper);
-	sub->timeout = ntohl(s->timeout);
-	sub->filter = ntohl(s->filter);
-	if ((sub->filter && (sub->filter != TIPC_SUB_PORTS)) ||
+	sub->seq.type = htohl(s->seq.type, swap);
+	sub->seq.lower = htohl(s->seq.lower, swap);
+	sub->seq.upper = htohl(s->seq.upper, swap);
+	sub->timeout = htohl(s->timeout, swap);
+	sub->filter = htohl(s->filter, swap);
+	if ((!(sub->filter & TIPC_SUB_PORTS) ==
+	     !(sub->filter & TIPC_SUB_SERVICE)) ||
 	    (sub->seq.lower > sub->seq.upper)) {
 		warn("Subscription rejected, illegal request\n");
 		kfree(sub);
@@ -369,6 +376,7 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
 	INIT_LIST_HEAD(&sub->nameseq_list);
 	list_add(&sub->subscription_list, &subscriber->subscription_list);
 	sub->server_ref = subscriber->port_ref;
+	sub->swap = swap;
 	memcpy(&sub->evt.s, s, sizeof(struct tipc_subscr));
 	atomic_inc(&topsrv.subscription_count);
 	if (sub->timeout != TIPC_WAIT_FOREVER) {
@@ -598,12 +606,3 @@ void tipc_subscr_stop(void)
 		topsrv.user_ref = 0;
 	}
}
-
-
-int tipc_ispublished(struct tipc_name const *name)
-{
-	u32 domain = 0;
-
-	return(tipc_nametbl_translate(name->type, name->instance,&domain) != 0);
-}
-
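
The htohl() conversion rests on the observation that a well-formed subscription always carries TIPC_SUB_PORTS or TIPC_SUB_SERVICE in its filter, so a filter with neither bit visible in native byte order must have come from an opposite-endian peer. A userspace sketch of that heuristic; the 0x01/0x02 bit values mirror include/linux/tipc.h but are hard-coded here purely for illustration:

    #include <stdint.h>
    #include <stdio.h>

    #define TIPC_SUB_PORTS   0x01   /* illustrative copies of the uapi bits */
    #define TIPC_SUB_SERVICE 0x02

    static uint32_t swab32(uint32_t x)
    {
            return (x >> 24) | ((x >> 8) & 0x0000ff00) |
                   ((x << 8) & 0x00ff0000) | (x << 24);
    }

    /* Same shape as the kernel helper: swap only when needed. */
    static uint32_t htohl(uint32_t in, int swap)
    {
            return swap ? swab32(in) : in;
    }

    int main(void)
    {
            /* filter as an opposite-endian subscriber would have sent it */
            uint32_t filter = swab32(TIPC_SUB_PORTS);
            int swap = !(filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE));

            printf("swap=%d, filter=%u\n", swap, htohl(filter, swap));
            return 0;
    }
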
diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h
index c20f496d95b2..45d89bf4d202 100644
--- a/net/tipc/subscr.h
+++ b/net/tipc/subscr.h
@@ -53,6 +53,7 @@ typedef void (*tipc_subscr_event) (struct subscription *sub,
 * @nameseq_list: adjacent subscriptions in name sequence's subscription list
 * @subscription_list: adjacent subscriptions in subscriber's subscription list
 * @server_ref: object reference of server port associated with subscription
+ * @swap: indicates if subscriber uses opposite endianness in its messages
 * @evt: template for events generated by subscription
 */
 
@@ -65,6 +66,7 @@ struct subscription {
 	struct list_head nameseq_list;
 	struct list_head subscription_list;
 	u32 server_ref;
+	int swap;
 	struct tipc_event evt;
};
 
diff --git a/net/tipc/zone.c b/net/tipc/zone.c
index 2c01ba2d86bf..83f8b5e91fc8 100644
--- a/net/tipc/zone.c
+++ b/net/tipc/zone.c
@@ -160,14 +160,3 @@ u32 tipc_zone_select_router(struct _zone *z_ptr, u32 addr, u32 ref)
 	}
 	return 0;
}
-
-
-u32 tipc_zone_next_node(u32 addr)
-{
-	struct cluster *c_ptr = tipc_cltr_find(addr);
-
-	if (c_ptr)
-		return tipc_cltr_next_node(c_ptr, addr);
-	return 0;
-}
-
diff --git a/net/tipc/zone.h b/net/tipc/zone.h
index 7bdc3406ba9b..bd1c20ce9d06 100644
--- a/net/tipc/zone.h
+++ b/net/tipc/zone.h
@@ -61,7 +61,6 @@ void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest);
struct _zone *tipc_zone_create(u32 addr);
void tipc_zone_delete(struct _zone *z_ptr);
void tipc_zone_attach_cluster(struct _zone *z_ptr, struct cluster *c_ptr);
-u32 tipc_zone_next_node(u32 addr);
 
static inline struct _zone *tipc_zone_find(u32 addr)
{
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 0b39b2451ea5..0ebc777a6660 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1511,6 +1511,8 @@ restart:
 		goto restart;
 	}
 
+	if (sock_flag(other, SOCK_RCVTSTAMP))
+		__net_timestamp(skb);
 	skb_queue_tail(&other->sk_receive_queue, skb);
 	unix_state_unlock(other);
 	other->sk_data_ready(other, len);
@@ -1722,6 +1724,9 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (err)
 		goto out_free;
 
+	if (sock_flag(sk, SOCK_RCVTSTAMP))
+		__sock_recv_timestamp(msg, sk, skb);
+
 	if (!siocb->scm) {
 		siocb->scm = &tmp_scm;
 		memset(&tmp_scm, 0, sizeof(tmp_scm));
@@ -2033,11 +2038,10 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
 		mask |= POLLHUP;
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
-		mask |= POLLRDHUP;
+		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
 
 	/* readable? */
-	if (!skb_queue_empty(&sk->sk_receive_queue) ||
-	    (sk->sk_shutdown & RCV_SHUTDOWN))
+	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
 
 	/* Connection-based need to check for termination and startup */
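
With SOCK_RCVTSTAMP now honoured on AF_UNIX datagram sockets, the standard SO_TIMESTAMP/SCM_TIMESTAMP recipe applies to them as well. A minimal, self-contained receive path using a socketpair:

    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/time.h>
    #include <sys/uio.h>

    /* Read one datagram and print its kernel receive timestamp. */
    static void recv_with_timestamp(int fd)
    {
            char data[128];
            char ctrl[CMSG_SPACE(sizeof(struct timeval))];
            struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
            struct msghdr msg = {
                    .msg_iov = &iov, .msg_iovlen = 1,
                    .msg_control = ctrl, .msg_controllen = sizeof(ctrl),
            };
            struct cmsghdr *cm;
            struct timeval tv;
            int on = 1;

            setsockopt(fd, SOL_SOCKET, SO_TIMESTAMP, &on, sizeof(on));
            if (recvmsg(fd, &msg, 0) < 0)
                    return;
            for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
                    if (cm->cmsg_level == SOL_SOCKET &&
                        cm->cmsg_type == SCM_TIMESTAMP) {
                            memcpy(&tv, CMSG_DATA(cm), sizeof(tv));
                            printf("rx at %ld.%06ld\n",
                                   (long)tv.tv_sec, (long)tv.tv_usec);
                    }
            }
    }

    int main(void)
    {
            int sv[2];

            if (socketpair(AF_UNIX, SOCK_DGRAM, 0, sv))
                    return 1;
            send(sv[0], "ping", 4, 0);
            recv_with_timestamp(sv[1]);
            return 0;
    }
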
diff --git a/net/wireless/core.c b/net/wireless/core.c
index d6d046b9f6f2..9c21ebf9780e 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -253,11 +253,16 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
 			WARN_ON(err);
 			wdev->netdev->features |= NETIF_F_NETNS_LOCAL;
 		}
+
+		return err;
 	}
 
 	wiphy_net_set(&rdev->wiphy, net);
 
-	return err;
+	err = device_rename(&rdev->wiphy.dev, dev_name(&rdev->wiphy.dev));
+	WARN_ON(err);
+
+	return 0;
}
 
static void cfg80211_rfkill_poll(struct rfkill *rfkill, void *data)
@@ -428,7 +433,7 @@ int wiphy_register(struct wiphy *wiphy)
 
 	/* sanity check ifmodes */
 	WARN_ON(!ifmodes);
-	ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1;
+	ifmodes &= ((1 << NUM_NL80211_IFTYPES) - 1) & ~1;
 	if (WARN_ON(ifmodes != wiphy->interface_modes))
 		wiphy->interface_modes = ifmodes;
 
@@ -683,8 +688,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 		INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work);
 		INIT_LIST_HEAD(&wdev->event_list);
 		spin_lock_init(&wdev->event_lock);
-		INIT_LIST_HEAD(&wdev->action_registrations);
-		spin_lock_init(&wdev->action_registrations_lock);
+		INIT_LIST_HEAD(&wdev->mgmt_registrations);
+		spin_lock_init(&wdev->mgmt_registrations_lock);
 
 		mutex_lock(&rdev->devlist_mtx);
 		list_add_rcu(&wdev->list, &rdev->netdev_list);
@@ -724,6 +729,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 		dev->ethtool_ops = &cfg80211_ethtool_ops;
 
 		if ((wdev->iftype == NL80211_IFTYPE_STATION ||
+		     wdev->iftype == NL80211_IFTYPE_P2P_CLIENT ||
 		     wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr)
 			dev->priv_flags |= IFF_DONT_BRIDGE;
 		break;
@@ -732,6 +738,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 		case NL80211_IFTYPE_ADHOC:
 			cfg80211_leave_ibss(rdev, dev, true);
 			break;
+		case NL80211_IFTYPE_P2P_CLIENT:
 		case NL80211_IFTYPE_STATION:
 			wdev_lock(wdev);
#ifdef CONFIG_CFG80211_WEXT
@@ -804,7 +811,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 		sysfs_remove_link(&dev->dev.kobj, "phy80211");
 		list_del_rcu(&wdev->list);
 		rdev->devlist_generation++;
-		cfg80211_mlme_purge_actions(wdev);
+		cfg80211_mlme_purge_registrations(wdev);
#ifdef CONFIG_CFG80211_WEXT
 		kfree(wdev->wext.keys);
#endif
@@ -910,52 +917,3 @@ static void __exit cfg80211_exit(void)
 	destroy_workqueue(cfg80211_wq);
}
module_exit(cfg80211_exit);
-
-static int ___wiphy_printk(const char *level, const struct wiphy *wiphy,
-			   struct va_format *vaf)
-{
-	if (!wiphy)
-		return printk("%s(NULL wiphy *): %pV", level, vaf);
-
-	return printk("%s%s: %pV", level, wiphy_name(wiphy), vaf);
-}
-
-int __wiphy_printk(const char *level, const struct wiphy *wiphy,
-		   const char *fmt, ...)
-{
-	struct va_format vaf;
-	va_list args;
-	int r;
-
-	va_start(args, fmt);
-
-	vaf.fmt = fmt;
-	vaf.va = &args;
-
-	r = ___wiphy_printk(level, wiphy, &vaf);
-	va_end(args);
-
-	return r;
-}
-EXPORT_SYMBOL(__wiphy_printk);
-
-#define define_wiphy_printk_level(func, kern_level)		\
-int func(const struct wiphy *wiphy, const char *fmt, ...)	\
-{								\
-	struct va_format vaf;					\
-	va_list args;						\
-	int r;							\
-								\
-	va_start(args, fmt);					\
-								\
-	vaf.fmt = fmt;						\
-	vaf.va = &args;						\
-								\
-	r = ___wiphy_printk(kern_level, wiphy, &vaf);		\
-	va_end(args);						\
-								\
-	return r;						\
-}								\
-EXPORT_SYMBOL(func);
-
-define_wiphy_printk_level(wiphy_debug, KERN_DEBUG);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 63d57ae399c3..6583cca0e2ee 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -86,7 +86,7 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy)
static inline
bool wiphy_idx_valid(int wiphy_idx)
{
-	return (wiphy_idx >= 0);
+	return wiphy_idx >= 0;
}
 
 
@@ -95,7 +95,10 @@ extern struct mutex cfg80211_mutex;
extern struct list_head cfg80211_rdev_list;
extern int cfg80211_rdev_list_generation;
 
-#define assert_cfg80211_lock() WARN_ON(!mutex_is_locked(&cfg80211_mutex))
+static inline void assert_cfg80211_lock(void)
+{
+	lockdep_assert_held(&cfg80211_mutex);
+}
 
/*
 * You can use this to mark a wiphy_idx as not having an associated wiphy.
@@ -202,8 +205,8 @@ static inline void wdev_unlock(struct wireless_dev *wdev)
 	mutex_unlock(&wdev->mtx);
}
 
-#define ASSERT_RDEV_LOCK(rdev) WARN_ON(!mutex_is_locked(&(rdev)->mtx));
-#define ASSERT_WDEV_LOCK(wdev) WARN_ON(!mutex_is_locked(&(wdev)->mtx));
+#define ASSERT_RDEV_LOCK(rdev) lockdep_assert_held(&(rdev)->mtx)
+#define ASSERT_WDEV_LOCK(wdev) lockdep_assert_held(&(wdev)->mtx)
 
enum cfg80211_event_type {
 	EVENT_CONNECT_RESULT,
@@ -331,16 +334,17 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
 			       const u8 *resp_ie, size_t resp_ie_len,
 			       u16 status, bool wextev,
 			       struct cfg80211_bss *bss);
-int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid,
-				  const u8 *match_data, int match_len);
-void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid);
-void cfg80211_mlme_purge_actions(struct wireless_dev *wdev);
-int cfg80211_mlme_action(struct cfg80211_registered_device *rdev,
-			 struct net_device *dev,
-			 struct ieee80211_channel *chan,
-			 enum nl80211_channel_type channel_type,
-			 bool channel_type_valid,
-			 const u8 *buf, size_t len, u64 *cookie);
+int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
+				u16 frame_type, const u8 *match_data,
+				int match_len);
+void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid);
+void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev);
+int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
+			  struct net_device *dev,
+			  struct ieee80211_channel *chan,
+			  enum nl80211_channel_type channel_type,
+			  bool channel_type_valid,
+			  const u8 *buf, size_t len, u64 *cookie);
 
/* SME */
int __cfg80211_connect(struct cfg80211_registered_device *rdev,
@@ -371,7 +375,7 @@ bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev);
/* internal helpers */
int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
 				   struct key_params *params, int key_idx,
-				   const u8 *mac_addr);
+				   bool pairwise, const u8 *mac_addr);
void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
 			     size_t ie_len, u16 reason, bool from_ap);
void cfg80211_sme_scan_done(struct net_device *dev);
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 27a8ce9343c3..f33fbb79437c 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -88,6 +88,25 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
88 if (wdev->ssid_len) 88 if (wdev->ssid_len)
89 return -EALREADY; 89 return -EALREADY;
90 90
91 if (!params->basic_rates) {
92 /*
93 * If no rates were explicitly configured,
94 * use the mandatory rate set for 11b or
95 * 11a for maximum compatibility.
96 */
97 struct ieee80211_supported_band *sband =
98 rdev->wiphy.bands[params->channel->band];
99 int j;
100 u32 flag = params->channel->band == IEEE80211_BAND_5GHZ ?
101 IEEE80211_RATE_MANDATORY_A :
102 IEEE80211_RATE_MANDATORY_B;
103
104 for (j = 0; j < sband->n_bitrates; j++) {
105 if (sband->bitrates[j].flags & flag)
106 params->basic_rates |= BIT(j);
107 }
108 }
109
91 if (WARN_ON(wdev->connect_keys)) 110 if (WARN_ON(wdev->connect_keys))
92 kfree(wdev->connect_keys); 111 kfree(wdev->connect_keys);
93 wdev->connect_keys = connkeys; 112 wdev->connect_keys = connkeys;
@@ -141,7 +160,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext)
141 */ 160 */
142 if (rdev->ops->del_key) 161 if (rdev->ops->del_key)
143 for (i = 0; i < 6; i++) 162 for (i = 0; i < 6; i++)
144 rdev->ops->del_key(wdev->wiphy, dev, i, NULL); 163 rdev->ops->del_key(wdev->wiphy, dev, i, false, NULL);
145 164
146 if (wdev->current_bss) { 165 if (wdev->current_bss) {
147 cfg80211_unhold_bss(wdev->current_bss); 166 cfg80211_unhold_bss(wdev->current_bss);
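A minimal user-space sketch of the basic_rates fallback added to
__cfg80211_join_ibss() above: when userspace configures no rates, the
bitmap is built from the band's mandatory 11b (2.4 GHz) or 11a (5 GHz)
rates. Types and flag values below are illustrative stand-ins, not the
kernel's (which uses struct ieee80211_supported_band and the
IEEE80211_RATE_MANDATORY_A/B flags):

#include <stdint.h>
#include <stdio.h>

#define RATE_MANDATORY_A 0x1	/* stand-in flag values */
#define RATE_MANDATORY_B 0x2

struct bitrate { uint32_t flags; int rate; /* units of 100 kb/s */ };

static uint32_t default_basic_rates(const struct bitrate *rates,
				    int n_bitrates, int is_5ghz)
{
	uint32_t flag = is_5ghz ? RATE_MANDATORY_A : RATE_MANDATORY_B;
	uint32_t basic = 0;
	int j;

	for (j = 0; j < n_bitrates; j++)
		if (rates[j].flags & flag)
			basic |= 1u << j;	/* BIT(j) in the kernel */
	return basic;
}

int main(void)
{
	/* 2.4 GHz: 1, 2, 5.5 and 11 Mb/s form the 11b mandatory set */
	struct bitrate band[] = {
		{ RATE_MANDATORY_B, 10 },
		{ RATE_MANDATORY_B, 20 },
		{ RATE_MANDATORY_B, 55 },
		{ RATE_MANDATORY_B, 110 },
		{ 0, 60 },
	};

	printf("basic_rates = 0x%x\n",
	       default_basic_rates(band, 5, 0));	/* prints 0xf */
	return 0;
}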
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index d1a3fb99fdf2..26838d903b9a 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -149,7 +149,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
149 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf; 149 struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)buf;
150 const u8 *bssid = mgmt->bssid; 150 const u8 *bssid = mgmt->bssid;
151 int i; 151 int i;
152 bool found = false; 152 bool found = false, was_current = false;
153 153
154 ASSERT_WDEV_LOCK(wdev); 154 ASSERT_WDEV_LOCK(wdev);
155 155
@@ -159,6 +159,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
159 cfg80211_put_bss(&wdev->current_bss->pub); 159 cfg80211_put_bss(&wdev->current_bss->pub);
160 wdev->current_bss = NULL; 160 wdev->current_bss = NULL;
161 found = true; 161 found = true;
162 was_current = true;
162 } else for (i = 0; i < MAX_AUTH_BSSES; i++) { 163 } else for (i = 0; i < MAX_AUTH_BSSES; i++) {
163 if (wdev->auth_bsses[i] && 164 if (wdev->auth_bsses[i] &&
164 memcmp(wdev->auth_bsses[i]->pub.bssid, bssid, ETH_ALEN) == 0) { 165 memcmp(wdev->auth_bsses[i]->pub.bssid, bssid, ETH_ALEN) == 0) {
@@ -183,7 +184,7 @@ void __cfg80211_send_deauth(struct net_device *dev,
183 184
184 nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL); 185 nl80211_send_deauth(rdev, dev, buf, len, GFP_KERNEL);
185 186
186 if (wdev->sme_state == CFG80211_SME_CONNECTED) { 187 if (wdev->sme_state == CFG80211_SME_CONNECTED && was_current) {
187 u16 reason_code; 188 u16 reason_code;
188 bool from_ap; 189 bool from_ap;
189 190
@@ -747,31 +748,53 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr,
747} 748}
748EXPORT_SYMBOL(cfg80211_new_sta); 749EXPORT_SYMBOL(cfg80211_new_sta);
749 750
750struct cfg80211_action_registration { 751struct cfg80211_mgmt_registration {
751 struct list_head list; 752 struct list_head list;
752 753
753 u32 nlpid; 754 u32 nlpid;
754 755
755 int match_len; 756 int match_len;
756 757
758 __le16 frame_type;
759
757 u8 match[]; 760 u8 match[];
758}; 761};
759 762
760int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, 763int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
761 const u8 *match_data, int match_len) 764 u16 frame_type, const u8 *match_data,
765 int match_len)
762{ 766{
763 struct cfg80211_action_registration *reg, *nreg; 767 struct wiphy *wiphy = wdev->wiphy;
768 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
769 struct cfg80211_mgmt_registration *reg, *nreg;
764 int err = 0; 770 int err = 0;
771 u16 mgmt_type;
772
773 if (!wdev->wiphy->mgmt_stypes)
774 return -EOPNOTSUPP;
775
776 if ((frame_type & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT)
777 return -EINVAL;
778
779 if (frame_type & ~(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE))
780 return -EINVAL;
781
782 mgmt_type = (frame_type & IEEE80211_FCTL_STYPE) >> 4;
783 if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].rx & BIT(mgmt_type)))
784 return -EINVAL;
765 785
766 nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL); 786 nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL);
767 if (!nreg) 787 if (!nreg)
768 return -ENOMEM; 788 return -ENOMEM;
769 789
770 spin_lock_bh(&wdev->action_registrations_lock); 790 spin_lock_bh(&wdev->mgmt_registrations_lock);
771 791
772 list_for_each_entry(reg, &wdev->action_registrations, list) { 792 list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
773 int mlen = min(match_len, reg->match_len); 793 int mlen = min(match_len, reg->match_len);
774 794
795 if (frame_type != le16_to_cpu(reg->frame_type))
796 continue;
797
775 if (memcmp(reg->match, match_data, mlen) == 0) { 798 if (memcmp(reg->match, match_data, mlen) == 0) {
776 err = -EALREADY; 799 err = -EALREADY;
777 break; 800 break;
@@ -786,140 +809,212 @@ int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid,
786 memcpy(nreg->match, match_data, match_len); 809 memcpy(nreg->match, match_data, match_len);
787 nreg->match_len = match_len; 810 nreg->match_len = match_len;
788 nreg->nlpid = snd_pid; 811 nreg->nlpid = snd_pid;
789 list_add(&nreg->list, &wdev->action_registrations); 812 nreg->frame_type = cpu_to_le16(frame_type);
813 list_add(&nreg->list, &wdev->mgmt_registrations);
814
815 if (rdev->ops->mgmt_frame_register)
816 rdev->ops->mgmt_frame_register(wiphy, wdev->netdev,
817 frame_type, true);
790 818
791 out: 819 out:
792 spin_unlock_bh(&wdev->action_registrations_lock); 820 spin_unlock_bh(&wdev->mgmt_registrations_lock);
821
793 return err; 822 return err;
794} 823}
795 824
796void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid) 825void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid)
797{ 826{
798 struct cfg80211_action_registration *reg, *tmp; 827 struct wiphy *wiphy = wdev->wiphy;
828 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
829 struct cfg80211_mgmt_registration *reg, *tmp;
799 830
800 spin_lock_bh(&wdev->action_registrations_lock); 831 spin_lock_bh(&wdev->mgmt_registrations_lock);
801 832
802 list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { 833 list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
803 if (reg->nlpid == nlpid) { 834 if (reg->nlpid != nlpid)
804 list_del(&reg->list); 835 continue;
805 kfree(reg); 836
837 if (rdev->ops->mgmt_frame_register) {
838 u16 frame_type = le16_to_cpu(reg->frame_type);
839
840 rdev->ops->mgmt_frame_register(wiphy, wdev->netdev,
841 frame_type, false);
806 } 842 }
843
844 list_del(&reg->list);
845 kfree(reg);
807 } 846 }
808 847
809 spin_unlock_bh(&wdev->action_registrations_lock); 848 spin_unlock_bh(&wdev->mgmt_registrations_lock);
810} 849}
811 850
812void cfg80211_mlme_purge_actions(struct wireless_dev *wdev) 851void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
813{ 852{
814 struct cfg80211_action_registration *reg, *tmp; 853 struct cfg80211_mgmt_registration *reg, *tmp;
815 854
816 spin_lock_bh(&wdev->action_registrations_lock); 855 spin_lock_bh(&wdev->mgmt_registrations_lock);
817 856
818 list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { 857 list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
819 list_del(&reg->list); 858 list_del(&reg->list);
820 kfree(reg); 859 kfree(reg);
821 } 860 }
822 861
823 spin_unlock_bh(&wdev->action_registrations_lock); 862 spin_unlock_bh(&wdev->mgmt_registrations_lock);
824} 863}
825 864
826int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, 865int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
827 struct net_device *dev, 866 struct net_device *dev,
828 struct ieee80211_channel *chan, 867 struct ieee80211_channel *chan,
829 enum nl80211_channel_type channel_type, 868 enum nl80211_channel_type channel_type,
830 bool channel_type_valid, 869 bool channel_type_valid,
831 const u8 *buf, size_t len, u64 *cookie) 870 const u8 *buf, size_t len, u64 *cookie)
832{ 871{
833 struct wireless_dev *wdev = dev->ieee80211_ptr; 872 struct wireless_dev *wdev = dev->ieee80211_ptr;
834 const struct ieee80211_mgmt *mgmt; 873 const struct ieee80211_mgmt *mgmt;
874 u16 stype;
835 875
836 if (rdev->ops->action == NULL) 876 if (!wdev->wiphy->mgmt_stypes)
837 return -EOPNOTSUPP; 877 return -EOPNOTSUPP;
878
879 if (!rdev->ops->mgmt_tx)
880 return -EOPNOTSUPP;
881
838 if (len < 24 + 1) 882 if (len < 24 + 1)
839 return -EINVAL; 883 return -EINVAL;
840 884
841 mgmt = (const struct ieee80211_mgmt *) buf; 885 mgmt = (const struct ieee80211_mgmt *) buf;
842 if (!ieee80211_is_action(mgmt->frame_control)) 886
887 if (!ieee80211_is_mgmt(mgmt->frame_control))
843 return -EINVAL; 888 return -EINVAL;
844 if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { 889
845 /* Verify that we are associated with the destination AP */ 890 stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE;
891 if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].tx & BIT(stype >> 4)))
892 return -EINVAL;
893
894 if (ieee80211_is_action(mgmt->frame_control) &&
895 mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) {
896 int err = 0;
897
846 wdev_lock(wdev); 898 wdev_lock(wdev);
847 899
848 if (!wdev->current_bss || 900 switch (wdev->iftype) {
849 memcmp(wdev->current_bss->pub.bssid, mgmt->bssid, 901 case NL80211_IFTYPE_ADHOC:
850 ETH_ALEN) != 0 || 902 case NL80211_IFTYPE_STATION:
851 (wdev->iftype == NL80211_IFTYPE_STATION && 903 case NL80211_IFTYPE_P2P_CLIENT:
852 memcmp(wdev->current_bss->pub.bssid, mgmt->da, 904 if (!wdev->current_bss) {
853 ETH_ALEN) != 0)) { 905 err = -ENOTCONN;
854 wdev_unlock(wdev); 906 break;
855 return -ENOTCONN; 907 }
856 } 908
909 if (memcmp(wdev->current_bss->pub.bssid,
910 mgmt->bssid, ETH_ALEN)) {
911 err = -ENOTCONN;
912 break;
913 }
914
915 /*
 916 * the DA check for IBSS must be done by the driver,
 917 * as cfg80211 doesn't track the stations
918 */
919 if (wdev->iftype == NL80211_IFTYPE_ADHOC)
920 break;
857 921
922 /* for station, check that DA is the AP */
923 if (memcmp(wdev->current_bss->pub.bssid,
924 mgmt->da, ETH_ALEN)) {
925 err = -ENOTCONN;
926 break;
927 }
928 break;
929 case NL80211_IFTYPE_AP:
930 case NL80211_IFTYPE_P2P_GO:
931 case NL80211_IFTYPE_AP_VLAN:
932 if (memcmp(mgmt->bssid, dev->dev_addr, ETH_ALEN))
933 err = -EINVAL;
934 break;
935 default:
936 err = -EOPNOTSUPP;
937 break;
938 }
858 wdev_unlock(wdev); 939 wdev_unlock(wdev);
940
941 if (err)
942 return err;
859 } 943 }
860 944
861 if (memcmp(mgmt->sa, dev->dev_addr, ETH_ALEN) != 0) 945 if (memcmp(mgmt->sa, dev->dev_addr, ETH_ALEN) != 0)
862 return -EINVAL; 946 return -EINVAL;
863 947
864 /* Transmit the Action frame as requested by user space */ 948 /* Transmit the Action frame as requested by user space */
865 return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, 949 return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type,
866 channel_type_valid, buf, len, cookie); 950 channel_type_valid, buf, len, cookie);
867} 951}
868 952
869bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, 953bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf,
870 size_t len, gfp_t gfp) 954 size_t len, gfp_t gfp)
871{ 955{
872 struct wireless_dev *wdev = dev->ieee80211_ptr; 956 struct wireless_dev *wdev = dev->ieee80211_ptr;
873 struct wiphy *wiphy = wdev->wiphy; 957 struct wiphy *wiphy = wdev->wiphy;
874 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 958 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
875 struct cfg80211_action_registration *reg; 959 struct cfg80211_mgmt_registration *reg;
876 const u8 *action_data; 960 const struct ieee80211_txrx_stypes *stypes =
877 int action_data_len; 961 &wiphy->mgmt_stypes[wdev->iftype];
962 struct ieee80211_mgmt *mgmt = (void *)buf;
963 const u8 *data;
964 int data_len;
878 bool result = false; 965 bool result = false;
966 __le16 ftype = mgmt->frame_control &
967 cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE);
968 u16 stype;
879 969
880 /* frame length - min size excluding category */ 970 stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4;
881 action_data_len = len - (IEEE80211_MIN_ACTION_SIZE - 1);
882 971
883 /* action data starts with category */ 972 if (!(stypes->rx & BIT(stype)))
884 action_data = buf + IEEE80211_MIN_ACTION_SIZE - 1; 973 return false;
885 974
886 spin_lock_bh(&wdev->action_registrations_lock); 975 data = buf + ieee80211_hdrlen(mgmt->frame_control);
976 data_len = len - ieee80211_hdrlen(mgmt->frame_control);
977
978 spin_lock_bh(&wdev->mgmt_registrations_lock);
979
980 list_for_each_entry(reg, &wdev->mgmt_registrations, list) {
981 if (reg->frame_type != ftype)
982 continue;
887 983
888 list_for_each_entry(reg, &wdev->action_registrations, list) { 984 if (reg->match_len > data_len)
889 if (reg->match_len > action_data_len)
890 continue; 985 continue;
891 986
892 if (memcmp(reg->match, action_data, reg->match_len)) 987 if (memcmp(reg->match, data, reg->match_len))
893 continue; 988 continue;
894 989
895 /* found match! */ 990 /* found match! */
896 991
897 /* Indicate the received Action frame to user space */ 992 /* Indicate the received Action frame to user space */
898 if (nl80211_send_action(rdev, dev, reg->nlpid, freq, 993 if (nl80211_send_mgmt(rdev, dev, reg->nlpid, freq,
899 buf, len, gfp)) 994 buf, len, gfp))
900 continue; 995 continue;
901 996
902 result = true; 997 result = true;
903 break; 998 break;
904 } 999 }
905 1000
906 spin_unlock_bh(&wdev->action_registrations_lock); 1001 spin_unlock_bh(&wdev->mgmt_registrations_lock);
907 1002
908 return result; 1003 return result;
909} 1004}
910EXPORT_SYMBOL(cfg80211_rx_action); 1005EXPORT_SYMBOL(cfg80211_rx_mgmt);
911 1006
912void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, 1007void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie,
913 const u8 *buf, size_t len, bool ack, gfp_t gfp) 1008 const u8 *buf, size_t len, bool ack, gfp_t gfp)
914{ 1009{
915 struct wireless_dev *wdev = dev->ieee80211_ptr; 1010 struct wireless_dev *wdev = dev->ieee80211_ptr;
916 struct wiphy *wiphy = wdev->wiphy; 1011 struct wiphy *wiphy = wdev->wiphy;
917 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); 1012 struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
918 1013
919 /* Indicate TX status of the Action frame to user space */ 1014 /* Indicate TX status of the Action frame to user space */
920 nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); 1015 nl80211_send_mgmt_tx_status(rdev, dev, cookie, buf, len, ack, gfp);
921} 1016}
922EXPORT_SYMBOL(cfg80211_action_tx_status); 1017EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
923 1018
924void cfg80211_cqm_rssi_notify(struct net_device *dev, 1019void cfg80211_cqm_rssi_notify(struct net_device *dev,
925 enum nl80211_cqm_rssi_threshold_event rssi_event, 1020 enum nl80211_cqm_rssi_threshold_event rssi_event,
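Both the registration path and the TX path above reduce an 802.11
frame_control value to a bit index with (fc & IEEE80211_FCTL_STYPE) >> 4
and test it against the per-interface-type rx/tx bitmaps. A standalone
sketch of that mapping (constants copied from include/linux/ieee80211.h;
the bitmap value is invented for the example):

#include <stdint.h>
#include <stdio.h>

#define FCTL_FTYPE	0x000c
#define FCTL_STYPE	0x00f0
#define FTYPE_MGMT	0x0000
#define STYPE_PROBE_REQ	0x0040
#define STYPE_ACTION	0x00d0

/* bit index used against mgmt_stypes[iftype].rx/tx, or -1 if the
 * value is not a bare management type/subtype */
static int mgmt_stype_bit(uint16_t frame_type)
{
	if ((frame_type & FCTL_FTYPE) != FTYPE_MGMT)
		return -1;
	if (frame_type & ~(FCTL_FTYPE | FCTL_STYPE))
		return -1;
	return (frame_type & FCTL_STYPE) >> 4;
}

int main(void)
{
	/* pretend the driver accepts only Action frames on this iftype */
	uint16_t rx = 1 << mgmt_stype_bit(FTYPE_MGMT | STYPE_ACTION);

	printf("action bit: %d\n",
	       mgmt_stype_bit(FTYPE_MGMT | STYPE_ACTION));	/* 13 */
	printf("probe req allowed: %s\n",
	       (rx & (1 << mgmt_stype_bit(FTYPE_MGMT | STYPE_PROBE_REQ)))
	       ? "yes" : "no");					/* no */
	return 0;
}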
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 37902a54e9c1..c506241f8637 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -23,6 +23,11 @@
23#include "nl80211.h" 23#include "nl80211.h"
24#include "reg.h" 24#include "reg.h"
25 25
26static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb,
27 struct genl_info *info);
28static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb,
29 struct genl_info *info);
30
26/* the netlink family */ 31/* the netlink family */
27static struct genl_family nl80211_fam = { 32static struct genl_family nl80211_fam = {
28 .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */ 33 .id = GENL_ID_GENERATE, /* don't bother with a hardcoded ID */
@@ -31,6 +36,8 @@ static struct genl_family nl80211_fam = {
31 .version = 1, /* no particular meaning now */ 36 .version = 1, /* no particular meaning now */
32 .maxattr = NL80211_ATTR_MAX, 37 .maxattr = NL80211_ATTR_MAX,
33 .netnsok = true, 38 .netnsok = true,
39 .pre_doit = nl80211_pre_doit,
40 .post_doit = nl80211_post_doit,
34}; 41};
35 42
36/* internal helper: get rdev and dev */ 43/* internal helper: get rdev and dev */
@@ -86,6 +93,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
86 [NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 }, 93 [NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 },
87 [NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG }, 94 [NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG },
88 [NL80211_ATTR_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 }, 95 [NL80211_ATTR_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 },
96 [NL80211_ATTR_KEY_TYPE] = { .type = NLA_U32 },
89 97
90 [NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 }, 98 [NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 },
91 [NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 }, 99 [NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 },
@@ -136,6 +144,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
136 .len = sizeof(struct nl80211_sta_flag_update), 144 .len = sizeof(struct nl80211_sta_flag_update),
137 }, 145 },
138 [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, 146 [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG },
147 [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 },
148 [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG },
139 [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, 149 [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG },
140 [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, 150 [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 },
141 [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, 151 [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
@@ -156,9 +166,10 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
156 166
157 [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, 167 [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 },
158 [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, 168 [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 },
169 [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 },
159}; 170};
160 171
161/* policy for the attributes */ 172/* policy for the key attributes */
162static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = { 173static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = {
163 [NL80211_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, 174 [NL80211_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN },
164 [NL80211_KEY_IDX] = { .type = NLA_U8 }, 175 [NL80211_KEY_IDX] = { .type = NLA_U8 },
@@ -166,6 +177,7 @@ static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = {
166 [NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 }, 177 [NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 },
167 [NL80211_KEY_DEFAULT] = { .type = NLA_FLAG }, 178 [NL80211_KEY_DEFAULT] = { .type = NLA_FLAG },
168 [NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG }, 179 [NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG },
180 [NL80211_KEY_TYPE] = { .type = NLA_U32 },
169}; 181};
170 182
171/* ifidx get helper */ 183/* ifidx get helper */
@@ -188,6 +200,47 @@ static int nl80211_get_ifidx(struct netlink_callback *cb)
188 return res; 200 return res;
189} 201}
190 202
203static int nl80211_prepare_netdev_dump(struct sk_buff *skb,
204 struct netlink_callback *cb,
205 struct cfg80211_registered_device **rdev,
206 struct net_device **dev)
207{
208 int ifidx = cb->args[0];
209 int err;
210
211 if (!ifidx)
212 ifidx = nl80211_get_ifidx(cb);
213 if (ifidx < 0)
214 return ifidx;
215
216 cb->args[0] = ifidx;
217
218 rtnl_lock();
219
220 *dev = __dev_get_by_index(sock_net(skb->sk), ifidx);
221 if (!*dev) {
222 err = -ENODEV;
223 goto out_rtnl;
224 }
225
226 *rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
 227 if (IS_ERR(*rdev)) {
 228 err = PTR_ERR(*rdev);
229 goto out_rtnl;
230 }
231
232 return 0;
233 out_rtnl:
234 rtnl_unlock();
235 return err;
236}
237
238static void nl80211_finish_netdev_dump(struct cfg80211_registered_device *rdev)
239{
240 cfg80211_unlock_rdev(rdev);
241 rtnl_unlock();
242}
243
191/* IE validation */ 244/* IE validation */
192static bool is_valid_ie_attr(const struct nlattr *attr) 245static bool is_valid_ie_attr(const struct nlattr *attr)
193{ 246{
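The helper pair added above factors out a pattern every netlink dump
handler needs: the resolved state is cached in cb->args[] so later
invocations of a multi-part dump resume where the previous one stopped
instead of re-parsing attributes. A generic sketch of that resumable
cursor (mock struct, not the netlink API):

#include <stdio.h>

struct dump_cb { long args[6]; };	/* stand-in for netlink_callback */

#define ITEMS 7
#define BATCH 3	/* how many entries fit in one reply message */

/* one dump pass: resume from the saved cursor, emit up to BATCH
 * items, save the new cursor; returns the count emitted (0 = done) */
static int dump_pass(struct dump_cb *cb)
{
	long idx = cb->args[1];	/* args[0] would cache the ifindex */
	int n = 0;

	while (idx < ITEMS && n < BATCH) {
		printf("entry %ld\n", idx);
		idx++;
		n++;
	}
	cb->args[1] = idx;
	return n;
}

int main(void)
{
	struct dump_cb cb = { { 0 } };

	while (dump_pass(&cb) > 0)
		printf("-- reply boundary --\n");
	return 0;
}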
@@ -255,6 +308,7 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
255struct key_parse { 308struct key_parse {
256 struct key_params p; 309 struct key_params p;
257 int idx; 310 int idx;
311 int type;
258 bool def, defmgmt; 312 bool def, defmgmt;
259}; 313};
260 314
@@ -285,6 +339,12 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k)
285 if (tb[NL80211_KEY_CIPHER]) 339 if (tb[NL80211_KEY_CIPHER])
286 k->p.cipher = nla_get_u32(tb[NL80211_KEY_CIPHER]); 340 k->p.cipher = nla_get_u32(tb[NL80211_KEY_CIPHER]);
287 341
342 if (tb[NL80211_KEY_TYPE]) {
343 k->type = nla_get_u32(tb[NL80211_KEY_TYPE]);
344 if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES)
345 return -EINVAL;
346 }
347
288 return 0; 348 return 0;
289} 349}
290 350
@@ -309,6 +369,12 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k)
309 k->def = !!info->attrs[NL80211_ATTR_KEY_DEFAULT]; 369 k->def = !!info->attrs[NL80211_ATTR_KEY_DEFAULT];
310 k->defmgmt = !!info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]; 370 k->defmgmt = !!info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT];
311 371
372 if (info->attrs[NL80211_ATTR_KEY_TYPE]) {
373 k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);
374 if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES)
375 return -EINVAL;
376 }
377
312 return 0; 378 return 0;
313} 379}
314 380
@@ -318,6 +384,7 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k)
318 384
319 memset(k, 0, sizeof(*k)); 385 memset(k, 0, sizeof(*k));
320 k->idx = -1; 386 k->idx = -1;
387 k->type = -1;
321 388
322 if (info->attrs[NL80211_ATTR_KEY]) 389 if (info->attrs[NL80211_ATTR_KEY])
323 err = nl80211_parse_key_new(info->attrs[NL80211_ATTR_KEY], k); 390 err = nl80211_parse_key_new(info->attrs[NL80211_ATTR_KEY], k);
@@ -382,7 +449,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev,
382 } else if (parse.defmgmt) 449 } else if (parse.defmgmt)
383 goto error; 450 goto error;
384 err = cfg80211_validate_key_settings(rdev, &parse.p, 451 err = cfg80211_validate_key_settings(rdev, &parse.p,
385 parse.idx, NULL); 452 parse.idx, false, NULL);
386 if (err) 453 if (err)
387 goto error; 454 goto error;
388 result->params[parse.idx].cipher = parse.p.cipher; 455 result->params[parse.idx].cipher = parse.p.cipher;
@@ -401,18 +468,17 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
401{ 468{
402 ASSERT_WDEV_LOCK(wdev); 469 ASSERT_WDEV_LOCK(wdev);
403 470
404 if (!netif_running(wdev->netdev))
405 return -ENETDOWN;
406
407 switch (wdev->iftype) { 471 switch (wdev->iftype) {
408 case NL80211_IFTYPE_AP: 472 case NL80211_IFTYPE_AP:
409 case NL80211_IFTYPE_AP_VLAN: 473 case NL80211_IFTYPE_AP_VLAN:
474 case NL80211_IFTYPE_P2P_GO:
410 break; 475 break;
411 case NL80211_IFTYPE_ADHOC: 476 case NL80211_IFTYPE_ADHOC:
412 if (!wdev->current_bss) 477 if (!wdev->current_bss)
413 return -ENOLINK; 478 return -ENOLINK;
414 break; 479 break;
415 case NL80211_IFTYPE_STATION: 480 case NL80211_IFTYPE_STATION:
481 case NL80211_IFTYPE_P2P_CLIENT:
416 if (wdev->sme_state != CFG80211_SME_CONNECTED) 482 if (wdev->sme_state != CFG80211_SME_CONNECTED)
417 return -ENOLINK; 483 return -ENOLINK;
418 break; 484 break;
@@ -437,6 +503,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
437 struct ieee80211_rate *rate; 503 struct ieee80211_rate *rate;
438 int i; 504 int i;
439 u16 ifmodes = dev->wiphy.interface_modes; 505 u16 ifmodes = dev->wiphy.interface_modes;
506 const struct ieee80211_txrx_stypes *mgmt_stypes =
507 dev->wiphy.mgmt_stypes;
440 508
441 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); 509 hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY);
442 if (!hdr) 510 if (!hdr)
@@ -464,6 +532,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
464 NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, 532 NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN,
465 dev->wiphy.max_scan_ie_len); 533 dev->wiphy.max_scan_ie_len);
466 534
535 if (dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)
536 NLA_PUT_FLAG(msg, NL80211_ATTR_SUPPORT_IBSS_RSN);
537
467 NLA_PUT(msg, NL80211_ATTR_CIPHER_SUITES, 538 NLA_PUT(msg, NL80211_ATTR_CIPHER_SUITES,
468 sizeof(u32) * dev->wiphy.n_cipher_suites, 539 sizeof(u32) * dev->wiphy.n_cipher_suites,
469 dev->wiphy.cipher_suites); 540 dev->wiphy.cipher_suites);
@@ -471,6 +542,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
471 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, 542 NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS,
472 dev->wiphy.max_num_pmkids); 543 dev->wiphy.max_num_pmkids);
473 544
545 if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL)
546 NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE);
547
474 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); 548 nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES);
475 if (!nl_modes) 549 if (!nl_modes)
476 goto nla_put_failure; 550 goto nla_put_failure;
@@ -587,12 +661,13 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
587 CMD(flush_pmksa, FLUSH_PMKSA); 661 CMD(flush_pmksa, FLUSH_PMKSA);
588 CMD(remain_on_channel, REMAIN_ON_CHANNEL); 662 CMD(remain_on_channel, REMAIN_ON_CHANNEL);
589 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); 663 CMD(set_bitrate_mask, SET_TX_BITRATE_MASK);
590 CMD(action, ACTION); 664 CMD(mgmt_tx, FRAME);
591 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { 665 if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) {
592 i++; 666 i++;
593 NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); 667 NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
594 } 668 }
595 CMD(set_channel, SET_CHANNEL); 669 CMD(set_channel, SET_CHANNEL);
670 CMD(set_wds_peer, SET_WDS_PEER);
596 671
597#undef CMD 672#undef CMD
598 673
@@ -608,6 +683,55 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
608 683
609 nla_nest_end(msg, nl_cmds); 684 nla_nest_end(msg, nl_cmds);
610 685
686 if (mgmt_stypes) {
687 u16 stypes;
688 struct nlattr *nl_ftypes, *nl_ifs;
689 enum nl80211_iftype ift;
690
691 nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES);
692 if (!nl_ifs)
693 goto nla_put_failure;
694
695 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
696 nl_ftypes = nla_nest_start(msg, ift);
697 if (!nl_ftypes)
698 goto nla_put_failure;
699 i = 0;
700 stypes = mgmt_stypes[ift].tx;
701 while (stypes) {
702 if (stypes & 1)
703 NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE,
704 (i << 4) | IEEE80211_FTYPE_MGMT);
705 stypes >>= 1;
706 i++;
707 }
708 nla_nest_end(msg, nl_ftypes);
709 }
710
711 nla_nest_end(msg, nl_ifs);
712
713 nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES);
714 if (!nl_ifs)
715 goto nla_put_failure;
716
717 for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) {
718 nl_ftypes = nla_nest_start(msg, ift);
719 if (!nl_ftypes)
720 goto nla_put_failure;
721 i = 0;
722 stypes = mgmt_stypes[ift].rx;
723 while (stypes) {
724 if (stypes & 1)
725 NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE,
726 (i << 4) | IEEE80211_FTYPE_MGMT);
727 stypes >>= 1;
728 i++;
729 }
730 nla_nest_end(msg, nl_ftypes);
731 }
732 nla_nest_end(msg, nl_ifs);
733 }
734
611 return genlmsg_end(msg, hdr); 735 return genlmsg_end(msg, hdr);
612 736
613 nla_put_failure: 737 nla_put_failure:
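The advertisement loops above do the inverse of the registration check:
each u16 stype bitmap is expanded back into (subtype << 4) | FTYPE_MGMT
frame-type values for userspace. The walk in isolation (bitmap value
invented for the example):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t stypes = 0x2010;	/* bits 4 and 13: probe req, action */
	int i = 0;

	while (stypes) {
		if (stypes & 1)
			printf("frame type 0x%02x\n",
			       (i << 4) | 0x0000 /* IEEE80211_FTYPE_MGMT */);
		stypes >>= 1;
		i++;
	}
	return 0;	/* prints 0x40 and 0xd0 */
}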
@@ -644,28 +768,18 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
644static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) 768static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info)
645{ 769{
646 struct sk_buff *msg; 770 struct sk_buff *msg;
647 struct cfg80211_registered_device *dev; 771 struct cfg80211_registered_device *dev = info->user_ptr[0];
648
649 dev = cfg80211_get_dev_from_info(info);
650 if (IS_ERR(dev))
651 return PTR_ERR(dev);
652 772
653 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 773 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
654 if (!msg) 774 if (!msg)
655 goto out_err; 775 return -ENOMEM;
656
657 if (nl80211_send_wiphy(msg, info->snd_pid, info->snd_seq, 0, dev) < 0)
658 goto out_free;
659 776
660 cfg80211_unlock_rdev(dev); 777 if (nl80211_send_wiphy(msg, info->snd_pid, info->snd_seq, 0, dev) < 0) {
778 nlmsg_free(msg);
779 return -ENOBUFS;
780 }
661 781
662 return genlmsg_reply(msg, info); 782 return genlmsg_reply(msg, info);
663
664 out_free:
665 nlmsg_free(msg);
666 out_err:
667 cfg80211_unlock_rdev(dev);
668 return -ENOBUFS;
669} 783}
670 784
671static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = { 785static const struct nla_policy txq_params_policy[NL80211_TXQ_ATTR_MAX + 1] = {
@@ -709,7 +823,8 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev)
709 wdev->iftype == NL80211_IFTYPE_AP || 823 wdev->iftype == NL80211_IFTYPE_AP ||
710 wdev->iftype == NL80211_IFTYPE_WDS || 824 wdev->iftype == NL80211_IFTYPE_WDS ||
711 wdev->iftype == NL80211_IFTYPE_MESH_POINT || 825 wdev->iftype == NL80211_IFTYPE_MESH_POINT ||
712 wdev->iftype == NL80211_IFTYPE_MONITOR; 826 wdev->iftype == NL80211_IFTYPE_MONITOR ||
827 wdev->iftype == NL80211_IFTYPE_P2P_GO;
713} 828}
714 829
715static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, 830static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
@@ -753,38 +868,48 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
753 868
754static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) 869static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info)
755{ 870{
756 struct cfg80211_registered_device *rdev; 871 struct cfg80211_registered_device *rdev = info->user_ptr[0];
757 struct net_device *netdev; 872 struct net_device *netdev = info->user_ptr[1];
758 int result;
759 873
760 rtnl_lock(); 874 return __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info);
875}
761 876
762 result = get_rdev_dev_by_info_ifindex(info, &rdev, &netdev); 877static int nl80211_set_wds_peer(struct sk_buff *skb, struct genl_info *info)
763 if (result) 878{
764 goto unlock; 879 struct cfg80211_registered_device *rdev = info->user_ptr[0];
880 struct net_device *dev = info->user_ptr[1];
881 struct wireless_dev *wdev = dev->ieee80211_ptr;
882 const u8 *bssid;
765 883
766 result = __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info); 884 if (!info->attrs[NL80211_ATTR_MAC])
885 return -EINVAL;
767 886
768 unlock: 887 if (netif_running(dev))
769 rtnl_unlock(); 888 return -EBUSY;
770 889
771 return result; 890 if (!rdev->ops->set_wds_peer)
891 return -EOPNOTSUPP;
892
893 if (wdev->iftype != NL80211_IFTYPE_WDS)
894 return -EOPNOTSUPP;
895
896 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
897 return rdev->ops->set_wds_peer(wdev->wiphy, dev, bssid);
772} 898}
773 899
900
774static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) 901static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
775{ 902{
776 struct cfg80211_registered_device *rdev; 903 struct cfg80211_registered_device *rdev;
777 struct net_device *netdev = NULL; 904 struct net_device *netdev = NULL;
778 struct wireless_dev *wdev; 905 struct wireless_dev *wdev;
779 int result, rem_txq_params = 0; 906 int result = 0, rem_txq_params = 0;
780 struct nlattr *nl_txq_params; 907 struct nlattr *nl_txq_params;
781 u32 changed; 908 u32 changed;
782 u8 retry_short = 0, retry_long = 0; 909 u8 retry_short = 0, retry_long = 0;
783 u32 frag_threshold = 0, rts_threshold = 0; 910 u32 frag_threshold = 0, rts_threshold = 0;
784 u8 coverage_class = 0; 911 u8 coverage_class = 0;
785 912
786 rtnl_lock();
787
788 /* 913 /*
789 * Try to find the wiphy and netdev. Normally this 914 * Try to find the wiphy and netdev. Normally this
790 * function shouldn't need the netdev, but this is 915 * function shouldn't need the netdev, but this is
@@ -811,8 +936,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
811 rdev = __cfg80211_rdev_from_info(info); 936 rdev = __cfg80211_rdev_from_info(info);
812 if (IS_ERR(rdev)) { 937 if (IS_ERR(rdev)) {
813 mutex_unlock(&cfg80211_mutex); 938 mutex_unlock(&cfg80211_mutex);
814 result = PTR_ERR(rdev); 939 return PTR_ERR(rdev);
815 goto unlock;
816 } 940 }
817 wdev = NULL; 941 wdev = NULL;
818 netdev = NULL; 942 netdev = NULL;
@@ -994,8 +1118,6 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
994 mutex_unlock(&rdev->mtx); 1118 mutex_unlock(&rdev->mtx);
995 if (netdev) 1119 if (netdev)
996 dev_put(netdev); 1120 dev_put(netdev);
997 unlock:
998 rtnl_unlock();
999 return result; 1121 return result;
1000} 1122}
1001 1123
@@ -1075,33 +1197,20 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
1075static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) 1197static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info)
1076{ 1198{
1077 struct sk_buff *msg; 1199 struct sk_buff *msg;
1078 struct cfg80211_registered_device *dev; 1200 struct cfg80211_registered_device *dev = info->user_ptr[0];
1079 struct net_device *netdev; 1201 struct net_device *netdev = info->user_ptr[1];
1080 int err;
1081
1082 err = get_rdev_dev_by_info_ifindex(info, &dev, &netdev);
1083 if (err)
1084 return err;
1085 1202
1086 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1203 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1087 if (!msg) 1204 if (!msg)
1088 goto out_err; 1205 return -ENOMEM;
1089 1206
1090 if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, 1207 if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0,
1091 dev, netdev) < 0) 1208 dev, netdev) < 0) {
1092 goto out_free; 1209 nlmsg_free(msg);
1093 1210 return -ENOBUFS;
1094 dev_put(netdev); 1211 }
1095 cfg80211_unlock_rdev(dev);
1096 1212
1097 return genlmsg_reply(msg, info); 1213 return genlmsg_reply(msg, info);
1098
1099 out_free:
1100 nlmsg_free(msg);
1101 out_err:
1102 dev_put(netdev);
1103 cfg80211_unlock_rdev(dev);
1104 return -ENOBUFS;
1105} 1214}
1106 1215
1107static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = { 1216static const struct nla_policy mntr_flags_policy[NL80211_MNTR_FLAG_MAX + 1] = {
@@ -1161,39 +1270,29 @@ static int nl80211_valid_4addr(struct cfg80211_registered_device *rdev,
1161 1270
1162static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) 1271static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
1163{ 1272{
1164 struct cfg80211_registered_device *rdev; 1273 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1165 struct vif_params params; 1274 struct vif_params params;
1166 int err; 1275 int err;
1167 enum nl80211_iftype otype, ntype; 1276 enum nl80211_iftype otype, ntype;
1168 struct net_device *dev; 1277 struct net_device *dev = info->user_ptr[1];
1169 u32 _flags, *flags = NULL; 1278 u32 _flags, *flags = NULL;
1170 bool change = false; 1279 bool change = false;
1171 1280
1172 memset(&params, 0, sizeof(params)); 1281 memset(&params, 0, sizeof(params));
1173 1282
1174 rtnl_lock();
1175
1176 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
1177 if (err)
1178 goto unlock_rtnl;
1179
1180 otype = ntype = dev->ieee80211_ptr->iftype; 1283 otype = ntype = dev->ieee80211_ptr->iftype;
1181 1284
1182 if (info->attrs[NL80211_ATTR_IFTYPE]) { 1285 if (info->attrs[NL80211_ATTR_IFTYPE]) {
1183 ntype = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); 1286 ntype = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]);
1184 if (otype != ntype) 1287 if (otype != ntype)
1185 change = true; 1288 change = true;
1186 if (ntype > NL80211_IFTYPE_MAX) { 1289 if (ntype > NL80211_IFTYPE_MAX)
1187 err = -EINVAL; 1290 return -EINVAL;
1188 goto unlock;
1189 }
1190 } 1291 }
1191 1292
1192 if (info->attrs[NL80211_ATTR_MESH_ID]) { 1293 if (info->attrs[NL80211_ATTR_MESH_ID]) {
1193 if (ntype != NL80211_IFTYPE_MESH_POINT) { 1294 if (ntype != NL80211_IFTYPE_MESH_POINT)
1194 err = -EINVAL; 1295 return -EINVAL;
1195 goto unlock;
1196 }
1197 params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); 1296 params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]);
1198 params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); 1297 params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]);
1199 change = true; 1298 change = true;
@@ -1204,20 +1303,18 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
1204 change = true; 1303 change = true;
1205 err = nl80211_valid_4addr(rdev, dev, params.use_4addr, ntype); 1304 err = nl80211_valid_4addr(rdev, dev, params.use_4addr, ntype);
1206 if (err) 1305 if (err)
1207 goto unlock; 1306 return err;
1208 } else { 1307 } else {
1209 params.use_4addr = -1; 1308 params.use_4addr = -1;
1210 } 1309 }
1211 1310
1212 if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) { 1311 if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) {
1213 if (ntype != NL80211_IFTYPE_MONITOR) { 1312 if (ntype != NL80211_IFTYPE_MONITOR)
1214 err = -EINVAL; 1313 return -EINVAL;
1215 goto unlock;
1216 }
1217 err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS], 1314 err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS],
1218 &_flags); 1315 &_flags);
1219 if (err) 1316 if (err)
1220 goto unlock; 1317 return err;
1221 1318
1222 flags = &_flags; 1319 flags = &_flags;
1223 change = true; 1320 change = true;
@@ -1231,17 +1328,12 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info)
1231 if (!err && params.use_4addr != -1) 1328 if (!err && params.use_4addr != -1)
1232 dev->ieee80211_ptr->use_4addr = params.use_4addr; 1329 dev->ieee80211_ptr->use_4addr = params.use_4addr;
1233 1330
1234 unlock:
1235 dev_put(dev);
1236 cfg80211_unlock_rdev(rdev);
1237 unlock_rtnl:
1238 rtnl_unlock();
1239 return err; 1331 return err;
1240} 1332}
1241 1333
1242static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) 1334static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
1243{ 1335{
1244 struct cfg80211_registered_device *rdev; 1336 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1245 struct vif_params params; 1337 struct vif_params params;
1246 int err; 1338 int err;
1247 enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; 1339 enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
@@ -1258,19 +1350,9 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
1258 return -EINVAL; 1350 return -EINVAL;
1259 } 1351 }
1260 1352
1261 rtnl_lock();
1262
1263 rdev = cfg80211_get_dev_from_info(info);
1264 if (IS_ERR(rdev)) {
1265 err = PTR_ERR(rdev);
1266 goto unlock_rtnl;
1267 }
1268
1269 if (!rdev->ops->add_virtual_intf || 1353 if (!rdev->ops->add_virtual_intf ||
1270 !(rdev->wiphy.interface_modes & (1 << type))) { 1354 !(rdev->wiphy.interface_modes & (1 << type)))
1271 err = -EOPNOTSUPP; 1355 return -EOPNOTSUPP;
1272 goto unlock;
1273 }
1274 1356
1275 if (type == NL80211_IFTYPE_MESH_POINT && 1357 if (type == NL80211_IFTYPE_MESH_POINT &&
1276 info->attrs[NL80211_ATTR_MESH_ID]) { 1358 info->attrs[NL80211_ATTR_MESH_ID]) {
@@ -1282,7 +1364,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
1282 params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]); 1364 params.use_4addr = !!nla_get_u8(info->attrs[NL80211_ATTR_4ADDR]);
1283 err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type); 1365 err = nl80211_valid_4addr(rdev, NULL, params.use_4addr, type);
1284 if (err) 1366 if (err)
1285 goto unlock; 1367 return err;
1286 } 1368 }
1287 1369
1288 err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? 1370 err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ?
@@ -1292,38 +1374,18 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info)
1292 nla_data(info->attrs[NL80211_ATTR_IFNAME]), 1374 nla_data(info->attrs[NL80211_ATTR_IFNAME]),
1293 type, err ? NULL : &flags, &params); 1375 type, err ? NULL : &flags, &params);
1294 1376
1295 unlock:
1296 cfg80211_unlock_rdev(rdev);
1297 unlock_rtnl:
1298 rtnl_unlock();
1299 return err; 1377 return err;
1300} 1378}
1301 1379
1302static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) 1380static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
1303{ 1381{
1304 struct cfg80211_registered_device *rdev; 1382 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1305 int err; 1383 struct net_device *dev = info->user_ptr[1];
1306 struct net_device *dev;
1307 1384
1308 rtnl_lock(); 1385 if (!rdev->ops->del_virtual_intf)
1309 1386 return -EOPNOTSUPP;
1310 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
1311 if (err)
1312 goto unlock_rtnl;
1313
1314 if (!rdev->ops->del_virtual_intf) {
1315 err = -EOPNOTSUPP;
1316 goto out;
1317 }
1318
1319 err = rdev->ops->del_virtual_intf(&rdev->wiphy, dev);
1320 1387
1321 out: 1388 return rdev->ops->del_virtual_intf(&rdev->wiphy, dev);
1322 cfg80211_unlock_rdev(rdev);
1323 dev_put(dev);
1324 unlock_rtnl:
1325 rtnl_unlock();
1326 return err;
1327} 1389}
1328 1390
1329struct get_key_cookie { 1391struct get_key_cookie {
@@ -1376,11 +1438,12 @@ static void get_key_callback(void *c, struct key_params *params)
1376 1438
1377static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) 1439static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
1378{ 1440{
1379 struct cfg80211_registered_device *rdev; 1441 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1380 int err; 1442 int err;
1381 struct net_device *dev; 1443 struct net_device *dev = info->user_ptr[1];
1382 u8 key_idx = 0; 1444 u8 key_idx = 0;
1383 u8 *mac_addr = NULL; 1445 const u8 *mac_addr = NULL;
1446 bool pairwise;
1384 struct get_key_cookie cookie = { 1447 struct get_key_cookie cookie = {
1385 .error = 0, 1448 .error = 0,
1386 }; 1449 };
@@ -1396,30 +1459,28 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
1396 if (info->attrs[NL80211_ATTR_MAC]) 1459 if (info->attrs[NL80211_ATTR_MAC])
1397 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 1460 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
1398 1461
1399 rtnl_lock(); 1462 pairwise = !!mac_addr;
1400 1463 if (info->attrs[NL80211_ATTR_KEY_TYPE]) {
1401 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); 1464 u32 kt = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]);
1402 if (err) 1465 if (kt >= NUM_NL80211_KEYTYPES)
1403 goto unlock_rtnl; 1466 return -EINVAL;
1404 1467 if (kt != NL80211_KEYTYPE_GROUP &&
1405 if (!rdev->ops->get_key) { 1468 kt != NL80211_KEYTYPE_PAIRWISE)
1406 err = -EOPNOTSUPP; 1469 return -EINVAL;
1407 goto out; 1470 pairwise = kt == NL80211_KEYTYPE_PAIRWISE;
1408 } 1471 }
1409 1472
1473 if (!rdev->ops->get_key)
1474 return -EOPNOTSUPP;
1475
1410 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1476 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1411 if (!msg) { 1477 if (!msg)
1412 err = -ENOMEM; 1478 return -ENOMEM;
1413 goto out;
1414 }
1415 1479
1416 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 1480 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
1417 NL80211_CMD_NEW_KEY); 1481 NL80211_CMD_NEW_KEY);
1418 1482 if (IS_ERR(hdr))
1419 if (IS_ERR(hdr)) { 1483 return PTR_ERR(hdr);
1420 err = PTR_ERR(hdr);
1421 goto free_msg;
1422 }
1423 1484
1424 cookie.msg = msg; 1485 cookie.msg = msg;
1425 cookie.idx = key_idx; 1486 cookie.idx = key_idx;
@@ -1429,8 +1490,12 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
1429 if (mac_addr) 1490 if (mac_addr)
1430 NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr); 1491 NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr);
1431 1492
1432 err = rdev->ops->get_key(&rdev->wiphy, dev, key_idx, mac_addr, 1493 if (pairwise && mac_addr &&
1433 &cookie, get_key_callback); 1494 !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
 1495 { err = -ENOENT; goto free_msg; }
1496
1497 err = rdev->ops->get_key(&rdev->wiphy, dev, key_idx, pairwise,
1498 mac_addr, &cookie, get_key_callback);
1434 1499
1435 if (err) 1500 if (err)
1436 goto free_msg; 1501 goto free_msg;
@@ -1439,28 +1504,21 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
1439 goto nla_put_failure; 1504 goto nla_put_failure;
1440 1505
1441 genlmsg_end(msg, hdr); 1506 genlmsg_end(msg, hdr);
1442 err = genlmsg_reply(msg, info); 1507 return genlmsg_reply(msg, info);
1443 goto out;
1444 1508
1445 nla_put_failure: 1509 nla_put_failure:
1446 err = -ENOBUFS; 1510 err = -ENOBUFS;
1447 free_msg: 1511 free_msg:
1448 nlmsg_free(msg); 1512 nlmsg_free(msg);
1449 out:
1450 cfg80211_unlock_rdev(rdev);
1451 dev_put(dev);
1452 unlock_rtnl:
1453 rtnl_unlock();
1454
1455 return err; 1513 return err;
1456} 1514}
1457 1515
1458static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) 1516static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
1459{ 1517{
1460 struct cfg80211_registered_device *rdev; 1518 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1461 struct key_parse key; 1519 struct key_parse key;
1462 int err; 1520 int err;
1463 struct net_device *dev; 1521 struct net_device *dev = info->user_ptr[1];
1464 int (*func)(struct wiphy *wiphy, struct net_device *netdev, 1522 int (*func)(struct wiphy *wiphy, struct net_device *netdev,
1465 u8 key_index); 1523 u8 key_index);
1466 1524
@@ -1475,21 +1533,13 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
1475 if (!key.def && !key.defmgmt) 1533 if (!key.def && !key.defmgmt)
1476 return -EINVAL; 1534 return -EINVAL;
1477 1535
1478 rtnl_lock();
1479
1480 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
1481 if (err)
1482 goto unlock_rtnl;
1483
1484 if (key.def) 1536 if (key.def)
1485 func = rdev->ops->set_default_key; 1537 func = rdev->ops->set_default_key;
1486 else 1538 else
1487 func = rdev->ops->set_default_mgmt_key; 1539 func = rdev->ops->set_default_mgmt_key;
1488 1540
1489 if (!func) { 1541 if (!func)
1490 err = -EOPNOTSUPP; 1542 return -EOPNOTSUPP;
1491 goto out;
1492 }
1493 1543
1494 wdev_lock(dev->ieee80211_ptr); 1544 wdev_lock(dev->ieee80211_ptr);
1495 err = nl80211_key_allowed(dev->ieee80211_ptr); 1545 err = nl80211_key_allowed(dev->ieee80211_ptr);
@@ -1506,23 +1556,16 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info)
1506#endif 1556#endif
1507 wdev_unlock(dev->ieee80211_ptr); 1557 wdev_unlock(dev->ieee80211_ptr);
1508 1558
1509 out:
1510 cfg80211_unlock_rdev(rdev);
1511 dev_put(dev);
1512
1513 unlock_rtnl:
1514 rtnl_unlock();
1515
1516 return err; 1559 return err;
1517} 1560}
1518 1561
1519static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) 1562static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
1520{ 1563{
1521 struct cfg80211_registered_device *rdev; 1564 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1522 int err; 1565 int err;
1523 struct net_device *dev; 1566 struct net_device *dev = info->user_ptr[1];
1524 struct key_parse key; 1567 struct key_parse key;
1525 u8 *mac_addr = NULL; 1568 const u8 *mac_addr = NULL;
1526 1569
1527 err = nl80211_parse_key(info, &key); 1570 err = nl80211_parse_key(info, &key);
1528 if (err) 1571 if (err)
@@ -1534,43 +1577,42 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info)
1534 if (info->attrs[NL80211_ATTR_MAC]) 1577 if (info->attrs[NL80211_ATTR_MAC])
1535 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 1578 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
1536 1579
1537 rtnl_lock(); 1580 if (key.type == -1) {
1581 if (mac_addr)
1582 key.type = NL80211_KEYTYPE_PAIRWISE;
1583 else
1584 key.type = NL80211_KEYTYPE_GROUP;
1585 }
1538 1586
1539 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); 1587 /* for now */
1540 if (err) 1588 if (key.type != NL80211_KEYTYPE_PAIRWISE &&
1541 goto unlock_rtnl; 1589 key.type != NL80211_KEYTYPE_GROUP)
1590 return -EINVAL;
1542 1591
1543 if (!rdev->ops->add_key) { 1592 if (!rdev->ops->add_key)
1544 err = -EOPNOTSUPP; 1593 return -EOPNOTSUPP;
1545 goto out;
1546 }
1547 1594
1548 if (cfg80211_validate_key_settings(rdev, &key.p, key.idx, mac_addr)) { 1595 if (cfg80211_validate_key_settings(rdev, &key.p, key.idx,
1549 err = -EINVAL; 1596 key.type == NL80211_KEYTYPE_PAIRWISE,
1550 goto out; 1597 mac_addr))
1551 } 1598 return -EINVAL;
1552 1599
1553 wdev_lock(dev->ieee80211_ptr); 1600 wdev_lock(dev->ieee80211_ptr);
1554 err = nl80211_key_allowed(dev->ieee80211_ptr); 1601 err = nl80211_key_allowed(dev->ieee80211_ptr);
1555 if (!err) 1602 if (!err)
1556 err = rdev->ops->add_key(&rdev->wiphy, dev, key.idx, 1603 err = rdev->ops->add_key(&rdev->wiphy, dev, key.idx,
1604 key.type == NL80211_KEYTYPE_PAIRWISE,
1557 mac_addr, &key.p); 1605 mac_addr, &key.p);
1558 wdev_unlock(dev->ieee80211_ptr); 1606 wdev_unlock(dev->ieee80211_ptr);
1559 1607
1560 out:
1561 cfg80211_unlock_rdev(rdev);
1562 dev_put(dev);
1563 unlock_rtnl:
1564 rtnl_unlock();
1565
1566 return err; 1608 return err;
1567} 1609}
1568 1610
1569static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) 1611static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
1570{ 1612{
1571 struct cfg80211_registered_device *rdev; 1613 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1572 int err; 1614 int err;
1573 struct net_device *dev; 1615 struct net_device *dev = info->user_ptr[1];
1574 u8 *mac_addr = NULL; 1616 u8 *mac_addr = NULL;
1575 struct key_parse key; 1617 struct key_parse key;
1576 1618
@@ -1581,21 +1623,32 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
1581 if (info->attrs[NL80211_ATTR_MAC]) 1623 if (info->attrs[NL80211_ATTR_MAC])
1582 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 1624 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
1583 1625
1584 rtnl_lock(); 1626 if (key.type == -1) {
1627 if (mac_addr)
1628 key.type = NL80211_KEYTYPE_PAIRWISE;
1629 else
1630 key.type = NL80211_KEYTYPE_GROUP;
1631 }
1585 1632
1586 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); 1633 /* for now */
1587 if (err) 1634 if (key.type != NL80211_KEYTYPE_PAIRWISE &&
1588 goto unlock_rtnl; 1635 key.type != NL80211_KEYTYPE_GROUP)
1636 return -EINVAL;
1589 1637
1590 if (!rdev->ops->del_key) { 1638 if (!rdev->ops->del_key)
1591 err = -EOPNOTSUPP; 1639 return -EOPNOTSUPP;
1592 goto out;
1593 }
1594 1640
1595 wdev_lock(dev->ieee80211_ptr); 1641 wdev_lock(dev->ieee80211_ptr);
1596 err = nl80211_key_allowed(dev->ieee80211_ptr); 1642 err = nl80211_key_allowed(dev->ieee80211_ptr);
1643
1644 if (key.type == NL80211_KEYTYPE_PAIRWISE && mac_addr &&
1645 !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
1646 err = -ENOENT;
1647
1597 if (!err) 1648 if (!err)
1598 err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx, mac_addr); 1649 err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx,
1650 key.type == NL80211_KEYTYPE_PAIRWISE,
1651 mac_addr);
1599 1652
1600#ifdef CONFIG_CFG80211_WEXT 1653#ifdef CONFIG_CFG80211_WEXT
1601 if (!err) { 1654 if (!err) {
@@ -1607,13 +1660,6 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info)
1607#endif 1660#endif
1608 wdev_unlock(dev->ieee80211_ptr); 1661 wdev_unlock(dev->ieee80211_ptr);
1609 1662
1610 out:
1611 cfg80211_unlock_rdev(rdev);
1612 dev_put(dev);
1613
1614 unlock_rtnl:
1615 rtnl_unlock();
1616
1617 return err; 1663 return err;
1618} 1664}
1619 1665
@@ -1621,35 +1667,25 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1621{ 1667{
1622 int (*call)(struct wiphy *wiphy, struct net_device *dev, 1668 int (*call)(struct wiphy *wiphy, struct net_device *dev,
1623 struct beacon_parameters *info); 1669 struct beacon_parameters *info);
1624 struct cfg80211_registered_device *rdev; 1670 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1625 int err; 1671 struct net_device *dev = info->user_ptr[1];
1626 struct net_device *dev;
1627 struct beacon_parameters params; 1672 struct beacon_parameters params;
1628 int haveinfo = 0; 1673 int haveinfo = 0;
1629 1674
1630 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_BEACON_TAIL])) 1675 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_BEACON_TAIL]))
1631 return -EINVAL; 1676 return -EINVAL;
1632 1677
1633 rtnl_lock(); 1678 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
1634 1679 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
1635 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); 1680 return -EOPNOTSUPP;
1636 if (err)
1637 goto unlock_rtnl;
1638
1639 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) {
1640 err = -EOPNOTSUPP;
1641 goto out;
1642 }
1643 1681
1644 switch (info->genlhdr->cmd) { 1682 switch (info->genlhdr->cmd) {
1645 case NL80211_CMD_NEW_BEACON: 1683 case NL80211_CMD_NEW_BEACON:
1646 /* these are required for NEW_BEACON */ 1684 /* these are required for NEW_BEACON */
1647 if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] || 1685 if (!info->attrs[NL80211_ATTR_BEACON_INTERVAL] ||
1648 !info->attrs[NL80211_ATTR_DTIM_PERIOD] || 1686 !info->attrs[NL80211_ATTR_DTIM_PERIOD] ||
1649 !info->attrs[NL80211_ATTR_BEACON_HEAD]) { 1687 !info->attrs[NL80211_ATTR_BEACON_HEAD])
1650 err = -EINVAL; 1688 return -EINVAL;
1651 goto out;
1652 }
1653 1689
1654 call = rdev->ops->add_beacon; 1690 call = rdev->ops->add_beacon;
1655 break; 1691 break;
@@ -1658,14 +1694,11 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1658 break; 1694 break;
1659 default: 1695 default:
1660 WARN_ON(1); 1696 WARN_ON(1);
1661 err = -EOPNOTSUPP; 1697 return -EOPNOTSUPP;
1662 goto out;
1663 } 1698 }
1664 1699
1665 if (!call) { 1700 if (!call)
1666 err = -EOPNOTSUPP; 1701 return -EOPNOTSUPP;
1667 goto out;
1668 }
1669 1702
1670 memset(&params, 0, sizeof(params)); 1703 memset(&params, 0, sizeof(params));
1671 1704
@@ -1695,52 +1728,25 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
1695 haveinfo = 1; 1728 haveinfo = 1;
1696 } 1729 }
1697 1730
1698 if (!haveinfo) { 1731 if (!haveinfo)
1699 err = -EINVAL; 1732 return -EINVAL;
1700 goto out;
1701 }
1702
1703 err = call(&rdev->wiphy, dev, &params);
1704
1705 out:
1706 cfg80211_unlock_rdev(rdev);
1707 dev_put(dev);
1708 unlock_rtnl:
1709 rtnl_unlock();
1710 1733
1711 return err; 1734 return call(&rdev->wiphy, dev, &params);
1712} 1735}
1713 1736
1714static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info) 1737static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info)
1715{ 1738{
1716 struct cfg80211_registered_device *rdev; 1739 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1717 int err; 1740 struct net_device *dev = info->user_ptr[1];
1718 struct net_device *dev;
1719 1741
1720 rtnl_lock(); 1742 if (!rdev->ops->del_beacon)
1721 1743 return -EOPNOTSUPP;
1722 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
1723 if (err)
1724 goto unlock_rtnl;
1725
1726 if (!rdev->ops->del_beacon) {
1727 err = -EOPNOTSUPP;
1728 goto out;
1729 }
1730
1731 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) {
1732 err = -EOPNOTSUPP;
1733 goto out;
1734 }
1735 err = rdev->ops->del_beacon(&rdev->wiphy, dev);
1736 1744
1737 out: 1745 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
1738 cfg80211_unlock_rdev(rdev); 1746 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
1739 dev_put(dev); 1747 return -EOPNOTSUPP;
1740 unlock_rtnl:
1741 rtnl_unlock();
1742 1748
1743 return err; 1749 return rdev->ops->del_beacon(&rdev->wiphy, dev);
1744} 1750}
1745 1751
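/*
 * Commands opt into that shared setup via per-op flags when the family
 * is registered; a sketch of one nl80211_ops entry (flag names assumed
 * from this series, with NL80211_FLAG_NEED_NETDEV_UP standing for
 * NEED_NETDEV plus the netif_running() check):
 */
static struct genl_ops nl80211_ops[] = {
	{
		.cmd = NL80211_CMD_NEW_BEACON,
		.policy = nl80211_policy,
		.flags = GENL_ADMIN_PERM,	/* CAP_NET_ADMIN only */
		.doit = nl80211_addset_beacon,
		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
				  NL80211_FLAG_NEED_RTNL,
	},
	/* ... one entry per command ... */
};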
1746static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = { 1752static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = {
@@ -1861,6 +1867,12 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
1861 if (sinfo->filled & STATION_INFO_TX_PACKETS) 1867 if (sinfo->filled & STATION_INFO_TX_PACKETS)
1862 NLA_PUT_U32(msg, NL80211_STA_INFO_TX_PACKETS, 1868 NLA_PUT_U32(msg, NL80211_STA_INFO_TX_PACKETS,
1863 sinfo->tx_packets); 1869 sinfo->tx_packets);
1870 if (sinfo->filled & STATION_INFO_TX_RETRIES)
1871 NLA_PUT_U32(msg, NL80211_STA_INFO_TX_RETRIES,
1872 sinfo->tx_retries);
1873 if (sinfo->filled & STATION_INFO_TX_FAILED)
1874 NLA_PUT_U32(msg, NL80211_STA_INFO_TX_FAILED,
1875 sinfo->tx_failed);
1864 nla_nest_end(msg, sinfoattr); 1876 nla_nest_end(msg, sinfoattr);
1865 1877
1866 return genlmsg_end(msg, hdr); 1878 return genlmsg_end(msg, hdr);
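/*
 * The two attributes added above reach userspace only when a driver
 * sets the matching bits in sinfo->filled; a hypothetical get_station
 * callback (counter values invented):
 */
static int drv_get_station(struct wiphy *wiphy, struct net_device *dev,
			   u8 *mac, struct station_info *sinfo)
{
	sinfo->filled |= STATION_INFO_TX_RETRIES | STATION_INFO_TX_FAILED;
	sinfo->tx_retries = 42;	/* total hw retransmissions */
	sinfo->tx_failed = 3;	/* frames dropped after final retry */
	return 0;
}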
@@ -1877,28 +1889,12 @@ static int nl80211_dump_station(struct sk_buff *skb,
1877 struct cfg80211_registered_device *dev; 1889 struct cfg80211_registered_device *dev;
1878 struct net_device *netdev; 1890 struct net_device *netdev;
1879 u8 mac_addr[ETH_ALEN]; 1891 u8 mac_addr[ETH_ALEN];
1880 int ifidx = cb->args[0];
1881 int sta_idx = cb->args[1]; 1892 int sta_idx = cb->args[1];
1882 int err; 1893 int err;
1883 1894
1884 if (!ifidx) 1895 err = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev);
1885 ifidx = nl80211_get_ifidx(cb); 1896 if (err)
1886 if (ifidx < 0) 1897 return err;
1887 return ifidx;
1888
1889 rtnl_lock();
1890
1891 netdev = __dev_get_by_index(sock_net(skb->sk), ifidx);
1892 if (!netdev) {
1893 err = -ENODEV;
1894 goto out_rtnl;
1895 }
1896
1897 dev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
1898 if (IS_ERR(dev)) {
1899 err = PTR_ERR(dev);
1900 goto out_rtnl;
1901 }
1902 1898
1903 if (!dev->ops->dump_station) { 1899 if (!dev->ops->dump_station) {
1904 err = -EOPNOTSUPP; 1900 err = -EOPNOTSUPP;
@@ -1928,21 +1924,19 @@ static int nl80211_dump_station(struct sk_buff *skb,
1928 cb->args[1] = sta_idx; 1924 cb->args[1] = sta_idx;
1929 err = skb->len; 1925 err = skb->len;
1930 out_err: 1926 out_err:
1931 cfg80211_unlock_rdev(dev); 1927 nl80211_finish_netdev_dump(dev);
1932 out_rtnl:
1933 rtnl_unlock();
1934 1928
1935 return err; 1929 return err;
1936} 1930}
1937 1931
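/*
 * The dump handlers (stations, mesh paths, scan results, survey) now
 * share their ifindex-to-device boilerplate. The helpers themselves are
 * not shown in these hunks; the sketch below reconstructs them from the
 * removed lines, so treat it as a close approximation rather than the
 * patch text:
 */
static int nl80211_prepare_netdev_dump(struct sk_buff *skb,
				       struct netlink_callback *cb,
				       struct cfg80211_registered_device **rdev,
				       struct net_device **netdev)
{
	int ifidx = cb->args[0];

	if (!ifidx)
		ifidx = nl80211_get_ifidx(cb);
	if (ifidx < 0)
		return ifidx;
	cb->args[0] = ifidx;	/* cache for the next dump pass */

	rtnl_lock();

	*netdev = __dev_get_by_index(sock_net(skb->sk), ifidx);
	if (!*netdev) {
		rtnl_unlock();
		return -ENODEV;
	}

	*rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
	if (IS_ERR(*rdev)) {
		rtnl_unlock();
		return PTR_ERR(*rdev);
	}

	return 0;
}

static void nl80211_finish_netdev_dump(struct cfg80211_registered_device *rdev)
{
	cfg80211_unlock_rdev(rdev);	/* pairs with prepare above */
	rtnl_unlock();
}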
1938static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) 1932static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
1939{ 1933{
1940 struct cfg80211_registered_device *rdev; 1934 struct cfg80211_registered_device *rdev = info->user_ptr[0];
1941 int err; 1935 struct net_device *dev = info->user_ptr[1];
1942 struct net_device *dev;
1943 struct station_info sinfo; 1936 struct station_info sinfo;
1944 struct sk_buff *msg; 1937 struct sk_buff *msg;
1945 u8 *mac_addr = NULL; 1938 u8 *mac_addr = NULL;
1939 int err;
1946 1940
1947 memset(&sinfo, 0, sizeof(sinfo)); 1941 memset(&sinfo, 0, sizeof(sinfo));
1948 1942
@@ -1951,41 +1945,24 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info)
1951 1945
1952 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 1946 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
1953 1947
1954 rtnl_lock(); 1948 if (!rdev->ops->get_station)
1955 1949 return -EOPNOTSUPP;
1956 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
1957 if (err)
1958 goto out_rtnl;
1959
1960 if (!rdev->ops->get_station) {
1961 err = -EOPNOTSUPP;
1962 goto out;
1963 }
1964 1950
1965 err = rdev->ops->get_station(&rdev->wiphy, dev, mac_addr, &sinfo); 1951 err = rdev->ops->get_station(&rdev->wiphy, dev, mac_addr, &sinfo);
1966 if (err) 1952 if (err)
1967 goto out; 1953 return err;
1968 1954
1969 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 1955 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1970 if (!msg) 1956 if (!msg)
1971 goto out; 1957 return -ENOMEM;
1972 1958
1973 if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0, 1959 if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0,
1974 dev, mac_addr, &sinfo) < 0) 1960 dev, mac_addr, &sinfo) < 0) {
1975 goto out_free; 1961 nlmsg_free(msg);
1976 1962 return -ENOBUFS;
1977 err = genlmsg_reply(msg, info); 1963 }
1978 goto out;
1979
1980 out_free:
1981 nlmsg_free(msg);
1982 out:
1983 cfg80211_unlock_rdev(rdev);
1984 dev_put(dev);
1985 out_rtnl:
1986 rtnl_unlock();
1987 1964
1988 return err; 1965 return genlmsg_reply(msg, info);
1989} 1966}
1990 1967
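/*
 * GET_STATION above (and GET_MPATH, GET_MESH_PARAMS below) now share
 * one straight-line reply shape with no cleanup labels; the skeleton,
 * with a stand-in fill callback (hypothetical helper, not in the
 * patch):
 */
static int nl80211_unicast_reply(struct genl_info *info,
				 int (*fill)(struct sk_buff *,
					     struct genl_info *))
{
	struct sk_buff *msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);

	if (!msg)
		return -ENOMEM;
	if (fill(msg, info) < 0) {
		nlmsg_free(msg);	/* attributes didn't fit */
		return -ENOBUFS;
	}
	return genlmsg_reply(msg, info);	/* consumes msg */
}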
1991/* 1968/*
@@ -2015,9 +1992,9 @@ static int get_vlan(struct genl_info *info,
2015 1992
2016static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) 1993static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
2017{ 1994{
2018 struct cfg80211_registered_device *rdev; 1995 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2019 int err; 1996 int err;
2020 struct net_device *dev; 1997 struct net_device *dev = info->user_ptr[1];
2021 struct station_parameters params; 1998 struct station_parameters params;
2022 u8 *mac_addr = NULL; 1999 u8 *mac_addr = NULL;
2023 2000
@@ -2055,12 +2032,6 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
2055 params.plink_action = 2032 params.plink_action =
2056 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); 2033 nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
2057 2034
2058 rtnl_lock();
2059
2060 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2061 if (err)
2062 goto out_rtnl;
2063
2064 err = get_vlan(info, rdev, &params.vlan); 2035 err = get_vlan(info, rdev, &params.vlan);
2065 if (err) 2036 if (err)
2066 goto out; 2037 goto out;
@@ -2071,10 +2042,12 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
2071 switch (dev->ieee80211_ptr->iftype) { 2042 switch (dev->ieee80211_ptr->iftype) {
2072 case NL80211_IFTYPE_AP: 2043 case NL80211_IFTYPE_AP:
2073 case NL80211_IFTYPE_AP_VLAN: 2044 case NL80211_IFTYPE_AP_VLAN:
2045 case NL80211_IFTYPE_P2P_GO:
2074 /* disallow mesh-specific things */ 2046 /* disallow mesh-specific things */
2075 if (params.plink_action) 2047 if (params.plink_action)
2076 err = -EINVAL; 2048 err = -EINVAL;
2077 break; 2049 break;
2050 case NL80211_IFTYPE_P2P_CLIENT:
2078 case NL80211_IFTYPE_STATION: 2051 case NL80211_IFTYPE_STATION:
2079 /* disallow everything but AUTHORIZED flag */ 2052 /* disallow everything but AUTHORIZED flag */
2080 if (params.plink_action) 2053 if (params.plink_action)
@@ -2120,19 +2093,15 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
2120 out: 2093 out:
2121 if (params.vlan) 2094 if (params.vlan)
2122 dev_put(params.vlan); 2095 dev_put(params.vlan);
2123 cfg80211_unlock_rdev(rdev);
2124 dev_put(dev);
2125 out_rtnl:
2126 rtnl_unlock();
2127 2096
2128 return err; 2097 return err;
2129} 2098}
2130 2099
2131static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) 2100static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
2132{ 2101{
2133 struct cfg80211_registered_device *rdev; 2102 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2134 int err; 2103 int err;
2135 struct net_device *dev; 2104 struct net_device *dev = info->user_ptr[1];
2136 struct station_parameters params; 2105 struct station_parameters params;
2137 u8 *mac_addr = NULL; 2106 u8 *mac_addr = NULL;
2138 2107
@@ -2169,17 +2138,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
2169 if (parse_station_flags(info, &params)) 2138 if (parse_station_flags(info, &params))
2170 return -EINVAL; 2139 return -EINVAL;
2171 2140
2172 rtnl_lock();
2173
2174 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2175 if (err)
2176 goto out_rtnl;
2177
2178 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && 2141 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2179 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) { 2142 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
2180 err = -EINVAL; 2143 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
2181 goto out; 2144 return -EINVAL;
2182 }
2183 2145
2184 err = get_vlan(info, rdev, &params.vlan); 2146 err = get_vlan(info, rdev, &params.vlan);
2185 if (err) 2147 if (err)
@@ -2193,61 +2155,33 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
2193 goto out; 2155 goto out;
2194 } 2156 }
2195 2157
2196 if (!netif_running(dev)) {
2197 err = -ENETDOWN;
2198 goto out;
2199 }
2200
2201 err = rdev->ops->add_station(&rdev->wiphy, dev, mac_addr, &params); 2158 err = rdev->ops->add_station(&rdev->wiphy, dev, mac_addr, &params);
2202 2159
2203 out: 2160 out:
2204 if (params.vlan) 2161 if (params.vlan)
2205 dev_put(params.vlan); 2162 dev_put(params.vlan);
2206 cfg80211_unlock_rdev(rdev);
2207 dev_put(dev);
2208 out_rtnl:
2209 rtnl_unlock();
2210
2211 return err; 2163 return err;
2212} 2164}
2213 2165
2214static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) 2166static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
2215{ 2167{
2216 struct cfg80211_registered_device *rdev; 2168 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2217 int err; 2169 struct net_device *dev = info->user_ptr[1];
2218 struct net_device *dev;
2219 u8 *mac_addr = NULL; 2170 u8 *mac_addr = NULL;
2220 2171
2221 if (info->attrs[NL80211_ATTR_MAC]) 2172 if (info->attrs[NL80211_ATTR_MAC])
2222 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); 2173 mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
2223 2174
2224 rtnl_lock();
2225
2226 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2227 if (err)
2228 goto out_rtnl;
2229
2230 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP && 2175 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2231 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN && 2176 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
2232 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { 2177 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT &&
2233 err = -EINVAL; 2178 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
2234 goto out; 2179 return -EINVAL;
2235 }
2236
2237 if (!rdev->ops->del_station) {
2238 err = -EOPNOTSUPP;
2239 goto out;
2240 }
2241
2242 err = rdev->ops->del_station(&rdev->wiphy, dev, mac_addr);
2243 2180
2244 out: 2181 if (!rdev->ops->del_station)
2245 cfg80211_unlock_rdev(rdev); 2182 return -EOPNOTSUPP;
2246 dev_put(dev);
2247 out_rtnl:
2248 rtnl_unlock();
2249 2183
2250 return err; 2184 return rdev->ops->del_station(&rdev->wiphy, dev, mac_addr);
2251} 2185}
2252 2186
2253static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, 2187static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq,
@@ -2310,28 +2244,12 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
2310 struct net_device *netdev; 2244 struct net_device *netdev;
2311 u8 dst[ETH_ALEN]; 2245 u8 dst[ETH_ALEN];
2312 u8 next_hop[ETH_ALEN]; 2246 u8 next_hop[ETH_ALEN];
2313 int ifidx = cb->args[0];
2314 int path_idx = cb->args[1]; 2247 int path_idx = cb->args[1];
2315 int err; 2248 int err;
2316 2249
2317 if (!ifidx) 2250 err = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev);
2318 ifidx = nl80211_get_ifidx(cb); 2251 if (err)
2319 if (ifidx < 0) 2252 return err;
2320 return ifidx;
2321
2322 rtnl_lock();
2323
2324 netdev = __dev_get_by_index(sock_net(skb->sk), ifidx);
2325 if (!netdev) {
2326 err = -ENODEV;
2327 goto out_rtnl;
2328 }
2329
2330 dev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
2331 if (IS_ERR(dev)) {
2332 err = PTR_ERR(dev);
2333 goto out_rtnl;
2334 }
2335 2253
2336 if (!dev->ops->dump_mpath) { 2254 if (!dev->ops->dump_mpath) {
2337 err = -EOPNOTSUPP; 2255 err = -EOPNOTSUPP;
@@ -2365,18 +2283,15 @@ static int nl80211_dump_mpath(struct sk_buff *skb,
2365 cb->args[1] = path_idx; 2283 cb->args[1] = path_idx;
2366 err = skb->len; 2284 err = skb->len;
2367 out_err: 2285 out_err:
2368 cfg80211_unlock_rdev(dev); 2286 nl80211_finish_netdev_dump(dev);
2369 out_rtnl:
2370 rtnl_unlock();
2371
2372 return err; 2287 return err;
2373} 2288}
2374 2289
2375static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) 2290static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info)
2376{ 2291{
2377 struct cfg80211_registered_device *rdev; 2292 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2378 int err; 2293 int err;
2379 struct net_device *dev; 2294 struct net_device *dev = info->user_ptr[1];
2380 struct mpath_info pinfo; 2295 struct mpath_info pinfo;
2381 struct sk_buff *msg; 2296 struct sk_buff *msg;
2382 u8 *dst = NULL; 2297 u8 *dst = NULL;
@@ -2389,53 +2304,33 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info)
2389 2304
2390 dst = nla_data(info->attrs[NL80211_ATTR_MAC]); 2305 dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
2391 2306
2392 rtnl_lock(); 2307 if (!rdev->ops->get_mpath)
2393 2308 return -EOPNOTSUPP;
2394 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2395 if (err)
2396 goto out_rtnl;
2397
2398 if (!rdev->ops->get_mpath) {
2399 err = -EOPNOTSUPP;
2400 goto out;
2401 }
2402 2309
2403 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { 2310 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
2404 err = -EOPNOTSUPP; 2311 return -EOPNOTSUPP;
2405 goto out;
2406 }
2407 2312
2408 err = rdev->ops->get_mpath(&rdev->wiphy, dev, dst, next_hop, &pinfo); 2313 err = rdev->ops->get_mpath(&rdev->wiphy, dev, dst, next_hop, &pinfo);
2409 if (err) 2314 if (err)
2410 goto out; 2315 return err;
2411 2316
2412 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2317 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2413 if (!msg) 2318 if (!msg)
2414 goto out; 2319 return -ENOMEM;
2415 2320
2416 if (nl80211_send_mpath(msg, info->snd_pid, info->snd_seq, 0, 2321 if (nl80211_send_mpath(msg, info->snd_pid, info->snd_seq, 0,
2417 dev, dst, next_hop, &pinfo) < 0) 2322 dev, dst, next_hop, &pinfo) < 0) {
2418 goto out_free; 2323 nlmsg_free(msg);
2419 2324 return -ENOBUFS;
2420 err = genlmsg_reply(msg, info); 2325 }
2421 goto out;
2422
2423 out_free:
2424 nlmsg_free(msg);
2425 out:
2426 cfg80211_unlock_rdev(rdev);
2427 dev_put(dev);
2428 out_rtnl:
2429 rtnl_unlock();
2430 2326
2431 return err; 2327 return genlmsg_reply(msg, info);
2432} 2328}
2433 2329
2434static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info) 2330static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info)
2435{ 2331{
2436 struct cfg80211_registered_device *rdev; 2332 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2437 int err; 2333 struct net_device *dev = info->user_ptr[1];
2438 struct net_device *dev;
2439 u8 *dst = NULL; 2334 u8 *dst = NULL;
2440 u8 *next_hop = NULL; 2335 u8 *next_hop = NULL;
2441 2336
@@ -2448,42 +2343,19 @@ static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info)
2448 dst = nla_data(info->attrs[NL80211_ATTR_MAC]); 2343 dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
2449 next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); 2344 next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]);
2450 2345
2451 rtnl_lock(); 2346 if (!rdev->ops->change_mpath)
2452 2347 return -EOPNOTSUPP;
2453 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2454 if (err)
2455 goto out_rtnl;
2456
2457 if (!rdev->ops->change_mpath) {
2458 err = -EOPNOTSUPP;
2459 goto out;
2460 }
2461
2462 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) {
2463 err = -EOPNOTSUPP;
2464 goto out;
2465 }
2466
2467 if (!netif_running(dev)) {
2468 err = -ENETDOWN;
2469 goto out;
2470 }
2471
2472 err = rdev->ops->change_mpath(&rdev->wiphy, dev, dst, next_hop);
2473 2348
2474 out: 2349 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
2475 cfg80211_unlock_rdev(rdev); 2350 return -EOPNOTSUPP;
2476 dev_put(dev);
2477 out_rtnl:
2478 rtnl_unlock();
2479 2351
2480 return err; 2352 return rdev->ops->change_mpath(&rdev->wiphy, dev, dst, next_hop);
2481} 2353}
2354
2482static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info) 2355static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info)
2483{ 2356{
2484 struct cfg80211_registered_device *rdev; 2357 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2485 int err; 2358 struct net_device *dev = info->user_ptr[1];
2486 struct net_device *dev;
2487 u8 *dst = NULL; 2359 u8 *dst = NULL;
2488 u8 *next_hop = NULL; 2360 u8 *next_hop = NULL;
2489 2361
@@ -2496,75 +2368,34 @@ static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info)
2496 dst = nla_data(info->attrs[NL80211_ATTR_MAC]); 2368 dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
2497 next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); 2369 next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]);
2498 2370
2499 rtnl_lock(); 2371 if (!rdev->ops->add_mpath)
2500 2372 return -EOPNOTSUPP;
2501 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2502 if (err)
2503 goto out_rtnl;
2504
2505 if (!rdev->ops->add_mpath) {
2506 err = -EOPNOTSUPP;
2507 goto out;
2508 }
2509
2510 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) {
2511 err = -EOPNOTSUPP;
2512 goto out;
2513 }
2514
2515 if (!netif_running(dev)) {
2516 err = -ENETDOWN;
2517 goto out;
2518 }
2519
2520 err = rdev->ops->add_mpath(&rdev->wiphy, dev, dst, next_hop);
2521 2373
2522 out: 2374 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
2523 cfg80211_unlock_rdev(rdev); 2375 return -EOPNOTSUPP;
2524 dev_put(dev);
2525 out_rtnl:
2526 rtnl_unlock();
2527 2376
2528 return err; 2377 return rdev->ops->add_mpath(&rdev->wiphy, dev, dst, next_hop);
2529} 2378}
2530 2379
2531static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) 2380static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info)
2532{ 2381{
2533 struct cfg80211_registered_device *rdev; 2382 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2534 int err; 2383 struct net_device *dev = info->user_ptr[1];
2535 struct net_device *dev;
2536 u8 *dst = NULL; 2384 u8 *dst = NULL;
2537 2385
2538 if (info->attrs[NL80211_ATTR_MAC]) 2386 if (info->attrs[NL80211_ATTR_MAC])
2539 dst = nla_data(info->attrs[NL80211_ATTR_MAC]); 2387 dst = nla_data(info->attrs[NL80211_ATTR_MAC]);
2540 2388
2541 rtnl_lock(); 2389 if (!rdev->ops->del_mpath)
2542 2390 return -EOPNOTSUPP;
2543 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2544 if (err)
2545 goto out_rtnl;
2546
2547 if (!rdev->ops->del_mpath) {
2548 err = -EOPNOTSUPP;
2549 goto out;
2550 }
2551
2552 err = rdev->ops->del_mpath(&rdev->wiphy, dev, dst);
2553
2554 out:
2555 cfg80211_unlock_rdev(rdev);
2556 dev_put(dev);
2557 out_rtnl:
2558 rtnl_unlock();
2559 2391
2560 return err; 2392 return rdev->ops->del_mpath(&rdev->wiphy, dev, dst);
2561} 2393}
2562 2394
2563static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) 2395static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
2564{ 2396{
2565 struct cfg80211_registered_device *rdev; 2397 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2566 int err; 2398 struct net_device *dev = info->user_ptr[1];
2567 struct net_device *dev;
2568 struct bss_parameters params; 2399 struct bss_parameters params;
2569 2400
2570 memset(&params, 0, sizeof(params)); 2401 memset(&params, 0, sizeof(params));
@@ -2592,31 +2423,14 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
2592 if (info->attrs[NL80211_ATTR_AP_ISOLATE]) 2423 if (info->attrs[NL80211_ATTR_AP_ISOLATE])
2593 params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]); 2424 params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]);
2594 2425
2595 rtnl_lock(); 2426 if (!rdev->ops->change_bss)
2596 2427 return -EOPNOTSUPP;
2597 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2598 if (err)
2599 goto out_rtnl;
2600
2601 if (!rdev->ops->change_bss) {
2602 err = -EOPNOTSUPP;
2603 goto out;
2604 }
2605
2606 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) {
2607 err = -EOPNOTSUPP;
2608 goto out;
2609 }
2610
2611 err = rdev->ops->change_bss(&rdev->wiphy, dev, &params);
2612 2428
2613 out: 2429 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
2614 cfg80211_unlock_rdev(rdev); 2430 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
2615 dev_put(dev); 2431 return -EOPNOTSUPP;
2616 out_rtnl:
2617 rtnl_unlock();
2618 2432
2619 return err; 2433 return rdev->ops->change_bss(&rdev->wiphy, dev, &params);
2620} 2434}
2621 2435
2622static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { 2436static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = {
@@ -2695,37 +2509,26 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
2695static int nl80211_get_mesh_params(struct sk_buff *skb, 2509static int nl80211_get_mesh_params(struct sk_buff *skb,
2696 struct genl_info *info) 2510 struct genl_info *info)
2697{ 2511{
2698 struct cfg80211_registered_device *rdev; 2512 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2699 struct mesh_config cur_params; 2513 struct mesh_config cur_params;
2700 int err; 2514 int err;
2701 struct net_device *dev; 2515 struct net_device *dev = info->user_ptr[1];
2702 void *hdr; 2516 void *hdr;
2703 struct nlattr *pinfoattr; 2517 struct nlattr *pinfoattr;
2704 struct sk_buff *msg; 2518 struct sk_buff *msg;
2705 2519
2706 rtnl_lock(); 2520 if (!rdev->ops->get_mesh_params)
2707 2521 return -EOPNOTSUPP;
2708 /* Look up our device */
2709 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2710 if (err)
2711 goto out_rtnl;
2712
2713 if (!rdev->ops->get_mesh_params) {
2714 err = -EOPNOTSUPP;
2715 goto out;
2716 }
2717 2522
2718 /* Get the mesh params */ 2523 /* Get the mesh params */
2719 err = rdev->ops->get_mesh_params(&rdev->wiphy, dev, &cur_params); 2524 err = rdev->ops->get_mesh_params(&rdev->wiphy, dev, &cur_params);
2720 if (err) 2525 if (err)
2721 goto out; 2526 return err;
2722 2527
2723 /* Draw up a netlink message to send back */ 2528 /* Draw up a netlink message to send back */
2724 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 2529 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2725 if (!msg) { 2530 if (!msg)
2726 err = -ENOBUFS; 2531 return -ENOMEM;
2727 goto out;
2728 }
2729 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, 2532 hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
2730 NL80211_CMD_GET_MESH_PARAMS); 2533 NL80211_CMD_GET_MESH_PARAMS);
2731 if (!hdr) 2534 if (!hdr)
@@ -2764,21 +2567,12 @@ static int nl80211_get_mesh_params(struct sk_buff *skb,
2764 cur_params.dot11MeshHWMPRootMode); 2567 cur_params.dot11MeshHWMPRootMode);
2765 nla_nest_end(msg, pinfoattr); 2568 nla_nest_end(msg, pinfoattr);
2766 genlmsg_end(msg, hdr); 2569 genlmsg_end(msg, hdr);
2767 err = genlmsg_reply(msg, info); 2570 return genlmsg_reply(msg, info);
2768 goto out;
2769 2571
2770 nla_put_failure: 2572 nla_put_failure:
2771 genlmsg_cancel(msg, hdr); 2573 genlmsg_cancel(msg, hdr);
2772 nlmsg_free(msg); 2574 nlmsg_free(msg);
2773 err = -EMSGSIZE; 2575 return -ENOBUFS;
2774 out:
2775 /* Cleanup */
2776 cfg80211_unlock_rdev(rdev);
2777 dev_put(dev);
2778 out_rtnl:
2779 rtnl_unlock();
2780
2781 return err;
2782} 2576}
2783 2577
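/*
 * The mesh-params reply above leans on the standard netlink nesting
 * idiom: open a nest, emit attributes with the NLA_PUT_* macros (which
 * jump to nla_put_failure when the skb runs out of room), close the
 * nest. In miniature, with an illustrative value:
 */
static int put_mesh_ttl(struct sk_buff *msg, u8 ttl)
{
	struct nlattr *nest = nla_nest_start(msg, NL80211_ATTR_MESH_PARAMS);

	if (!nest)
		goto nla_put_failure;
	NLA_PUT_U8(msg, NL80211_MESHCONF_TTL, ttl);
	nla_nest_end(msg, nest);
	return 0;

 nla_put_failure:
	return -ENOBUFS;	/* caller cancels and frees the message */
}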
2784#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \ 2578#define FILL_IN_MESH_PARAM_IF_SET(table, cfg, param, mask, attr_num, nla_fn) \
@@ -2808,10 +2602,9 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A
2808 2602
2809static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) 2603static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
2810{ 2604{
2811 int err;
2812 u32 mask; 2605 u32 mask;
2813 struct cfg80211_registered_device *rdev; 2606 struct cfg80211_registered_device *rdev = info->user_ptr[0];
2814 struct net_device *dev; 2607 struct net_device *dev = info->user_ptr[1];
2815 struct mesh_config cfg; 2608 struct mesh_config cfg;
2816 struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1]; 2609 struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1];
2817 struct nlattr *parent_attr; 2610 struct nlattr *parent_attr;
@@ -2823,16 +2616,8 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
2823 parent_attr, nl80211_meshconf_params_policy)) 2616 parent_attr, nl80211_meshconf_params_policy))
2824 return -EINVAL; 2617 return -EINVAL;
2825 2618
2826 rtnl_lock(); 2619 if (!rdev->ops->set_mesh_params)
2827 2620 return -EOPNOTSUPP;
2828 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
2829 if (err)
2830 goto out_rtnl;
2831
2832 if (!rdev->ops->set_mesh_params) {
2833 err = -EOPNOTSUPP;
2834 goto out;
2835 }
2836 2621
2837 /* This makes sure that there aren't more than 32 mesh config 2622 /* This makes sure that there aren't more than 32 mesh config
2838 * parameters (otherwise our bitfield scheme would not work.) */ 2623 * parameters (otherwise our bitfield scheme would not work.) */
@@ -2878,16 +2663,7 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info)
2878 nla_get_u8); 2663 nla_get_u8);
2879 2664
2880 /* Apply changes */ 2665 /* Apply changes */
2881 err = rdev->ops->set_mesh_params(&rdev->wiphy, dev, &cfg, mask); 2666 return rdev->ops->set_mesh_params(&rdev->wiphy, dev, &cfg, mask);
2882
2883 out:
2884 /* cleanup */
2885 cfg80211_unlock_rdev(rdev);
2886 dev_put(dev);
2887 out_rtnl:
2888 rtnl_unlock();
2889
2890 return err;
2891} 2667}
2892 2668
2893#undef FILL_IN_MESH_PARAM_IF_SET 2669#undef FILL_IN_MESH_PARAM_IF_SET
@@ -3070,8 +2846,8 @@ static int validate_scan_freqs(struct nlattr *freqs)
3070 2846
3071static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) 2847static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
3072{ 2848{
3073 struct cfg80211_registered_device *rdev; 2849 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3074 struct net_device *dev; 2850 struct net_device *dev = info->user_ptr[1];
3075 struct cfg80211_scan_request *request; 2851 struct cfg80211_scan_request *request;
3076 struct cfg80211_ssid *ssid; 2852 struct cfg80211_ssid *ssid;
3077 struct ieee80211_channel *channel; 2853 struct ieee80211_channel *channel;
@@ -3084,36 +2860,19 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
3084 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE])) 2860 if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
3085 return -EINVAL; 2861 return -EINVAL;
3086 2862
3087 rtnl_lock();
3088
3089 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
3090 if (err)
3091 goto out_rtnl;
3092
3093 wiphy = &rdev->wiphy; 2863 wiphy = &rdev->wiphy;
3094 2864
3095 if (!rdev->ops->scan) { 2865 if (!rdev->ops->scan)
3096 err = -EOPNOTSUPP; 2866 return -EOPNOTSUPP;
3097 goto out;
3098 }
3099
3100 if (!netif_running(dev)) {
3101 err = -ENETDOWN;
3102 goto out;
3103 }
3104 2867
3105 if (rdev->scan_req) { 2868 if (rdev->scan_req)
3106 err = -EBUSY; 2869 return -EBUSY;
3107 goto out;
3108 }
3109 2870
3110 if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { 2871 if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
3111 n_channels = validate_scan_freqs( 2872 n_channels = validate_scan_freqs(
3112 info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]); 2873 info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]);
3113 if (!n_channels) { 2874 if (!n_channels)
3114 err = -EINVAL; 2875 return -EINVAL;
3115 goto out;
3116 }
3117 } else { 2876 } else {
3118 n_channels = 0; 2877 n_channels = 0;
3119 2878
@@ -3126,29 +2885,23 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
3126 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) 2885 nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp)
3127 n_ssids++; 2886 n_ssids++;
3128 2887
3129 if (n_ssids > wiphy->max_scan_ssids) { 2888 if (n_ssids > wiphy->max_scan_ssids)
3130 err = -EINVAL; 2889 return -EINVAL;
3131 goto out;
3132 }
3133 2890
3134 if (info->attrs[NL80211_ATTR_IE]) 2891 if (info->attrs[NL80211_ATTR_IE])
3135 ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); 2892 ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
3136 else 2893 else
3137 ie_len = 0; 2894 ie_len = 0;
3138 2895
3139 if (ie_len > wiphy->max_scan_ie_len) { 2896 if (ie_len > wiphy->max_scan_ie_len)
3140 err = -EINVAL; 2897 return -EINVAL;
3141 goto out;
3142 }
3143 2898
3144 request = kzalloc(sizeof(*request) 2899 request = kzalloc(sizeof(*request)
3145 + sizeof(*ssid) * n_ssids 2900 + sizeof(*ssid) * n_ssids
3146 + sizeof(channel) * n_channels 2901 + sizeof(channel) * n_channels
3147 + ie_len, GFP_KERNEL); 2902 + ie_len, GFP_KERNEL);
3148 if (!request) { 2903 if (!request)
3149 err = -ENOMEM; 2904 return -ENOMEM;
3150 goto out;
3151 }
3152 2905
3153 if (n_ssids) 2906 if (n_ssids)
3154 request->ssids = (void *)&request->channels[n_channels]; 2907 request->ssids = (void *)&request->channels[n_channels];
@@ -3236,18 +2989,11 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
3236 if (!err) { 2989 if (!err) {
3237 nl80211_send_scan_start(rdev, dev); 2990 nl80211_send_scan_start(rdev, dev);
3238 dev_hold(dev); 2991 dev_hold(dev);
3239 } 2992 } else {
3240
3241 out_free: 2993 out_free:
3242 if (err) {
3243 rdev->scan_req = NULL; 2994 rdev->scan_req = NULL;
3244 kfree(request); 2995 kfree(request);
3245 } 2996 }
3246 out:
3247 cfg80211_unlock_rdev(rdev);
3248 dev_put(dev);
3249 out_rtnl:
3250 rtnl_unlock();
3251 2997
3252 return err; 2998 return err;
3253} 2999}
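/*
 * The scan request keeps all of its variable-length parts in the single
 * kzalloc above, with request->ssids and request->ie carved out of the
 * tail after channels[n_channels]; one kfree() then releases
 * everything. The same trick with generic names, as a stand-alone
 * sketch:
 */
struct blob {
	size_t n_items;
	int *items;	/* points into tail[] */
	char *name;	/* points just past items */
	int tail[];	/* single allocation carries both */
};

static struct blob *blob_alloc(size_t n_items, size_t name_len)
{
	struct blob *b = kzalloc(sizeof(*b) + sizeof(int) * n_items +
				 name_len, GFP_KERNEL);

	if (!b)
		return NULL;
	b->n_items = n_items;
	b->items = b->tail;
	b->name = (char *)(b->items + n_items);
	return b;	/* kfree(b) frees items and name too */
}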
@@ -3306,6 +3052,7 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags,
3306 } 3052 }
3307 3053
3308 switch (wdev->iftype) { 3054 switch (wdev->iftype) {
3055 case NL80211_IFTYPE_P2P_CLIENT:
3309 case NL80211_IFTYPE_STATION: 3056 case NL80211_IFTYPE_STATION:
3310 if (intbss == wdev->current_bss) 3057 if (intbss == wdev->current_bss)
3311 NLA_PUT_U32(msg, NL80211_BSS_STATUS, 3058 NLA_PUT_U32(msg, NL80211_BSS_STATUS,
@@ -3343,25 +3090,12 @@ static int nl80211_dump_scan(struct sk_buff *skb,
3343 struct net_device *dev; 3090 struct net_device *dev;
3344 struct cfg80211_internal_bss *scan; 3091 struct cfg80211_internal_bss *scan;
3345 struct wireless_dev *wdev; 3092 struct wireless_dev *wdev;
3346 int ifidx = cb->args[0];
3347 int start = cb->args[1], idx = 0; 3093 int start = cb->args[1], idx = 0;
3348 int err; 3094 int err;
3349 3095
3350 if (!ifidx) 3096 err = nl80211_prepare_netdev_dump(skb, cb, &rdev, &dev);
3351 ifidx = nl80211_get_ifidx(cb); 3097 if (err)
3352 if (ifidx < 0) 3098 return err;
3353 return ifidx;
3354 cb->args[0] = ifidx;
3355
3356 dev = dev_get_by_index(sock_net(skb->sk), ifidx);
3357 if (!dev)
3358 return -ENODEV;
3359
3360 rdev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
3361 if (IS_ERR(rdev)) {
3362 err = PTR_ERR(rdev);
3363 goto out_put_netdev;
3364 }
3365 3099
3366 wdev = dev->ieee80211_ptr; 3100 wdev = dev->ieee80211_ptr;
3367 3101
@@ -3377,21 +3111,17 @@ static int nl80211_dump_scan(struct sk_buff *skb,
3377 cb->nlh->nlmsg_seq, NLM_F_MULTI, 3111 cb->nlh->nlmsg_seq, NLM_F_MULTI,
3378 rdev, wdev, scan) < 0) { 3112 rdev, wdev, scan) < 0) {
3379 idx--; 3113 idx--;
3380 goto out; 3114 break;
3381 } 3115 }
3382 } 3116 }
3383 3117
3384 out:
3385 spin_unlock_bh(&rdev->bss_lock); 3118 spin_unlock_bh(&rdev->bss_lock);
3386 wdev_unlock(wdev); 3119 wdev_unlock(wdev);
3387 3120
3388 cb->args[1] = idx; 3121 cb->args[1] = idx;
3389 err = skb->len; 3122 nl80211_finish_netdev_dump(rdev);
3390 cfg80211_unlock_rdev(rdev);
3391 out_put_netdev:
3392 dev_put(dev);
3393 3123
3394 return err; 3124 return skb->len;
3395} 3125}
3396 3126
3397static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq, 3127static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq,
@@ -3421,6 +3151,23 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq,
3421 if (survey->filled & SURVEY_INFO_NOISE_DBM) 3151 if (survey->filled & SURVEY_INFO_NOISE_DBM)
3422 NLA_PUT_U8(msg, NL80211_SURVEY_INFO_NOISE, 3152 NLA_PUT_U8(msg, NL80211_SURVEY_INFO_NOISE,
3423 survey->noise); 3153 survey->noise);
3154 if (survey->filled & SURVEY_INFO_IN_USE)
3155 NLA_PUT_FLAG(msg, NL80211_SURVEY_INFO_IN_USE);
3156 if (survey->filled & SURVEY_INFO_CHANNEL_TIME)
3157 NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME,
3158 survey->channel_time);
3159 if (survey->filled & SURVEY_INFO_CHANNEL_TIME_BUSY)
3160 NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY,
3161 survey->channel_time_busy);
3162 if (survey->filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY)
3163 NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY,
3164 survey->channel_time_ext_busy);
3165 if (survey->filled & SURVEY_INFO_CHANNEL_TIME_RX)
3166 NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_RX,
3167 survey->channel_time_rx);
3168 if (survey->filled & SURVEY_INFO_CHANNEL_TIME_TX)
3169 NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_TX,
3170 survey->channel_time_tx);
3424 3171
3425 nla_nest_end(msg, infoattr); 3172 nla_nest_end(msg, infoattr);
3426 3173
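/*
 * The six SURVEY_INFO_* attributes added above are, like the station
 * counters, driver-opt-in via survey->filled; a hypothetical
 * dump_survey callback reporting one channel (times invented, in ms):
 */
static int drv_dump_survey(struct wiphy *wiphy, struct net_device *dev,
			   int idx, struct survey_info *survey)
{
	if (idx > 0)
		return -ENOENT;	/* single-entry example */

	survey->channel = &wiphy->bands[IEEE80211_BAND_2GHZ]->channels[0];
	survey->filled = SURVEY_INFO_IN_USE |
			 SURVEY_INFO_CHANNEL_TIME |
			 SURVEY_INFO_CHANNEL_TIME_BUSY;
	survey->channel_time = 1000;
	survey->channel_time_busy = 250;
	return 0;
}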
@@ -3437,29 +3184,12 @@ static int nl80211_dump_survey(struct sk_buff *skb,
3437 struct survey_info survey; 3184 struct survey_info survey;
3438 struct cfg80211_registered_device *dev; 3185 struct cfg80211_registered_device *dev;
3439 struct net_device *netdev; 3186 struct net_device *netdev;
3440 int ifidx = cb->args[0];
3441 int survey_idx = cb->args[1]; 3187 int survey_idx = cb->args[1];
3442 int res; 3188 int res;
3443 3189
3444 if (!ifidx) 3190 res = nl80211_prepare_netdev_dump(skb, cb, &dev, &netdev);
3445 ifidx = nl80211_get_ifidx(cb); 3191 if (res)
3446 if (ifidx < 0) 3192 return res;
3447 return ifidx;
3448 cb->args[0] = ifidx;
3449
3450 rtnl_lock();
3451
3452 netdev = __dev_get_by_index(sock_net(skb->sk), ifidx);
3453 if (!netdev) {
3454 res = -ENODEV;
3455 goto out_rtnl;
3456 }
3457
3458 dev = cfg80211_get_dev_from_ifindex(sock_net(skb->sk), ifidx);
3459 if (IS_ERR(dev)) {
3460 res = PTR_ERR(dev);
3461 goto out_rtnl;
3462 }
3463 3193
3464 if (!dev->ops->dump_survey) { 3194 if (!dev->ops->dump_survey) {
3465 res = -EOPNOTSUPP; 3195 res = -EOPNOTSUPP;
@@ -3487,10 +3217,7 @@ static int nl80211_dump_survey(struct sk_buff *skb,
3487 cb->args[1] = survey_idx; 3217 cb->args[1] = survey_idx;
3488 res = skb->len; 3218 res = skb->len;
3489 out_err: 3219 out_err:
3490 cfg80211_unlock_rdev(dev); 3220 nl80211_finish_netdev_dump(dev);
3491 out_rtnl:
3492 rtnl_unlock();
3493
3494 return res; 3221 return res;
3495} 3222}
3496 3223
@@ -3523,8 +3250,8 @@ static bool nl80211_valid_cipher_suite(u32 cipher)
3523 3250
3524static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) 3251static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
3525{ 3252{
3526 struct cfg80211_registered_device *rdev; 3253 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3527 struct net_device *dev; 3254 struct net_device *dev = info->user_ptr[1];
3528 struct ieee80211_channel *chan; 3255 struct ieee80211_channel *chan;
3529 const u8 *bssid, *ssid, *ie = NULL; 3256 const u8 *bssid, *ssid, *ie = NULL;
3530 int err, ssid_len, ie_len = 0; 3257 int err, ssid_len, ie_len = 0;
@@ -3552,6 +3279,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
3552 return err; 3279 return err;
3553 3280
3554 if (key.idx >= 0) { 3281 if (key.idx >= 0) {
3282 if (key.type != -1 && key.type != NL80211_KEYTYPE_GROUP)
3283 return -EINVAL;
3555 if (!key.p.key || !key.p.key_len) 3284 if (!key.p.key || !key.p.key_len)
3556 return -EINVAL; 3285 return -EINVAL;
3557 if ((key.p.cipher != WLAN_CIPHER_SUITE_WEP40 || 3286 if ((key.p.cipher != WLAN_CIPHER_SUITE_WEP40 ||
@@ -3566,34 +3295,31 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
3566 key.p.key = NULL; 3295 key.p.key = NULL;
3567 } 3296 }
3568 3297
3569 rtnl_lock(); 3298 if (key.idx >= 0) {
3570 3299 int i;
3571 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); 3300 bool ok = false;
3572 if (err) 3301 for (i = 0; i < rdev->wiphy.n_cipher_suites; i++) {
3573 goto unlock_rtnl; 3302 if (key.p.cipher == rdev->wiphy.cipher_suites[i]) {
3574 3303 ok = true;
3575 if (!rdev->ops->auth) { 3304 break;
3576 err = -EOPNOTSUPP; 3305 }
3577 goto out; 3306 }
3307 if (!ok)
3308 return -EINVAL;
3578 } 3309 }
3579 3310
3580 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { 3311 if (!rdev->ops->auth)
3581 err = -EOPNOTSUPP; 3312 return -EOPNOTSUPP;
3582 goto out;
3583 }
3584 3313
3585 if (!netif_running(dev)) { 3314 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3586 err = -ENETDOWN; 3315 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
3587 goto out; 3316 return -EOPNOTSUPP;
3588 }
3589 3317
3590 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); 3318 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
3591 chan = ieee80211_get_channel(&rdev->wiphy, 3319 chan = ieee80211_get_channel(&rdev->wiphy,
3592 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); 3320 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
3593 if (!chan || (chan->flags & IEEE80211_CHAN_DISABLED)) { 3321 if (!chan || (chan->flags & IEEE80211_CHAN_DISABLED))
3594 err = -EINVAL; 3322 return -EINVAL;
3595 goto out;
3596 }
3597 3323
3598 ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); 3324 ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
3599 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); 3325 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
@@ -3604,27 +3330,19 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
3604 } 3330 }
3605 3331
3606 auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]); 3332 auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]);
3607 if (!nl80211_valid_auth_type(auth_type)) { 3333 if (!nl80211_valid_auth_type(auth_type))
3608 err = -EINVAL; 3334 return -EINVAL;
3609 goto out;
3610 }
3611 3335
3612 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; 3336 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
3613 3337
3614 err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid, 3338 return cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
3615 ssid, ssid_len, ie, ie_len, 3339 ssid, ssid_len, ie, ie_len,
3616 key.p.key, key.p.key_len, key.idx, 3340 key.p.key, key.p.key_len, key.idx,
3617 local_state_change); 3341 local_state_change);
3618
3619out:
3620 cfg80211_unlock_rdev(rdev);
3621 dev_put(dev);
3622unlock_rtnl:
3623 rtnl_unlock();
3624 return err;
3625} 3342}
3626 3343
3627static int nl80211_crypto_settings(struct genl_info *info, 3344static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
3345 struct genl_info *info,
3628 struct cfg80211_crypto_settings *settings, 3346 struct cfg80211_crypto_settings *settings,
3629 int cipher_limit) 3347 int cipher_limit)
3630{ 3348{
@@ -3632,6 +3350,19 @@ static int nl80211_crypto_settings(struct genl_info *info,
3632 3350
3633 settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; 3351 settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT];
3634 3352
3353 if (info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) {
3354 u16 proto;
3355 proto = nla_get_u16(
3356 info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]);
3357 settings->control_port_ethertype = cpu_to_be16(proto);
3358 if (!(rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) &&
3359 proto != ETH_P_PAE)
3360 return -EINVAL;
3361 if (info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT])
3362 settings->control_port_no_encrypt = true;
3363 } else
3364 settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE);
3365
3635 if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) { 3366 if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) {
3636 void *data; 3367 void *data;
3637 int len, i; 3368 int len, i;
@@ -3691,8 +3422,8 @@ static int nl80211_crypto_settings(struct genl_info *info,
3691 3422
3692static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) 3423static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
3693{ 3424{
3694 struct cfg80211_registered_device *rdev; 3425 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3695 struct net_device *dev; 3426 struct net_device *dev = info->user_ptr[1];
3696 struct cfg80211_crypto_settings crypto; 3427 struct cfg80211_crypto_settings crypto;
3697 struct ieee80211_channel *chan; 3428 struct ieee80211_channel *chan;
3698 const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL; 3429 const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL;
@@ -3707,35 +3438,19 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
3707 !info->attrs[NL80211_ATTR_WIPHY_FREQ]) 3438 !info->attrs[NL80211_ATTR_WIPHY_FREQ])
3708 return -EINVAL; 3439 return -EINVAL;
3709 3440
3710 rtnl_lock(); 3441 if (!rdev->ops->assoc)
3711 3442 return -EOPNOTSUPP;
3712 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
3713 if (err)
3714 goto unlock_rtnl;
3715
3716 if (!rdev->ops->assoc) {
3717 err = -EOPNOTSUPP;
3718 goto out;
3719 }
3720
3721 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
3722 err = -EOPNOTSUPP;
3723 goto out;
3724 }
3725 3443
3726 if (!netif_running(dev)) { 3444 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3727 err = -ENETDOWN; 3445 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
3728 goto out; 3446 return -EOPNOTSUPP;
3729 }
3730 3447
3731 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); 3448 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
3732 3449
3733 chan = ieee80211_get_channel(&rdev->wiphy, 3450 chan = ieee80211_get_channel(&rdev->wiphy,
3734 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); 3451 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
3735 if (!chan || (chan->flags & IEEE80211_CHAN_DISABLED)) { 3452 if (!chan || (chan->flags & IEEE80211_CHAN_DISABLED))
3736 err = -EINVAL; 3453 return -EINVAL;
3737 goto out;
3738 }
3739 3454
3740 ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); 3455 ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
3741 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); 3456 ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
@@ -3750,35 +3465,28 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
3750 nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); 3465 nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]);
3751 if (mfp == NL80211_MFP_REQUIRED) 3466 if (mfp == NL80211_MFP_REQUIRED)
3752 use_mfp = true; 3467 use_mfp = true;
3753 else if (mfp != NL80211_MFP_NO) { 3468 else if (mfp != NL80211_MFP_NO)
3754 err = -EINVAL; 3469 return -EINVAL;
3755 goto out;
3756 }
3757 } 3470 }
3758 3471
3759 if (info->attrs[NL80211_ATTR_PREV_BSSID]) 3472 if (info->attrs[NL80211_ATTR_PREV_BSSID])
3760 prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); 3473 prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
3761 3474
3762 err = nl80211_crypto_settings(info, &crypto, 1); 3475 err = nl80211_crypto_settings(rdev, info, &crypto, 1);
3763 if (!err) 3476 if (!err)
3764 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, 3477 err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
3765 ssid, ssid_len, ie, ie_len, use_mfp, 3478 ssid, ssid_len, ie, ie_len, use_mfp,
3766 &crypto); 3479 &crypto);
3767 3480
3768out:
3769 cfg80211_unlock_rdev(rdev);
3770 dev_put(dev);
3771unlock_rtnl:
3772 rtnl_unlock();
3773 return err; 3481 return err;
3774} 3482}
3775 3483
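/*
 * The control-port block added to nl80211_crypto_settings (used by
 * ASSOCIATE above and CONNECT below) lets a supplicant pick a non-PAE
 * ethertype and disable encryption for port-control frames, provided
 * the wiphy advertises WIPHY_FLAG_CONTROL_PORT_PROTOCOL. A sketch of
 * the userspace side with libnl (0x890d is the TDLS ethertype, used
 * here only as an example; the helper itself is hypothetical):
 */
#include <errno.h>
#include <netlink/msg.h>
#include <netlink/attr.h>
#include <linux/nl80211.h>

static int add_control_port_attrs(struct nl_msg *msg)
{
	if (nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT) ||
	    nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, 0x890d) ||
	    nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT))
		return -ENOBUFS;	/* message buffer exhausted */
	return 0;
}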
3776static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) 3484static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
3777{ 3485{
3778 struct cfg80211_registered_device *rdev; 3486 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3779 struct net_device *dev; 3487 struct net_device *dev = info->user_ptr[1];
3780 const u8 *ie = NULL, *bssid; 3488 const u8 *ie = NULL, *bssid;
3781 int err, ie_len = 0; 3489 int ie_len = 0;
3782 u16 reason_code; 3490 u16 reason_code;
3783 bool local_state_change; 3491 bool local_state_change;
3784 3492
@@ -3791,34 +3499,19 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
3791 if (!info->attrs[NL80211_ATTR_REASON_CODE]) 3499 if (!info->attrs[NL80211_ATTR_REASON_CODE])
3792 return -EINVAL; 3500 return -EINVAL;
3793 3501
3794 rtnl_lock(); 3502 if (!rdev->ops->deauth)
3795 3503 return -EOPNOTSUPP;
3796 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
3797 if (err)
3798 goto unlock_rtnl;
3799
3800 if (!rdev->ops->deauth) {
3801 err = -EOPNOTSUPP;
3802 goto out;
3803 }
3804
3805 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
3806 err = -EOPNOTSUPP;
3807 goto out;
3808 }
3809 3504
3810 if (!netif_running(dev)) { 3505 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3811 err = -ENETDOWN; 3506 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
3812 goto out; 3507 return -EOPNOTSUPP;
3813 }
3814 3508
3815 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); 3509 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
3816 3510
3817 reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); 3511 reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
3818 if (reason_code == 0) { 3512 if (reason_code == 0) {
3819 /* Reason Code 0 is reserved */ 3513 /* Reason Code 0 is reserved */
3820 err = -EINVAL; 3514 return -EINVAL;
3821 goto out;
3822 } 3515 }
3823 3516
3824 if (info->attrs[NL80211_ATTR_IE]) { 3517 if (info->attrs[NL80211_ATTR_IE]) {
@@ -3828,23 +3521,16 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
3828 3521
3829 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; 3522 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
3830 3523
3831 err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code, 3524 return cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code,
3832 local_state_change); 3525 local_state_change);
3833
3834out:
3835 cfg80211_unlock_rdev(rdev);
3836 dev_put(dev);
3837unlock_rtnl:
3838 rtnl_unlock();
3839 return err;
3840} 3526}
3841 3527
3842static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) 3528static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
3843{ 3529{
3844 struct cfg80211_registered_device *rdev; 3530 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3845 struct net_device *dev; 3531 struct net_device *dev = info->user_ptr[1];
3846 const u8 *ie = NULL, *bssid; 3532 const u8 *ie = NULL, *bssid;
3847 int err, ie_len = 0; 3533 int ie_len = 0;
3848 u16 reason_code; 3534 u16 reason_code;
3849 bool local_state_change; 3535 bool local_state_change;
3850 3536
@@ -3857,34 +3543,19 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
3857 if (!info->attrs[NL80211_ATTR_REASON_CODE]) 3543 if (!info->attrs[NL80211_ATTR_REASON_CODE])
3858 return -EINVAL; 3544 return -EINVAL;
3859 3545
3860 rtnl_lock(); 3546 if (!rdev->ops->disassoc)
3861 3547 return -EOPNOTSUPP;
3862 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
3863 if (err)
3864 goto unlock_rtnl;
3865
3866 if (!rdev->ops->disassoc) {
3867 err = -EOPNOTSUPP;
3868 goto out;
3869 }
3870
3871 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
3872 err = -EOPNOTSUPP;
3873 goto out;
3874 }
3875 3548
3876 if (!netif_running(dev)) { 3549 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
3877 err = -ENETDOWN; 3550 dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
3878 goto out; 3551 return -EOPNOTSUPP;
3879 }
3880 3552
3881 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]); 3553 bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
3882 3554
3883 reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); 3555 reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]);
3884 if (reason_code == 0) { 3556 if (reason_code == 0) {
3885 /* Reason Code 0 is reserved */ 3557 /* Reason Code 0 is reserved */
3886 err = -EINVAL; 3558 return -EINVAL;
3887 goto out;
3888 } 3559 }
3889 3560
3890 if (info->attrs[NL80211_ATTR_IE]) { 3561 if (info->attrs[NL80211_ATTR_IE]) {
@@ -3894,21 +3565,14 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
3894 3565
3895 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE]; 3566 local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
3896 3567
3897 err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code, 3568 return cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code,
3898 local_state_change); 3569 local_state_change);
3899
3900out:
3901 cfg80211_unlock_rdev(rdev);
3902 dev_put(dev);
3903unlock_rtnl:
3904 rtnl_unlock();
3905 return err;
3906} 3570}
3907 3571
3908static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) 3572static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
3909{ 3573{
3910 struct cfg80211_registered_device *rdev; 3574 struct cfg80211_registered_device *rdev = info->user_ptr[0];
3911 struct net_device *dev; 3575 struct net_device *dev = info->user_ptr[1];
3912 struct cfg80211_ibss_params ibss; 3576 struct cfg80211_ibss_params ibss;
3913 struct wiphy *wiphy; 3577 struct wiphy *wiphy;
3914 struct cfg80211_cached_keys *connkeys = NULL; 3578 struct cfg80211_cached_keys *connkeys = NULL;
@@ -3933,26 +3597,11 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
3933 return -EINVAL; 3597 return -EINVAL;
3934 } 3598 }
3935 3599
3936 rtnl_lock(); 3600 if (!rdev->ops->join_ibss)
3937 3601 return -EOPNOTSUPP;
3938 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
3939 if (err)
3940 goto unlock_rtnl;
3941
3942 if (!rdev->ops->join_ibss) {
3943 err = -EOPNOTSUPP;
3944 goto out;
3945 }
3946
3947 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) {
3948 err = -EOPNOTSUPP;
3949 goto out;
3950 }
3951 3602
3952 if (!netif_running(dev)) { 3603 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC)
3953 err = -ENETDOWN; 3604 return -EOPNOTSUPP;
3954 goto out;
3955 }
3956 3605
3957 wiphy = &rdev->wiphy; 3606 wiphy = &rdev->wiphy;
3958 3607
@@ -3970,24 +3619,12 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
3970 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); 3619 nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
3971 if (!ibss.channel || 3620 if (!ibss.channel ||
3972 ibss.channel->flags & IEEE80211_CHAN_NO_IBSS || 3621 ibss.channel->flags & IEEE80211_CHAN_NO_IBSS ||
3973 ibss.channel->flags & IEEE80211_CHAN_DISABLED) { 3622 ibss.channel->flags & IEEE80211_CHAN_DISABLED)
3974 err = -EINVAL; 3623 return -EINVAL;
3975 goto out;
3976 }
3977 3624
3978 ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED]; 3625 ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED];
3979 ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; 3626 ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY];
3980 3627
3981 if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) {
3982 connkeys = nl80211_parse_connkeys(rdev,
3983 info->attrs[NL80211_ATTR_KEYS]);
3984 if (IS_ERR(connkeys)) {
3985 err = PTR_ERR(connkeys);
3986 connkeys = NULL;
3987 goto out;
3988 }
3989 }
3990
3991 if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) { 3628 if (info->attrs[NL80211_ATTR_BSS_BASIC_RATES]) {
3992 u8 *rates = 3629 u8 *rates =
3993 nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); 3630 nla_data(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
@@ -3997,10 +3634,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
3997 wiphy->bands[ibss.channel->band]; 3634 wiphy->bands[ibss.channel->band];
3998 int i, j; 3635 int i, j;
3999 3636
4000 if (n_rates == 0) { 3637 if (n_rates == 0)
4001 err = -EINVAL; 3638 return -EINVAL;
4002 goto out;
4003 }
4004 3639
4005 for (i = 0; i < n_rates; i++) { 3640 for (i = 0; i < n_rates; i++) {
4006 int rate = (rates[i] & 0x7f) * 5; 3641 int rate = (rates[i] & 0x7f) * 5;
@@ -4013,77 +3648,36 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
4013 break; 3648 break;
4014 } 3649 }
4015 } 3650 }
4016 if (!found) { 3651 if (!found)
4017 err = -EINVAL; 3652 return -EINVAL;
4018 goto out;
4019 }
4020 }
4021 } else {
4022 /*
4023 * If no rates were explicitly configured,
4024 * use the mandatory rate set for 11b or
4025 * 11a for maximum compatibility.
4026 */
4027 struct ieee80211_supported_band *sband =
4028 wiphy->bands[ibss.channel->band];
4029 int j;
4030 u32 flag = ibss.channel->band == IEEE80211_BAND_5GHZ ?
4031 IEEE80211_RATE_MANDATORY_A :
4032 IEEE80211_RATE_MANDATORY_B;
4033
4034 for (j = 0; j < sband->n_bitrates; j++) {
4035 if (sband->bitrates[j].flags & flag)
4036 ibss.basic_rates |= BIT(j);
4037 } 3653 }
4038 } 3654 }
4039 3655
4040 err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys); 3656 if (ibss.privacy && info->attrs[NL80211_ATTR_KEYS]) {
3657 connkeys = nl80211_parse_connkeys(rdev,
3658 info->attrs[NL80211_ATTR_KEYS]);
3659 if (IS_ERR(connkeys))
3660 return PTR_ERR(connkeys);
3661 }
4041 3662
4042out: 3663 err = cfg80211_join_ibss(rdev, dev, &ibss, connkeys);
4043 cfg80211_unlock_rdev(rdev);
4044 dev_put(dev);
4045unlock_rtnl:
4046 if (err) 3664 if (err)
4047 kfree(connkeys); 3665 kfree(connkeys);
4048 rtnl_unlock();
4049 return err; 3666 return err;
4050} 3667}
4051 3668
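/*
 * Worth noting in the join_ibss rewrite above: parsing the connect keys
 * moved to the very end of setup, after every -EINVAL check. With all
 * cheap validation done first, the only allocation needing cleanup is
 * connkeys itself, which the single kfree() on the cfg80211_join_ibss()
 * error path covers; the old version had to route every later failure
 * through a label to avoid leaking keys parsed up front.
 */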
4052static int nl80211_leave_ibss(struct sk_buff *skb, struct genl_info *info) 3669static int nl80211_leave_ibss(struct sk_buff *skb, struct genl_info *info)
4053{ 3670{
4054 struct cfg80211_registered_device *rdev; 3671 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4055 struct net_device *dev; 3672 struct net_device *dev = info->user_ptr[1];
4056 int err;
4057
4058 rtnl_lock();
4059
4060 err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
4061 if (err)
4062 goto unlock_rtnl;
4063
4064 if (!rdev->ops->leave_ibss) {
4065 err = -EOPNOTSUPP;
4066 goto out;
4067 }
4068 3673
4069 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) { 3674 if (!rdev->ops->leave_ibss)
4070 err = -EOPNOTSUPP; 3675 return -EOPNOTSUPP;
4071 goto out;
4072 }
4073
4074 if (!netif_running(dev)) {
4075 err = -ENETDOWN;
4076 goto out;
4077 }
4078 3676
4079 err = cfg80211_leave_ibss(rdev, dev, false); 3677 if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC)
3678 return -EOPNOTSUPP;
4080 3679
4081out: 3680 return cfg80211_leave_ibss(rdev, dev, false);
4082 cfg80211_unlock_rdev(rdev);
4083 dev_put(dev);
4084unlock_rtnl:
4085 rtnl_unlock();
4086 return err;
4087} 3681}
4088 3682
4089#ifdef CONFIG_NL80211_TESTMODE 3683#ifdef CONFIG_NL80211_TESTMODE
@@ -4093,20 +3687,12 @@ static struct genl_multicast_group nl80211_testmode_mcgrp = {
4093 3687
4094static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info) 3688static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info)
4095{ 3689{
4096 struct cfg80211_registered_device *rdev; 3690 struct cfg80211_registered_device *rdev = info->user_ptr[0];
4097 int err; 3691 int err;
4098 3692
4099 if (!info->attrs[NL80211_ATTR_TESTDATA]) 3693 if (!info->attrs[NL80211_ATTR_TESTDATA])
4100 return -EINVAL; 3694 return -EINVAL;
4101 3695
4102 rtnl_lock();
4103
4104 rdev = cfg80211_get_dev_from_info(info);
4105 if (IS_ERR(rdev)) {
4106 err = PTR_ERR(rdev);
4107 goto unlock_rtnl;
4108 }
4109
4110 err = -EOPNOTSUPP; 3696 err = -EOPNOTSUPP;
4111 if (rdev->ops->testmode_cmd) { 3697 if (rdev->ops->testmode_cmd) {
4112 rdev->testmode_info = info; 3698 rdev->testmode_info = info;
@@ -4116,10 +3702,6 @@ static int nl80211_testmode_do(struct sk_buff *skb, struct genl_info *info)
4116 rdev->testmode_info = NULL; 3702 rdev->testmode_info = NULL;
4117 } 3703 }
4118 3704
4119 cfg80211_unlock_rdev(rdev);
4120
4121 unlock_rtnl:
4122 rtnl_unlock();
4123 return err; 3705 return err;
4124} 3706}
4125 3707
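/*
 * While rdev->testmode_info is set (see above), a driver's testmode_cmd
 * can answer through cfg80211's testmode reply helpers; hypothetical
 * callback, TLV id and payload invented:
 */
static int drv_testmode_cmd(struct wiphy *wiphy, void *data, int len)
{
	struct sk_buff *msg;

	msg = cfg80211_testmode_alloc_reply_skb(wiphy, 16);
	if (!msg)
		return -ENOMEM;
	if (nla_put_u32(msg, 1 /* vendor TLV id, invented */, 0x1234)) {
		kfree_skb(msg);
		return -ENOBUFS;
	}
	return cfg80211_testmode_reply(msg);	/* consumes msg */
}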
@@ -4210,8 +3792,8 @@ EXPORT_SYMBOL(cfg80211_testmode_event);
 
 static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 {
-	struct cfg80211_registered_device *rdev;
-	struct net_device *dev;
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
 	struct cfg80211_connect_params connect;
 	struct wiphy *wiphy;
 	struct cfg80211_cached_keys *connkeys = NULL;
@@ -4236,25 +3818,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 
 	connect.privacy = info->attrs[NL80211_ATTR_PRIVACY];
 
-	err = nl80211_crypto_settings(info, &connect.crypto,
+	err = nl80211_crypto_settings(rdev, info, &connect.crypto,
 				      NL80211_MAX_NR_CIPHER_SUITES);
 	if (err)
 		return err;
-	rtnl_lock();
 
-	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
-	if (err)
-		goto unlock_rtnl;
-
-	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-
-	if (!netif_running(dev)) {
-		err = -ENETDOWN;
-		goto out;
-	}
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
+		return -EOPNOTSUPP;
 
 	wiphy = &rdev->wiphy;
 
@@ -4273,39 +3844,27 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 			ieee80211_get_channel(wiphy,
 			    nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
 		if (!connect.channel ||
-		    connect.channel->flags & IEEE80211_CHAN_DISABLED) {
-			err = -EINVAL;
-			goto out;
-		}
+		    connect.channel->flags & IEEE80211_CHAN_DISABLED)
+			return -EINVAL;
 	}
 
 	if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) {
 		connkeys = nl80211_parse_connkeys(rdev,
 					info->attrs[NL80211_ATTR_KEYS]);
-		if (IS_ERR(connkeys)) {
-			err = PTR_ERR(connkeys);
-			connkeys = NULL;
-			goto out;
-		}
+		if (IS_ERR(connkeys))
+			return PTR_ERR(connkeys);
 	}
 
 	err = cfg80211_connect(rdev, dev, &connect, connkeys);
-
-out:
-	cfg80211_unlock_rdev(rdev);
-	dev_put(dev);
-unlock_rtnl:
 	if (err)
 		kfree(connkeys);
-	rtnl_unlock();
 	return err;
 }
 
 static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
 {
-	struct cfg80211_registered_device *rdev;
-	struct net_device *dev;
-	int err;
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
 	u16 reason;
 
 	if (!info->attrs[NL80211_ATTR_REASON_CODE])
@@ -4316,35 +3875,16 @@ static int nl80211_disconnect(struct sk_buff *skb, struct genl_info *info)
 	if (reason == 0)
 		return -EINVAL;
 
-	rtnl_lock();
-
-	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
-	if (err)
-		goto unlock_rtnl;
-
-	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-
-	if (!netif_running(dev)) {
-		err = -ENETDOWN;
-		goto out;
-	}
-
-	err = cfg80211_disconnect(rdev, dev, reason, true);
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
+		return -EOPNOTSUPP;
 
-out:
-	cfg80211_unlock_rdev(rdev);
-	dev_put(dev);
-unlock_rtnl:
-	rtnl_unlock();
-	return err;
+	return cfg80211_disconnect(rdev, dev, reason, true);
 }
 
 static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info)
 {
-	struct cfg80211_registered_device *rdev;
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net *net;
 	int err;
 	u32 pid;
@@ -4354,43 +3894,26 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info)
 
 	pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]);
 
-	rtnl_lock();
-
-	rdev = cfg80211_get_dev_from_info(info);
-	if (IS_ERR(rdev)) {
-		err = PTR_ERR(rdev);
-		goto out_rtnl;
-	}
-
 	net = get_net_ns_by_pid(pid);
-	if (IS_ERR(net)) {
-		err = PTR_ERR(net);
-		goto out;
-	}
+	if (IS_ERR(net))
+		return PTR_ERR(net);
 
 	err = 0;
 
 	/* check if anything to do */
-	if (net_eq(wiphy_net(&rdev->wiphy), net))
-		goto out_put_net;
+	if (!net_eq(wiphy_net(&rdev->wiphy), net))
+		err = cfg80211_switch_netns(rdev, net);
 
-	err = cfg80211_switch_netns(rdev, net);
- out_put_net:
 	put_net(net);
- out:
-	cfg80211_unlock_rdev(rdev);
- out_rtnl:
-	rtnl_unlock();
 	return err;
 }
 
 static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
 {
-	struct cfg80211_registered_device *rdev;
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	int (*rdev_ops)(struct wiphy *wiphy, struct net_device *dev,
 			struct cfg80211_pmksa *pmksa) = NULL;
-	int err;
-	struct net_device *dev;
+	struct net_device *dev = info->user_ptr[1];
 	struct cfg80211_pmksa pmksa;
 
 	memset(&pmksa, 0, sizeof(struct cfg80211_pmksa));
@@ -4401,19 +3924,12 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
 	if (!info->attrs[NL80211_ATTR_PMKID])
 		return -EINVAL;
 
-	rtnl_lock();
-
-	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
-	if (err)
-		goto out_rtnl;
-
 	pmksa.pmkid = nla_data(info->attrs[NL80211_ATTR_PMKID]);
 	pmksa.bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
 
-	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
+		return -EOPNOTSUPP;
 
 	switch (info->genlhdr->cmd) {
 	case NL80211_CMD_SET_PMKSA:
@@ -4427,61 +3943,32 @@ static int nl80211_setdel_pmksa(struct sk_buff *skb, struct genl_info *info)
 		break;
 	}
 
-	if (!rdev_ops) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-
-	err = rdev_ops(&rdev->wiphy, dev, &pmksa);
-
- out:
-	cfg80211_unlock_rdev(rdev);
-	dev_put(dev);
- out_rtnl:
-	rtnl_unlock();
+	if (!rdev_ops)
+		return -EOPNOTSUPP;
 
-	return err;
+	return rdev_ops(&rdev->wiphy, dev, &pmksa);
 }
 
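NL80211_CMD_SET_PMKSA and NL80211_CMD_DEL_PMKSA deliberately share this one doit(): the attribute validation and the iftype check are identical, only the driver callback differs, so the handler selects a function pointer from info->genlhdr->cmd and makes a single call. A self-contained userspace analogue of the idiom (all names invented for the demo):

    #include <stdio.h>

    typedef int (*op_fn)(const char *arg);

    static int do_set(const char *arg) { printf("set %s\n", arg); return 0; }
    static int do_del(const char *arg) { printf("del %s\n", arg); return 0; }

    static int setdel(int cmd, const char *arg)
    {
        op_fn op = NULL;

        switch (cmd) {
        case 0:
            op = do_set;
            break;
        case 1:
            op = do_del;
            break;
        }
        if (!op)
            return -1;      /* the kernel handler returns -EOPNOTSUPP */
        return op(arg);
    }

    int main(void)
    {
        return setdel(0, "pmkid") || setdel(1, "pmkid");
    }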
 static int nl80211_flush_pmksa(struct sk_buff *skb, struct genl_info *info)
 {
-	struct cfg80211_registered_device *rdev;
-	int err;
-	struct net_device *dev;
-
-	rtnl_lock();
-
-	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
-	if (err)
-		goto out_rtnl;
-
-	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-
-	if (!rdev->ops->flush_pmksa) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-
-	err = rdev->ops->flush_pmksa(&rdev->wiphy, dev);
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
 
- out:
-	cfg80211_unlock_rdev(rdev);
-	dev_put(dev);
- out_rtnl:
-	rtnl_unlock();
+	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT)
+		return -EOPNOTSUPP;
 
-	return err;
+	if (!rdev->ops->flush_pmksa)
+		return -EOPNOTSUPP;
 
+	return rdev->ops->flush_pmksa(&rdev->wiphy, dev);
 }
 
 static int nl80211_remain_on_channel(struct sk_buff *skb,
 				     struct genl_info *info)
 {
-	struct cfg80211_registered_device *rdev;
-	struct net_device *dev;
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
 	struct ieee80211_channel *chan;
 	struct sk_buff *msg;
 	void *hdr;
@@ -4503,21 +3990,8 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
 	if (!duration || !msecs_to_jiffies(duration) || duration > 5000)
 		return -EINVAL;
 
-	rtnl_lock();
-
-	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
-	if (err)
-		goto unlock_rtnl;
-
-	if (!rdev->ops->remain_on_channel) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-
-	if (!netif_running(dev)) {
-		err = -ENETDOWN;
-		goto out;
-	}
+	if (!rdev->ops->remain_on_channel)
+		return -EOPNOTSUPP;
 
 	if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
 		channel_type = nla_get_u32(
@@ -4525,24 +3999,18 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
 		if (channel_type != NL80211_CHAN_NO_HT &&
 		    channel_type != NL80211_CHAN_HT20 &&
 		    channel_type != NL80211_CHAN_HT40PLUS &&
-		    channel_type != NL80211_CHAN_HT40MINUS) {
-			err = -EINVAL;
-			goto out;
-		}
+		    channel_type != NL80211_CHAN_HT40MINUS)
+			return -EINVAL;
 	}
 
 	freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
 	chan = rdev_freq_to_chan(rdev, freq, channel_type);
-	if (chan == NULL) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (chan == NULL)
+		return -EINVAL;
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!msg)
+		return -ENOMEM;
 
 	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
 			     NL80211_CMD_REMAIN_ON_CHANNEL);
@@ -4561,58 +4029,32 @@ static int nl80211_remain_on_channel(struct sk_buff *skb,
 	NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
 
 	genlmsg_end(msg, hdr);
-	err = genlmsg_reply(msg, info);
-	goto out;
+
+	return genlmsg_reply(msg, info);
 
  nla_put_failure:
 	err = -ENOBUFS;
  free_msg:
 	nlmsg_free(msg);
- out:
-	cfg80211_unlock_rdev(rdev);
-	dev_put(dev);
- unlock_rtnl:
-	rtnl_unlock();
 	return err;
 }
 
 static int nl80211_cancel_remain_on_channel(struct sk_buff *skb,
 					    struct genl_info *info)
 {
-	struct cfg80211_registered_device *rdev;
-	struct net_device *dev;
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
 	u64 cookie;
-	int err;
 
 	if (!info->attrs[NL80211_ATTR_COOKIE])
 		return -EINVAL;
 
-	rtnl_lock();
-
-	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
-	if (err)
-		goto unlock_rtnl;
-
-	if (!rdev->ops->cancel_remain_on_channel) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-
-	if (!netif_running(dev)) {
-		err = -ENETDOWN;
-		goto out;
-	}
+	if (!rdev->ops->cancel_remain_on_channel)
+		return -EOPNOTSUPP;
 
 	cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]);
 
-	err = rdev->ops->cancel_remain_on_channel(&rdev->wiphy, dev, cookie);
-
- out:
-	cfg80211_unlock_rdev(rdev);
-	dev_put(dev);
- unlock_rtnl:
-	rtnl_unlock();
-	return err;
+	return rdev->ops->cancel_remain_on_channel(&rdev->wiphy, dev, cookie);
 }
 
 static u32 rateset_to_mask(struct ieee80211_supported_band *sband,
@@ -4648,26 +4090,18 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
 					struct genl_info *info)
 {
 	struct nlattr *tb[NL80211_TXRATE_MAX + 1];
-	struct cfg80211_registered_device *rdev;
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct cfg80211_bitrate_mask mask;
-	int err, rem, i;
-	struct net_device *dev;
+	int rem, i;
+	struct net_device *dev = info->user_ptr[1];
 	struct nlattr *tx_rates;
 	struct ieee80211_supported_band *sband;
 
 	if (info->attrs[NL80211_ATTR_TX_RATES] == NULL)
 		return -EINVAL;
 
-	rtnl_lock();
-
-	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
-	if (err)
-		goto unlock_rtnl;
-
-	if (!rdev->ops->set_bitrate_mask) {
-		err = -EOPNOTSUPP;
-		goto unlock;
-	}
+	if (!rdev->ops->set_bitrate_mask)
+		return -EOPNOTSUPP;
 
 	memset(&mask, 0, sizeof(mask));
 	/* Default to all rates enabled */
@@ -4684,15 +4118,11 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
 	nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem)
 	{
 		enum ieee80211_band band = nla_type(tx_rates);
-		if (band < 0 || band >= IEEE80211_NUM_BANDS) {
-			err = -EINVAL;
-			goto unlock;
-		}
+		if (band < 0 || band >= IEEE80211_NUM_BANDS)
+			return -EINVAL;
 		sband = rdev->wiphy.bands[band];
-		if (sband == NULL) {
-			err = -EINVAL;
-			goto unlock;
-		}
+		if (sband == NULL)
+			return -EINVAL;
 		nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates),
 			  nla_len(tx_rates), nl80211_txattr_policy);
 		if (tb[NL80211_TXRATE_LEGACY]) {
@@ -4700,68 +4130,48 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
 							sband,
 					nla_data(tb[NL80211_TXRATE_LEGACY]),
 					nla_len(tb[NL80211_TXRATE_LEGACY]));
-			if (mask.control[band].legacy == 0) {
-				err = -EINVAL;
-				goto unlock;
-			}
+			if (mask.control[band].legacy == 0)
+				return -EINVAL;
 		}
 	}
 
-	err = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, NULL, &mask);
-
- unlock:
-	dev_put(dev);
-	cfg80211_unlock_rdev(rdev);
- unlock_rtnl:
-	rtnl_unlock();
-	return err;
+	return rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, NULL, &mask);
 }
 
-static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info)
+static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
 {
-	struct cfg80211_registered_device *rdev;
-	struct net_device *dev;
-	int err;
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION;
 
 	if (!info->attrs[NL80211_ATTR_FRAME_MATCH])
 		return -EINVAL;
 
-	if (nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]) < 1)
-		return -EINVAL;
-
-	rtnl_lock();
-
-	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
-	if (err)
-		goto unlock_rtnl;
+	if (info->attrs[NL80211_ATTR_FRAME_TYPE])
+		frame_type = nla_get_u16(info->attrs[NL80211_ATTR_FRAME_TYPE]);
 
 	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
-	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
+		return -EOPNOTSUPP;
 
 	/* not much point in registering if we can't reply */
-	if (!rdev->ops->action) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
+	if (!rdev->ops->mgmt_tx)
+		return -EOPNOTSUPP;
 
-	err = cfg80211_mlme_register_action(dev->ieee80211_ptr, info->snd_pid,
+	return cfg80211_mlme_register_mgmt(dev->ieee80211_ptr, info->snd_pid,
+			frame_type,
 			nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]),
 			nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]));
- out:
-	cfg80211_unlock_rdev(rdev);
-	dev_put(dev);
- unlock_rtnl:
-	rtnl_unlock();
-	return err;
 }
 
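The rename also keeps old userspace working: when NL80211_ATTR_FRAME_TYPE is absent, frame_type stays at management/Action — exactly what NL80211_CMD_REGISTER_ACTION used to register for. A small standalone illustration (constants mirror include/linux/ieee80211.h; the absent attribute is simulated):

    #include <stdint.h>
    #include <stdio.h>

    #define IEEE80211_FTYPE_MGMT   0x0000
    #define IEEE80211_STYPE_ACTION 0x00d0

    int main(void)
    {
        /* default: management Action frames, as before the rename */
        uint16_t frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION;
        int have_attr = 0;          /* pretend the attribute is absent */
        uint16_t attr_val = 0x00a0; /* some other management subtype */

        if (have_attr)
            frame_type = attr_val;
        printf("registering for frame_type 0x%04x\n", frame_type);
        return 0;
    }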
-static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
+static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 {
-	struct cfg80211_registered_device *rdev;
-	struct net_device *dev;
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
 	struct ieee80211_channel *chan;
 	enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
 	bool channel_type_valid = false;
@@ -4775,27 +4185,16 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
 	    !info->attrs[NL80211_ATTR_WIPHY_FREQ])
 		return -EINVAL;
 
-	rtnl_lock();
-
-	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
-	if (err)
-		goto unlock_rtnl;
-
-	if (!rdev->ops->action) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
+	if (!rdev->ops->mgmt_tx)
+		return -EOPNOTSUPP;
 
 	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION &&
-	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
-
-	if (!netif_running(dev)) {
-		err = -ENETDOWN;
-		goto out;
-	}
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_CLIENT &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO)
+		return -EOPNOTSUPP;
 
 	if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
 		channel_type = nla_get_u32(
@@ -4803,147 +4202,104 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info)
 		if (channel_type != NL80211_CHAN_NO_HT &&
 		    channel_type != NL80211_CHAN_HT20 &&
 		    channel_type != NL80211_CHAN_HT40PLUS &&
-		    channel_type != NL80211_CHAN_HT40MINUS) {
-			err = -EINVAL;
-			goto out;
-		}
+		    channel_type != NL80211_CHAN_HT40MINUS)
+			return -EINVAL;
 		channel_type_valid = true;
 	}
 
 	freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
 	chan = rdev_freq_to_chan(rdev, freq, channel_type);
-	if (chan == NULL) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (chan == NULL)
+		return -EINVAL;
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!msg)
+		return -ENOMEM;
 
 	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
-			     NL80211_CMD_ACTION);
+			     NL80211_CMD_FRAME);
 
 	if (IS_ERR(hdr)) {
 		err = PTR_ERR(hdr);
 		goto free_msg;
 	}
-	err = cfg80211_mlme_action(rdev, dev, chan, channel_type,
+	err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type,
 				   channel_type_valid,
 				   nla_data(info->attrs[NL80211_ATTR_FRAME]),
 				   nla_len(info->attrs[NL80211_ATTR_FRAME]),
 				   &cookie);
 	if (err)
 		goto free_msg;
 
 	NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
 
 	genlmsg_end(msg, hdr);
-	err = genlmsg_reply(msg, info);
-	goto out;
+	return genlmsg_reply(msg, info);
 
  nla_put_failure:
 	err = -ENOBUFS;
  free_msg:
 	nlmsg_free(msg);
- out:
-	cfg80211_unlock_rdev(rdev);
-	dev_put(dev);
-unlock_rtnl:
-	rtnl_unlock();
 	return err;
 }
 
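nl80211_tx_mgmt() and nl80211_remain_on_channel() share one reply idiom: the u64 cookie produced by the mlme call identifies the operation in later events (the NL80211_CMD_FRAME_TX_STATUS notification further down carries the same cookie). Reduced to a sketch with the error paths elided — the real handlers must keep the nla_put_failure/free_msg labels because NLA_PUT_U64 jumps to nla_put_failure when the message runs out of room:

    /* sketch only: allocation and put failures are handled via the
     * labels shown in the hunk above */
    msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
    hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
                         NL80211_CMD_FRAME);
    /* ... transmit the frame, filling in 'cookie' ... */
    NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
    genlmsg_end(msg, hdr);
    return genlmsg_reply(msg, info);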
 static int nl80211_set_power_save(struct sk_buff *skb, struct genl_info *info)
 {
-	struct cfg80211_registered_device *rdev;
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct wireless_dev *wdev;
-	struct net_device *dev;
+	struct net_device *dev = info->user_ptr[1];
 	u8 ps_state;
 	bool state;
 	int err;
 
-	if (!info->attrs[NL80211_ATTR_PS_STATE]) {
-		err = -EINVAL;
-		goto out;
-	}
+	if (!info->attrs[NL80211_ATTR_PS_STATE])
+		return -EINVAL;
 
 	ps_state = nla_get_u32(info->attrs[NL80211_ATTR_PS_STATE]);
 
-	if (ps_state != NL80211_PS_DISABLED && ps_state != NL80211_PS_ENABLED) {
-		err = -EINVAL;
-		goto out;
-	}
-
-	rtnl_lock();
-
-	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
-	if (err)
-		goto unlock_rdev;
+	if (ps_state != NL80211_PS_DISABLED && ps_state != NL80211_PS_ENABLED)
+		return -EINVAL;
 
 	wdev = dev->ieee80211_ptr;
 
-	if (!rdev->ops->set_power_mgmt) {
-		err = -EOPNOTSUPP;
-		goto unlock_rdev;
-	}
+	if (!rdev->ops->set_power_mgmt)
+		return -EOPNOTSUPP;
 
 	state = (ps_state == NL80211_PS_ENABLED) ? true : false;
 
 	if (state == wdev->ps)
-		goto unlock_rdev;
-
-	wdev->ps = state;
-
-	if (rdev->ops->set_power_mgmt(wdev->wiphy, dev, wdev->ps,
-				      wdev->ps_timeout))
-		/* assume this means it's off */
-		wdev->ps = false;
-
-unlock_rdev:
-	cfg80211_unlock_rdev(rdev);
-	dev_put(dev);
-	rtnl_unlock();
+		return 0;
 
-out:
+	err = rdev->ops->set_power_mgmt(wdev->wiphy, dev, state,
+					wdev->ps_timeout);
+	if (!err)
+		wdev->ps = state;
 	return err;
 }
 
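Besides shedding boilerplate, this hunk fixes a behavioural wart: the old code flipped wdev->ps before calling the driver and guessed "off" when the call failed, so the cached state could disagree with the hardware. The new code commits the state only on success. The idiom in isolation (standalone demo, invented names):

    #include <stdbool.h>
    #include <stdio.h>

    static int apply_ps(bool on)
    {
        return on ? -1 : 0;     /* pretend enabling power save fails */
    }

    /* commit-on-success: the cache never disagrees with the driver */
    static int set_ps(bool *cached, bool want)
    {
        int err;

        if (want == *cached)
            return 0;
        err = apply_ps(want);
        if (!err)
            *cached = want;
        return err;
    }

    int main(void)
    {
        bool ps = false;
        int err = set_ps(&ps, true);

        printf("err=%d cached=%d\n", err, ps);   /* err=-1 cached=0 */
        return 0;
    }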
 static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info)
 {
-	struct cfg80211_registered_device *rdev;
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	enum nl80211_ps_state ps_state;
 	struct wireless_dev *wdev;
-	struct net_device *dev;
+	struct net_device *dev = info->user_ptr[1];
 	struct sk_buff *msg;
 	void *hdr;
 	int err;
 
-	rtnl_lock();
-
-	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
-	if (err)
-		goto unlock_rtnl;
-
 	wdev = dev->ieee80211_ptr;
 
-	if (!rdev->ops->set_power_mgmt) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
+	if (!rdev->ops->set_power_mgmt)
+		return -EOPNOTSUPP;
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!msg)
+		return -ENOMEM;
 
 	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
 			     NL80211_CMD_GET_POWER_SAVE);
 	if (!hdr) {
-		err = -ENOMEM;
+		err = -ENOBUFS;
 		goto free_msg;
 	}
 
@@ -4955,22 +4311,12 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info)
 	NLA_PUT_U32(msg, NL80211_ATTR_PS_STATE, ps_state);
 
 	genlmsg_end(msg, hdr);
-	err = genlmsg_reply(msg, info);
-	goto out;
+	return genlmsg_reply(msg, info);
 
-nla_put_failure:
+ nla_put_failure:
 	err = -ENOBUFS;
-
-free_msg:
+ free_msg:
 	nlmsg_free(msg);
-
-out:
-	cfg80211_unlock_rdev(rdev);
-	dev_put(dev);
-
-unlock_rtnl:
-	rtnl_unlock();
-
 	return err;
 }
 
@@ -4984,41 +4330,24 @@ nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] __read_mostly = {
 static int nl80211_set_cqm_rssi(struct genl_info *info,
 				s32 threshold, u32 hysteresis)
 {
-	struct cfg80211_registered_device *rdev;
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct wireless_dev *wdev;
-	struct net_device *dev;
-	int err;
+	struct net_device *dev = info->user_ptr[1];
 
 	if (threshold > 0)
 		return -EINVAL;
 
-	rtnl_lock();
-
-	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
-	if (err)
-		goto unlock_rdev;
-
 	wdev = dev->ieee80211_ptr;
 
-	if (!rdev->ops->set_cqm_rssi_config) {
-		err = -EOPNOTSUPP;
-		goto unlock_rdev;
-	}
-
-	if (wdev->iftype != NL80211_IFTYPE_STATION) {
-		err = -EOPNOTSUPP;
-		goto unlock_rdev;
-	}
-
-	err = rdev->ops->set_cqm_rssi_config(wdev->wiphy, dev,
-					     threshold, hysteresis);
+	if (!rdev->ops->set_cqm_rssi_config)
+		return -EOPNOTSUPP;
 
-unlock_rdev:
-	cfg80211_unlock_rdev(rdev);
-	dev_put(dev);
-	rtnl_unlock();
+	if (wdev->iftype != NL80211_IFTYPE_STATION &&
+	    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)
+		return -EOPNOTSUPP;
 
-	return err;
+	return rdev->ops->set_cqm_rssi_config(wdev->wiphy, dev,
+					      threshold, hysteresis);
 }
 
 static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
@@ -5052,6 +4381,65 @@ out:
 	return err;
 }
 
+#define NL80211_FLAG_NEED_WIPHY		0x01
+#define NL80211_FLAG_NEED_NETDEV	0x02
+#define NL80211_FLAG_NEED_RTNL		0x04
+#define NL80211_FLAG_CHECK_NETDEV_UP	0x08
+#define NL80211_FLAG_NEED_NETDEV_UP	(NL80211_FLAG_NEED_NETDEV |\
+					 NL80211_FLAG_CHECK_NETDEV_UP)
+
+static int nl80211_pre_doit(struct genl_ops *ops, struct sk_buff *skb,
+			    struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev;
+	struct net_device *dev;
+	int err;
+	bool rtnl = ops->internal_flags & NL80211_FLAG_NEED_RTNL;
+
+	if (rtnl)
+		rtnl_lock();
+
+	if (ops->internal_flags & NL80211_FLAG_NEED_WIPHY) {
+		rdev = cfg80211_get_dev_from_info(info);
+		if (IS_ERR(rdev)) {
+			if (rtnl)
+				rtnl_unlock();
+			return PTR_ERR(rdev);
+		}
+		info->user_ptr[0] = rdev;
+	} else if (ops->internal_flags & NL80211_FLAG_NEED_NETDEV) {
+		err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
+		if (err) {
+			if (rtnl)
+				rtnl_unlock();
+			return err;
+		}
+		if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP &&
+		    !netif_running(dev)) {
+			cfg80211_unlock_rdev(rdev);
+			dev_put(dev);
+			if (rtnl)
+				rtnl_unlock();
+			return -ENETDOWN;
+		}
+		info->user_ptr[0] = rdev;
+		info->user_ptr[1] = dev;
+	}
+
+	return 0;
+}
+
+static void nl80211_post_doit(struct genl_ops *ops, struct sk_buff *skb,
+			      struct genl_info *info)
+{
+	if (info->user_ptr[0])
+		cfg80211_unlock_rdev(info->user_ptr[0]);
+	if (info->user_ptr[1])
+		dev_put(info->user_ptr[1]);
+	if (ops->internal_flags & NL80211_FLAG_NEED_RTNL)
+		rtnl_unlock();
+}
+
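These two functions are the other half of the conversion. Generic netlink — extended elsewhere in this series with pre_doit/post_doit members on struct genl_family and the internal_flags word on struct genl_ops — brackets every doit() with them, roughly as follows (a summary of the genetlink side, not the literal net/netlink/genetlink.c code):

    if (family->pre_doit) {
        err = family->pre_doit(ops, skb, info);
        if (err)
            return err;
    }
    err = ops->doit(skb, info);
    if (family->post_doit)
        family->post_doit(ops, skb, info);
    return err;

The nl80211 genl_family presumably gains .pre_doit = nl80211_pre_doit and .post_doit = nl80211_post_doit in a part of the patch not shown here; note that nl80211_post_doit only unwinds whatever pre_doit stored in info->user_ptr[].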
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -5059,12 +4447,14 @@ static struct genl_ops nl80211_ops[] = {
 		.dumpit = nl80211_dump_wiphy,
 		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
+		.internal_flags = NL80211_FLAG_NEED_WIPHY,
 	},
 	{
 		.cmd = NL80211_CMD_SET_WIPHY,
 		.doit = nl80211_set_wiphy,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_INTERFACE,
@@ -5072,90 +4462,119 @@ static struct genl_ops nl80211_ops[] = {
 		.dumpit = nl80211_dump_interface,
 		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
+		.internal_flags = NL80211_FLAG_NEED_NETDEV,
 	},
 	{
 		.cmd = NL80211_CMD_SET_INTERFACE,
 		.doit = nl80211_set_interface,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_NEW_INTERFACE,
 		.doit = nl80211_new_interface,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WIPHY |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DEL_INTERFACE,
 		.doit = nl80211_del_interface,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_KEY,
 		.doit = nl80211_get_key,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_KEY,
 		.doit = nl80211_set_key,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_NEW_KEY,
 		.doit = nl80211_new_key,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DEL_KEY,
 		.doit = nl80211_del_key,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_BEACON,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 		.doit = nl80211_addset_beacon,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_NEW_BEACON,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 		.doit = nl80211_addset_beacon,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DEL_BEACON,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 		.doit = nl80211_del_beacon,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_STATION,
 		.doit = nl80211_get_station,
 		.dumpit = nl80211_dump_station,
 		.policy = nl80211_policy,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_STATION,
 		.doit = nl80211_set_station,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_NEW_STATION,
 		.doit = nl80211_new_station,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DEL_STATION,
 		.doit = nl80211_del_station,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_MPATH,
@@ -5163,30 +4582,40 @@ static struct genl_ops nl80211_ops[] = {
 		.dumpit = nl80211_dump_mpath,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_MPATH,
 		.doit = nl80211_set_mpath,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_NEW_MPATH,
 		.doit = nl80211_new_mpath,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DEL_MPATH,
 		.doit = nl80211_del_mpath,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_BSS,
 		.doit = nl80211_set_bss,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_REG,
@@ -5211,18 +4640,24 @@ static struct genl_ops nl80211_ops[] = {
 		.doit = nl80211_get_mesh_params,
 		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_MESH_PARAMS,
 		.doit = nl80211_set_mesh_params,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_TRIGGER_SCAN,
 		.doit = nl80211_trigger_scan,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_SCAN,
@@ -5234,36 +4669,48 @@ static struct genl_ops nl80211_ops[] = {
 		.doit = nl80211_authenticate,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_ASSOCIATE,
 		.doit = nl80211_associate,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DEAUTHENTICATE,
 		.doit = nl80211_deauthenticate,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DISASSOCIATE,
 		.doit = nl80211_disassociate,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_JOIN_IBSS,
 		.doit = nl80211_join_ibss,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_LEAVE_IBSS,
 		.doit = nl80211_leave_ibss,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 #ifdef CONFIG_NL80211_TESTMODE
 	{
@@ -5271,6 +4718,8 @@ static struct genl_ops nl80211_ops[] = {
 		.doit = nl80211_testmode_do,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WIPHY |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 #endif
 	{
@@ -5278,18 +4727,24 @@ static struct genl_ops nl80211_ops[] = {
 		.doit = nl80211_connect,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DISCONNECT,
 		.doit = nl80211_disconnect,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_WIPHY_NETNS,
 		.doit = nl80211_wiphy_netns,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WIPHY |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_SURVEY,
@@ -5301,72 +4756,104 @@ static struct genl_ops nl80211_ops[] = {
 		.doit = nl80211_setdel_pmksa,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_DEL_PMKSA,
 		.doit = nl80211_setdel_pmksa,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_FLUSH_PMKSA,
 		.doit = nl80211_flush_pmksa,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_REMAIN_ON_CHANNEL,
 		.doit = nl80211_remain_on_channel,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL,
 		.doit = nl80211_cancel_remain_on_channel,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_TX_BITRATE_MASK,
 		.doit = nl80211_set_tx_bitrate_mask,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
-		.cmd = NL80211_CMD_REGISTER_ACTION,
-		.doit = nl80211_register_action,
+		.cmd = NL80211_CMD_REGISTER_FRAME,
+		.doit = nl80211_register_mgmt,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
-		.cmd = NL80211_CMD_ACTION,
-		.doit = nl80211_action,
+		.cmd = NL80211_CMD_FRAME,
+		.doit = nl80211_tx_mgmt,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_POWER_SAVE,
 		.doit = nl80211_set_power_save,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_GET_POWER_SAVE,
 		.doit = nl80211_get_power_save,
 		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_CQM,
 		.doit = nl80211_set_cqm,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 	{
 		.cmd = NL80211_CMD_SET_CHANNEL,
 		.doit = nl80211_set_channel,
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+	{
+		.cmd = NL80211_CMD_SET_WDS_PEER,
+		.doit = nl80211_set_wds_peer,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
 	},
 };
 
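Each table entry now declares what its handler needs instead of acquiring it by hand. NL80211_FLAG_NEED_NETDEV_UP is simply the NETDEV bit OR'd with CHECK_NETDEV_UP, which is why nl80211_pre_doit() tests the two bits independently. A standalone restatement of the decoding (flag names shortened for the demo):

    #include <stdio.h>

    #define FLAG_NEED_WIPHY      0x01
    #define FLAG_NEED_NETDEV     0x02
    #define FLAG_NEED_RTNL       0x04
    #define FLAG_CHECK_NETDEV_UP 0x08
    #define FLAG_NEED_NETDEV_UP  (FLAG_NEED_NETDEV | FLAG_CHECK_NETDEV_UP)

    /* userspace re-statement of the pre_doit tests: one bit selects the
     * netdev lookup, the other merely adds the netif_running() check */
    int main(void)
    {
        unsigned int flags = FLAG_NEED_NETDEV_UP | FLAG_NEED_RTNL;

        printf("lock rtnl:      %d\n", !!(flags & FLAG_NEED_RTNL));
        printf("look up netdev: %d\n", !!(flags & FLAG_NEED_NETDEV));
        printf("require up:     %d\n", !!(flags & FLAG_CHECK_NETDEV_UP));
        return 0;
    }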
@@ -6040,9 +5527,9 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
 				  nl80211_mlme_mcgrp.id, gfp);
 }
 
-int nl80211_send_action(struct cfg80211_registered_device *rdev,
+int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 			struct net_device *netdev, u32 nlpid,
 			int freq, const u8 *buf, size_t len, gfp_t gfp)
 {
 	struct sk_buff *msg;
 	void *hdr;
@@ -6052,7 +5539,7 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev,
 	if (!msg)
 		return -ENOMEM;
 
-	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION);
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME);
 	if (!hdr) {
 		nlmsg_free(msg);
 		return -ENOMEM;
@@ -6080,10 +5567,10 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev,
 	return -ENOBUFS;
 }
 
-void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
+void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
 				   struct net_device *netdev, u64 cookie,
 				   const u8 *buf, size_t len, bool ack,
 				   gfp_t gfp)
 {
 	struct sk_buff *msg;
 	void *hdr;
@@ -6092,7 +5579,7 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
 	if (!msg)
 		return;
 
-	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION_TX_STATUS);
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME_TX_STATUS);
 	if (!hdr) {
 		nlmsg_free(msg);
 		return;
@@ -6179,7 +5666,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 
 	list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list)
 		list_for_each_entry_rcu(wdev, &rdev->netdev_list, list)
-			cfg80211_mlme_unregister_actions(wdev, notify->pid);
+			cfg80211_mlme_unregister_socket(wdev, notify->pid);
 
 	rcu_read_unlock();
 
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 2ad7fbc7d9f1..30d2f939150d 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -74,13 +74,13 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev,
 			    struct net_device *dev, const u8 *mac_addr,
 			    struct station_info *sinfo, gfp_t gfp);
 
-int nl80211_send_action(struct cfg80211_registered_device *rdev,
+int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 			struct net_device *netdev, u32 nlpid, int freq,
 			const u8 *buf, size_t len, gfp_t gfp);
-void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
+void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev,
 				   struct net_device *netdev, u64 cookie,
 				   const u8 *buf, size_t len, bool ack,
 				   gfp_t gfp);
 
 void
 nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 1332c445d1c7..dbe35e138e94 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -14,6 +14,7 @@
  * See COPYING for more details.
  */
 
+#include <linux/kernel.h>
 #include <net/cfg80211.h>
 #include <net/ieee80211_radiotap.h>
 #include <asm/unaligned.h>
@@ -45,7 +46,7 @@ static const struct radiotap_align_size rtap_namespace_sizes[] = {
 };
 
 static const struct ieee80211_radiotap_namespace radiotap_ns = {
-	.n_bits = sizeof(rtap_namespace_sizes) / sizeof(rtap_namespace_sizes[0]),
+	.n_bits = ARRAY_SIZE(rtap_namespace_sizes),
 	.align_size = rtap_namespace_sizes,
 };
 
@@ -200,7 +201,7 @@ int ieee80211_radiotap_iterator_next(
 {
 	while (1) {
 		int hit = 0;
-		int pad, align, size, subns, vnslen;
+		int pad, align, size, subns;
 		uint32_t oui;
 
 		/* if no more EXT bits, that's it */
@@ -260,6 +261,27 @@ int ieee80211_radiotap_iterator_next(
 		if (pad)
 			iterator->_arg += align - pad;
 
+		if (iterator->_arg_index % 32 == IEEE80211_RADIOTAP_VENDOR_NAMESPACE) {
+			int vnslen;
+
+			if ((unsigned long)iterator->_arg + size -
+			    (unsigned long)iterator->_rtheader >
+			    (unsigned long)iterator->_max_length)
+				return -EINVAL;
+
+			oui = (*iterator->_arg << 16) |
+				(*(iterator->_arg + 1) << 8) |
+				*(iterator->_arg + 2);
+			subns = *(iterator->_arg + 3);
+
+			find_ns(iterator, oui, subns);
+
+			vnslen = get_unaligned_le16(iterator->_arg + 4);
+			iterator->_next_ns_data = iterator->_arg + size + vnslen;
+			if (!iterator->current_namespace)
+				size += vnslen;
+		}
+
 		/*
 		 * this is what we will return to user, but we need to
 		 * move on first so next call has something fresh to test
@@ -286,40 +308,25 @@ int ieee80211_radiotap_iterator_next(
 		/* these special ones are valid in each bitmap word */
 		switch (iterator->_arg_index % 32) {
 		case IEEE80211_RADIOTAP_VENDOR_NAMESPACE:
-			iterator->_bitmap_shifter >>= 1;
-			iterator->_arg_index++;
-
 			iterator->_reset_on_ext = 1;
 
-			vnslen = get_unaligned_le16(iterator->this_arg + 4);
-			iterator->_next_ns_data = iterator->_arg + vnslen;
-			oui = (*iterator->this_arg << 16) |
-				(*(iterator->this_arg + 1) << 8) |
-				*(iterator->this_arg + 2);
-			subns = *(iterator->this_arg + 3);
-
-			find_ns(iterator, oui, subns);
-
 			iterator->is_radiotap_ns = 0;
-			/* allow parsers to show this information */
+			/*
+			 * If the parser didn't register this vendor
+			 * namespace with us, allow it to show it
+			 * as 'raw'. To do that, set the argument index
+			 * to the vendor namespace.
+			 */
 			iterator->this_arg_index =
 				IEEE80211_RADIOTAP_VENDOR_NAMESPACE;
-			iterator->this_arg_size += vnslen;
-			if ((unsigned long)iterator->this_arg +
-			    iterator->this_arg_size -
-			    (unsigned long)iterator->_rtheader >
-			    (unsigned long)(unsigned long)iterator->_max_length)
-				return -EINVAL;
-			hit = 1;
-			break;
+			if (!iterator->current_namespace)
+				hit = 1;
+			goto next_entry;
 		case IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE:
-			iterator->_bitmap_shifter >>= 1;
-			iterator->_arg_index++;
-
 			iterator->_reset_on_ext = 1;
 			iterator->current_namespace = &radiotap_ns;
 			iterator->is_radiotap_ns = 1;
-			break;
+			goto next_entry;
 		case IEEE80211_RADIOTAP_EXT:
 			/*
 			 * bit 31 was set, there is more
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index f180db0de66c..d14bbf960c18 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -36,6 +36,7 @@
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/random.h>
+#include <linux/ctype.h>
 #include <linux/nl80211.h>
 #include <linux/platform_device.h>
 #include <net/cfg80211.h>
@@ -73,7 +74,11 @@ const struct ieee80211_regdomain *cfg80211_regdomain;
  * - last_request
  */
 static DEFINE_MUTEX(reg_mutex);
-#define assert_reg_lock() WARN_ON(!mutex_is_locked(&reg_mutex))
+
+static inline void assert_reg_lock(void)
+{
+	lockdep_assert_held(&reg_mutex);
+}
 
 /* Used to queue up regulatory hints */
 static LIST_HEAD(reg_requests_list);
@@ -181,14 +186,6 @@ static bool is_alpha2_set(const char *alpha2)
 	return false;
 }
 
-static bool is_alpha_upper(char letter)
-{
-	/* ASCII A - Z */
-	if (letter >= 65 && letter <= 90)
-		return true;
-	return false;
-}
-
 static bool is_unknown_alpha2(const char *alpha2)
 {
 	if (!alpha2)
@@ -220,7 +217,7 @@ static bool is_an_alpha2(const char *alpha2)
 {
 	if (!alpha2)
 		return false;
-	if (is_alpha_upper(alpha2[0]) && is_alpha_upper(alpha2[1]))
+	if (isalpha(alpha2[0]) && isalpha(alpha2[1]))
 		return true;
 	return false;
 }
@@ -1399,6 +1396,11 @@ static DECLARE_WORK(reg_work, reg_todo);
 
 static void queue_regulatory_request(struct regulatory_request *request)
 {
+	if (isalpha(request->alpha2[0]))
+		request->alpha2[0] = toupper(request->alpha2[0]);
+	if (isalpha(request->alpha2[1]))
+		request->alpha2[1] = toupper(request->alpha2[1]);
+
 	spin_lock(&reg_requests_lock);
 	list_add_tail(&request->list, &reg_requests_list);
 	spin_unlock(&reg_requests_lock);
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 5ca8c7180141..503ebb86ba18 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -650,14 +650,14 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub)
 	bss = container_of(pub, struct cfg80211_internal_bss, pub);
 
 	spin_lock_bh(&dev->bss_lock);
+	if (!list_empty(&bss->list)) {
+		list_del_init(&bss->list);
+		dev->bss_generation++;
+		rb_erase(&bss->rbn, &dev->bss_tree);
 
-	list_del(&bss->list);
-	dev->bss_generation++;
-	rb_erase(&bss->rbn, &dev->bss_tree);
-
+		kref_put(&bss->ref, bss_release);
+	}
 	spin_unlock_bh(&dev->bss_lock);
-
-	kref_put(&bss->ref, bss_release);
 }
 EXPORT_SYMBOL(cfg80211_unlink_bss);
 
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index a8c2d6b877ae..e17b0bee6bdc 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -411,7 +411,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
 
 	ASSERT_WDEV_LOCK(wdev);
 
-	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
+		    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
 		return;
 
 	if (wdev->sme_state != CFG80211_SME_CONNECTING)
@@ -548,7 +549,8 @@ void __cfg80211_roamed(struct wireless_dev *wdev, const u8 *bssid,
 
 	ASSERT_WDEV_LOCK(wdev);
 
-	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
+		    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
 		return;
 
 	if (wdev->sme_state != CFG80211_SME_CONNECTED)
@@ -644,7 +646,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
 
 	ASSERT_WDEV_LOCK(wdev);
 
-	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
+		    wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
 		return;
 
 	if (wdev->sme_state != CFG80211_SME_CONNECTED)
@@ -695,7 +698,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
 	 */
 	if (rdev->ops->del_key)
 		for (i = 0; i < 6; i++)
-			rdev->ops->del_key(wdev->wiphy, dev, i, NULL);
+			rdev->ops->del_key(wdev->wiphy, dev, i, false, NULL);
 
 #ifdef CONFIG_CFG80211_WEXT
 	memset(&wrqu, 0, sizeof(wrqu));
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 9f2cef3e0ca0..4294fa22bb2d 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -35,6 +35,14 @@ SHOW_FMT(index, "%d", wiphy_idx);
 SHOW_FMT(macaddress, "%pM", wiphy.perm_addr);
 SHOW_FMT(address_mask, "%pM", wiphy.addr_mask);
 
+static ssize_t name_show(struct device *dev,
+			 struct device_attribute *attr,
+			 char *buf) {
+	struct wiphy *wiphy = &dev_to_rdev(dev)->wiphy;
+	return sprintf(buf, "%s\n", dev_name(&wiphy->dev));
+}
+
+
 static ssize_t addresses_show(struct device *dev,
 			      struct device_attribute *attr,
 			      char *buf)
@@ -57,6 +65,7 @@ static struct device_attribute ieee80211_dev_attrs[] = {
 	__ATTR_RO(macaddress),
 	__ATTR_RO(address_mask),
 	__ATTR_RO(addresses),
+	__ATTR_RO(name),
 	{}
 };
 
@@ -110,6 +119,13 @@ static int wiphy_resume(struct device *dev)
 	return ret;
 }
 
+static const void *wiphy_namespace(struct device *d)
+{
+	struct wiphy *wiphy = container_of(d, struct wiphy, dev);
+
+	return wiphy_net(wiphy);
+}
+
 struct class ieee80211_class = {
 	.name = "ieee80211",
 	.owner = THIS_MODULE,
@@ -120,6 +136,8 @@ struct class ieee80211_class = {
 #endif
 	.suspend = wiphy_suspend,
 	.resume = wiphy_resume,
+	.ns_type = &net_ns_type_operations,
+	.namespace = wiphy_namespace,
 };
 
 int wiphy_sysfs_init(void)
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 0c8a1e8b7690..76120aeda57d 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -144,19 +144,25 @@ void ieee80211_set_bitrate_flags(struct wiphy *wiphy)
 
 int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
 				   struct key_params *params, int key_idx,
-				   const u8 *mac_addr)
+				   bool pairwise, const u8 *mac_addr)
 {
 	int i;
 
 	if (key_idx > 5)
 		return -EINVAL;
 
+	if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
+		return -EINVAL;
+
+	if (pairwise && !mac_addr)
+		return -EINVAL;
+
 	/*
 	 * Disallow pairwise keys with non-zero index unless it's WEP
 	 * (because current deployments use pairwise WEP keys with
 	 * non-zero indizes but 802.11i clearly specifies to use zero)
 	 */
-	if (mac_addr && key_idx &&
+	if (pairwise && key_idx &&
 	    params->cipher != WLAN_CIPHER_SUITE_WEP40 &&
 	    params->cipher != WLAN_CIPHER_SUITE_WEP104)
 		return -EINVAL;
@@ -183,7 +189,14 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
 			return -EINVAL;
 		break;
 	default:
-		return -EINVAL;
+		/*
+		 * We don't know anything about this algorithm,
+		 * allow using it -- but the driver must check
+		 * all parameters! We still check below whether
+		 * or not the driver supports this algorithm,
+		 * of course.
+		 */
+		break;
 	}
 
 	if (params->seq) {
@@ -221,7 +234,7 @@ const unsigned char bridge_tunnel_header[] __aligned(2) =
 	{ 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 };
 EXPORT_SYMBOL(bridge_tunnel_header);
 
-unsigned int ieee80211_hdrlen(__le16 fc)
+unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc)
 {
 	unsigned int hdrlen = 24;
 
@@ -319,7 +332,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
 	      cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
 	case cpu_to_le16(IEEE80211_FCTL_TODS):
 		if (unlikely(iftype != NL80211_IFTYPE_AP &&
-			     iftype != NL80211_IFTYPE_AP_VLAN))
+			     iftype != NL80211_IFTYPE_AP_VLAN &&
+			     iftype != NL80211_IFTYPE_P2P_GO))
 			return -1;
 		break;
 	case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS):
@@ -347,7 +361,8 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
 		break;
 	case cpu_to_le16(IEEE80211_FCTL_FROMDS):
 		if ((iftype != NL80211_IFTYPE_STATION &&
-		     iftype != NL80211_IFTYPE_MESH_POINT) ||
+		     iftype != NL80211_IFTYPE_P2P_CLIENT &&
+		     iftype != NL80211_IFTYPE_MESH_POINT) ||
 		    (is_multicast_ether_addr(dst) &&
 		     !compare_ether_addr(src, addr)))
 			return -1;
@@ -424,6 +439,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
 	switch (iftype) {
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_AP_VLAN:
+	case NL80211_IFTYPE_P2P_GO:
 		fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS);
 		/* DA BSSID SA */
 		memcpy(hdr.addr1, skb->data, ETH_ALEN);
@@ -432,6 +448,7 @@ int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr,
 		hdrlen = 24;
 		break;
 	case NL80211_IFTYPE_STATION:
+	case NL80211_IFTYPE_P2P_CLIENT:
 		fc |= cpu_to_le16(IEEE80211_FCTL_TODS);
 		/* BSSID SA DA */
 		memcpy(hdr.addr1, bssid, ETH_ALEN);
@@ -666,7 +683,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
 	for (i = 0; i < 6; i++) {
 		if (!wdev->connect_keys->params[i].cipher)
 			continue;
-		if (rdev->ops->add_key(wdev->wiphy, dev, i, NULL,
+		if (rdev->ops->add_key(wdev->wiphy, dev, i, false, NULL,
 				       &wdev->connect_keys->params[i])) {
 			printk(KERN_ERR "%s: failed to set key %d\n",
 			       dev->name, i);
@@ -771,7 +788,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 
 	/* if it's part of a bridge, reject changing type to station/ibss */
 	if ((dev->priv_flags & IFF_BRIDGE_PORT) &&
-	    (ntype == NL80211_IFTYPE_ADHOC || ntype == NL80211_IFTYPE_STATION))
+	    (ntype == NL80211_IFTYPE_ADHOC ||
+	     ntype == NL80211_IFTYPE_STATION ||
+	     ntype == NL80211_IFTYPE_P2P_CLIENT))
 		return -EBUSY;
 
 	if (ntype != otype) {
@@ -782,6 +801,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 			cfg80211_leave_ibss(rdev, dev, false);
 			break;
 		case NL80211_IFTYPE_STATION:
+		case NL80211_IFTYPE_P2P_CLIENT:
 			cfg80211_disconnect(rdev, dev,
 					    WLAN_REASON_DEAUTH_LEAVING, true);
 			break;
@@ -810,9 +830,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 			if (dev->ieee80211_ptr->use_4addr)
 				break;
 			/* fall through */
+		case NL80211_IFTYPE_P2P_CLIENT:
 		case NL80211_IFTYPE_ADHOC:
 			dev->priv_flags |= IFF_DONT_BRIDGE;
 			break;
+		case NL80211_IFTYPE_P2P_GO:
 		case NL80211_IFTYPE_AP:
 		case NL80211_IFTYPE_AP_VLAN:
 		case NL80211_IFTYPE_WDS:
@@ -823,7 +845,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 			/* monitor can't bridge anyway */
 			break;
 		case NL80211_IFTYPE_UNSPECIFIED:
-		case __NL80211_IFTYPE_AFTER_LAST:
+		case NUM_NL80211_IFTYPES:
 			/* not happening */
 			break;
 		}
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 7e5c3a45f811..12222ee6ebf2 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -432,14 +432,17 @@ int cfg80211_wext_giwretry(struct net_device *dev,
 EXPORT_SYMBOL_GPL(cfg80211_wext_giwretry);
 
 static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
-				     struct net_device *dev, const u8 *addr,
-				     bool remove, bool tx_key, int idx,
-				     struct key_params *params)
+				     struct net_device *dev, bool pairwise,
+				     const u8 *addr, bool remove, bool tx_key,
+				     int idx, struct key_params *params)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err, i;
 	bool rejoin = false;
 
+	if (pairwise && !addr)
+		return -EINVAL;
+
 	if (!wdev->wext.keys) {
 		wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys),
 					  GFP_KERNEL);
@@ -478,7 +481,13 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
 			__cfg80211_leave_ibss(rdev, wdev->netdev, true);
 			rejoin = true;
 		}
-		err = rdev->ops->del_key(&rdev->wiphy, dev, idx, addr);
+
+		if (!pairwise && addr &&
+		    !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN))
+			err = -ENOENT;
+		else
+			err = rdev->ops->del_key(&rdev->wiphy, dev, idx,
+						 pairwise, addr);
 	}
 	wdev->wext.connect.privacy = false;
 	/*
@@ -507,12 +516,13 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
 	if (addr)
 		tx_key = false;
 
-	if (cfg80211_validate_key_settings(rdev, params, idx, addr))
+	if (cfg80211_validate_key_settings(rdev, params, idx, pairwise, addr))
 		return -EINVAL;
 
 	err = 0;
 	if (wdev->current_bss)
-		err = rdev->ops->add_key(&rdev->wiphy, dev, idx, addr, params);
+		err = rdev->ops->add_key(&rdev->wiphy, dev, idx,
+					 pairwise, addr, params);
 	if (err)
 		return err;
 
@@ -563,17 +573,17 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
 }
 
 static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
-				   struct net_device *dev, const u8 *addr,
-				   bool remove, bool tx_key, int idx,
-				   struct key_params *params)
+				   struct net_device *dev, bool pairwise,
+				   const u8 *addr, bool remove, bool tx_key,
+				   int idx, struct key_params *params)
 {
 	int err;
 
 	/* devlist mutex needed for possible IBSS re-join */
 	mutex_lock(&rdev->devlist_mtx);
 	wdev_lock(dev->ieee80211_ptr);
-	err = __cfg80211_set_encryption(rdev, dev, addr, remove,
-					tx_key, idx, params);
+	err = __cfg80211_set_encryption(rdev, dev, pairwise, addr,
+					remove, tx_key, idx, params);
 	wdev_unlock(dev->ieee80211_ptr);
 	mutex_unlock(&rdev->devlist_mtx);
 
@@ -635,7 +645,7 @@ int cfg80211_wext_siwencode(struct net_device *dev,
 	else if (!remove)
 		return -EINVAL;
 
-	return cfg80211_set_encryption(rdev, dev, NULL, remove,
+	return cfg80211_set_encryption(rdev, dev, false, NULL, remove,
 				       wdev->wext.default_key == -1,
 				       idx, &params);
 }
@@ -725,7 +735,9 @@ int cfg80211_wext_siwencodeext(struct net_device *dev,
 	}
 
 	return cfg80211_set_encryption(
-			rdev, dev, addr, remove,
+			rdev, dev,
+			!(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY),
+			addr, remove,
 			ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY,
 			idx, &params);
 }
@@ -1354,6 +1366,10 @@ struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
 	}
 
 	wstats.qual.updated |= IW_QUAL_NOISE_INVALID;
+	if (sinfo.filled & STATION_INFO_RX_DROP_MISC)
+		wstats.discard.misc = sinfo.rx_dropped_misc;
+	if (sinfo.filled & STATION_INFO_TX_FAILED)
+		wstats.discard.retries = sinfo.tx_failed;
 
 	return &wstats;
 }
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 8f5116f5af19..dc675a3daa3d 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -611,7 +611,7 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev)
 #endif
 
 #ifdef CONFIG_CFG80211_WEXT
-	if (dev->ieee80211_ptr && dev->ieee80211_ptr &&
+	if (dev->ieee80211_ptr &&
 	    dev->ieee80211_ptr->wiphy &&
 	    dev->ieee80211_ptr->wiphy->wext &&
 	    dev->ieee80211_ptr->wiphy->wext->get_wireless_stats)
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 9818198add8a..6fffe62d7c25 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -197,6 +197,8 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev,
 	wdev->wext.connect.ssid_len = len;
 
 	wdev->wext.connect.crypto.control_port = false;
+	wdev->wext.connect.crypto.control_port_ethertype =
+		cpu_to_be16(ETH_P_PAE);
 
 	err = cfg80211_mgd_wext_connect(rdev, wdev);
  out:
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 5e86d4e97dce..f7af98dff409 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -507,14 +507,14 @@ static int x25_listen(struct socket *sock, int backlog)
 	struct sock *sk = sock->sk;
 	int rc = -EOPNOTSUPP;
 
-	lock_kernel();
+	lock_sock(sk);
 	if (sk->sk_state != TCP_LISTEN) {
 		memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN);
 		sk->sk_max_ack_backlog = backlog;
 		sk->sk_state = TCP_LISTEN;
 		rc = 0;
 	}
-	unlock_kernel();
+	release_sock(sk);
 
 	return rc;
 }
@@ -688,7 +688,6 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	struct sockaddr_x25 *addr = (struct sockaddr_x25 *)uaddr;
 	int len, i, rc = 0;
 
-	lock_kernel();
 	if (!sock_flag(sk, SOCK_ZAPPED) ||
 	    addr_len != sizeof(struct sockaddr_x25) ||
 	    addr->sx25_family != AF_X25) {
@@ -704,12 +703,13 @@ static int x25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		}
 	}
 
+	lock_sock(sk);
 	x25_sk(sk)->source_addr = addr->sx25_addr;
 	x25_insert_socket(sk);
 	sock_reset_flag(sk, SOCK_ZAPPED);
+	release_sock(sk);
 	SOCK_DEBUG(sk, "x25_bind: socket is bound\n");
 out:
-	unlock_kernel();
 	return rc;
 }
 
@@ -751,7 +751,6 @@ static int x25_connect(struct socket *sock, struct sockaddr *uaddr,
 	struct x25_route *rt;
 	int rc = 0;
 
-	lock_kernel();
 	lock_sock(sk);
 	if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) {
 		sock->state = SS_CONNECTED;
@@ -829,7 +828,6 @@ out_put_route:
 	x25_route_put(rt);
 out:
 	release_sock(sk);
-	unlock_kernel();
 	return rc;
 }
 
@@ -869,8 +867,7 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
 	struct sk_buff *skb;
 	int rc = -EINVAL;
 
-	lock_kernel();
-	if (!sk || sk->sk_state != TCP_LISTEN)
+	if (!sk)
 		goto out;
 
 	rc = -EOPNOTSUPP;
@@ -878,6 +875,10 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
 		goto out;
 
 	lock_sock(sk);
+	rc = -EINVAL;
+	if (sk->sk_state != TCP_LISTEN)
+		goto out2;
+
 	rc = x25_wait_for_data(sk, sk->sk_rcvtimeo);
 	if (rc)
 		goto out2;
@@ -897,7 +898,6 @@ static int x25_accept(struct socket *sock, struct socket *newsock, int flags)
 out2:
 	release_sock(sk);
 out:
-	unlock_kernel();
 	return rc;
 }
 
@@ -909,7 +909,6 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
 	struct x25_sock *x25 = x25_sk(sk);
 	int rc = 0;
 
-	lock_kernel();
 	if (peer) {
 		if (sk->sk_state != TCP_ESTABLISHED) {
 			rc = -ENOTCONN;
@@ -923,19 +922,6 @@ static int x25_getname(struct socket *sock, struct sockaddr *uaddr,
 	*uaddr_len = sizeof(*sx25);
 
 out:
-	unlock_kernel();
-	return rc;
-}
-
-static unsigned int x25_datagram_poll(struct file *file, struct socket *sock,
-				      poll_table *wait)
-{
-	int rc;
-
-	lock_kernel();
-	rc = datagram_poll(file, sock, wait);
-	unlock_kernel();
-
 	return rc;
 }
 
@@ -1746,7 +1732,7 @@ static const struct proto_ops x25_proto_ops = {
 	.socketpair =	sock_no_socketpair,
 	.accept =	x25_accept,
 	.getname =	x25_getname,
-	.poll =		x25_datagram_poll,
+	.poll =		datagram_poll,
 	.ioctl =	x25_ioctl,
 #ifdef CONFIG_COMPAT
 	.compat_ioctl = compat_x25_ioctl,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index cbab6e1a8c9c..044e77898512 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -50,6 +50,9 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
 static void xfrm_init_pmtu(struct dst_entry *dst);
 static int stale_bundle(struct dst_entry *dst);
+static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst,
+			  struct flowi *fl, int family, int strict);
+
 
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 						int dir);
@@ -2276,7 +2279,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst)
  * still valid.
  */
 
-int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
+static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 		   struct flowi *fl, int family, int strict)
 {
 	struct dst_entry *dst = &first->u.dst;
@@ -2358,8 +2361,6 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 	return 1;
 }
 
-EXPORT_SYMBOL(xfrm_bundle_ok);
-
 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 {
 	struct net *net;